Coverage Report

Created: 2025-06-13 07:02

/src/tesseract/src/textord/colpartition.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        colpartition.h
3
// Description: Class to hold partitions of the page that correspond
4
//              roughly to text lines.
5
// Author:      Ray Smith
6
//
7
// (C) Copyright 2008, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
#ifndef TESSERACT_TEXTORD_COLPARTITION_H_
21
#define TESSERACT_TEXTORD_COLPARTITION_H_
22
23
#include "bbgrid.h"
24
#include "blobbox.h" // For BlobRegionType.
25
#include "ocrblock.h"
26
#include "rect.h" // For TBOX.
27
#include "scrollview.h"
28
#include "tabfind.h"   // For WidthCallback.
29
#include "tabvector.h" // For BLOBNBOX_CLIST.
30
31
#include <algorithm>
32
33
namespace tesseract {
34
35
// Number of colors in the color1, color2 arrays.
36
const int kRGBRMSColors = 4;
37
38
class ColPartition;
39
class ColPartitionSet;
40
class ColPartitionGrid;
41
class WorkingPartSet;
42
class WorkingPartSet_LIST;
43
44
// An enum to indicate how a partition sits on the columns.
45
// The order of flowing/heading/pullout must be kept consistent with
46
// PolyBlockType.
47
enum ColumnSpanningType {
48
  CST_NOISE,   // Strictly between columns.
49
  CST_FLOWING, // Strictly within a single column.
50
  CST_HEADING, // Spans multiple columns.
51
  CST_PULLOUT, // Touches multiple columns, but doesn't span them.
52
  CST_COUNT    // Number of entries.
53
};
54
55
ELIST2IZEH(ColPartition)
56
CLISTIZEH(ColPartition)
57
58
/**
59
 * ColPartition is a partition of a horizontal slice of the page.
60
 * It starts out as a collection of blobs at a particular y-coord in the grid,
61
 * but ends up (after merging and uniquing) as an approximate text line.
62
 * ColPartitions are also used to hold a partitioning of the page into
63
 * columns, each representing one column. Although a ColPartition applies
64
 * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
65
 * emerges, which represents the columns over a wide y-coordinate range.
66
 */
67
class TESS_API ColPartition : public ELIST2<ColPartition>::LINK {
68
public:
69
  // This empty constructor is here only so that the class can be ELISTIZED.
70
  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
71
  // and eliminate CLASSNAME##_copier.
72
  ColPartition() = default;
73
74
  /**
75
   * @param blob_type is the blob_region_type_ of the blobs in this partition.
76
   * @param vertical is the direction of logical vertical on the possibly skewed
77
   * image.
78
   */
79
  ColPartition(BlobRegionType blob_type, const ICOORD &vertical);
80
  /**
81
   * Constructs a fake ColPartition with no BLOBNBOXes to represent a
82
   * horizontal or vertical line, given a type and a bounding box.
83
   */
84
  static ColPartition *MakeLinePartition(BlobRegionType blob_type,
85
                                         const ICOORD &vertical, int left,
86
                                         int bottom, int right, int top);
87
88
  // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
89
  // all made from a single TBOX.
90
  // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
91
  // the ColPartition owns the BLOBNBOX!!!
92
  // Call DeleteBoxes before deleting the ColPartition.
93
  static ColPartition *FakePartition(const TBOX &box, PolyBlockType block_type,
94
                                     BlobRegionType blob_type,
95
                                     BlobTextFlowType flow);
96
97
  // Constructs and returns a ColPartition with the given real BLOBNBOX,
98
  // and sets it up to be a "big" partition (single-blob partition bigger
99
  // than the surrounding text that may be a dropcap, two or more vertically
100
  // touching characters, or some graphic element.
101
  // If the given list is not nullptr, the partition is also added to the list.
102
  static ColPartition *MakeBigPartition(BLOBNBOX *box,
103
                                        ColPartition_LIST *big_part_list);
104
105
  ~ColPartition();
106
107
  // Simple accessors.
108
0
  const TBOX &bounding_box() const {
109
0
    return bounding_box_;
110
0
  }
111
0
  int left_margin() const {
112
0
    return left_margin_;
113
0
  }
114
0
  void set_left_margin(int margin) {
115
0
    left_margin_ = margin;
116
0
  }
117
0
  int right_margin() const {
118
0
    return right_margin_;
119
0
  }
120
0
  void set_right_margin(int margin) {
121
0
    right_margin_ = margin;
122
0
  }
123
0
  int median_top() const {
124
0
    return median_top_;
125
0
  }
126
0
  int median_bottom() const {
127
0
    return median_bottom_;
128
0
  }
129
0
  int median_left() const {
130
0
    return median_left_;
131
0
  }
132
0
  int median_right() const {
133
0
    return median_right_;
134
0
  }
135
0
  int median_height() const {
136
0
    return median_height_;
137
0
  }
138
0
  void set_median_height(int height) {
139
0
    median_height_ = height;
140
0
  }
141
0
  int median_width() const {
142
0
    return median_width_;
143
0
  }
144
0
  void set_median_width(int width) {
145
0
    median_width_ = width;
146
0
  }
147
0
  BlobRegionType blob_type() const {
148
0
    return blob_type_;
149
0
  }
150
0
  void set_blob_type(BlobRegionType t) {
151
0
    blob_type_ = t;
152
0
  }
153
0
  BlobTextFlowType flow() const {
154
0
    return flow_;
155
0
  }
156
0
  void set_flow(BlobTextFlowType f) {
157
0
    flow_ = f;
158
0
  }
159
0
  int good_blob_score() const {
160
0
    return good_blob_score_;
161
0
  }
162
0
  bool good_width() const {
163
0
    return good_width_;
164
0
  }
165
0
  bool good_column() const {
166
0
    return good_column_;
167
0
  }
168
0
  bool left_key_tab() const {
169
0
    return left_key_tab_;
170
0
  }
171
0
  int left_key() const {
172
0
    return left_key_;
173
0
  }
174
0
  bool right_key_tab() const {
175
0
    return right_key_tab_;
176
0
  }
177
0
  int right_key() const {
178
0
    return right_key_;
179
0
  }
180
0
  PolyBlockType type() const {
181
0
    return type_;
182
0
  }
183
0
  void set_type(PolyBlockType t) {
184
0
    type_ = t;
185
0
  }
186
0
  BLOBNBOX_CLIST *boxes() {
187
0
    return &boxes_;
188
0
  }
189
0
  int boxes_count() const {
190
0
    return boxes_.length();
191
0
  }
192
0
  void set_vertical(const ICOORD &v) {
193
0
    vertical_ = v;
194
0
  }
195
0
  ColPartition_CLIST *upper_partners() {
196
0
    return &upper_partners_;
197
0
  }
198
0
  ColPartition_CLIST *lower_partners() {
199
0
    return &lower_partners_;
200
0
  }
201
0
  void set_working_set(WorkingPartSet *working_set) {
202
0
    working_set_ = working_set;
203
0
  }
204
0
  bool block_owned() const {
205
0
    return block_owned_;
206
0
  }
207
0
  void set_block_owned(bool owned) {
208
0
    block_owned_ = owned;
209
0
  }
210
0
  bool desperately_merged() const {
211
0
    return desperately_merged_;
212
0
  }
213
0
  ColPartitionSet *column_set() const {
214
0
    return column_set_;
215
0
  }
216
0
  void set_side_step(int step) {
217
0
    side_step_ = step;
218
0
  }
219
0
  int bottom_spacing() const {
220
0
    return bottom_spacing_;
221
0
  }
222
0
  void set_bottom_spacing(int spacing) {
223
0
    bottom_spacing_ = spacing;
224
0
  }
225
0
  int top_spacing() const {
226
0
    return top_spacing_;
227
0
  }
228
0
  void set_top_spacing(int spacing) {
229
0
    top_spacing_ = spacing;
230
0
  }
231
232
0
  void set_table_type() {
233
0
    if (type_ != PT_TABLE) {
234
0
      type_before_table_ = type_;
235
0
      type_ = PT_TABLE;
236
0
    }
237
0
  }
238
0
  void clear_table_type() {
239
0
    if (type_ == PT_TABLE) {
240
0
      type_ = type_before_table_;
241
0
    }
242
0
  }
243
0
  bool inside_table_column() {
244
0
    return inside_table_column_;
245
0
  }
246
0
  void set_inside_table_column(bool val) {
247
0
    inside_table_column_ = val;
248
0
  }
249
0
  ColPartition *nearest_neighbor_above() const {
250
0
    return nearest_neighbor_above_;
251
0
  }
252
0
  void set_nearest_neighbor_above(ColPartition *part) {
253
0
    nearest_neighbor_above_ = part;
254
0
  }
255
0
  ColPartition *nearest_neighbor_below() const {
256
0
    return nearest_neighbor_below_;
257
0
  }
258
0
  void set_nearest_neighbor_below(ColPartition *part) {
259
0
    nearest_neighbor_below_ = part;
260
0
  }
261
0
  int space_above() const {
262
0
    return space_above_;
263
0
  }
264
0
  void set_space_above(int space) {
265
0
    space_above_ = space;
266
0
  }
267
0
  int space_below() const {
268
0
    return space_below_;
269
0
  }
270
0
  void set_space_below(int space) {
271
0
    space_below_ = space;
272
0
  }
273
0
  int space_to_left() const {
274
0
    return space_to_left_;
275
0
  }
276
0
  void set_space_to_left(int space) {
277
0
    space_to_left_ = space;
278
0
  }
279
0
  int space_to_right() const {
280
0
    return space_to_right_;
281
0
  }
282
0
  void set_space_to_right(int space) {
283
0
    space_to_right_ = space;
284
0
  }
285
0
  uint8_t *color1() {
286
0
    return color1_;
287
0
  }
288
0
  uint8_t *color2() {
289
0
    return color2_;
290
0
  }
291
0
  bool owns_blobs() const {
292
0
    return owns_blobs_;
293
0
  }
294
0
  void set_owns_blobs(bool owns_blobs) {
295
    // Do NOT change ownership flag when there are blobs in the list.
296
    // Immediately set the ownership flag when creating copies.
297
0
    ASSERT_HOST(boxes_.empty());
298
0
    owns_blobs_ = owns_blobs;
299
0
  }
300
301
  // Inline quasi-accessors that require some computation.
302
303
  // Returns the middle y-coord of the bounding box.
304
0
  int MidY() const {
305
0
    return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306
0
  }
307
  // Returns the middle y-coord of the median top and bottom.
308
0
  int MedianY() const {
309
0
    return (median_top_ + median_bottom_) / 2;
310
0
  }
311
  // Returns the middle x-coord of the bounding box.
312
0
  int MidX() const {
313
0
    return (bounding_box_.left() + bounding_box_.right()) / 2;
314
0
  }
315
  // Returns the sort key at any given x,y.
316
0
  int SortKey(int x, int y) const {
317
0
    return TabVector::SortKey(vertical_, x, y);
318
0
  }
319
  // Returns the x corresponding to the sortkey, y pair.
320
0
  int XAtY(int sort_key, int y) const {
321
0
    return TabVector::XAtY(vertical_, sort_key, y);
322
0
  }
323
  // Returns the x difference between the two sort keys.
324
0
  int KeyWidth(int left_key, int right_key) const {
325
0
    return (right_key - left_key) / vertical_.y();
326
0
  }
327
  // Returns the column width between the left and right keys.
328
0
  int ColumnWidth() const {
329
0
    return KeyWidth(left_key_, right_key_);
330
0
  }
331
  // Returns the sort key of the box left edge.
332
0
  int BoxLeftKey() const {
333
0
    return SortKey(bounding_box_.left(), MidY());
334
0
  }
335
  // Returns the sort key of the box right edge.
336
0
  int BoxRightKey() const {
337
0
    return SortKey(bounding_box_.right(), MidY());
338
0
  }
339
  // Returns the left edge at the given y, using the sort key.
340
0
  int LeftAtY(int y) const {
341
0
    return XAtY(left_key_, y);
342
0
  }
343
  // Returns the right edge at the given y, using the sort key.
344
0
  int RightAtY(int y) const {
345
0
    return XAtY(right_key_, y);
346
0
  }
347
  // Returns true if the right edge of this is to the left of the right
348
  // edge of other.
349
0
  bool IsLeftOf(const ColPartition &other) const {
350
0
    return bounding_box_.right() < other.bounding_box_.right();
351
0
  }
352
  // Returns true if the partition contains the given x coordinate at the y.
353
0
  bool ColumnContains(int x, int y) const {
354
0
    return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355
0
  }
356
  // Returns true if there are no blobs in the list.
357
0
  bool IsEmpty() const {
358
0
    return boxes_.empty();
359
0
  }
360
  // Returns true if there is a single blob in the list.
361
0
  bool IsSingleton() const {
362
0
    return boxes_.singleton();
363
0
  }
364
  // Returns true if this and other overlap horizontally by bounding box.
365
0
  bool HOverlaps(const ColPartition &other) const {
366
0
    return bounding_box_.x_overlap(other.bounding_box_);
367
0
  }
368
  // Returns true if this and other's bounding boxes overlap vertically.
369
  // TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
370
0
  bool VOverlaps(const ColPartition &other) const {
371
0
    return bounding_box_.y_gap(other.bounding_box_) < 0;
372
0
  }
373
  // Returns the vertical overlap (by median) of this and other.
374
  // WARNING! Only makes sense on horizontal partitions!
375
0
  int VCoreOverlap(const ColPartition &other) const {
376
0
    if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
377
0
      return 0;
378
0
    }
379
0
    return std::min(median_top_, other.median_top_) -
380
0
           std::max(median_bottom_, other.median_bottom_);
381
0
  }
382
  // Returns the horizontal overlap (by median) of this and other.
383
  // WARNING! Only makes sense on vertical partitions!
384
0
  int HCoreOverlap(const ColPartition &other) const {
385
0
    return std::min(median_right_, other.median_right_) -
386
0
           std::max(median_left_, other.median_left_);
387
0
  }
388
  // Returns true if this and other overlap significantly vertically.
389
  // WARNING! Only makes sense on horizontal partitions!
390
0
  bool VSignificantCoreOverlap(const ColPartition &other) const {
391
0
    if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
392
0
      return false;
393
0
    }
394
0
    int overlap = VCoreOverlap(other);
395
0
    int height = std::min(median_top_ - median_bottom_,
396
0
                          other.median_top_ - other.median_bottom_);
397
0
    return overlap * 3 > height;
398
0
  }
399
  // Returns true if this and other can be combined without putting a
400
  // horizontal step in either left or right edge of the resulting block.
401
0
  bool WithinSameMargins(const ColPartition &other) const {
402
0
    return left_margin_ <= other.bounding_box_.left() &&
403
0
           bounding_box_.left() >= other.left_margin_ &&
404
0
           bounding_box_.right() <= other.right_margin_ &&
405
0
           right_margin_ >= other.bounding_box_.right();
406
0
  }
407
  // Returns true if the region types (aligned_text_) match.
408
  // Lines never match anything, as they should never be merged or chained.
409
0
  bool TypesMatch(const ColPartition &other) const {
410
0
    return TypesMatch(blob_type_, other.blob_type_);
411
0
  }
412
0
  static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
413
0
    return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
414
0
           !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
415
0
  }
416
417
  // Returns true if the types are similar to each other.
418
0
  static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
419
0
    return (type1 == type2 ||
420
0
            (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
421
0
            (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
422
0
  }
423
424
  // Returns true if partitions is of horizontal line type
425
0
  bool IsLineType() const {
426
0
    return PTIsLineType(type_);
427
0
  }
428
  // Returns true if partitions is of image type
429
0
  bool IsImageType() const {
430
0
    return PTIsImageType(type_);
431
0
  }
432
  // Returns true if partitions is of text type
433
0
  bool IsTextType() const {
434
0
    return PTIsTextType(type_);
435
0
  }
436
  // Returns true if partitions is of pullout(inter-column) type
437
0
  bool IsPulloutType() const {
438
0
    return PTIsPulloutType(type_);
439
0
  }
440
  // Returns true if the partition is of an exclusively vertical type.
441
0
  bool IsVerticalType() const {
442
0
    return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
443
0
  }
444
  // Returns true if the partition is of a definite horizontal type.
445
0
  bool IsHorizontalType() const {
446
0
    return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
447
0
  }
448
  // Returns true is the partition is of a type that cannot be merged.
449
0
  bool IsUnMergeableType() const {
450
0
    return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
451
0
  }
452
  // Returns true if this partition is a vertical line
453
  // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
454
0
  bool IsVerticalLine() const {
455
0
    return IsVerticalType() && IsLineType();
456
0
  }
457
  // Returns true if this partition is a horizontal line
458
  // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
459
0
  bool IsHorizontalLine() const {
460
0
    return IsHorizontalType() && IsLineType();
461
0
  }
462
463
  // Adds the given box to the partition, updating the partition bounds.
464
  // The list of boxes in the partition is updated, ensuring that no box is
465
  // recorded twice, and the boxes are kept in increasing left position.
466
  void AddBox(BLOBNBOX *box);
467
468
  // Removes the given box from the partition, updating the bounds.
469
  void RemoveBox(BLOBNBOX *box);
470
471
  // Returns the tallest box in the partition, as measured perpendicular to the
472
  // presumed flow of text.
473
  BLOBNBOX *BiggestBox();
474
475
  // Returns the bounding box excluding the given box.
476
  TBOX BoundsWithoutBox(BLOBNBOX *box);
477
478
  // Claims the boxes in the boxes_list by marking them with a this owner
479
  // pointer.
480
  void ClaimBoxes();
481
482
  // nullptr the owner of the blobs in this partition, so they can be deleted
483
  // independently of the ColPartition.
484
  void DisownBoxes();
485
  // nullptr the owner of the blobs in this partition that are owned by this
486
  // partition, so they can be deleted independently of the ColPartition.
487
  // Any blobs that are not owned by this partition get to keep their owner
488
  // without an assert failure.
489
  void DisownBoxesNoAssert();
490
  // Nulls the owner of the blobs in this partition that are owned by this
491
  // partition and not leader blobs, removing them from the boxes_ list, thus
492
  // turning this partition back to a leader partition if it contains a leader,
493
  // or otherwise leaving it empty. Returns true if any boxes remain.
494
  bool ReleaseNonLeaderBoxes();
495
496
  // Delete the boxes that this partition owns.
497
  void DeleteBoxes();
498
499
  // Reflects the partition in the y-axis, assuming that its blobs have
500
  // already been done. Corrects only a limited part of the members, since
501
  // this function is assumed to be used shortly after initial creation, which
502
  // is before a lot of the members are used.
503
  void ReflectInYAxis();
504
505
  // Returns true if this is a legal partition - meaning that the conditions
506
  // left_margin <= bounding_box left
507
  // left_key <= bounding box left key
508
  // bounding box left <= bounding box right
509
  // and likewise for right margin and key
510
  // are all met.
511
  bool IsLegal();
512
513
  // Returns true if the left and right edges are approximately equal.
514
  bool MatchingColumns(const ColPartition &other) const;
515
516
  // Returns true if the colors match for two text partitions.
517
  bool MatchingTextColor(const ColPartition &other) const;
518
519
  // Returns true if the sizes match for two text partitions,
520
  // taking orientation into account
521
  bool MatchingSizes(const ColPartition &other) const;
522
523
  // Returns true if there is no tabstop violation in merging this and other.
524
  bool ConfirmNoTabViolation(const ColPartition &other) const;
525
526
  // Returns true if other has a similar stroke width to this.
527
  bool MatchingStrokeWidth(const ColPartition &other,
528
                           double fractional_tolerance,
529
                           double constant_tolerance) const;
530
  // Returns true if candidate is an acceptable diacritic base char merge
531
  // with this as the diacritic.
532
  bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const;
533
534
  // Sets the sort key using either the tab vector, or the bounding box if
535
  // the tab vector is nullptr. If the tab_vector lies inside the bounding_box,
536
  // use the edge of the box as a key any way.
537
  void SetLeftTab(const TabVector *tab_vector);
538
  void SetRightTab(const TabVector *tab_vector);
539
540
  // Copies the left/right tab from the src partition, but if take_box is
541
  // true, copies the box instead and uses that as a key.
542
  void CopyLeftTab(const ColPartition &src, bool take_box);
543
  void CopyRightTab(const ColPartition &src, bool take_box);
544
545
  // Returns the left rule line x coord of the leftmost blob.
546
  int LeftBlobRule() const;
547
  // Returns the right rule line x coord of the rightmost blob.
548
  int RightBlobRule() const;
549
550
  // Returns the density value for a particular BlobSpecialTextType.
551
  float SpecialBlobsDensity(const BlobSpecialTextType type) const;
552
  // Returns the number of blobs for a  particular BlobSpecialTextType.
553
  int SpecialBlobsCount(const BlobSpecialTextType type);
554
  // Set the density value for a particular BlobSpecialTextType, should ONLY be
555
  // used for debugging or testing. In production code, use
556
  // ComputeSpecialBlobsDensity instead.
557
  void SetSpecialBlobsDensity(const BlobSpecialTextType type,
558
                              const float density);
559
  // Compute the SpecialTextType density of blobs, where we assume
560
  // that the SpecialTextType in the boxes_ has been set.
561
  void ComputeSpecialBlobsDensity();
562
563
  // Add a partner above if upper, otherwise below.
564
  // Add them uniquely and keep the list sorted by box left.
565
  // Partnerships are added symmetrically to partner and this.
566
  void AddPartner(bool upper, ColPartition *partner);
567
  // Removes the partner from this, but does not remove this from partner.
568
  // This asymmetric removal is so as not to mess up the iterator that is
569
  // working on partner's partner list.
570
  void RemovePartner(bool upper, ColPartition *partner);
571
  // Returns the partner if the given partner is a singleton, otherwise nullptr.
572
  ColPartition *SingletonPartner(bool upper);
573
574
  // Merge with the other partition and delete it.
575
  void Absorb(ColPartition *other, const WidthCallback &cb);
576
577
  // Returns true if the overlap between this and the merged pair of
578
  // merge candidates is sufficiently trivial to be allowed.
579
  // The merged box can graze the edge of this by the ok_box_overlap
580
  // if that exceeds the margin to the median top and bottom.
581
  bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2,
582
                      int ok_box_overlap, bool debug);
583
584
  // Find the blob at which to split this to minimize the overlap with the
585
  // given box. Returns the first blob to go in the second partition.
586
  BLOBNBOX *OverlapSplitBlob(const TBOX &box);
587
588
  // Split this partition keeping the first half in this and returning
589
  // the second half.
590
  // Splits by putting the split_blob and the blobs that follow
591
  // in the second half, and the rest in the first half.
592
  ColPartition *SplitAtBlob(BLOBNBOX *split_blob);
593
594
  // Splits this partition at the given x coordinate, returning the right
595
  // half and keeping the left half in this.
596
  ColPartition *SplitAt(int split_x);
597
598
  // Recalculates all the coordinate limits of the partition.
599
  void ComputeLimits();
600
601
  // Returns the number of boxes that overlap the given box.
602
  int CountOverlappingBoxes(const TBOX &box);
603
604
  // Computes and sets the type_, first_column_, last_column_ and column_set_.
605
  // resolution refers to the ppi resolution of the image.
606
  void SetPartitionType(int resolution, ColPartitionSet *columns);
607
608
  // Returns the PartitionType from the current BlobRegionType and a column
609
  // flow spanning type ColumnSpanningType, generated by
610
  // ColPartitionSet::SpanningType, that indicates how the partition sits
611
  // in the columns.
612
  PolyBlockType PartitionType(ColumnSpanningType flow) const;
613
614
  // Returns the first and last column touched by this partition.
615
  // resolution refers to the ppi resolution of the image.
616
  void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col,
617
                   int *last_col);
618
619
  // Sets the internal flags good_width_ and good_column_.
620
  void SetColumnGoodness(const WidthCallback &cb);
621
622
  // Determines whether the blobs in this partition mostly represent
623
  // a leader (fixed pitch sequence) and sets the member blobs accordingly.
624
  // Note that height is assumed to have been tested elsewhere, and that this
625
  // function will find most fixed-pitch text as leader without a height filter.
626
  // Leader detection is limited to sequences of identical width objects,
627
  // such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
628
  bool MarkAsLeaderIfMonospaced();
629
  // Given the result of TextlineProjection::EvaluateColPartition, (positive for
630
  // horizontal text, negative for vertical text, and near zero for non-text),
631
  // sets the blob_type_ and flow_ for this partition to indicate whether it
632
  // is strongly or weakly vertical or horizontal text, or non-text.
633
  void SetRegionAndFlowTypesFromProjectionValue(int value);
634
635
  // Sets all blobs with the partition blob type and flow, but never overwrite
636
  // leader blobs, as we need to be able to identify them later.
637
  void SetBlobTypes();
638
639
  // Returns true if a decent baseline can be fitted through the blobs.
640
  // Works for both horizontal and vertical text.
641
  bool HasGoodBaseline();
642
643
  // Adds this ColPartition to a matching WorkingPartSet if one can be found,
644
  // otherwise starts a new one in the appropriate column, ending the previous.
645
  void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright,
646
                       int resolution, ColPartition_LIST *used_parts,
647
                       WorkingPartSet_LIST *working_set);
648
649
  // From the given block_parts list, builds one or more BLOCKs and
650
  // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
651
  // Created blocks are appended to the end of completed_blocks and to_blocks.
652
  // The used partitions are put onto used_parts, as they may still be referred
653
  // to in the partition grid. bleft, tright and resolution are the bounds
654
  // and resolution of the original image.
655
  static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
656
                                int resolution, ColPartition_LIST *block_parts,
657
                                ColPartition_LIST *used_parts,
658
                                BLOCK_LIST *completed_blocks,
659
                                TO_BLOCK_LIST *to_blocks);
660
  // Constructs a block from the given list of partitions.
661
  // Arguments are as LineSpacingBlocks above.
662
  static TO_BLOCK *MakeBlock(const ICOORD &bleft, const ICOORD &tright,
663
                             ColPartition_LIST *block_parts,
664
                             ColPartition_LIST *used_parts);
665
666
  // Constructs a block from the given list of vertical text partitions.
667
  // Currently only creates rectangular blocks.
668
  static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft,
669
                                         const ICOORD &tright,
670
                                         ColPartition_LIST *block_parts,
671
                                         ColPartition_LIST *used_parts);
672
673
  // Makes a TO_ROW matching this and moves all the blobs to it, transferring
674
  // ownership to returned TO_ROW.
675
  TO_ROW *MakeToRow();
676
677
  // Returns a copy of everything except the list of boxes. The resulting
678
  // ColPartition is only suitable for keeping in a column candidate list.
679
  ColPartition *ShallowCopy() const;
680
  // Returns a copy of everything with a shallow copy of the blobs.
681
  // The blobs are still owned by their original parent, so they are
682
  // treated as read-only.
683
  ColPartition *CopyButDontOwnBlobs();
684
685
#ifndef GRAPHICS_DISABLED
686
  // Provides a color for BBGrid to draw the rectangle.
687
  ScrollView::Color BoxColor() const;
688
#endif // !GRAPHICS_DISABLED
689
690
  // Prints debug information on this.
691
  void Print() const;
692
  // Prints debug information on the colors.
693
  void PrintColors();
694
695
  // Sets the types of all partitions in the run to be the max of the types.
696
  void SmoothPartnerRun(int working_set_count);
697
698
  // Cleans up the partners of the given type so that there is at most
699
  // one partner. This makes block creation simpler.
700
  // If get_desperate is true, goes to more desperate merge methods
701
  // to merge flowing text before breaking partnerships.
702
  void RefinePartners(PolyBlockType type, bool get_desperate,
703
                      ColPartitionGrid *grid);
704
705
  // Returns true if this column partition is in the same column as
706
  // part. This function will only work after the SetPartitionType function
707
  // has been called on both column partitions. This is useful for
708
  // doing a SideSearch when you want things in the same page column.
709
  bool IsInSameColumnAs(const ColPartition &part) const;
710
711
  // Sort function to sort by bounding box.
712
0
  static int SortByBBox(const ColPartition *part1, const ColPartition *part2) {
713
0
    int mid_y1 = part1->bounding_box_.y_middle();
714
0
    int mid_y2 = part2->bounding_box_.y_middle();
715
0
    if ((part2->bounding_box_.bottom() <= mid_y1 &&
716
0
         mid_y1 <= part2->bounding_box_.top()) ||
717
0
        (part1->bounding_box_.bottom() <= mid_y2 &&
718
0
         mid_y2 <= part1->bounding_box_.top())) {
719
      // Sort by increasing x.
720
0
      return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
721
0
    }
722
    // Sort by decreasing y.
723
0
    return mid_y2 - mid_y1;
724
0
  }
725
726
  // Sets the column bounds. Primarily used in testing.
727
0
  void set_first_column(int column) {
728
0
    first_column_ = column;
729
0
  }
730
0
  void set_last_column(int column) {
731
0
    last_column_ = column;
732
0
  }
733
734
private:
735
  // Cleans up the partners above if upper is true, else below.
736
  // If get_desperate is true, goes to more desperate merge methods
737
  // to merge flowing text before breaking partnerships.
738
  void RefinePartnersInternal(bool upper, bool get_desperate,
739
                              ColPartitionGrid *grid);
740
  // Restricts the partners to only desirable types. For text and BRT_HLINE this
741
  // means the same type_ , and for image types it means any image type.
742
  void RefinePartnersByType(bool upper, ColPartition_CLIST *partners);
743
  // Remove transitive partnerships: this<->a, and a<->b and this<->b.
744
  // Gets rid of this<->b, leaving a clean chain.
745
  // Also if we have this<->a and a<->this, then gets rid of this<->a, as
746
  // this has multiple partners.
747
  void RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partners);
748
  // If multiple text partners can be merged, then do so.
749
  // If desperate is true, then an increase in overlap with the merge is
750
  // allowed. If the overlap increases, then the desperately_merged_ flag
751
  // is set, indicating that the textlines probably need to be regenerated
752
  // by aggressive line fitting/splitting, as there are probably vertically
753
  // joined blobs that cross textlines.
754
  void RefineTextPartnersByMerge(bool upper, bool desperate,
755
                                 ColPartition_CLIST *partners,
756
                                 ColPartitionGrid *grid);
757
  // Keep the partner with the biggest overlap.
758
  void RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners);
759
760
  // Return true if bbox belongs better in this than other.
761
  bool ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other);
762
763
  // Smoothes the spacings in the list into groups of equal linespacing.
764
  // resolution is the resolution of the original image, used as a basis
765
  // for thresholds in change of spacing. page_height is in pixels.
766
  static void SmoothSpacings(int resolution, int page_height,
767
                             ColPartition_LIST *parts);
768
769
  // Returns true if the parts array of pointers to partitions matches the
770
  // condition for a spacing blip. See SmoothSpacings for what this means
771
  // and how it is used.
772
  static bool OKSpacingBlip(int resolution, int median_spacing,
773
                            ColPartition **parts, int offset);
774
775
  // Returns true if both the top and bottom spacings of this match the given
776
  // spacing to within suitable margins dictated by the image resolution.
777
  bool SpacingEqual(int spacing, int resolution) const;
778
779
  // Returns true if both the top and bottom spacings of this and other
780
  // match to within suitable margins dictated by the image resolution.
781
  bool SpacingsEqual(const ColPartition &other, int resolution) const;
782
783
  // Returns true if the sum spacing of this and other match the given
784
  // spacing (or twice the given spacing) to within a suitable margin dictated
785
  // by the image resolution.
786
  bool SummedSpacingOK(const ColPartition &other, int spacing,
787
                       int resolution) const;
788
789
  // Returns a suitable spacing margin that can be applied to bottoms of
790
  // text lines, based on the resolution and the stored side_step_.
791
  int BottomSpacingMargin(int resolution) const;
792
793
  // Returns a suitable spacing margin that can be applied to tops of
794
  // text lines, based on the resolution and the stored side_step_.
795
  int TopSpacingMargin(int resolution) const;
796
797
  // Returns true if the median text sizes of this and other agree to within
798
  // a reasonable multiplicative factor.
799
  bool SizesSimilar(const ColPartition &other) const;
800
801
  // Computes and returns in start, end a line segment formed from a
802
  // forwards-iterated group of left edges of partitions that satisfy the
803
  // condition that the rightmost left margin is to the left of the
804
  // leftmost left bounding box edge.
805
  // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
806
  // directions, and to loosely wrap images.
807
  static void LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end);
808
  // Computes and returns in start, end a line segment formed from a
809
  // backwards-iterated group of right edges of partitions that satisfy the
810
  // condition that the leftmost right margin is to the right of the
811
  // rightmost right bounding box edge.
812
  // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
813
  // directions, and to loosely wrap images.
814
  static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
815
                           ICOORD *end);
816
817
  // The margins are determined by the position of the nearest vertically
818
  // overlapping neighbour to the side. They indicate the maximum extent
819
  // that the block/column may be extended without touching something else.
820
  // Leftmost coordinate that the region may occupy over the y limits.
821
  int left_margin_ = 0;
822
  // Rightmost coordinate that the region may occupy over the y limits.
823
  int right_margin_ = 0;
824
  // Bounding box of all blobs in the partition.
825
  TBOX bounding_box_;
826
  // Median top and bottom of blobs in this partition.
827
  int median_bottom_ = 0;
828
  int median_top_ = 0;
829
  // Median height of blobs in this partition.
830
  int median_height_ = 0;
831
  // Median left and right of blobs in this partition.
832
  int median_left_ = 0;
833
  int median_right_ = 0;
834
  // Median width of blobs in this partition.
835
  int median_width_ = 0;
836
  // blob_region_type_ for the blobs in this partition.
837
  BlobRegionType blob_type_ = BRT_UNKNOWN;
838
  BlobTextFlowType flow_ = BTFT_NONE; // Quality of text flow.
839
  // Total of GoodTextBlob results for all blobs in the partition.
840
  int good_blob_score_ = 0;
841
  // True if this partition has a common width.
842
  bool good_width_ = false;
843
  // True if this is a good column candidate.
844
  bool good_column_ = false;
845
  // True if the left_key_ is from a tab vector.
846
  bool left_key_tab_ = false;
847
  // True if the right_key_ is from a tab vector.
848
  bool right_key_tab_ = false;
849
  // Left and right sort keys for the edges of the partition.
850
  // If the respective *_key_tab_ is true then this key came from a tab vector.
851
  // If not, then the class promises to keep the key equal to the sort key
852
  // for the respective edge of the bounding box at the MidY, so that
853
  // LeftAtY and RightAtY always returns an x coordinate on the line parallel
854
  // to vertical_ through the bounding box edge at MidY.
855
  int left_key_ = 0;
856
  int right_key_ = 0;
857
  // Type of this partition after looking at its relation to the columns.
858
  PolyBlockType type_ = PT_UNKNOWN;
859
  // The global vertical skew direction.
860
  ICOORD vertical_;
861
  // All boxes in the partition stored in increasing left edge coordinate.
862
  BLOBNBOX_CLIST boxes_;
863
  // The partitions above that matched this.
864
  ColPartition_CLIST upper_partners_;
865
  // The partitions below that matched this.
866
  ColPartition_CLIST lower_partners_;
867
  // The WorkingPartSet it lives in while blocks are being made.
868
  WorkingPartSet *working_set_ = nullptr;
869
  // Column_set_ is the column layout applicable to this ColPartition.
870
  ColPartitionSet *column_set_ = nullptr;
871
  // Flag is true when AddBox is sorting vertically, false otherwise.
872
  bool last_add_was_vertical_ = false;
873
  // True when the partition's ownership has been taken from the grid and
874
  // placed in a working set, or, after that, in the good_parts_ list.
875
  bool block_owned_ = false;
876
  // Flag to indicate that this partition was subjected to a desperate merge,
877
  // and therefore the textlines need rebuilding.
878
  bool desperately_merged_ = false;
879
  bool owns_blobs_ = true; // Does the partition own its blobs?
880
  // The first and last column that this partition applies to.
881
  // Flowing partitions (see type_) will have an equal first and last value
882
  // of the form 2n + 1, where n is the zero-based index into the partitions
883
  // in column_set_. (See ColPartitionSet::GetColumnByIndex).
884
  // Heading partitions will have unequal values of the same form.
885
  // Pullout partitions will have equal values, but may have even values,
886
  // indicating placement between columns.
887
  int first_column_ = -1;
888
  int last_column_ = -1;
889
  // Linespacing data.
890
  int side_step_ = 0;      // Median y-shift to next blob on same line.
891
  int top_spacing_ = 0;    // Line spacing from median_top_.
892
  int bottom_spacing_ = 0; // Line spacing from median_bottom_.
893
894
  // Nearest neighbor above with major x-overlap
895
  ColPartition *nearest_neighbor_above_ = nullptr;
896
  // Nearest neighbor below with major x-overlap
897
  ColPartition *nearest_neighbor_below_ = nullptr;
898
  int space_above_ = 0;    // Distance from nearest_neighbor_above
899
  int space_below_ = 0;    // Distance from nearest_neighbor_below
900
  int space_to_left_ = 0;  // Distance from the left edge of the column
901
  int space_to_right_ = 0; // Distance from the right edge of the column
902
  // Color foreground/background data.
903
  uint8_t color1_[kRGBRMSColors];
904
  uint8_t color2_[kRGBRMSColors];
905
  // The density of special blobs.
906
  float special_blobs_densities_[BSTT_COUNT];
907
  // Type of this partition before considering it as a table cell. This is
908
  // used to revert the type if a partition is first marked as a table cell but
909
  // later filtering steps decide it does not belong to a table
910
  PolyBlockType type_before_table_ = PT_UNKNOWN;
911
  // Check whether the current partition has been assigned to a table column.
912
  bool inside_table_column_ = false;
913
};
914
915
// Typedef it now in case it becomes a class later.
916
using ColPartitionGridSearch =
917
    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
918
919
} // namespace tesseract.
920
921
#endif // TESSERACT_TEXTORD_COLPARTITION_H_