Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/ccstruct/blobbox.h
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 * File:        blobbox.h  (Formerly blobnbox.h)
3
 * Description: Code for the textord blob class.
4
 * Author:      Ray Smith
5
 *
6
 * (C) Copyright 1992, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
#ifndef BLOBBOX_H
20
#define BLOBBOX_H
21
22
#include "elst.h"       // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
23
#include "elst2.h"      // for ELIST2_ITERATOR, ELIST2IZEH, ELIST2_LINK
24
#include "errcode.h"    // for ASSERT_HOST
25
#include "ocrblock.h"   // for BLOCK
26
#include "params.h"     // for DoubleParam, double_VAR_H
27
#include "pdblock.h"    // for PDBLK
28
#include "points.h"     // for FCOORD, ICOORD, ICOORDELT_LIST
29
#include "quspline.h"   // for QSPLINE
30
#include "rect.h"       // for TBOX
31
#include "scrollview.h" // for ScrollView, ScrollView::Color
32
#include "statistc.h"   // for STATS
33
#include "stepblob.h"   // for C_BLOB
34
#include "tprintf.h"    // for tprintf
35
#include "werd.h"       // for WERD_LIST
36
37
#include <cinttypes> // for PRId32
38
#include <cmath>     // for std::sqrt
39
#include <cstdint>   // for int16_t, int32_t
40
41
struct Pix;
42
43
namespace tesseract {
44
45
class C_OUTLINE;
46
47
enum PITCH_TYPE {
48
  PITCH_DUNNO,       // insufficient data
49
  PITCH_DEF_FIXED,   // definitely fixed
50
  PITCH_MAYBE_FIXED, // could be
51
  PITCH_DEF_PROP,
52
  PITCH_MAYBE_PROP,
53
  PITCH_CORR_FIXED,
54
  PITCH_CORR_PROP
55
};
56
57
// The possible tab-stop types of each side of a BLOBNBOX.
58
// The ordering is important, as it is used for deleting dead-ends in the
59
// search. ALIGNED, CONFIRMED and VLINE should remain greater than the
60
// non-aligned, unset, or deleted members.
61
enum TabType {
62
  TT_NONE,          // Not a tab.
63
  TT_DELETED,       // Not a tab after detailed analysis.
64
  TT_MAYBE_RAGGED,  // Initial designation of a tab-stop candidate.
65
  TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
66
  TT_CONFIRMED,     // Aligned with neighbours.
67
  TT_VLINE          // Detected as a vertical line.
68
};
69
70
// The possible region types of a BLOBNBOX.
71
// Note: keep all the text types > BRT_UNKNOWN and all the image types less.
72
// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
73
// *Type static functions below.
74
enum BlobRegionType {
75
  BRT_NOISE,     // Neither text nor image.
76
  BRT_HLINE,     // Horizontal separator line.
77
  BRT_VLINE,     // Vertical separator line.
78
  BRT_RECTIMAGE, // Rectangular image.
79
  BRT_POLYIMAGE, // Non-rectangular image.
80
  BRT_UNKNOWN,   // Not determined yet.
81
  BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
82
  BRT_TEXT,      // Convincing text.
83
84
  BRT_COUNT // Number of possibilities.
85
};
86
87
// enum for elements of arrays that refer to neighbours.
88
// NOTE: keep in this order, so ^2 can be used to flip direction.
89
enum BlobNeighbourDir { BND_LEFT, BND_BELOW, BND_RIGHT, BND_ABOVE, BND_COUNT };
90
91
// enum for special type of text characters, such as math symbol or italic.
92
enum BlobSpecialTextType {
93
  BSTT_NONE,    // No special.
94
  BSTT_ITALIC,  // Italic style.
95
  BSTT_DIGIT,   // Digit symbols.
96
  BSTT_MATH,    // Mathematical symbols (not including digit).
97
  BSTT_UNCLEAR, // Characters with low recognition rate.
98
  BSTT_SKIP,    // Characters that we skip labeling (usually too small).
99
  BSTT_COUNT
100
};
101
102
0
inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) {
103
0
  return static_cast<BlobNeighbourDir>(dir ^ 2);
104
0
}
105
106
// BlobTextFlowType indicates the quality of neighbouring information
107
// related to a chain of connected components, either horizontally or
108
// vertically. Also used by ColPartition for the collection of blobs
109
// within, which should all have the same value in most cases.
110
enum BlobTextFlowType {
111
  BTFT_NONE,          // No text flow set yet.
112
  BTFT_NONTEXT,       // Flow too poor to be likely text.
113
  BTFT_NEIGHBOURS,    // Neighbours support flow in this direction.
114
  BTFT_CHAIN,         // There is a weak chain of text in this direction.
115
  BTFT_STRONG_CHAIN,  // There is a strong chain of text in this direction.
116
  BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
117
  BTFT_LEADER,        // Leader dots/dashes etc.
118
  BTFT_COUNT
119
};
120
121
// Returns true if type1 dominates type2 in a merge. Mostly determined by the
122
// ordering of the enum, LEADER is weak and dominates nothing.
123
// The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that
124
// this cannot be true if t1 == t2, so the result is undefined.
125
0
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
126
  // LEADER always loses.
127
0
  if (type1 == BTFT_LEADER) {
128
0
    return false;
129
0
  }
130
0
  if (type2 == BTFT_LEADER) {
131
0
    return true;
132
0
  }
133
  // With those out of the way, the ordering of the enum determines the result.
134
0
  return type1 >= type2;
135
0
}
136
137
class ColPartition;
138
139
class BLOBNBOX;
140
ELISTIZEH(BLOBNBOX)
141
class BLOBNBOX : public ELIST<BLOBNBOX>::LINK {
142
public:
143
234k
  BLOBNBOX() {
144
234k
    ReInit();
145
234k
  }
146
2.73M
  explicit BLOBNBOX(C_BLOB *srcblob) {
147
2.73M
    box = srcblob->bounding_box();
148
2.73M
    ReInit();
149
2.73M
    cblob_ptr = srcblob;
150
2.73M
    area = static_cast<int>(srcblob->area());
151
2.73M
  }
152
2.97M
  ~BLOBNBOX() {
153
2.97M
    if (owns_cblob_) {
154
14.5k
      delete cblob_ptr;
155
14.5k
    }
156
2.97M
  }
157
158
86.4k
  static void clear_blobnboxes(BLOBNBOX_LIST *boxes) {
159
86.4k
    BLOBNBOX_IT it = boxes;
160
    // A BLOBNBOX generally doesn't own its blobs, so if they do, you
161
    // have to delete them explicitly.
162
340k
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
163
253k
      BLOBNBOX *box = it.data();
164
      // TODO: remove next line, currently still needed for resultiterator_test.
165
253k
      delete box->remove_cblob();
166
253k
    }
167
86.4k
  }
168
169
0
  static BLOBNBOX *RealBlob(C_OUTLINE *outline) {
170
0
    auto *blob = new C_BLOB(outline);
171
0
    return new BLOBNBOX(blob);
172
0
  }
173
174
  // Rotates the box and the underlying blob.
175
  void rotate(FCOORD rotation);
176
177
  // Methods that act on the box without touching the underlying blob.
178
  // Reflect the box in the y-axis, leaving the underlying blob untouched.
179
  void reflect_box_in_y_axis();
180
  // Rotates the box by the angle given by rotation.
181
  // If the blob is a diacritic, then only small rotations for skew
182
  // correction can be applied.
183
  void rotate_box(FCOORD rotation);
184
  // Moves just the box by the given vector.
185
0
  void translate_box(ICOORD v) {
186
0
    if (IsDiacritic()) {
187
0
      box.move(v);
188
0
      base_char_top_ += v.y();
189
0
      base_char_bottom_ += v.y();
190
0
    } else {
191
0
      box.move(v);
192
0
      set_diacritic_box(box);
193
0
    }
194
0
  }
195
  void merge(BLOBNBOX *nextblob);
196
  void really_merge(BLOBNBOX *other);
197
  void chop(                 // fake chop blob
198
      BLOBNBOX_IT *start_it, // location of this
199
      BLOBNBOX_IT *blob_it,  // iterator
200
      FCOORD rotation,       // for landscape
201
      float xheight);        // line height
202
203
  void NeighbourGaps(int gaps[BND_COUNT]) const;
204
  void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const;
205
  void CleanNeighbours();
206
  // Returns positive if there is at least one side neighbour that has a
207
  // similar stroke width and is not on the other side of a rule line.
208
  int GoodTextBlob() const;
209
  // Returns the number of side neighbours that are of type BRT_NOISE.
210
  int NoisyNeighbours() const;
211
212
  // Returns true if the blob is noise and has no owner.
213
0
  bool DeletableNoise() const {
214
0
    return owner() == nullptr && region_type() == BRT_NOISE;
215
0
  }
216
217
  // Returns true, and sets vert_possible/horz_possible if the blob has some
218
  // feature that makes it individually appear to flow one way.
219
  // eg if it has a high aspect ratio, yet has a complex shape, such as a
220
  // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
221
  bool DefiniteIndividualFlow();
222
223
  // Returns true if there is no tabstop violation in merging this and other.
224
  bool ConfirmNoTabViolation(const BLOBNBOX &other) const;
225
226
  // Returns true if other has a similar stroke width to this.
227
  bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance,
228
                           double constant_tolerance) const;
229
230
  // Returns a bounding box of the outline contained within the
231
  // given horizontal range.
232
  TBOX BoundsWithinLimits(int left, int right);
233
234
  // Estimates and stores the baseline position based on the shape of the
235
  // outline.
236
  void EstimateBaselinePosition();
237
238
  // Simple accessors.
239
279M
  const TBOX &bounding_box() const {
240
279M
    return box;
241
279M
  }
242
  // Set the bounding box. Use with caution.
243
  // Normally use compute_bounding_box instead.
244
0
  void set_bounding_box(const TBOX &new_box) {
245
0
    box = new_box;
246
0
    base_char_top_ = box.top();
247
0
    base_char_bottom_ = box.bottom();
248
0
  }
249
0
  void compute_bounding_box() {
250
0
    box = cblob_ptr->bounding_box();
251
0
    base_char_top_ = box.top();
252
0
    base_char_bottom_ = box.bottom();
253
0
    baseline_y_ = box.bottom();
254
0
  }
255
5.30M
  const TBOX &reduced_box() const {
256
5.30M
    return red_box;
257
5.30M
  }
258
1.24M
  void set_reduced_box(TBOX new_box) {
259
1.24M
    red_box = new_box;
260
1.24M
    reduced = true;
261
1.24M
  }
262
1.12M
  int32_t enclosed_area() const {
263
1.12M
    return area;
264
1.12M
  }
265
49.3M
  bool joined_to_prev() const {
266
49.3M
    return joined;
267
49.3M
  }
268
6.55M
  bool red_box_set() const {
269
6.55M
    return reduced;
270
6.55M
  }
271
0
  int repeated_set() const {
272
0
    return repeated_set_;
273
0
  }
274
2.66M
  void set_repeated_set(int set_id) {
275
2.66M
    repeated_set_ = set_id;
276
2.66M
  }
277
53.6M
  C_BLOB *cblob() const {
278
53.6M
    return cblob_ptr;
279
53.6M
  }
280
1.48M
  C_BLOB *remove_cblob() {
281
1.48M
    auto blob = cblob_ptr;
282
1.48M
    cblob_ptr = nullptr;
283
1.48M
    owns_cblob_ = false;
284
1.48M
    return blob;
285
1.48M
  }
286
0
  TabType left_tab_type() const {
287
0
    return left_tab_type_;
288
0
  }
289
0
  void set_left_tab_type(TabType new_type) {
290
0
    left_tab_type_ = new_type;
291
0
  }
292
0
  TabType right_tab_type() const {
293
0
    return right_tab_type_;
294
0
  }
295
0
  void set_right_tab_type(TabType new_type) {
296
0
    right_tab_type_ = new_type;
297
0
  }
298
0
  BlobRegionType region_type() const {
299
0
    return region_type_;
300
0
  }
301
0
  void set_region_type(BlobRegionType new_type) {
302
0
    region_type_ = new_type;
303
0
  }
304
0
  BlobSpecialTextType special_text_type() const {
305
0
    return spt_type_;
306
0
  }
307
0
  void set_special_text_type(BlobSpecialTextType new_type) {
308
0
    spt_type_ = new_type;
309
0
  }
310
2.66M
  BlobTextFlowType flow() const {
311
2.66M
    return flow_;
312
2.66M
  }
313
0
  void set_flow(BlobTextFlowType value) {
314
0
    flow_ = value;
315
0
  }
316
0
  bool vert_possible() const {
317
0
    return vert_possible_;
318
0
  }
319
0
  void set_vert_possible(bool value) {
320
0
    vert_possible_ = value;
321
0
  }
322
0
  bool horz_possible() const {
323
0
    return horz_possible_;
324
0
  }
325
0
  void set_horz_possible(bool value) {
326
0
    horz_possible_ = value;
327
0
  }
328
0
  int left_rule() const {
329
0
    return left_rule_;
330
0
  }
331
0
  void set_left_rule(int new_left) {
332
0
    left_rule_ = new_left;
333
0
  }
334
0
  int right_rule() const {
335
0
    return right_rule_;
336
0
  }
337
0
  void set_right_rule(int new_right) {
338
0
    right_rule_ = new_right;
339
0
  }
340
0
  int left_crossing_rule() const {
341
0
    return left_crossing_rule_;
342
0
  }
343
0
  void set_left_crossing_rule(int new_left) {
344
0
    left_crossing_rule_ = new_left;
345
0
  }
346
0
  int right_crossing_rule() const {
347
0
    return right_crossing_rule_;
348
0
  }
349
0
  void set_right_crossing_rule(int new_right) {
350
0
    right_crossing_rule_ = new_right;
351
0
  }
352
0
  float horz_stroke_width() const {
353
0
    return horz_stroke_width_;
354
0
  }
355
2.70M
  void set_horz_stroke_width(float width) {
356
2.70M
    horz_stroke_width_ = width;
357
2.70M
  }
358
0
  float vert_stroke_width() const {
359
0
    return vert_stroke_width_;
360
0
  }
361
2.70M
  void set_vert_stroke_width(float width) {
362
2.70M
    vert_stroke_width_ = width;
363
2.70M
  }
364
0
  float area_stroke_width() const {
365
0
    return area_stroke_width_;
366
0
  }
367
0
  tesseract::ColPartition *owner() const {
368
0
    return owner_;
369
0
  }
370
0
  void set_owner(tesseract::ColPartition *new_owner) {
371
0
    owner_ = new_owner;
372
0
  }
373
0
  bool leader_on_left() const {
374
0
    return leader_on_left_;
375
0
  }
376
0
  void set_leader_on_left(bool flag) {
377
0
    leader_on_left_ = flag;
378
0
  }
379
0
  bool leader_on_right() const {
380
0
    return leader_on_right_;
381
0
  }
382
0
  void set_leader_on_right(bool flag) {
383
0
    leader_on_right_ = flag;
384
0
  }
385
0
  BLOBNBOX *neighbour(BlobNeighbourDir n) const {
386
0
    return neighbours_[n];
387
0
  }
388
0
  bool good_stroke_neighbour(BlobNeighbourDir n) const {
389
0
    return good_stroke_neighbours_[n];
390
0
  }
391
0
  void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good) {
392
0
    neighbours_[n] = neighbour;
393
0
    good_stroke_neighbours_[n] = good;
394
0
  }
395
0
  bool IsDiacritic() const {
396
0
    return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
397
0
  }
398
0
  int base_char_top() const {
399
0
    return base_char_top_;
400
0
  }
401
0
  int base_char_bottom() const {
402
0
    return base_char_bottom_;
403
0
  }
404
7.38M
  int baseline_position() const {
405
7.38M
    return baseline_y_;
406
7.38M
  }
407
0
  int line_crossings() const {
408
0
    return line_crossings_;
409
0
  }
410
0
  void set_line_crossings(int value) {
411
0
    line_crossings_ = value;
412
0
  }
413
1.16M
  void set_diacritic_box(const TBOX &diacritic_box) {
414
1.16M
    base_char_top_ = diacritic_box.top();
415
1.16M
    base_char_bottom_ = diacritic_box.bottom();
416
1.16M
  }
417
0
  BLOBNBOX *base_char_blob() const {
418
0
    return base_char_blob_;
419
0
  }
420
0
  void set_base_char_blob(BLOBNBOX *blob) {
421
0
    base_char_blob_ = blob;
422
0
  }
423
3.94M
  void set_owns_cblob(bool value) {
424
3.94M
    owns_cblob_ = value;
425
3.94M
  }
426
427
0
  bool UniquelyVertical() const {
428
0
    return vert_possible_ && !horz_possible_;
429
0
  }
430
0
  bool UniquelyHorizontal() const {
431
0
    return horz_possible_ && !vert_possible_;
432
0
  }
433
434
  // Returns true if the region type is text.
435
0
  static bool IsTextType(BlobRegionType type) {
436
0
    return type == BRT_TEXT || type == BRT_VERT_TEXT;
437
0
  }
438
  // Returns true if the region type is image.
439
0
  static bool IsImageType(BlobRegionType type) {
440
0
    return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
441
0
  }
442
  // Returns true if the region type is line.
443
0
  static bool IsLineType(BlobRegionType type) {
444
0
    return type == BRT_HLINE || type == BRT_VLINE;
445
0
  }
446
  // Returns true if the region type cannot be merged.
447
0
  static bool UnMergeableType(BlobRegionType type) {
448
0
    return IsLineType(type) || IsImageType(type);
449
0
  }
450
  // Helper to call CleanNeighbours on all blobs on the list.
451
  static void CleanNeighbours(BLOBNBOX_LIST *blobs);
452
  // Helper to delete all the deletable blobs on the list.
453
  static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs);
454
  // Helper to compute edge offsets for  all the blobs on the list.
455
  // See coutln.h for an explanation of edge offsets.
456
  static void ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs);
457
458
#ifndef GRAPHICS_DISABLED
459
  // Helper to draw all the blobs on the list in the given body_colour,
460
  // with child outlines in the child_colour.
461
  static void PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
462
                        ScrollView::Color child_colour, ScrollView *win);
463
  // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
464
  // given list in the given body_colour, with child outlines in the
465
  // child_colour.
466
  static void PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
467
                             ScrollView::Color child_colour, ScrollView *win);
468
469
  static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type);
470
471
  // Keep in sync with BlobRegionType.
472
  ScrollView::Color BoxColor() const;
473
474
  void plot(ScrollView *window,              // window to draw in
475
            ScrollView::Color blob_colour,   // for outer bits
476
            ScrollView::Color child_colour); // for holes
477
#endif
478
479
  // Initializes members set by StrokeWidth and beyond, without discarding
480
  // stored area and strokewidth values, which are expensive to calculate.
481
2.97M
  void ReInit() {
482
2.97M
    joined = false;
483
2.97M
    reduced = false;
484
2.97M
    repeated_set_ = 0;
485
2.97M
    left_tab_type_ = TT_NONE;
486
2.97M
    right_tab_type_ = TT_NONE;
487
2.97M
    region_type_ = BRT_UNKNOWN;
488
2.97M
    flow_ = BTFT_NONE;
489
2.97M
    spt_type_ = BSTT_SKIP;
490
2.97M
    left_rule_ = 0;
491
2.97M
    right_rule_ = 0;
492
2.97M
    left_crossing_rule_ = 0;
493
2.97M
    right_crossing_rule_ = 0;
494
2.97M
    if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && cblob()->perimeter() != 0) {
495
0
      area_stroke_width_ = 2.0f * area / cblob()->perimeter();
496
0
    }
497
2.97M
    owner_ = nullptr;
498
2.97M
    base_char_top_ = box.top();
499
2.97M
    base_char_bottom_ = box.bottom();
500
2.97M
    baseline_y_ = box.bottom();
501
2.97M
    line_crossings_ = 0;
502
2.97M
    base_char_blob_ = nullptr;
503
2.97M
    horz_possible_ = false;
504
2.97M
    vert_possible_ = false;
505
2.97M
    leader_on_left_ = false;
506
2.97M
    leader_on_right_ = false;
507
2.97M
    ClearNeighbours();
508
2.97M
  }
509
510
2.97M
  void ClearNeighbours() {
511
14.8M
    for (int n = 0; n < BND_COUNT; ++n) {
512
11.8M
      neighbours_[n] = nullptr;
513
11.8M
      good_stroke_neighbours_[n] = false;
514
11.8M
    }
515
2.97M
  }
516
517
private:
518
  C_BLOB *cblob_ptr = nullptr;               // edgestep blob
519
  TBOX box;                                  // bounding box
520
  TBOX red_box;                              // bounding box
521
  int32_t area = 0;                          // enclosed area
522
  int32_t repeated_set_ = 0;                 // id of the set of repeated blobs
523
  TabType left_tab_type_ = TT_NONE;          // Indicates tab-stop assessment
524
  TabType right_tab_type_ = TT_NONE;         // Indicates tab-stop assessment
525
  BlobRegionType region_type_ = BRT_UNKNOWN; // Type of region this blob belongs to
526
  BlobTextFlowType flow_ = BTFT_NONE;        // Quality of text flow.
527
  BlobSpecialTextType spt_type_;             // Special text type.
528
  bool joined = false;                       // joined to prev
529
  bool reduced = false;                      // reduced box set
530
  int16_t left_rule_ = 0;                    // x-coord of nearest but not crossing rule line
531
  int16_t right_rule_ = 0;                   // x-coord of nearest but not crossing rule line
532
  int16_t left_crossing_rule_;               // x-coord of nearest or crossing rule line
533
  int16_t right_crossing_rule_;              // x-coord of nearest or crossing rule line
534
  int16_t base_char_top_;                    // y-coord of top/bottom of diacritic base,
535
  int16_t base_char_bottom_;                 // if it exists else top/bottom of this blob.
536
  int16_t baseline_y_;                       // Estimate of baseline position.
537
  int32_t line_crossings_;                   // Number of line intersections touched.
538
  BLOBNBOX *base_char_blob_;                 // The blob that was the base char.
539
  tesseract::ColPartition *owner_;           // Who will delete me when I am not needed
540
  BLOBNBOX *neighbours_[BND_COUNT];
541
  float horz_stroke_width_ = 0.0f; // Median horizontal stroke width
542
  float vert_stroke_width_ = 0.0f; // Median vertical stroke width
543
  float area_stroke_width_ = 0.0f; // Stroke width from area/perimeter ratio.
544
  bool good_stroke_neighbours_[BND_COUNT];
545
  bool horz_possible_;   // Could be part of horizontal flow.
546
  bool vert_possible_;   // Could be part of vertical flow.
547
  bool leader_on_left_;  // There is a leader to the left.
548
  bool leader_on_right_; // There is a leader to the right.
549
  // Iff true, then the destructor should delete the cblob_ptr.
550
  // TODO(rays) migrate all uses to correctly setting this flag instead of
551
  // deleting the C_BLOB before deleting the BLOBNBOX.
552
  bool owns_cblob_ = false;
553
};
554
555
class TO_ROW : public ELIST2<TO_ROW>::LINK {
556
public:
557
  static const int kErrorWeight = 3;
558
559
0
  TO_ROW() {
560
0
    clear();
561
0
  }                   // empty
562
  TO_ROW(             // constructor
563
      BLOBNBOX *blob, // from first blob
564
      float top,      // of row //target height
565
      float bottom, float row_size);
566
567
  void print() const;
568
48.7M
  float max_y() const { // access function
569
48.7M
    return y_max;
570
48.7M
  }
571
137M
  float min_y() const {
572
137M
    return y_min;
573
137M
  }
574
0
  float mean_y() const {
575
0
    return (y_min + y_max) / 2.0f;
576
0
  }
577
3.23M
  float initial_min_y() const {
578
3.23M
    return initial_y_min;
579
3.23M
  }
580
2.52M
  float line_m() const { // access to line fit
581
2.52M
    return m;
582
2.52M
  }
583
1.43M
  float line_c() const {
584
1.43M
    return c;
585
1.43M
  }
586
894k
  float line_error() const {
587
894k
    return error;
588
894k
  }
589
2.39M
  float parallel_c() const {
590
2.39M
    return para_c;
591
2.39M
  }
592
0
  float parallel_error() const {
593
0
    return para_error;
594
0
  }
595
6.88k
  float believability() const { // baseline goodness
596
6.88k
    return credibility;
597
6.88k
  }
598
1.31M
  float intercept() const { // real parallel_c
599
1.31M
    return y_origin;
600
1.31M
  }
601
  void add_blob(      // put in row
602
      BLOBNBOX *blob, // blob to add
603
      float top,      // of row //target height
604
      float bottom, float row_size);
605
  void insert_blob( // put in row in order
606
      BLOBNBOX *blob);
607
608
17.7M
  BLOBNBOX_LIST *blob_list() { // get list
609
17.7M
    return &blobs;
610
17.7M
  }
611
612
  void set_line(   // set line spec
613
      float new_m, // line to set
614
726k
      float new_c, float new_error) {
615
726k
    m = new_m;
616
726k
    c = new_c;
617
726k
    error = new_error;
618
726k
  }
619
  void set_parallel_line( // set fixed gradient line
620
      float gradient,     // page gradient
621
513k
      float new_c, float new_error) {
622
513k
    para_c = new_c;
623
513k
    para_error = new_error;
624
513k
    credibility = blobs.length() - kErrorWeight * new_error;
625
513k
    y_origin = new_c / std::sqrt(1 + gradient * gradient);
626
    // real intercept
627
513k
  }
628
  void set_limits(     // set min,max
629
      float new_min,   // bottom and
630
241k
      float new_max) { // top of row
631
241k
    y_min = new_min;
632
241k
    y_max = new_max;
633
241k
  }
634
  void compute_vertical_projection();
635
  // get projection
636
637
856k
  bool rep_chars_marked() const {
638
856k
    return num_repeated_sets_ != -1;
639
856k
  }
640
0
  void clear_rep_chars_marked() {
641
0
    num_repeated_sets_ = -1;
642
0
  }
643
544k
  int num_repeated_sets() const {
644
544k
    return num_repeated_sets_;
645
544k
  }
646
185k
  void set_num_repeated_sets(int num_sets) {
647
185k
    num_repeated_sets_ = num_sets;
648
185k
  }
649
650
  // true when dead
651
  bool merged = false;
652
  bool all_caps;             // had no ascenders
653
  bool used_dm_model;        // in guessing pitch
654
  int16_t projection_left;   // start of projection
655
  int16_t projection_right;  // start of projection
656
  PITCH_TYPE pitch_decision; // how strong is decision
657
  float fixed_pitch;         // pitch or 0
658
  float fp_space;            // sp if fixed pitch
659
  float fp_nonsp;            // nonsp if fixed pitch
660
  float pr_space;            // sp if prop
661
  float pr_nonsp;            // non sp if prop
662
  float spacing;             // to "next" row
663
  float xheight;             // of line
664
  int xheight_evidence;      // number of blobs of height xheight
665
  float ascrise;             // ascenders
666
  float descdrop;            // descenders
667
  float body_size;           // of CJK characters.  Assumed to be
668
                             // xheight+ascrise for non-CJK text.
669
  int32_t min_space;         // min size for real space
670
  int32_t max_nonspace;      // max size of non-space
671
  int32_t space_threshold;   // space vs nonspace
672
  float kern_size;           // average non-space
673
  float space_size;          // average space
674
  WERD_LIST rep_words;       // repeated chars
675
  ICOORDELT_LIST char_cells; // fixed pitch cells
676
  QSPLINE baseline;          // curved baseline
677
  STATS projection;          // vertical projection
678
679
private:
680
  void clear(); // clear all values to reasonable defaults
681
682
  BLOBNBOX_LIST blobs; // blobs in row
683
  float y_min;         // coords
684
  float y_max;
685
  float initial_y_min;
686
  float m, c;   // line spec
687
  float error;  // line error
688
  float para_c; // constrained fit
689
  float para_error;
690
  float y_origin;         // rotated para_c;
691
  float credibility;      // baseline believability
692
  int num_repeated_sets_; // number of sets of repeated blobs
693
                          // set to -1 if we have not searched
694
                          // for repeated blobs in this row yet
695
};
696
697
ELIST2IZEH(TO_ROW)
698
class TESS_API TO_BLOCK : public ELIST<TO_BLOCK>::LINK {
699
public:
700
0
  TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
701
0
    clear();
702
0
  }                      // empty
703
  TO_BLOCK(              // constructor
704
      BLOCK *src_block); // real block
705
  ~TO_BLOCK();
706
707
  void clear(); // clear all scalar members.
708
709
894k
  TO_ROW_LIST *get_rows() { // access function
710
894k
    return &row_list;
711
894k
  }
712
713
  // Rotate all the blobnbox lists and the underlying block. Then update the
714
  // median size statistic from the blobs list.
715
0
  void rotate(const FCOORD &rotation) {
716
0
    BLOBNBOX_LIST *blobnbox_list[] = {&blobs,       &underlines,  &noise_blobs,
717
0
                                      &small_blobs, &large_blobs, nullptr};
718
0
    for (BLOBNBOX_LIST **list = blobnbox_list; *list != nullptr; ++list) {
719
0
      BLOBNBOX_IT it(*list);
720
0
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
721
0
        it.data()->rotate(rotation);
722
0
      }
723
0
    }
724
    // Rotate the block
725
0
    ASSERT_HOST(block->pdblk.poly_block() != nullptr);
726
0
    block->rotate(rotation);
727
    // Update the median size statistic from the blobs list.
728
0
    STATS widths(0, block->pdblk.bounding_box().width() - 1);
729
0
    STATS heights(0, block->pdblk.bounding_box().height() - 1);
730
0
    BLOBNBOX_IT blob_it(&blobs);
731
0
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
732
0
      widths.add(blob_it.data()->bounding_box().width(), 1);
733
0
      heights.add(blob_it.data()->bounding_box().height(), 1);
734
0
    }
735
0
    block->set_median_size(static_cast<int>(widths.median() + 0.5),
736
0
                           static_cast<int>(heights.median() + 0.5));
737
0
  }
738
739
0
  void print_rows() { // debug info
740
0
    TO_ROW_IT row_it = &row_list;
741
0
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
742
0
      auto row = row_it.data();
743
0
      tprintf("Row range (%g,%g), para_c=%g, blobcount=%" PRId32 "\n",
744
0
              static_cast<double>(row->min_y()),
745
0
              static_cast<double>(row->max_y()),
746
0
              static_cast<double>(row->parallel_c()),
747
0
              row->blob_list()->length());
748
0
    }
749
0
  }
750
751
  // Reorganizes the blob lists with a different definition of small, medium
752
  // and large, compared to the original definition.
753
  // Height is still the primary filter key, but medium width blobs of small
754
  // height become medium, and very wide blobs of small height stay small.
755
  void ReSetAndReFilterBlobs();
756
757
  // Deletes noise blobs from all lists where not owned by a ColPartition.
758
  void DeleteUnownedNoise();
759
760
  // Computes and stores the edge offsets on each blob for use in feature
761
  // extraction, using greyscale if the supplied grey and thresholds pixes
762
  // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary
763
  // edge step outlines.
764
  // Thresholds must either be the same size as grey or an integer down-scale
765
  // of grey.
766
  // See coutln.h for an explanation of edge offsets.
767
  void ComputeEdgeOffsets(Image thresholds, Image grey);
768
769
#ifndef GRAPHICS_DISABLED
770
  // Draw the noise blobs from all lists in red.
771
  void plot_noise_blobs(ScrollView *to_win);
772
  // Draw the blobs on the various lists in the block in different colors.
773
  void plot_graded_blobs(ScrollView *to_win);
774
#endif
775
776
  BLOBNBOX_LIST blobs;       // medium size
777
  BLOBNBOX_LIST underlines;  // underline blobs
778
  BLOBNBOX_LIST noise_blobs; // very small
779
  BLOBNBOX_LIST small_blobs; // fairly small
780
  BLOBNBOX_LIST large_blobs; // big blobs
781
  BLOCK *block;              // real block
782
  PITCH_TYPE pitch_decision; // how strong is decision
783
  float line_spacing;        // estimate
784
  // line_size is a lower-bound estimate of the font size in pixels of
785
  // the text in the block (with ascenders and descenders), being a small
786
  // (1.25) multiple of the median height of filtered blobs.
787
  // In most cases the font size will be bigger, but it will be closer
788
  // if the text is allcaps, or in a no-x-height script.
789
  float line_size;       // estimate
790
  float max_blob_size;   // line assignment limit
791
  float baseline_offset; // phase shift
792
  float xheight;         // median blob size
793
  float fixed_pitch;     // pitch or 0
794
  float kern_size;       // average non-space
795
  float space_size;      // average space
796
  int32_t min_space;     // min definite space
797
  int32_t max_nonspace;  // max definite
798
  float fp_space;        // sp if fixed pitch
799
  float fp_nonsp;        // nonsp if fixed pitch
800
  float pr_space;        // sp if prop
801
  float pr_nonsp;        // non sp if prop
802
  TO_ROW *key_row;       // starting row
803
804
private:
805
  TO_ROW_LIST row_list; // temporary rows
806
};
807
808
ELISTIZEH(TO_BLOCK)
809
void find_cblob_limits( // get y limits
810
    C_BLOB *blob,       // blob to search
811
    float leftx,        // x limits
812
    float rightx,
813
    FCOORD rotation, // for landscape
814
    float &ymin,     // output y limits
815
    float &ymax);
816
void find_cblob_vlimits( // get y limits
817
    C_BLOB *blob,        // blob to search
818
    float leftx,         // x limits
819
    float rightx,
820
    float &ymin, // output y limits
821
    float &ymax);
822
void find_cblob_hlimits( // get x limits
823
    C_BLOB *blob,        // blob to search
824
    float bottomy,       // y limits
825
    float topy,
826
    float &xmin, // output x limits
827
    float &xymax);
828
C_BLOB *crotate_cblob( // rotate it
829
    C_BLOB *blob,      // blob to search
830
    FCOORD rotation    // for landscape
831
);
832
TBOX box_next(      // get bounding box
833
    BLOBNBOX_IT *it // iterator to blobds
834
);
835
TBOX box_next_pre_chopped( // get bounding box
836
    BLOBNBOX_IT *it        // iterator to blobds
837
);
838
void vertical_cblob_projection( // project outlines
839
    C_BLOB *blob,               // blob to project
840
    STATS *stats                // output
841
);
842
void vertical_coutline_projection( // project outlines
843
    C_OUTLINE *outline,            // outline to project
844
    STATS *stats                   // output
845
);
846
#ifndef GRAPHICS_DISABLED
847
void plot_blob_list(ScrollView *win,                 // window to draw in
848
                    BLOBNBOX_LIST *list,             // blob list
849
                    ScrollView::Color body_colour,   // colour to draw
850
                    ScrollView::Color child_colour); // colour of child
851
#endif                                               // !GRAPHICS_DISABLED
852
853
} // namespace tesseract
854
855
#endif