Coverage Report

Created: 2024-02-28 06:46

/src/tesseract/src/textord/baselinedetect.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        baselinedetect.h
3
// Description: Initial Baseline Determination.
4
// Copyright 2012 Google Inc. All Rights Reserved.
5
// Author:      rays@google.com (Ray Smith)
6
//
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
20
#define TESSERACT_TEXTORD_BASELINEDETECT_H_
21
22
#include "detlinefit.h"
23
#include "points.h"
24
#include "rect.h"
25
26
struct Pix;
27
28
namespace tesseract {
29
30
class Textord;
31
class BLOBNBOX_LIST;
32
class TO_BLOCK;
33
class TO_BLOCK_LIST;
34
class TO_ROW;
35
36
// Class to compute and hold baseline data for a TO_ROW.
37
class BaselineRow {
38
public:
39
  BaselineRow(double line_size, TO_ROW *to_row);
40
41
586k
  // Returns a const reference to bounding_box_, the bounding box of all the
  // blobs of this row. The reference is to a member, so it is only usable
  // while this BaselineRow exists.
  const TBOX &bounding_box() const {
42
586k
    return bounding_box_;
43
586k
  }
44
  // Sets the TO_ROW with the output straight line.
45
  void SetupOldLineParameters(TO_ROW *row) const;
46
47
  // Outputs diagnostic information.
48
  void Print() const;
49
50
  // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
51
  double BaselineAngle() const;
52
  // Computes and returns the linespacing at the middle of the overlap
53
  // between this and other.
54
  double SpaceBetween(const BaselineRow &other) const;
55
  // Computes and returns the displacement of the center of the line
56
  // perpendicular to the given direction.
57
  double PerpDisp(const FCOORD &direction) const;
58
  // Computes the y coordinate at the given x using the straight baseline
59
  // defined by baseline_pt1_ and baseline_pt2_.
60
  double StraightYAtX(double x) const;
61
62
  // Fits a straight baseline to the points. Returns true if it had enough
63
  // points to be reasonably sure of the fitted baseline.
64
  // If use_box_bottoms is false, baseline positions are formed by
65
  // considering the outlines of the blobs.
66
  bool FitBaseline(bool use_box_bottoms);
67
  // Modifies an existing result of FitBaseline to be parallel to the given
68
  // vector if that produces a better result.
69
  void AdjustBaselineToParallel(int debug, const FCOORD &direction);
70
  // Modifies the baseline to snap to the textline grid if the existing
71
  // result is not good enough.
72
  double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing,
73
                              double line_offset);
74
75
private:
76
  // Sets up displacement_modes_ with the top few modes of the perpendicular
77
  // distance of each blob from the given direction vector, after rounding.
78
  void SetupBlobDisplacements(const FCOORD &direction);
79
80
  // Fits a line in the given direction to blobs that are close to the given
81
  // target_offset perpendicular displacement from the direction. The fit
82
  // error is allowed to be cheat_allowance worse than the existing fit, and
83
  // will still be used.
84
  // If cheat_allowance > 0, the new fit will be good and replace the current
85
  // fit if it has better fit (with cheat) OR its error is below
86
  // max_baseline_error_ and the old fit is marked bad.
87
  // Otherwise the new fit will only replace the old if it is really better,
88
  // or the old fit is marked bad and the new fit has sufficient points, as
89
  // well as being within the max_baseline_error_.
90
  void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance,
91
                              double target_offset);
92
  // Returns the perpendicular distance of the point from the straight
93
  // baseline.
94
  float PerpDistanceFromBaseline(const FCOORD &pt) const;
95
  // Computes the bounding box of the row.
96
  void ComputeBoundingBox();
97
98
  // The blobs of the row to which this BaselineRow adds extra information
99
  // during baseline fitting. Note that blobs_ could easily come from either
100
  // a TO_ROW or a ColPartition.
101
  BLOBNBOX_LIST *blobs_;
102
  // Bounding box of all the blobs.
103
  TBOX bounding_box_;
104
  // Fitter used to fit lines to the blobs.
105
  DetLineFit fitter_;
106
  // 2 points on the straight baseline.
107
  FCOORD baseline_pt1_;
108
  FCOORD baseline_pt2_;
109
  // Set of modes of displacements. They indicate preferable baseline positions.
110
  std::vector<double> displacement_modes_;
111
  // Quantization factor used for displacement_modes_.
112
  double disp_quant_factor_;
113
  // Half the acceptance range of blob displacements for computing the
114
  // error during a constrained fit.
115
  double fit_halfrange_;
116
  // Max baseline error before a line is regarded as fitting badly.
117
  double max_baseline_error_;
118
  // The error of fit of the baseline.
119
  double baseline_error_;
120
  // True if this row seems to have a good baseline.
121
  bool good_baseline_;
122
};
123
124
// Class to compute and hold baseline data for a TO_BLOCK.
125
class BaselineBlock {
126
public:
127
  BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block);
128
129
16.6k
  // Deletes every BaselineRow pointed to by rows_; block_ is not deleted here.
  // NOTE(review): rows_ holds raw pointers that this destructor deletes, and
  // no copy constructor/assignment is declared in this class, so copying a
  // BaselineBlock would lead to a double delete. Consider deleting the copy
  // operations or storing std::unique_ptr<BaselineRow>.
  ~BaselineBlock() {
130
199k
    for (auto row : rows_) {
131
199k
      delete row;
132
199k
    }
133
16.6k
  }
134
135
0
  // Returns block_, the TO_BLOCK to which this class adds baseline
  // information. The inline destructor of this class does not delete it.
  TO_BLOCK *block() const {
136
0
    return block_;
137
0
  }
138
1.23k
  // Returns skew_angle_, the skew of the block in radians, measured
  // anticlockwise from the x-axis. Per the member comments, the value is only
  // valid when good_skew_angle_ is true; this accessor does not check that.
  double skew_angle() const {
139
1.23k
    return skew_angle_;
140
1.23k
  }
141
142
  // Computes and returns the absolute error of the given perp_disp from the
143
  // given linespacing model.
144
  static double SpacingModelError(double perp_disp, double line_spacing, double line_offset);
145
146
  // Fits straight line baselines and computes the skew angle from the
147
  // median angle. Returns true if a good angle is found.
148
  // If use_box_bottoms is false, baseline positions are formed by
149
  // considering the outlines of the blobs.
150
  bool FitBaselinesAndFindSkew(bool use_box_bottoms);
151
152
  // Refits the baseline to a constrained angle, using the stored block
153
  // skew if good enough, otherwise the supplied default skew.
154
  void ParallelizeBaselines(double default_block_skew);
155
156
  // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
157
  void SetupBlockParameters() const;
158
159
  // Processing that is required before fitting baseline splines, but requires
160
  // linear baselines in order to be successful:
161
  //   Removes noise if required
162
  //   Separates out underlines
163
  //   Pre-associates blob fragments.
164
  // TODO(rays/joeliu) This entire section of code is inherited from the past
165
  // and could be improved/eliminated.
166
  // page_tr is used to size a debug window.
167
  void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
168
169
  // Fits splines to the textlines, or creates fake QSPLINES from the straight
170
  // baselines that are already on the TO_ROWs.
171
  // As a side-effect, computes the xheights of the rows and the block.
172
  // Although x-height estimation is conceptually separate, it is part of
173
  // detecting perspective distortion and therefore baseline fitting.
174
  void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord);
175
176
  // Draws the (straight) baselines and final blobs colored according to
177
  // what was discarded as noise and what is associated with each row.
178
  void DrawFinalRows(const ICOORD &page_tr);
179
180
  // Render the generated spline baselines for this block on pix_in.
181
  void DrawPixSpline(Image pix_in);
182
183
private:
184
  // Top-level line-spacing calculation. Computes an estimate of the line-
185
  // spacing, using the current baselines in the TO_ROWS of the block, and
186
  // then refines it by fitting a regression line to the baseline positions
187
  // as a function of their integer index.
188
  // Returns true if it seems that the model is a reasonable fit to the
189
  // observations.
190
  bool ComputeLineSpacing();
191
192
  // Computes the deskewed vertical position of each baseline in the block and
193
  // stores them in the given vector.
194
  void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions);
195
196
  // Computes an estimate of the line spacing of the block from the median
197
  // of the spacings between adjacent overlapping textlines.
198
  void EstimateLineSpacing();
199
200
  // Refines the line spacing of the block by fitting a regression
201
  // line to the deskewed y-position of each baseline as a function of its
202
  // estimated line index, allowing for a small error in the initial linespacing
203
  // and choosing the best available model.
204
  void RefineLineSpacing(const std::vector<double> &positions);
205
206
  // Given an initial estimate of line spacing (m_in) and the positions of each
207
  // baseline, computes the line spacing of the block more accurately in m_out,
208
  // and the corresponding intercept in c_out, and the number of spacings seen
209
  // in index_delta. Returns the error of fit to the line spacing model.
210
  double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out,
211
                             double *c_out, int *index_delta);
212
213
  // The block to which this class adds extra information used during baseline
214
  // calculation.
215
  TO_BLOCK *block_;
216
  // The rows in the block that we will be working with.
217
  std::vector<BaselineRow *> rows_;
218
  // Amount of debugging output to provide.
219
  int debug_level_;
220
  // True if the block is non-text (graphic).
221
  bool non_text_block_;
222
  // True if the block has at least one good enough baseline to compute the
223
  // skew angle and therefore skew_angle_ is valid.
224
  bool good_skew_angle_;
225
  // Angle of skew in radians using the conventional anticlockwise from x-axis.
226
  double skew_angle_;
227
  // Current best estimate line spacing in pixels perpendicular to skew_angle_.
228
  double line_spacing_;
229
  // Offset for baseline positions, in pixels. Each baseline is at
230
  // line_spacing_ * n + line_offset_ for integer n, which represents
231
  // [textline] line number in a line numbering system that has line 0 on or
232
  // at least near the x-axis. Not equal to the actual line number of a line
233
  // within a block as most blocks are not near the x-axis.
234
  double line_offset_;
235
  // The error of the line spacing model.
236
  double model_error_;
237
};
238
239
class BaselineDetect {
240
public:
241
  BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks);
242
243
16.6k
  // Deletes every BaselineBlock pointed to by blocks_.
  // NOTE(review): blocks_ holds raw pointers that this destructor deletes, and
  // no copy constructor/assignment is declared in this class, so copying a
  // BaselineDetect would lead to a double delete. Consider deleting the copy
  // operations or storing std::unique_ptr<BaselineBlock>.
  ~BaselineDetect() {
244
16.6k
    for (auto block : blocks_) {
245
16.6k
      delete block;
246
16.6k
    }
247
16.6k
  }
248
249
  // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
250
  // block-wise and page-wise data to smooth small blocks/rows, and applies
251
  // smoothing based on block/page-level skew and block-level linespacing.
252
  void ComputeStraightBaselines(bool use_box_bottoms);
253
254
  // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
255
  // other associated side-effects, including pre-associating blobs, computing
256
  // x-heights and displaying debug information.
257
  // NOTE that ComputeStraightBaselines must have been called first as this
258
  // sets up data in the TO_ROWs upon which this function depends.
259
  void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines,
260
                                         bool remove_noise, bool show_final_rows, Textord *textord);
261
262
private:
263
  // Average (median) skew of the blocks on the page among those that have
264
  // a good angle of their own.
265
  FCOORD page_skew_;
266
  // Amount of debug output to produce.
267
  int debug_level_;
268
  // The blocks that we are working with.
269
  std::vector<BaselineBlock *> blocks_;
270
};
271
272
} // namespace tesseract
273
274
#endif // TESSERACT_TEXTORD_BASELINEDETECT_H_