/src/tesseract/src/textord/baselinedetect.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: baselinedetect.h |
3 | | // Description: Initial Baseline Determination. |
4 | | // Copyright 2012 Google Inc. All Rights Reserved. |
5 | | // Author: rays@google.com (Ray Smith) |
6 | | // |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ |
20 | | #define TESSERACT_TEXTORD_BASELINEDETECT_H_ |
21 | | |
22 | | #include "detlinefit.h" |
23 | | #include "points.h" |
24 | | #include "rect.h" |
25 | | |
26 | | struct Pix; |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | | class Textord; |
31 | | class BLOBNBOX_LIST; |
32 | | class TO_BLOCK; |
33 | | class TO_BLOCK_LIST; |
34 | | class TO_ROW; |
35 | | |
36 | | // Class to compute and hold baseline data for a TO_ROW. |
37 | | class BaselineRow { |
38 | | public: |
39 | | BaselineRow(double line_size, TO_ROW *to_row); |
40 | | |
41 | 586k | const TBOX &bounding_box() const { |
42 | 586k | return bounding_box_; |
43 | 586k | } |
44 | | // Sets the TO_ROW with the output straight line. |
45 | | void SetupOldLineParameters(TO_ROW *row) const; |
46 | | |
47 | | // Outputs diagnostic information. |
48 | | void Print() const; |
49 | | |
50 | | // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. |
51 | | double BaselineAngle() const; |
52 | | // Computes and returns the linespacing at the middle of the overlap |
53 | | // between this and other. |
54 | | double SpaceBetween(const BaselineRow &other) const; |
55 | | // Computes and returns the displacement of the center of the line |
56 | | // perpendicular to the given direction. |
57 | | double PerpDisp(const FCOORD &direction) const; |
58 | | // Computes the y coordinate at the given x using the straight baseline |
59 | | // defined by baseline_pt1_ and baseline_pt2_. |
60 | | double StraightYAtX(double x) const; |
61 | | |
62 | | // Fits a straight baseline to the points. Returns true if it had enough |
63 | | // points to be reasonably sure of the fitted baseline. |
64 | | // If use_box_bottoms is false, baseline positions are formed by |
65 | | // considering the outlines of the blobs. |
66 | | bool FitBaseline(bool use_box_bottoms); |
67 | | // Modifies an existing result of FitBaseline to be parallel to the given |
68 | | // vector if that produces a better result. |
69 | | void AdjustBaselineToParallel(int debug, const FCOORD &direction); |
70 | | // Modifies the baseline to snap to the textline grid if the existing |
71 | | // result is not good enough. |
72 | | double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, |
73 | | double line_offset); |
74 | | |
75 | | private: |
76 | | // Sets up displacement_modes_ with the top few modes of the perpendicular |
77 | | // distance of each blob from the given direction vector, after rounding. |
78 | | void SetupBlobDisplacements(const FCOORD &direction); |
79 | | |
80 | | // Fits a line in the given direction to blobs that are close to the given |
81 | | // target_offset perpendicular displacement from the direction. The fit |
82 | | // error is allowed to be cheat_allowance worse than the existing fit, and |
83 | | // will still be used. |
84 | | // If cheat_allowance > 0, the new fit will be good and replace the current |
85 | | // fit if it has better fit (with cheat) OR its error is below |
86 | | // max_baseline_error_ and the old fit is marked bad. |
87 | | // Otherwise the new fit will only replace the old if it is really better, |
88 | | // or the old fit is marked bad and the new fit has sufficient points, as |
89 | | // well as being within the max_baseline_error_. |
90 | | void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance, |
91 | | double target_offset); |
92 | | // Returns the perpendicular distance of the point from the straight |
93 | | // baseline. |
94 | | float PerpDistanceFromBaseline(const FCOORD &pt) const; |
95 | | // Computes the bounding box of the row. |
96 | | void ComputeBoundingBox(); |
97 | | |
98 | | // The blobs of the row to which this BaselineRow adds extra information |
99 | | // during baseline fitting. Note that blobs_ could easily come from either |
100 | | // a TO_ROW or a ColPartition. |
101 | | BLOBNBOX_LIST *blobs_; |
102 | | // Bounding box of all the blobs. |
103 | | TBOX bounding_box_; |
104 | | // Fitter used to fit lines to the blobs. |
105 | | DetLineFit fitter_; |
106 | | // 2 points on the straight baseline. |
107 | | FCOORD baseline_pt1_; |
108 | | FCOORD baseline_pt2_; |
109 | | // Set of modes of displacements. They indicate preferable baseline positions. |
110 | | std::vector<double> displacement_modes_; |
111 | | // Quantization factor used for displacement_modes_. |
112 | | double disp_quant_factor_; |
113 | | // Half the acceptance range of blob displacements for computing the |
114 | | // error during a constrained fit. |
115 | | double fit_halfrange_; |
116 | | // Max baseline error before a line is regarded as fitting badly. |
117 | | double max_baseline_error_; |
118 | | // The error of fit of the baseline. |
119 | | double baseline_error_; |
120 | | // True if this row seems to have a good baseline. |
121 | | bool good_baseline_; |
122 | | }; |
123 | | |
124 | | // Class to compute and hold baseline data for a TO_BLOCK. |
125 | | class BaselineBlock { |
126 | | public: |
127 | | BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block); |
128 | | |
129 | 16.6k | ~BaselineBlock() { |
130 | 199k | for (auto row : rows_) { |
131 | 199k | delete row; |
132 | 199k | } |
133 | 16.6k | } |
134 | | |
135 | 0 | TO_BLOCK *block() const { |
136 | 0 | return block_; |
137 | 0 | } |
138 | 1.23k | double skew_angle() const { |
139 | 1.23k | return skew_angle_; |
140 | 1.23k | } |
141 | | |
142 | | // Computes and returns the absolute error of the given perp_disp from the |
143 | | // given linespacing model. |
144 | | static double SpacingModelError(double perp_disp, double line_spacing, double line_offset); |
145 | | |
146 | | // Fits straight line baselines and computes the skew angle from the |
147 | | // median angle. Returns true if a good angle is found. |
148 | | // If use_box_bottoms is false, baseline positions are formed by |
149 | | // considering the outlines of the blobs. |
150 | | bool FitBaselinesAndFindSkew(bool use_box_bottoms); |
151 | | |
152 | | // Refits the baseline to a constrained angle, using the stored block |
153 | | // skew if good enough, otherwise the supplied default skew. |
154 | | void ParallelizeBaselines(double default_block_skew); |
155 | | |
156 | | // Sets the parameters in TO_BLOCK that are needed by subsequent processes. |
157 | | void SetupBlockParameters() const; |
158 | | |
159 | | // Processing that is required before fitting baseline splines, but requires |
160 | | // linear baselines in order to be successful: |
161 | | // Removes noise if required |
162 | | // Separates out underlines |
163 | | // Pre-associates blob fragments. |
164 | | // TODO(rays/joeliu) This entire section of code is inherited from the past |
165 | | // and could be improved/eliminated. |
166 | | // page_tr is used to size a debug window. |
167 | | void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); |
168 | | |
169 | | // Fits splines to the textlines, or creates fake QSPLINES from the straight |
170 | | // baselines that are already on the TO_ROWs. |
171 | | // As a side-effect, computes the xheights of the rows and the block. |
172 | | // Although x-height estimation is conceptually separate, it is part of |
173 | | // detecting perspective distortion and therefore baseline fitting. |
174 | | void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord); |
175 | | |
176 | | // Draws the (straight) baselines and final blobs colored according to |
177 | | // what was discarded as noise and what is associated with each row. |
178 | | void DrawFinalRows(const ICOORD &page_tr); |
179 | | |
180 | | // Render the generated spline baselines for this block on pix_in. |
181 | | void DrawPixSpline(Image pix_in); |
182 | | |
183 | | private: |
184 | | // Top-level line-spacing calculation. Computes an estimate of the line- |
185 | | // spacing, using the current baselines in the TO_ROWS of the block, and |
186 | | // then refines it by fitting a regression line to the baseline positions |
187 | | // as a function of their integer index. |
188 | | // Returns true if it seems that the model is a reasonable fit to the |
189 | | // observations. |
190 | | bool ComputeLineSpacing(); |
191 | | |
192 | | // Computes the deskewed vertical position of each baseline in the block and |
193 | | // stores them in the given vector. |
194 | | void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions); |
195 | | |
196 | | // Computes an estimate of the line spacing of the block from the median |
197 | | // of the spacings between adjacent overlapping textlines. |
198 | | void EstimateLineSpacing(); |
199 | | |
200 | | // Refines the line spacing of the block by fitting a regression |
201 | | // line to the deskewed y-position of each baseline as a function of its |
202 | | // estimated line index, allowing for a small error in the initial linespacing |
203 | | // and choosing the best available model. |
204 | | void RefineLineSpacing(const std::vector<double> &positions); |
205 | | |
206 | | // Given an initial estimate of line spacing (m_in) and the positions of each |
207 | | // baseline, computes the line spacing of the block more accurately in m_out, |
208 | | // and the corresponding intercept in c_out, and the number of spacings seen |
209 | | // in index_delta. Returns the error of fit to the line spacing model. |
210 | | double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out, |
211 | | double *c_out, int *index_delta); |
212 | | |
213 | | // The block to which this class adds extra information used during baseline |
214 | | // calculation. |
215 | | TO_BLOCK *block_; |
216 | | // The rows in the block that we will be working with. |
217 | | std::vector<BaselineRow *> rows_; |
218 | | // Amount of debugging output to provide. |
219 | | int debug_level_; |
220 | | // True if the block is non-text (graphic). |
221 | | bool non_text_block_; |
222 | | // True if the block has at least one good enough baseline to compute the |
223 | | // skew angle and therefore skew_angle_ is valid. |
224 | | bool good_skew_angle_; |
225 | | // Angle of skew in radians, measured anticlockwise from the x-axis. |
226 | | double skew_angle_; |
227 | | // Current best estimate line spacing in pixels perpendicular to skew_angle_. |
228 | | double line_spacing_; |
229 | | // Offset for baseline positions, in pixels. Each baseline is at |
230 | | // line_spacing_ * n + line_offset_ for integer n, which represents |
231 | | // [textline] line number in a line numbering system that has line 0 on or |
232 | | // at least near the x-axis. Not equal to the actual line number of a line |
233 | | // within a block as most blocks are not near the x-axis. |
234 | | double line_offset_; |
235 | | // The error of the line spacing model. |
236 | | double model_error_; |
237 | | }; |
238 | | |
239 | | class BaselineDetect { |
240 | | public: |
241 | | BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks); |
242 | | |
243 | 16.6k | ~BaselineDetect() { |
244 | 16.6k | for (auto block : blocks_) { |
245 | 16.6k | delete block; |
246 | 16.6k | } |
247 | 16.6k | } |
248 | | |
249 | | // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers |
250 | | // block-wise and page-wise data to smooth small blocks/rows, and applies |
251 | | // smoothing based on block/page-level skew and block-level linespacing. |
252 | | void ComputeStraightBaselines(bool use_box_bottoms); |
253 | | |
254 | | // Computes the baseline splines for each TO_ROW in each TO_BLOCK and |
255 | | // other associated side-effects, including pre-associating blobs, computing |
256 | | // x-heights and displaying debug information. |
257 | | // NOTE that ComputeStraightBaselines must have been called first as this |
258 | | // sets up data in the TO_ROWs upon which this function depends. |
259 | | void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, |
260 | | bool remove_noise, bool show_final_rows, Textord *textord); |
261 | | |
262 | | private: |
263 | | // Average (median) skew of the blocks on the page among those that have |
264 | | // a good angle of their own. |
265 | | FCOORD page_skew_; |
266 | | // Amount of debug output to produce. |
267 | | int debug_level_; |
268 | | // The blocks that we are working with. |
269 | | std::vector<BaselineBlock *> blocks_; |
270 | | }; |
271 | | |
272 | | } // namespace tesseract |
273 | | |
274 | | #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_ |