/src/tesseract/src/textord/makerow.h
Line | Count | Source |
1 | | /********************************************************************** |
2 | | * File: makerow.h (Formerly makerows.h) |
3 | | * Description: Code to arrange blobs into rows of text. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1992, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #ifndef MAKEROW_H |
20 | | #define MAKEROW_H |
21 | | |
22 | | #include "blobbox.h" |
23 | | #include "blobs.h" |
24 | | #include "ocrblock.h" |
25 | | #include "params.h" |
26 | | #include "statistc.h" |
27 | | |
28 | | namespace tesseract { |
29 | | |
// Outcome of matching a blob against the existing rows; this is the return
// type of most_overlapping_row().
enum OVERLAP_STATE {
  ASSIGN, // assign the blob to the row
  REJECT, // reject the blob - it overlaps two rows
  NEW_ROW // NOTE(review): presumably "put the blob in a new row" - confirm in makerow.cpp
};
35 | | |
// Classification of a row as produced by get_row_category().
enum ROW_CATEGORY {
  ROW_ASCENDERS_FOUND,  // xheight > 0 and ascrise > 0
  ROW_DESCENDERS_FOUND, // xheight > 0, ascrise not positive, descdrop != 0
  ROW_UNKNOWN,          // xheight > 0 but neither of the above holds
  ROW_INVALID,          // xheight <= 0
};
42 | | |
43 | | extern BOOL_VAR_H(textord_heavy_nr); |
44 | | extern BOOL_VAR_H(textord_show_initial_rows); |
45 | | extern BOOL_VAR_H(textord_show_parallel_rows); |
46 | | extern BOOL_VAR_H(textord_show_expanded_rows); |
47 | | extern BOOL_VAR_H(textord_show_final_rows); |
48 | | extern BOOL_VAR_H(textord_show_final_blobs); |
49 | | extern BOOL_VAR_H(textord_test_landscape); |
50 | | extern BOOL_VAR_H(textord_parallel_baselines); |
51 | | extern BOOL_VAR_H(textord_straight_baselines); |
52 | | extern BOOL_VAR_H(textord_old_baselines); |
53 | | extern BOOL_VAR_H(textord_old_xheight); |
54 | | extern BOOL_VAR_H(textord_fix_xheight_bug); |
55 | | extern BOOL_VAR_H(textord_fix_makerow_bug); |
56 | | extern BOOL_VAR_H(textord_debug_xheights); |
57 | | extern INT_VAR_H(textord_test_x); |
58 | | extern INT_VAR_H(textord_test_y); |
59 | | extern INT_VAR_H(textord_min_blobs_in_row); |
60 | | extern INT_VAR_H(textord_spline_minblobs); |
61 | | extern INT_VAR_H(textord_spline_medianwin); |
62 | | extern INT_VAR_H(textord_min_xheight); |
63 | | extern double_VAR_H(textord_spline_shift_fraction); |
64 | | extern double_VAR_H(textord_skew_ile); |
65 | | extern double_VAR_H(textord_skew_lag); |
66 | | extern double_VAR_H(textord_linespace_iqrlimit); |
67 | | extern double_VAR_H(textord_width_limit); |
68 | | extern double_VAR_H(textord_chop_width); |
69 | | extern double_VAR_H(textord_minxh); |
70 | | extern double_VAR_H(textord_min_linesize); |
71 | | extern double_VAR_H(textord_excess_blobsize); |
72 | | extern double_VAR_H(textord_occupancy_threshold); |
73 | | extern double_VAR_H(textord_underline_width); |
74 | | extern double_VAR_H(textord_min_blob_height_fraction); |
75 | | extern double_VAR_H(textord_xheight_mode_fraction); |
76 | | extern double_VAR_H(textord_ascheight_mode_fraction); |
77 | | extern double_VAR_H(textord_ascx_ratio_min); |
78 | | extern double_VAR_H(textord_ascx_ratio_max); |
79 | | extern double_VAR_H(textord_descx_ratio_min); |
80 | | extern double_VAR_H(textord_descx_ratio_max); |
81 | | extern double_VAR_H(textord_xheight_error_margin); |
82 | | extern INT_VAR_H(textord_lms_line_trials); |
83 | | extern BOOL_VAR_H(textord_new_initial_xheight); |
84 | | extern BOOL_VAR_H(textord_debug_blob); |
85 | | |
86 | 330k | inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) { |
87 | 330k | *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh)); |
88 | 330k | if (*min_height < textord_min_xheight) { |
89 | 322k | *min_height = textord_min_xheight; |
90 | 322k | } |
91 | 330k | *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0)); |
92 | 330k | } |
93 | | |
94 | 713k | inline ROW_CATEGORY get_row_category(const TO_ROW *row) { |
95 | 713k | if (row->xheight <= 0) { |
96 | 205k | return ROW_INVALID; |
97 | 205k | } |
98 | 508k | return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND |
99 | 508k | : (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN; |
100 | 713k | } |
101 | | |
// Returns true when test lies in the closed interval
// [num * (1 - margin), num * (1 + margin)].
// For negative num the interval bounds swap, so the result is false unless
// the interval collapses to a point (margin == 0 and test == num).
inline bool within_error_margin(float test, float num, float margin) {
  // Written as a negated >= so NaN operands fail here, as in a plain && chain.
  if (!(test >= num * (1 - margin))) {
    return false;
  }
  return test <= num * (1 + margin);
}
105 | | |
106 | | void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, |
107 | | STATS *floating_heights); |
108 | | |
109 | | float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks); |
110 | | float make_rows(ICOORD page_tr, // top right |
111 | | TO_BLOCK_LIST *port_blocks); |
112 | | void make_initial_textrows(ICOORD page_tr, |
113 | | TO_BLOCK *block, // block to do |
114 | | FCOORD rotation, // for drawing |
115 | | bool testing_on); // correct orientation |
116 | | void fit_lms_line(TO_ROW *row); |
117 | | void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks |
118 | | float &page_m, // average gradient |
119 | | float &page_err); // average error |
120 | | void vigorous_noise_removal(TO_BLOCK *block); |
121 | | void cleanup_rows_making(ICOORD page_tr, // top right |
122 | | TO_BLOCK *block, // block to do |
123 | | float gradient, // gradient to fit |
124 | | FCOORD rotation, // for drawing |
125 | | int32_t block_edge, // edge of block |
126 | | bool testing_on); // correct orientation |
127 | | void delete_non_dropout_rows( // find lines |
128 | | TO_BLOCK *block, // block to do |
129 | | float gradient, // global skew |
130 | | FCOORD rotation, // deskew vector |
131 | | int32_t block_edge, // left edge |
132 | | bool testing_on // correct orientation |
133 | | ); |
134 | | bool find_best_dropout_row( // find neighbours |
135 | | TO_ROW *row, // row to test |
136 | | int32_t distance, // dropout dist |
137 | | float dist_limit, // threshold distance |
138 | | int32_t line_index, // index of row |
139 | | TO_ROW_IT *row_it, // current position |
140 | | bool testing_on // correct orientation |
141 | | ); |
142 | | TBOX deskew_block_coords( // block box |
143 | | TO_BLOCK *block, // block to do |
144 | | float gradient // global skew |
145 | | ); |
146 | | void compute_line_occupation( // project blobs |
147 | | TO_BLOCK *block, // block to do |
148 | | float gradient, // global skew |
149 | | int32_t min_y, // min coord in block |
150 | | int32_t max_y, // in block |
151 | | int32_t *occupation, // output projection |
152 | | int32_t *deltas // derivative |
153 | | ); |
154 | | void compute_occupation_threshold( // project blobs |
155 | | int32_t low_window, // below result point |
156 | | int32_t high_window, // above result point |
157 | | int32_t line_count, // array sizes |
158 | | int32_t *occupation, // input projection |
159 | | int32_t *thresholds // output thresholds |
160 | | ); |
161 | | void compute_dropout_distances( // project blobs |
162 | | int32_t *occupation, // input projection |
163 | | int32_t *thresholds, // output thresholds |
164 | | int32_t line_count // array sizes |
165 | | ); |
166 | | void expand_rows( // find lines |
167 | | ICOORD page_tr, // top right |
168 | | TO_BLOCK *block, // block to do |
169 | | float gradient, // gradient to fit |
170 | | FCOORD rotation, // for drawing |
171 | | int32_t block_edge, // edge of block |
172 | | bool testing_on // correct orientation |
173 | | ); |
174 | | void adjust_row_limits( // tidy limits |
175 | | TO_BLOCK *block // block to do |
176 | | ); |
177 | | void compute_row_stats( // find lines |
178 | | TO_BLOCK *block, // block to do |
179 | | bool testing_on // correct orientation |
180 | | ); |
181 | | float median_block_xheight( // find lines |
182 | | TO_BLOCK *block, // block to do |
183 | | float gradient // global skew |
184 | | ); |
185 | | |
186 | | int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, |
187 | | int min_height, int max_height, float *xheight, float *ascrise); |
188 | | |
189 | | int32_t compute_row_descdrop(TO_ROW *row, // row to do |
190 | | float gradient, // global skew |
191 | | int xheight_blob_count, STATS *heights); |
192 | | int32_t compute_height_modes(STATS *heights, // stats to search |
193 | | int32_t min_height, // bottom of range |
194 | | int32_t max_height, // top of range |
195 | | int32_t *modes, // output array |
196 | | int32_t maxmodes); // size of modes |
197 | | void correct_row_xheight(TO_ROW *row, // row to fix |
198 | | float xheight, // average values |
199 | | float ascrise, float descdrop); |
200 | | void separate_underlines(TO_BLOCK *block, // block to do |
201 | | float gradient, // skew angle |
202 | | FCOORD rotation, // inverse landscape |
203 | | bool testing_on); // correct orientation |
204 | | void pre_associate_blobs(ICOORD page_tr, // top right |
205 | | TO_BLOCK *block, // block to do |
206 | | FCOORD rotation, // inverse landscape |
207 | | bool testing_on); // correct orientation |
208 | | void fit_parallel_rows(TO_BLOCK *block, // block to do |
209 | | float gradient, // gradient to fit |
210 | | FCOORD rotation, // for drawing |
211 | | int32_t block_edge, // edge of block |
212 | | bool testing_on); // correct orientation |
213 | | void fit_parallel_lms(float gradient, // forced gradient |
214 | | TO_ROW *row); // row to fit |
215 | | void make_baseline_spline(TO_ROW *row, // row to fit |
216 | | TO_BLOCK *block); // block it came from |
217 | | bool segment_baseline( // split baseline |
218 | | TO_ROW *row, // row to fit |
219 | | TO_BLOCK *block, // block it came from |
220 | | int32_t &segments, // no fo segments |
221 | | int32_t *xstarts // coords of segments |
222 | | ); |
223 | | double *linear_spline_baseline( // split baseline |
224 | | TO_ROW *row, // row to fit |
225 | | TO_BLOCK *block, // block it came from |
226 | | int32_t &segments, // no fo segments |
227 | | int32_t xstarts[] // coords of segments |
228 | | ); |
229 | | void assign_blobs_to_rows( // find lines |
230 | | TO_BLOCK *block, // block to do |
231 | | float *gradient, // block skew |
232 | | int pass, // identification |
233 | | bool reject_misses, // chuck big ones out |
234 | | bool make_new_rows, // add rows for unmatched |
235 | | bool drawing_skew // draw smoothed skew |
236 | | ); |
237 | | // find best row |
238 | | OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator |
239 | | TO_ROW *&best_row, // output row |
240 | | float top, // top of blob |
241 | | float bottom, // bottom of blob |
242 | | float rowsize, // max row size |
243 | | bool testing_blob // test stuff |
244 | | ); |
245 | | int blob_x_order( // sort function |
246 | | const BLOBNBOX *item1, // items to compare |
247 | | const BLOBNBOX *item2); |
248 | | |
249 | | void mark_repeated_chars(TO_ROW *row); |
250 | | |
251 | | } // namespace tesseract |
252 | | |
253 | | #endif |