/src/tesseract/src/textord/textord.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: textord.h |
3 | | // Description: The Textord class definition gathers text line and word |
4 | | // finding functionality. |
5 | | // Author: Ray Smith |
6 | | // Created: Fri Mar 13 14:29:01 PDT 2009 |
7 | | // |
8 | | // (C) Copyright 2009, Google Inc. |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
10 | | // you may not use this file except in compliance with the License. |
11 | | // You may obtain a copy of the License at |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // Unless required by applicable law or agreed to in writing, software |
14 | | // distributed under the License is distributed on an "AS IS" BASIS, |
15 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | // See the License for the specific language governing permissions and |
17 | | // limitations under the License. |
18 | | // |
19 | | /////////////////////////////////////////////////////////////////////// |
20 | | |
21 | | #ifndef TESSERACT_TEXTORD_TEXTORD_H_ |
22 | | #define TESSERACT_TEXTORD_TEXTORD_H_ |
23 | | |
24 | | #include "bbgrid.h" |
25 | | #include "blobbox.h" |
26 | | #include "ccstruct.h" |
27 | | #include "gap_map.h" |
28 | | |
29 | | #include <tesseract/publictypes.h> // For PageSegMode. |
30 | | |
31 | | namespace tesseract { |
32 | | |
33 | | class FCOORD; |
34 | | class BLOCK_LIST; |
35 | | class PAGE_RES; |
36 | | class TO_BLOCK; |
37 | | class TO_BLOCK_LIST; |
38 | | class ScrollView; |
39 | | |
40 | | // A simple holder that BBGrid can index: it pairs a borrowed word with an expanded |
41 | | // bounding box, which makes it easy to find the words that nearby diacritics belong in. A default-constructed instance holds a null word. |
42 | | class WordWithBox { |
43 | | public: |
44 | 0 | WordWithBox() : word_(nullptr) {} |
45 | 206k | explicit WordWithBox(WERD *word) : word_(word), bounding_box_(word->bounding_box()) { |
46 | 206k | int height = bounding_box_.height(); |
47 | 206k | bounding_box_.pad(height, height); |
48 | 206k | } |
49 | | |
50 | 27.1M | const TBOX &bounding_box() const { |
51 | 27.1M | return bounding_box_; |
52 | 27.1M | } |
53 | | // Returns the bounding box of only the good blobs, as reported by the wrapped word. The word pointer is dereferenced, so it must not be null here. |
54 | 0 | TBOX true_bounding_box() const { |
55 | 0 | return word_->true_bounding_box(); |
56 | 0 | } |
57 | 0 | C_BLOB_LIST *RejBlobs() const { |
58 | 0 | return word_->rej_cblob_list(); |
59 | 0 | } |
60 | 0 | const WERD *word() const { |
61 | 0 | return word_; |
62 | 0 | } |
63 | | |
64 | | private: |
65 | | // Borrowed (non-owning) pointer to a real word somewhere that must outlive this object; nullptr after default construction. |
66 | | WERD *word_; |
67 | | // Cached copy of the word bounding box, expanded at construction by pad(height, height) so it is padded all round by its own height. |
68 | | TBOX bounding_box_; |
69 | | }; |
70 | | |
71 | | // Make WordWithBox usable by BBGrid. CLISTIZEH presumably declares the WordWithBox_CLIST and WordWithBox_C_IT types named in the aliases below (macro body not visible here - confirm). |
72 | | CLISTIZEH(WordWithBox) |
73 | | using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>; |
74 | | using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>; |
75 | | |
76 | | class Textord { |
77 | | public: |
78 | | explicit Textord(CCStruct *ccstruct); |
79 | 0 | ~Textord() = default; |
80 | | |
81 | | // Make the textlines and words inside each block. |
82 | | // binary_pix is mandatory and is the binarized input after line removal. |
83 | | // grey_pix is optional, but if present must match the binary_pix in size, |
84 | | // and must be a *real* grey image instead of binary_pix * 255. |
85 | | // thresholds_pix is expected to be present if and only if grey_pix is present and |
86 | | // can be an integer factor reduction of the grey_pix. It represents the |
87 | | // thresholds that were used to create the binary_pix from the grey_pix. |
88 | | // diacritic_blobs contains small confusing components that should be added |
89 | | // to the appropriate word(s) in case they are really diacritics. |
90 | | void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, |
91 | | Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, |
92 | | BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, |
93 | | float *gradient); |
94 | | |
95 | | // If we were supposed to return only a single textline, and more than one |
96 | | // was found, clean up and leave only the best. |
97 | | void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res); |
98 | | |
99 | 15.4k | bool use_cjk_fp_model() const { |
100 | 15.4k | return use_cjk_fp_model_; |
101 | 15.4k | } |
102 | 15.4k | void set_use_cjk_fp_model(bool flag) { |
103 | 15.4k | use_cjk_fp_model_ = flag; |
104 | 15.4k | } |
105 | | |
106 | | // tospace.cpp /////////////////////////////////////////// |
107 | | void to_spacing(ICOORD page_tr, // top right of page |
108 | | TO_BLOCK_LIST *blocks // blocks on page |
109 | | ); |
110 | | ROW *make_prop_words(TO_ROW *row, // row to make |
111 | | FCOORD rotation // for drawing |
112 | | ); |
113 | | ROW *make_blob_words(TO_ROW *row, // row to make |
114 | | FCOORD rotation // for drawing |
115 | | ); |
116 | | // tordmain.cpp /////////////////////////////////////////// |
117 | | void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); |
118 | | void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on); |
119 | | |
120 | | private: |
121 | | // For underlying memory management and other utilities. |
122 | | CCStruct *ccstruct_; |
123 | | |
124 | | // The size of the input image. |
125 | | ICOORD page_tr_; |
126 | | |
127 | | bool use_cjk_fp_model_; |
128 | | |
129 | | // makerow.cpp /////////////////////////////////////////// |
130 | | // Make the textlines inside each block. |
131 | | void MakeRows(PageSegMode pageseg_mode, const FCOORD &skew, int width, int height, |
132 | | TO_BLOCK_LIST *to_blocks); |
133 | | // Make the textlines inside a single block. |
134 | | void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD &skew, TO_BLOCK *block, |
135 | | ScrollView *win); |
136 | | |
137 | | public: |
138 | | void compute_block_xheight(TO_BLOCK *block, float gradient); |
139 | | void compute_row_xheight(TO_ROW *row, // row to do |
140 | | const FCOORD &rotation, |
141 | | float gradient, // global skew |
142 | | int block_line_size); |
143 | | void make_spline_rows(TO_BLOCK *block, // block to do |
144 | | float gradient, // gradient to fit |
145 | | bool testing_on); |
146 | | |
147 | | private: |
148 | | //// oldbasel.cpp //////////////////////////////////////// |
149 | | void make_old_baselines(TO_BLOCK *block, // block to do |
150 | | bool testing_on, // correct orientation |
151 | | float gradient); |
152 | | void correlate_lines(TO_BLOCK *block, float gradient); |
153 | | void correlate_neighbours(TO_BLOCK *block, // block rows are in. |
154 | | TO_ROW **rows, // rows of block. |
155 | | int rowcount); // number of rows to do. |
156 | | int correlate_with_stats(TO_ROW **rows, // rows of block. |
157 | | int rowcount, // number of rows to do. |
158 | | TO_BLOCK *block); |
159 | | void find_textlines(TO_BLOCK *block, // block row is in |
160 | | TO_ROW *row, // row to do |
161 | | int degree, // required approximation |
162 | | QSPLINE *spline); // starting spline |
163 | | // tospace.cpp /////////////////////////////////////////// |
164 | | // DEBUG USE ONLY |
165 | | void block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_text_ord_proportional, |
166 | | // resulting estimate |
167 | | int16_t &block_space_gap_width, |
168 | | // resulting estimate |
169 | | int16_t &block_non_space_gap_width); |
170 | | void row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, int16_t row_idx, |
171 | | // estimate for block |
172 | | int16_t block_space_gap_width, |
173 | | // estimate for block |
174 | | int16_t block_non_space_gap_width); |
175 | | void old_to_method(TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats, |
176 | | STATS *small_gap_stats, int16_t block_space_gap_width, |
177 | | // estimate for block |
178 | | int16_t block_non_space_gap_width); |
179 | | bool isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, bool suspected_table, |
180 | | int16_t block_idx, int16_t row_idx); |
181 | | int16_t stats_count_under(STATS *stats, int16_t threshold); |
182 | | void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); |
183 | | bool make_a_word_break(TO_ROW *row, // row being made |
184 | | TBOX blob_box, // for next_blob. NOTE(review): the stray how many blanks? remark that followed here presumably documents the blanks out-param below - confirm. |
185 | | int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap, |
186 | | int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap, |
187 | | uint8_t &blanks, bool &fuzzy_sp, bool &fuzzy_non, |
188 | | bool &prev_gap_was_a_space, bool &break_at_next_gap); |
189 | | bool narrow_blob(TO_ROW *row, TBOX blob_box); |
190 | | bool wide_blob(TO_ROW *row, TBOX blob_box); |
191 | | bool suspected_punct_blob(TO_ROW *row, TBOX box); |
192 | | void peek_at_next_gap(TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, int16_t &next_gap, |
193 | | int16_t &next_within_xht_gap); |
194 | | void mark_gap(TBOX blob, // blob following gap |
195 | | int16_t rule, // heuristic id |
196 | | int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap, |
197 | | int16_t next_blob_width, int16_t next_gap); |
198 | | float find_mean_blob_spacing(WERD *word); |
199 | | bool ignore_big_gap(TO_ROW *row, int32_t row_length, GAPMAP *gapmap, int16_t left, int16_t right); |
200 | | // get bounding box |
201 | | TBOX reduced_box_next(TO_ROW *row, // current row |
202 | | BLOBNBOX_IT *it // iterator to blobs |
203 | | ); |
204 | | TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht); |
205 | | // tordmain.cpp /////////////////////////////////////////// |
206 | | float filter_noise_blobs(BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list, |
207 | | BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list); |
208 | | // Fixes the block so it obeys all the rules: |
209 | | // Must have at least one ROW. |
210 | | // Must have at least one WERD. |
211 | | // WERDs contain a fake blob. |
212 | | void cleanup_nontext_block(BLOCK *block); |
213 | | void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks); |
214 | | bool clean_noise_from_row(ROW *row); |
215 | | void clean_noise_from_words(ROW *row); |
216 | | // Remove outlines that are a tiny fraction in either width or height |
217 | | // of the word height. |
218 | | void clean_small_noise_from_words(ROW *row); |
219 | | // Groups blocks by rotation, then, for each group, makes a WordGrid and calls |
220 | | // TransferDiacriticsToWords to copy the diacritic blobs to the most |
221 | | // appropriate words in the group of blocks. Source blobs are not touched. |
222 | | void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks); |
223 | | // Places a copy of blobs that are near a word (after applying rotation to the |
224 | | // blob) in the most appropriate word, unless there is doubt, in which case a |
225 | | // blob can end up in two words. Source blobs are not touched. |
226 | | void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation, |
227 | | WordGrid *word_grid); |
228 | | |
229 | | public: |
230 | | // makerow.cpp /////////////////////////////////////////// |
231 | | BOOL_VAR_H(textord_single_height_mode); |
232 | | // tospace.cpp /////////////////////////////////////////// |
233 | | BOOL_VAR_H(tosp_old_to_method); |
234 | | BOOL_VAR_H(tosp_old_to_constrain_sp_kn); |
235 | | BOOL_VAR_H(tosp_only_use_prop_rows); |
236 | | BOOL_VAR_H(tosp_force_wordbreak_on_punct); |
237 | | BOOL_VAR_H(tosp_use_pre_chopping); |
238 | | BOOL_VAR_H(tosp_old_to_bug_fix); |
239 | | BOOL_VAR_H(tosp_block_use_cert_spaces); |
240 | | BOOL_VAR_H(tosp_row_use_cert_spaces); |
241 | | BOOL_VAR_H(tosp_narrow_blobs_not_cert); |
242 | | BOOL_VAR_H(tosp_row_use_cert_spaces1); |
243 | | BOOL_VAR_H(tosp_recovery_isolated_row_stats); |
244 | | BOOL_VAR_H(tosp_only_small_gaps_for_kern); |
245 | | BOOL_VAR_H(tosp_all_flips_fuzzy); |
246 | | BOOL_VAR_H(tosp_fuzzy_limit_all); |
247 | | BOOL_VAR_H(tosp_stats_use_xht_gaps); |
248 | | BOOL_VAR_H(tosp_use_xht_gaps); |
249 | | BOOL_VAR_H(tosp_only_use_xht_gaps); |
250 | | BOOL_VAR_H(tosp_rule_9_test_punct); |
251 | | BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp); |
252 | | BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn); |
253 | | BOOL_VAR_H(tosp_improve_thresh); |
254 | | INT_VAR_H(tosp_debug_level); |
255 | | INT_VAR_H(tosp_enough_space_samples_for_median); |
256 | | INT_VAR_H(tosp_redo_kern_limit); |
257 | | INT_VAR_H(tosp_few_samples); |
258 | | INT_VAR_H(tosp_short_row); |
259 | | INT_VAR_H(tosp_sanity_method); |
260 | | double_VAR_H(tosp_old_sp_kn_th_factor); |
261 | | double_VAR_H(tosp_threshold_bias1); |
262 | | double_VAR_H(tosp_threshold_bias2); |
263 | | double_VAR_H(tosp_narrow_fraction); |
264 | | double_VAR_H(tosp_narrow_aspect_ratio); |
265 | | double_VAR_H(tosp_wide_fraction); |
266 | | double_VAR_H(tosp_wide_aspect_ratio); |
267 | | double_VAR_H(tosp_fuzzy_space_factor); |
268 | | double_VAR_H(tosp_fuzzy_space_factor1); |
269 | | double_VAR_H(tosp_fuzzy_space_factor2); |
270 | | double_VAR_H(tosp_gap_factor); |
271 | | double_VAR_H(tosp_kern_gap_factor1); |
272 | | double_VAR_H(tosp_kern_gap_factor2); |
273 | | double_VAR_H(tosp_kern_gap_factor3); |
274 | | double_VAR_H(tosp_ignore_big_gaps); |
275 | | double_VAR_H(tosp_ignore_very_big_gaps); |
276 | | double_VAR_H(tosp_rep_space); |
277 | | double_VAR_H(tosp_enough_small_gaps); |
278 | | double_VAR_H(tosp_table_kn_sp_ratio); |
279 | | double_VAR_H(tosp_table_xht_sp_ratio); |
280 | | double_VAR_H(tosp_table_fuzzy_kn_sp_ratio); |
281 | | double_VAR_H(tosp_fuzzy_kn_fraction); |
282 | | double_VAR_H(tosp_fuzzy_sp_fraction); |
283 | | double_VAR_H(tosp_min_sane_kn_sp); |
284 | | double_VAR_H(tosp_init_guess_kn_mult); |
285 | | double_VAR_H(tosp_init_guess_xht_mult); |
286 | | double_VAR_H(tosp_max_sane_kn_thresh); |
287 | | double_VAR_H(tosp_flip_caution); |
288 | | double_VAR_H(tosp_large_kerning); |
289 | | double_VAR_H(tosp_dont_fool_with_small_kerns); |
290 | | double_VAR_H(tosp_near_lh_edge); |
291 | | double_VAR_H(tosp_silly_kn_sp_gap); |
292 | | double_VAR_H(tosp_pass_wide_fuzz_sp_to_context); |
293 | | // tordmain.cpp /////////////////////////////////////////// |
294 | | BOOL_VAR_H(textord_no_rejects); |
295 | | BOOL_VAR_H(textord_show_blobs); |
296 | | BOOL_VAR_H(textord_show_boxes); |
297 | | INT_VAR_H(textord_max_noise_size); |
298 | | INT_VAR_H(textord_baseline_debug); |
299 | | double_VAR_H(textord_noise_area_ratio); |
300 | | double_VAR_H(textord_initialx_ile); |
301 | | double_VAR_H(textord_initialasc_ile); |
302 | | INT_VAR_H(textord_noise_sizefraction); |
303 | | double_VAR_H(textord_noise_sizelimit); |
304 | | INT_VAR_H(textord_noise_translimit); |
305 | | double_VAR_H(textord_noise_normratio); |
306 | | BOOL_VAR_H(textord_noise_rejwords); |
307 | | BOOL_VAR_H(textord_noise_rejrows); |
308 | | double_VAR_H(textord_noise_syfract); |
309 | | double_VAR_H(textord_noise_sxfract); |
310 | | double_VAR_H(textord_noise_hfract); |
311 | | INT_VAR_H(textord_noise_sncount); |
312 | | double_VAR_H(textord_noise_rowratio); |
313 | | BOOL_VAR_H(textord_noise_debug); |
314 | | double_VAR_H(textord_blshift_maxshift); |
315 | | double_VAR_H(textord_blshift_xfraction); |
316 | | }; |
317 | | |
318 | | } // namespace tesseract |
319 | | |
320 | | #endif // TESSERACT_TEXTORD_TEXTORD_H_ |