/src/tesseract/src/textord/textord.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: textord.cpp |
3 | | // Description: The top-level text line and word finding functionality. |
4 | | // Author: Ray Smith |
5 | | // Created: Fri Mar 13 14:43:01 PDT 2009 |
6 | | // |
7 | | // (C) Copyright 2009, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | // Include automatically generated configuration file if running autoconf. |
21 | | #ifdef HAVE_CONFIG_H |
22 | | # include "config_auto.h" |
23 | | #endif |
24 | | |
25 | | #include "baselinedetect.h" |
26 | | #include "drawtord.h" |
27 | | #include "makerow.h" |
28 | | #include "pageres.h" |
29 | | #include "textord.h" |
30 | | #include "tordmain.h" |
31 | | #include "wordseg.h" |
32 | | |
33 | | namespace tesseract { |
34 | | |
35 | | Textord::Textord(CCStruct *ccstruct) /* Constructor: stores ccstruct in ccstruct_, sets use_cjk_fp_model_ to false, then initializes every parameter member below through BOOL_MEMBER / INT_MEMBER / double_MEMBER, passing its default value, help text and ccstruct_->params(). */ |
36 | 4 | : ccstruct_(ccstruct) |
37 | 4 | , use_cjk_fp_model_(false) |
38 | | , |
39 | | // Parameters listed under makerow.cpp //////////////////// |
40 | 4 | BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode", |
41 | | ccstruct_->params()) |
42 | | , |
43 | | // Parameters listed under tospace.cpp //////////////////// |
44 | 4 | BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", ccstruct_->params()) /* NOTE(review): help text is identical to tosp_use_pre_chopping's below; confirm it describes this flag. */ |
45 | 4 | , BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false, |
46 | | "Constrain relative values of inter and intra-word gaps for " |
47 | | "old_to_method.", |
48 | | ccstruct_->params()) |
49 | 4 | , BOOL_MEMBER(tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?", |
50 | | ccstruct_->params()) |
51 | 4 | , BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, |
52 | | "Force word breaks on punct to break long lines in non-space " |
53 | | "delimited langs", |
54 | | ccstruct_->params()) |
55 | 4 | , BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params()) |
56 | 4 | , BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params()) |
57 | 4 | , BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params()) |
58 | 4 | , BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params()) |
59 | 4 | , BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params()) |
60 | 4 | , BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params()) |
61 | 4 | , BOOL_MEMBER(tosp_recovery_isolated_row_stats, true, |
62 | | "Use row alone when inadequate cert spaces", ccstruct_->params()) |
63 | 4 | , BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", ccstruct_->params()) |
64 | 4 | , BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", ccstruct_->params()) |
65 | 4 | , BOOL_MEMBER(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables", |
66 | | ccstruct_->params()) |
67 | 4 | , BOOL_MEMBER(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks", |
68 | | ccstruct_->params()) |
69 | 4 | , BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params()) |
70 | 4 | , BOOL_MEMBER(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks", |
71 | | ccstruct_->params()) |
72 | 4 | , BOOL_MEMBER(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct", |
73 | | ccstruct_->params()) |
74 | 4 | , BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params()) |
75 | 4 | , BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params()) |
76 | 4 | , BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params()) |
77 | 4 | , INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()) |
78 | 4 | , INT_MEMBER(tosp_enough_space_samples_for_median, 3, "or should we use mean", |
79 | | ccstruct_->params()) |
80 | 4 | , INT_MEMBER(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row", |
81 | | ccstruct_->params()) |
82 | 4 | , INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table", |
83 | | ccstruct_->params()) |
84 | 4 | , INT_MEMBER(tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs", |
85 | | ccstruct_->params()) |
86 | 4 | , INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", ccstruct_->params()) |
87 | 4 | , double_MEMBER(tosp_old_sp_kn_th_factor, 2.0, |
88 | | "Factor for defining space threshold in terms of space and " |
89 | | "kern sizes", |
90 | | ccstruct_->params()) |
91 | 4 | , double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params()) |
92 | 4 | , double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params()) |
93 | 4 | , double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params()) |
94 | 4 | , double_MEMBER(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this", |
95 | | ccstruct_->params()) |
96 | 4 | , double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params()) |
97 | 4 | , double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params()) |
98 | 4 | , double_MEMBER(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp", |
99 | | ccstruct_->params()) |
100 | 4 | , double_MEMBER(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp", |
101 | | ccstruct_->params()) |
102 | 4 | , double_MEMBER(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp", |
103 | | ccstruct_->params()) |
104 | 4 | , double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params()) |
105 | 4 | , double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", ccstruct_->params()) |
106 | 4 | , double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", ccstruct_->params()) |
107 | 4 | , double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", ccstruct_->params()) |
108 | 4 | , double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", ccstruct_->params()) |
109 | 4 | , double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", ccstruct_->params()) |
110 | 4 | , double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", ccstruct_->params()) |
111 | 4 | , double_MEMBER(tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats", |
112 | | ccstruct_->params()) |
113 | 4 | , double_MEMBER(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table", |
114 | | ccstruct_->params()) |
115 | 4 | , double_MEMBER(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this", |
116 | | ccstruct_->params()) |
117 | 4 | , double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this", |
118 | | ccstruct_->params()) |
119 | 4 | , double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params()) |
120 | 4 | , double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", ccstruct_->params()) |
121 | 4 | , double_MEMBER(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn", |
122 | | ccstruct_->params()) |
123 | 4 | , double_MEMBER(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this", |
124 | | ccstruct_->params()) |
125 | 4 | , double_MEMBER(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this", |
126 | | ccstruct_->params()) |
127 | 4 | , double_MEMBER(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh", |
128 | | ccstruct_->params()) |
129 | 4 | , double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation", |
130 | | ccstruct_->params()) |
131 | 4 | , double_MEMBER(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns", |
132 | | ccstruct_->params()) |
133 | 4 | , double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns", |
134 | | ccstruct_->params()) |
135 | 4 | , double_MEMBER(tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank", |
136 | | ccstruct_->params()) |
137 | 4 | , double_MEMBER(tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small", |
138 | | ccstruct_->params()) |
139 | 4 | , double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context", |
140 | | ccstruct_->params()) |
141 | | , |
142 | | // Parameters listed under tordmain.cpp /////////////////// |
143 | 4 | BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params()) |
144 | 4 | , BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", ccstruct_->params()) |
145 | 4 | , BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params()) /* NOTE(review): help text is identical to textord_show_blobs's above; confirm it describes this flag. */ |
146 | 4 | , INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", ccstruct_->params()) |
147 | 4 | , INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", ccstruct_->params()) |
148 | 4 | , double_MEMBER(textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise", |
149 | | ccstruct_->params()) |
150 | 4 | , double_MEMBER(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess", |
151 | | ccstruct_->params()) |
152 | 4 | , double_MEMBER(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess", |
153 | | ccstruct_->params()) |
154 | 4 | , INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params()) |
155 | 4 | , double_MEMBER(textord_noise_sizelimit, 0.5, "Fraction of x for big t count", |
156 | | ccstruct_->params()) |
157 | 4 | , INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params()) |
158 | 4 | , double_MEMBER(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion", |
159 | | ccstruct_->params()) |
160 | 4 | , BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params()) |
161 | 4 | , BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", ccstruct_->params()) |
162 | 4 | , double_MEMBER(textord_noise_syfract, 0.2, "xh fract height error for norm blobs", |
163 | | ccstruct_->params()) |
164 | 4 | , double_MEMBER(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs", |
165 | | ccstruct_->params()) |
166 | 4 | , double_MEMBER(textord_noise_hfract, 1.0 / 64, |
167 | | "Height fraction to discard outlines as speckle noise", ccstruct_->params()) |
168 | 4 | , INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params()) |
169 | 4 | , double_MEMBER(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion", |
170 | | ccstruct_->params()) |
171 | 4 | , BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params()) |
172 | 4 | , double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params()) |
173 | 4 | , double_MEMBER(textord_blshift_xfraction, 9.99, "Min size of baseline shift", |
174 | 4 | ccstruct_->params()) {} |
175 | | |
176 | | // Make the textlines and words inside each block: prepares the TO_BLOCKs, makes the rows, computes baselines, makes the words, then removes empties, transfers diacritics and computes row margins. The value returned by the row maker (or 0 in the sparse fallback) is stored in *gradient. |
177 | | void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, |
178 | | Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, |
179 | | BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, |
180 | 17.2k | TO_BLOCK_LIST *to_blocks, float *gradient) { |
181 | 17.2k | page_tr_.set_x(width); |
182 | 17.2k | page_tr_.set_y(height); |
183 | 17.2k | if (to_blocks->empty()) { |
184 | | // to_blocks is empty (AutoPageSeg was not used), so find_components must fill it first. |
185 | 17.2k | find_components(binary_pix, blocks, to_blocks); |
186 | 17.2k | TO_BLOCK_IT it(to_blocks); |
187 | 34.5k | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
188 | 17.2k | TO_BLOCK *to_block = it.data(); |
189 | | // Compute the edge offsets for every block, whether or not there is a grey_pix. |
190 | | // Auto page seg was by-passed, so this step has to be run here instead. |
191 | | // In this page segmentation mode there is no non-text to avoid running on. |
192 | 17.2k | to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); |
193 | 17.2k | } |
194 | 17.2k | } else if (!PSM_SPARSE(pageseg_mode)) { |
195 | | // AutoPageSeg already found the components, so find_components is not needed. |
196 | | // filter_blobs sets up the TO_BLOCKs the same way find_components does. |
197 | 0 | filter_blobs(page_tr_, to_blocks, true); |
198 | 0 | } |
199 | | |
200 | 17.2k | ASSERT_HOST(!to_blocks->empty()); |
201 | 17.2k | if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { |
202 | 0 | const FCOORD anticlockwise90(0.0f, 1.0f); |
203 | 0 | const FCOORD clockwise90(0.0f, -1.0f); |
204 | 0 | TO_BLOCK_IT it(to_blocks); |
205 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
206 | 0 | TO_BLOCK *to_block = it.data(); |
207 | 0 | BLOCK *block = to_block->block; |
208 | | // Give the block a fake PT_VERTICAL_TEXT poly_block built from its bounding box. |
209 | 0 | block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), PT_VERTICAL_TEXT)); |
210 | | // Rotate the to_block along with its contained block and blobnbox lists. |
211 | 0 | to_block->rotate(anticlockwise90); |
212 | | // Set the block's rotation values to obey the convention followed in |
213 | | // layout analysis for vertical text. |
214 | 0 | block->set_re_rotation(clockwise90); |
215 | 0 | block->set_classify_rotation(clockwise90); |
216 | 0 | } |
217 | 0 | } |
218 | | |
219 | 17.2k | TO_BLOCK_IT to_block_it(to_blocks); |
220 | 17.2k | TO_BLOCK *to_block = to_block_it.data(); /* Data at the iterator's initial position; handed to make_single_row below. */ |
221 | | // Make the rows in the block. |
222 | | // Modes with line finding enabled run make_rows over all of to_blocks. |
223 | 17.2k | if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { |
224 | 17.2k | *gradient = make_rows(page_tr_, to_blocks); |
225 | 17.2k | } else if (!PSM_SPARSE(pageseg_mode)) { |
226 | | // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. |
227 | 0 | *gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks); |
228 | 0 | } else { |
229 | 0 | *gradient = 0.0f; |
230 | 0 | } |
231 | 17.2k | BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks); |
232 | 17.2k | baseline_detector.ComputeStraightBaselines(use_box_bottoms); |
233 | 17.2k | baseline_detector.ComputeBaselineSplinesAndXheights( |
234 | 17.2k | page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, textord_show_final_rows, this); |
235 | | // Now make the words in the lines. |
236 | 17.2k | if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { |
237 | | // SINGLE_LINE uses the old word maker on the single line. |
238 | 17.2k | make_words(this, page_tr_, *gradient, blocks, to_blocks); |
239 | 17.2k | } else { |
240 | | // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a |
241 | | // single word, and in SINGLE_CHAR mode, all the outlines |
242 | | // go in a single blob. |
243 | 0 | TO_BLOCK *to_block = to_block_it.data(); /* NOTE(review): re-declares to_block, shadowing the outer one; to_block_it is not advanced in this function, so this looks redundant - confirm. */ |
244 | 0 | make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(), |
245 | 0 | to_block->block->row_list()); |
246 | 0 | } |
247 | | // Remove empties. |
248 | 17.2k | cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks); |
249 | 17.2k | TransferDiacriticsToBlockGroups(diacritic_blobs, blocks); |
250 | | // Compute the margins for each row in every block, to be used later for |
251 | | // paragraph detection. |
252 | 17.2k | BLOCK_IT b_it(blocks); |
253 | 32.7k | for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { |
254 | 15.4k | b_it.data()->compute_row_margins(); |
255 | 15.4k | } |
256 | | #ifndef GRAPHICS_DISABLED |
257 | | close_to_win(); |
258 | | #endif |
259 | 17.2k | } |
260 | | |
261 | | // If we were supposed to return only a single textline (line finding off and not a sparse mode), |
262 | | // keep only the words of the row with the greatest mean word certainty and delete all the others. |
263 | 15.4k | void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res) { |
264 | 15.4k | if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) { |
265 | 15.4k | return; // Line-finding and sparse modes need no cleanup. |
266 | 15.4k | } |
267 | 0 | PAGE_RES_IT it(page_res); |
268 | | // Find the best row: the one whose mean best_choice certainty over its words is greatest. |
269 | 0 | float row_total_conf = 0.0f; |
270 | 0 | int row_word_count = 0; |
271 | 0 | ROW_RES *best_row = nullptr; |
272 | 0 | float best_conf = 0.0f; |
273 | 0 | for (it.restart_page(); it.word() != nullptr; it.forward()) { |
274 | 0 | WERD_RES *word = it.word(); |
275 | 0 | row_total_conf += word->best_choice->certainty(); /* NOTE(review): best_choice is dereferenced unchecked; confirm it is never null here. */ |
276 | 0 | ++row_word_count; |
277 | 0 | if (it.next_row() != it.row()) { /* Presumably the last word of the current row: turn the running total into a mean and compare. */ |
278 | 0 | row_total_conf /= row_word_count; |
279 | 0 | if (best_row == nullptr || best_conf < row_total_conf) { |
280 | 0 | best_row = it.row(); |
281 | 0 | best_conf = row_total_conf; |
282 | 0 | } |
283 | 0 | row_total_conf = 0.0f; |
284 | 0 | row_word_count = 0; |
285 | 0 | } |
286 | 0 | } |
287 | | // Second pass: delete every word whose row is not the best row. |
288 | 0 | for (it.restart_page(); it.word() != nullptr; it.forward()) { |
289 | 0 | if (it.row() != best_row) { |
290 | 0 | it.DeleteCurrentWord(); |
291 | 0 | } |
292 | 0 | } |
293 | 0 | } |
294 | | |
295 | | } // namespace tesseract. |