Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/textord/textord.cpp
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        textord.cpp
3
// Description: The top-level text line and word finding functionality.
4
// Author:      Ray Smith
5
// Created:     Fri Mar 13 14:43:01 PDT 2009
6
//
7
// (C) Copyright 2009, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
// Include automatically generated configuration file if running autoconf.
21
#ifdef HAVE_CONFIG_H
22
#  include "config_auto.h"
23
#endif
24
25
#include "baselinedetect.h"
26
#include "drawtord.h"
27
#include "makerow.h"
28
#include "pageres.h"
29
#include "textord.h"
30
#include "tordmain.h"
31
#include "wordseg.h"
32
33
namespace tesseract {
34
35
// Constructs a Textord attached to the given CCStruct.
// The initializer list stores the ccstruct pointer in ccstruct_, sets
// use_cjk_fp_model_ to false, and then initializes every tunable parameter
// member through the BOOL_MEMBER / INT_MEMBER / double_MEMBER macros. Each
// macro invocation supplies the parameter name, its default value, a help
// string, and ccstruct_->params() as the final argument (presumably the
// parameter collection the member is registered with - confirm against the
// macro definitions). The constructor body itself is empty.
// The parameters are grouped by the source file named in each section
// comment below.
Textord::Textord(CCStruct *ccstruct)
36
4
    : ccstruct_(ccstruct)
37
4
    , use_cjk_fp_model_(false)
38
    ,
39
    // makerow.cpp ///////////////////////////////////////////
40
4
    BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode",
41
                ccstruct_->params())
42
    ,
43
    // tospace.cpp ///////////////////////////////////////////
    // NOTE(review): tosp_old_to_method and tosp_use_pre_chopping below are
    // given the same help text - confirm that is intended.
44
4
    BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", ccstruct_->params())
45
4
    , BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46
                  "Constrain relative values of inter and intra-word gaps for "
47
                  "old_to_method.",
48
                  ccstruct_->params())
49
4
    , BOOL_MEMBER(tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?",
50
                  ccstruct_->params())
51
4
    , BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52
                  "Force word breaks on punct to break long lines in non-space "
53
                  "delimited langs",
54
                  ccstruct_->params())
55
4
    , BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params())
56
4
    , BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params())
57
4
    , BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
58
4
    , BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
59
4
    , BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params())
60
4
    , BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params())
61
4
    , BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
62
                  "Use row alone when inadequate cert spaces", ccstruct_->params())
63
4
    , BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", ccstruct_->params())
64
4
    , BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", ccstruct_->params())
65
4
    , BOOL_MEMBER(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables",
66
                  ccstruct_->params())
67
4
    , BOOL_MEMBER(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks",
68
                  ccstruct_->params())
69
4
    , BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params())
70
4
    , BOOL_MEMBER(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks",
71
                  ccstruct_->params())
72
4
    , BOOL_MEMBER(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct",
73
                  ccstruct_->params())
74
4
    , BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params())
75
4
    , BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params())
76
4
    , BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params())
77
4
    , INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params())
78
4
    , INT_MEMBER(tosp_enough_space_samples_for_median, 3, "or should we use mean",
79
                 ccstruct_->params())
80
4
    , INT_MEMBER(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row",
81
                 ccstruct_->params())
82
4
    , INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table",
83
                 ccstruct_->params())
84
4
    , INT_MEMBER(tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs",
85
                 ccstruct_->params())
86
4
    , INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", ccstruct_->params())
87
4
    , double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
88
                    "Factor for defining space threshold in terms of space and "
89
                    "kern sizes",
90
                    ccstruct_->params())
91
4
    , double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params())
92
4
    , double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params())
93
4
    , double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params())
94
4
    , double_MEMBER(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this",
95
                    ccstruct_->params())
96
4
    , double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params())
97
4
    , double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params())
98
4
    , double_MEMBER(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp",
99
                    ccstruct_->params())
100
4
    , double_MEMBER(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp",
101
                    ccstruct_->params())
102
4
    , double_MEMBER(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp",
103
                    ccstruct_->params())
104
4
    , double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params())
105
4
    , double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", ccstruct_->params())
106
4
    , double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", ccstruct_->params())
107
4
    , double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", ccstruct_->params())
108
4
    , double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", ccstruct_->params())
109
4
    , double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", ccstruct_->params())
110
4
    , double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", ccstruct_->params())
111
4
    , double_MEMBER(tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats",
112
                    ccstruct_->params())
113
4
    , double_MEMBER(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table",
114
                    ccstruct_->params())
115
4
    , double_MEMBER(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this",
116
                    ccstruct_->params())
117
4
    , double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this",
118
                    ccstruct_->params())
119
4
    , double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params())
120
4
    , double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", ccstruct_->params())
121
4
    , double_MEMBER(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn",
122
                    ccstruct_->params())
123
4
    , double_MEMBER(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this",
124
                    ccstruct_->params())
125
4
    , double_MEMBER(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this",
126
                    ccstruct_->params())
127
4
    , double_MEMBER(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh",
128
                    ccstruct_->params())
129
4
    , double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation",
130
                    ccstruct_->params())
131
4
    , double_MEMBER(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns",
132
                    ccstruct_->params())
133
4
    , double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns",
134
                    ccstruct_->params())
135
4
    , double_MEMBER(tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank",
136
                    ccstruct_->params())
137
4
    , double_MEMBER(tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small",
138
                    ccstruct_->params())
139
4
    , double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context",
140
                    ccstruct_->params())
141
    ,
142
    // tordmain.cpp ///////////////////////////////////////////
    // NOTE(review): textord_show_blobs and textord_show_boxes below are given
    // the same help text - confirm that is intended.
143
4
    BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params())
144
4
    , BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", ccstruct_->params())
145
4
    , BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params())
146
4
    , INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", ccstruct_->params())
147
4
    , INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", ccstruct_->params())
148
4
    , double_MEMBER(textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise",
149
                    ccstruct_->params())
150
4
    , double_MEMBER(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess",
151
                    ccstruct_->params())
152
4
    , double_MEMBER(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess",
153
                    ccstruct_->params())
154
4
    , INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params())
155
4
    , double_MEMBER(textord_noise_sizelimit, 0.5, "Fraction of x for big t count",
156
                    ccstruct_->params())
157
4
    , INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params())
158
4
    , double_MEMBER(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion",
159
                    ccstruct_->params())
160
4
    , BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params())
161
4
    , BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", ccstruct_->params())
162
4
    , double_MEMBER(textord_noise_syfract, 0.2, "xh fract height error for norm blobs",
163
                    ccstruct_->params())
164
4
    , double_MEMBER(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs",
165
                    ccstruct_->params())
166
4
    , double_MEMBER(textord_noise_hfract, 1.0 / 64,
167
                    "Height fraction to discard outlines as speckle noise", ccstruct_->params())
168
4
    , INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params())
169
4
    , double_MEMBER(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion",
170
                    ccstruct_->params())
171
4
    , BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params())
172
4
    , double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params())
173
4
    , double_MEMBER(textord_blshift_xfraction, 9.99, "Min size of baseline shift",
174
4
                    ccstruct_->params()) {}
175
176
// Make the textlines and words inside each block.
// Steps, in order:
//  1. Stores width and height in page_tr_.
//  2. If to_blocks is empty, runs find_components on binary_pix and then
//     ComputeEdgeOffsets(thresholds_pix, grey_pix) on every TO_BLOCK.
//     Otherwise, unless the mode is sparse, runs filter_blobs instead.
//  3. Asserts (ASSERT_HOST) that to_blocks is now non-empty.
//  4. For PSM_SINGLE_BLOCK_VERT_TEXT, gives each block a PT_VERTICAL_TEXT
//     POLY_BLOCK made from its bounding box and rotates the to_block.
//  5. Finds rows with make_rows or make_single_row and writes the result to
//     *gradient; in sparse modes *gradient is set to 0.0f instead.
//  6. Computes straight baselines, baseline splines and xheights through a
//     BaselineDetect built from textord_baseline_debug, reskew and to_blocks.
//  7. Makes words with make_words, or make_single_word when word finding is
//     disabled for the mode.
//  8. Calls cleanup_blocks, TransferDiacriticsToBlockGroups, and
//     compute_row_margins on every block in blocks.
// gradient is an output parameter and is written on every path.
177
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
178
                          Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
179
                          BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
180
17.2k
                          TO_BLOCK_LIST *to_blocks, float *gradient) {
181
17.2k
  page_tr_.set_x(width);
182
17.2k
  page_tr_.set_y(height);
183
17.2k
  if (to_blocks->empty()) {
184
    // AutoPageSeg was not used, so we need to find_components first.
185
17.2k
    find_components(binary_pix, blocks, to_blocks);
186
17.2k
    TO_BLOCK_IT it(to_blocks);
187
34.5k
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
188
17.2k
      TO_BLOCK *to_block = it.data();
189
      // Compute the edge offsets whether or not there is a grey_pix.
190
      // We have by-passed auto page seg, so we have to run it here.
191
      // By page segmentation mode there is no non-text to avoid running on.
192
17.2k
      to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
193
17.2k
    }
194
17.2k
  } else if (!PSM_SPARSE(pageseg_mode)) {
195
    // AutoPageSeg does not need to find_components as it did that already.
196
    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
197
0
    filter_blobs(page_tr_, to_blocks, true);
198
0
  }
199
200
17.2k
  ASSERT_HOST(!to_blocks->empty());
201
17.2k
  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
202
0
    const FCOORD anticlockwise90(0.0f, 1.0f);
203
0
    const FCOORD clockwise90(0.0f, -1.0f);
204
0
    TO_BLOCK_IT it(to_blocks);
205
0
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
206
0
      TO_BLOCK *to_block = it.data();
207
0
      BLOCK *block = to_block->block;
208
      // Create a fake poly_block in block from its bounding box.
209
0
      block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), PT_VERTICAL_TEXT));
210
      // Rotate the to_block along with its contained block and blobnbox lists.
211
0
      to_block->rotate(anticlockwise90);
212
      // Set the block's rotation values to obey the convention followed in
213
      // layout analysis for vertical text.
214
0
      block->set_re_rotation(clockwise90);
215
0
      block->set_classify_rotation(clockwise90);
216
0
    }
217
0
  }
218
219
17.2k
  TO_BLOCK_IT to_block_it(to_blocks);
220
17.2k
  TO_BLOCK *to_block = to_block_it.data();
221
  // Make the rows in the block.
222
  // Do it the old fashioned way.
  // to_block (taken from the freshly constructed iterator above) is only
  // passed on in the make_single_row branch.
223
17.2k
  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
224
17.2k
    *gradient = make_rows(page_tr_, to_blocks);
225
17.2k
  } else if (!PSM_SPARSE(pageseg_mode)) {
226
    // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
227
0
    *gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
228
0
  } else {
229
0
    *gradient = 0.0f;
230
0
  }
231
17.2k
  BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
232
17.2k
  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
233
17.2k
  baseline_detector.ComputeBaselineSplinesAndXheights(
234
17.2k
      page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, textord_show_final_rows, this);
235
  // Now make the words in the lines.
236
17.2k
  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
237
    // SINGLE_LINE uses the old word maker on the single line.
238
17.2k
    make_words(this, page_tr_, *gradient, blocks, to_blocks);
239
17.2k
  } else {
240
    // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
241
    // single word, and in SINGLE_CHAR mode, all the outlines
242
    // go in a single blob.
    // NOTE(review): the declaration below shadows the outer to_block; both
    // are read from to_block_it.data().
243
0
    TO_BLOCK *to_block = to_block_it.data();
244
0
    make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(),
245
0
                     to_block->block->row_list());
246
0
  }
247
  // Remove empties.
248
17.2k
  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
249
17.2k
  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
250
  // Compute the margins for each row in the block, to be used later for
251
  // paragraph detection.
252
17.2k
  BLOCK_IT b_it(blocks);
253
32.7k
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
254
15.4k
    b_it.data()->compute_row_margins();
255
15.4k
  }
256
#ifndef GRAPHICS_DISABLED
257
  close_to_win();
258
#endif
259
17.2k
}
260
261
// If we were supposed to return only a single textline, and there is more
262
// than one, clean up and leave only the best.
// Returns immediately when line finding is enabled for pageseg_mode or the
// mode is sparse. Otherwise each row in page_res is scored by the mean of
// best_choice->certainty() over its words, the row with the greatest mean is
// kept, and every word outside that row is deleted via DeleteCurrentWord().
// NOTE(review): assumes every word has a non-null best_choice - confirm
// against the callers.
263
15.4k
void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res) {
264
15.4k
  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) {
265
15.4k
    return; // No cleanup required.
266
15.4k
  }
267
0
  PAGE_RES_IT it(page_res);
268
  // Find the best row, being the greatest mean word conf.
  // A row is closed out when the iterator's next row differs from the
  // current row; the running total and word count are then reset.
269
0
  float row_total_conf = 0.0f;
270
0
  int row_word_count = 0;
271
0
  ROW_RES *best_row = nullptr;
272
0
  float best_conf = 0.0f;
273
0
  for (it.restart_page(); it.word() != nullptr; it.forward()) {
274
0
    WERD_RES *word = it.word();
275
0
    row_total_conf += word->best_choice->certainty();
276
0
    ++row_word_count;
277
0
    if (it.next_row() != it.row()) {
278
0
      row_total_conf /= row_word_count;
279
0
      if (best_row == nullptr || best_conf < row_total_conf) {
280
0
        best_row = it.row();
281
0
        best_conf = row_total_conf;
282
0
      }
283
0
      row_total_conf = 0.0f;
284
0
      row_word_count = 0;
285
0
    }
286
0
  }
287
  // Now eliminate any word not in the best row.
288
0
  for (it.restart_page(); it.word() != nullptr; it.forward()) {
289
0
    if (it.row() != best_row) {
290
0
      it.DeleteCurrentWord();
291
0
    }
292
0
  }
293
0
}
294
295
} // namespace tesseract.