Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/textord/textord.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        textord.h
3
// Description: The Textord class definition gathers text line and word
4
//              finding functionality.
5
// Author:      Ray Smith
6
// Created:     Fri Mar 13 14:29:01 PDT 2009
7
//
8
// (C) Copyright 2009, Google Inc.
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
19
///////////////////////////////////////////////////////////////////////
20
21
#ifndef TESSERACT_TEXTORD_TEXTORD_H_
22
#define TESSERACT_TEXTORD_TEXTORD_H_
23
24
#include "bbgrid.h"
25
#include "blobbox.h"
26
#include "ccstruct.h"
27
#include "gap_map.h"
28
29
#include <tesseract/publictypes.h> // For PageSegMode.
30
31
namespace tesseract {
32
33
class FCOORD;
34
class BLOCK_LIST;
35
class PAGE_RES;
36
class TO_BLOCK;
37
class TO_BLOCK_LIST;
38
class ScrollView;
39
40
// A simple class that can be used by BBGrid to hold a word and an expanded
41
// bounding box that makes it easy to find words to put diacritics.
42
class WordWithBox {
43
public:
44
0
  WordWithBox() : word_(nullptr) {}
45
206k
  explicit WordWithBox(WERD *word) : word_(word), bounding_box_(word->bounding_box()) {
46
206k
    int height = bounding_box_.height();
47
206k
    bounding_box_.pad(height, height);
48
206k
  }
49
50
27.1M
  const TBOX &bounding_box() const {
51
27.1M
    return bounding_box_;
52
27.1M
  }
53
  // Returns the bounding box of only the good blobs.
54
0
  TBOX true_bounding_box() const {
55
0
    return word_->true_bounding_box();
56
0
  }
57
0
  C_BLOB_LIST *RejBlobs() const {
58
0
    return word_->rej_cblob_list();
59
0
  }
60
0
  const WERD *word() const {
61
0
    return word_;
62
0
  }
63
64
private:
65
  // Borrowed pointer to a real word somewhere that must outlive this class.
66
  WERD *word_;
67
  // Cached expanded bounding box of the word, padded all round by its height.
68
  TBOX bounding_box_;
69
};
70
71
// Make it usable by BBGrid.
// CLISTIZEH is a macro defined outside this header; presumably it declares the
// WordWithBox_CLIST and WordWithBox_C_IT types named in the aliases below
// (neither is declared anywhere else in this header) -- TODO confirm.
72
CLISTIZEH(WordWithBox)
73
// Grid of WordWithBox entries.
using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
74
// Search object instantiated with the same three type arguments as WordGrid.
using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
75
76
// Gathers text line and word finding functionality.
// The "// xxx.cpp ////" banners inside the class name a source file;
// presumably that is where the declarations following the banner are
// implemented -- TODO confirm.
class Textord {
77
public:
78
  // Constructs the object from a CCStruct pointer. The constructor body is not
  // in this header.
  explicit Textord(CCStruct *ccstruct);
79
0
  ~Textord() = default;
80
81
  // Make the textlines and words inside each block.
82
  // binary_pix is mandatory and is the binarized input after line removal.
83
  // grey_pix is optional, but if present must match the binary_pix in size,
84
  // and must be a *real* grey image instead of binary_pix * 255.
85
  // thresholds_pix is expected to be present iff grey_pix is present and
86
  // can be an integer factor reduction of the grey_pix. It represents the
87
  // thresholds that were used to create the binary_pix from the grey_pix.
88
  // diacritic_blobs contain small confusing components that should be added
89
  // to the appropriate word(s) in case they are really diacritics.
90
  void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
91
                   Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
92
                   BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
93
                   float *gradient);
94
95
  // If we were supposed to return only a single textline, and there is more
96
  // than one, clean up and leave only the best.
97
  void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res);
98
99
15.4k
  // Returns the current value of the use_cjk_fp_model_ flag.
  bool use_cjk_fp_model() const {
100
15.4k
    return use_cjk_fp_model_;
101
15.4k
  }
102
15.4k
  // Sets the use_cjk_fp_model_ flag to the given value.
  void set_use_cjk_fp_model(bool flag) {
103
15.4k
    use_cjk_fp_model_ = flag;
104
15.4k
  }
105
106
  // tospace.cpp ///////////////////////////////////////////
107
  void to_spacing(ICOORD page_tr,       // topright of page
108
                  TO_BLOCK_LIST *blocks // blocks on page
109
  );
110
  ROW *make_prop_words(TO_ROW *row,    // row to make
111
                       FCOORD rotation // for drawing
112
  );
113
  ROW *make_blob_words(TO_ROW *row,    // row to make
114
                       FCOORD rotation // for drawing
115
  );
116
  // tordmain.cpp ///////////////////////////////////////////
117
  void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
118
  void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on);
119
120
private:
121
  // For underlying memory management and other utilities.
122
  CCStruct *ccstruct_;
123
124
  // The size of the input image.
125
  ICOORD page_tr_;
126
127
  // Flag exposed through use_cjk_fp_model() / set_use_cjk_fp_model().
  bool use_cjk_fp_model_;
128
129
  // makerow.cpp ///////////////////////////////////////////
130
  // Make the textlines inside each block.
131
  void MakeRows(PageSegMode pageseg_mode, const FCOORD &skew, int width, int height,
132
                TO_BLOCK_LIST *to_blocks);
133
  // Make the textlines inside a single block.
134
  void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD &skew, TO_BLOCK *block,
135
                     ScrollView *win);
136
137
  // NOTE(review): the next three declarations are public although they sit
  // between private sections; presumably they are called from outside the
  // class -- confirm before changing their access.
public:
138
  void compute_block_xheight(TO_BLOCK *block, float gradient);
139
  void compute_row_xheight(TO_ROW *row, // row to do
140
                           const FCOORD &rotation,
141
                           float gradient, // global skew
142
                           int block_line_size);
143
  void make_spline_rows(TO_BLOCK *block, // block to do
144
                        float gradient,  // gradient to fit
145
                        bool testing_on);
146
147
private:
148
  //// oldbasel.cpp ////////////////////////////////////////
149
  void make_old_baselines(TO_BLOCK *block, // block to do
150
                          bool testing_on, // correct orientation
151
                          float gradient);
152
  void correlate_lines(TO_BLOCK *block, float gradient);
153
  void correlate_neighbours(TO_BLOCK *block, // block rows are in.
154
                            TO_ROW **rows,   // rows of block.
155
                            int rowcount);   // no of rows to do.
156
  int correlate_with_stats(TO_ROW **rows,    // rows of block.
157
                           int rowcount,     // no of rows to do.
158
                           TO_BLOCK *block);
159
  void find_textlines(TO_BLOCK *block,  // block row is in
160
                      TO_ROW *row,      // row to do
161
                      int degree,       // required approximation
162
                      QSPLINE *spline); // starting spline
163
  // tospace.cpp ///////////////////////////////////////////
164
  // DEBUG USE ONLY
165
  // The three reference parameters are non-const; per the inline comments the
  // two gap widths are resulting estimates.
  void block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_text_ord_proportional,
166
                           // resulting estimate
167
                           int16_t &block_space_gap_width,
168
                           // resulting estimate
169
                           int16_t &block_non_space_gap_width);
170
  void row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, int16_t row_idx,
171
                         // estimate for block
172
                         int16_t block_space_gap_width,
173
                         // estimate for block
174
                         int16_t block_non_space_gap_width);
175
  void old_to_method(TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats,
176
                     STATS *small_gap_stats, int16_t block_space_gap_width,
177
                     // estimate for block
178
                     int16_t block_non_space_gap_width);
179
  bool isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, bool suspected_table,
180
                          int16_t block_idx, int16_t row_idx);
181
  int16_t stats_count_under(STATS *stats, int16_t threshold);
182
  void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
183
  // The five trailing parameters (blanks, fuzzy_sp, fuzzy_non,
  // prev_gap_was_a_space, break_at_next_gap) are non-const references.
  bool make_a_word_break(TO_ROW *row,   // row being made
184
                         TBOX blob_box, // for next_blob // how many blanks?
185
                         int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap,
186
                         int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap,
187
                         uint8_t &blanks, bool &fuzzy_sp, bool &fuzzy_non,
188
                         bool &prev_gap_was_a_space, bool &break_at_next_gap);
189
  bool narrow_blob(TO_ROW *row, TBOX blob_box);
190
  bool wide_blob(TO_ROW *row, TBOX blob_box);
191
  bool suspected_punct_blob(TO_ROW *row, TBOX box);
192
  // box_it is taken by value; next_blob_box, next_gap and next_within_xht_gap
  // are non-const references.
  void peek_at_next_gap(TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, int16_t &next_gap,
193
                        int16_t &next_within_xht_gap);
194
  void mark_gap(TBOX blob,    // blob following gap
195
                int16_t rule, // heuristic id
196
                int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap,
197
                int16_t next_blob_width, int16_t next_gap);
198
  float find_mean_blob_spacing(WERD *word);
199
  bool ignore_big_gap(TO_ROW *row, int32_t row_length, GAPMAP *gapmap, int16_t left, int16_t right);
200
  // get bounding box
201
  TBOX reduced_box_next(TO_ROW *row,    // current row
202
                        BLOBNBOX_IT *it // iterator to blobs
203
  );
204
  TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht);
205
  // tordmain.cpp ///////////////////////////////////////////
206
  float filter_noise_blobs(BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list,
207
                           BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list);
208
  // Fixes the block so it obeys all the rules:
209
  // Must have at least one ROW.
210
  // Must have at least one WERD.
211
  // WERDs contain a fake blob.
212
  void cleanup_nontext_block(BLOCK *block);
213
  void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
214
  bool clean_noise_from_row(ROW *row);
215
  void clean_noise_from_words(ROW *row);
216
  // Remove outlines that are a tiny fraction in either width or height
217
  // of the word height.
218
  void clean_small_noise_from_words(ROW *row);
219
  // Groups blocks by rotation, then, for each group, makes a WordGrid and calls
220
  // TransferDiacriticsToWords to copy the diacritic blobs to the most
221
  // appropriate words in the group of blocks. Source blobs are not touched.
222
  void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks);
223
  // Places a copy of blobs that are near a word (after applying rotation to the
224
  // blob) in the most appropriate word, unless there is doubt, in which case a
225
  // blob can end up in two words. Source blobs are not touched.
226
  void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation,
227
                                 WordGrid *word_grid);
228
229
  // Everything below is declared through the BOOL_VAR_H / INT_VAR_H /
  // double_VAR_H macros, which are defined outside this header.
  // NOTE(review): presumably each one declares a tunable parameter member whose
  // definition and default live in the .cpp named by the banner -- confirm.
public:
230
  // makerow.cpp ///////////////////////////////////////////
231
  BOOL_VAR_H(textord_single_height_mode);
232
  // tospace.cpp ///////////////////////////////////////////
233
  BOOL_VAR_H(tosp_old_to_method);
234
  BOOL_VAR_H(tosp_old_to_constrain_sp_kn);
235
  BOOL_VAR_H(tosp_only_use_prop_rows);
236
  BOOL_VAR_H(tosp_force_wordbreak_on_punct);
237
  BOOL_VAR_H(tosp_use_pre_chopping);
238
  BOOL_VAR_H(tosp_old_to_bug_fix);
239
  BOOL_VAR_H(tosp_block_use_cert_spaces);
240
  BOOL_VAR_H(tosp_row_use_cert_spaces);
241
  BOOL_VAR_H(tosp_narrow_blobs_not_cert);
242
  BOOL_VAR_H(tosp_row_use_cert_spaces1);
243
  BOOL_VAR_H(tosp_recovery_isolated_row_stats);
244
  BOOL_VAR_H(tosp_only_small_gaps_for_kern);
245
  BOOL_VAR_H(tosp_all_flips_fuzzy);
246
  BOOL_VAR_H(tosp_fuzzy_limit_all);
247
  BOOL_VAR_H(tosp_stats_use_xht_gaps);
248
  BOOL_VAR_H(tosp_use_xht_gaps);
249
  BOOL_VAR_H(tosp_only_use_xht_gaps);
250
  BOOL_VAR_H(tosp_rule_9_test_punct);
251
  BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp);
252
  BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn);
253
  BOOL_VAR_H(tosp_improve_thresh);
254
  INT_VAR_H(tosp_debug_level);
255
  INT_VAR_H(tosp_enough_space_samples_for_median);
256
  INT_VAR_H(tosp_redo_kern_limit);
257
  INT_VAR_H(tosp_few_samples);
258
  INT_VAR_H(tosp_short_row);
259
  INT_VAR_H(tosp_sanity_method);
260
  double_VAR_H(tosp_old_sp_kn_th_factor);
261
  double_VAR_H(tosp_threshold_bias1);
262
  double_VAR_H(tosp_threshold_bias2);
263
  double_VAR_H(tosp_narrow_fraction);
264
  double_VAR_H(tosp_narrow_aspect_ratio);
265
  double_VAR_H(tosp_wide_fraction);
266
  double_VAR_H(tosp_wide_aspect_ratio);
267
  double_VAR_H(tosp_fuzzy_space_factor);
268
  double_VAR_H(tosp_fuzzy_space_factor1);
269
  double_VAR_H(tosp_fuzzy_space_factor2);
270
  double_VAR_H(tosp_gap_factor);
271
  double_VAR_H(tosp_kern_gap_factor1);
272
  double_VAR_H(tosp_kern_gap_factor2);
273
  double_VAR_H(tosp_kern_gap_factor3);
274
  double_VAR_H(tosp_ignore_big_gaps);
275
  double_VAR_H(tosp_ignore_very_big_gaps);
276
  double_VAR_H(tosp_rep_space);
277
  double_VAR_H(tosp_enough_small_gaps);
278
  double_VAR_H(tosp_table_kn_sp_ratio);
279
  double_VAR_H(tosp_table_xht_sp_ratio);
280
  double_VAR_H(tosp_table_fuzzy_kn_sp_ratio);
281
  double_VAR_H(tosp_fuzzy_kn_fraction);
282
  double_VAR_H(tosp_fuzzy_sp_fraction);
283
  double_VAR_H(tosp_min_sane_kn_sp);
284
  double_VAR_H(tosp_init_guess_kn_mult);
285
  double_VAR_H(tosp_init_guess_xht_mult);
286
  double_VAR_H(tosp_max_sane_kn_thresh);
287
  double_VAR_H(tosp_flip_caution);
288
  double_VAR_H(tosp_large_kerning);
289
  double_VAR_H(tosp_dont_fool_with_small_kerns);
290
  double_VAR_H(tosp_near_lh_edge);
291
  double_VAR_H(tosp_silly_kn_sp_gap);
292
  double_VAR_H(tosp_pass_wide_fuzz_sp_to_context);
293
  // tordmain.cpp ///////////////////////////////////////////
294
  BOOL_VAR_H(textord_no_rejects);
295
  BOOL_VAR_H(textord_show_blobs);
296
  BOOL_VAR_H(textord_show_boxes);
297
  INT_VAR_H(textord_max_noise_size);
298
  INT_VAR_H(textord_baseline_debug);
299
  double_VAR_H(textord_noise_area_ratio);
300
  double_VAR_H(textord_initialx_ile);
301
  double_VAR_H(textord_initialasc_ile);
302
  INT_VAR_H(textord_noise_sizefraction);
303
  double_VAR_H(textord_noise_sizelimit);
304
  INT_VAR_H(textord_noise_translimit);
305
  double_VAR_H(textord_noise_normratio);
306
  BOOL_VAR_H(textord_noise_rejwords);
307
  BOOL_VAR_H(textord_noise_rejrows);
308
  double_VAR_H(textord_noise_syfract);
309
  double_VAR_H(textord_noise_sxfract);
310
  double_VAR_H(textord_noise_hfract);
311
  INT_VAR_H(textord_noise_sncount);
312
  double_VAR_H(textord_noise_rowratio);
313
  BOOL_VAR_H(textord_noise_debug);
314
  double_VAR_H(textord_blshift_maxshift);
315
  double_VAR_H(textord_blshift_xfraction);
316
};
317
318
} // namespace tesseract
319
320
#endif // TESSERACT_TEXTORD_TEXTORD_H_