Coverage Report

Created: 2025-11-16 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/textord/makerow.h
Line
Count
Source
1
/**********************************************************************
2
 * File:        makerow.h  (Formerly makerows.h)
3
 * Description: Code to arrange blobs into rows of text.
4
 * Author:      Ray Smith
5
 *
6
 * (C) Copyright 1992, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
#ifndef MAKEROW_H
20
#define MAKEROW_H
21
22
#include "blobbox.h"
23
#include "blobs.h"
24
#include "ocrblock.h"
25
#include "params.h"
26
#include "statistc.h"
27
28
namespace tesseract {
29
30
// Result codes for most_overlapping_row(), which is declared further down in
// this header with OVERLAP_STATE as its return type.
enum OVERLAP_STATE {
31
  ASSIGN, // assign it to row
32
  REJECT, // reject it - dual overlap
33
  NEW_ROW // NOTE(review): presumably "no suitable row, start a new one" - confirm in the definition
34
};
35
36
// Row classification produced by get_row_category() in this header. The
// conditions noted per enumerator are the ones that function tests, in order.
enum ROW_CATEGORY {
37
  ROW_ASCENDERS_FOUND,  // xheight > 0 and ascrise > 0
38
  ROW_DESCENDERS_FOUND, // xheight > 0, ascrise not > 0, descdrop != 0
39
  ROW_UNKNOWN,          // xheight > 0 but neither ascrise nor descdrop is set
40
  ROW_INVALID,          // xheight <= 0
41
};
42
43
// Row-finding parameters. Each line declares one parameter through a
// BOOL_VAR_H / INT_VAR_H / double_VAR_H macro (NOTE(review): presumably
// supplied by params.h, which is included above - confirm). The matching
// definitions are not part of this header.
extern BOOL_VAR_H(textord_heavy_nr);
44
extern BOOL_VAR_H(textord_show_initial_rows);
45
extern BOOL_VAR_H(textord_show_parallel_rows);
46
extern BOOL_VAR_H(textord_show_expanded_rows);
47
extern BOOL_VAR_H(textord_show_final_rows);
48
extern BOOL_VAR_H(textord_show_final_blobs);
49
extern BOOL_VAR_H(textord_test_landscape);
50
extern BOOL_VAR_H(textord_parallel_baselines);
51
extern BOOL_VAR_H(textord_straight_baselines);
52
extern BOOL_VAR_H(textord_old_baselines);
53
extern BOOL_VAR_H(textord_old_xheight);
54
extern BOOL_VAR_H(textord_fix_xheight_bug);
55
extern BOOL_VAR_H(textord_fix_makerow_bug);
56
extern BOOL_VAR_H(textord_debug_xheights);
57
extern INT_VAR_H(textord_test_x);
58
extern INT_VAR_H(textord_test_y);
59
extern INT_VAR_H(textord_min_blobs_in_row);
60
extern INT_VAR_H(textord_spline_minblobs);
61
extern INT_VAR_H(textord_spline_medianwin);
62
extern INT_VAR_H(textord_min_xheight); // read by get_min_max_xheight() as the floor for min_height
63
extern double_VAR_H(textord_spline_shift_fraction);
64
extern double_VAR_H(textord_skew_ile);
65
extern double_VAR_H(textord_skew_lag);
66
extern double_VAR_H(textord_linespace_iqrlimit);
67
extern double_VAR_H(textord_width_limit);
68
extern double_VAR_H(textord_chop_width);
69
extern double_VAR_H(textord_minxh); // read by get_min_max_xheight() as the scale applied to block_linesize
70
extern double_VAR_H(textord_min_linesize);
71
extern double_VAR_H(textord_excess_blobsize);
72
extern double_VAR_H(textord_occupancy_threshold);
73
extern double_VAR_H(textord_underline_width);
74
extern double_VAR_H(textord_min_blob_height_fraction);
75
extern double_VAR_H(textord_xheight_mode_fraction);
76
extern double_VAR_H(textord_ascheight_mode_fraction);
77
extern double_VAR_H(textord_ascx_ratio_min);
78
extern double_VAR_H(textord_ascx_ratio_max);
79
extern double_VAR_H(textord_descx_ratio_min);
80
extern double_VAR_H(textord_descx_ratio_max);
81
extern double_VAR_H(textord_xheight_error_margin);
82
extern INT_VAR_H(textord_lms_line_trials);
83
extern BOOL_VAR_H(textord_new_initial_xheight);
84
extern BOOL_VAR_H(textord_debug_blob);
85
86
330k
inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) {
87
330k
  *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh));
88
330k
  if (*min_height < textord_min_xheight) {
89
322k
    *min_height = textord_min_xheight;
90
322k
  }
91
330k
  *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
92
330k
}
93
94
713k
// Classifies a row from its xheight, ascrise and descdrop fields.
// The checks are ordered: a non-positive xheight always yields ROW_INVALID,
// a positive ascrise takes precedence over a non-zero descdrop, and
// ROW_UNKNOWN is returned when neither is set.
inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
  if (row->xheight <= 0) {
    return ROW_INVALID;
  }
  if (row->ascrise > 0) {
    return ROW_ASCENDERS_FOUND;
  }
  if (row->descdrop != 0) {
    return ROW_DESCENDERS_FOUND;
  }
  return ROW_UNKNOWN;
}
101
102
713k
// Returns true when test lies in the closed interval
// [num * (1 - margin), num * (1 + margin)].
// For a negative num with a positive margin the interval is empty (its lower
// end is above its upper end), so the result is false.
inline bool within_error_margin(float test, float num, float margin) {
  const float lower_bound = num * (1 - margin);
  const float upper_bound = num * (1 + margin);
  // Written with >= / <= (not their negations) so that NaN operands still give false.
  if (!(test >= lower_bound)) {
    return false;
  }
  return test <= upper_bound;
}
105
106
// Declarations only from here on; the function bodies are not in this header.
// NOTE(review): the "correct orientation" remark attached to every testing_on
// parameter is repeated verbatim throughout the header and does not obviously
// describe a bool named testing_on - confirm its meaning against the definitions.
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights,
107
                  STATS *floating_heights);
108
109
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks);
110
float make_rows(ICOORD page_tr, // top right
111
                TO_BLOCK_LIST *port_blocks);
112
void make_initial_textrows(ICOORD page_tr,
113
                           TO_BLOCK *block,  // block to do
114
                           FCOORD rotation,  // for drawing
115
                           bool testing_on); // correct orientation
116
void fit_lms_line(TO_ROW *row);
117
// page_m and page_err are non-const references (presumably outputs - confirm).
void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
118
                       float &page_m,         // average gradient
119
                       float &page_err);      // average error
120
void vigorous_noise_removal(TO_BLOCK *block);
121
void cleanup_rows_making(ICOORD page_tr,     // top right
122
                         TO_BLOCK *block,    // block to do
123
                         float gradient,     // gradient to fit
124
                         FCOORD rotation,    // for drawing
125
                         int32_t block_edge, // edge of block
126
                         bool testing_on);   // correct orientation
127
// NOTE(review): the trailing "find lines" summary below is shared verbatim with
// several other declarations in this header and looks copy-pasted; do not rely
// on it as a description of this function.
void delete_non_dropout_rows(                // find lines
128
    TO_BLOCK *block,                         // block to do
129
    float gradient,                          // global skew
130
    FCOORD rotation,                         // deskew vector
131
    int32_t block_edge,                      // left edge
132
    bool testing_on                          // correct orientation
133
);
134
// Returns bool; the meaning of true/false is not stated here - see the definition.
bool find_best_dropout_row( // find neighbours
135
    TO_ROW *row,            // row to test
136
    int32_t distance,       // dropout dist
137
    float dist_limit,       // threshold distance
138
    int32_t line_index,     // index of row
139
    TO_ROW_IT *row_it,      // current position
140
    bool testing_on         // correct orientation
141
);
142
// Returns a TBOX by value.
TBOX deskew_block_coords( // block box
143
    TO_BLOCK *block,      // block to do
144
    float gradient        // global skew
145
);
146
// NOTE(review): all three declarations below carry the same "project blobs"
// summary; it appears to have been copied from the first one, so treat it as
// unverified for the other two.
void compute_line_occupation( // project blobs
147
    TO_BLOCK *block,          // block to do
148
    float gradient,           // global skew
149
    int32_t min_y,            // min coord in block
150
    int32_t max_y,            // max coord in block
151
    int32_t *occupation,      // output projection
152
    int32_t *deltas           // derivative
153
);
154
void compute_occupation_threshold( // project blobs
155
    int32_t low_window,            // below result point
156
    int32_t high_window,           // above result point
157
    int32_t line_count,            // array sizes
158
    int32_t *occupation,           // input projection
159
    int32_t *thresholds            // output thresholds
160
);
161
// NOTE(review): both pointers are non-const, so the input/output labels below
// cannot be checked from the signature - confirm against the definition.
void compute_dropout_distances( // project blobs
162
    int32_t *occupation,        // input projection
163
    int32_t *thresholds,        // output thresholds
164
    int32_t line_count          // array sizes
165
);
166
// NOTE(review): "find lines" is attached to three differently named functions
// in this group and looks copy-pasted; only the parameter comments should be
// treated as specific to each declaration.
void expand_rows(       // find lines
167
    ICOORD page_tr,     // top right
168
    TO_BLOCK *block,    // block to do
169
    float gradient,     // gradient to fit
170
    FCOORD rotation,    // for drawing
171
    int32_t block_edge, // edge of block
172
    bool testing_on     // correct orientation
173
);
174
void adjust_row_limits( // tidy limits
175
    TO_BLOCK *block     // block to do
176
);
177
void compute_row_stats( // find lines
178
    TO_BLOCK *block,    // block to do
179
    bool testing_on     // correct orientation
180
);
181
// Returns a float (presumably the median x-height its name refers to - confirm).
float median_block_xheight( // find lines
182
    TO_BLOCK *block,        // block to do
183
    float gradient          // global skew
184
);
185
186
// xheight and ascrise are non-const float pointers (presumably outputs - confirm).
// The meaning of the int return value is not stated in this header.
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only,
187
                               int min_height, int max_height, float *xheight, float *ascrise);
188
189
int32_t compute_row_descdrop(TO_ROW *row,    // row to do
190
                             float gradient, // global skew
191
                             int xheight_blob_count, STATS *heights);
192
// modes is a caller-supplied array whose capacity is passed as maxmodes.
int32_t compute_height_modes(STATS *heights,     // stats to search
193
                             int32_t min_height, // bottom of range
194
                             int32_t max_height, // top of range
195
                             int32_t *modes,     // output array
196
                             int32_t maxmodes);  // size of modes
197
// NOTE(review): "average values" presumably applies to xheight, ascrise and
// descdrop alike - confirm.
void correct_row_xheight(TO_ROW *row,            // row to fix
198
                         float xheight,          // average values
199
                         float ascrise, float descdrop);
200
// Declarations only; all five functions return void and take the row or block
// they operate on by non-const pointer.
void separate_underlines(TO_BLOCK *block,   // block to do
201
                         float gradient,    // skew angle
202
                         FCOORD rotation,   // inverse landscape
203
                         bool testing_on);  // correct orientation
204
void pre_associate_blobs(ICOORD page_tr,    // top right
205
                         TO_BLOCK *block,   // block to do
206
                         FCOORD rotation,   // inverse landscape
207
                         bool testing_on);  // correct orientation
208
void fit_parallel_rows(TO_BLOCK *block,     // block to do
209
                       float gradient,      // gradient to fit
210
                       FCOORD rotation,     // for drawing
211
                       int32_t block_edge,  // edge of block
212
                       bool testing_on);    // correct orientation
213
void fit_parallel_lms(float gradient,       // forced gradient
214
                      TO_ROW *row);         // row to fit
215
void make_baseline_spline(TO_ROW *row,      // row to fit
216
                          TO_BLOCK *block); // block it came from
217
// segments is a non-const reference and xstarts a non-const pointer
// (presumably both are filled in by the callee - confirm).
bool segment_baseline(                      // split baseline
218
    TO_ROW *row,                            // row to fit
219
    TO_BLOCK *block,                        // block it came from
220
    int32_t &segments,                      // no. of segments
221
    int32_t *xstarts                        // coords of segments
222
);
223
// Returns a raw double*; ownership of the returned memory is not stated in this
// header - check the definition before freeing or retaining it.
// NOTE(review): the "split baseline" summary is identical to segment_baseline's
// and may have been copied from it.
double *linear_spline_baseline( // split baseline
224
    TO_ROW *row,                // row to fit
225
    TO_BLOCK *block,            // block it came from
226
    int32_t &segments,          // no. of segments
227
    int32_t xstarts[]           // coords of segments
228
);
229
// NOTE(review): the "find lines" summary is shared with other declarations in
// this header and looks copy-pasted; the parameter comments are the specific part.
void assign_blobs_to_rows( // find lines
230
    TO_BLOCK *block,       // block to do
231
    float *gradient,       // block skew
232
    int pass,              // identification
233
    bool reject_misses,    // chuck big ones out
234
    bool make_new_rows,    // add rows for unmatched
235
    bool drawing_skew      // draw smoothed skew
236
);
237
// find best row
// Returns an OVERLAP_STATE; best_row is a reference to a pointer, labelled as
// the output row.
238
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator
239
                                   TO_ROW *&best_row, // output row
240
                                   float top,         // top of blob
241
                                   float bottom,      // bottom of blob
242
                                   float rowsize,     // max row size
243
                                   bool testing_blob  // test stuff
244
);
245
// NOTE(review): presumably a three-way comparator (negative / zero / positive)
// on the blobs' x position - confirm against the definition.
int blob_x_order(      // sort function
246
    const BLOBNBOX *item1, // items to compare
247
    const BLOBNBOX *item2);
248
249
void mark_repeated_chars(TO_ROW *row);
250
251
} // namespace tesseract
252
253
#endif