Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/ccstruct/normalis.h
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 * File:        normalis.h  (Formerly denorm.h)
3
 * Description: Code for the DENORM class.
4
 * Author:      Ray Smith
5
 *
6
 * (C) Copyright 1992, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
#ifndef NORMALIS_H
20
#define NORMALIS_H
21
22
#include "image.h"
23
24
#include <tesseract/export.h>
25
26
#include <vector>
27
28
struct Pix;
29
30
namespace tesseract {
31
32
// Dimensions of the baseline-normalized (BLN) coordinate space.
const int kBlnCellHeight = 256;    // Full cell height for baseline normalization.
33
const int kBlnXHeight = 128;       // Target x-height for baseline normalization.
34
const int kBlnBaselineOffset = 64; // y-offset given to the baseline by baseline normalization.
35
36
class BLOCK;
37
class FCOORD;
38
class TBOX;
39
class UNICHARSET;
40
41
struct TBLOB;
42
struct TPOINT;
43
44
// Possible normalization methods. Use NEGATIVE values as these also
45
// double up as markers for the last sub-classifier.
46
enum NormalizationMode {
47
  NM_BASELINE = -3,        // The original BL (baseline) normalization mode.
48
  NM_CHAR_ISOTROPIC = -2,  // Character normalization but isotropic.
49
  NM_CHAR_ANISOTROPIC = -1 // The original CN (character) normalization mode.
50
};
51
52
class TESS_API DENORM {
53
public:
54
  DENORM();
55
56
  // Copying a DENORM is allowed.
57
  DENORM(const DENORM &);
58
  DENORM &operator=(const DENORM &);
59
  ~DENORM();
60
61
  // Setup the normalization transformation parameters.
62
  // The normalizations applied to a blob are as follows:
63
  // 1. An optional block layout rotation that was applied during layout
64
  // analysis to make the textlines horizontal.
65
  // 2. A normalization transformation (LocalNormTransform):
66
  // Subtract the "origin"
67
  // Apply an x,y scaling.
68
  // Apply an optional rotation.
69
  // Add back a final translation.
70
  // The origin is in the block-rotated space, and is usually something like
71
  // the x-middle of the word at the baseline.
72
  // 3. Zero or more further normalization transformations that are applied
73
  // in sequence, with a similar pattern to the first normalization transform.
74
  //
75
  // A DENORM holds the parameters of a single normalization, and can execute
76
  // both the LocalNormTransform (a forwards normalization), and the
77
  // LocalDenormTransform which is an inverse transform or de-normalization.
78
  // A DENORM may point to a predecessor DENORM, which is actually the earlier
79
  // normalization, so the full normalization sequence involves executing all
80
  // predecessors first and then the transform in "this".
81
  // Let x be image coordinates, and suppose we have normalization classes A, B, C
82
  // where we first apply A then B then C to get normalized x':
83
  // x' = CBAx
84
  // Then the backwards (to original coordinates) would be:
85
  // x = A^-1 B^-1 C^-1 x'
86
  // and A = B->predecessor_ and B = C->predecessor_
87
  // NormTransform executes all predecessors recursively, and then this.
88
  // NormTransform would be used to transform an image-based feature to
89
  // normalized space for use in a classifier
90
  // DenormTransform inverts this and then all predecessors. It can be
91
  // used to get back to the original image coordinates from normalized space.
92
  // The LocalNormTransform member executes just the transformation
93
  // in "this" without the layout rotation or any predecessors. It would be
94
  // used to run each successive normalization, eg the word normalization,
95
  // and later the character normalization.
96
97
  // Arguments:
98
  // block: if not nullptr, then this is the first transformation, and
99
  //        block->re_rotation() needs to be used after the Denorm
100
  //        transformation to get back to the image coords.
101
  // rotation: if not nullptr, apply this rotation after translation to the
102
  //           origin and scaling. (Usually a classify rotation.)
103
  // predecessor: if not nullptr, then predecessor has been applied to the
104
  //              input space and needs to be undone to complete the inverse.
105
  // The above pointers are not owned by this DENORM and are assumed to live
106
  // longer than this denorm, except rotation, which is deep copied on input.
107
  //
108
  // x_origin: The x origin which will be mapped to final_xshift in the result.
109
  // y_origin: The y origin which will be mapped to final_yshift in the result.
110
  //           Added to result of row->baseline(x) if not nullptr.
111
  //
112
  // x_scale: scale factor for the x-coordinate.
113
  // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
114
  // Note that these scale factors apply to the same x and y system as the
115
  // x-origin and y-origin apply, ie after any block rotation, but before
116
  // the rotation argument is applied.
117
  //
118
  // final_xshift: The x component of the final translation.
119
  // final_yshift: The y component of the final translation.
120
  //
121
  // In theory, any of the commonly used normalizations can be setup here:
122
  // * Traditional baseline normalization on a word:
123
  // SetupNormalization(block, nullptr, nullptr,
124
  //                    box.x_middle(), baseline,
125
  //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
126
  //                    0, kBlnBaselineOffset);
127
  // * "Numeric mode" baseline normalization on a word, in which the blobs
128
  //   are positioned with the bottom as the baseline, is achieved by making
129
  //   a separate DENORM for each blob.
130
  // SetupNormalization(block, nullptr, nullptr,
131
  //                    box.x_middle(), box.bottom(),
132
  //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
133
  //                    0, kBlnBaselineOffset);
134
  // * Anisotropic character normalization used by IntFx.
135
  // SetupNormalization(nullptr, nullptr, denorm,
136
  //                    centroid_x, centroid_y,
137
  //                    51.2 / ry, 51.2 / rx, 128, 128);
138
  // * Normalize blob height to x-height (current OSD):
139
  // SetupNormalization(nullptr, &rotation, nullptr,
140
  //                    box.rotational_x_middle(rotation),
141
  //                    box.rotational_y_middle(rotation),
142
  //                    kBlnXHeight / box.rotational_height(rotation),
143
  //                    kBlnXHeight / box.rotational_height(rotation),
144
  //                    0, kBlnBaselineOffset);
145
  // * Secondary normalization for classification rotation (current):
146
  // FCOORD rotation = block->classify_rotation();
147
  // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
148
  // SetupNormalization(nullptr, &rotation, denorm,
149
  //                    box.rotational_x_middle(rotation),
150
  //                    box.rotational_y_middle(rotation),
151
  //                    target_height / box.rotational_height(rotation),
152
  //                    target_height / box.rotational_height(rotation),
153
  //                    0, kBlnBaselineOffset);
154
  // * Proposed new normalizations for CJK: Between them there is then
155
  // no need for further normalization at all, and the character fills the cell.
156
  // ** Replacement for baseline normalization on a word:
157
  // Scales height and width independently so that modal height and pitch
158
  // fill the cell respectively.
159
  // float cap_height = x_height / CCStruct::kXHeightCapRatio;
160
  // SetupNormalization(block, nullptr, nullptr,
161
  //                    box.x_middle(), cap_height / 2.0f,
162
  //                    kBlnCellHeight / fixed_pitch,
163
  //                    kBlnCellHeight / cap_height,
164
  //                    0, 0);
165
  // ** Secondary normalization for classification (with rotation) (proposed):
166
  // Requires a simple translation to the center of the appropriate character
167
  // cell, no further scaling and a simple rotation (or nothing) about the
168
  // cell center.
169
  // FCOORD rotation = block->classify_rotation();
170
  // SetupNormalization(nullptr, &rotation, denorm,
171
  //                    fixed_pitch_cell_center,
172
  //                    0.0f,
173
  //                    1.0f,
174
  //                    1.0f,
175
  //                    0, 0);
176
  void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
177
                          float x_origin, float y_origin, float x_scale, float y_scale,
178
                          float final_xshift, float final_yshift);
179
180
  // Sets up the DENORM to execute a non-linear transformation based on
181
  // preserving an even distribution of stroke edges. The transformation
182
  // operates only within the given box, scaling input coords within the box
183
  // non-linearly to a box of target_width by target_height, with all other
184
  // coords being clipped to the box edge. As with SetupNormalization above,
185
  // final_xshift and final_yshift are applied after scaling, and the bottom-
186
  // left of box is used as a pre-scaling origin.
187
  // x_coords is a collection of the x-coords of vertical edges for each
188
  // y-coord starting at box.bottom().
189
  // y_coords is a collection of the y-coords of horizontal edges for each
190
  // x-coord starting at box.left().
191
  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
192
  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
193
  // The second-level vectors must all be sorted in ascending order.
194
  void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
195
                      float target_height, float final_xshift, float final_yshift,
196
                      const std::vector<std::vector<int>> &x_coords,
197
                      const std::vector<std::vector<int>> &y_coords);
198
199
  // Transforms the given coords one step forward to normalized space, without
200
  // using any block rotation or predecessor.
201
  void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const;
202
  void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const;
203
  // Transforms the given coords forward to normalized space using the
204
  // full transformation sequence defined by the block rotation, the
205
  // predecessors, deepest first, and finally this. If first_norm is not
206
  // nullptr, then the first and deepest transformation used is first_norm,
207
  // ending with this, and the block rotation will not be applied.
208
  void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const;
209
  void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const;
210
  // Transforms the given coords one step back to source space, without
211
  // using any block rotation or predecessor.
212
  void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const;
213
  void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const;
214
  // Transforms the given coords all the way back to source image space using
215
  // the full transformation sequence defined by this and its predecessors
216
  // recursively, shallowest first, and finally any block re_rotation.
217
  // If last_denorm is not nullptr, then the last transformation used will
218
  // be last_denorm, and the block re_rotation will never be executed.
219
  void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const;
220
  void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const;
221
222
  // Normalize a blob using blob transformations. Less accurate, but
223
  // more accurately copies the old way.
224
  void LocalNormBlob(TBLOB *blob) const;
225
226
  // Fills in the x-height range accepted by the given unichar_id in blob
227
  // coordinates, given its bounding box in the usual baseline-normalized
228
  // coordinates, with some initial crude x-height estimate (such as word
229
  // size) and this denoting the transformation that was used.
230
  // Also returns the amount the character must have shifted up or down.
231
  void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht,
232
                    float *max_xht, float *yshift) const;
233
234
  // Prints the content of the DENORM for debug purposes.
235
  void Print() const;
236
237
1.93M
  // Returns pix_, the best available image held by this DENORM.
  Image pix() const {
238
1.93M
    return pix_;
239
1.93M
  }
240
2.40M
  // Sets pix_, the best available image, to the given pix.
  // NOTE(review): Image is taken and stored by value; whether that shares or
  // copies the underlying pixel data depends on Image (image.h) - confirm.
  void set_pix(Image pix) {
241
2.40M
    pix_ = pix;
242
2.40M
  }
243
0
  // Returns inverse_: true if the source image is white-on-black.
  bool inverse() const {
244
0
    return inverse_;
245
0
  }
246
2.40M
  // Sets inverse_; value should be true if the source image is white-on-black.
  void set_inverse(bool value) {
247
2.40M
    inverse_ = value;
248
2.40M
  }
249
0
  // Returns the first DENORM in the chain: recursively follows predecessor_
  // until reaching a DENORM whose predecessor_ is nullptr. Returns this if
  // this DENORM has no predecessor.
  const DENORM *RootDenorm() const {
250
0
    if (predecessor_ != nullptr) {
251
0
      return predecessor_->RootDenorm();
252
0
    }
253
0
    return this;
254
0
  }
255
0
  // Returns predecessor_, the previous transformation in the chain. May be
  // nullptr (RootDenorm treats a null predecessor_ as the end of the chain).
  const DENORM *predecessor() const {
256
0
    return predecessor_;
257
0
  }
258
  // Accessors - perhaps should not be needed.
259
465k
  // Returns x_scale_, the scale factor for x coords.
  float x_scale() const {
260
465k
    return x_scale_;
261
465k
  }
262
9.13M
  // Returns y_scale_, the scale factor for y coords.
  float y_scale() const {
263
9.13M
    return y_scale_;
264
9.13M
  }
265
4.01M
  // Returns block_, the block the word came from. May be nullptr.
  const BLOCK *block() const {
266
4.01M
    return block_;
267
4.01M
  }
268
0
  // Sets block_, the block the word came from. Only the pointer is stored, so
  // the BLOCK is not owned by this DENORM.
  void set_block(const BLOCK *block) {
269
0
    block_ = block;
270
0
  }
271
272
private:
273
  // Free allocated memory and clear pointers.
274
  void Clear();
275
  // Setup default values.
276
  void Init();
277
278
  // Best available image.
279
  Image pix_;
280
  // True if the source image is white-on-black.
281
  bool inverse_;
282
  // Block the word came from. If not null, block->re_rotation() takes the
283
  // "untransformed" coordinates even further back to the original image.
284
  // Used only on the first DENORM in a chain.
285
  const BLOCK *block_;
286
  // Rotation to apply after translation to the origin and scaling.
287
  const FCOORD *rotation_;
288
  // Previous transformation in a chain.
289
  const DENORM *predecessor_;
290
  // Non-linear transformation maps directly from each integer offset from the
291
  // origin to the corresponding x-coord. Owned by the DENORM.
292
  std::vector<float> *x_map_;
293
  // Non-linear transformation maps directly from each integer offset from the
294
  // origin to the corresponding y-coord. Owned by the DENORM.
295
  std::vector<float> *y_map_;
296
  // x-coordinate to be mapped to final_xshift_ in the result.
297
  float x_origin_;
298
  // y-coordinate to be mapped to final_yshift_ in the result.
299
  float y_origin_;
300
  // Scale factors for x and y coords. Applied to pre-rotation system.
301
  float x_scale_;
302
  float y_scale_;
303
  // Destination coords of the x_origin_ and y_origin_.
304
  float final_xshift_;
305
  float final_yshift_;
306
};
307
308
} // namespace tesseract
309
310
#endif