/src/tesseract/src/ccstruct/normalis.h
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: normalis.h (Formerly denorm.h) |
3 | | * Description: Code for the DENORM class. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1992, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #ifndef NORMALIS_H |
20 | | #define NORMALIS_H |
21 | | |
22 | | #include "image.h" |
23 | | |
24 | | #include <tesseract/export.h> |
25 | | |
26 | | #include <vector> |
27 | | |
28 | | struct Pix; |
29 | | |
30 | | namespace tesseract { |
31 | | |
32 | | const int kBlnCellHeight = 256; // Full cell height of the baseline-normalized space. |
33 | | const int kBlnXHeight = 128; // Target x-height in the baseline-normalized space. |
34 | | const int kBlnBaselineOffset = 64; // y-offset the baseline is mapped to by baseline normalization. |
35 | | |
36 | | class BLOCK; |
37 | | class FCOORD; |
38 | | class TBOX; |
39 | | class UNICHARSET; |
40 | | |
41 | | struct TBLOB; |
42 | | struct TPOINT; |
43 | | |
44 | | // Possible normalization methods. The values are deliberately NEGATIVE, as |
45 | | // they also double up as markers for the last sub-classifier. |
46 | | enum NormalizationMode { |
47 | | NM_BASELINE = -3, // The original baseline (BL) normalization mode. |
48 | | NM_CHAR_ISOTROPIC = -2, // Character normalization, but isotropic. |
49 | | NM_CHAR_ANISOTROPIC = -1 // The original character normalization (CN) mode. |
50 | | }; |
51 | | |
52 | | class TESS_API DENORM { |
53 | | public: |
54 | | DENORM(); |
55 | | |
56 | | // Copying a DENORM is allowed. |
57 | | DENORM(const DENORM &); |
58 | | DENORM &operator=(const DENORM &); |
59 | | ~DENORM(); |
60 | | |
61 | | // Setup the normalization transformation parameters. |
62 | | // The normalizations applied to a blob are as follows: |
63 | | // 1. An optional block layout rotation that was applied during layout |
64 | | // analysis to make the textlines horizontal. |
65 | | // 2. A normalization transformation (LocalNormTransform): |
66 | | // Subtract the "origin" |
67 | | // Apply an x,y scaling. |
68 | | // Apply an optional rotation. |
69 | | // Add back a final translation. |
70 | | // The origin is in the block-rotated space, and is usually something like |
71 | | // the x-middle of the word at the baseline. |
72 | | // 3. Zero or more further normalization transformations that are applied |
73 | | // in sequence, with a similar pattern to the first normalization transform. |
74 | | // |
75 | | // A DENORM holds the parameters of a single normalization, and can execute |
76 | | // both the LocalNormTransform (a forwards normalization), and the |
77 | | // LocalDenormTransform which is an inverse transform or de-normalization. |
78 | | // A DENORM may point to a predecessor DENORM, which is actually the earlier |
79 | | // normalization, so the full normalization sequence involves executing all |
80 | | // predecessors first and then the transform in "this". |
81 | | // Let x be image coordinates and suppose we have normalization classes A, B, C |
82 | | // where we first apply A then B then C to get normalized x': |
83 | | // x' = CBAx |
84 | | // Then the backwards (to original coordinates) would be: |
85 | | // x = A^-1 B^-1 C^-1 x' |
86 | | // and A = B->predecessor_ and B = C->predecessor_ |
87 | | // NormTransform executes all predecessors recursively, and then this. |
88 | | // NormTransform would be used to transform an image-based feature to |
89 | | // normalized space for use in a classifier |
90 | | // DenormTransform inverts this and then all predecessors. It can be |
91 | | // used to get back to the original image coordinates from normalized space. |
92 | | // The LocalNormTransform member executes just the transformation |
93 | | // in "this" without the layout rotation or any predecessors. It would be |
94 | | // used to run each successive normalization, eg the word normalization, |
95 | | // and later the character normalization. |
96 | | |
97 | | // Arguments: |
98 | | // block: if not nullptr, then this is the first transformation, and |
99 | | // block->re_rotation() needs to be used after the Denorm |
100 | | // transformation to get back to the image coords. |
101 | | // rotation: if not nullptr, apply this rotation after translation to the |
102 | | // origin and scaling. (Usually a classify rotation.) |
103 | | // predecessor: if not nullptr, then predecessor has been applied to the |
104 | | // input space and needs to be undone to complete the inverse. |
105 | | // The above pointers are not owned by this DENORM and are assumed to live |
106 | | // longer than this denorm, except rotation, which is deep copied on input. |
107 | | // |
108 | | // x_origin: The x origin which will be mapped to final_xshift in the result. |
109 | | // y_origin: The y origin which will be mapped to final_yshift in the result. |
110 | | // Added to result of row->baseline(x) if not nullptr. NOTE(review): no "row" parameter exists in this signature - likely stale, confirm. |
111 | | // |
112 | | // x_scale: scale factor for the x-coordinate. |
113 | | // y_scale: scale factor for the y-coordinate. Ignored if segs is given. NOTE(review): no "segs" parameter exists in this signature - likely stale, confirm. |
114 | | // Note that these scale factors apply to the same x and y system as the |
115 | | // x-origin and y-origin apply, ie after any block rotation, but before |
116 | | // the rotation argument is applied. |
117 | | // |
118 | | // final_xshift: The x component of the final translation. |
119 | | // final_yshift: The y component of the final translation. |
120 | | // |
121 | | // In theory, any of the commonly used normalizations can be setup here: |
122 | | // * Traditional baseline normalization on a word: |
123 | | // SetupNormalization(block, nullptr, nullptr, |
124 | | // box.x_middle(), baseline, |
125 | | // kBlnXHeight / x_height, kBlnXHeight / x_height, |
126 | | // 0, kBlnBaselineOffset); |
127 | | // * "Numeric mode" baseline normalization on a word, in which the blobs |
128 | | // are positioned with the bottom as the baseline, is achieved by making |
129 | | // a separate DENORM for each blob. |
130 | | // SetupNormalization(block, nullptr, nullptr, |
131 | | // box.x_middle(), box.bottom(), |
132 | | // kBlnXHeight / x_height, kBlnXHeight / x_height, |
133 | | // 0, kBlnBaselineOffset); |
134 | | // * Anisotropic character normalization used by IntFx. |
135 | | // SetupNormalization(nullptr, nullptr, denorm, |
136 | | // centroid_x, centroid_y, |
137 | | // 51.2 / ry, 51.2 / rx, 128, 128); |
138 | | // * Normalize blob height to x-height (current OSD): |
139 | | // SetupNormalization(nullptr, &rotation, nullptr, |
140 | | // box.rotational_x_middle(rotation), |
141 | | // box.rotational_y_middle(rotation), |
142 | | // kBlnXHeight / box.rotational_height(rotation), |
143 | | // kBlnXHeight / box.rotational_height(rotation), |
144 | | // 0, kBlnBaselineOffset); |
145 | | // * Secondary normalization for classification rotation (current): |
146 | | // FCOORD rotation = block->classify_rotation(); |
147 | | // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; |
148 | | // SetupNormalization(nullptr, &rotation, denorm, |
149 | | // box.rotational_x_middle(rotation), |
150 | | // box.rotational_y_middle(rotation), |
151 | | // target_height / box.rotational_height(rotation), |
152 | | // target_height / box.rotational_height(rotation), |
153 | | // 0, kBlnBaselineOffset); |
154 | | // * Proposed new normalizations for CJK: Between them there is then |
155 | | // no need for further normalization at all, and the character fills the cell. |
156 | | // ** Replacement for baseline normalization on a word: |
157 | | // Scales height and width independently so that modal height and pitch |
158 | | // fill the cell respectively. |
159 | | // float cap_height = x_height / CCStruct::kXHeightCapRatio; |
160 | | // SetupNormalization(block, nullptr, nullptr, |
161 | | // box.x_middle(), cap_height / 2.0f, |
162 | | // kBlnCellHeight / fixed_pitch, |
163 | | // kBlnCellHeight / cap_height, |
164 | | // 0, 0); |
165 | | // ** Secondary normalization for classification (with rotation) (proposed): |
166 | | // Requires a simple translation to the center of the appropriate character |
167 | | // cell, no further scaling and a simple rotation (or nothing) about the |
168 | | // cell center. |
169 | | // FCOORD rotation = block->classify_rotation(); |
170 | | // SetupNormalization(nullptr, &rotation, denorm, |
171 | | // fixed_pitch_cell_center, |
172 | | // 0.0f, |
173 | | // 1.0f, |
174 | | // 1.0f, |
175 | | // 0, 0); |
176 | | void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, |
177 | | float x_origin, float y_origin, float x_scale, float y_scale, |
178 | | float final_xshift, float final_yshift); |
179 | | |
180 | | // Sets up the DENORM to execute a non-linear transformation based on |
181 | | // preserving an even distribution of stroke edges. The transformation |
182 | | // operates only within the given box, scaling input coords within the box |
183 | | // non-linearly to a box of target_width by target_height, with all other |
184 | | // coords being clipped to the box edge. As with SetupNormalization above, |
185 | | // final_xshift and final_yshift are applied after scaling, and the bottom- |
186 | | // left of box is used as a pre-scaling origin. |
187 | | // x_coords is a collection of the x-coords of vertical edges for each |
188 | | // y-coord starting at box.bottom(). |
189 | | // y_coords is a collection of the y-coords of horizontal edges for each |
190 | | // x-coord starting at box.left(). |
191 | | // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. |
192 | | // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. |
193 | | // The second-level vectors must all be sorted in ascending order. |
194 | | void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, |
195 | | float target_height, float final_xshift, float final_yshift, |
196 | | const std::vector<std::vector<int>> &x_coords, |
197 | | const std::vector<std::vector<int>> &y_coords); |
198 | | |
199 | | // Transforms the given coords one step forward to normalized space, without |
200 | | // using any block rotation or predecessor. |
201 | | void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const; |
202 | | void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const; |
203 | | // Transforms the given coords forward to normalized space using the |
204 | | // full transformation sequence defined by the block rotation, the |
205 | | // predecessors, deepest first, and finally this. If first_norm is not |
206 | | // nullptr, then the first and deepest transformation used is first_norm, |
207 | | // ending with this, and the block rotation will not be applied. |
208 | | void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const; |
209 | | void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const; |
210 | | // Transforms the given coords one step back to source space, without |
211 | | // using any block rotation or predecessor. |
212 | | void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const; |
213 | | void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const; |
214 | | // Transforms the given coords all the way back to source image space using |
215 | | // the full transformation sequence defined by this and its predecessors |
216 | | // recursively, shallowest first, and finally any block re_rotation. |
217 | | // If last_denorm is not nullptr, then the last transformation used will |
218 | | // be last_denorm, and the block re_rotation will never be executed. |
219 | | void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const; |
220 | | void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const; |
221 | | |
222 | | // Normalize a blob using blob transformations. Less accurate, but |
223 | | // more accurately copies the old way. |
224 | | void LocalNormBlob(TBLOB *blob) const; |
225 | | |
226 | | // Fills in the x-height range accepted by the given unichar_id in blob |
227 | | // coordinates, given its bounding box in the usual baseline-normalized |
228 | | // coordinates, with some initial crude x-height estimate (such as word |
229 | | // size) and this denoting the transformation that was used. |
230 | | // Also returns the amount the character must have shifted up or down. |
231 | | void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, |
232 | | float *max_xht, float *yshift) const; |
233 | | |
234 | | // Prints the content of the DENORM for debug purposes. |
235 | | void Print() const; |
236 | | |
237 | 1.93M | Image pix() const { |
238 | 1.93M | return pix_; |
239 | 1.93M | } |
240 | 2.40M | void set_pix(Image pix) { |
241 | 2.40M | pix_ = pix; |
242 | 2.40M | } |
243 | 0 | bool inverse() const { |
244 | 0 | return inverse_; |
245 | 0 | } |
246 | 2.40M | void set_inverse(bool value) { |
247 | 2.40M | inverse_ = value; |
248 | 2.40M | } |
249 | 0 | const DENORM *RootDenorm() const { |
250 | 0 | if (predecessor_ != nullptr) { |
251 | 0 | return predecessor_->RootDenorm(); |
252 | 0 | } |
253 | 0 | return this; |
254 | 0 | } |
255 | 0 | const DENORM *predecessor() const { |
256 | 0 | return predecessor_; |
257 | 0 | } |
258 | | // Accessors - perhaps should not be needed. |
259 | 465k | float x_scale() const { |
260 | 465k | return x_scale_; |
261 | 465k | } |
262 | 9.13M | float y_scale() const { |
263 | 9.13M | return y_scale_; |
264 | 9.13M | } |
265 | 4.01M | const BLOCK *block() const { |
266 | 4.01M | return block_; |
267 | 4.01M | } |
268 | 0 | void set_block(const BLOCK *block) { |
269 | 0 | block_ = block; |
270 | 0 | } |
271 | | |
272 | | private: |
273 | | // Free allocated memory and clear pointers. |
274 | | void Clear(); |
275 | | // Setup default values. |
276 | | void Init(); |
277 | | |
278 | | // Best available image. |
279 | | Image pix_; |
280 | | // True if the source image is white-on-black. |
281 | | bool inverse_; |
282 | | // Block the word came from. If not null, block->re_rotation() takes the |
283 | | // "untransformed" coordinates even further back to the original image. |
284 | | // Used only on the first DENORM in a chain. |
285 | | const BLOCK *block_; |
286 | | // Rotation to apply after translation to the origin and scaling, and before the final translation. Deep copied on input. |
287 | | const FCOORD *rotation_; |
288 | | // Previous transformation in a chain. |
289 | | const DENORM *predecessor_; |
290 | | // Non-linear transformation maps directly from each integer offset from the |
291 | | // origin to the corresponding x-coord. Owned by the DENORM. |
292 | | std::vector<float> *x_map_; |
293 | | // Non-linear transformation maps directly from each integer offset from the |
294 | | // origin to the corresponding y-coord. Owned by the DENORM. |
295 | | std::vector<float> *y_map_; |
296 | | // x-coordinate to be mapped to final_xshift_ in the result. |
297 | | float x_origin_; |
298 | | // y-coordinate to be mapped to final_yshift_ in the result. |
299 | | float y_origin_; |
300 | | // Scale factors for x and y coords. Applied to pre-rotation system. |
301 | | float x_scale_; |
302 | | float y_scale_; |
303 | | // Destination coords of the x_origin_ and y_origin_. |
304 | | float final_xshift_; |
305 | | float final_yshift_; |
306 | | }; |
307 | | |
308 | | } // namespace tesseract |
309 | | |
310 | | #endif |