/src/tesseract/src/ccstruct/normalis.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: normalis.cpp (Formerly denorm.c) |
3 | | * Description: Code for the DENORM class. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1992, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #include "normalis.h" |
20 | | |
21 | | #include <allheaders.h> |
22 | | #include "blobs.h" |
23 | | #include "helpers.h" |
24 | | #include "matrix.h" |
25 | | #include "ocrblock.h" |
26 | | #include "unicharset.h" |
27 | | #include "werd.h" |
28 | | |
29 | | #include <cfloat> // for FLT_MAX |
30 | | #include <cstdlib> |
31 | | |
32 | | namespace tesseract { |
33 | | |
// Multiplier (in image pixels) applied to the baseline/xheight tolerance for
// scripts that have no upper/lower case distinction.
constexpr int kSloppyTolerance = 4;
// Extra slack, in pixels, added on each side of the computed xheight range.
constexpr float kFinalPixelTolerance = 0.125f;
38 | | |
39 | 3.42M | DENORM::DENORM() { |
40 | 3.42M | Init(); |
41 | 3.42M | } |
42 | | |
43 | 0 | DENORM::DENORM(const DENORM &src) { |
44 | 0 | rotation_ = nullptr; |
45 | 0 | x_map_ = nullptr; |
46 | 0 | y_map_ = nullptr; |
47 | 0 | *this = src; |
48 | 0 | } |
49 | | |
50 | 1.07M | DENORM &DENORM::operator=(const DENORM &src) { |
51 | 1.07M | Clear(); |
52 | 1.07M | inverse_ = src.inverse_; |
53 | 1.07M | predecessor_ = src.predecessor_; |
54 | 1.07M | pix_ = src.pix_; |
55 | 1.07M | block_ = src.block_; |
56 | 1.07M | if (src.rotation_ == nullptr) { |
57 | 1.07M | rotation_ = nullptr; |
58 | 1.07M | } else { |
59 | 0 | rotation_ = new FCOORD(*src.rotation_); |
60 | 0 | } |
61 | 1.07M | x_origin_ = src.x_origin_; |
62 | 1.07M | y_origin_ = src.y_origin_; |
63 | 1.07M | x_scale_ = src.x_scale_; |
64 | 1.07M | y_scale_ = src.y_scale_; |
65 | 1.07M | final_xshift_ = src.final_xshift_; |
66 | 1.07M | final_yshift_ = src.final_yshift_; |
67 | 1.07M | return *this; |
68 | 1.07M | } |
69 | | |
70 | 3.42M | DENORM::~DENORM() { |
71 | 3.42M | Clear(); |
72 | 3.42M | } |
73 | | |
74 | | // Initializes the denorm for a transformation. For details see the large |
75 | | // comment in normalis.h. |
76 | | // Arguments: |
77 | | // block: if not nullptr, then this is the first transformation, and |
78 | | // block->re_rotation() needs to be used after the Denorm |
79 | | // transformation to get back to the image coords. |
80 | | // rotation: if not nullptr, apply this rotation after translation to the |
81 | | // origin and scaling. (Usually a classify rotation.) |
82 | | // predecessor: if not nullptr, then predecessor has been applied to the |
83 | | // input space and needs to be undone to complete the inverse. |
84 | | // The above pointers are not owned by this DENORM and are assumed to live |
85 | | // longer than this denorm, except rotation, which is deep copied on input. |
86 | | // |
87 | | // x_origin: The x origin which will be mapped to final_xshift in the result. |
88 | | // y_origin: The y origin which will be mapped to final_yshift in the result. |
89 | | // Added to result of row->baseline(x) if not nullptr. |
90 | | // |
91 | | // x_scale: scale factor for the x-coordinate. |
92 | | // y_scale: scale factor for the y-coordinate. Ignored if segs is given. |
93 | | // Note that these scale factors apply to the same x and y system as the |
94 | | // x-origin and y-origin apply, ie after any block rotation, but before |
95 | | // the rotation argument is applied. |
96 | | // |
97 | | // final_xshift: The x component of the final translation. |
98 | | // final_yshift: The y component of the final translation. |
99 | | void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation, |
100 | | const DENORM *predecessor, float x_origin, float y_origin, |
101 | | float x_scale, float y_scale, float final_xshift, |
102 | 2.30M | float final_yshift) { |
103 | 2.30M | Clear(); |
104 | 2.30M | block_ = block; |
105 | 2.30M | if (rotation == nullptr) { |
106 | 2.30M | rotation_ = nullptr; |
107 | 2.30M | } else { |
108 | 0 | rotation_ = new FCOORD(*rotation); |
109 | 0 | } |
110 | 2.30M | predecessor_ = predecessor; |
111 | 2.30M | x_origin_ = x_origin; |
112 | 2.30M | y_origin_ = y_origin; |
113 | 2.30M | x_scale_ = x_scale; |
114 | 2.30M | y_scale_ = y_scale; |
115 | 2.30M | final_xshift_ = final_xshift; |
116 | 2.30M | final_yshift_ = final_yshift; |
117 | 2.30M | } |
118 | | |
119 | | // Helper for SetupNonLinear computes an image of shortest run-lengths from |
120 | | // the x/y edges provided. |
121 | | // Based on "A nonlinear normalization method for handprinted Kanji character |
122 | | // recognition -- line density equalization" by Hiromitsu Yamada et al. |
123 | | // Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding |
124 | | // ______________ input x_coords and y_coords. |
125 | | // | _________ | <empty> |
126 | | // | | _ | | 1, 6 |
127 | | // | | | | | | 1, 3, 4, 6 |
128 | | // | | | | | | 1, 3, 4, 6 |
129 | | // | | | | | | 1, 3, 4, 6 |
130 | | // | | |_| | | 1, 3, 4, 6 |
131 | | // | |_________| | 1, 6 |
132 | | // |_____________| <empty> |
133 | | // E 1 1 1 1 1 E |
134 | | // m 7 7 2 7 7 m |
135 | | // p 6 p |
136 | | // t 7 t |
137 | | // y y |
138 | | // The output image contains the min of the x and y run-length (distance |
139 | | // between edges) at each coordinate in the image thus: |
140 | | // ______________ |
141 | | // |7 1_1_1_1_1 7| |
142 | | // |1|5 5 1 5 5|1| |
143 | | // |1|2 2|1|2 2|1| |
144 | | // |1|2 2|1|2 2|1| |
145 | | // |1|2 2|1|2 2|1| |
146 | | // |1|2 2|1|2 2|1| |
147 | | // |1|5_5_1_5_5|1| |
148 | | // |7_1_1_1_1_1_7| |
149 | | // Note that the input coords are all integer, so all partial pixels are dealt |
150 | | // with elsewhere. Although it is nice for outlines to be properly connected |
151 | | // and continuous, there is no requirement that they be as such, so they could |
152 | | // have been derived from a flaky source, such as greyscale. |
153 | | // This function works only within the provided box, and it is assumed that the |
154 | | // input x_coords and y_coords have already been translated to have the bottom- |
155 | | // left of box as the origin. Although an output, the minruns should have been |
156 | | // pre-initialized to be the same size as box. Each element will contain the |
157 | | // minimum of x and y run-length as shown above. |
158 | | static void ComputeRunlengthImage(const TBOX &box, |
159 | | const std::vector<std::vector<int>> &x_coords, |
160 | | const std::vector<std::vector<int>> &y_coords, |
161 | 0 | GENERIC_2D_ARRAY<int> *minruns) { |
162 | 0 | int width = box.width(); |
163 | 0 | int height = box.height(); |
164 | 0 | ASSERT_HOST(minruns->dim1() == width); |
165 | 0 | ASSERT_HOST(minruns->dim2() == height); |
166 | | // Set a 2-d image array to the run lengths at each pixel. |
167 | 0 | for (int ix = 0; ix < width; ++ix) { |
168 | 0 | int y = 0; |
169 | 0 | for (auto y_coord : y_coords[ix]) { |
170 | 0 | int y_edge = ClipToRange(y_coord, 0, height); |
171 | 0 | int gap = y_edge - y; |
172 | | // Every pixel between the last and current edge get set to the gap. |
173 | 0 | while (y < y_edge) { |
174 | 0 | (*minruns)(ix, y) = gap; |
175 | 0 | ++y; |
176 | 0 | } |
177 | 0 | } |
178 | | // Pretend there is a bounding box of edges all around the image. |
179 | 0 | int gap = height - y; |
180 | 0 | while (y < height) { |
181 | 0 | (*minruns)(ix, y) = gap; |
182 | 0 | ++y; |
183 | 0 | } |
184 | 0 | } |
185 | | // Now set the image pixels the MIN of the x and y runlengths. |
186 | 0 | for (int iy = 0; iy < height; ++iy) { |
187 | 0 | int x = 0; |
188 | 0 | for (auto x_coord : x_coords[iy]) { |
189 | 0 | int x_edge = ClipToRange(x_coord, 0, width); |
190 | 0 | int gap = x_edge - x; |
191 | 0 | while (x < x_edge) { |
192 | 0 | if (gap < (*minruns)(x, iy)) { |
193 | 0 | (*minruns)(x, iy) = gap; |
194 | 0 | } |
195 | 0 | ++x; |
196 | 0 | } |
197 | 0 | } |
198 | 0 | int gap = width - x; |
199 | 0 | while (x < width) { |
200 | 0 | if (gap < (*minruns)(x, iy)) { |
201 | 0 | (*minruns)(x, iy) = gap; |
202 | 0 | } |
203 | 0 | ++x; |
204 | 0 | } |
205 | 0 | } |
206 | 0 | } |
207 | | // Converts the run-length image (see above) to the edge density profiles used |
208 | | // for scaling, thus: |
209 | | // ______________ |
210 | | // |7 1_1_1_1_1 7| = 5.28 |
211 | | // |1|5 5 1 5 5|1| = 3.8 |
212 | | // |1|2 2|1|2 2|1| = 5 |
213 | | // |1|2 2|1|2 2|1| = 5 |
214 | | // |1|2 2|1|2 2|1| = 5 |
215 | | // |1|2 2|1|2 2|1| = 5 |
216 | | // |1|5_5_1_5_5|1| = 3.8 |
217 | | // |7_1_1_1_1_1_7| = 5.28 |
218 | | // 6 4 4 8 4 4 6 |
219 | | // . . . . . . . |
220 | | // 2 4 4 0 4 4 2 |
221 | | // 8 8 |
222 | | // Each profile is the sum of the reciprocals of the pixels in the image in |
223 | | // the appropriate row or column, and these are then normalized to sum to 1. |
224 | | // On output hx, hy contain an extra element, which will eventually be used |
225 | | // to guarantee that the top/right edge of the box (and anything beyond) always |
226 | | // gets mapped to the maximum target coordinate. |
227 | | static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<int> &minruns, |
228 | 0 | std::vector<float> &hx, std::vector<float> &hy) { |
229 | 0 | int width = box.width(); |
230 | 0 | int height = box.height(); |
231 | 0 | hx.clear(); |
232 | 0 | hx.resize(width + 1); |
233 | 0 | hy.clear(); |
234 | 0 | hy.resize(height + 1); |
235 | 0 | double total = 0.0; |
236 | 0 | for (int iy = 0; iy < height; ++iy) { |
237 | 0 | for (int ix = 0; ix < width; ++ix) { |
238 | 0 | int run = minruns(ix, iy); |
239 | 0 | if (run == 0) { |
240 | 0 | run = 1; |
241 | 0 | } |
242 | 0 | float density = 1.0f / run; |
243 | 0 | hx[ix] += density; |
244 | 0 | hy[iy] += density; |
245 | 0 | } |
246 | 0 | total += hy[iy]; |
247 | 0 | } |
248 | | // Normalize each profile to sum to 1. |
249 | 0 | if (total > 0.0) { |
250 | 0 | for (int ix = 0; ix < width; ++ix) { |
251 | 0 | hx[ix] /= total; |
252 | 0 | } |
253 | 0 | for (int iy = 0; iy < height; ++iy) { |
254 | 0 | hy[iy] /= total; |
255 | 0 | } |
256 | 0 | } |
257 | | // There is an extra element in each array, so initialize to 1. |
258 | 0 | hx[width] = 1.0f; |
259 | 0 | hy[height] = 1.0f; |
260 | 0 | } |
261 | | |
262 | | // Sets up the DENORM to execute a non-linear transformation based on |
263 | | // preserving an even distribution of stroke edges. The transformation |
264 | | // operates only within the given box. |
265 | | // x_coords is a collection of the x-coords of vertical edges for each |
266 | | // y-coord starting at box.bottom(). |
267 | | // y_coords is a collection of the y-coords of horizontal edges for each |
268 | | // x-coord starting at box.left(). |
269 | | // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. |
270 | | // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. |
271 | | // The second-level vectors must all be sorted in ascending order. |
272 | | // See comments on the helper functions above for more details. |
273 | | void DENORM::SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, |
274 | | float target_height, float final_xshift, float final_yshift, |
275 | | const std::vector<std::vector<int>> &x_coords, |
276 | 0 | const std::vector<std::vector<int>> &y_coords) { |
277 | 0 | Clear(); |
278 | 0 | predecessor_ = predecessor; |
279 | | // x_map_ and y_map_ store a mapping from input x and y coordinate to output |
280 | | // x and y coordinate, based on scaling to the supplied target_width and |
281 | | // target_height. |
282 | 0 | x_map_ = new std::vector<float>; |
283 | 0 | y_map_ = new std::vector<float>; |
284 | | // Set a 2-d image array to the run lengths at each pixel. |
285 | 0 | int width = box.width(); |
286 | 0 | int height = box.height(); |
287 | 0 | GENERIC_2D_ARRAY<int> minruns(width, height, 0); |
288 | 0 | ComputeRunlengthImage(box, x_coords, y_coords, &minruns); |
289 | | // Edge density is the sum of the inverses of the run lengths. Compute |
290 | | // edge density projection profiles. |
291 | 0 | ComputeEdgeDensityProfiles(box, minruns, *x_map_, *y_map_); |
292 | | // Convert the edge density profiles to the coordinates by multiplying by |
293 | | // the desired size and accumulating. |
294 | 0 | (*x_map_)[width] = target_width; |
295 | 0 | for (int x = width - 1; x >= 0; --x) { |
296 | 0 | (*x_map_)[x] = (*x_map_)[x + 1] - (*x_map_)[x] * target_width; |
297 | 0 | } |
298 | 0 | (*y_map_)[height] = target_height; |
299 | 0 | for (int y = height - 1; y >= 0; --y) { |
300 | 0 | (*y_map_)[y] = (*y_map_)[y + 1] - (*y_map_)[y] * target_height; |
301 | 0 | } |
302 | 0 | x_origin_ = box.left(); |
303 | 0 | y_origin_ = box.bottom(); |
304 | 0 | final_xshift_ = final_xshift; |
305 | 0 | final_yshift_ = final_yshift; |
306 | 0 | } |
307 | | |
308 | | // Transforms the given coords one step forward to normalized space, without |
309 | | // using any block rotation or predecessor. |
310 | 0 | void DENORM::LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const { |
311 | 0 | FCOORD src_pt(pt.x, pt.y); |
312 | 0 | FCOORD float_result; |
313 | 0 | LocalNormTransform(src_pt, &float_result); |
314 | 0 | transformed->x = IntCastRounded(float_result.x()); |
315 | 0 | transformed->y = IntCastRounded(float_result.y()); |
316 | 0 | } |
317 | 46.1M | void DENORM::LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const { |
318 | 46.1M | FCOORD translated(pt.x() - x_origin_, pt.y() - y_origin_); |
319 | 46.1M | if (x_map_ != nullptr && y_map_ != nullptr) { |
320 | 0 | int x = ClipToRange(IntCastRounded(translated.x()), 0, static_cast<int>(x_map_->size() - 1)); |
321 | 0 | translated.set_x((*x_map_)[x]); |
322 | 0 | int y = ClipToRange(IntCastRounded(translated.y()), 0, static_cast<int>(y_map_->size() - 1)); |
323 | 0 | translated.set_y((*y_map_)[y]); |
324 | 46.1M | } else { |
325 | 46.1M | translated.set_x(translated.x() * x_scale_); |
326 | 46.1M | translated.set_y(translated.y() * y_scale_); |
327 | 46.1M | if (rotation_ != nullptr) { |
328 | 0 | translated.rotate(*rotation_); |
329 | 0 | } |
330 | 46.1M | } |
331 | 46.1M | transformed->set_x(translated.x() + final_xshift_); |
332 | 46.1M | transformed->set_y(translated.y() + final_yshift_); |
333 | 46.1M | } |
334 | | |
335 | | // Transforms the given coords forward to normalized space using the |
336 | | // full transformation sequence defined by the block rotation, the |
337 | | // predecessors, deepest first, and finally this. If first_norm is not nullptr, |
338 | | // then the first and deepest transformation used is first_norm, ending |
339 | | // with this, and the block rotation will not be applied. |
340 | 0 | void DENORM::NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const { |
341 | 0 | FCOORD src_pt(pt.x, pt.y); |
342 | 0 | FCOORD float_result; |
343 | 0 | NormTransform(first_norm, src_pt, &float_result); |
344 | 0 | transformed->x = IntCastRounded(float_result.x()); |
345 | 0 | transformed->y = IntCastRounded(float_result.y()); |
346 | 0 | } |
347 | 0 | void DENORM::NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const { |
348 | 0 | FCOORD src_pt(pt); |
349 | 0 | if (first_norm != this) { |
350 | 0 | if (predecessor_ != nullptr) { |
351 | 0 | predecessor_->NormTransform(first_norm, pt, &src_pt); |
352 | 0 | } else if (block_ != nullptr) { |
353 | 0 | FCOORD fwd_rotation(block_->re_rotation().x(), -block_->re_rotation().y()); |
354 | 0 | src_pt.rotate(fwd_rotation); |
355 | 0 | } |
356 | 0 | } |
357 | 0 | LocalNormTransform(src_pt, transformed); |
358 | 0 | } |
359 | | |
360 | | // Transforms the given coords one step back to source space, without |
361 | | // using any block rotation or predecessor. |
362 | 0 | void DENORM::LocalDenormTransform(const TPOINT &pt, TPOINT *original) const { |
363 | 0 | FCOORD src_pt(pt.x, pt.y); |
364 | 0 | FCOORD float_result; |
365 | 0 | LocalDenormTransform(src_pt, &float_result); |
366 | 0 | original->x = IntCastRounded(float_result.x()); |
367 | 0 | original->y = IntCastRounded(float_result.y()); |
368 | 0 | } |
369 | | |
370 | 18.0M | void DENORM::LocalDenormTransform(const FCOORD &pt, FCOORD *original) const { |
371 | 18.0M | FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_); |
372 | 18.0M | if (x_map_ != nullptr && y_map_ != nullptr) { |
373 | 0 | auto pos = std::upper_bound(x_map_->begin(), x_map_->end(), rotated.x()); |
374 | 0 | if (pos > x_map_->begin()) { |
375 | 0 | --pos; |
376 | 0 | } |
377 | 0 | auto x = pos - x_map_->begin(); |
378 | 0 | original->set_x(x + x_origin_); |
379 | 0 | pos = std::upper_bound(y_map_->begin(), y_map_->end(), rotated.y()); |
380 | 0 | if (pos > y_map_->begin()) { |
381 | 0 | --pos; |
382 | 0 | } |
383 | 0 | auto y = pos - y_map_->begin(); |
384 | 0 | original->set_y(y + y_origin_); |
385 | 18.0M | } else { |
386 | 18.0M | if (rotation_ != nullptr) { |
387 | 0 | FCOORD inverse_rotation(rotation_->x(), -rotation_->y()); |
388 | 0 | rotated.rotate(inverse_rotation); |
389 | 0 | } |
390 | 18.0M | original->set_x(rotated.x() / x_scale_ + x_origin_); |
391 | 18.0M | float y_scale = y_scale_; |
392 | 18.0M | original->set_y(rotated.y() / y_scale + y_origin_); |
393 | 18.0M | } |
394 | 18.0M | } |
395 | | |
396 | | // Transforms the given coords all the way back to source image space using |
397 | | // the full transformation sequence defined by this and its predecessors |
398 | | // recursively, shallowest first, and finally any block re_rotation. |
399 | | // If last_denorm is not nullptr, then the last transformation used will |
400 | | // be last_denorm, and the block re_rotation will never be executed. |
401 | 10.8M | void DENORM::DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const { |
402 | 10.8M | FCOORD src_pt(pt.x, pt.y); |
403 | 10.8M | FCOORD float_result; |
404 | 10.8M | DenormTransform(last_denorm, src_pt, &float_result); |
405 | 10.8M | original->x = IntCastRounded(float_result.x()); |
406 | 10.8M | original->y = IntCastRounded(float_result.y()); |
407 | 10.8M | } |
408 | 18.0M | void DENORM::DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const { |
409 | 18.0M | LocalDenormTransform(pt, original); |
410 | 18.0M | if (last_denorm != this) { |
411 | 18.0M | if (predecessor_ != nullptr) { |
412 | 0 | predecessor_->DenormTransform(last_denorm, *original, original); |
413 | 18.0M | } else if (block_ != nullptr) { |
414 | 18.0M | original->rotate(block_->re_rotation()); |
415 | 18.0M | } |
416 | 18.0M | } |
417 | 18.0M | } |
418 | | |
419 | | // Normalize a blob using blob transformations. Less accurate, but |
420 | | // more accurately copies the old way. |
421 | 656k | void DENORM::LocalNormBlob(TBLOB *blob) const { |
422 | 656k | ICOORD translation(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_)); |
423 | 656k | blob->Move(translation); |
424 | 656k | if (y_scale_ != 1.0f) { |
425 | 656k | blob->Scale(y_scale_); |
426 | 656k | } |
427 | 656k | if (rotation_ != nullptr) { |
428 | 0 | blob->Rotate(*rotation_); |
429 | 0 | } |
430 | 656k | translation.set_x(IntCastRounded(final_xshift_)); |
431 | 656k | translation.set_y(IntCastRounded(final_yshift_)); |
432 | 656k | blob->Move(translation); |
433 | 656k | } |
434 | | |
435 | | // Fills in the x-height range accepted by the given unichar_id, given its |
436 | | // bounding box in the usual baseline-normalized coordinates, with some |
437 | | // initial crude x-height estimate (such as word size) and this denoting the |
438 | | // transformation that was used. |
439 | | void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, |
440 | 3.62M | float *min_xht, float *max_xht, float *yshift) const { |
441 | | // Default return -- accept anything. |
442 | 3.62M | *yshift = 0.0f; |
443 | 3.62M | *min_xht = 0.0f; |
444 | 3.62M | *max_xht = FLT_MAX; |
445 | | |
446 | 3.62M | if (!unicharset.top_bottom_useful()) { |
447 | 0 | return; |
448 | 0 | } |
449 | | |
450 | | // Clip the top and bottom to the limit of normalized feature space. |
451 | 3.62M | int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1); |
452 | 3.62M | int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1); |
453 | | // A tolerance of yscale corresponds to 1 pixel in the image. |
454 | 3.62M | double tolerance = y_scale(); |
455 | | // If the script doesn't have upper and lower-case characters, widen the |
456 | | // tolerance to allow sloppy baseline/x-height estimates. |
457 | 3.62M | if (!unicharset.script_has_upper_lower()) { |
458 | 0 | tolerance = y_scale() * kSloppyTolerance; |
459 | 0 | } |
460 | | |
461 | 3.62M | int min_bottom, max_bottom, min_top, max_top; |
462 | 3.62M | unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top); |
463 | | |
464 | | // Calculate the scale factor we'll use to get to image y-pixels |
465 | 3.62M | double midx = (bbox.left() + bbox.right()) / 2.0; |
466 | 3.62M | double ydiff = (bbox.top() - bbox.bottom()) + 2.0; |
467 | 3.62M | FCOORD mid_bot(midx, bbox.bottom()), tmid_bot; |
468 | 3.62M | FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high; |
469 | 3.62M | DenormTransform(nullptr, mid_bot, &tmid_bot); |
470 | 3.62M | DenormTransform(nullptr, mid_high, &tmid_high); |
471 | | |
472 | | // bln_y_measure * yscale = image_y_measure |
473 | 3.62M | double yscale = tmid_high.pt_to_pt_dist(tmid_bot) / ydiff; |
474 | | |
475 | | // Calculate y-shift |
476 | 3.62M | int bln_yshift = 0, bottom_shift = 0, top_shift = 0; |
477 | 3.62M | if (bottom < min_bottom - tolerance) { |
478 | 1.03M | bottom_shift = bottom - min_bottom; |
479 | 2.59M | } else if (bottom > max_bottom + tolerance) { |
480 | 837k | bottom_shift = bottom - max_bottom; |
481 | 837k | } |
482 | 3.62M | if (top < min_top - tolerance) { |
483 | 1.77M | top_shift = top - min_top; |
484 | 1.85M | } else if (top > max_top + tolerance) { |
485 | 207k | top_shift = top - max_top; |
486 | 207k | } |
487 | 3.62M | if ((top_shift >= 0 && bottom_shift > 0) || (top_shift < 0 && bottom_shift < 0)) { |
488 | 1.58M | bln_yshift = (top_shift + bottom_shift) / 2; |
489 | 1.58M | } |
490 | 3.62M | *yshift = bln_yshift * yscale; |
491 | | |
492 | | // To help very high cap/xheight ratio fonts accept the correct x-height, |
493 | | // and to allow the large caps in small caps to accept the xheight of the |
494 | | // small caps, add kBlnBaselineOffset to chars with a maximum max, and have |
495 | | // a top already at a significantly high position. |
496 | 3.62M | if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2) { |
497 | 1.24M | max_top += kBlnBaselineOffset; |
498 | 1.24M | } |
499 | 3.62M | top -= bln_yshift; |
500 | 3.62M | int height = top - kBlnBaselineOffset; |
501 | 3.62M | double min_height = min_top - kBlnBaselineOffset - tolerance; |
502 | 3.62M | double max_height = max_top - kBlnBaselineOffset + tolerance; |
503 | | |
504 | | // We shouldn't try calculations if the characters are very short (for example |
505 | | // for punctuation). |
506 | 3.62M | if (min_height > kBlnXHeight / 8 && height > 0) { |
507 | 3.43M | float result = height * kBlnXHeight * yscale / min_height; |
508 | 3.43M | *max_xht = result + kFinalPixelTolerance; |
509 | 3.43M | result = height * kBlnXHeight * yscale / max_height; |
510 | 3.43M | *min_xht = result - kFinalPixelTolerance; |
511 | 3.43M | } |
512 | 3.62M | } |
513 | | |
514 | | // Prints the content of the DENORM for debug purposes. |
515 | 0 | void DENORM::Print() const { |
516 | 0 | if (pix_ != nullptr) { |
517 | 0 | tprintf("Pix dimensions %d x %d x %d\n", pixGetWidth(pix_), pixGetHeight(pix_), |
518 | 0 | pixGetDepth(pix_)); |
519 | 0 | } |
520 | 0 | if (inverse_) { |
521 | 0 | tprintf("Inverse\n"); |
522 | 0 | } |
523 | 0 | if (block_ && block_->re_rotation().x() != 1.0f) { |
524 | 0 | tprintf("Block rotation %g, %g\n", block_->re_rotation().x(), block_->re_rotation().y()); |
525 | 0 | } |
526 | 0 | tprintf("Input Origin = (%g, %g)\n", x_origin_, y_origin_); |
527 | 0 | if (x_map_ != nullptr && y_map_ != nullptr) { |
528 | 0 | tprintf("x map:\n"); |
529 | 0 | for (auto x : *x_map_) { |
530 | 0 | tprintf("%g ", x); |
531 | 0 | } |
532 | 0 | tprintf("\ny map:\n"); |
533 | 0 | for (auto y : *y_map_) { |
534 | 0 | tprintf("%g ", y); |
535 | 0 | } |
536 | 0 | tprintf("\n"); |
537 | 0 | } else { |
538 | 0 | tprintf("Scale = (%g, %g)\n", x_scale_, y_scale_); |
539 | 0 | if (rotation_ != nullptr) { |
540 | 0 | tprintf("Rotation = (%g, %g)\n", rotation_->x(), rotation_->y()); |
541 | 0 | } |
542 | 0 | } |
543 | 0 | tprintf("Final Origin = (%g, %g)\n", final_xshift_, final_xshift_); |
544 | 0 | if (predecessor_ != nullptr) { |
545 | 0 | tprintf("Predecessor:\n"); |
546 | 0 | predecessor_->Print(); |
547 | 0 | } |
548 | 0 | } |
549 | | |
550 | | // ============== Private Code ====================== |
551 | | |
552 | | // Free allocated memory and clear pointers. |
553 | 6.81M | void DENORM::Clear() { |
554 | 6.81M | delete x_map_; |
555 | 6.81M | x_map_ = nullptr; |
556 | 6.81M | delete y_map_; |
557 | 6.81M | y_map_ = nullptr; |
558 | 6.81M | delete rotation_; |
559 | 6.81M | rotation_ = nullptr; |
560 | 6.81M | } |
561 | | |
562 | | // Setup default values. |
563 | 3.42M | void DENORM::Init() { |
564 | 3.42M | inverse_ = false; |
565 | 3.42M | pix_ = nullptr; |
566 | 3.42M | block_ = nullptr; |
567 | 3.42M | rotation_ = nullptr; |
568 | 3.42M | predecessor_ = nullptr; |
569 | 3.42M | x_map_ = nullptr; |
570 | 3.42M | y_map_ = nullptr; |
571 | 3.42M | x_origin_ = 0.0f; |
572 | 3.42M | y_origin_ = 0.0f; |
573 | 3.42M | x_scale_ = 1.0f; |
574 | 3.42M | y_scale_ = 1.0f; |
575 | 3.42M | final_xshift_ = 0.0f; |
576 | 3.42M | final_yshift_ = static_cast<float>(kBlnBaselineOffset); |
577 | 3.42M | } |
578 | | |
579 | | } // namespace tesseract |