/src/tesseract/src/textord/cjkpitch.cpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: cjkpitch.cpp |
3 | | // Description: Code to determine fixed pitchness and the pitch if fixed, |
4 | | // for CJK text. |
5 | | // Author: takenaka@google.com (Hiroshi Takenaka) |
6 | | // |
7 | | // Copyright 2011 Google Inc. All Rights Reserved. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #include "cjkpitch.h" |
21 | | #include "topitch.h" |
22 | | #include "tovars.h" |
23 | | |
24 | | #include <algorithm> // for std::sort |
25 | | #include <cmath> |
26 | | #include <vector> // for std::vector |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | | static BOOL_VAR(textord_space_size_is_variable, false, |
31 | | "If true, word delimiter spaces are assumed to have " |
32 | | "variable width, even though characters have fixed pitch."); |
33 | | |
34 | | // Allow +/-10% error for character pitch / body size. |
35 | | static const float kFPTolerance = 0.1f; |
36 | | |
37 | | // Minimum ratio of "good" character pitch for a row to be considered |
38 | | // to be fixed-pitch. |
39 | | static const float kFixedPitchThreshold = 0.35f; |
40 | | |
41 | | // rank statistics for a small collection of float values. |
42 | | class SimpleStats { |
43 | | public: |
44 | 0 | SimpleStats() = default; |
45 | 0 | ~SimpleStats() = default; |
46 | | |
47 | 0 | void Clear() { |
48 | 0 | values_.clear(); |
49 | 0 | finalized_ = false; |
50 | 0 | } |
51 | | |
52 | 0 | void Add(float value) { |
53 | 0 | values_.push_back(value); |
54 | 0 | finalized_ = false; |
55 | 0 | } |
56 | | |
57 | 0 | void Finish() { |
58 | 0 | std::sort(values_.begin(), values_.end()); |
59 | 0 | finalized_ = true; |
60 | 0 | } |
61 | | |
62 | 0 | float ile(double frac) { |
63 | 0 | if (!finalized_) { |
64 | 0 | Finish(); |
65 | 0 | } |
66 | 0 | if (values_.empty()) { |
67 | 0 | return 0.0f; |
68 | 0 | } |
69 | 0 | if (frac >= 1.0) { |
70 | 0 | return values_.back(); |
71 | 0 | } |
72 | 0 | if (frac <= 0.0 || values_.size() == 1) { |
73 | 0 | return values_[0]; |
74 | 0 | } |
75 | 0 | int index = static_cast<int>((values_.size() - 1) * frac); |
76 | 0 | float reminder = (values_.size() - 1) * frac - index; |
77 | |
|
78 | 0 | return values_[index] * (1.0f - reminder) + values_[index + 1] * reminder; |
79 | 0 | } |
80 | | |
81 | 0 | float median() { |
82 | 0 | return ile(0.5); |
83 | 0 | } |
84 | | |
85 | 0 | float minimum() { |
86 | 0 | if (!finalized_) { |
87 | 0 | Finish(); |
88 | 0 | } |
89 | 0 | if (values_.empty()) { |
90 | 0 | return 0.0f; |
91 | 0 | } |
92 | 0 | return values_[0]; |
93 | 0 | } |
94 | | |
95 | 0 | bool empty() const { |
96 | 0 | return values_.empty(); |
97 | 0 | } |
98 | | |
99 | 0 | int size() const { |
100 | 0 | return values_.size(); |
101 | 0 | } |
102 | | |
103 | | private: |
104 | | bool finalized_ = false; |
105 | | std::vector<float> values_; |
106 | | }; |
107 | | |
108 | | // statistics for a small collection of float pairs (x, y). |
109 | | // EstimateYFor(x, r) returns the estimated y at x, based on |
110 | | // existing samples between x*(1-r) ~ x*(1+r). |
111 | | class LocalCorrelation { |
112 | | public: |
113 | | struct float_pair { |
114 | | float x, y; |
115 | | int vote; |
116 | | }; |
117 | | |
118 | 0 | LocalCorrelation() : finalized_(false) {} |
119 | 0 | ~LocalCorrelation() = default; |
120 | | |
121 | 0 | void Finish() { |
122 | 0 | std::sort(values_.begin(), values_.end(), float_pair_compare); |
123 | 0 | finalized_ = true; |
124 | 0 | } |
125 | | |
126 | 0 | void Clear() { |
127 | 0 | finalized_ = false; |
128 | 0 | } |
129 | | |
130 | 0 | void Add(float x, float y, int v) { |
131 | 0 | struct float_pair value; |
132 | 0 | value.x = x; |
133 | 0 | value.y = y; |
134 | 0 | value.vote = v; |
135 | 0 | values_.push_back(value); |
136 | 0 | finalized_ = false; |
137 | 0 | } |
138 | | |
139 | 0 | float EstimateYFor(float x, float r) { |
140 | 0 | ASSERT_HOST(finalized_); |
141 | 0 | unsigned start = 0, end = values_.size(); |
142 | | // Because the number of samples (used_) is assumed to be small, |
143 | | // just use linear search to find values within the range. |
144 | 0 | while (start < values_.size() && values_[start].x < x * (1 - r)) { |
145 | 0 | start++; |
146 | 0 | } |
147 | 0 | while (end > 0 && values_[end - 1].x > x * (1 + r)) { |
148 | 0 | end--; |
149 | 0 | } |
150 | | |
151 | | // Fall back to the global average if there are no data within r |
152 | | // of x. |
153 | 0 | if (start >= end) { |
154 | 0 | start = 0; |
155 | 0 | end = values_.size(); |
156 | 0 | } |
157 | | |
158 | | // Compute weighted average of the values. |
159 | 0 | float rc = 0; |
160 | 0 | int vote = 0; |
161 | 0 | for (auto i = start; i < end; i++) { |
162 | 0 | rc += values_[i].vote * x * values_[i].y / values_[i].x; |
163 | 0 | vote += values_[i].vote; |
164 | 0 | } |
165 | |
|
166 | 0 | return vote == 0 ? 0.0f : rc / vote; |
167 | 0 | } |
168 | | |
169 | | private: |
170 | 0 | static bool float_pair_compare(const float_pair f_a, const float_pair f_b) { |
171 | 0 | return f_a.x < f_b.x; |
172 | 0 | } |
173 | | |
174 | | bool finalized_; |
175 | | std::vector<struct float_pair> values_; |
176 | | }; |
177 | | |
178 | | // Class to represent a character on a fixed pitch row. A FPChar may |
179 | | // consist of multiple blobs (BLOBNBOX's). |
180 | | class FPChar { |
181 | | public: |
182 | | enum Alignment { ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD }; |
183 | | |
184 | | FPChar() |
185 | 0 | : box_() |
186 | 0 | , real_body_() |
187 | 0 | , from_(nullptr) |
188 | 0 | , to_(nullptr) |
189 | 0 | , num_blobs_(0) |
190 | 0 | , max_gap_(0) |
191 | 0 | , final_(false) |
192 | 0 | , alignment_(ALIGN_UNKNOWN) |
193 | 0 | , merge_to_prev_(false) |
194 | 0 | , delete_flag_(false) {} |
195 | | |
196 | | // Initialize from blob. |
197 | 0 | void Init(BLOBNBOX *blob) { |
198 | 0 | box_ = blob->bounding_box(); |
199 | 0 | real_body_ = box_; |
200 | 0 | from_ = to_ = blob; |
201 | 0 | num_blobs_ = 1; |
202 | 0 | } |
203 | | |
204 | | // Merge this character with "next". The "next" character should |
205 | | // consist of succeeding blobs on the same row. |
206 | 0 | void Merge(const FPChar &next) { |
207 | 0 | int gap = real_body_.x_gap(next.real_body_); |
208 | 0 | if (gap > max_gap_) { |
209 | 0 | max_gap_ = gap; |
210 | 0 | } |
211 | |
|
212 | 0 | box_ += next.box_; |
213 | 0 | real_body_ += next.real_body_; |
214 | 0 | to_ = next.to_; |
215 | 0 | num_blobs_ += next.num_blobs_; |
216 | 0 | } |
217 | | |
218 | | // Accessors. |
219 | 0 | const TBOX &box() const { |
220 | 0 | return box_; |
221 | 0 | } |
222 | 0 | void set_box(const TBOX &box) { |
223 | 0 | box_ = box; |
224 | 0 | } |
225 | 0 | const TBOX &real_body() const { |
226 | 0 | return real_body_; |
227 | 0 | } |
228 | | |
229 | 0 | bool is_final() const { |
230 | 0 | return final_; |
231 | 0 | } |
232 | 0 | void set_final(bool flag) { |
233 | 0 | final_ = flag; |
234 | 0 | } |
235 | | |
236 | 0 | const Alignment &alignment() const { |
237 | 0 | return alignment_; |
238 | 0 | } |
239 | 0 | void set_alignment(Alignment alignment) { |
240 | 0 | alignment_ = alignment; |
241 | 0 | } |
242 | | |
243 | 0 | bool merge_to_prev() const { |
244 | 0 | return merge_to_prev_; |
245 | 0 | } |
246 | 0 | void set_merge_to_prev(bool flag) { |
247 | 0 | merge_to_prev_ = flag; |
248 | 0 | } |
249 | | |
250 | 0 | bool delete_flag() const { |
251 | 0 | return delete_flag_; |
252 | 0 | } |
253 | 0 | void set_delete_flag(bool flag) { |
254 | 0 | delete_flag_ = flag; |
255 | 0 | } |
256 | | |
257 | 0 | int max_gap() const { |
258 | 0 | return max_gap_; |
259 | 0 | } |
260 | | |
261 | 0 | int num_blobs() const { |
262 | 0 | return num_blobs_; |
263 | 0 | } |
264 | | |
265 | | private: |
266 | | TBOX box_; // Rectangle region considered to be occupied by this |
267 | | // character. It could be bigger than the bounding box. |
268 | | TBOX real_body_; // Real bounding box of this character. |
269 | | BLOBNBOX *from_; // The first blob of this character. |
270 | | BLOBNBOX *to_; // The last blob of this character. |
271 | | int num_blobs_; // Number of blobs that belong to this character. |
272 | | int max_gap_; // Maximum x gap between the blobs. |
273 | | |
274 | | bool final_; // True if alignment/fragmentation decision for this |
275 | | // character is finalized. |
276 | | |
277 | | Alignment alignment_; // Alignment status. |
278 | | bool merge_to_prev_; // True if this is a fragmented blob that |
279 | | // needs to be merged to the previous |
280 | | // character. |
281 | | |
282 | | int delete_flag_; // True if this character is merged to another |
283 | | // one and needs to be deleted. |
284 | | }; |
285 | | |
286 | | // Class to represent a fixed pitch row, as a linear collection of |
287 | | // FPChar's. |
288 | | class FPRow { |
289 | | public: |
290 | 0 | FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), heights_(), characters_() {} |
291 | | |
292 | 0 | ~FPRow() = default; |
293 | | |
294 | | // Initialize from TD_ROW. |
295 | | void Init(TO_ROW *row); |
296 | | |
297 | | // Estimate character pitch of this row, based on current alignment |
298 | | // status of underlying FPChar's. The argument pass1 can be set to |
299 | | // true if the function is called after Pass1Analyze(), to eliminate |
300 | | // some redundant computation. |
301 | | void EstimatePitch(bool pass1); |
302 | | |
303 | | // Check each character if it has good character pitches between its |
304 | | // predecessor and its successor and set its alignment status. If |
305 | | // we already calculated the estimated pitch for this row, the value |
306 | | // is used. If we didn't, a character is considered to be good, if |
307 | | // the pitches between its predecessor and its successor are almost |
308 | | // equal. |
309 | | void Pass1Analyze(); |
310 | | |
311 | | // Find characters that fit nicely into one imaginary body next to a |
312 | | // character which is already finalized. Then mark them as character |
313 | | // fragments. |
314 | | bool Pass2Analyze(); |
315 | | |
316 | | // Merge FPChars marked as character fragments into one. |
317 | | void MergeFragments(); |
318 | | |
319 | | // Finalize characters that are already large enough and cannot be |
320 | | // merged with others any more. |
321 | | void FinalizeLargeChars(); |
322 | | |
323 | | // Output pitch estimation results to attributes of TD_ROW. |
324 | | void OutputEstimations(); |
325 | | |
326 | | void DebugOutputResult(int row_index); |
327 | | |
328 | 0 | int good_pitches() { |
329 | 0 | return good_pitches_.size(); |
330 | 0 | } |
331 | | |
332 | 0 | float pitch() { |
333 | 0 | return pitch_; |
334 | 0 | } |
335 | | |
336 | 0 | float estimated_pitch() { |
337 | 0 | return estimated_pitch_; |
338 | 0 | } |
339 | | |
340 | 0 | void set_estimated_pitch(float v) { |
341 | 0 | estimated_pitch_ = v; |
342 | 0 | } |
343 | | |
344 | 0 | float height() { |
345 | 0 | return height_; |
346 | 0 | } |
347 | | |
348 | 0 | float height_pitch_ratio() { |
349 | 0 | if (good_pitches_.size() < 2) { |
350 | 0 | return -1.0; |
351 | 0 | } |
352 | 0 | return height_ / good_pitches_.median(); |
353 | 0 | } |
354 | | |
355 | 0 | float gap() { |
356 | 0 | return gap_; |
357 | 0 | } |
358 | | |
359 | 0 | size_t num_chars() { |
360 | 0 | return characters_.size(); |
361 | 0 | } |
362 | 0 | FPChar *character(int i) { |
363 | 0 | return &characters_[i]; |
364 | 0 | } |
365 | | |
366 | 0 | const TBOX &box(int i) { |
367 | 0 | return characters_[i].box(); |
368 | 0 | } |
369 | | |
370 | 0 | const TBOX &real_body(int i) { |
371 | 0 | return characters_[i].real_body(); |
372 | 0 | } |
373 | | |
374 | 0 | bool is_box_modified(int i) { |
375 | 0 | return !(characters_[i].box() == characters_[i].real_body()); |
376 | 0 | } |
377 | | |
378 | 0 | float center_x(int i) { |
379 | 0 | return (characters_[i].box().left() + characters_[i].box().right()) / 2.0; |
380 | 0 | } |
381 | | |
382 | 0 | bool is_final(int i) { |
383 | 0 | return characters_[i].is_final(); |
384 | 0 | } |
385 | | |
386 | 0 | void finalize(int i) { |
387 | 0 | characters_[i].set_final(true); |
388 | 0 | } |
389 | | |
390 | 0 | bool is_good(int i) { |
391 | 0 | return characters_[i].alignment() == FPChar::ALIGN_GOOD; |
392 | 0 | } |
393 | | |
394 | 0 | void mark_good(int i) { |
395 | 0 | characters_[i].set_alignment(FPChar::ALIGN_GOOD); |
396 | 0 | } |
397 | | |
398 | 0 | void mark_bad(int i) { |
399 | 0 | characters_[i].set_alignment(FPChar::ALIGN_BAD); |
400 | 0 | } |
401 | | |
402 | 0 | void clear_alignment(int i) { |
403 | 0 | characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN); |
404 | 0 | } |
405 | | |
406 | | private: |
407 | 0 | static float x_overlap_fraction(const TBOX &box1, const TBOX &box2) { |
408 | 0 | if (std::min(box1.width(), box2.width()) == 0) { |
409 | 0 | return 0.0; |
410 | 0 | } |
411 | 0 | return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width())); |
412 | 0 | } |
413 | | |
414 | 0 | static bool mostly_overlap(const TBOX &box1, const TBOX &box2) { |
415 | 0 | return x_overlap_fraction(box1, box2) > 0.9; |
416 | 0 | } |
417 | | |
418 | 0 | static bool significant_overlap(const TBOX &box1, const TBOX &box2) { |
419 | 0 | if (std::min(box1.width(), box2.width()) == 0) { |
420 | 0 | return false; |
421 | 0 | } |
422 | 0 | int overlap = -box1.x_gap(box2); |
423 | 0 | return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1; |
424 | 0 | } |
425 | | |
426 | 0 | static float box_pitch(const TBOX &ref, const TBOX &box) { |
427 | 0 | return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0; |
428 | 0 | } |
429 | | |
430 | | // Check if two neighboring characters satisfy the fixed pitch model. |
431 | 0 | static bool is_good_pitch(float pitch, const TBOX &box1, const TBOX &box2) { |
432 | | // Character box shouldn't exceed pitch. |
433 | 0 | if (box1.width() >= pitch * (1.0 + kFPTolerance) || |
434 | 0 | box2.width() >= pitch * (1.0 + kFPTolerance) || |
435 | 0 | box1.height() >= pitch * (1.0 + kFPTolerance) || |
436 | 0 | box2.height() >= pitch * (1.0 + kFPTolerance)) { |
437 | 0 | return false; |
438 | 0 | } |
439 | | |
440 | 0 | const float real_pitch = box_pitch(box1, box2); |
441 | 0 | if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) { |
442 | 0 | return true; |
443 | 0 | } |
444 | | |
445 | 0 | if (textord_space_size_is_variable) { |
446 | | // Hangul characters usually have fixed pitch, but words are |
447 | | // delimited by space which can be narrower than characters. |
448 | 0 | if (real_pitch > pitch && real_pitch < pitch * 2.0 && real_pitch - box1.x_gap(box2) < pitch) { |
449 | 0 | return true; |
450 | 0 | } |
451 | 0 | } |
452 | 0 | return false; |
453 | 0 | } |
454 | | |
455 | 0 | static bool is_interesting_blob(const BLOBNBOX *blob) { |
456 | 0 | return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER; |
457 | 0 | } |
458 | | |
459 | | // Cleanup chars that are already merged to others. |
460 | 0 | void DeleteChars() { |
461 | 0 | unsigned index = 0; |
462 | 0 | for (unsigned i = 0; i < characters_.size(); ++i) { |
463 | 0 | if (!characters_[i].delete_flag()) { |
464 | 0 | if (index != i) { |
465 | 0 | characters_[index] = characters_[i]; |
466 | 0 | } |
467 | 0 | index++; |
468 | 0 | } |
469 | 0 | } |
470 | 0 | characters_.resize(index); |
471 | 0 | } |
472 | | |
473 | | float pitch_ = 0.0f; // Character pitch. |
474 | | float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered |
475 | | // to be good enough. |
476 | | float height_ = 0.0f; // Character height. |
477 | | float gap_ = 0.0f; // Minimum gap between characters. |
478 | | |
479 | | // Pitches between any two successive characters. |
480 | | SimpleStats all_pitches_; |
481 | | // Gaps between any two successive characters. |
482 | | SimpleStats all_gaps_; |
483 | | // Pitches between any two successive characters that are consistent |
484 | | // with the fixed pitch model. |
485 | | SimpleStats good_pitches_; |
486 | | // Gaps between any two successive characters that are consistent |
487 | | // with the fixed pitch model. |
488 | | SimpleStats good_gaps_; |
489 | | |
490 | | SimpleStats heights_; |
491 | | |
492 | | std::vector<FPChar> characters_; |
493 | | TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row. |
494 | | }; |
495 | | |
496 | 0 | void FPRow::Init(TO_ROW *row) { |
497 | 0 | ASSERT_HOST(row != nullptr); |
498 | 0 | ASSERT_HOST(row->xheight > 0); |
499 | 0 | real_row_ = row; |
500 | 0 | real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision. |
501 | |
|
502 | 0 | BLOBNBOX_IT blob_it = row->blob_list(); |
503 | | // Initialize characters_ and compute the initial estimation of |
504 | | // character height. |
505 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
506 | 0 | if (is_interesting_blob(blob_it.data())) { |
507 | 0 | FPChar fp_char; |
508 | 0 | fp_char.Init(blob_it.data()); |
509 | | // Merge unconditionally if two blobs overlap. |
510 | 0 | if (!characters_.empty() && significant_overlap(fp_char.box(), characters_.back().box())) { |
511 | 0 | characters_.back().Merge(fp_char); |
512 | 0 | } else { |
513 | 0 | characters_.push_back(fp_char); |
514 | 0 | } |
515 | 0 | TBOX bound = blob_it.data()->bounding_box(); |
516 | 0 | if (bound.height() * 3.0 > bound.width()) { |
517 | 0 | heights_.Add(bound.height()); |
518 | 0 | } |
519 | 0 | } |
520 | 0 | } |
521 | 0 | heights_.Finish(); |
522 | 0 | height_ = heights_.ile(0.875); |
523 | 0 | } |
524 | | |
525 | 0 | void FPRow::OutputEstimations() { |
526 | 0 | if (good_pitches_.empty()) { |
527 | 0 | pitch_ = 0.0f; |
528 | 0 | real_row_->pitch_decision = PITCH_CORR_PROP; |
529 | 0 | return; |
530 | 0 | } |
531 | | |
532 | 0 | pitch_ = good_pitches_.median(); |
533 | 0 | real_row_->fixed_pitch = pitch_; |
534 | | // good_gaps_.ile(0.125) can be large if most characters on the row |
535 | | // are skinny. Use pitch_ - height_ instead if it's smaller, but |
536 | | // positive. |
537 | 0 | real_row_->kern_size = real_row_->pr_nonsp = |
538 | 0 | std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f)); |
539 | 0 | real_row_->body_size = pitch_ - real_row_->kern_size; |
540 | |
|
541 | 0 | if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) { |
542 | | // If more than half of the characters of a line don't fit to the |
543 | | // fixed pitch model, consider the line to be proportional. 50% |
544 | | // seems to be a good threshold in practice as well. |
545 | | // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in |
546 | | // real_row_ as a partial estimation result and try to use them in the |
547 | | // normalization process. |
548 | 0 | real_row_->pitch_decision = PITCH_CORR_PROP; |
549 | 0 | return; |
550 | 0 | } else if (good_pitches_.size() > all_pitches_.size() * 0.75) { |
551 | 0 | real_row_->pitch_decision = PITCH_DEF_FIXED; |
552 | 0 | } else { |
553 | 0 | real_row_->pitch_decision = PITCH_CORR_FIXED; |
554 | 0 | } |
555 | | |
556 | 0 | real_row_->space_size = real_row_->pr_space = pitch_; |
557 | | // Set min_space to 50% of character pitch so that we can break CJK |
558 | | // text at a half-width space after punctuation. |
559 | 0 | real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5; |
560 | | |
561 | | // Don't consider a quarter space as a real space, because it's used |
562 | | // for line justification in traditional Japanese books. |
563 | 0 | real_row_->max_nonspace = |
564 | 0 | std::max(pitch_ * 0.25 + good_gaps_.minimum(), static_cast<double>(good_gaps_.ile(0.875))); |
565 | |
|
566 | 0 | int space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2, |
567 | 0 | static_cast<int>(real_row_->xheight)); |
568 | | |
569 | | // Make max_nonspace larger than any intra-character gap so that |
570 | | // make_prop_words() won't break a row at the middle of a character. |
571 | 0 | for (size_t i = 0; i < num_chars(); ++i) { |
572 | 0 | if (characters_[i].max_gap() > real_row_->max_nonspace) { |
573 | 0 | real_row_->max_nonspace = characters_[i].max_gap(); |
574 | 0 | } |
575 | 0 | } |
576 | 0 | real_row_->space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2, |
577 | 0 | static_cast<int>(real_row_->xheight)); |
578 | 0 | real_row_->used_dm_model = false; |
579 | | |
580 | | // Setup char_cells. |
581 | 0 | ICOORDELT_IT cell_it = &real_row_->char_cells; |
582 | 0 | auto *cell = new ICOORDELT(real_body(0).left(), 0); |
583 | 0 | cell_it.add_after_then_move(cell); |
584 | |
|
585 | 0 | int right = real_body(0).right(); |
586 | 0 | for (size_t i = 1; i < num_chars(); ++i) { |
587 | | // Put a word break if gap between two characters is bigger than |
588 | | // space_threshold. Don't break if none of two characters |
589 | | // couldn't be "finalized", because maybe they need to be merged |
590 | | // to one character. |
591 | 0 | if ((is_final(i - 1) || is_final(i)) && |
592 | 0 | real_body(i - 1).x_gap(real_body(i)) > space_threshold) { |
593 | 0 | cell = new ICOORDELT(right + 1, 0); |
594 | 0 | cell_it.add_after_then_move(cell); |
595 | 0 | while (right + pitch_ < box(i).left()) { |
596 | 0 | right += pitch_; |
597 | 0 | cell = new ICOORDELT(right + 1, 0); |
598 | 0 | cell_it.add_after_then_move(cell); |
599 | 0 | } |
600 | 0 | right = box(i).left(); |
601 | 0 | } |
602 | 0 | cell = new ICOORDELT((right + real_body(i).left()) / 2, 0); |
603 | 0 | cell_it.add_after_then_move(cell); |
604 | 0 | right = real_body(i).right(); |
605 | 0 | } |
606 | |
|
607 | 0 | cell = new ICOORDELT(right + 1, 0); |
608 | 0 | cell_it.add_after_then_move(cell); |
609 | | |
610 | | // TODO(takenaka): add code to store alignment/fragmentation |
611 | | // information to blobs so that it can be reused later, e.g. in |
612 | | // recognition phase. |
613 | 0 | } |
614 | | |
615 | 0 | void FPRow::EstimatePitch(bool pass1) { |
616 | 0 | good_pitches_.Clear(); |
617 | 0 | all_pitches_.Clear(); |
618 | 0 | good_gaps_.Clear(); |
619 | 0 | all_gaps_.Clear(); |
620 | 0 | heights_.Clear(); |
621 | 0 | if (num_chars() == 0) { |
622 | 0 | return; |
623 | 0 | } |
624 | | |
625 | 0 | int32_t cx0, cx1; |
626 | 0 | bool prev_was_good = is_good(0); |
627 | 0 | cx0 = center_x(0); |
628 | |
|
629 | 0 | heights_.Add(box(0).height()); |
630 | 0 | for (size_t i = 1; i < num_chars(); i++) { |
631 | 0 | cx1 = center_x(i); |
632 | 0 | int32_t pitch = cx1 - cx0; |
633 | 0 | int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i))); |
634 | |
|
635 | 0 | heights_.Add(box(i).height()); |
636 | | // Ignore if the pitch is too close. But don't ignore wide pitch |
637 | | // may be the result of large tracking. |
638 | 0 | if (pitch > height_ * 0.5) { |
639 | 0 | all_pitches_.Add(pitch); |
640 | 0 | all_gaps_.Add(gap); |
641 | 0 | if (is_good(i)) { |
642 | | // In pass1 (after Pass1Analyze()), all characters marked as |
643 | | // "good" have a good consistent pitch with their previous |
644 | | // characters. However, it's not true in pass2 and a good |
645 | | // character may have a good pitch only between its successor. |
646 | | // So we collect only pitch values between two good |
647 | | // characters. and within tolerance in pass2. |
648 | 0 | if (pass1 || |
649 | 0 | (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) { |
650 | 0 | good_pitches_.Add(pitch); |
651 | 0 | if (!is_box_modified(i - 1) && !is_box_modified(i)) { |
652 | 0 | good_gaps_.Add(gap); |
653 | 0 | } |
654 | 0 | } |
655 | 0 | prev_was_good = true; |
656 | 0 | } else { |
657 | 0 | prev_was_good = false; |
658 | 0 | } |
659 | 0 | } |
660 | 0 | cx0 = cx1; |
661 | 0 | } |
662 | |
|
663 | 0 | good_pitches_.Finish(); |
664 | 0 | all_pitches_.Finish(); |
665 | 0 | good_gaps_.Finish(); |
666 | 0 | all_gaps_.Finish(); |
667 | 0 | heights_.Finish(); |
668 | |
|
669 | 0 | height_ = heights_.ile(0.875); |
670 | 0 | if (all_pitches_.empty()) { |
671 | 0 | pitch_ = 0.0f; |
672 | 0 | gap_ = 0.0f; |
673 | 0 | } else if (good_pitches_.size() < 2) { |
674 | | // We don't have enough data to estimate the pitch of this row yet. |
675 | | // Use median of all pitches as the initial guess. |
676 | 0 | pitch_ = all_pitches_.median(); |
677 | 0 | ASSERT_HOST(pitch_ > 0.0f); |
678 | 0 | gap_ = all_gaps_.ile(0.125); |
679 | 0 | } else { |
680 | 0 | pitch_ = good_pitches_.median(); |
681 | 0 | ASSERT_HOST(pitch_ > 0.0f); |
682 | 0 | gap_ = good_gaps_.ile(0.125); |
683 | 0 | } |
684 | 0 | } |
685 | | |
686 | 0 | void FPRow::DebugOutputResult(int row_index) { |
687 | 0 | if (num_chars() > 0) { |
688 | 0 | tprintf( |
689 | 0 | "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " |
690 | 0 | "space_size=%f, space_threshold=%d, xheight=%f\n", |
691 | 0 | row_index, static_cast<int>(real_row_->pitch_decision), real_row_->fixed_pitch, |
692 | 0 | real_row_->max_nonspace, real_row_->space_size, real_row_->space_threshold, |
693 | 0 | real_row_->xheight); |
694 | |
|
695 | 0 | for (unsigned i = 0; i < num_chars(); i++) { |
696 | 0 | tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), is_good(i), |
697 | 0 | character(i)->num_blobs()); |
698 | 0 | box(i).print(); |
699 | 0 | } |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | 0 | void FPRow::Pass1Analyze() { |
704 | 0 | if (num_chars() < 2) { |
705 | 0 | return; |
706 | 0 | } |
707 | | |
708 | 0 | if (estimated_pitch_ > 0.0f) { |
709 | 0 | for (size_t i = 2; i < num_chars(); i++) { |
710 | 0 | if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) && |
711 | 0 | is_good_pitch(estimated_pitch_, box(i - 1), box(i))) { |
712 | 0 | mark_good(i - 1); |
713 | 0 | } |
714 | 0 | } |
715 | 0 | } else { |
716 | 0 | for (size_t i = 2; i < num_chars(); i++) { |
717 | 0 | if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i))) { |
718 | 0 | mark_good(i - 1); |
719 | 0 | } |
720 | 0 | } |
721 | 0 | } |
722 | 0 | character(0)->set_alignment(character(1)->alignment()); |
723 | 0 | character(num_chars() - 1)->set_alignment(character(num_chars() - 2)->alignment()); |
724 | 0 | } |
725 | | |
726 | 0 | bool FPRow::Pass2Analyze() { |
727 | 0 | bool changed = false; |
728 | 0 | if (num_chars() <= 1 || estimated_pitch_ == 0.0f) { |
729 | 0 | return false; |
730 | 0 | } |
731 | 0 | for (size_t i = 0; i < num_chars(); i++) { |
732 | 0 | if (is_final(i)) { |
733 | 0 | continue; |
734 | 0 | } |
735 | | |
736 | 0 | FPChar::Alignment alignment = character(i)->alignment(); |
737 | 0 | bool intersecting = false; |
738 | 0 | bool not_intersecting = false; |
739 | |
|
740 | 0 | if (i < num_chars() - 1 && is_final(i + 1)) { |
741 | | // Next character is already finalized. Estimate the imaginary |
742 | | // body including this character based on the character. Skip |
743 | | // whitespace if necessary. |
744 | 0 | bool skipped_whitespaces = false; |
745 | 0 | float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; |
746 | 0 | while (c1 > box(i).right()) { |
747 | 0 | skipped_whitespaces = true; |
748 | 0 | c1 -= estimated_pitch_; |
749 | 0 | } |
750 | 0 | TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top()); |
751 | | |
752 | | // Collect all characters that mostly fit in the region. |
753 | | // Also, their union height shouldn't be too big. |
754 | 0 | int j = i; |
755 | 0 | TBOX merged; |
756 | 0 | while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) && |
757 | 0 | merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) { |
758 | 0 | merged += box(j); |
759 | 0 | j--; |
760 | 0 | } |
761 | |
|
762 | 0 | if (j >= 0 && significant_overlap(ibody, box(j))) { |
763 | | // character(j) lies on the character boundary and doesn't fit |
764 | | // well into the imaginary body. |
765 | 0 | if (!is_final(j)) { |
766 | 0 | intersecting = true; |
767 | 0 | } |
768 | 0 | } else { |
769 | 0 | not_intersecting = true; |
770 | 0 | if (i - j > 0) { |
771 | | // Merge character(j+1) ... character(i) because they fit |
772 | | // into the body nicely. |
773 | 0 | if (i - j == 1) { |
774 | | // Only one char in the imaginary body. |
775 | 0 | if (!skipped_whitespaces) { |
776 | 0 | mark_good(i); |
777 | 0 | } |
778 | | // set ibody as bounding box of this character to get |
779 | | // better pitch analysis result for halfwidth glyphs |
780 | | // followed by a halfwidth space. |
781 | 0 | if (box(i).width() <= estimated_pitch_ * 0.5) { |
782 | 0 | ibody += box(i); |
783 | 0 | character(i)->set_box(ibody); |
784 | 0 | } |
785 | 0 | character(i)->set_merge_to_prev(false); |
786 | 0 | finalize(i); |
787 | 0 | } else { |
788 | 0 | for (int k = i; k > j + 1; k--) { |
789 | 0 | character(k)->set_merge_to_prev(true); |
790 | 0 | } |
791 | 0 | } |
792 | 0 | } |
793 | 0 | } |
794 | 0 | } |
795 | 0 | if (i > 0 && is_final(i - 1)) { |
796 | | // Now we repeat everything from the opposite side. Previous |
797 | | // character is already finalized. Estimate the imaginary body |
798 | | // including this character based on the character. |
799 | 0 | bool skipped_whitespaces = false; |
800 | 0 | float c1 = center_x(i - 1) + 1.5 * estimated_pitch_; |
801 | 0 | while (c1 < box(i).left()) { |
802 | 0 | skipped_whitespaces = true; |
803 | 0 | c1 += estimated_pitch_; |
804 | 0 | } |
805 | 0 | TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top()); |
806 | |
|
807 | 0 | size_t j = i; |
808 | 0 | TBOX merged; |
809 | 0 | while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) && |
810 | 0 | merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) { |
811 | 0 | merged += box(j); |
812 | 0 | j++; |
813 | 0 | } |
814 | |
|
815 | 0 | if (j < num_chars() && significant_overlap(ibody, box(j))) { |
816 | 0 | if (!is_final(j)) { |
817 | 0 | intersecting = true; |
818 | 0 | } |
819 | 0 | } else { |
820 | 0 | not_intersecting = true; |
821 | 0 | if (j - i > 0) { |
822 | 0 | if (j - i == 1) { |
823 | 0 | if (!skipped_whitespaces) { |
824 | 0 | mark_good(i); |
825 | 0 | } |
826 | 0 | if (box(i).width() <= estimated_pitch_ * 0.5) { |
827 | 0 | ibody += box(i); |
828 | 0 | character(i)->set_box(ibody); |
829 | 0 | } |
830 | 0 | character(i)->set_merge_to_prev(false); |
831 | 0 | finalize(i); |
832 | 0 | } else { |
833 | 0 | for (size_t k = i + 1; k < j; k++) { |
834 | 0 | character(k)->set_merge_to_prev(true); |
835 | 0 | } |
836 | 0 | } |
837 | 0 | } |
838 | 0 | } |
839 | 0 | } |
840 | | |
841 | | // This character doesn't fit well into the estimated imaginary |
842 | | // bodies. Mark it as bad. |
843 | 0 | if (intersecting && !not_intersecting) { |
844 | 0 | mark_bad(i); |
845 | 0 | } |
846 | 0 | if (character(i)->alignment() != alignment || character(i)->merge_to_prev()) { |
847 | 0 | changed = true; |
848 | 0 | } |
849 | 0 | } |
850 | |
|
851 | 0 | return changed; |
852 | 0 | } |
853 | | |
854 | 0 | void FPRow::MergeFragments() { |
855 | 0 | int last_char = 0; |
856 | |
|
857 | 0 | for (size_t j = 0; j < num_chars(); ++j) { |
858 | 0 | if (character(j)->merge_to_prev()) { |
859 | 0 | character(last_char)->Merge(*character(j)); |
860 | 0 | character(j)->set_delete_flag(true); |
861 | 0 | clear_alignment(last_char); |
862 | 0 | character(j - 1)->set_merge_to_prev(false); |
863 | 0 | } else { |
864 | 0 | last_char = j; |
865 | 0 | } |
866 | 0 | } |
867 | 0 | DeleteChars(); |
868 | 0 | } |
869 | | |
870 | 0 | void FPRow::FinalizeLargeChars() { |
871 | 0 | float row_pitch = estimated_pitch(); |
872 | 0 | for (size_t i = 0; i < num_chars(); i++) { |
873 | 0 | if (is_final(i)) { |
874 | 0 | continue; |
875 | 0 | } |
876 | | |
877 | | // Finalize if both neighbors are finalized. We have no other choice. |
878 | 0 | if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) { |
879 | 0 | finalize(i); |
880 | 0 | continue; |
881 | 0 | } |
882 | | |
883 | 0 | float cx = center_x(i); |
884 | 0 | TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1); |
885 | 0 | if (i > 0) { |
886 | | // The preceding character significantly intersects with the |
887 | | // imaginary body of this character. Let Pass2Analyze() handle |
888 | | // this case. |
889 | 0 | if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) { |
890 | 0 | continue; |
891 | 0 | } |
892 | 0 | if (!is_final(i - 1)) { |
893 | 0 | TBOX merged = box(i); |
894 | 0 | merged += box(i - 1); |
895 | 0 | if (merged.width() < row_pitch) { |
896 | 0 | continue; |
897 | 0 | } |
898 | | // This character cannot be finalized yet because it can be |
899 | | // merged with the previous one. Again, let Pass2Analyze() |
900 | | // handle this case. |
901 | 0 | } |
902 | 0 | } |
903 | 0 | if (i < num_chars() - 1) { |
904 | 0 | if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) { |
905 | 0 | continue; |
906 | 0 | } |
907 | 0 | if (!is_final(i + 1)) { |
908 | 0 | TBOX merged = box(i); |
909 | 0 | merged += box(i + 1); |
910 | 0 | if (merged.width() < row_pitch) { |
911 | 0 | continue; |
912 | 0 | } |
913 | 0 | } |
914 | 0 | } |
915 | 0 | finalize(i); |
916 | 0 | } |
917 | | |
918 | | // Update alignment decision. We only consider finalized characters |
919 | | // in pass2. E.g. if a finalized character C has another finalized |
920 | | // character L on its left and a not-finalized character R on its |
921 | | // right, we mark C as good if the pitch between C and L is good, |
922 | | // regardless of the pitch between C and R. |
923 | 0 | for (size_t i = 0; i < num_chars(); i++) { |
924 | 0 | if (!is_final(i)) { |
925 | 0 | continue; |
926 | 0 | } |
927 | 0 | bool good_pitch = false; |
928 | 0 | bool bad_pitch = false; |
929 | 0 | if (i > 0 && is_final(i - 1)) { |
930 | 0 | if (is_good_pitch(row_pitch, box(i - 1), box(i))) { |
931 | 0 | good_pitch = true; |
932 | 0 | } else { |
933 | 0 | bad_pitch = true; |
934 | 0 | } |
935 | 0 | } |
936 | 0 | if (i < num_chars() - 1 && is_final(i + 1)) { |
937 | 0 | if (is_good_pitch(row_pitch, box(i), box(i + 1))) { |
938 | 0 | good_pitch = true; |
939 | 0 | } else { |
940 | 0 | bad_pitch = true; |
941 | 0 | } |
942 | 0 | } |
943 | 0 | if (good_pitch && !bad_pitch) { |
944 | 0 | mark_good(i); |
945 | 0 | } else if (!good_pitch && bad_pitch) { |
946 | 0 | mark_bad(i); |
947 | 0 | } |
948 | 0 | } |
949 | 0 | } |
950 | | |
951 | | class FPAnalyzer { |
952 | | public: |
953 | | FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks); |
954 | 0 | ~FPAnalyzer() = default; |
955 | | |
956 | 0 | void Pass1Analyze() { |
957 | 0 | for (auto &row : rows_) { |
958 | 0 | row.Pass1Analyze(); |
959 | 0 | } |
960 | 0 | } |
961 | | |
962 | | // Estimate character pitch for each row. The argument pass1 can be |
963 | | // set to true if the function is called after Pass1Analyze(), to |
964 | | // eliminate some redundant computation. |
965 | | void EstimatePitch(bool pass1); |
966 | | |
967 | 0 | bool maybe_fixed_pitch() { |
968 | 0 | if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) { |
969 | 0 | return false; |
970 | 0 | } |
971 | 0 | return true; |
972 | 0 | } |
973 | | |
974 | 0 | void MergeFragments() { |
975 | 0 | for (auto &row : rows_) { |
976 | 0 | row.MergeFragments(); |
977 | 0 | } |
978 | 0 | } |
979 | | |
980 | 0 | void FinalizeLargeChars() { |
981 | 0 | for (auto &row : rows_) { |
982 | 0 | row.FinalizeLargeChars(); |
983 | 0 | } |
984 | 0 | } |
985 | | |
986 | 0 | bool Pass2Analyze() { |
987 | 0 | bool changed = false; |
988 | 0 | for (auto &row : rows_) { |
989 | 0 | if (row.Pass2Analyze()) { |
990 | 0 | changed = true; |
991 | 0 | } |
992 | 0 | } |
993 | 0 | return changed; |
994 | 0 | } |
995 | | |
996 | 0 | void OutputEstimations() { |
997 | 0 | for (auto &row : rows_) { |
998 | 0 | row.OutputEstimations(); |
999 | 0 | } |
1000 | | // Don't we need page-level estimation of gaps/spaces? |
1001 | 0 | } |
1002 | | |
1003 | 0 | void DebugOutputResult() { |
1004 | 0 | tprintf("FPAnalyzer: final result\n"); |
1005 | 0 | for (size_t i = 0; i < rows_.size(); i++) { |
1006 | 0 | rows_[i].DebugOutputResult(i); |
1007 | 0 | } |
1008 | 0 | } |
1009 | | |
1010 | 0 | size_t num_rows() { |
1011 | 0 | return rows_.size(); |
1012 | 0 | } |
1013 | | |
1014 | | // Returns the upper limit for pass2 loop iteration. |
1015 | 0 | unsigned max_iteration() { |
1016 | | // We're fixing at least one character per iteration. So basically |
1017 | | // we shouldn't require more than max_chars_per_row_ iterations. |
1018 | 0 | return max_chars_per_row_ + 100; |
1019 | 0 | } |
1020 | | |
1021 | | private: |
1022 | | ICOORD page_tr_; |
1023 | | std::vector<FPRow> rows_; |
1024 | | unsigned num_tall_rows_; |
1025 | | unsigned num_bad_rows_; |
1026 | | // TODO: num_empty_rows_ is incremented, but never used otherwise. |
1027 | | unsigned num_empty_rows_; |
1028 | | unsigned max_chars_per_row_; |
1029 | | }; |
1030 | | |
1031 | | FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) |
1032 | 0 | : page_tr_(page_tr) |
1033 | 0 | , num_tall_rows_(0) |
1034 | 0 | , num_bad_rows_(0) |
1035 | 0 | , num_empty_rows_(0) |
1036 | 0 | , max_chars_per_row_(0) { |
1037 | 0 | TO_BLOCK_IT block_it(port_blocks); |
1038 | |
|
1039 | 0 | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
1040 | 0 | TO_BLOCK *block = block_it.data(); |
1041 | 0 | if (!block->get_rows()->empty()) { |
1042 | 0 | ASSERT_HOST(block->xheight > 0); |
1043 | 0 | find_repeated_chars(block, false); |
1044 | 0 | } |
1045 | 0 | } |
1046 | |
|
1047 | 0 | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
1048 | 0 | TO_ROW_IT row_it = block_it.data()->get_rows(); |
1049 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
1050 | 0 | FPRow row; |
1051 | 0 | row.Init(row_it.data()); |
1052 | 0 | rows_.push_back(row); |
1053 | 0 | size_t num_chars = rows_.back().num_chars(); |
1054 | 0 | if (num_chars <= 1) { |
1055 | 0 | num_empty_rows_++; |
1056 | 0 | } |
1057 | 0 | if (num_chars > max_chars_per_row_) { |
1058 | 0 | max_chars_per_row_ = num_chars; |
1059 | 0 | } |
1060 | 0 | } |
1061 | 0 | } |
1062 | 0 | } |
1063 | | |
1064 | 0 | void FPAnalyzer::EstimatePitch(bool pass1) { |
1065 | 0 | LocalCorrelation pitch_height_stats; |
1066 | |
|
1067 | 0 | num_tall_rows_ = 0; |
1068 | 0 | num_bad_rows_ = 0; |
1069 | 0 | pitch_height_stats.Clear(); |
1070 | 0 | for (auto &row : rows_) { |
1071 | 0 | row.EstimatePitch(pass1); |
1072 | 0 | if (row.good_pitches()) { |
1073 | 0 | pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches()); |
1074 | 0 | if (row.height_pitch_ratio() > 1.1) { |
1075 | 0 | num_tall_rows_++; |
1076 | 0 | } |
1077 | 0 | } else { |
1078 | 0 | num_bad_rows_++; |
1079 | 0 | } |
1080 | 0 | } |
1081 | |
|
1082 | 0 | pitch_height_stats.Finish(); |
1083 | 0 | for (auto &row : rows_) { |
1084 | 0 | if (row.good_pitches() >= 5) { |
1085 | | // We have enough evidences. Just use the pitch estimation |
1086 | | // from this row. |
1087 | 0 | row.set_estimated_pitch(row.pitch()); |
1088 | 0 | } else if (row.num_chars() > 1) { |
1089 | 0 | float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f); |
1090 | | // CJK characters are more likely to be fragmented than poorly |
1091 | | // chopped. So trust the page-level estimation of character |
1092 | | // pitch only if it's larger than row-level estimation or |
1093 | | // row-level estimation is too large (2x bigger than row height). |
1094 | 0 | if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) { |
1095 | 0 | row.set_estimated_pitch(estimated_pitch); |
1096 | 0 | } else { |
1097 | 0 | row.set_estimated_pitch(row.pitch()); |
1098 | 0 | } |
1099 | 0 | } |
1100 | 0 | } |
1101 | 0 | } |
1102 | | |
1103 | 0 | void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { |
1104 | 0 | FPAnalyzer analyzer(page_tr, port_blocks); |
1105 | 0 | if (analyzer.num_rows() == 0) { |
1106 | 0 | return; |
1107 | 0 | } |
1108 | | |
1109 | 0 | analyzer.Pass1Analyze(); |
1110 | 0 | analyzer.EstimatePitch(true); |
1111 | | |
1112 | | // Perform pass1 analysis again with the initial estimation of row |
1113 | | // pitches, for better estimation. |
1114 | 0 | analyzer.Pass1Analyze(); |
1115 | 0 | analyzer.EstimatePitch(true); |
1116 | | |
1117 | | // Early exit if the page doesn't seem to contain fixed pitch rows. |
1118 | 0 | if (!analyzer.maybe_fixed_pitch()) { |
1119 | 0 | if (textord_debug_pitch_test) { |
1120 | 0 | tprintf("Page doesn't seem to contain fixed pitch rows\n"); |
1121 | 0 | } |
1122 | 0 | return; |
1123 | 0 | } |
1124 | | |
1125 | 0 | unsigned iteration = 0; |
1126 | 0 | do { |
1127 | 0 | analyzer.MergeFragments(); |
1128 | 0 | analyzer.FinalizeLargeChars(); |
1129 | 0 | analyzer.EstimatePitch(false); |
1130 | 0 | iteration++; |
1131 | 0 | } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration()); |
1132 | |
|
1133 | 0 | if (textord_debug_pitch_test) { |
1134 | 0 | tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration, |
1135 | 0 | analyzer.max_iteration()); |
1136 | 0 | } |
1137 | |
|
1138 | 0 | analyzer.OutputEstimations(); |
1139 | 0 | if (textord_debug_pitch_test) { |
1140 | 0 | analyzer.DebugOutputResult(); |
1141 | 0 | } |
1142 | 0 | } |
1143 | | |
1144 | | } // namespace tesseract |