Coverage Report

Created: 2026-06-13 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/textord/cjkpitch.cpp
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        cjkpitch.cpp
3
// Description: Code to determine fixed pitchness and the pitch if fixed,
4
//              for CJK text.
5
// Author:      takenaka@google.com (Hiroshi Takenaka)
6
//
7
// Copyright 2011 Google Inc. All Rights Reserved.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
#include "cjkpitch.h"
21
#include "topitch.h"
22
#include "tovars.h"
23
24
#include <algorithm> // for std::sort
25
#include <cmath>
26
#include <vector>    // for std::vector
27
28
namespace tesseract {
29
30
static BOOL_VAR(textord_space_size_is_variable, false,
31
                "If true, word delimiter spaces are assumed to have "
32
                "variable width, even though characters have fixed pitch.");
33
34
// Allow +/-10% error for character pitch / body size.
35
static const float kFPTolerance = 0.1f;
36
37
// Minimum ratio of "good" character pitch for a row to be considered
38
// to be fixed-pitch.
39
static const float kFixedPitchThreshold = 0.35f;
40
41
// rank statistics for a small collection of float values.
42
class SimpleStats {
43
public:
44
0
  SimpleStats() = default;
45
0
  ~SimpleStats() = default;
46
47
0
  void Clear() {
48
0
    values_.clear();
49
0
    finalized_ = false;
50
0
  }
51
52
0
  void Add(float value) {
53
0
    values_.push_back(value);
54
0
    finalized_ = false;
55
0
  }
56
57
0
  void Finish() {
58
0
    std::sort(values_.begin(), values_.end());
59
0
    finalized_ = true;
60
0
  }
61
62
0
  float ile(double frac) {
63
0
    if (!finalized_) {
64
0
      Finish();
65
0
    }
66
0
    if (values_.empty()) {
67
0
      return 0.0f;
68
0
    }
69
0
    if (frac >= 1.0) {
70
0
      return values_.back();
71
0
    }
72
0
    if (frac <= 0.0 || values_.size() == 1) {
73
0
      return values_[0];
74
0
    }
75
0
    int index = static_cast<int>((values_.size() - 1) * frac);
76
0
    float reminder = (values_.size() - 1) * frac - index;
77
78
0
    return values_[index] * (1.0f - reminder) + values_[index + 1] * reminder;
79
0
  }
80
81
0
  float median() {
82
0
    return ile(0.5);
83
0
  }
84
85
0
  float minimum() {
86
0
    if (!finalized_) {
87
0
      Finish();
88
0
    }
89
0
    if (values_.empty()) {
90
0
      return 0.0f;
91
0
    }
92
0
    return values_[0];
93
0
  }
94
95
0
  bool empty() const {
96
0
    return values_.empty();
97
0
  }
98
99
0
  int size() const {
100
0
    return values_.size();
101
0
  }
102
103
private:
104
  bool finalized_ = false;
105
  std::vector<float> values_;
106
};
107
108
// statistics for a small collection of float pairs (x, y).
109
// EstimateYFor(x, r) returns the estimated y at x, based on
110
// existing samples between x*(1-r) ~ x*(1+r).
111
class LocalCorrelation {
112
public:
113
  struct float_pair {
114
    float x, y;
115
    int vote;
116
  };
117
118
0
  LocalCorrelation() : finalized_(false) {}
119
0
  ~LocalCorrelation() = default;
120
121
0
  void Finish() {
122
0
    std::sort(values_.begin(), values_.end(), float_pair_compare);
123
0
    finalized_ = true;
124
0
  }
125
126
0
  void Clear() {
127
0
    finalized_ = false;
128
0
  }
129
130
0
  void Add(float x, float y, int v) {
131
0
    struct float_pair value;
132
0
    value.x = x;
133
0
    value.y = y;
134
0
    value.vote = v;
135
0
    values_.push_back(value);
136
0
    finalized_ = false;
137
0
  }
138
139
0
  float EstimateYFor(float x, float r) {
140
0
    ASSERT_HOST(finalized_);
141
0
    unsigned start = 0, end = values_.size();
142
    // Because the number of samples (used_) is assumed to be small,
143
    // just use linear search to find values within the range.
144
0
    while (start < values_.size() && values_[start].x < x * (1 - r)) {
145
0
      start++;
146
0
    }
147
0
    while (end > 0 && values_[end - 1].x > x * (1 + r)) {
148
0
      end--;
149
0
    }
150
151
    // Fall back to the global average if there are no data within r
152
    // of x.
153
0
    if (start >= end) {
154
0
      start = 0;
155
0
      end = values_.size();
156
0
    }
157
158
    // Compute weighted average of the values.
159
0
    float rc = 0;
160
0
    int vote = 0;
161
0
    for (auto i = start; i < end; i++) {
162
0
      rc += values_[i].vote * x * values_[i].y / values_[i].x;
163
0
      vote += values_[i].vote;
164
0
    }
165
166
0
    return vote == 0 ? 0.0f : rc / vote;
167
0
  }
168
169
private:
170
0
  static bool float_pair_compare(const float_pair f_a, const float_pair f_b) {
171
0
    return f_a.x < f_b.x;
172
0
  }
173
174
  bool finalized_;
175
  std::vector<struct float_pair> values_;
176
};
177
178
// Class to represent a character on a fixed pitch row.  A FPChar may
179
// consist of multiple blobs (BLOBNBOX's).
180
class FPChar {
181
public:
182
  enum Alignment { ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD };
183
184
  FPChar()
185
0
      : box_()
186
0
      , real_body_()
187
0
      , from_(nullptr)
188
0
      , to_(nullptr)
189
0
      , num_blobs_(0)
190
0
      , max_gap_(0)
191
0
      , final_(false)
192
0
      , alignment_(ALIGN_UNKNOWN)
193
0
      , merge_to_prev_(false)
194
0
      , delete_flag_(false) {}
195
196
  // Initialize from blob.
197
0
  void Init(BLOBNBOX *blob) {
198
0
    box_ = blob->bounding_box();
199
0
    real_body_ = box_;
200
0
    from_ = to_ = blob;
201
0
    num_blobs_ = 1;
202
0
  }
203
204
  // Merge this character with "next". The "next" character should
205
  // consist of succeeding blobs on the same row.
206
0
  void Merge(const FPChar &next) {
207
0
    int gap = real_body_.x_gap(next.real_body_);
208
0
    if (gap > max_gap_) {
209
0
      max_gap_ = gap;
210
0
    }
211
212
0
    box_ += next.box_;
213
0
    real_body_ += next.real_body_;
214
0
    to_ = next.to_;
215
0
    num_blobs_ += next.num_blobs_;
216
0
  }
217
218
  // Accessors.
219
0
  const TBOX &box() const {
220
0
    return box_;
221
0
  }
222
0
  void set_box(const TBOX &box) {
223
0
    box_ = box;
224
0
  }
225
0
  const TBOX &real_body() const {
226
0
    return real_body_;
227
0
  }
228
229
0
  bool is_final() const {
230
0
    return final_;
231
0
  }
232
0
  void set_final(bool flag) {
233
0
    final_ = flag;
234
0
  }
235
236
0
  const Alignment &alignment() const {
237
0
    return alignment_;
238
0
  }
239
0
  void set_alignment(Alignment alignment) {
240
0
    alignment_ = alignment;
241
0
  }
242
243
0
  bool merge_to_prev() const {
244
0
    return merge_to_prev_;
245
0
  }
246
0
  void set_merge_to_prev(bool flag) {
247
0
    merge_to_prev_ = flag;
248
0
  }
249
250
0
  bool delete_flag() const {
251
0
    return delete_flag_;
252
0
  }
253
0
  void set_delete_flag(bool flag) {
254
0
    delete_flag_ = flag;
255
0
  }
256
257
0
  int max_gap() const {
258
0
    return max_gap_;
259
0
  }
260
261
0
  int num_blobs() const {
262
0
    return num_blobs_;
263
0
  }
264
265
private:
266
  TBOX box_; // Rectangle region considered to be occupied by this
267
  // character.  It could be bigger than the bounding box.
268
  TBOX real_body_; // Real bounding box of this character.
269
  BLOBNBOX *from_; // The first blob of this character.
270
  BLOBNBOX *to_;   // The last blob of this character.
271
  int num_blobs_;  // Number of blobs that belong to this character.
272
  int max_gap_;    // Maximum x gap between the blobs.
273
274
  bool final_; // True if alignment/fragmentation decision for this
275
  // character is finalized.
276
277
  Alignment alignment_; // Alignment status.
278
  bool merge_to_prev_;  // True if this is a fragmented blob that
279
  // needs to be merged to the previous
280
  // character.
281
282
  int delete_flag_; // True if this character is merged to another
283
                    // one and needs to be deleted.
284
};
285
286
// Class to represent a fixed pitch row, as a linear collection of
287
// FPChar's.
288
class FPRow {
289
public:
290
0
  FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), heights_(), characters_() {}
291
292
0
  ~FPRow() = default;
293
294
  // Initialize from TD_ROW.
295
  void Init(TO_ROW *row);
296
297
  // Estimate character pitch of this row, based on current alignment
298
  // status of underlying FPChar's.  The argument pass1 can be set to
299
  // true if the function is called after Pass1Analyze(), to eliminate
300
  // some redundant computation.
301
  void EstimatePitch(bool pass1);
302
303
  // Check each character if it has good character pitches between its
304
  // predecessor and its successor and set its alignment status.  If
305
  // we already calculated the estimated pitch for this row, the value
306
  // is used.  If we didn't, a character is considered to be good, if
307
  // the pitches between its predecessor and its successor are almost
308
  // equal.
309
  void Pass1Analyze();
310
311
  // Find characters that fit nicely into one imaginary body next to a
312
  // character which is already finalized. Then mark them as character
313
  // fragments.
314
  bool Pass2Analyze();
315
316
  // Merge FPChars marked as character fragments into one.
317
  void MergeFragments();
318
319
  // Finalize characters that are already large enough and cannot be
320
  // merged with others any more.
321
  void FinalizeLargeChars();
322
323
  // Output pitch estimation results to attributes of TD_ROW.
324
  void OutputEstimations();
325
326
  void DebugOutputResult(int row_index);
327
328
0
  int good_pitches() {
329
0
    return good_pitches_.size();
330
0
  }
331
332
0
  float pitch() {
333
0
    return pitch_;
334
0
  }
335
336
0
  float estimated_pitch() {
337
0
    return estimated_pitch_;
338
0
  }
339
340
0
  void set_estimated_pitch(float v) {
341
0
    estimated_pitch_ = v;
342
0
  }
343
344
0
  float height() {
345
0
    return height_;
346
0
  }
347
348
0
  float height_pitch_ratio() {
349
0
    if (good_pitches_.size() < 2) {
350
0
      return -1.0;
351
0
    }
352
0
    return height_ / good_pitches_.median();
353
0
  }
354
355
0
  float gap() {
356
0
    return gap_;
357
0
  }
358
359
0
  size_t num_chars() {
360
0
    return characters_.size();
361
0
  }
362
0
  FPChar *character(int i) {
363
0
    return &characters_[i];
364
0
  }
365
366
0
  const TBOX &box(int i) {
367
0
    return characters_[i].box();
368
0
  }
369
370
0
  const TBOX &real_body(int i) {
371
0
    return characters_[i].real_body();
372
0
  }
373
374
0
  bool is_box_modified(int i) {
375
0
    return !(characters_[i].box() == characters_[i].real_body());
376
0
  }
377
378
0
  float center_x(int i) {
379
0
    return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
380
0
  }
381
382
0
  bool is_final(int i) {
383
0
    return characters_[i].is_final();
384
0
  }
385
386
0
  void finalize(int i) {
387
0
    characters_[i].set_final(true);
388
0
  }
389
390
0
  bool is_good(int i) {
391
0
    return characters_[i].alignment() == FPChar::ALIGN_GOOD;
392
0
  }
393
394
0
  void mark_good(int i) {
395
0
    characters_[i].set_alignment(FPChar::ALIGN_GOOD);
396
0
  }
397
398
0
  void mark_bad(int i) {
399
0
    characters_[i].set_alignment(FPChar::ALIGN_BAD);
400
0
  }
401
402
0
  void clear_alignment(int i) {
403
0
    characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
404
0
  }
405
406
private:
407
0
  static float x_overlap_fraction(const TBOX &box1, const TBOX &box2) {
408
0
    if (std::min(box1.width(), box2.width()) == 0) {
409
0
      return 0.0;
410
0
    }
411
0
    return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
412
0
  }
413
414
0
  static bool mostly_overlap(const TBOX &box1, const TBOX &box2) {
415
0
    return x_overlap_fraction(box1, box2) > 0.9;
416
0
  }
417
418
0
  static bool significant_overlap(const TBOX &box1, const TBOX &box2) {
419
0
    if (std::min(box1.width(), box2.width()) == 0) {
420
0
      return false;
421
0
    }
422
0
    int overlap = -box1.x_gap(box2);
423
0
    return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
424
0
  }
425
426
0
  static float box_pitch(const TBOX &ref, const TBOX &box) {
427
0
    return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
428
0
  }
429
430
  // Check if two neighboring characters satisfy the fixed pitch model.
431
0
  static bool is_good_pitch(float pitch, const TBOX &box1, const TBOX &box2) {
432
    // Character box shouldn't exceed pitch.
433
0
    if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
434
0
        box2.width() >= pitch * (1.0 + kFPTolerance) ||
435
0
        box1.height() >= pitch * (1.0 + kFPTolerance) ||
436
0
        box2.height() >= pitch * (1.0 + kFPTolerance)) {
437
0
      return false;
438
0
    }
439
440
0
    const float real_pitch = box_pitch(box1, box2);
441
0
    if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) {
442
0
      return true;
443
0
    }
444
445
0
    if (textord_space_size_is_variable) {
446
      // Hangul characters usually have fixed pitch, but words are
447
      // delimited by space which can be narrower than characters.
448
0
      if (real_pitch > pitch && real_pitch < pitch * 2.0 && real_pitch - box1.x_gap(box2) < pitch) {
449
0
        return true;
450
0
      }
451
0
    }
452
0
    return false;
453
0
  }
454
455
0
  static bool is_interesting_blob(const BLOBNBOX *blob) {
456
0
    return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
457
0
  }
458
459
  // Cleanup chars that are already merged to others.
460
0
  void DeleteChars() {
461
0
    unsigned index = 0;
462
0
    for (unsigned i = 0; i < characters_.size(); ++i) {
463
0
      if (!characters_[i].delete_flag()) {
464
0
        if (index != i) {
465
0
          characters_[index] = characters_[i];
466
0
        }
467
0
        index++;
468
0
      }
469
0
    }
470
0
    characters_.resize(index);
471
0
  }
472
473
  float pitch_ = 0.0f;           // Character pitch.
474
  float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
475
  // to be good enough.
476
  float height_ = 0.0f; // Character height.
477
  float gap_ = 0.0f;    // Minimum gap between characters.
478
479
  // Pitches between any two successive characters.
480
  SimpleStats all_pitches_;
481
  // Gaps between any two successive characters.
482
  SimpleStats all_gaps_;
483
  // Pitches between any two successive characters that are consistent
484
  // with the fixed pitch model.
485
  SimpleStats good_pitches_;
486
  // Gaps between any two successive characters that are consistent
487
  // with the fixed pitch model.
488
  SimpleStats good_gaps_;
489
490
  SimpleStats heights_;
491
492
  std::vector<FPChar> characters_;
493
  TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row.
494
};
495
496
0
void FPRow::Init(TO_ROW *row) {
497
0
  ASSERT_HOST(row != nullptr);
498
0
  ASSERT_HOST(row->xheight > 0);
499
0
  real_row_ = row;
500
0
  real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
501
502
0
  BLOBNBOX_IT blob_it = row->blob_list();
503
  // Initialize characters_ and compute the initial estimation of
504
  // character height.
505
0
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
506
0
    if (is_interesting_blob(blob_it.data())) {
507
0
      FPChar fp_char;
508
0
      fp_char.Init(blob_it.data());
509
      // Merge unconditionally if two blobs overlap.
510
0
      if (!characters_.empty() && significant_overlap(fp_char.box(), characters_.back().box())) {
511
0
        characters_.back().Merge(fp_char);
512
0
      } else {
513
0
        characters_.push_back(fp_char);
514
0
      }
515
0
      TBOX bound = blob_it.data()->bounding_box();
516
0
      if (bound.height() * 3.0 > bound.width()) {
517
0
        heights_.Add(bound.height());
518
0
      }
519
0
    }
520
0
  }
521
0
  heights_.Finish();
522
0
  height_ = heights_.ile(0.875);
523
0
}
524
525
0
void FPRow::OutputEstimations() {
526
0
  if (good_pitches_.empty()) {
527
0
    pitch_ = 0.0f;
528
0
    real_row_->pitch_decision = PITCH_CORR_PROP;
529
0
    return;
530
0
  }
531
532
0
  pitch_ = good_pitches_.median();
533
0
  real_row_->fixed_pitch = pitch_;
534
  // good_gaps_.ile(0.125) can be large if most characters on the row
535
  // are skinny. Use pitch_ - height_ instead if it's smaller, but
536
  // positive.
537
0
  real_row_->kern_size = real_row_->pr_nonsp =
538
0
      std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
539
0
  real_row_->body_size = pitch_ - real_row_->kern_size;
540
541
0
  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
542
    // If more than half of the characters of a line don't fit to the
543
    // fixed pitch model, consider the line to be proportional. 50%
544
    // seems to be a good threshold in practice as well.
545
    // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
546
    // real_row_ as a partial estimation result and try to use them in the
547
    // normalization process.
548
0
    real_row_->pitch_decision = PITCH_CORR_PROP;
549
0
    return;
550
0
  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
551
0
    real_row_->pitch_decision = PITCH_DEF_FIXED;
552
0
  } else {
553
0
    real_row_->pitch_decision = PITCH_CORR_FIXED;
554
0
  }
555
556
0
  real_row_->space_size = real_row_->pr_space = pitch_;
557
  // Set min_space to 50% of character pitch so that we can break CJK
558
  // text at a half-width space after punctuation.
559
0
  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
560
561
  // Don't consider a quarter space as a real space, because it's used
562
  // for line justification in traditional Japanese books.
563
0
  real_row_->max_nonspace =
564
0
      std::max(pitch_ * 0.25 + good_gaps_.minimum(), static_cast<double>(good_gaps_.ile(0.875)));
565
566
0
  int space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
567
0
                                 static_cast<int>(real_row_->xheight));
568
569
  // Make max_nonspace larger than any intra-character gap so that
570
  // make_prop_words() won't break a row at the middle of a character.
571
0
  for (size_t i = 0; i < num_chars(); ++i) {
572
0
    if (characters_[i].max_gap() > real_row_->max_nonspace) {
573
0
      real_row_->max_nonspace = characters_[i].max_gap();
574
0
    }
575
0
  }
576
0
  real_row_->space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
577
0
                                        static_cast<int>(real_row_->xheight));
578
0
  real_row_->used_dm_model = false;
579
580
  // Setup char_cells.
581
0
  ICOORDELT_IT cell_it = &real_row_->char_cells;
582
0
  auto *cell = new ICOORDELT(real_body(0).left(), 0);
583
0
  cell_it.add_after_then_move(cell);
584
585
0
  int right = real_body(0).right();
586
0
  for (size_t i = 1; i < num_chars(); ++i) {
587
    // Put a word break if gap between two characters is bigger than
588
    // space_threshold.  Don't break if none of two characters
589
    // couldn't be "finalized", because maybe they need to be merged
590
    // to one character.
591
0
    if ((is_final(i - 1) || is_final(i)) &&
592
0
        real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
593
0
      cell = new ICOORDELT(right + 1, 0);
594
0
      cell_it.add_after_then_move(cell);
595
0
      while (right + pitch_ < box(i).left()) {
596
0
        right += pitch_;
597
0
        cell = new ICOORDELT(right + 1, 0);
598
0
        cell_it.add_after_then_move(cell);
599
0
      }
600
0
      right = box(i).left();
601
0
    }
602
0
    cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
603
0
    cell_it.add_after_then_move(cell);
604
0
    right = real_body(i).right();
605
0
  }
606
607
0
  cell = new ICOORDELT(right + 1, 0);
608
0
  cell_it.add_after_then_move(cell);
609
610
  // TODO(takenaka): add code to store alignment/fragmentation
611
  // information to blobs so that it can be reused later, e.g. in
612
  // recognition phase.
613
0
}
614
615
0
void FPRow::EstimatePitch(bool pass1) {
616
0
  good_pitches_.Clear();
617
0
  all_pitches_.Clear();
618
0
  good_gaps_.Clear();
619
0
  all_gaps_.Clear();
620
0
  heights_.Clear();
621
0
  if (num_chars() == 0) {
622
0
    return;
623
0
  }
624
625
0
  int32_t cx0, cx1;
626
0
  bool prev_was_good = is_good(0);
627
0
  cx0 = center_x(0);
628
629
0
  heights_.Add(box(0).height());
630
0
  for (size_t i = 1; i < num_chars(); i++) {
631
0
    cx1 = center_x(i);
632
0
    int32_t pitch = cx1 - cx0;
633
0
    int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
634
635
0
    heights_.Add(box(i).height());
636
    // Ignore if the pitch is too close.  But don't ignore wide pitch
637
    // may be the result of large tracking.
638
0
    if (pitch > height_ * 0.5) {
639
0
      all_pitches_.Add(pitch);
640
0
      all_gaps_.Add(gap);
641
0
      if (is_good(i)) {
642
        // In pass1 (after Pass1Analyze()), all characters marked as
643
        // "good" have a good consistent pitch with their previous
644
        // characters.  However, it's not true in pass2 and a good
645
        // character may have a good pitch only between its successor.
646
        // So we collect only pitch values between two good
647
        // characters. and within tolerance in pass2.
648
0
        if (pass1 ||
649
0
            (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
650
0
          good_pitches_.Add(pitch);
651
0
          if (!is_box_modified(i - 1) && !is_box_modified(i)) {
652
0
            good_gaps_.Add(gap);
653
0
          }
654
0
        }
655
0
        prev_was_good = true;
656
0
      } else {
657
0
        prev_was_good = false;
658
0
      }
659
0
    }
660
0
    cx0 = cx1;
661
0
  }
662
663
0
  good_pitches_.Finish();
664
0
  all_pitches_.Finish();
665
0
  good_gaps_.Finish();
666
0
  all_gaps_.Finish();
667
0
  heights_.Finish();
668
669
0
  height_ = heights_.ile(0.875);
670
0
  if (all_pitches_.empty()) {
671
0
    pitch_ = 0.0f;
672
0
    gap_ = 0.0f;
673
0
  } else if (good_pitches_.size() < 2) {
674
    // We don't have enough data to estimate the pitch of this row yet.
675
    // Use median of all pitches as the initial guess.
676
0
    pitch_ = all_pitches_.median();
677
0
    ASSERT_HOST(pitch_ > 0.0f);
678
0
    gap_ = all_gaps_.ile(0.125);
679
0
  } else {
680
0
    pitch_ = good_pitches_.median();
681
0
    ASSERT_HOST(pitch_ > 0.0f);
682
0
    gap_ = good_gaps_.ile(0.125);
683
0
  }
684
0
}
685
686
0
void FPRow::DebugOutputResult(int row_index) {
687
0
  if (num_chars() > 0) {
688
0
    tprintf(
689
0
        "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
690
0
        "space_size=%f, space_threshold=%d, xheight=%f\n",
691
0
        row_index, static_cast<int>(real_row_->pitch_decision), real_row_->fixed_pitch,
692
0
        real_row_->max_nonspace, real_row_->space_size, real_row_->space_threshold,
693
0
        real_row_->xheight);
694
695
0
    for (unsigned i = 0; i < num_chars(); i++) {
696
0
      tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), is_good(i),
697
0
              character(i)->num_blobs());
698
0
      box(i).print();
699
0
    }
700
0
  }
701
0
}
702
703
0
void FPRow::Pass1Analyze() {
704
0
  if (num_chars() < 2) {
705
0
    return;
706
0
  }
707
708
0
  if (estimated_pitch_ > 0.0f) {
709
0
    for (size_t i = 2; i < num_chars(); i++) {
710
0
      if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) &&
711
0
          is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
712
0
        mark_good(i - 1);
713
0
      }
714
0
    }
715
0
  } else {
716
0
    for (size_t i = 2; i < num_chars(); i++) {
717
0
      if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i))) {
718
0
        mark_good(i - 1);
719
0
      }
720
0
    }
721
0
  }
722
0
  character(0)->set_alignment(character(1)->alignment());
723
0
  character(num_chars() - 1)->set_alignment(character(num_chars() - 2)->alignment());
724
0
}
725
726
0
bool FPRow::Pass2Analyze() {
727
0
  bool changed = false;
728
0
  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
729
0
    return false;
730
0
  }
731
0
  for (size_t i = 0; i < num_chars(); i++) {
732
0
    if (is_final(i)) {
733
0
      continue;
734
0
    }
735
736
0
    FPChar::Alignment alignment = character(i)->alignment();
737
0
    bool intersecting = false;
738
0
    bool not_intersecting = false;
739
740
0
    if (i < num_chars() - 1 && is_final(i + 1)) {
741
      // Next character is already finalized. Estimate the imaginary
742
      // body including this character based on the character. Skip
743
      // whitespace if necessary.
744
0
      bool skipped_whitespaces = false;
745
0
      float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
746
0
      while (c1 > box(i).right()) {
747
0
        skipped_whitespaces = true;
748
0
        c1 -= estimated_pitch_;
749
0
      }
750
0
      TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
751
752
      // Collect all characters that mostly fit in the region.
753
      // Also, their union height shouldn't be too big.
754
0
      int j = i;
755
0
      TBOX merged;
756
0
      while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
757
0
             merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
758
0
        merged += box(j);
759
0
        j--;
760
0
      }
761
762
0
      if (j >= 0 && significant_overlap(ibody, box(j))) {
763
        // character(j) lies on the character boundary and doesn't fit
764
        // well into the imaginary body.
765
0
        if (!is_final(j)) {
766
0
          intersecting = true;
767
0
        }
768
0
      } else {
769
0
        not_intersecting = true;
770
0
        if (i - j > 0) {
771
          // Merge character(j+1) ... character(i) because they fit
772
          // into the body nicely.
773
0
          if (i - j == 1) {
774
            // Only one char in the imaginary body.
775
0
            if (!skipped_whitespaces) {
776
0
              mark_good(i);
777
0
            }
778
            // set ibody as bounding box of this character to get
779
            // better pitch analysis result for halfwidth glyphs
780
            // followed by a halfwidth space.
781
0
            if (box(i).width() <= estimated_pitch_ * 0.5) {
782
0
              ibody += box(i);
783
0
              character(i)->set_box(ibody);
784
0
            }
785
0
            character(i)->set_merge_to_prev(false);
786
0
            finalize(i);
787
0
          } else {
788
0
            for (int k = i; k > j + 1; k--) {
789
0
              character(k)->set_merge_to_prev(true);
790
0
            }
791
0
          }
792
0
        }
793
0
      }
794
0
    }
795
0
    if (i > 0 && is_final(i - 1)) {
796
      // Now we repeat everything from the opposite side.  Previous
797
      // character is already finalized. Estimate the imaginary body
798
      // including this character based on the character.
799
0
      bool skipped_whitespaces = false;
800
0
      float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
801
0
      while (c1 < box(i).left()) {
802
0
        skipped_whitespaces = true;
803
0
        c1 += estimated_pitch_;
804
0
      }
805
0
      TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
806
807
0
      size_t j = i;
808
0
      TBOX merged;
809
0
      while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
810
0
             merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
811
0
        merged += box(j);
812
0
        j++;
813
0
      }
814
815
0
      if (j < num_chars() && significant_overlap(ibody, box(j))) {
816
0
        if (!is_final(j)) {
817
0
          intersecting = true;
818
0
        }
819
0
      } else {
820
0
        not_intersecting = true;
821
0
        if (j - i > 0) {
822
0
          if (j - i == 1) {
823
0
            if (!skipped_whitespaces) {
824
0
              mark_good(i);
825
0
            }
826
0
            if (box(i).width() <= estimated_pitch_ * 0.5) {
827
0
              ibody += box(i);
828
0
              character(i)->set_box(ibody);
829
0
            }
830
0
            character(i)->set_merge_to_prev(false);
831
0
            finalize(i);
832
0
          } else {
833
0
            for (size_t k = i + 1; k < j; k++) {
834
0
              character(k)->set_merge_to_prev(true);
835
0
            }
836
0
          }
837
0
        }
838
0
      }
839
0
    }
840
841
    // This character doesn't fit well into the estimated imaginary
842
    // bodies. Mark it as bad.
843
0
    if (intersecting && !not_intersecting) {
844
0
      mark_bad(i);
845
0
    }
846
0
    if (character(i)->alignment() != alignment || character(i)->merge_to_prev()) {
847
0
      changed = true;
848
0
    }
849
0
  }
850
851
0
  return changed;
852
0
}
853
854
0
void FPRow::MergeFragments() {
855
0
  int last_char = 0;
856
857
0
  for (size_t j = 0; j < num_chars(); ++j) {
858
0
    if (character(j)->merge_to_prev()) {
859
0
      character(last_char)->Merge(*character(j));
860
0
      character(j)->set_delete_flag(true);
861
0
      clear_alignment(last_char);
862
0
      character(j - 1)->set_merge_to_prev(false);
863
0
    } else {
864
0
      last_char = j;
865
0
    }
866
0
  }
867
0
  DeleteChars();
868
0
}
869
870
0
void FPRow::FinalizeLargeChars() {
871
0
  float row_pitch = estimated_pitch();
872
0
  for (size_t i = 0; i < num_chars(); i++) {
873
0
    if (is_final(i)) {
874
0
      continue;
875
0
    }
876
877
    // Finalize if both neighbors are finalized. We have no other choice.
878
0
    if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
879
0
      finalize(i);
880
0
      continue;
881
0
    }
882
883
0
    float cx = center_x(i);
884
0
    TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
885
0
    if (i > 0) {
886
      // The preceding character significantly intersects with the
887
      // imaginary body of this character. Let Pass2Analyze() handle
888
      // this case.
889
0
      if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) {
890
0
        continue;
891
0
      }
892
0
      if (!is_final(i - 1)) {
893
0
        TBOX merged = box(i);
894
0
        merged += box(i - 1);
895
0
        if (merged.width() < row_pitch) {
896
0
          continue;
897
0
        }
898
        // This character cannot be finalized yet because it can be
899
        // merged with the previous one.  Again, let Pass2Analyze()
900
        // handle this case.
901
0
      }
902
0
    }
903
0
    if (i < num_chars() - 1) {
904
0
      if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) {
905
0
        continue;
906
0
      }
907
0
      if (!is_final(i + 1)) {
908
0
        TBOX merged = box(i);
909
0
        merged += box(i + 1);
910
0
        if (merged.width() < row_pitch) {
911
0
          continue;
912
0
        }
913
0
      }
914
0
    }
915
0
    finalize(i);
916
0
  }
917
918
  // Update alignment decision.  We only consider finalized characters
919
  // in pass2.  E.g. if a finalized character C has another finalized
920
  // character L on its left and a not-finalized character R on its
921
  // right, we mark C as good if the pitch between C and L is good,
922
  // regardless of the pitch between C and R.
923
0
  for (size_t i = 0; i < num_chars(); i++) {
924
0
    if (!is_final(i)) {
925
0
      continue;
926
0
    }
927
0
    bool good_pitch = false;
928
0
    bool bad_pitch = false;
929
0
    if (i > 0 && is_final(i - 1)) {
930
0
      if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
931
0
        good_pitch = true;
932
0
      } else {
933
0
        bad_pitch = true;
934
0
      }
935
0
    }
936
0
    if (i < num_chars() - 1 && is_final(i + 1)) {
937
0
      if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
938
0
        good_pitch = true;
939
0
      } else {
940
0
        bad_pitch = true;
941
0
      }
942
0
    }
943
0
    if (good_pitch && !bad_pitch) {
944
0
      mark_good(i);
945
0
    } else if (!good_pitch && bad_pitch) {
946
0
      mark_bad(i);
947
0
    }
948
0
  }
949
0
}
950
951
class FPAnalyzer {
952
public:
953
  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
954
0
  ~FPAnalyzer() = default;
955
956
0
  void Pass1Analyze() {
957
0
    for (auto &row : rows_) {
958
0
      row.Pass1Analyze();
959
0
    }
960
0
  }
961
962
  // Estimate character pitch for each row.  The argument pass1 can be
963
  // set to true if the function is called after Pass1Analyze(), to
964
  // eliminate some redundant computation.
965
  void EstimatePitch(bool pass1);
966
967
0
  bool maybe_fixed_pitch() {
968
0
    if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) {
969
0
      return false;
970
0
    }
971
0
    return true;
972
0
  }
973
974
0
  void MergeFragments() {
975
0
    for (auto &row : rows_) {
976
0
      row.MergeFragments();
977
0
    }
978
0
  }
979
980
0
  void FinalizeLargeChars() {
981
0
    for (auto &row : rows_) {
982
0
      row.FinalizeLargeChars();
983
0
    }
984
0
  }
985
986
0
  bool Pass2Analyze() {
987
0
    bool changed = false;
988
0
    for (auto &row : rows_) {
989
0
      if (row.Pass2Analyze()) {
990
0
        changed = true;
991
0
      }
992
0
    }
993
0
    return changed;
994
0
  }
995
996
0
  void OutputEstimations() {
997
0
    for (auto &row : rows_) {
998
0
      row.OutputEstimations();
999
0
    }
1000
    // Don't we need page-level estimation of gaps/spaces?
1001
0
  }
1002
1003
0
  void DebugOutputResult() {
1004
0
    tprintf("FPAnalyzer: final result\n");
1005
0
    for (size_t i = 0; i < rows_.size(); i++) {
1006
0
      rows_[i].DebugOutputResult(i);
1007
0
    }
1008
0
  }
1009
1010
0
  size_t num_rows() {
1011
0
    return rows_.size();
1012
0
  }
1013
1014
  // Returns the upper limit for pass2 loop iteration.
1015
0
  unsigned max_iteration() {
1016
    // We're fixing at least one character per iteration. So basically
1017
    // we shouldn't require more than max_chars_per_row_ iterations.
1018
0
    return max_chars_per_row_ + 100;
1019
0
  }
1020
1021
private:
1022
  ICOORD page_tr_;
1023
  std::vector<FPRow> rows_;
1024
  unsigned num_tall_rows_;
1025
  unsigned num_bad_rows_;
1026
  // TODO: num_empty_rows_ is incremented, but never used otherwise.
1027
  unsigned num_empty_rows_;
1028
  unsigned max_chars_per_row_;
1029
};
1030
1031
FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
1032
0
    : page_tr_(page_tr)
1033
0
    , num_tall_rows_(0)
1034
0
    , num_bad_rows_(0)
1035
0
    , num_empty_rows_(0)
1036
0
    , max_chars_per_row_(0) {
1037
0
  TO_BLOCK_IT block_it(port_blocks);
1038
1039
0
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1040
0
    TO_BLOCK *block = block_it.data();
1041
0
    if (!block->get_rows()->empty()) {
1042
0
      ASSERT_HOST(block->xheight > 0);
1043
0
      find_repeated_chars(block, false);
1044
0
    }
1045
0
  }
1046
1047
0
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1048
0
    TO_ROW_IT row_it = block_it.data()->get_rows();
1049
0
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1050
0
      FPRow row;
1051
0
      row.Init(row_it.data());
1052
0
      rows_.push_back(row);
1053
0
      size_t num_chars = rows_.back().num_chars();
1054
0
      if (num_chars <= 1) {
1055
0
        num_empty_rows_++;
1056
0
      }
1057
0
      if (num_chars > max_chars_per_row_) {
1058
0
        max_chars_per_row_ = num_chars;
1059
0
      }
1060
0
    }
1061
0
  }
1062
0
}
1063
1064
0
void FPAnalyzer::EstimatePitch(bool pass1) {
1065
0
  LocalCorrelation pitch_height_stats;
1066
1067
0
  num_tall_rows_ = 0;
1068
0
  num_bad_rows_ = 0;
1069
0
  pitch_height_stats.Clear();
1070
0
  for (auto &row : rows_) {
1071
0
    row.EstimatePitch(pass1);
1072
0
    if (row.good_pitches()) {
1073
0
      pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches());
1074
0
      if (row.height_pitch_ratio() > 1.1) {
1075
0
        num_tall_rows_++;
1076
0
      }
1077
0
    } else {
1078
0
      num_bad_rows_++;
1079
0
    }
1080
0
  }
1081
1082
0
  pitch_height_stats.Finish();
1083
0
  for (auto &row : rows_) {
1084
0
    if (row.good_pitches() >= 5) {
1085
      // We have enough evidences. Just use the pitch estimation
1086
      // from this row.
1087
0
      row.set_estimated_pitch(row.pitch());
1088
0
    } else if (row.num_chars() > 1) {
1089
0
      float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
1090
      // CJK characters are more likely to be fragmented than poorly
1091
      // chopped. So trust the page-level estimation of character
1092
      // pitch only if it's larger than row-level estimation or
1093
      // row-level estimation is too large (2x bigger than row height).
1094
0
      if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) {
1095
0
        row.set_estimated_pitch(estimated_pitch);
1096
0
      } else {
1097
0
        row.set_estimated_pitch(row.pitch());
1098
0
      }
1099
0
    }
1100
0
  }
1101
0
}
1102
1103
0
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
1104
0
  FPAnalyzer analyzer(page_tr, port_blocks);
1105
0
  if (analyzer.num_rows() == 0) {
1106
0
    return;
1107
0
  }
1108
1109
0
  analyzer.Pass1Analyze();
1110
0
  analyzer.EstimatePitch(true);
1111
1112
  // Perform pass1 analysis again with the initial estimation of row
1113
  // pitches, for better estimation.
1114
0
  analyzer.Pass1Analyze();
1115
0
  analyzer.EstimatePitch(true);
1116
1117
  // Early exit if the page doesn't seem to contain fixed pitch rows.
1118
0
  if (!analyzer.maybe_fixed_pitch()) {
1119
0
    if (textord_debug_pitch_test) {
1120
0
      tprintf("Page doesn't seem to contain fixed pitch rows\n");
1121
0
    }
1122
0
    return;
1123
0
  }
1124
1125
0
  unsigned iteration = 0;
1126
0
  do {
1127
0
    analyzer.MergeFragments();
1128
0
    analyzer.FinalizeLargeChars();
1129
0
    analyzer.EstimatePitch(false);
1130
0
    iteration++;
1131
0
  } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1132
1133
0
  if (textord_debug_pitch_test) {
1134
0
    tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration,
1135
0
            analyzer.max_iteration());
1136
0
  }
1137
1138
0
  analyzer.OutputEstimations();
1139
0
  if (textord_debug_pitch_test) {
1140
0
    analyzer.DebugOutputResult();
1141
0
  }
1142
0
}
1143
1144
} // namespace tesseract