Coverage Report

Created: 2026-06-13 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/wordrec/associate.cpp
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        associate.cpp
3
// Description: Functions for scoring segmentation paths according to
4
//              their character widths, gap widths and seam cuts.
5
// Author:      Daria Antonova
6
// Created:     Mon Mar 8 11:26:43 PDT 2010
7
//
8
// (C) Copyright 2010, Google Inc.
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
19
///////////////////////////////////////////////////////////////////////
20
21
#include <cmath>
22
#include <cstdio>
23
24
#include "associate.h"
25
#include "normalis.h"
26
#include "pageres.h"
27
28
namespace tesseract {
29
30
// Normalized blob width above which FixedPitchWidthCost() adds an extra
// quadratic penalty (intended to discourage merging several CJK characters
// into a single blob).
const float AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f;
31
// Smallest acceptable gap next to a blob in fixed pitch mode, expressed as a
// fraction of the normalizing height. ComputeStats() compares the left and
// right gaps against this value when deciding whether to flag bad_shape.
const float AssociateUtils::kMinGap = 0.03f;
32
33
void AssociateUtils::ComputeStats(int col, int row, const AssociateStats *parent_stats,
34
                                  int parent_path_length, bool fixed_pitch, float max_char_wh_ratio,
35
22.1M
                                  WERD_RES *word_res, bool debug, AssociateStats *stats) {
36
22.1M
  stats->Clear();
37
38
22.1M
  ASSERT_HOST(word_res != nullptr);
39
22.1M
  if (word_res->blob_widths.empty()) {
40
0
    return;
41
0
  }
42
22.1M
  if (debug) {
43
0
    tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", col, row,
44
0
            fixed_pitch ? " (fixed pitch)" : "");
45
0
  }
46
22.1M
  float normalizing_height = kBlnXHeight;
47
22.1M
  ROW *blob_row = word_res->blob_row;
48
  // TODO(rays/daria) Can unicharset.script_has_xheight be useful here?
49
22.1M
  if (fixed_pitch && blob_row != nullptr) {
50
    // For fixed pitch language like CJK, we use the full text height
51
    // as the normalizing factor so we are not dependent on xheight
52
    // calculation.
53
0
    if (blob_row->body_size() > 0.0f) {
54
0
      normalizing_height = word_res->denorm.y_scale() * blob_row->body_size();
55
0
    } else {
56
0
      normalizing_height =
57
0
          word_res->denorm.y_scale() * (blob_row->x_height() + blob_row->ascenders());
58
0
    }
59
0
    if (debug) {
60
0
      tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", normalizing_height,
61
0
              word_res->denorm.y_scale(), blob_row->x_height(), blob_row->ascenders());
62
0
    }
63
0
  }
64
22.1M
  float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height;
65
22.1M
  if (wh_ratio > max_char_wh_ratio) {
66
431k
    stats->bad_shape = true;
67
431k
  }
68
  // Compute the gap sum for this shape. If there are only negative or only
69
  // positive gaps, record their sum in stats->gap_sum. However, if there is
70
  // a mixture, record only the sum of the positive gaps.
71
  // TODO(antonova): explain fragment.
72
22.1M
  int negative_gap_sum = 0;
73
55.2M
  for (int c = col; c < row; ++c) {
74
33.1M
    int gap = word_res->GetBlobsGap(c);
75
33.1M
    (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap;
76
33.1M
  }
77
22.1M
  if (stats->gap_sum == 0) {
78
13.8M
    stats->gap_sum = negative_gap_sum;
79
13.8M
  }
80
22.1M
  if (debug) {
81
0
    tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n", wh_ratio, max_char_wh_ratio,
82
0
            stats->gap_sum, stats->bad_shape ? "bad_shape" : "");
83
0
  }
84
  // Compute shape_cost (for fixed pitch mode).
85
22.1M
  if (fixed_pitch) {
86
0
    bool end_row = (row == (word_res->ratings->dimension() - 1));
87
88
    // Ensure that the blob has gaps on the left and the right sides
89
    // (except for beginning and ending punctuation) and that there is
90
    // no cutting through ink at the blob boundaries.
91
0
    if (col > 0) {
92
0
      float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height;
93
0
      SEAM *left_seam = word_res->seam_array[col - 1];
94
0
      if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) {
95
0
        stats->bad_shape = true;
96
0
      }
97
0
      if (debug) {
98
0
        tprintf("left_gap %g, left_seam %g %s\n", left_gap, left_seam->priority(),
99
0
                stats->bad_shape ? "bad_shape" : "");
100
0
      }
101
0
    }
102
0
    float right_gap = 0.0f;
103
0
    if (!end_row) {
104
0
      right_gap = word_res->GetBlobsGap(row) / normalizing_height;
105
0
      SEAM *right_seam = word_res->seam_array[row];
106
0
      if (right_gap < kMinGap || right_seam->priority() > 0.0f) {
107
0
        stats->bad_shape = true;
108
0
        if (right_gap < kMinGap) {
109
0
          stats->bad_fixed_pitch_right_gap = true;
110
0
        }
111
0
      }
112
0
      if (debug) {
113
0
        tprintf("right_gap %g right_seam %g %s\n", right_gap, right_seam->priority(),
114
0
                stats->bad_shape ? "bad_shape" : "");
115
0
      }
116
0
    }
117
118
    // Impose additional segmentation penalties if blob widths or gaps
119
    // distribution don't fit a fixed-pitch model.
120
    // Since we only know the widths and gaps of the path explored so far,
121
    // the means and variances are computed for the path so far (not
122
    // considering characters to the right of the last character on the path).
123
0
    stats->full_wh_ratio = wh_ratio + right_gap;
124
0
    if (parent_stats != nullptr) {
125
0
      stats->full_wh_ratio_total = (parent_stats->full_wh_ratio_total + stats->full_wh_ratio);
126
0
      float mean = stats->full_wh_ratio_total / static_cast<float>(parent_path_length + 1);
127
0
      stats->full_wh_ratio_var =
128
0
          parent_stats->full_wh_ratio_var + pow(mean - stats->full_wh_ratio, 2);
129
0
    } else {
130
0
      stats->full_wh_ratio_total = stats->full_wh_ratio;
131
0
    }
132
0
    if (debug) {
133
0
      tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n",
134
0
              stats->full_wh_ratio, stats->full_wh_ratio_total, stats->full_wh_ratio_var);
135
0
    }
136
137
0
    stats->shape_cost = FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio);
138
139
    // For some reason Tesseract prefers to treat the whole CJ words
140
    // as one blob when the initial segmentation is particularly bad.
141
    // This hack is to avoid favoring such states.
142
0
    if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) {
143
0
      stats->shape_cost += 10;
144
0
    }
145
0
    stats->shape_cost += stats->full_wh_ratio_var;
146
0
    if (debug) {
147
0
      tprintf("shape_cost %g\n", stats->shape_cost);
148
0
    }
149
0
  }
150
22.1M
}
151
152
float AssociateUtils::FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos,
153
0
                                          float max_char_wh_ratio) {
154
0
  float cost = 0.0f;
155
0
  if (norm_width > max_char_wh_ratio) {
156
0
    cost += norm_width;
157
0
  }
158
0
  if (norm_width > kMaxFixedPitchCharAspectRatio) {
159
0
    cost += norm_width * norm_width; // extra penalty for merging CJK chars
160
0
  }
161
  // Penalize skinny blobs, except for punctuation in the last position.
162
0
  if (norm_width + right_gap < 0.5f && !end_pos) {
163
0
    cost += 1.0f - (norm_width + right_gap);
164
0
  }
165
0
  return cost;
166
0
}
167
168
} // namespace tesseract