/src/tesseract/src/wordrec/associate.cpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: associate.cpp |
3 | | // Description: Functions for scoring segmentation paths according to |
4 | | // their character widths, gap widths and seam cuts. |
5 | | // Author: Daria Antonova |
6 | | // Created: Mon Mar 8 11:26:43 PDT 2010 |
7 | | // |
8 | | // (C) Copyright 2010, Google Inc. |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
10 | | // you may not use this file except in compliance with the License. |
11 | | // You may obtain a copy of the License at |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // Unless required by applicable law or agreed to in writing, software |
14 | | // distributed under the License is distributed on an "AS IS" BASIS, |
15 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | // See the License for the specific language governing permissions and |
17 | | // limitations under the License. |
18 | | // |
19 | | /////////////////////////////////////////////////////////////////////// |
20 | | |
21 | | #include <cmath> |
22 | | #include <cstdio> |
23 | | |
24 | | #include "associate.h" |
25 | | #include "normalis.h" |
26 | | #include "pageres.h" |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | | const float AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f; /* Normalized width above which FixedPitchWidthCost adds a squared-width penalty. */ |
31 | | const float AssociateUtils::kMinGap = 0.03f; /* Smallest gap, as a fraction of the normalizing height, that ComputeStats accepts beside a blob in fixed-pitch mode. */ |
32 | | |
33 | | void AssociateUtils::ComputeStats(int col, int row, const AssociateStats *parent_stats, |
34 | | int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, |
35 | 22.1M | WERD_RES *word_res, bool debug, AssociateStats *stats) { |
36 | 22.1M | stats->Clear(); |
37 | | |
38 | 22.1M | ASSERT_HOST(word_res != nullptr); |
39 | 22.1M | if (word_res->blob_widths.empty()) { |
40 | 0 | return; |
41 | 0 | } |
42 | 22.1M | if (debug) { |
43 | 0 | tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", col, row, |
44 | 0 | fixed_pitch ? " (fixed pitch)" : ""); |
45 | 0 | } |
46 | 22.1M | float normalizing_height = kBlnXHeight; |
47 | 22.1M | ROW *blob_row = word_res->blob_row; |
48 | | // TODO(rays/daria) Can unicharset.script_has_xheight be useful here? |
49 | 22.1M | if (fixed_pitch && blob_row != nullptr) { |
50 | | // For fixed pitch language like CJK, we use the full text height |
51 | | // as the normalizing factor so we are not dependent on xheight |
52 | | // calculation. |
53 | 0 | if (blob_row->body_size() > 0.0f) { |
54 | 0 | normalizing_height = word_res->denorm.y_scale() * blob_row->body_size(); |
55 | 0 | } else { |
56 | 0 | normalizing_height = |
57 | 0 | word_res->denorm.y_scale() * (blob_row->x_height() + blob_row->ascenders()); |
58 | 0 | } |
59 | 0 | if (debug) { |
60 | 0 | tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", normalizing_height, |
61 | 0 | word_res->denorm.y_scale(), blob_row->x_height(), blob_row->ascenders()); |
62 | 0 | } |
63 | 0 | } |
64 | 22.1M | float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height; |
65 | 22.1M | if (wh_ratio > max_char_wh_ratio) { |
66 | 431k | stats->bad_shape = true; |
67 | 431k | } |
68 | | // Compute the gap sum for this shape. If there are only negative or only |
69 | | // positive gaps, record their sum in stats->gap_sum. However, if there is |
70 | | // a mixture, record only the sum of the positive gaps. |
71 | | // TODO(antonova): explain fragment. |
72 | 22.1M | int negative_gap_sum = 0; |
73 | 55.2M | for (int c = col; c < row; ++c) { |
74 | 33.1M | int gap = word_res->GetBlobsGap(c); |
75 | 33.1M | (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap; |
76 | 33.1M | } |
77 | 22.1M | if (stats->gap_sum == 0) { |
78 | 13.8M | stats->gap_sum = negative_gap_sum; |
79 | 13.8M | } |
80 | 22.1M | if (debug) { |
81 | 0 | tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n", wh_ratio, max_char_wh_ratio, |
82 | 0 | stats->gap_sum, stats->bad_shape ? "bad_shape" : ""); |
83 | 0 | } |
84 | | // Compute shape_cost (for fixed pitch mode). |
85 | 22.1M | if (fixed_pitch) { |
86 | 0 | bool end_row = (row == (word_res->ratings->dimension() - 1)); |
87 | | |
88 | | // Ensure that the blob has gaps on the left and the right sides |
89 | | // (except for beginning and ending punctuation) and that there is |
90 | | // no cutting through ink at the blob boundaries. |
91 | 0 | if (col > 0) { |
92 | 0 | float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height; |
93 | 0 | SEAM *left_seam = word_res->seam_array[col - 1]; |
94 | 0 | if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) { |
95 | 0 | stats->bad_shape = true; |
96 | 0 | } |
97 | 0 | if (debug) { |
98 | 0 | tprintf("left_gap %g, left_seam %g %s\n", left_gap, left_seam->priority(), |
99 | 0 | stats->bad_shape ? "bad_shape" : ""); |
100 | 0 | } |
101 | 0 | } |
102 | 0 | float right_gap = 0.0f; |
103 | 0 | if (!end_row) { |
104 | 0 | right_gap = word_res->GetBlobsGap(row) / normalizing_height; |
105 | 0 | SEAM *right_seam = word_res->seam_array[row]; |
106 | 0 | if (right_gap < kMinGap || right_seam->priority() > 0.0f) { |
107 | 0 | stats->bad_shape = true; |
108 | 0 | if (right_gap < kMinGap) { |
109 | 0 | stats->bad_fixed_pitch_right_gap = true; |
110 | 0 | } |
111 | 0 | } |
112 | 0 | if (debug) { |
113 | 0 | tprintf("right_gap %g right_seam %g %s\n", right_gap, right_seam->priority(), |
114 | 0 | stats->bad_shape ? "bad_shape" : ""); |
115 | 0 | } |
116 | 0 | } |
117 | | |
118 | | // Impose additional segmentation penalties if blob widths or gaps |
119 | | // distribution don't fit a fixed-pitch model. |
120 | | // Since we only know the widths and gaps of the path explored so far, |
121 | | // the means and variances are computed for the path so far (not |
122 | | // considering characters to the right of the last character on the path). |
123 | 0 | stats->full_wh_ratio = wh_ratio + right_gap; |
124 | 0 | if (parent_stats != nullptr) { |
125 | 0 | stats->full_wh_ratio_total = (parent_stats->full_wh_ratio_total + stats->full_wh_ratio); |
126 | 0 | float mean = stats->full_wh_ratio_total / static_cast<float>(parent_path_length + 1); |
127 | 0 | stats->full_wh_ratio_var = |
128 | 0 | parent_stats->full_wh_ratio_var + pow(mean - stats->full_wh_ratio, 2); |
129 | 0 | } else { |
130 | 0 | stats->full_wh_ratio_total = stats->full_wh_ratio; |
131 | 0 | } |
132 | 0 | if (debug) { |
133 | 0 | tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n", |
134 | 0 | stats->full_wh_ratio, stats->full_wh_ratio_total, stats->full_wh_ratio_var); |
135 | 0 | } |
136 | |
|
137 | 0 | stats->shape_cost = FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio); |
138 | | |
139 | | // For some reason Tesseract prefers to treat the whole CJ words |
140 | | // as one blob when the initial segmentation is particularly bad. |
141 | | // This hack is to avoid favoring such states. |
142 | 0 | if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) { |
143 | 0 | stats->shape_cost += 10; |
144 | 0 | } |
145 | 0 | stats->shape_cost += stats->full_wh_ratio_var; |
146 | 0 | if (debug) { |
147 | 0 | tprintf("shape_cost %g\n", stats->shape_cost); |
148 | 0 | } |
149 | 0 | } |
150 | 22.1M | } |
151 | | |
152 | | float AssociateUtils::FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, |
153 | 0 | float max_char_wh_ratio) { |
154 | 0 | float cost = 0.0f; |
155 | 0 | if (norm_width > max_char_wh_ratio) { |
156 | 0 | cost += norm_width; |
157 | 0 | } |
158 | 0 | if (norm_width > kMaxFixedPitchCharAspectRatio) { |
159 | 0 | cost += norm_width * norm_width; // extra penalty for merging CJK chars |
160 | 0 | } |
161 | | // Penalize skinny blobs, except for punctuation in the last position. |
162 | 0 | if (norm_width + right_gap < 0.5f && !end_pos) { |
163 | 0 | cost += 1.0f - (norm_width + right_gap); |
164 | 0 | } |
165 | 0 | return cost; |
166 | 0 | } |
167 | | |
168 | | } // namespace tesseract |