/src/tesseract/src/wordrec/lm_consistency.cpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: lm_consistency.cpp |
3 | | // Description: Struct for recording consistency of the paths representing |
4 | | // OCR hypotheses. |
5 | | // Author: Rika Antonova |
6 | | // Created: Mon Jun 20 11:26:43 PST 2012 |
7 | | // |
8 | | // (C) Copyright 2012, Google Inc. |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
10 | | // you may not use this file except in compliance with the License. |
11 | | // You may obtain a copy of the License at |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // Unless required by applicable law or agreed to in writing, software |
14 | | // distributed under the License is distributed on an "AS IS" BASIS, |
15 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | // See the License for the specific language governing permissions and |
17 | | // limitations under the License. |
18 | | // |
19 | | //////////////////////////////////////////////////////////////////////// |
20 | | |
21 | | #include "lm_consistency.h" |
22 | | |
23 | | #include "associate.h" |
24 | | #include "dict.h" |
25 | | #include "ratngs.h" |
26 | | |
27 | | namespace tesseract { |
28 | | |
// Updates this path's x-height consistency state with one more blob choice.
//
// Inputs: b supplies yshift(), min_xheight() and max_xheight(); is_punc marks
// the choice as punctuation.
// State touched: xht_sp, xht_count, xht_count_punc, xpos_entropy,
// xht_lo/xht_hi and xht_decision. Every path past the first early return
// leaves xht_decision as XH_GOOD, XH_SUBNORMAL or XH_INCONSISTENT; if it is
// already XH_INCONSISTENT on entry nothing is modified.
//
// Order of work:
//   1. Classify the blob as kSUP / kSUB / kNORM by comparing yshift() with
//      +/- kShiftThresh, bump the per-position counters, and add
//      abs(previous position - new position) to xpos_entropy when a previous
//      position exists.
//   2. Call IntersectRange with the blob's [min_xheight, max_xheight] and the
//      running xht_lo/xht_hi of that position (presumably narrowing the
//      running range to the intersection -- helper not visible here, confirm).
//   3. Decide: no previous position -> GOOD/SUBNORMAL from xht_count[kNORM];
//      otherwise INCONSISTENT on an empty range, too much sub/superscript
//      punctuation, too-small sub/superscript, or xpos_entropy above
//      kMaxEntropy; else GOOD when no sub/superscript was counted, and
//      SUBNORMAL in the remaining case.
29 | 10.2M | void LMConsistencyInfo::ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc) { |
30 | 10.2M | if (xht_decision == XH_INCONSISTENT) { |
31 | 4.11M | return; // It isn't going to get any better. |
32 | 4.11M | } |
33 | | |
34 | | // Compute xheight consistency. Save the previous position first: xht_sp is overwritten below, and a negative value is treated as "no previous character" (presumably the initial state -- confirm in the header). |
35 | 6.09M | bool parent_null = xht_sp < 0; |
36 | 6.09M | int parent_sp = xht_sp; |
37 | | // Classify this blob as superscript, subscript or normal from its y-shift. |
38 | 6.09M | if (b->yshift() > LMConsistencyInfo::kShiftThresh) { |
39 | 1.33M | xht_sp = LMConsistencyInfo::kSUP; |
40 | 4.76M | } else if (b->yshift() < -LMConsistencyInfo::kShiftThresh) { |
41 | 1.44M | xht_sp = LMConsistencyInfo::kSUB; |
42 | 3.32M | } else { |
43 | 3.32M | xht_sp = LMConsistencyInfo::kNORM; |
44 | 3.32M | } |
45 | 6.09M | xht_count[xht_sp]++; |
46 | 6.09M | if (is_punc) { |
47 | 1.60M | xht_count_punc[xht_sp]++; |
48 | 1.60M | } |
49 | 6.09M | if (!parent_null) { |
50 | 5.14M | xpos_entropy += abs(parent_sp - xht_sp); |
51 | 5.14M | } |
52 | | // TODO(eger): Figure out a better way to account for small caps. |
53 | | // For the first character not y-shifted, we only care if it is too small. |
54 | | // Too large is common in drop caps and small caps. The commented-out lines below are a disabled sketch of that idea; they use names (parent_vse, sp, LanguageModelConsistencyInfo) that this function does not declare. |
55 | | // int16_t small_xht = b->min_xheight(); |
56 | | // if (parent_vse == nullptr && sp == LanguageModelConsistencyInfo::kNORM) { |
57 | | // small_xht = 0; |
58 | | // } |
59 | 6.09M | IntersectRange(b->min_xheight(), b->max_xheight(), &(xht_lo[xht_sp]), &(xht_hi[xht_sp])); |
60 | | |
61 | | // Compute xheight inconsistency kinds. With no previous character the decision is XH_GOOD when xht_count[kNORM] is exactly 1 and XH_SUBNORMAL otherwise. |
62 | 6.09M | if (parent_null) { |
63 | 953k | if (xht_count[kNORM] == 1) { |
64 | 578k | xht_decision = XH_GOOD; |
65 | 578k | } else { |
66 | 375k | xht_decision = XH_SUBNORMAL; |
67 | 375k | } |
68 | 953k | return; |
69 | 953k | } |
70 | | |
71 | | // When we intersect the ranges of xheights in pixels for all characters in |
72 | | // each position (subscript, normal, superscript), |
73 | | // how much range must be left? 0? [exactly one pixel height for xheight] 1? With the current value 0, a position is rejected only when xht_lo exceeds xht_hi. |
74 | | // TODO(eger): Extend this code to take a prior for the rest of the line. |
75 | 5.14M | const int kMinIntersectedXHeightRange = 0; |
76 | 19.0M | for (int i = 0; i < kNumPos; i++) { |
77 | 14.5M | if (xht_lo[i] > xht_hi[i] - kMinIntersectedXHeightRange) { |
78 | 663k | xht_decision = XH_INCONSISTENT; |
79 | 663k | return; |
80 | 663k | } |
81 | 14.5M | } |
82 | | |
83 | | // Reject as improbable anything where punctuation makes up more than 40% of |
84 | | // the characters counted in the subscript or in the superscript position. |
85 | 4.48M | if (xht_count_punc[kSUB] > xht_count[kSUB] * 0.4 || |
86 | 4.48M | xht_count_punc[kSUP] > xht_count[kSUP] * 0.4) { |
87 | 610k | xht_decision = XH_INCONSISTENT; |
88 | 610k | return; |
89 | 610k | } |
90 | | |
91 | | // Now check that the subscript and superscript aren't too small relative to |
92 | | // the mainline: compare their xht_hi against xht_lo[kNORM] using kMinSizeRatio. The check is skipped when xht_lo[kNORM] is not positive, which also avoids dividing by zero. |
93 | 3.87M | auto mainline_xht = static_cast<double>(xht_lo[kNORM]); |
94 | 3.87M | double kMinSizeRatio = 0.4; |
95 | 3.87M | if (mainline_xht > 0.0 && (static_cast<double>(xht_hi[kSUB]) / mainline_xht < kMinSizeRatio || |
96 | 3.23M | static_cast<double>(xht_hi[kSUP]) / mainline_xht < kMinSizeRatio)) { |
97 | 565 | xht_decision = XH_INCONSISTENT; |
98 | 565 | return; |
99 | 565 | } |
100 | | // TODO(eger): Check into inconsistency of super/subscript y offsets. |
101 | 3.87M | if (xpos_entropy > kMaxEntropy) { |
102 | 616k | xht_decision = XH_INCONSISTENT; |
103 | 616k | return; |
104 | 616k | } |
105 | 3.25M | if (xht_count[kSUB] == 0 && xht_count[kSUP] == 0) { |
106 | 1.27M | xht_decision = XH_GOOD; |
107 | 1.27M | return; |
108 | 1.27M | } |
109 | 1.97M | xht_decision = XH_SUBNORMAL; |
110 | 1.97M | } |
111 | | |
112 | | } // namespace tesseract |