/src/tesseract/src/wordrec/lm_state.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: lm_state.h |
3 | | // Description: Structures and functionality for capturing the state of |
4 | | // segmentation search guided by the language model. |
5 | | // Author: Rika Antonova |
6 | | // |
7 | | // (C) Copyright 2012, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ |
21 | | #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ |
22 | | |
23 | | #include <tesseract/unichar.h> // for UNICHAR_ID |
24 | | #include "associate.h" // for AssociateStats |
25 | | #include "dawg.h" // for DawgPositionVector |
26 | | #include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK |
27 | | #include "lm_consistency.h" // for LMConsistencyInfo |
28 | | #include "ratngs.h" // for BLOB_CHOICE, PermuterType |
29 | | #include "stopper.h" // for DANGERR |
30 | | #include "unicharset.h" // for UNICHARSET |
31 | | |
32 | | namespace tesseract { |
33 | | |
34 | | /// Used for expressing various language model flags. |
35 | | using LanguageModelFlagsType = unsigned char; |
36 | | |
37 | | /// The following structs are used for storing the state of the language model |
38 | | /// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs |
39 | | /// and the links are the relationships between the underlying blobs (see |
40 | | /// segsearch.h for a more detailed description). |
41 | | /// |
42 | | /// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has |
43 | | /// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi |
44 | | /// search leading up to and including this BLOB_CHOICE. |
45 | | /// |
46 | | /// Each ViterbiStateEntry contains information from various components of the |
47 | | /// language model: dawgs in which the path is found, character ngram model |
48 | | /// probability of the path, script/chartype/font consistency info, state for |
49 | | /// language-specific heuristics (e.g. hyphenated and compound words, |
50 | | /// lower/upper case preferences, etc). |
51 | | /// |
52 | | /// Each ViterbiStateEntry also contains the parent pointer, so that the path |
53 | | /// that it represents (WERD_CHOICE) can be constructed by following these |
54 | | /// parent pointers. |
55 | | |
56 | | /// Struct for storing additional information used by Dawg language model |
57 | | /// component. It stores the set of active dawgs in which the sequence of |
58 | | /// letters on a path can be found. |
59 | | struct LanguageModelDawgInfo { |
60 | | LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt) |
61 | 0 | : active_dawgs(*a), permuter(pt) {} |
62 | | DawgPositionVector active_dawgs; |
63 | | PermuterType permuter; |
64 | | }; |
65 | | |
66 | | /// Struct for storing additional information used by Ngram language model |
67 | | /// component. |
68 | | struct LanguageModelNgramInfo { |
69 | | LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc) |
70 | 0 | : context(c) |
71 | 0 | , context_unichar_step_len(l) |
72 | 0 | , pruned(p) |
73 | 0 | , ngram_cost(nc) |
74 | 0 | , ngram_and_classifier_cost(ncc) {} |
75 | | std::string context; ///< context string |
76 | | /// Length of the context measured by advancing using UNICHAR::utf8_step() |
77 | | /// (should be at most the order of the character ngram model used). |
78 | | int context_unichar_step_len; |
79 | | /// The paths with pruned set are pruned out from the perspective of the |
80 | | /// character ngram model. They are explored further because they represent |
81 | | /// a dictionary match or a top choice. Thus ngram_info is still computed |
82 | | /// for them in order to calculate the combined cost. |
83 | | bool pruned; |
84 | | /// -ln(P_ngram_model(path)) |
85 | | float ngram_cost; |
86 | | /// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ] |
87 | | float ngram_and_classifier_cost; |
88 | | }; |
89 | | |
90 | | /// Struct for storing the information about a path in the segmentation graph |
91 | | /// explored by Viterbi search. |
92 | | struct ViterbiStateEntry : public ELIST<ViterbiStateEntry>::LINK { |
93 | | ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, |
94 | | const LMConsistencyInfo &ci, const AssociateStats &as, |
95 | | LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, |
96 | | const char *debug_uch) |
97 | 22.3M | : curr_b(b) |
98 | 22.3M | , parent_vse(pe) |
99 | 22.3M | , competing_vse(nullptr) |
100 | 22.3M | , dawg_info(d) |
101 | 22.3M | , ngram_info(n) |
102 | 22.3M | , cost(c) |
103 | 22.3M | , ratings_sum(b->rating()) |
104 | 22.3M | , min_certainty(b->certainty()) |
105 | 22.3M | , adapted(b->IsAdapted()) |
106 | 22.3M | , length(1) |
107 | 22.3M | , outline_length(ol) |
108 | 22.3M | , consistency_info(ci) |
109 | 22.3M | , associate_stats(as) |
110 | 22.3M | , top_choice_flags(tcf) |
111 | 22.3M | , updated(true) { |
112 | 22.3M | debug_str = (debug_uch == nullptr) ? nullptr : new std::string(); |
113 | 22.3M | if (pe != nullptr) { |
114 | 20.7M | ratings_sum += pe->ratings_sum; |
115 | 20.7M | if (pe->min_certainty < min_certainty) { |
116 | 11.4M | min_certainty = pe->min_certainty; |
117 | 11.4M | } |
118 | 20.7M | adapted += pe->adapted; |
119 | 20.7M | length += pe->length; |
120 | 20.7M | outline_length += pe->outline_length; |
121 | 20.7M | if (debug_uch != nullptr) { |
122 | 0 | *debug_str += *(pe->debug_str); |
123 | 0 | } |
124 | 20.7M | } |
125 | 22.3M | if (debug_str != nullptr && debug_uch != nullptr) { |
126 | 0 | *debug_str += debug_uch; |
127 | 0 | } |
128 | 22.3M | } |
129 | 22.3M | ~ViterbiStateEntry() { |
130 | 22.3M | delete dawg_info; |
131 | 22.3M | delete ngram_info; |
132 | 22.3M | delete debug_str; |
133 | 22.3M | } |
134 | | /// Comparator function for sorting ViterbiStateEntry_LISTs in |
135 | | /// non-increasing order of costs. |
136 | 16.0M | static int Compare(const ViterbiStateEntry *ve1, const ViterbiStateEntry *ve2) { |
137 | 16.0M | return (ve1->cost < ve2->cost) ? -1 : 1; |
138 | 16.0M | } |
139 | 0 | inline bool Consistent() const { |
140 | 0 | if (dawg_info != nullptr && consistency_info.NumInconsistentCase() == 0) { |
141 | 0 | return true; |
142 | 0 | } |
143 | 0 | return consistency_info.Consistent(); |
144 | 0 | } |
145 | | /// Returns true if this VSE has an alphanumeric character as its classifier |
146 | | /// result. |
147 | 66.5M | bool HasAlnumChoice(const UNICHARSET &unicharset) { |
148 | 66.5M | if (curr_b == nullptr) { |
149 | 0 | return false; |
150 | 0 | } |
151 | 66.5M | UNICHAR_ID unichar_id = curr_b->unichar_id(); |
152 | 66.5M | if (unicharset.get_isalpha(unichar_id) || unicharset.get_isdigit(unichar_id)) { |
153 | 35.9M | return true; |
154 | 35.9M | } |
155 | 30.6M | return false; |
156 | 66.5M | } |
157 | | void Print(const char *msg) const; |
158 | | |
159 | | /// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). |
160 | | BLOB_CHOICE *curr_b; |
161 | | ViterbiStateEntry *parent_vse; |
162 | | /// Pointer to a case-competing ViterbiStateEntry in the same list that |
163 | | /// represents a path ending in the same letter of the opposite case. |
164 | | ViterbiStateEntry *competing_vse; |
165 | | |
166 | | /// Extra information maintained by Dawg language model component |
167 | | /// (owned by ViterbiStateEntry). |
168 | | LanguageModelDawgInfo *dawg_info; |
169 | | |
170 | | /// Extra information maintained by Ngram language model component |
171 | | /// (owned by ViterbiStateEntry). |
172 | | LanguageModelNgramInfo *ngram_info; |
173 | | |
174 | | /// UTF8 string representing the path corresponding to this vse. |
175 | | /// Populated only in when language_model_debug_level > 0. |
176 | | std::string *debug_str; |
177 | | |
178 | | /// The cost is an adjusted ratings sum, that is adjusted by all the language |
179 | | /// model components that use Viterbi search. |
180 | | float cost; |
181 | | |
182 | | /// Various information about the characters on the path represented |
183 | | /// by this ViterbiStateEntry. |
184 | | float ratings_sum; ///< sum of ratings of character on the path |
185 | | float min_certainty; ///< minimum certainty on the path |
186 | | int adapted; ///< number of BLOB_CHOICES from adapted templates |
187 | | int length; ///< number of characters on the path |
188 | | float outline_length; ///< length of the outline so far |
189 | | LMConsistencyInfo consistency_info; ///< path consistency info |
190 | | AssociateStats associate_stats; ///< character widths/gaps/seams |
191 | | |
192 | | /// Flags for marking the entry as a top choice path with |
193 | | /// the smallest rating or lower/upper case letters). |
194 | | LanguageModelFlagsType top_choice_flags; |
195 | | |
196 | | bool updated; ///< set to true if the entry has just been created/updated |
197 | | }; |
198 | | |
199 | | ELISTIZEH(ViterbiStateEntry) |
200 | | |
201 | | /// Struct to store information maintained by various language model components. |
202 | | struct LanguageModelState { |
203 | | LanguageModelState() |
204 | 814k | : viterbi_state_entries_prunable_length(0) |
205 | | , viterbi_state_entries_prunable_max_cost(FLT_MAX) |
206 | 814k | , viterbi_state_entries_length(0) {} |
207 | 814k | ~LanguageModelState() = default; |
208 | | |
209 | | /// Clears the viterbi search state back to its initial conditions. |
210 | | void Clear(); |
211 | | |
212 | | void Print(const char *msg); |
213 | | |
214 | | /// Storage for the Viterbi state. |
215 | | ViterbiStateEntry_LIST viterbi_state_entries; |
216 | | /// Number and max cost of prunable paths in viterbi_state_entries. |
217 | | int viterbi_state_entries_prunable_length; |
218 | | float viterbi_state_entries_prunable_max_cost; |
219 | | /// Total number of entries in viterbi_state_entries. |
220 | | int viterbi_state_entries_length; |
221 | | }; |
222 | | |
223 | | /// Bundle together all the things pertaining to the best choice/state. |
224 | | struct BestChoiceBundle { |
225 | 155k | explicit BestChoiceBundle(int matrix_dimension) : updated(false), best_vse(nullptr) { |
226 | 155k | beam.reserve(matrix_dimension); |
227 | 762k | for (int i = 0; i < matrix_dimension; ++i) { |
228 | 606k | beam.push_back(new LanguageModelState); |
229 | 606k | } |
230 | 155k | } |
231 | 155k | ~BestChoiceBundle() { |
232 | 814k | for (auto &state : beam) { |
233 | 814k | delete state; |
234 | 814k | } |
235 | 155k | } |
236 | | |
237 | | /// Flag to indicate whether anything was changed. |
238 | | bool updated; |
239 | | /// Places to try to fix the word suggested by ambiguity checking. |
240 | | DANGERR fixpt; |
241 | | /// The beam. One LanguageModelState containing a list of ViterbiStateEntry |
242 | | /// per row in the ratings matrix containing all VSEs whose BLOB_CHOICE is |
243 | | /// somewhere in the corresponding row. |
244 | | std::vector<LanguageModelState *> beam; |
245 | | /// Best ViterbiStateEntry and BLOB_CHOICE. |
246 | | ViterbiStateEntry *best_vse; |
247 | | }; |
248 | | |
249 | | } // namespace tesseract |
250 | | |
251 | | #endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ |