/src/tesseract/src/wordrec/lm_state.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: lm_state.cpp |
3 | | // Description: Structures and functionality for capturing the state of |
4 | | // segmentation search guided by the language model. |
5 | | // Author: Rika Antonova |
6 | | // |
7 | | // (C) Copyright 2012, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #include "lm_state.h" |
21 | | |
22 | | namespace tesseract { |
23 | | |
24 | 0 | void ViterbiStateEntry::Print(const char *msg) const { |
25 | 0 | tprintf("%s ViterbiStateEntry", msg); |
26 | 0 | if (updated) { |
27 | 0 | tprintf("(NEW)"); |
28 | 0 | } |
29 | 0 | if (this->debug_str != nullptr) { |
30 | 0 | tprintf(" str=%s", this->debug_str->c_str()); |
31 | 0 | } |
32 | 0 | tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, this->length, |
33 | 0 | this->cost); |
34 | 0 | if (this->top_choice_flags) { |
35 | 0 | tprintf(" top_choice_flags=0x%x", this->top_choice_flags); |
36 | 0 | } |
37 | 0 | if (!this->Consistent()) { |
38 | 0 | tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)", |
39 | 0 | this->consistency_info.NumInconsistentPunc(), |
40 | 0 | this->consistency_info.NumInconsistentCase(), |
41 | 0 | this->consistency_info.NumInconsistentChartype(), |
42 | 0 | this->consistency_info.inconsistent_script, this->consistency_info.inconsistent_font); |
43 | 0 | } |
44 | 0 | if (this->dawg_info) { |
45 | 0 | tprintf(" permuter=%d", this->dawg_info->permuter); |
46 | 0 | } |
47 | 0 | if (this->ngram_info) { |
48 | 0 | tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d", |
49 | 0 | this->ngram_info->ngram_and_classifier_cost, this->ngram_info->context.c_str(), |
50 | 0 | this->ngram_info->pruned); |
51 | 0 | } |
52 | 0 | if (this->associate_stats.shape_cost > 0.0f) { |
53 | 0 | tprintf(" shape_cost=%g", this->associate_stats.shape_cost); |
54 | 0 | } |
55 | 0 | tprintf(" %s", XHeightConsistencyEnumName[this->consistency_info.xht_decision]); |
56 | |
|
57 | 0 | tprintf("\n"); |
58 | 0 | } |
59 | | |
60 | | /// Clears the viterbi search state back to its initial conditions. |
61 | 0 | void LanguageModelState::Clear() { |
62 | 0 | viterbi_state_entries.clear(); |
63 | 0 | viterbi_state_entries_prunable_length = 0; |
64 | 0 | viterbi_state_entries_prunable_max_cost = FLT_MAX; |
65 | 0 | viterbi_state_entries_length = 0; |
66 | 0 | } |
67 | | |
68 | 0 | void LanguageModelState::Print(const char *msg) { |
69 | 0 | tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg, |
70 | 0 | viterbi_state_entries_prunable_max_cost, viterbi_state_entries_prunable_length, |
71 | 0 | viterbi_state_entries_length); |
72 | 0 | ViterbiStateEntry_IT vit(&viterbi_state_entries); |
73 | 0 | for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { |
74 | 0 | vit.data()->Print(""); |
75 | 0 | } |
76 | 0 | } |
77 | | |
78 | | } // namespace tesseract |