Coverage Report

Created: 2025-07-12 06:44

/src/tesseract/src/wordrec/lm_state.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        lm_state.h
3
// Description: Structures and functionality for capturing the state of
4
//              segmentation search guided by the language model.
5
// Author:      Rika Antonova
6
//
7
// (C) Copyright 2012, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
#ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
21
#define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
22
23
#include <tesseract/unichar.h> // for UNICHAR_ID
24
#include "associate.h"         // for AssociateStats
25
#include "dawg.h"              // for DawgPositionVector
26
#include "elst.h"              // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
27
#include "lm_consistency.h"    // for LMConsistencyInfo
28
#include "ratngs.h"            // for BLOB_CHOICE, PermuterType
29
#include "stopper.h"           // for DANGERR
30
#include "unicharset.h"        // for UNICHARSET
31
32
namespace tesseract {
33
34
/// Used for expressing various language model flags.
35
using LanguageModelFlagsType = unsigned char;
36
37
/// The following structs are used for storing the state of the language model
38
/// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs
39
/// and the links are the relationships between the underlying blobs (see
40
/// segsearch.h for a more detailed description).
41
///
42
/// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has
43
/// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi
44
/// search leading up to and including this BLOB_CHOICE.
45
///
46
/// Each ViterbiStateEntry contains information from various components of the
47
/// language model: dawgs in which the path is found, character ngram model
48
/// probability of the path, script/chartype/font consistency info, state for
49
/// language-specific heuristics (e.g. hyphenated and compound words,
50
/// lower/upper case preferences, etc).
51
///
52
/// Each ViterbiStateEntry also contains the parent pointer, so that the path
53
/// that it represents (WERD_CHOICE) can be constructed by following these
54
/// parent pointers.
55
56
/// Struct for storing additional information used by Dawg language model
57
/// component. It stores the set of active dawgs in which the sequence of
58
/// letters on a path can be found.
59
struct LanguageModelDawgInfo {
60
  LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt)
61
0
      : active_dawgs(*a), permuter(pt) {}
62
  DawgPositionVector active_dawgs;
63
  PermuterType permuter;
64
};
65
66
/// Struct for storing additional information used by Ngram language model
67
/// component.
68
struct LanguageModelNgramInfo {
69
  LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
70
0
      : context(c)
71
0
      , context_unichar_step_len(l)
72
0
      , pruned(p)
73
0
      , ngram_cost(nc)
74
0
      , ngram_and_classifier_cost(ncc) {}
75
  std::string context; ///< context string
76
  /// Length of the context measured by advancing using UNICHAR::utf8_step()
77
  /// (should be at most the order of the character ngram model used).
78
  int context_unichar_step_len;
79
  /// The paths with pruned set are pruned out from the perspective of the
80
  /// character ngram model. They are explored further because they represent
81
  /// a dictionary match or a top choice. Thus ngram_info is still computed
82
  /// for them in order to calculate the combined cost.
83
  bool pruned;
84
  /// -ln(P_ngram_model(path))
85
  float ngram_cost;
86
  /// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
87
  float ngram_and_classifier_cost;
88
};
89
90
/// Struct for storing the information about a path in the segmentation graph
91
/// explored by Viterbi search.
92
struct ViterbiStateEntry : public ELIST<ViterbiStateEntry>::LINK {
93
  ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol,
94
                    const LMConsistencyInfo &ci, const AssociateStats &as,
95
                    LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n,
96
                    const char *debug_uch)
97
22.3M
      : curr_b(b)
98
22.3M
      , parent_vse(pe)
99
22.3M
      , competing_vse(nullptr)
100
22.3M
      , dawg_info(d)
101
22.3M
      , ngram_info(n)
102
22.3M
      , cost(c)
103
22.3M
      , ratings_sum(b->rating())
104
22.3M
      , min_certainty(b->certainty())
105
22.3M
      , adapted(b->IsAdapted())
106
22.3M
      , length(1)
107
22.3M
      , outline_length(ol)
108
22.3M
      , consistency_info(ci)
109
22.3M
      , associate_stats(as)
110
22.3M
      , top_choice_flags(tcf)
111
22.3M
      , updated(true) {
112
22.3M
    debug_str = (debug_uch == nullptr) ? nullptr : new std::string();
113
22.3M
    if (pe != nullptr) {
114
20.7M
      ratings_sum += pe->ratings_sum;
115
20.7M
      if (pe->min_certainty < min_certainty) {
116
11.4M
        min_certainty = pe->min_certainty;
117
11.4M
      }
118
20.7M
      adapted += pe->adapted;
119
20.7M
      length += pe->length;
120
20.7M
      outline_length += pe->outline_length;
121
20.7M
      if (debug_uch != nullptr) {
122
0
        *debug_str += *(pe->debug_str);
123
0
      }
124
20.7M
    }
125
22.3M
    if (debug_str != nullptr && debug_uch != nullptr) {
126
0
      *debug_str += debug_uch;
127
0
    }
128
22.3M
  }
129
22.3M
  ~ViterbiStateEntry() {
130
22.3M
    delete dawg_info;
131
22.3M
    delete ngram_info;
132
22.3M
    delete debug_str;
133
22.3M
  }
134
  /// Comparator function for sorting ViterbiStateEntry_LISTs in
135
  /// non-increasing order of costs.
136
16.0M
  static int Compare(const ViterbiStateEntry *ve1, const ViterbiStateEntry *ve2) {
137
16.0M
    return (ve1->cost < ve2->cost) ? -1 : 1;
138
16.0M
  }
139
0
  inline bool Consistent() const {
140
0
    if (dawg_info != nullptr && consistency_info.NumInconsistentCase() == 0) {
141
0
      return true;
142
0
    }
143
0
    return consistency_info.Consistent();
144
0
  }
145
  /// Returns true if this VSE has an alphanumeric character as its classifier
146
  /// result.
147
66.5M
  bool HasAlnumChoice(const UNICHARSET &unicharset) {
148
66.5M
    if (curr_b == nullptr) {
149
0
      return false;
150
0
    }
151
66.5M
    UNICHAR_ID unichar_id = curr_b->unichar_id();
152
66.5M
    if (unicharset.get_isalpha(unichar_id) || unicharset.get_isdigit(unichar_id)) {
153
35.9M
      return true;
154
35.9M
    }
155
30.6M
    return false;
156
66.5M
  }
157
  void Print(const char *msg) const;
158
159
  /// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
160
  BLOB_CHOICE *curr_b;
161
  ViterbiStateEntry *parent_vse;
162
  /// Pointer to a case-competing ViterbiStateEntry in the same list that
163
  /// represents a path ending in the same letter of the opposite case.
164
  ViterbiStateEntry *competing_vse;
165
166
  /// Extra information maintained by Dawg language model component
167
  /// (owned by ViterbiStateEntry).
168
  LanguageModelDawgInfo *dawg_info;
169
170
  /// Extra information maintained by Ngram language model component
171
  /// (owned by ViterbiStateEntry).
172
  LanguageModelNgramInfo *ngram_info;
173
174
  /// UTF8 string representing the path corresponding to this vse.
175
  /// Populated only in when language_model_debug_level > 0.
176
  std::string *debug_str;
177
178
  /// The cost is an adjusted ratings sum, that is adjusted by all the language
179
  /// model components that use Viterbi search.
180
  float cost;
181
182
  /// Various information about the characters on the path represented
183
  /// by this ViterbiStateEntry.
184
  float ratings_sum;                  ///< sum of ratings of character on the path
185
  float min_certainty;                ///< minimum certainty on the path
186
  int adapted;                        ///< number of BLOB_CHOICES from adapted templates
187
  int length;                         ///< number of characters on the path
188
  float outline_length;               ///< length of the outline so far
189
  LMConsistencyInfo consistency_info; ///< path consistency info
190
  AssociateStats associate_stats;     ///< character widths/gaps/seams
191
192
  /// Flags for marking the entry as a top choice path with
193
  /// the smallest rating or lower/upper case letters).
194
  LanguageModelFlagsType top_choice_flags;
195
196
  bool updated; ///< set to true if the entry has just been created/updated
197
};
198
199
ELISTIZEH(ViterbiStateEntry)
200
201
/// Struct to store information maintained by various language model components.
202
struct LanguageModelState {
203
  LanguageModelState()
204
814k
      : viterbi_state_entries_prunable_length(0)
205
      , viterbi_state_entries_prunable_max_cost(FLT_MAX)
206
814k
      , viterbi_state_entries_length(0) {}
207
814k
  ~LanguageModelState() = default;
208
209
  /// Clears the viterbi search state back to its initial conditions.
210
  void Clear();
211
212
  void Print(const char *msg);
213
214
  /// Storage for the Viterbi state.
215
  ViterbiStateEntry_LIST viterbi_state_entries;
216
  /// Number and max cost of prunable paths in viterbi_state_entries.
217
  int viterbi_state_entries_prunable_length;
218
  float viterbi_state_entries_prunable_max_cost;
219
  /// Total number of entries in viterbi_state_entries.
220
  int viterbi_state_entries_length;
221
};
222
223
/// Bundle together all the things pertaining to the best choice/state.
224
struct BestChoiceBundle {
225
155k
  explicit BestChoiceBundle(int matrix_dimension) : updated(false), best_vse(nullptr) {
226
155k
    beam.reserve(matrix_dimension);
227
762k
    for (int i = 0; i < matrix_dimension; ++i) {
228
606k
      beam.push_back(new LanguageModelState);
229
606k
    }
230
155k
  }
231
155k
  ~BestChoiceBundle() {
232
814k
    for (auto &state : beam) {
233
814k
      delete state;
234
814k
    }
235
155k
  }
236
237
  /// Flag to indicate whether anything was changed.
238
  bool updated;
239
  /// Places to try to fix the word suggested by ambiguity checking.
240
  DANGERR fixpt;
241
  /// The beam. One LanguageModelState containing a list of ViterbiStateEntry
242
  /// per row in the ratings matrix containing all VSEs whose BLOB_CHOICE is
243
  /// somewhere in the corresponding row.
244
  std::vector<LanguageModelState *> beam;
245
  /// Best ViterbiStateEntry and BLOB_CHOICE.
246
  ViterbiStateEntry *best_vse;
247
};
248
249
} // namespace tesseract
250
251
#endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_