Coverage Report

Created: 2025-11-16 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccstruct/blamer.h
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        blamer.h
3
// Description: Module allowing precise error causes to be allocated.
4
// Author:      Rike Antonova
5
// Refactored:  Ray Smith
6
//
7
// (C) Copyright 2013, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
21
#define TESSERACT_CCSTRUCT_BLAMER_H_
22
23
#ifdef HAVE_CONFIG_H
24
#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
25
#endif
26
#include "boxword.h" // for BoxWord
27
#ifndef DISABLED_LEGACY_ENGINE
28
#  include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra...
29
#endif                                 //  ndef DISABLED_LEGACY_ENGINE
30
#include "ratngs.h"                    // for BLOB_CHOICE_LIST (ptr only)
31
#include "rect.h"                      // for TBOX
32
#include "tprintf.h"                   // for tprintf
33
34
#include <tesseract/unichar.h> // for UNICHAR_ID
35
36
#include <cstdint> // for int16_t
37
#include <cstring> // for memcpy
38
#include <vector>  // for std::vector
39
40
namespace tesseract {
41
42
class DENORM;
43
class MATRIX;
44
class UNICHARSET;
45
class WERD_RES;
46
47
struct MATRIX_COORD;
48
struct TWERD;
49
50
class LMPainPoints;
51
52
// Tolerance used by the blamer when comparing bounding boxes.
// NOTE(review): presumably in image pixels - confirm against the users in
// blamer.cpp.
static constexpr int16_t kBlamerBoxTolerance{5};
53
54
// Enum for expressing the source of error.
// The enumerators are contiguous and start at 0, so IRR_NUM_REASONS is the
// number of real reasons.
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
enum IncorrectResultReason {
  // The text recorded in best choice == truth text.
  IRR_CORRECT,
  // Either: the top choice is incorrect and is a dictionary word (the language
  // model is unlikely to help correct such errors, so blame the classifier).
  // Or: the correct unichar was not included at all in the shortlist produced
  // by the classifier.
  IRR_CLASSIFIER,
  // The chopper has not found one or more splits that correspond to the
  // correct character bounding boxes recorded in BlamerBundle::truth_word.
  IRR_CHOPPER,
  // The classifier did include the correct unichars for each blob in the
  // correct segmentation, however its rating could have been too bad to allow
  // the language model to pull out the correct choice. On the other hand, the
  // strength of the language model might have been too weak to favor the
  // correct answer. We call this case a classifier-language model tradeoff
  // error.
  IRR_CLASS_LM_TRADEOFF,
  // Page layout failed to produce the correct bounding box. Blame page layout
  // if the truth was not found for the word, which implies that the bounding
  // box of the word was incorrect (no truth word had a similar bounding box).
  IRR_PAGE_LAYOUT,
  // A SegSearch heuristic prevented one or more blobs from the correct
  // segmentation state from being classified (e.g. the blob was too wide).
  IRR_SEGSEARCH_HEUR,
  // The correct segmentation state was not explored because of poor SegSearch
  // pain point prioritization. We blame SegSearch pain point prioritization
  // if the best rating of a choice constructed from the correct segmentation
  // is better than that of the best choice (i.e. had we explored the correct
  // segmentation state, the language model would have picked the correct
  // choice).
  IRR_SEGSEARCH_PP,
  // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run the chopper on a
  // word, and thus use the old language model (permuters).
  // TODO(antonova): integrate the new language model with chopper
  IRR_CLASS_OLD_LM_TRADEOFF,
  // If there is an incorrect adaptive template match with a better score than
  // a correct one (either pre-trained or adapted), mark this as an adaption
  // error.
  IRR_ADAPTION,
  // split_and_recog_word() failed to find a suitable split in truth.
  IRR_NO_TRUTH_SPLIT,
  // Truth is not available for this word (e.g. when words in the corrected
  // content file are turned into ~~~~ because an appropriate alignment was not
  // found).
  IRR_NO_TRUTH,
  // The text recorded in best choice != truth text, but none of the above
  // reasons are set.
  IRR_UNKNOWN,

  // Number of reasons above; not a reason itself. Must stay last.
  IRR_NUM_REASONS
};
105
106
// Blamer-related information to determine the source of errors.
107
struct BlamerBundle {
108
  static const char *IncorrectReasonName(IncorrectResultReason irr);
109
  BlamerBundle()
110
0
      : truth_has_char_boxes_(false)
111
0
      , incorrect_result_reason_(IRR_CORRECT)
112
0
      , lattice_data_(nullptr) {
113
0
    ClearResults();
114
0
  }
115
0
  BlamerBundle(const BlamerBundle &other) {
116
0
    this->CopyTruth(other);
117
0
    this->CopyResults(other);
118
0
  }
119
0
  ~BlamerBundle() {
120
0
    delete[] lattice_data_;
121
0
  }
122
123
  // Accessors.
124
0
  std::string TruthString() const {
125
0
    std::string truth_str;
126
0
    for (auto &text : truth_text_) {
127
0
      truth_str += text;
128
0
    }
129
0
    return truth_str;
130
0
  }
131
0
  IncorrectResultReason incorrect_result_reason() const {
132
0
    return incorrect_result_reason_;
133
0
  }
134
0
  bool NoTruth() const {
135
0
    return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT;
136
0
  }
137
0
  bool HasDebugInfo() const {
138
0
    return debug_.length() > 0 || misadaption_debug_.length() > 0;
139
0
  }
140
0
  const std::string &debug() const {
141
0
    return debug_;
142
0
  }
143
0
  const std::string &misadaption_debug() const {
144
0
    return misadaption_debug_;
145
0
  }
146
0
  void UpdateBestRating(float rating) {
147
0
    if (rating < best_correctly_segmented_rating_) {
148
0
      best_correctly_segmented_rating_ = rating;
149
0
    }
150
0
  }
151
0
  int correct_segmentation_length() const {
152
0
    return correct_segmentation_cols_.size();
153
0
  }
154
  // Returns true if the given ratings matrix col,row position is included
155
  // in the correct segmentation path at the given index.
156
0
  bool MatrixPositionCorrect(int index, const MATRIX_COORD &coord) {
157
0
    return correct_segmentation_cols_[index] == coord.col &&
158
0
           correct_segmentation_rows_[index] == coord.row;
159
0
  }
160
0
  void set_best_choice_is_dict_and_top_choice(bool value) {
161
0
    best_choice_is_dict_and_top_choice_ = value;
162
0
  }
163
0
  const char *lattice_data() const {
164
0
    return lattice_data_;
165
0
  }
166
0
  int lattice_size() const {
167
0
    return lattice_size_; // size of lattice_data in bytes
168
0
  }
169
0
  void set_lattice_data(const char *data, int size) {
170
0
    lattice_size_ = size;
171
0
    delete[] lattice_data_;
172
0
    lattice_data_ = new char[lattice_size_];
173
0
    memcpy(lattice_data_, data, lattice_size_);
174
0
  }
175
#ifndef DISABLED_LEGACY_ENGINE
176
0
  const tesseract::ParamsTrainingBundle &params_training_bundle() const {
177
0
    return params_training_bundle_;
178
0
  }
179
  // Adds a new ParamsTrainingHypothesis to the current hypothesis list.
180
0
  void AddHypothesis(const tesseract::ParamsTrainingHypothesis &hypo) {
181
0
    params_training_bundle_.AddHypothesis(hypo);
182
0
  }
183
#endif // ndef DISABLED_LEGACY_ENGINE
184
185
  // Functions to setup the blamer.
186
  // Whole word string, whole word bounding box.
187
  void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box);
188
  // Single "character" string, "character" bounding box.
189
  // May be called multiple times to indicate the characters in a word.
190
  void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box);
191
  // Marks that there is something wrong with the truth text, like it contains
192
  // reject characters.
193
  void SetRejectedTruth();
194
195
  // Returns true if the provided word_choice is correct.
196
  bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const;
197
198
0
  void ClearResults() {
199
0
    norm_truth_word_.DeleteAllBoxes();
200
0
    norm_box_tolerance_ = 0;
201
0
    if (!NoTruth()) {
202
0
      incorrect_result_reason_ = IRR_CORRECT;
203
0
    }
204
0
    debug_ = "";
205
0
    segsearch_is_looking_for_blame_ = false;
206
0
    best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
207
0
    correct_segmentation_cols_.clear();
208
0
    correct_segmentation_rows_.clear();
209
0
    best_choice_is_dict_and_top_choice_ = false;
210
0
    delete[] lattice_data_;
211
0
    lattice_data_ = nullptr;
212
0
    lattice_size_ = 0;
213
0
  }
214
0
  void CopyTruth(const BlamerBundle &other) {
215
0
    truth_has_char_boxes_ = other.truth_has_char_boxes_;
216
0
    truth_word_ = other.truth_word_;
217
0
    truth_text_ = other.truth_text_;
218
0
    incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
219
0
  }
220
0
  void CopyResults(const BlamerBundle &other) {
221
0
    norm_truth_word_ = other.norm_truth_word_;
222
0
    norm_box_tolerance_ = other.norm_box_tolerance_;
223
0
    incorrect_result_reason_ = other.incorrect_result_reason_;
224
0
    segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
225
0
    best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
226
0
    correct_segmentation_cols_ = other.correct_segmentation_cols_;
227
0
    correct_segmentation_rows_ = other.correct_segmentation_rows_;
228
0
    best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_;
229
0
    if (other.lattice_data_ != nullptr) {
230
0
      lattice_data_ = new char[other.lattice_size_];
231
0
      memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
232
0
      lattice_size_ = other.lattice_size_;
233
0
    } else {
234
0
      lattice_data_ = nullptr;
235
0
    }
236
0
  }
237
  const char *IncorrectReason() const;
238
239
  // Appends choice and truth details to the given debug string.
240
  void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug);
241
242
  // Sets up the norm_truth_word from truth_word using the given DENORM.
243
  void SetupNormTruthWord(const DENORM &denorm);
244
245
  // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
246
  // bundles) where the right edge/ of the left-hand word is word1_right,
247
  // and the left edge of the right-hand word is word2_left.
248
  void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
249
                   BlamerBundle *bundle2) const;
250
  // "Joins" the blames from bundle1 and bundle2 into *this.
251
  void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug);
252
253
  // If a blob with the same bounding box as one of the truth character
254
  // bounding boxes is not classified as the corresponding truth character
255
  // blames character classifier for incorrect answer.
256
  void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
257
                       const BLOB_CHOICE_LIST &choices, bool debug);
258
259
  // Checks whether chops were made at all the character bounding box
260
  // boundaries in word->truth_word. If not - blames the chopper for an
261
  // incorrect answer.
262
  void SetChopperBlame(const WERD_RES *word, bool debug);
263
  // Blames the classifier or the language model if, after running only the
264
  // chopper, best_choice is incorrect and no blame has been yet set.
265
  // Blames the classifier if best_choice is classifier's top choice and is a
266
  // dictionary word (i.e. language model could not have helped).
267
  // Otherwise, blames the language model (formerly permuter word adjustment).
268
  void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
269
                                  bool valid_permuter, bool debug);
270
  // Sets up the correct_segmentation_* to mark the correct bounding boxes.
271
  void SetupCorrectSegmentation(const TWERD *word, bool debug);
272
273
  // Returns true if a guided segmentation search is needed.
274
  bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
275
  // Setup ready to guide the segmentation search to the correct segmentation.
276
  void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
277
                        bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
278
                        double max_char_wh_ratio, WERD_RES *word_res);
279
  // Returns true if the guided segsearch is in progress.
280
  bool GuidedSegsearchStillGoing() const;
281
  // The segmentation search has ended. Sets the blame appropriately.
282
  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);
283
284
  // If the bundle is null or still does not indicate the correct result,
285
  // fix it and use some backup reason for the blame.
286
  static void LastChanceBlame(bool debug, WERD_RES *word);
287
288
  // Sets the misadaption debug if this word is incorrect, as this word is
289
  // being adapted to.
290
  void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);
291
292
private:
293
  // Copy assignment operator (currently unused, therefore private).
294
  BlamerBundle &operator=(const BlamerBundle &other) = delete;
295
  void SetBlame(IncorrectResultReason irr, const std::string &msg, const WERD_CHOICE *choice,
296
0
                bool debug) {
297
0
    incorrect_result_reason_ = irr;
298
0
    debug_ = IncorrectReason();
299
0
    debug_ += " to blame: ";
300
0
    FillDebugString(msg, choice, debug_);
301
0
    if (debug) {
302
0
      tprintf("SetBlame(): %s", debug_.c_str());
303
0
    }
304
0
  }
305
306
private:
307
  // Set to true when bounding boxes for individual unichars are recorded.
308
  bool truth_has_char_boxes_;
309
  // Variables used by the segmentation search when looking for the blame.
310
  // Set to true while segmentation search is continued after the usual
311
  // termination condition in order to look for the blame.
312
  bool segsearch_is_looking_for_blame_;
313
  // Set to true if best choice is a dictionary word and
314
  // classifier's top choice.
315
  bool best_choice_is_dict_and_top_choice_;
316
  // Tolerance for bounding box comparisons in normalized space.
317
  int norm_box_tolerance_;
318
  // The true_word (in the original image coordinate space) contains ground
319
  // truth bounding boxes for this WERD_RES.
320
  tesseract::BoxWord truth_word_;
321
  // Same as above, but in normalized coordinates
322
  // (filled in by WERD_RES::SetupForRecognition()).
323
  tesseract::BoxWord norm_truth_word_;
324
  // Contains ground truth unichar for each of the bounding boxes in truth_word.
325
  std::vector<std::string> truth_text_;
326
  // The reason for incorrect OCR result.
327
  IncorrectResultReason incorrect_result_reason_;
328
  // Debug text associated with the blame.
329
  std::string debug_;
330
  // Misadaption debug information (filled in if this word was misadapted to).
331
  std::string misadaption_debug_;
332
  // Vectors populated by SegSearch to indicate column and row indices that
333
  // correspond to blobs with correct bounding boxes.
334
  std::vector<int> correct_segmentation_cols_;
335
  std::vector<int> correct_segmentation_rows_;
336
  // Best rating for correctly segmented path
337
  // (set and used by SegSearch when looking for blame).
338
  float best_correctly_segmented_rating_;
339
  int lattice_size_; // size of lattice_data in bytes
340
  // Serialized segmentation search lattice.
341
  char *lattice_data_;
342
  // Information about hypotheses (paths) explored by the segmentation search.
343
#ifndef DISABLED_LEGACY_ENGINE
344
  tesseract::ParamsTrainingBundle params_training_bundle_;
345
#endif // ndef DISABLED_LEGACY_ENGINE
346
};
347
348
} // namespace tesseract
349
350
#endif // TESSERACT_CCSTRUCT_BLAMER_H_