/src/tesseract/src/ccstruct/blamer.h

Source
///////////////////////////////////////////////////////////////////////
// File:        blamer.h
// Description: Module allowing precise error causes to be allocated.
// Author:      Rike Antonova
// Refactored:  Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
#define TESSERACT_CCSTRUCT_BLAMER_H_

#ifdef HAVE_CONFIG_H
#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "boxword.h" // for BoxWord
#ifndef DISABLED_LEGACY_ENGINE
#  include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra...
#endif                                 //  ndef DISABLED_LEGACY_ENGINE
#include "ratngs.h"                    // for BLOB_CHOICE_LIST (ptr only)
#include "rect.h"                      // for TBOX
#include "tprintf.h"                   // for tprintf

#include <tesseract/unichar.h> // for UNICHAR_ID

#include <cstdint> // for int16_t
#include <cstring> // for memcpy
#include <vector>  // for std::vector

namespace tesseract {

class DENORM;
class MATRIX;
class UNICHARSET;
class WERD_RES;

struct MATRIX_COORD;
struct TWERD;

class LMPainPoints;

// Box tolerance constant for the blamer.
// NOTE(review): presumably the slack allowed when comparing a box against a
// truth box in image coordinates -- confirm at the use sites.
static constexpr int16_t kBlamerBoxTolerance = 5;

// Enum for expressing the source of error.
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
// The enumerators carry no explicit values, so they are numbered consecutively
// from 0 in declaration order.
enum IncorrectResultReason {
  // The text recorded in best choice == truth text.
  IRR_CORRECT,
  // Either: Top choice is incorrect and is a dictionary word (language model
  // is unlikely to help correct such errors, so blame the classifier).
  // Or: the correct unichar was not included in the shortlist produced by the
  // classifier at all.
  IRR_CLASSIFIER,
  // The chopper did not find one or more splits that correspond to the
  // correct character bounding boxes recorded in BlamerBundle::truth_word.
  IRR_CHOPPER,
  // Classifier did include correct unichars for each blob in the correct
  // segmentation, however its rating could have been too bad to allow the
  // language model to pull out the correct choice. On the other hand the
  // strength of the language model might have been too weak to favor the
  // correct answer, so we call this case a classifier-language model
  // tradeoff error.
  IRR_CLASS_LM_TRADEOFF,
  // Page layout failed to produce the correct bounding box. Blame page layout
  // if the truth was not found for the word, which implies that the bounding
  // box of the word was incorrect (no truth word had a similar bounding box).
  IRR_PAGE_LAYOUT,
  // SegSearch heuristic prevented one or more blobs from the correct
  // segmentation state from being classified (e.g. the blob was too wide).
  IRR_SEGSEARCH_HEUR,
  // The correct segmentation state was not explored because of poor SegSearch
  // pain point prioritization. We blame SegSearch pain point prioritization
  // if the best rating of a choice constructed from correct segmentation is
  // better than that of the best choice (i.e. if we got to explore the correct
  // segmentation state, language model would have picked the correct choice).
  IRR_SEGSEARCH_PP,
  // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
  // and thus use the old language model (permuters).
  // TODO(antonova): integrate the new language model with chopper
  IRR_CLASS_OLD_LM_TRADEOFF,
  // If there is an incorrect adaptive template match with a better score than
  // a correct one (either pre-trained or adapted), mark this as adaption error.
  IRR_ADAPTION,
  // split_and_recog_word() failed to find a suitable split in truth.
  IRR_NO_TRUTH_SPLIT,
  // Truth is not available for this word (e.g. when words in the corrected
  // content file are turned into ~~~~ because an appropriate alignment was not
  // found).
  IRR_NO_TRUTH,
  // The text recorded in best choice != truth text, but none of the above
  // reasons are set.
  IRR_UNKNOWN,

  // Count of the reasons above (it is the last enumerator and numbering
  // starts at 0). Keep it last.
  IRR_NUM_REASONS
};

// Blamer-related information to determine the source of errors.
struct BlamerBundle {
  static const char *IncorrectReasonName(IncorrectResultReason irr);
  BlamerBundle()
      : truth_has_char_boxes_(false)
      , incorrect_result_reason_(IRR_CORRECT)
      , lattice_data_(nullptr) {
    ClearResults();
  }
  BlamerBundle(const BlamerBundle &other) {
    this->CopyTruth(other);
    this->CopyResults(other);
  }
  ~BlamerBundle() {
    delete[] lattice_data_;
  }

  // Accessors.
  std::string TruthString() const {
    std::string truth_str;
    for (auto &text : truth_text_) {
      truth_str += text;
    }
    return truth_str;
  }
  IncorrectResultReason incorrect_result_reason() const {
    return incorrect_result_reason_;
  }
  bool NoTruth() const {
    return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT;
  }
  bool HasDebugInfo() const {
    return debug_.length() > 0 || misadaption_debug_.length() > 0;
  }
  const std::string &debug() const {
    return debug_;
  }
  const std::string &misadaption_debug() const {
    return misadaption_debug_;
  }
  void UpdateBestRating(float rating) {
    if (rating < best_correctly_segmented_rating_) {
      best_correctly_segmented_rating_ = rating;
    }
  }
  int correct_segmentation_length() const {
    return correct_segmentation_cols_.size();
  }
  // Returns true if the given ratings matrix col,row position is included
  // in the correct segmentation path at the given index.
  bool MatrixPositionCorrect(int index, const MATRIX_COORD &coord) {
    return correct_segmentation_cols_[index] == coord.col &&
           correct_segmentation_rows_[index] == coord.row;
  }
  void set_best_choice_is_dict_and_top_choice(bool value) {
    best_choice_is_dict_and_top_choice_ = value;
  }
  const char *lattice_data() const {
    return lattice_data_;
  }
  int lattice_size() const {
    return lattice_size_; // size of lattice_data in bytes
  }
  void set_lattice_data(const char *data, int size) {
    lattice_size_ = size;
    delete[] lattice_data_;
    lattice_data_ = new char[lattice_size_];
    memcpy(lattice_data_, data, lattice_size_);
  }
#ifndef DISABLED_LEGACY_ENGINE
  const tesseract::ParamsTrainingBundle &params_training_bundle() const {
    return params_training_bundle_;
  }
  // Adds a new ParamsTrainingHypothesis to the current hypothesis list.
  void AddHypothesis(const tesseract::ParamsTrainingHypothesis &hypo) {
    params_training_bundle_.AddHypothesis(hypo);
  }
#endif // ndef DISABLED_LEGACY_ENGINE

  // Functions to setup the blamer.
  // Whole word string, whole word bounding box.
  void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box);
  // Single "character" string, "character" bounding box.
  // May be called multiple times to indicate the characters in a word.
  void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box);
  // Marks that there is something wrong with the truth text, like it contains
  // reject characters.
  void SetRejectedTruth();

  // Returns true if the provided word_choice is correct.
  bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const;

  void ClearResults() {
    norm_truth_word_.DeleteAllBoxes();
    norm_box_tolerance_ = 0;
    if (!NoTruth()) {
      incorrect_result_reason_ = IRR_CORRECT;
    }
    debug_ = "";
    segsearch_is_looking_for_blame_ = false;
    best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
    correct_segmentation_cols_.clear();
    correct_segmentation_rows_.clear();
    best_choice_is_dict_and_top_choice_ = false;
    delete[] lattice_data_;
    lattice_data_ = nullptr;
    lattice_size_ = 0;
  }
  void CopyTruth(const BlamerBundle &other) {
    truth_has_char_boxes_ = other.truth_has_char_boxes_;
    truth_word_ = other.truth_word_;
    truth_text_ = other.truth_text_;
    incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
  }
  void CopyResults(const BlamerBundle &other) {
    norm_truth_word_ = other.norm_truth_word_;
    norm_box_tolerance_ = other.norm_box_tolerance_;
    incorrect_result_reason_ = other.incorrect_result_reason_;
    segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
    best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
    correct_segmentation_cols_ = other.correct_segmentation_cols_;
    correct_segmentation_rows_ = other.correct_segmentation_rows_;
    best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_;
    if (other.lattice_data_ != nullptr) {
      lattice_data_ = new char[other.lattice_size_];
      memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
      lattice_size_ = other.lattice_size_;
    } else {
      lattice_data_ = nullptr;
    }
  }
  const char *IncorrectReason() const;

  // Appends choice and truth details to the given debug string.
  void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug);

  // Sets up the norm_truth_word from truth_word using the given DENORM.
  void SetupNormTruthWord(const DENORM &denorm);

  // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
  // bundles) where the right edge/ of the left-hand word is word1_right,
  // and the left edge of the right-hand word is word2_left.
  void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
                   BlamerBundle *bundle2) const;
  // "Joins" the blames from bundle1 and bundle2 into *this.
  void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug);

  // If a blob with the same bounding box as one of the truth character
  // bounding boxes is not classified as the corresponding truth character
  // blames character classifier for incorrect answer.
  void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
                       const BLOB_CHOICE_LIST &choices, bool debug);

  // Checks whether chops were made at all the character bounding box
  // boundaries in word->truth_word. If not - blames the chopper for an
  // incorrect answer.
  void SetChopperBlame(const WERD_RES *word, bool debug);
  // Blames the classifier or the language model if, after running only the
  // chopper, best_choice is incorrect and no blame has been yet set.
  // Blames the classifier if best_choice is classifier's top choice and is a
  // dictionary word (i.e. language model could not have helped).
  // Otherwise, blames the language model (formerly permuter word adjustment).
  void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
                                  bool valid_permuter, bool debug);
  // Sets up the correct_segmentation_* to mark the correct bounding boxes.
  void SetupCorrectSegmentation(const TWERD *word, bool debug);

  // Returns true if a guided segmentation search is needed.
  bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
  // Setup ready to guide the segmentation search to the correct segmentation.
  void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
                        bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
                        double max_char_wh_ratio, WERD_RES *word_res);
  // Returns true if the guided segsearch is in progress.
  bool GuidedSegsearchStillGoing() const;
  // The segmentation search has ended. Sets the blame appropriately.
  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);

  // If the bundle is null or still does not indicate the correct result,
  // fix it and use some backup reason for the blame.
  static void LastChanceBlame(bool debug, WERD_RES *word);

  // Sets the misadaption debug if this word is incorrect, as this word is
  // being adapted to.
  void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);

private:
  // Copy assignment operator (currently unused, therefore private).
  BlamerBundle &operator=(const BlamerBundle &other) = delete;
  void SetBlame(IncorrectResultReason irr, const std::string &msg, const WERD_CHOICE *choice,
                bool debug) {
    incorrect_result_reason_ = irr;
    debug_ = IncorrectReason();
    debug_ += " to blame: ";
    FillDebugString(msg, choice, debug_);
    if (debug) {
      tprintf("SetBlame(): %s", debug_.c_str());
    }
  }

private:
  // Set to true when bounding boxes for individual unichars are recorded.
  bool truth_has_char_boxes_;
  // Variables used by the segmentation search when looking for the blame.
  // Set to true while segmentation search is continued after the usual
  // termination condition in order to look for the blame.
  bool segsearch_is_looking_for_blame_;
  // Set to true if best choice is a dictionary word and
  // classifier's top choice.
  bool best_choice_is_dict_and_top_choice_;
  // Tolerance for bounding box comparisons in normalized space.
  int norm_box_tolerance_;
  // The true_word (in the original image coordinate space) contains ground
  // truth bounding boxes for this WERD_RES.
  tesseract::BoxWord truth_word_;
  // Same as above, but in normalized coordinates
  // (filled in by WERD_RES::SetupForRecognition()).
  tesseract::BoxWord norm_truth_word_;
  // Contains ground truth unichar for each of the bounding boxes in truth_word.
  std::vector<std::string> truth_text_;
  // The reason for incorrect OCR result.
  IncorrectResultReason incorrect_result_reason_;
  // Debug text associated with the blame.
  std::string debug_;
  // Misadaption debug information (filled in if this word was misadapted to).
  std::string misadaption_debug_;
  // Vectors populated by SegSearch to indicate column and row indices that
  // correspond to blobs with correct bounding boxes.
  std::vector<int> correct_segmentation_cols_;
  std::vector<int> correct_segmentation_rows_;
  // Best rating for correctly segmented path
  // (set and used by SegSearch when looking for blame).
  float best_correctly_segmented_rating_;
  int lattice_size_; // size of lattice_data in bytes
  // Serialized segmentation search lattice.
  char *lattice_data_;
  // Information about hypotheses (paths) explored by the segmentation search.
#ifndef DISABLED_LEGACY_ENGINE
  tesseract::ParamsTrainingBundle params_training_bundle_;
#endif // ndef DISABLED_LEGACY_ENGINE
};

} // namespace tesseract

#endif // TESSERACT_CCSTRUCT_BLAMER_H_

Coverage Report

Created: 2025-11-16 06:50

Line	Count	Source
1		///////////////////////////////////////////////////////////////////////
2		// File: blamer.h
3		// Description: Module allowing precise error causes to be allocated.
4		// Author: Rike Antonova
5		// Refactored: Ray Smith
6		//
7		// (C) Copyright 2013, Google Inc.
8		// Licensed under the Apache License, Version 2.0 (the "License");
9		// you may not use this file except in compliance with the License.
10		// You may obtain a copy of the License at
11		// http://www.apache.org/licenses/LICENSE-2.0
12		// Unless required by applicable law or agreed to in writing, software
13		// distributed under the License is distributed on an "AS IS" BASIS,
14		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15		// See the License for the specific language governing permissions and
16		// limitations under the License.
17		//
18		///////////////////////////////////////////////////////////////////////
19
20		#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
21		#define TESSERACT_CCSTRUCT_BLAMER_H_
22
23		#ifdef HAVE_CONFIG_H
24		# include "config_auto.h" // DISABLED_LEGACY_ENGINE
25		#endif
26		#include "boxword.h" // for BoxWord
27		#ifndef DISABLED_LEGACY_ENGINE
28		# include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra...
29		#endif // ndef DISABLED_LEGACY_ENGINE
30		#include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only)
31		#include "rect.h" // for TBOX
32		#include "tprintf.h" // for tprintf
33
34		#include <tesseract/unichar.h> // for UNICHAR_ID
35
36		#include <cstdint> // for int16_t
37		#include <cstring> // for memcpy
38		#include <vector> // for std::vector
39
40		namespace tesseract {
41
42		class DENORM;
43		class MATRIX;
44		class UNICHARSET;
45		class WERD_RES;
46
47		struct MATRIX_COORD;
48		struct TWERD;
49
50		class LMPainPoints;
51
52		static const int16_t kBlamerBoxTolerance = 5;
53
54		// Enum for expressing the source of error.
55		// Note: Please update kIncorrectResultReasonNames when modifying this enum.
56		enum IncorrectResultReason {
57		// The text recorded in best choice == truth text
58		IRR_CORRECT,
59		// Either: Top choice is incorrect and is a dictionary word (language model
60		// is unlikely to help correct such errors, so blame the classifier).
61		// Or: the correct unichar was not included in shortlist produced by the
62		// classifier at all.
63		IRR_CLASSIFIER,
64		// Chopper have not found one or more splits that correspond to the correct
65		// character bounding boxes recorded in BlamerBundle::truth_word.
66		IRR_CHOPPER,
67		// Classifier did include correct unichars for each blob in the correct
68		// segmentation, however its rating could have been too bad to allow the
69		// language model to pull out the correct choice. On the other hand the
70		// strength of the language model might have been too weak to favor the
71		// correct answer, this we call this case a classifier-language model
72		// tradeoff error.
73		IRR_CLASS_LM_TRADEOFF,
74		// Page layout failed to produce the correct bounding box. Blame page layout
75		// if the truth was not found for the word, which implies that the bounding
76		// box of the word was incorrect (no truth word had a similar bounding box).
77		IRR_PAGE_LAYOUT,
78		// SegSearch heuristic prevented one or more blobs from the correct
79		// segmentation state to be classified (e.g. the blob was too wide).
80		IRR_SEGSEARCH_HEUR,
81		// The correct segmentaiton state was not explored because of poor SegSearch
82		// pain point prioritization. We blame SegSearch pain point prioritization
83		// if the best rating of a choice constructed from correct segmentation is
84		// better than that of the best choice (i.e. if we got to explore the correct
85		// segmentation state, language model would have picked the correct choice).
86		IRR_SEGSEARCH_PP,
87		// Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
88		// and thus use the old language model (permuters).
89		// TODO(antonova): integrate the new language mode with chopper
90		IRR_CLASS_OLD_LM_TRADEOFF,
91		// If there is an incorrect adaptive template match with a better score than
92		// a correct one (either pre-trained or adapted), mark this as adaption error.
93		IRR_ADAPTION,
94		// split_and_recog_word() failed to find a suitable split in truth.
95		IRR_NO_TRUTH_SPLIT,
96		// Truth is not available for this word (e.g. when words in corrected content
97		// file are turned into ~~~~ because an appropriate alignment was not found.
98		IRR_NO_TRUTH,
99		// The text recorded in best choice != truth text, but none of the above
100		// reasons are set.
101		IRR_UNKNOWN,
102
103		IRR_NUM_REASONS
104		};
105
106		// Blamer-related information to determine the source of errors.
107		struct BlamerBundle {
108		static const char *IncorrectReasonName(IncorrectResultReason irr);
109		BlamerBundle()
110	0	: truth_has_char_boxes_(false)
111	0	, incorrect_result_reason_(IRR_CORRECT)
112	0	, lattice_data_(nullptr) {
113	0	ClearResults();
114	0	}
115	0	BlamerBundle(const BlamerBundle &other) {
116	0	this->CopyTruth(other);
117	0	this->CopyResults(other);
118	0	}
119	0	~BlamerBundle() {
120	0	delete[] lattice_data_;
121	0	}
122
123		// Accessors.
124	0	std::string TruthString() const {
125	0	std::string truth_str;
126	0	for (auto &text : truth_text_) {
127	0	truth_str += text;
128	0	}
129	0	return truth_str;
130	0	}
131	0	IncorrectResultReason incorrect_result_reason() const {
132	0	return incorrect_result_reason_;
133	0	}
134	0	bool NoTruth() const {
135	0	return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT;
136	0	}
137	0	bool HasDebugInfo() const {
138	0	return debug_.length() > 0 || misadaption_debug_.length() > 0;
139	0	}
140	0	const std::string &debug() const {
141	0	return debug_;
142	0	}
143	0	const std::string &misadaption_debug() const {
144	0	return misadaption_debug_;
145	0	}
146	0	void UpdateBestRating(float rating) {
147	0	if (rating < best_correctly_segmented_rating_) {
148	0	best_correctly_segmented_rating_ = rating;
149	0	}
150	0	}
151	0	int correct_segmentation_length() const {
152	0	return correct_segmentation_cols_.size();
153	0	}
154		// Returns true if the given ratings matrix col,row position is included
155		// in the correct segmentation path at the given index.
156	0	bool MatrixPositionCorrect(int index, const MATRIX_COORD &coord) {
157	0	return correct_segmentation_cols_[index] == coord.col &&
158	0	correct_segmentation_rows_[index] == coord.row;
159	0	}
160	0	void set_best_choice_is_dict_and_top_choice(bool value) {
161	0	best_choice_is_dict_and_top_choice_ = value;
162	0	}
163	0	const char *lattice_data() const {
164	0	return lattice_data_;
165	0	}
166	0	int lattice_size() const {
167	0	return lattice_size_; // size of lattice_data in bytes
168	0	}
169	0	void set_lattice_data(const char *data, int size) {
170	0	lattice_size_ = size;
171	0	delete[] lattice_data_;
172	0	lattice_data_ = new char[lattice_size_];
173	0	memcpy(lattice_data_, data, lattice_size_);
174	0	}
175		#ifndef DISABLED_LEGACY_ENGINE
176	0	const tesseract::ParamsTrainingBundle &params_training_bundle() const {
177	0	return params_training_bundle_;
178	0	}
179		// Adds a new ParamsTrainingHypothesis to the current hypothesis list.
180	0	void AddHypothesis(const tesseract::ParamsTrainingHypothesis &hypo) {
181	0	params_training_bundle_.AddHypothesis(hypo);
182	0	}
183		#endif // ndef DISABLED_LEGACY_ENGINE
184
185		// Functions to setup the blamer.
186		// Whole word string, whole word bounding box.
187		void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box);
188		// Single "character" string, "character" bounding box.
189		// May be called multiple times to indicate the characters in a word.
190		void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box);
191		// Marks that there is something wrong with the truth text, like it contains
192		// reject characters.
193		void SetRejectedTruth();
194
195		// Returns true if the provided word_choice is correct.
196		bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const;
197
198	0	void ClearResults() {
199	0	norm_truth_word_.DeleteAllBoxes();
200	0	norm_box_tolerance_ = 0;
201	0	if (!NoTruth()) {
202	0	incorrect_result_reason_ = IRR_CORRECT;
203	0	}
204	0	debug_ = "";
205	0	segsearch_is_looking_for_blame_ = false;
206	0	best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
207	0	correct_segmentation_cols_.clear();
208	0	correct_segmentation_rows_.clear();
209	0	best_choice_is_dict_and_top_choice_ = false;
210	0	delete[] lattice_data_;
211	0	lattice_data_ = nullptr;
212	0	lattice_size_ = 0;
213	0	}
214	0	void CopyTruth(const BlamerBundle &other) {
215	0	truth_has_char_boxes_ = other.truth_has_char_boxes_;
216	0	truth_word_ = other.truth_word_;
217	0	truth_text_ = other.truth_text_;
218	0	incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
219	0	}
220	0	void CopyResults(const BlamerBundle &other) {
221	0	norm_truth_word_ = other.norm_truth_word_;
222	0	norm_box_tolerance_ = other.norm_box_tolerance_;
223	0	incorrect_result_reason_ = other.incorrect_result_reason_;
224	0	segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
225	0	best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
226	0	correct_segmentation_cols_ = other.correct_segmentation_cols_;
227	0	correct_segmentation_rows_ = other.correct_segmentation_rows_;
228	0	best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_;
229	0	if (other.lattice_data_ != nullptr) {
230	0	lattice_data_ = new char[other.lattice_size_];
231	0	memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
232	0	lattice_size_ = other.lattice_size_;
233	0	} else {
234	0	lattice_data_ = nullptr;
235	0	}
236	0	}
237		const char *IncorrectReason() const;
238
239		// Appends choice and truth details to the given debug string.
240		void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug);
241
242		// Sets up the norm_truth_word from truth_word using the given DENORM.
243		void SetupNormTruthWord(const DENORM &denorm);
244
245		// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
246		// bundles) where the right edge/ of the left-hand word is word1_right,
247		// and the left edge of the right-hand word is word2_left.
248		void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
249		BlamerBundle *bundle2) const;
250		// "Joins" the blames from bundle1 and bundle2 into *this.
251		void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug);
252
253		// If a blob with the same bounding box as one of the truth character
254		// bounding boxes is not classified as the corresponding truth character
255		// blames character classifier for incorrect answer.
256		void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
257		const BLOB_CHOICE_LIST &choices, bool debug);
258
259		// Checks whether chops were made at all the character bounding box
260		// boundaries in word->truth_word. If not - blames the chopper for an
261		// incorrect answer.
262		void SetChopperBlame(const WERD_RES *word, bool debug);
263		// Blames the classifier or the language model if, after running only the
264		// chopper, best_choice is incorrect and no blame has been yet set.
265		// Blames the classifier if best_choice is classifier's top choice and is a
266		// dictionary word (i.e. language model could not have helped).
267		// Otherwise, blames the language model (formerly permuter word adjustment).
268		void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
269		bool valid_permuter, bool debug);
270		// Sets up the correct_segmentation_* to mark the correct bounding boxes.
271		void SetupCorrectSegmentation(const TWERD *word, bool debug);
272
273		// Returns true if a guided segmentation search is needed.
274		bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
275		// Setup ready to guide the segmentation search to the correct segmentation.
276		void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
277		bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
278		double max_char_wh_ratio, WERD_RES *word_res);
279		// Returns true if the guided segsearch is in progress.
280		bool GuidedSegsearchStillGoing() const;
281		// The segmentation search has ended. Sets the blame appropriately.
282		void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);
283
284		// If the bundle is null or still does not indicate the correct result,
285		// fix it and use some backup reason for the blame.
286		static void LastChanceBlame(bool debug, WERD_RES *word);
287
288		// Sets the misadaption debug if this word is incorrect, as this word is
289		// being adapted to.
290		void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);
291
292		private:
293		// Copy assignment operator (currently unused, therefore private).
294		BlamerBundle &operator=(const BlamerBundle &other) = delete;
295		void SetBlame(IncorrectResultReason irr, const std::string &msg, const WERD_CHOICE *choice,
296	0	bool debug) {
297	0	incorrect_result_reason_ = irr;
298	0	debug_ = IncorrectReason();
299	0	debug_ += " to blame: ";
300	0	FillDebugString(msg, choice, debug_);
301	0	if (debug) {
302	0	tprintf("SetBlame(): %s", debug_.c_str());
303	0	}
304	0	}
305
306		private:
307		// Set to true when bounding boxes for individual unichars are recorded.
308		bool truth_has_char_boxes_;
309		// Variables used by the segmentation search when looking for the blame.
310		// Set to true while segmentation search is continued after the usual
311		// termination condition in order to look for the blame.
312		bool segsearch_is_looking_for_blame_;
313		// Set to true if best choice is a dictionary word and
314		// classifier's top choice.
315		bool best_choice_is_dict_and_top_choice_;
316		// Tolerance for bounding box comparisons in normalized space.
317		int norm_box_tolerance_;
318		// The true_word (in the original image coordinate space) contains ground
319		// truth bounding boxes for this WERD_RES.
320		tesseract::BoxWord truth_word_;
321		// Same as above, but in normalized coordinates
322		// (filled in by WERD_RES::SetupForRecognition()).
323		tesseract::BoxWord norm_truth_word_;
324		// Contains ground truth unichar for each of the bounding boxes in truth_word.
325		std::vector<std::string> truth_text_;
326		// The reason for incorrect OCR result.
327		IncorrectResultReason incorrect_result_reason_;
328		// Debug text associated with the blame.
329		std::string debug_;
330		// Misadaption debug information (filled in if this word was misadapted to).
331		std::string misadaption_debug_;
332		// Vectors populated by SegSearch to indicate column and row indices that
333		// correspond to blobs with correct bounding boxes.
334		std::vector<int> correct_segmentation_cols_;
335		std::vector<int> correct_segmentation_rows_;
336		// Best rating for correctly segmented path
337		// (set and used by SegSearch when looking for blame).
338		float best_correctly_segmented_rating_;
339		int lattice_size_; // size of lattice_data in bytes
340		// Serialized segmentation search lattice.
341		char *lattice_data_;
342		// Information about hypotheses (paths) explored by the segmentation search.
343		#ifndef DISABLED_LEGACY_ENGINE
344		tesseract::ParamsTrainingBundle params_training_bundle_;
345		#endif // ndef DISABLED_LEGACY_ENGINE
346		};
347
348		} // namespace tesseract
349
350		#endif // TESSERACT_CCSTRUCT_BLAMER_H_