Coverage Report

Created: 2025-06-13 07:02

/src/tesseract/src/classify/classify.cpp
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        classify.cpp
3
// Description: classify class.
4
// Author:      Samuel Charron
5
//
6
// (C) Copyright 2006, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#include "classify.h"
20
21
#ifdef DISABLED_LEGACY_ENGINE
22
23
#  include <string.h>
24
25
namespace tesseract {
26
27
// Constructor used when DISABLED_LEGACY_ENGINE is defined.
// Each *_MEMBER entry names a parameter, its default value and its help
// text, and is handed this->params(); dict_ is then built from this object.
Classify::Classify()
    : INT_MEMBER(classify_debug_level, 0, "Classify debug level", this->params())
    , BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].",
                  this->params())
    , double_MEMBER(classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings",
                    this->params())
    , double_MEMBER(classify_max_certainty_margin, 5.5,
                    "Veto difference between classifier certainties", this->params())
    , dict_(this) {
  // Nothing else to set up in this configuration.
}
43
44
// No explicit cleanup is performed in the DISABLED_LEGACY_ENGINE build.
Classify::~Classify() = default;
45
46
} // namespace tesseract
47
48
#else // DISABLED_LEGACY_ENGINE not defined
49
50
#  include <cstring>
51
#  include "fontinfo.h"
52
#  include "intproto.h"
53
#  include "mfoutline.h"
54
#  include "scrollview.h"
55
#  include "shapeclassifier.h"
56
#  include "shapetable.h"
57
#  include "unicity_table.h"
58
59
namespace tesseract {
60
// Constructor used when the legacy engine is compiled in.
// Each *_MEMBER entry names a parameter, its default value and its help
// text, and is handed this->params(). After the parameters, im_ is given the
// address of classify_debug_level and dict_ is built from this object.
Classify::Classify()
    // Blob division and learning switches.
    : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping", this->params())
    , BOOL_MEMBER(prioritize_division, false, "Prioritize blob division over chopping",
                  this->params())
    , BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier", this->params())
    , INT_MEMBER(classify_debug_level, 0, "Classify debug level", this->params())
    // Normalization and classifier veto settings.
    , INT_MEMBER(classify_norm_method, character, "Normalization Method   ...", this->params())
    , double_MEMBER(classify_char_norm_range, 0.2, "Character Normalization Range ...",
                    this->params())
    , double_MEMBER(classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings",
                    this->params())
    , double_MEMBER(classify_max_certainty_margin, 5.5,
                    "Veto difference between classifier certainties", this->params())
    // Matching mode and adaptive template switches.
    , BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", this->params())
    , BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", this->params())
    , BOOL_MEMBER(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier", this->params())
    , BOOL_MEMBER(classify_use_pre_adapted_templates, 0, "Use pre-adapted classifier templates",
                  this->params())
    , BOOL_MEMBER(classify_save_adapted_templates, 0, "Save adapted templates to a file",
                  this->params())
    , BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", this->params())
    , BOOL_MEMBER(classify_nonlinear_norm, 0, "Non-linear stroke-density normalization",
                  this->params())
    // Matcher debugging and thresholds.
    , INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params())
    , INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params())
    , INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", this->params())
    , double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", this->params())
    , double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", this->params())
    , double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", this->params())
    , double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", this->params())
    , double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", this->params())
    , double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length", this->params())
    , INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", this->params())
    , INT_MEMBER(matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold",
                 this->params())
    , INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5,
                 "Enable adaption even if the ambiguities have not been seen", this->params())
    , double_MEMBER(matcher_clustering_max_angle_delta, 0.015,
                    "Maximum angle delta for prototype clustering", this->params())
    // Rating scales, penalties and pruning.
    , double_MEMBER(classify_misfit_junk_penalty, 0.0,
                    "Penalty to apply when a non-alnum is vertically out of "
                    "its expected textline position",
                    this->params())
    , double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params())
    , double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used",
                    this->params())
    , double_MEMBER(classify_adapted_pruning_factor, 2.5,
                    "Prune poor adapted results this much worse than best result", this->params())
    , double_MEMBER(classify_adapted_pruning_threshold, -1.0,
                    "Threshold at which classify_adapted_pruning_factor starts", this->params())
    , INT_MEMBER(classify_adapt_proto_threshold, 230,
                 "Threshold for good protos during adaptive 0-255", this->params())
    , INT_MEMBER(classify_adapt_feature_threshold, 230,
                 "Threshold for good features during adaptive 0-255", this->params())
    // Character fragment handling.
    , BOOL_MEMBER(disable_character_fragments, true,
                  "Do not include character fragments in the"
                  " results of the classifier",
                  this->params())
    , double_MEMBER(classify_character_fragments_garbage_certainty_threshold, -3.0,
                    "Exclude fragments that do not look like whole"
                    " characters from training and adaption",
                    this->params())
    , BOOL_MEMBER(classify_debug_character_fragments, false,
                  "Bring up graphical debugging windows for fragments training", this->params())
    , BOOL_MEMBER(matcher_debug_separate_windows, false,
                  "Use two different windows for debugging the matching: "
                  "One for the protos and one for the features.",
                  this->params())
    , STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", this->params())
    // Class pruner and integer matcher settings.
    , INT_MEMBER(classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255",
                 this->params())
    , INT_MEMBER(classify_class_pruner_multiplier, 15,
                 "Class Pruner Multiplier 0-255:       ", this->params())
    , INT_MEMBER(classify_cp_cutoff_strength, 7,
                 "Class Pruner CutoffStrength:         ", this->params())
    , INT_MEMBER(classify_integer_matcher_multiplier, 10,
                 "Integer Matcher Multiplier  0-255:   ", this->params())
    , BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].",
                  this->params())
    // Speckle (noise blob) settings.
    , double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", this->params())
    , double_MEMBER(speckle_rating_penalty, 10.0, "Penalty to add to worst rating for noise",
                    this->params())
    , im_(&classify_debug_level)
    , dict_(this) {
  // Install FontInfoDeleteCallback as the clear callback of the font table;
  // the single callback argument is forwarded through the bound placeholder.
  fontinfo_table_.set_clear_callback(std::bind(FontInfoDeleteCallback, std::placeholders::_1));

  InitFeatureDefs(&feature_defs_);
}
149
150
0
// Calls EndAdaptiveClassifier() and then, unless GRAPHICS_DISABLED is
// defined, deletes the three learn_*_debug_win_ objects held by this class.
Classify::~Classify() {
  EndAdaptiveClassifier();
#ifndef GRAPHICS_DISABLED
  // These members only get deleted in builds with graphics enabled.
  delete learn_debug_win_;
  delete learn_fragmented_word_debug_win_;
  delete learn_fragments_debug_win_;
#endif
}
158
159
// Takes ownership of the given classifier, and uses it for future calls
160
// to CharNormClassifier.
161
0
void Classify::SetStaticClassifier(ShapeClassifier *static_classifier) {
162
0
  delete static_classifier_;
163
0
  static_classifier_ = static_classifier;
164
0
}
165
166
// Moved from speckle.cpp
167
// Adds a noise classification result that is a bit worse than the worst
168
// current result, or the worst possible result if no current results.
169
34.8k
void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) {
170
34.8k
  BLOB_CHOICE_IT bc_it(choices);
171
  // If there is no classifier result, we will use the worst possible certainty
172
  // and corresponding rating.
173
34.8k
  float certainty = -getDict().certainty_scale;
174
34.8k
  float rating = rating_scale * blob_length;
175
34.8k
  if (!choices->empty() && blob_length > 0) {
176
34.8k
    bc_it.move_to_last();
177
34.8k
    BLOB_CHOICE *worst_choice = bc_it.data();
178
    // Add speckle_rating_penalty to worst rating, matching old value.
179
34.8k
    rating = worst_choice->rating() + speckle_rating_penalty;
180
    // Compute the rating to correspond to the certainty. (Used to be kept
181
    // the same, but that messes up the language model search.)
182
34.8k
    certainty = -rating * getDict().certainty_scale / (rating_scale * blob_length);
183
34.8k
  }
184
34.8k
  auto *blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, -1, 0.0f, FLT_MAX, 0,
185
34.8k
                                      BCC_SPECKLE_CLASSIFIER);
186
34.8k
  bc_it.add_to_end(blob_choice);
187
34.8k
}
188
189
// Returns true if the blob is small enough to be a large speckle.
190
755k
bool Classify::LargeSpeckle(const TBLOB &blob) {
191
755k
  double speckle_size = kBlnXHeight * speckle_large_max_size;
192
755k
  TBOX bbox = blob.bounding_box();
193
755k
  return bbox.width() < speckle_size && bbox.height() < speckle_size;
194
755k
}
195
196
} // namespace tesseract
197
198
#endif // def DISABLED_LEGACY_ENGINE