Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/classify/intmatcher.h
Line
Count
Source
1
/******************************************************************************
2
 ** Filename:    intmatcher.h
3
 ** Purpose:     Interface to high level generic classifier routines.
4
 ** Author:      Robert Moss
5
 **
6
 ** (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
#ifndef INTMATCHER_H
18
#define INTMATCHER_H
19
20
#include "intproto.h"
21
#include "params.h"
22
23
namespace tesseract {
24
25
// Character fragments could be present in the trained templates
26
// but turned on/off on the language-by-language basis or depending
27
// on particular properties of the corpus (e.g. when we expect the
28
// images to have low exposure).
29
extern BOOL_VAR_H(disable_character_fragments);
30
31
extern INT_VAR_H(classify_integer_matcher_multiplier);
32
33
struct UnicharRating;
34
35
struct CP_RESULT_STRUCT {
36
8.56M
  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
37
38
  float Rating;
39
  CLASS_ID Class;
40
};
41
42
/**----------------------------------------------------------------------------
43
          Public Function Prototypes
44
----------------------------------------------------------------------------**/
45
46
1.02k
// Width, in bits, of an index into a table of SE_TABLE_SIZE entries.
#define SE_TABLE_BITS 9
// Number of table entries: one per SE_TABLE_BITS-bit index (2^9 == 512).
// Derived from SE_TABLE_BITS so the two values cannot drift apart; the
// expansion is still an integer constant expression, so it remains usable as
// an array bound (it sizes IntegerMatcher::similarity_evidence_table_).
#define SE_TABLE_SIZE (1 << SE_TABLE_BITS)
48
49
struct ScratchEvidence {
50
  uint8_t feature_evidence_[MAX_NUM_CONFIGS];
51
  int sum_feature_evidence_[MAX_NUM_CONFIGS];
52
  uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
53
54
  void Clear(const INT_CLASS_STRUCT *class_template);
55
  void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template);
56
  void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures);
57
  void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask);
58
};
59
60
class IntegerMatcher {
61
public:
62
  // Integer Matcher Theta Fudge (0-255).
63
  static const int kIntThetaFudge = 128;
64
  // Bits in Similarity to Evidence Lookup (8-9).
65
  static const int kEvidenceTableBits = 9;
66
  // Integer Evidence Truncation Bits (8-14).
67
  static const int kIntEvidenceTruncBits = 14;
68
  // Similarity to Evidence Table Exponential Multiplier.
69
  static const float kSEExponentialMultiplier;
70
  // Center of Similarity Curve.
71
  static const float kSimilarityCenter;
72
73
  IntegerMatcher(tesseract::IntParam *classify_debug_level);
74
75
  void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
76
             int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
77
             tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug,
78
             bool SeparateDebugWindows);
79
80
  // Applies the CN normalization factor to the given rating and returns
81
  // the modified rating.
82
  float ApplyCNCorrection(float rating, int blob_length, int normalization_factor,
83
                          int matcher_multiplier);
84
85
  int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
86
                     int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray,
87
                     int AdaptProtoThreshold, int Debug);
88
89
  int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
90
                      int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray,
91
                      int AdaptFeatureThreshold, int Debug);
92
93
private:
94
  int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
95
                             int FeatureNum, const INT_FEATURE_STRUCT *Feature,
96
                             ScratchEvidence *evidence, int Debug);
97
98
  int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables,
99
                    tesseract::UnicharRating *Result);
100
101
#ifndef GRAPHICS_DISABLED
102
  void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
103
                              const ScratchEvidence &tables, int16_t NumFeatures, int Debug);
104
105
  void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask,
106
                             const ScratchEvidence &tables, bool SeparateDebugWindows);
107
108
  void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
109
                               int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
110
                               int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows);
111
#endif
112
113
private:
114
  tesseract::IntParam *classify_debug_level_;
115
  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
116
  uint32_t evidence_table_mask_;
117
  uint32_t mult_trunc_shift_bits_;
118
  uint32_t table_trunc_shift_bits_;
119
  uint32_t evidence_mult_mask_;
120
};
121
122
} // namespace tesseract
123
124
#endif