/src/tesseract/src/classify/intmatcher.h
Line  | Count  | Source  | 
1  |  | /******************************************************************************  | 
2  |  |  ** Filename:    intmatcher.h  | 
3  |  |  ** Purpose:     Interface to high level generic classifier routines.  | 
4  |  |  ** Author:      Robert Moss  | 
5  |  |  **  | 
6  |  |  ** (c) Copyright Hewlett-Packard Company, 1988.  | 
7  |  |  ** Licensed under the Apache License, Version 2.0 (the "License");  | 
8  |  |  ** you may not use this file except in compliance with the License.  | 
9  |  |  ** You may obtain a copy of the License at  | 
10  |  |  ** http://www.apache.org/licenses/LICENSE-2.0  | 
11  |  |  ** Unless required by applicable law or agreed to in writing, software  | 
12  |  |  ** distributed under the License is distributed on an "AS IS" BASIS,  | 
13  |  |  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
14  |  |  ** See the License for the specific language governing permissions and  | 
15  |  |  ** limitations under the License.  | 
16  |  |  ******************************************************************************/  | 
17  |  | #ifndef INTMATCHER_H  | 
18  |  | #define INTMATCHER_H  | 
19  |  |  | 
20  |  | #include "intproto.h"  | 
21  |  | #include "params.h"  | 
22  |  |  | 
23  |  | namespace tesseract { | 
24  |  |  | 
25  |  | // Character fragments could be present in the trained templates  | 
26  |  | // but turned on/off on a language-by-language basis or depending  | 
27  |  | // on particular properties of the corpus (e.g. when we expect the  | 
28  |  | // images to have low exposure).  | 
29  |  | extern BOOL_VAR_H(disable_character_fragments);  | 
30  |  | /* Declares classify_integer_matcher_multiplier, presumably an integer-valued parameter (INT_VAR_H); the macro and the parameter's definition live outside this header. */  | 
31  |  | extern INT_VAR_H(classify_integer_matcher_multiplier);  | 
32  |  | /* Forward declaration: this header only refers to UnicharRating through pointers (Match, FindBestMatch). */  | 
33  |  | struct UnicharRating;  | 
34  |  | /* Pairs a float Rating with a CLASS_ID Class. NOTE(review): the CP_ prefix presumably refers to the class pruner - confirm against the callers. */  | 
35  |  | struct CP_RESULT_STRUCT { | 
36  | 8.56M  |   CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} | 
37  |  |   /* The default constructor above zero-initializes both fields. */  | 
38  |  |   float Rating;  | 
39  |  |   CLASS_ID Class;  | 
40  |  | };  | 
41  |  |  | 
42  |  | /**----------------------------------------------------------------------------  | 
43  |  |           Public Function Prototypes  | 
44  |  | ----------------------------------------------------------------------------**/  | 
45  |  | /* Dimensions of the similarity-to-evidence lookup table (IntegerMatcher::similarity_evidence_table_ has SE_TABLE_SIZE entries). 512 equals 1 << 9. NOTE(review): the two macros look coupled, and SE_TABLE_BITS has the same value as IntegerMatcher::kEvidenceTableBits - confirm before changing any of them. */  | 
46  | 1.02k  | #define SE_TABLE_BITS 9  | 
47  | 1.02k  | #define SE_TABLE_SIZE 512  | 
48  |  | /* Fixed-size evidence buffers sized by MAX_NUM_CONFIGS, MAX_NUM_PROTOS and MAX_PROTO_INDEX. IntegerMatcher's private helpers receive it by pointer or const reference. */  | 
49  |  | struct ScratchEvidence { | 
50  |  |   uint8_t feature_evidence_[MAX_NUM_CONFIGS]; /* one byte per config slot */  | 
51  |  |   int sum_feature_evidence_[MAX_NUM_CONFIGS]; /* one int per config slot */  | 
52  |  |   uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; /* one row per proto slot */  | 
53  |  |   /* Only declared here. NOTE(review): from the names, Clear* reset the buffers and NormalizeSums / UpdateSumOfProtoEvidences post-process sum_feature_evidence_ - confirm in the implementation file. */  | 
54  |  |   void Clear(const INT_CLASS_STRUCT *class_template);  | 
55  |  |   void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template);  | 
56  |  |   void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures);  | 
57  |  |   void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask);  | 
58  |  | };  | 
59  |  | /* Integer matcher interface: tuning constants, the public Match / FindGoodProtos / FindBadFeatures entry points that work on one INT_CLASS_STRUCT, and a similarity-to-evidence lookup table. All member functions are only declared in this header. */  | 
60  |  | class IntegerMatcher { | 
61  |  | public:  | 
62  |  |   // Integer Matcher Theta Fudge (0-255).  | 
63  |  |   static const int kIntThetaFudge = 128;  | 
64  |  |   // Bits in Similarity to Evidence Lookup (8-9).  | 
65  |  |   static const int kEvidenceTableBits = 9;  | 
66  |  |   // Integer Evidence Truncation Bits (8-14).  | 
67  |  |   static const int kIntEvidenceTruncBits = 14;  | 
68  |  |   // Similarity to Evidence Table Exponential Multiplier.  | 
69  |  |   static const float kSEExponentialMultiplier;  | 
70  |  |   // Center of Similarity Curve.  | 
71  |  |   static const float kSimilarityCenter;  | 
72  |  |   /* Takes a pointer to the classify_debug_level parameter. NOTE(review): presumably stored in classify_debug_level_ and not owned by the matcher - confirm. */  | 
73  |  |   IntegerMatcher(tesseract::IntParam *classify_debug_level);  | 
74  |  |   /* Receives a class template, proto/config masks and NumFeatures features, and has a writable Result pointer. NOTE(review): presumably the match outcome is written to *Result; the meaning of AdaptFeatureThreshold and Debug is defined by the implementation - confirm. */  | 
75  |  |   void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  | 
76  |  |              int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,  | 
77  |  |              tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug,  | 
78  |  |              bool SeparateDebugWindows);  | 
79  |  |  | 
80  |  |   // Applies the CN normalization factor to the given rating and returns  | 
81  |  |   // the modified rating.  | 
82  |  |   float ApplyCNCorrection(float rating, int blob_length, int normalization_factor,  | 
83  |  |                           int matcher_multiplier);  | 
84  |  |   /* NOTE(review): presumably fills ProtoArray with the protos that pass AdaptProtoThreshold and returns how many were written - confirm in the implementation. */  | 
85  |  |   int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  | 
86  |  |                      int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray,  | 
87  |  |                      int AdaptProtoThreshold, int Debug);  | 
88  |  |   /* NOTE(review): presumably fills FeatureArray with the features that fail AdaptFeatureThreshold and returns how many were written - confirm in the implementation. */  | 
89  |  |   int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  | 
90  |  |                       int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray,  | 
91  |  |                       int AdaptFeatureThreshold, int Debug);  | 
92  |  |   /* Private helpers; only declared in this header. They exchange intermediate results through a ScratchEvidence. */  | 
93  |  | private:  | 
94  |  |   int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  | 
95  |  |                              int FeatureNum, const INT_FEATURE_STRUCT *Feature,  | 
96  |  |                              ScratchEvidence *evidence, int Debug);  | 
97  |  |  | 
98  |  |   int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables,  | 
99  |  |                     tesseract::UnicharRating *Result);  | 
100  |  |   /* The three debug/display helpers below are declared only when GRAPHICS_DISABLED is not defined. */  | 
101  |  | #ifndef GRAPHICS_DISABLED  | 
102  |  |   void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  | 
103  |  |                               const ScratchEvidence &tables, int16_t NumFeatures, int Debug);  | 
104  |  |  | 
105  |  |   void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask,  | 
106  |  |                              const ScratchEvidence &tables, bool SeparateDebugWindows);  | 
107  |  |  | 
108  |  |   void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  | 
109  |  |                                int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,  | 
110  |  |                                int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows);  | 
111  |  | #endif  | 
112  |  |   /* Data members. similarity_evidence_table_ holds SE_TABLE_SIZE bytes. NOTE(review): the masks and shift counts are presumably derived from the k* constants by the constructor - confirm. */  | 
113  |  | private:  | 
114  |  |   tesseract::IntParam *classify_debug_level_;  | 
115  |  |   uint8_t similarity_evidence_table_[SE_TABLE_SIZE];  | 
116  |  |   uint32_t evidence_table_mask_;  | 
117  |  |   uint32_t mult_trunc_shift_bits_;  | 
118  |  |   uint32_t table_trunc_shift_bits_;  | 
119  |  |   uint32_t evidence_mult_mask_;  | 
120  |  | };  | 
121  |  |  | 
122  |  | } // namespace tesseract  | 
123  |  |  | 
124  |  | #endif  |