/src/tesseract/src/classify/intmatcher.h
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | ** Filename: intmatcher.h |
3 | | ** Purpose: Interface to high level generic classifier routines. |
4 | | ** Author: Robert Moss |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | #ifndef INTMATCHER_H |
18 | | #define INTMATCHER_H |
19 | | |
20 | | #include "intproto.h" |
21 | | #include "params.h" |
22 | | |
23 | | namespace tesseract { |
24 | | |
25 | | // Character fragments could be present in the trained templates |
26 | | // but turned on/off on a language-by-language basis or depending |
27 | | // on particular properties of the corpus (e.g. when we expect the |
28 | | // images to have low exposure). |
29 | | extern BOOL_VAR_H(disable_character_fragments); |
30 | | // Integer parameter declared via INT_VAR_H; presumably the value passed as matcher_multiplier to ApplyCNCorrection - TODO confirm. |
31 | | extern INT_VAR_H(classify_integer_matcher_multiplier); |
32 | | // Forward declaration: this header only uses UnicharRating through pointers (the Result parameters below). |
33 | | struct UnicharRating; |
34 | | // Pairs a class id with a rating; a default-constructed instance has both fields zeroed. |
35 | | struct CP_RESULT_STRUCT { |
36 | 8.56M | CP_RESULT_STRUCT() : Rating{0.0f}, Class{0} {} |
37 | |
38 | | float Rating; // Starts at 0.0f. |
39 | | CLASS_ID Class; // Starts at 0. |
40 | | }; |
41 | | |
42 | | /**---------------------------------------------------------------------------- |
43 | | Public Function Prototypes |
44 | | ----------------------------------------------------------------------------**/ |
45 | | // SE_TABLE_SIZE sizes IntegerMatcher::similarity_evidence_table_; 512 == 1 << SE_TABLE_BITS, so the two presumably must change together - TODO confirm. |
46 | 1.02k | #define SE_TABLE_BITS 9 |
47 | 1.02k | #define SE_TABLE_SIZE 512 |
48 | | // Evidence buffers handed to the private IntegerMatcher helpers (see their ScratchEvidence parameters). |
49 | | struct ScratchEvidence { |
50 | | uint8_t feature_evidence_[MAX_NUM_CONFIGS]; /* MAX_NUM_CONFIGS byte-sized entries */ |
51 | | int sum_feature_evidence_[MAX_NUM_CONFIGS]; /* MAX_NUM_CONFIGS int entries */ |
52 | | uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; /* MAX_NUM_PROTOS rows of MAX_PROTO_INDEX bytes */ |
53 | | // Member functions are declared only; their definitions are not in this header. The two Clear* take a const class template, the other two a non-const one. |
54 | | void Clear(const INT_CLASS_STRUCT *class_template); |
55 | | void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template); |
56 | | void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures); |
57 | | void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask); |
58 | | }; |
59 | | // Integer matcher: tuning constants, public Match/FindGoodProtos/FindBadFeatures entry points, private helpers and lookup-table state. |
60 | | class IntegerMatcher { |
61 | | public: |
62 | | // Integer Matcher Theta Fudge (0-255). |
63 | | static const int kIntThetaFudge = 128; |
64 | | // Bits in Similarity to Evidence Lookup (8-9). |
65 | | static const int kEvidenceTableBits = 9; /* same value as SE_TABLE_BITS */ |
66 | | // Integer Evidence Truncation Bits (8-14). |
67 | | static const int kIntEvidenceTruncBits = 14; |
68 | | // Similarity to Evidence Table Exponential Multiplier. |
69 | | static const float kSEExponentialMultiplier; |
70 | | // Center of Similarity Curve. |
71 | | static const float kSimilarityCenter; |
72 | | // NOTE(review): single-argument constructor is not marked explicit, so an IntParam* converts implicitly to IntegerMatcher. |
73 | | IntegerMatcher(tesseract::IntParam *classify_debug_level); |
74 | | // Declared only (definition not in this header); presumably matches Features against ClassTemplate and fills *Result - TODO confirm. |
75 | | void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, |
76 | | int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, |
77 | | tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, |
78 | | bool SeparateDebugWindows); |
79 | |
80 | | // Applies the CN normalization factor to the given rating and returns |
81 | | // the modified rating. |
82 | | float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, |
83 | | int matcher_multiplier); |
84 | | // Returns int; ProtoArray is a non-const PROTO_ID pointer, presumably an output list of protos passing AdaptProtoThreshold - TODO confirm. |
85 | | int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, |
86 | | int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, |
87 | | int AdaptProtoThreshold, int Debug); |
88 | | // Returns int; FeatureArray is a non-const FEATURE_ID pointer, presumably an output list of features failing AdaptFeatureThreshold - TODO confirm. |
89 | | int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, |
90 | | int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, |
91 | | int AdaptFeatureThreshold, int Debug); |
92 | |
93 | | private: /* helper declarations; their definitions are not in this header */ |
94 | | int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, |
95 | | int FeatureNum, const INT_FEATURE_STRUCT *Feature, |
96 | | ScratchEvidence *evidence, int Debug); |
97 | | // Takes the evidence tables by const reference and a non-const Result pointer; presumably selects the best config - TODO confirm. |
98 | | int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables, |
99 | | tesseract::UnicharRating *Result); |
100 | | // The debug helpers below are declared only when GRAPHICS_DISABLED is not defined. |
101 | | #ifndef GRAPHICS_DISABLED |
102 | | void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, |
103 | | const ScratchEvidence &tables, int16_t NumFeatures, int Debug); |
104 | |
105 | | void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask, |
106 | | const ScratchEvidence &tables, bool SeparateDebugWindows); |
107 | |
108 | | void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, |
109 | | int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, |
110 | | int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows); |
111 | | #endif /* GRAPHICS_DISABLED */ |
112 | | // Data members. NOTE(review): this second private: label repeats the access level already in effect. |
113 | | private: |
114 | | tesseract::IntParam *classify_debug_level_; /* raw pointer; ownership and lifetime are not visible here */ |
115 | | uint8_t similarity_evidence_table_[SE_TABLE_SIZE]; /* SE_TABLE_SIZE (512) byte-sized entries */ |
116 | | uint32_t evidence_table_mask_; /* no in-class initializer here; presumably set by the constructor - TODO confirm */ |
117 | | uint32_t mult_trunc_shift_bits_; |
118 | | uint32_t table_trunc_shift_bits_; |
119 | | uint32_t evidence_mult_mask_; |
120 | | }; |
121 | | |
122 | | } // namespace tesseract |
123 | | |
124 | | #endif |