/src/tesseract/src/classify/featdefs.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | ** Filename: featdefs.cpp |
3 | | ** Purpose: Definitions of currently defined feature types. |
4 | | ** Author: Dan Johnson |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | |
18 | | #include "featdefs.h" |
19 | | |
20 | | #include "picofeat.h" // for PicoFeatureLength |
21 | | #include "scanutils.h" |
22 | | |
23 | | #include <cstdio> |
24 | | #include <cstring> |
25 | | |
26 | | namespace tesseract { |
27 | | |
28 | | #define PICO_FEATURE_LENGTH 0.05 |
29 | | |
30 | | /*----------------------------------------------------------------------------- |
31 | | Global Data Definitions and Declarations |
 | | The four constants below are the short-name strings handed to DefineFeature |
 | | for the feature types that are listed in DescDefs. |
32 | | -----------------------------------------------------------------------------*/ |
33 | | const char *const kMicroFeatureType = "mf"; |
34 | | const char *const kCNFeatureType = "cn"; |
35 | | const char *const kIntFeatureType = "if"; |
36 | | const char *const kGeoFeatureType = "tb"; |
37 | | |
38 | | // Parameter table for the MicroFeature type: six DefineParam entries, exactly one of them with a first argument of 1. |
39 | | StartParamDesc(MicroFeatureParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75) |
40 | | DefineParam(0, 1, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) DefineParam(0, 1, -0.5, 0.5) |
41 | | DefineParam(0, 1, -0.5, 0.5) EndParamDesc |
42 | | // Feature descriptor registered under kMicroFeatureType ("mf"); the counts 5 and 1 add up to the six entries above (see features.h for the DefineFeature parameters). |
43 | | DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) |
44 | | |
45 | | // Parameter table for the CharNorm feature type: four DefineParam entries, none with a first argument of 1. |
46 | | StartParamDesc(CharNormParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(0, 1, 0.0, 1.0) |
47 | | DefineParam(0, 0, 0.0, 1.0) DefineParam(0, 0, 0.0, 1.0) EndParamDesc |
48 | | // Feature descriptor registered under kCNFeatureType ("cn"); the counts 4 and 0 match the four entries above (see features.h for the DefineFeature parameters). |
49 | | DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) |
50 | | |
51 | | // Parameter table for the IntFeature type: three DefineParam entries, each with the range 0.0..255.0, one of them with a first argument of 1. |
52 | | StartParamDesc(IntFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0) |
53 | | DefineParam(1, 0, 0.0, 255.0) EndParamDesc |
54 | | // Feature descriptor registered under kIntFeatureType ("if"); the counts 2 and 1 add up to the three entries above (see features.h for the DefineFeature parameters). |
55 | | DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) |
56 | | |
57 | | // Parameter table for the GeoFeature type: three DefineParam entries, each with the range 0.0..255.0, none with a first argument of 1. |
58 | | StartParamDesc(GeoFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0) |
59 | | DefineParam(0, 0, 0.0, 255.0) EndParamDesc |
60 | | // Feature descriptor registered under kGeoFeatureType ("tb"); the counts 3 and 0 match the three entries above (see features.h for the DefineFeature parameters). |
61 | | DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) |
62 | | |
63 | | // Other features used for training the adaptive classifier, but not used |
64 | | // during normal training, therefore not in the DescDefs array. |
65 | | |
66 | | // PicoFeature type: an adjustable length knob followed by its parameter table (three DefineParam entries, one with a first argument of 1). |
67 | | // PicoFeatureLength is the knob that can be used to adjust pico-feature length; it starts at PICO_FEATURE_LENGTH (0.05). |
68 | | float PicoFeatureLength = PICO_FEATURE_LENGTH; |
69 | | StartParamDesc(PicoFeatParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(1, 0, 0.0, 1.0) |
70 | | DefineParam(0, 0, -0.5, 0.5) EndParamDesc |
71 | | // Feature descriptor registered under the literal short name "pf"; the counts 2 and 1 add up to the three entries above (see features.h for the DefineFeature parameters). |
72 | | DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams) |
73 | | |
74 | | // Parameter table for the OutlineFeature type: four DefineParam entries, one of them with a first argument of 1. |
75 | | StartParamDesc(OutlineFeatParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75) |
76 | | DefineParam(0, 0, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) EndParamDesc |
77 | | // Feature descriptor registered under the literal short name "of"; the counts 3 and 1 add up to the four entries above (see features.h for the DefineFeature parameters). |
78 | | DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams) |
79 | | |
80 | | // Position in this array is the feature type's index: InitFeatureDefs copies entry i into FeatureDesc[i]. MUST be kept in-sync with ExtractorDefs in fxdefs.cpp. |
81 | | static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = { |
82 | | &MicroFeatureDesc, &CharNormDesc, &IntFeatDesc, &GeoFeatDesc}; |
83 | | |
84 | | /*----------------------------------------------------------------------------- |
85 | | Public Code |
86 | | -----------------------------------------------------------------------------*/ |
87 | 8 | void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { |
88 | 8 | featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES; |
89 | 40 | for (int i = 0; i < NUM_FEATURE_TYPES; ++i) { |
90 | 32 | featuredefs->FeatureDesc[i] = DescDefs[i]; |
91 | 32 | } |
92 | 8 | } |
93 | | |
94 | | /*---------------------------------------------------------------------------*/ |
95 | | /** |
96 | | * Appends a textual representation of CharDesc to str. |
97 | | * The format used is to write out the number of feature |
98 | | * sets which will be written followed by a representation of |
99 | | * each feature set. |
100 | | * |
101 | | * Each set starts with the short name for that feature followed |
102 | | * by a description of the feature set. Feature sets which are |
103 | | * not present are not written. |
104 | | * |
105 | | * @param FeatureDefs definitions of feature types/extractors |
106 | | * @param str string to append CharDesc to |
107 | | * @param CharDesc character description to write to File |
108 | | */ |
109 | 0 | void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str) { |
110 | 0 | int NumSetsToWrite = 0; |
111 | |
|
112 | 0 | for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { |
113 | 0 | if (CharDesc->FeatureSets[Type]) { |
114 | 0 | NumSetsToWrite++; |
115 | 0 | } |
116 | 0 | } |
117 | |
|
118 | 0 | str += " " + std::to_string(NumSetsToWrite); |
119 | 0 | str += "\n"; |
120 | 0 | for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { |
121 | 0 | if (CharDesc->FeatureSets[Type]) { |
122 | 0 | str += FeatureDefs.FeatureDesc[Type]->ShortName; |
123 | 0 | str += " "; |
124 | 0 | WriteFeatureSet(CharDesc->FeatureSets[Type], str); |
125 | 0 | } |
126 | 0 | } |
127 | 0 | } /* WriteCharDescription */ |
128 | | |
129 | | // Return whether all of the fields of the given feature set |
130 | | // are well defined (not inf or nan). |
131 | 0 | bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc) { |
132 | 0 | bool anything_written = false; |
133 | 0 | bool well_formed = true; |
134 | 0 | for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { |
135 | 0 | if (CharDesc->FeatureSets[Type]) { |
136 | 0 | for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) { |
137 | 0 | FEATURE feat = CharDesc->FeatureSets[Type]->Features[i]; |
138 | 0 | for (int p = 0; p < feat->Type->NumParams; p++) { |
139 | 0 | if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p])) { |
140 | 0 | well_formed = false; |
141 | 0 | } else { |
142 | 0 | anything_written = true; |
143 | 0 | } |
144 | 0 | } |
145 | 0 | } |
146 | 0 | } else { |
147 | 0 | return false; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | return anything_written && well_formed; |
151 | 0 | } /* ValidCharDescription */ |
152 | | |
153 | | /*---------------------------------------------------------------------------*/ |
154 | | /** |
155 | | * Read a character description from File, and return |
156 | | * a data structure containing this information. The data |
157 | | * is formatted as follows: |
158 | | * @verbatim |
159 | | NumberOfSets |
160 | | ShortNameForSet1 Set1 |
161 | | ShortNameForSet2 Set2 |
162 | | ... |
163 | | @endverbatim |
164 | | * |
165 | | * Globals: |
166 | | * - none |
167 | | * |
168 | | * @param FeatureDefs definitions of feature types/extractors |
169 | | * @param File open text file to read character description from |
170 | | * @return Character description read from File. |
171 | | */ |
172 | 0 | CHAR_DESC_STRUCT *ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File) { |
173 | 0 | int NumSetsToRead; |
174 | 0 | char ShortName[FEAT_NAME_SIZE]; |
175 | 0 | int Type; |
176 | |
|
177 | 0 | ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1); |
178 | 0 | ASSERT_HOST(NumSetsToRead >= 0); |
179 | 0 | ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes); |
180 | |
|
181 | 0 | auto CharDesc = new CHAR_DESC_STRUCT(FeatureDefs); |
182 | 0 | for (; NumSetsToRead > 0; NumSetsToRead--) { |
183 | 0 | tfscanf(File, "%s", ShortName); |
184 | 0 | Type = ShortNameToFeatureType(FeatureDefs, ShortName); |
185 | 0 | CharDesc->FeatureSets[Type] = ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]); |
186 | 0 | } |
187 | 0 | return CharDesc; |
188 | 0 | } |
189 | | |
190 | | /*---------------------------------------------------------------------------*/ |
191 | | /** |
192 | | * Search through all features currently defined and return |
193 | | * the feature type for the feature with the specified short |
194 | | * name. Trap an error if the specified name is not found. |
195 | | * |
196 | | * Globals: |
197 | | * - none |
198 | | * |
199 | | * @param FeatureDefs definitions of feature types/extractors |
200 | | * @param ShortName short name of a feature type |
201 | | * @return Feature type which corresponds to ShortName. |
202 | | */ |
203 | 0 | uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName) { |
204 | 0 | for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++) { |
205 | 0 | if (!strcmp((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) { |
206 | 0 | return static_cast<uint32_t>(i); |
207 | 0 | } |
208 | 0 | } |
209 | 0 | ASSERT_HOST(!"Illegal short name for a feature"); |
210 | 0 | return 0; |
211 | 0 | } |
212 | | |
213 | | } // namespace tesseract |