/src/tesseract/src/classify/picofeat.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | ** Filename: picofeat.cpp |
3 | | ** Purpose: Definition of pico-features. |
4 | | ** Author: Dan Johnson |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | |
18 | | #include "picofeat.h" |
19 | | |
20 | | #include "classify.h" |
21 | | #include "featdefs.h" |
22 | | #include "fpoint.h" |
23 | | #include "mfoutline.h" |
24 | | #include "ocrfeatures.h" |
25 | | #include "params.h" |
26 | | #include "trainingsample.h" |
27 | | |
28 | | #include <cmath> |
29 | | #include <cstdio> |
30 | | |
31 | | namespace tesseract { |
32 | | |
33 | | /*--------------------------------------------------------------------------- |
34 | | Variables -- classify_pico_feature_length: length of a single pico-feature, declared below with the value 0.05 |
35 | | ----------------------------------------------------------------------------*/ |
36 | | |
37 | | double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); |
38 | | |
39 | | /*--------------------------------------------------------------------------- |
40 | | Private Function Prototypes -- helpers defined under "Private Code" below. NOTE(review): none is declared static, so each has external linkage. |
41 | | ----------------------------------------------------------------------------*/ |
42 | | void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet); /* one outline segment -> pico-features */ |
43 | | |
44 | | void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); /* one whole outline -> pico-features */ |
45 | | |
46 | | void NormalizePicoX(FEATURE_SET FeatureSet); /* shifts feature x values so their mean is 0 */ |
47 | | |
48 | | /*---------------------------------------------------------------------------- |
49 | | Public Code |
50 | | ----------------------------------------------------------------------------*/ |
51 | | /*---------------------------------------------------------------------------*/ |
52 | | /** |
53 | | * Runs ConvertBlob and NormalizeOutlines on Blob, then feeds each resulting outline to ConvertToPicoFeatures2. |
54 | | * If classify_norm_method == baseline, the collected features are then passed through NormalizePicoX. |
55 | | * Globals: |
56 | | * - classify_norm_method normalization method currently specified |
57 | | * @param Blob blob to extract pico-features from |
58 | | * @return Pico-features for Blob, in a set allocated here with new (constructed with MAX_PICO_FEATURES) and returned as a raw pointer. |
59 | | */ |
60 | 504 | FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { |
61 | 504 | auto FeatureSet = new FEATURE_SET_STRUCT(MAX_PICO_FEATURES); |
62 | 504 | auto Outlines = ConvertBlob(Blob); |
63 | 504 | float XScale, YScale; /* passed by address to NormalizeOutlines; not read again in this function */ |
64 | 504 | NormalizeOutlines(Outlines, &XScale, &YScale); |
65 | 504 | auto RemainingOutlines = Outlines; |
66 | 1.15k | iterate(RemainingOutlines) { /* project macro; presumably advances RemainingOutlines through the list -- TODO confirm */ |
67 | 1.15k | auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node()); |
68 | 1.15k | ConvertToPicoFeatures2(Outline, FeatureSet); |
69 | 1.15k | } |
70 | 504 | if (classify_norm_method == baseline) { |
71 | 504 | NormalizePicoX(FeatureSet); |
72 | 504 | } |
73 | 504 | FreeOutlines(Outlines); /* Outlines is not used after this point; only FeatureSet is returned */ |
74 | 504 | return (FeatureSet); |
75 | | |
76 | 504 | } /* ExtractPicoFeatures */ |
77 | | |
78 | | /*---------------------------------------------------------------------------- |
79 | | Private Code |
80 | | ----------------------------------------------------------------------------*/ |
81 | | /*---------------------------------------------------------------------------*/ |
82 | | /** |
83 | | * Converts one outline segment (Start -> End) into pico-features |
84 | | * that are added to FeatureSet. The feature count is the segment |
85 | | * length divided by classify_pico_feature_length, rounded to the |
86 | | * nearest integer and clamped to at least 1, so even a very short |
87 | | * segment yields one feature. Features are placed at the centres of |
88 | | * equal sub-segments and all carry the same direction value. |
89 | | * Globals: |
90 | | * - classify_pico_feature_length length of a single pico-feature |
91 | | * @param Start starting point of the segment |
92 | | * @param End ending point of the segment |
93 | | * @param FeatureSet set to add the pico-features to |
94 | | */ |
95 | 5.95k | void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) { |
96 | 5.95k | float Angle; |
97 | 5.95k | float Length; |
98 | 5.95k | int NumFeatures; |
99 | 5.95k | FPOINT Center; |
100 | 5.95k | FPOINT Delta; |
101 | 5.95k | int i; |
102 | | |
103 | 5.95k | Angle = NormalizedAngleFrom(Start, End, 1.0); /* meaning of the 1.0 argument is defined by NormalizedAngleFrom -- TODO confirm */ |
104 | 5.95k | Length = DistanceBetween(*Start, *End); |
105 | 5.95k | NumFeatures = static_cast<int>(floor(Length / classify_pico_feature_length + 0.5)); /* round to nearest */ |
106 | 5.95k | if (NumFeatures < 1) { |
107 | 786 | NumFeatures = 1; |
108 | 786 | } |
109 | | |
110 | | /* step between consecutive feature centres: XDelta/YDelta of the segment split into NumFeatures parts */ |
111 | 5.95k | Delta.x = XDelta(*Start, *End) / NumFeatures; |
112 | 5.95k | Delta.y = YDelta(*Start, *End) / NumFeatures; |
113 | | |
114 | | /* the first centre lies half a step past Start */ |
115 | 5.95k | Center.x = Start->x + Delta.x / 2.0; |
116 | 5.95k | Center.y = Start->y + Delta.y / 2.0; |
117 | | |
118 | | /* emit NumFeatures features that share Angle, advancing the centre by one step after each */ |
119 | 33.1k | for (i = 0; i < NumFeatures; i++) { |
120 | 27.2k | auto Feature = new FEATURE_STRUCT(&PicoFeatDesc); |
121 | 27.2k | Feature->Params[PicoFeatDir] = Angle; |
122 | 27.2k | Feature->Params[PicoFeatX] = Center.x; |
123 | 27.2k | Feature->Params[PicoFeatY] = Center.y; |
124 | 27.2k | AddFeature(FeatureSet, Feature); /* NOTE(review): the result of AddFeature, if any, is not checked here */ |
125 | | |
126 | 27.2k | Center.x += Delta.x; |
127 | 27.2k | Center.y += Delta.y; |
128 | 27.2k | } |
129 | 5.95k | } /* ConvertSegmentToPicoFeat */ |
130 | | |
131 | | /*---------------------------------------------------------------------------*/ |
132 | | /** |
133 | | * Walks the outline point by point, following NextPointAfter from |
134 | | * Outline until it arrives back at Outline, and passes every |
135 | | * non-hidden edge (Current -> Next) to ConvertSegmentToPicoFeat, so |
136 | | * each segment becomes an integral number of pico-features. |
137 | | * Does nothing if DegenerateOutline(Outline) is true. Results are added to FeatureSet. |
138 | | * |
139 | | * Globals: |
140 | | * - classify_pico_feature_length length of features to be extracted (read by ConvertSegmentToPicoFeat) |
141 | | * @param Outline outline to extract pico-features from |
142 | | * @param FeatureSet set of features to add pico-features to |
143 | | */ |
144 | 1.15k | void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { |
145 | 1.15k | MFOUTLINE Next; |
146 | 1.15k | MFOUTLINE First; |
147 | 1.15k | MFOUTLINE Current; |
148 | | |
149 | 1.15k | if (DegenerateOutline(Outline)) { |
150 | 0 | return; |
151 | 0 | } |
152 | | |
153 | 1.15k | First = Outline; |
154 | 1.15k | Current = First; |
155 | 1.15k | Next = NextPointAfter(Current); |
156 | 5.95k | do { |
157 | | /* An edge counts as hidden when its ENDING point is marked hidden, |
158 | | which is why Next (not Current) is tested below. Per the original |
159 | | note, this is because the order of the outlines is reversed when |
160 | | they are converted from the old format, where a hidden edge was |
161 | | marked by the starting point of that edge. */ |
162 | 5.95k | if (!(PointAt(Next)->Hidden)) { |
163 | 5.95k | ConvertSegmentToPicoFeat(&(PointAt(Current)->Point), &(PointAt(Next)->Point), FeatureSet); |
164 | 5.95k | } |
165 | | |
166 | 5.95k | Current = Next; |
167 | 5.95k | Next = NextPointAfter(Current); |
168 | 5.95k | } while (Current != First); /* stop once the walk is back at the starting point */ |
169 | | |
170 | 1.15k | } /* ConvertToPicoFeatures2 */ |
171 | | |
172 | | /*---------------------------------------------------------------------------*/ |
173 | | /** |
174 | | * Computes the mean of Params[PicoFeatX] over all features in |
175 | | * FeatureSet and subtracts it from every feature, so that the mean |
176 | | * x becomes the origin (x=0). FeatureSet is modified in place. |
177 | | * NOTE(review): with NumFeatures == 0 the mean is computed as 0.0 / 0; |
178 | | * that value is never used because the second loop body does not run. |
179 | | * @param FeatureSet pico-features to be normalized |
180 | | */ |
181 | 504 | void NormalizePicoX(FEATURE_SET FeatureSet) { |
182 | 504 | int i; |
183 | 504 | FEATURE Feature; |
184 | 504 | float Origin = 0.0; |
185 | | |
186 | 27.7k | for (i = 0; i < FeatureSet->NumFeatures; i++) { |
187 | 27.2k | Feature = FeatureSet->Features[i]; |
188 | 27.2k | Origin += Feature->Params[PicoFeatX]; /* running sum of x values */ |
189 | 27.2k | } |
190 | 504 | Origin /= FeatureSet->NumFeatures; /* sum -> mean */ |
191 | | |
192 | 27.7k | for (i = 0; i < FeatureSet->NumFeatures; i++) { |
193 | 27.2k | Feature = FeatureSet->Features[i]; |
194 | 27.2k | Feature->Params[PicoFeatX] -= Origin; |
195 | 27.2k | } |
196 | 504 | } /* NormalizePicoX */ |
197 | | |
198 | | /*---------------------------------------------------------------------------*/ |
199 | | /** |
200 | | * @param blob blob to extract features from; passed to BlobToTrainingSample |
201 | | * @param fx_info feature-extraction info; copied locally and the copy's address is handed to BlobToTrainingSample |
202 | | * @return Integer character-normalized features for blob (one feature per sample feature, holding its X, Y and Theta), or nullptr if BlobToTrainingSample returned nullptr. |
203 | | */ |
204 | 0 | FEATURE_SET Classify::ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) { |
205 | 0 | INT_FX_RESULT_STRUCT local_fx_info(fx_info); /* non-const copy, since fx_info is a const reference */ |
206 | 0 | std::vector<INT_FEATURE_STRUCT> bl_features; /* passed by address to BlobToTrainingSample; not read in this function */ |
207 | 0 | tesseract::TrainingSample *sample = |
208 | 0 | tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features); |
209 | 0 | if (sample == nullptr) { |
210 | 0 | return nullptr; |
211 | 0 | } |
212 | | |
213 | 0 | uint32_t num_features = sample->num_features(); |
214 | 0 | const INT_FEATURE_STRUCT *features = sample->features(); |
215 | 0 | auto feature_set = new FEATURE_SET_STRUCT(num_features); |
216 | 0 | for (uint32_t f = 0; f < num_features; ++f) { |
217 | 0 | auto feature = new FEATURE_STRUCT(&IntFeatDesc); |
218 | 0 | feature->Params[IntX] = features[f].X; |
219 | 0 | feature->Params[IntY] = features[f].Y; |
220 | 0 | feature->Params[IntDir] = features[f].Theta; |
221 | 0 | AddFeature(feature_set, feature); |
222 | 0 | } |
223 | 0 | delete sample; /* its X/Y/Theta values were copied into feature_set above */ |
224 | |

225 | 0 | return feature_set; |
226 | 0 | } /* ExtractIntCNFeatures */ |
227 | | |
228 | | /*---------------------------------------------------------------------------*/ |
229 | | /** |
230 | | * @param blob blob to extract features from; passed to BlobToTrainingSample |
231 | | * @param fx_info feature-extraction info; copied locally and the copy's address is handed to BlobToTrainingSample |
232 | | * @return Set holding a single feature with the geometric (top/bottom/width) values for blob, or nullptr if BlobToTrainingSample returned nullptr. |
233 | | */ |
234 | | FEATURE_SET Classify::ExtractIntGeoFeatures(const TBLOB &blob, |
235 | 0 | const INT_FX_RESULT_STRUCT &fx_info) { |
236 | 0 | INT_FX_RESULT_STRUCT local_fx_info(fx_info); /* non-const copy, since fx_info is a const reference */ |
237 | 0 | std::vector<INT_FEATURE_STRUCT> bl_features; /* passed by address to BlobToTrainingSample; not read in this function */ |
238 | 0 | tesseract::TrainingSample *sample = |
239 | 0 | tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features); |
240 | 0 | if (sample == nullptr) { |
241 | 0 | return nullptr; |
242 | 0 | } |
243 | | |
244 | 0 | auto feature_set = new FEATURE_SET_STRUCT(1); |
245 | 0 | auto feature = new FEATURE_STRUCT(&IntFeatDesc); /* NOTE(review): built with IntFeatDesc but filled through the Geo* indices below -- confirm this descriptor is intended */ |
246 | |

247 | 0 | feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); |
248 | 0 | feature->Params[GeoTop] = sample->geo_feature(GeoTop); |
249 | 0 | feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); |
250 | 0 | AddFeature(feature_set, feature); |
251 | 0 | delete sample; /* the three geo values were copied into feature above */ |
252 | |

253 | 0 | return feature_set; |
254 | 0 | } /* ExtractIntGeoFeatures */ |
255 | | |
256 | | } // namespace tesseract. |