/src/tesseract/src/classify/trainingsample.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // Author: rays@google.com (Ray Smith) |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | // you may not use this file except in compliance with the License. |
6 | | // You may obtain a copy of the License at |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // Unless required by applicable law or agreed to in writing, software |
9 | | // distributed under the License is distributed on an "AS IS" BASIS, |
10 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
11 | | // See the License for the specific language governing permissions and |
12 | | // limitations under the License. |
13 | | // |
14 | | /////////////////////////////////////////////////////////////////////// |
15 | | |
16 | | #define _USE_MATH_DEFINES // for M_PI |
17 | | // Include automatically generated configuration file if running autoconf. |
18 | | #ifdef HAVE_CONFIG_H |
19 | | # include "config_auto.h" |
20 | | #endif |
21 | | |
22 | | #include "trainingsample.h" |
23 | | |
24 | | #include "helpers.h" |
25 | | #include "intfeaturespace.h" |
26 | | #include "normfeat.h" |
27 | | #include "shapetable.h" |
28 | | |
29 | | #include <allheaders.h> |
30 | | |
31 | | #include <cmath> // for M_PI |
32 | | |
33 | | namespace tesseract { |
34 | | |
// Center of randomizing operations: RandomizedCopy scales the X and Y
// coordinates of each feature about this value.
const int kRandomizingCenter = 128;

// Randomizing factors.
// RandomizedCopy selects the Y shift with index / kSampleScaleSize and the
// scale factor with index % kSampleScaleSize.
const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {6, 3, -3, -6, 0};
const double TrainingSample::kScaleValues[kSampleScaleSize] = {1.0625, 0.9375, 1.0};
41 | | |
42 | 1.96M | TrainingSample::~TrainingSample() { |
43 | 1.96M | delete[] features_; |
44 | 1.96M | delete[] micro_features_; |
45 | 1.96M | } |
46 | | |
47 | | // WARNING! Serialize/DeSerialize do not save/restore the "cache" data |
48 | | // members, which is mostly the mapped features, and the weight. |
49 | | // It is assumed these can all be reconstructed from what is saved. |
50 | | // Writes to the given file. Returns false in case of error. |
51 | 0 | bool TrainingSample::Serialize(FILE *fp) const { |
52 | 0 | if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) { |
53 | 0 | return false; |
54 | 0 | } |
55 | 0 | if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) { |
56 | 0 | return false; |
57 | 0 | } |
58 | 0 | if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) { |
59 | 0 | return false; |
60 | 0 | } |
61 | 0 | if (!bounding_box_.Serialize(fp)) { |
62 | 0 | return false; |
63 | 0 | } |
64 | 0 | if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) { |
65 | 0 | return false; |
66 | 0 | } |
67 | 0 | if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) { |
68 | 0 | return false; |
69 | 0 | } |
70 | 0 | if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) { |
71 | 0 | return false; |
72 | 0 | } |
73 | 0 | if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) { |
74 | 0 | return false; |
75 | 0 | } |
76 | 0 | if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, fp) != |
77 | 0 | num_micro_features_) { |
78 | 0 | return false; |
79 | 0 | } |
80 | 0 | if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) { |
81 | 0 | return false; |
82 | 0 | } |
83 | 0 | if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) { |
84 | 0 | return false; |
85 | 0 | } |
86 | 0 | return true; |
87 | 0 | } |
88 | | |
89 | | // Creates from the given file. Returns nullptr in case of error. |
90 | | // If swap is true, assumes a big/little-endian swap is needed. |
91 | 0 | TrainingSample *TrainingSample::DeSerializeCreate(bool swap, FILE *fp) { |
92 | 0 | auto *sample = new TrainingSample; |
93 | 0 | if (sample->DeSerialize(swap, fp)) { |
94 | 0 | return sample; |
95 | 0 | } |
96 | 0 | delete sample; |
97 | 0 | return nullptr; |
98 | 0 | } |
99 | | |
100 | | // Reads from the given file. Returns false in case of error. |
101 | | // If swap is true, assumes a big/little-endian swap is needed. |
102 | 0 | bool TrainingSample::DeSerialize(bool swap, FILE *fp) { |
103 | 0 | if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) { |
104 | 0 | return false; |
105 | 0 | } |
106 | 0 | if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) { |
107 | 0 | return false; |
108 | 0 | } |
109 | 0 | if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) { |
110 | 0 | return false; |
111 | 0 | } |
112 | 0 | if (!bounding_box_.DeSerialize(swap, fp)) { |
113 | 0 | return false; |
114 | 0 | } |
115 | 0 | if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) { |
116 | 0 | return false; |
117 | 0 | } |
118 | 0 | if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) { |
119 | 0 | return false; |
120 | 0 | } |
121 | 0 | if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) { |
122 | 0 | return false; |
123 | 0 | } |
124 | 0 | if (swap) { |
125 | 0 | ReverseN(&class_id_, sizeof(class_id_)); |
126 | 0 | ReverseN(&num_features_, sizeof(num_features_)); |
127 | 0 | ReverseN(&num_micro_features_, sizeof(num_micro_features_)); |
128 | 0 | ReverseN(&outline_length_, sizeof(outline_length_)); |
129 | 0 | } |
130 | | // Arbitrarily limit the number of elements to protect against bad data. |
131 | 0 | if (num_features_ > UINT16_MAX) { |
132 | 0 | return false; |
133 | 0 | } |
134 | 0 | if (num_micro_features_ > UINT16_MAX) { |
135 | 0 | return false; |
136 | 0 | } |
137 | 0 | delete[] features_; |
138 | 0 | features_ = new INT_FEATURE_STRUCT[num_features_]; |
139 | 0 | if (fread(features_, sizeof(*features_), num_features_, fp) != num_features_) { |
140 | 0 | return false; |
141 | 0 | } |
142 | 0 | delete[] micro_features_; |
143 | 0 | micro_features_ = new MicroFeature[num_micro_features_]; |
144 | 0 | if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, fp) != |
145 | 0 | num_micro_features_) { |
146 | 0 | return false; |
147 | 0 | } |
148 | 0 | if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) { |
149 | 0 | return false; |
150 | 0 | } |
151 | 0 | if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) { |
152 | 0 | return false; |
153 | 0 | } |
154 | 0 | return true; |
155 | 0 | } |
156 | | |
157 | | // Saves the given features into a TrainingSample. |
158 | | TrainingSample *TrainingSample::CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, |
159 | | const TBOX &bounding_box, |
160 | | const INT_FEATURE_STRUCT *features, |
161 | 1.96M | int num_features) { |
162 | 1.96M | auto *sample = new TrainingSample; |
163 | 1.96M | sample->num_features_ = num_features; |
164 | 1.96M | sample->features_ = new INT_FEATURE_STRUCT[num_features]; |
165 | 1.96M | sample->outline_length_ = fx_info.Length; |
166 | 1.96M | memcpy(sample->features_, features, num_features * sizeof(features[0])); |
167 | 1.96M | sample->geo_feature_[GeoBottom] = bounding_box.bottom(); |
168 | 1.96M | sample->geo_feature_[GeoTop] = bounding_box.top(); |
169 | 1.96M | sample->geo_feature_[GeoWidth] = bounding_box.width(); |
170 | | |
171 | | // Generate the cn_feature_ from the fx_info. |
172 | 1.96M | sample->cn_feature_[CharNormY] = MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); |
173 | 1.96M | sample->cn_feature_[CharNormLength] = MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; |
174 | 1.96M | sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; |
175 | 1.96M | sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; |
176 | | |
177 | 1.96M | sample->features_are_indexed_ = false; |
178 | 1.96M | sample->features_are_mapped_ = false; |
179 | 1.96M | return sample; |
180 | 1.96M | } |
181 | | |
182 | | // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. |
183 | 1.96M | FEATURE_STRUCT *TrainingSample::GetCNFeature() const { |
184 | 1.96M | auto feature = new FEATURE_STRUCT(&CharNormDesc); |
185 | 9.81M | for (int i = 0; i < kNumCNParams; ++i) { |
186 | 7.85M | feature->Params[i] = cn_feature_[i]; |
187 | 7.85M | } |
188 | 1.96M | return feature; |
189 | 1.96M | } |
190 | | |
191 | | // Constructs and returns a copy randomized by the method given by |
192 | | // the randomizer index. If index is out of [0, kSampleRandomSize) then |
193 | | // an exact copy is returned. |
194 | 0 | TrainingSample *TrainingSample::RandomizedCopy(int index) const { |
195 | 0 | TrainingSample *sample = Copy(); |
196 | 0 | if (index >= 0 && index < kSampleRandomSize) { |
197 | 0 | ++index; // Remove the first combination. |
198 | 0 | const int yshift = kYShiftValues[index / kSampleScaleSize]; |
199 | 0 | double scaling = kScaleValues[index % kSampleScaleSize]; |
200 | 0 | for (uint32_t i = 0; i < num_features_; ++i) { |
201 | 0 | double result = (features_[i].X - kRandomizingCenter) * scaling; |
202 | 0 | result += kRandomizingCenter; |
203 | 0 | sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); |
204 | 0 | result = (features_[i].Y - kRandomizingCenter) * scaling; |
205 | 0 | result += kRandomizingCenter + yshift; |
206 | 0 | sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); |
207 | 0 | } |
208 | 0 | } |
209 | 0 | return sample; |
210 | 0 | } |
211 | | |
212 | | // Constructs and returns an exact copy. |
213 | 0 | TrainingSample *TrainingSample::Copy() const { |
214 | 0 | auto *sample = new TrainingSample; |
215 | 0 | sample->class_id_ = class_id_; |
216 | 0 | sample->font_id_ = font_id_; |
217 | 0 | sample->weight_ = weight_; |
218 | 0 | sample->sample_index_ = sample_index_; |
219 | 0 | sample->num_features_ = num_features_; |
220 | 0 | if (num_features_ > 0) { |
221 | 0 | sample->features_ = new INT_FEATURE_STRUCT[num_features_]; |
222 | 0 | memcpy(sample->features_, features_, num_features_ * sizeof(features_[0])); |
223 | 0 | } |
224 | 0 | sample->num_micro_features_ = num_micro_features_; |
225 | 0 | if (num_micro_features_ > 0) { |
226 | 0 | sample->micro_features_ = new MicroFeature[num_micro_features_]; |
227 | 0 | memcpy(sample->micro_features_, micro_features_, |
228 | 0 | num_micro_features_ * sizeof(micro_features_[0])); |
229 | 0 | } |
230 | 0 | memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams); |
231 | 0 | memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount); |
232 | 0 | return sample; |
233 | 0 | } |
234 | | |
235 | | // Extracts the needed information from the CHAR_DESC_STRUCT. |
236 | | void TrainingSample::ExtractCharDesc(int int_feature_type, int micro_type, int cn_type, |
237 | 0 | int geo_type, CHAR_DESC_STRUCT *char_desc) { |
238 | | // Extract the INT features. |
239 | 0 | delete[] features_; |
240 | 0 | FEATURE_SET_STRUCT *char_features = char_desc->FeatureSets[int_feature_type]; |
241 | 0 | if (char_features == nullptr) { |
242 | 0 | tprintf("Error: no features to train on of type %s\n", kIntFeatureType); |
243 | 0 | num_features_ = 0; |
244 | 0 | features_ = nullptr; |
245 | 0 | } else { |
246 | 0 | num_features_ = char_features->NumFeatures; |
247 | 0 | features_ = new INT_FEATURE_STRUCT[num_features_]; |
248 | 0 | for (uint32_t f = 0; f < num_features_; ++f) { |
249 | 0 | features_[f].X = static_cast<uint8_t>(char_features->Features[f]->Params[IntX]); |
250 | 0 | features_[f].Y = static_cast<uint8_t>(char_features->Features[f]->Params[IntY]); |
251 | 0 | features_[f].Theta = static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]); |
252 | 0 | features_[f].CP_misses = 0; |
253 | 0 | } |
254 | 0 | } |
255 | | // Extract the Micro features. |
256 | 0 | delete[] micro_features_; |
257 | 0 | char_features = char_desc->FeatureSets[micro_type]; |
258 | 0 | if (char_features == nullptr) { |
259 | 0 | tprintf("Error: no features to train on of type %s\n", kMicroFeatureType); |
260 | 0 | num_micro_features_ = 0; |
261 | 0 | micro_features_ = nullptr; |
262 | 0 | } else { |
263 | 0 | num_micro_features_ = char_features->NumFeatures; |
264 | 0 | micro_features_ = new MicroFeature[num_micro_features_]; |
265 | 0 | for (uint32_t f = 0; f < num_micro_features_; ++f) { |
266 | 0 | for (int d = 0; d < (int)MicroFeatureParameter::MFCount; ++d) { |
267 | 0 | micro_features_[f][d] = char_features->Features[f]->Params[d]; |
268 | 0 | } |
269 | 0 | } |
270 | 0 | } |
271 | | // Extract the CN feature. |
272 | 0 | char_features = char_desc->FeatureSets[cn_type]; |
273 | 0 | if (char_features == nullptr) { |
274 | 0 | tprintf("Error: no CN feature to train on.\n"); |
275 | 0 | } else { |
276 | 0 | ASSERT_HOST(char_features->NumFeatures == 1); |
277 | 0 | cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY]; |
278 | 0 | cn_feature_[CharNormLength] = char_features->Features[0]->Params[CharNormLength]; |
279 | 0 | cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx]; |
280 | 0 | cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy]; |
281 | 0 | } |
282 | | // Extract the Geo feature. |
283 | 0 | char_features = char_desc->FeatureSets[geo_type]; |
284 | 0 | if (char_features == nullptr) { |
285 | 0 | tprintf("Error: no Geo feature to train on.\n"); |
286 | 0 | } else { |
287 | 0 | ASSERT_HOST(char_features->NumFeatures == 1); |
288 | 0 | geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom]; |
289 | 0 | geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop]; |
290 | 0 | geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth]; |
291 | 0 | } |
292 | 0 | features_are_indexed_ = false; |
293 | 0 | features_are_mapped_ = false; |
294 | 0 | } |
295 | | |
296 | | // Sets the mapped_features_ from the features_ using the provided |
297 | | // feature_space to the indexed versions of the features. |
298 | 0 | void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) { |
299 | 0 | std::vector<int> indexed_features; |
300 | 0 | feature_space.IndexAndSortFeatures(features_, num_features_, &mapped_features_); |
301 | 0 | features_are_indexed_ = true; |
302 | 0 | features_are_mapped_ = false; |
303 | 0 | } |
304 | | |
305 | | // Returns a pix representing the sample. (Int features only.) |
306 | 0 | Image TrainingSample::RenderToPix(const UNICHARSET *unicharset) const { |
307 | 0 | Image pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); |
308 | 0 | for (uint32_t f = 0; f < num_features_; ++f) { |
309 | 0 | int start_x = features_[f].X; |
310 | 0 | int start_y = kIntFeatureExtent - features_[f].Y; |
311 | 0 | double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); |
312 | 0 | double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); |
313 | 0 | for (int i = 0; i <= 5; ++i) { |
314 | 0 | int x = static_cast<int>(start_x + dx * i); |
315 | 0 | int y = static_cast<int>(start_y + dy * i); |
316 | 0 | if (x >= 0 && x < 256 && y >= 0 && y < 256) { |
317 | 0 | pixSetPixel(pix, x, y, 1); |
318 | 0 | } |
319 | 0 | } |
320 | 0 | } |
321 | 0 | if (unicharset != nullptr) { |
322 | 0 | pixSetText(pix, unicharset->id_to_unichar(class_id_)); |
323 | 0 | } |
324 | 0 | return pix; |
325 | 0 | } |
326 | | |
327 | | #ifndef GRAPHICS_DISABLED |
328 | | |
329 | | // Displays the features in the given window with the given color. |
330 | | void TrainingSample::DisplayFeatures(ScrollView::Color color, ScrollView *window) const { |
331 | | for (uint32_t f = 0; f < num_features_; ++f) { |
332 | | RenderIntFeature(window, &features_[f], color); |
333 | | } |
334 | | } |
335 | | |
336 | | #endif // !GRAPHICS_DISABLED |
337 | | |
338 | | // Returns a pix of the original sample image. The pix is padded all round |
339 | | // by padding wherever possible. |
340 | | // The returned Pix must be pixDestroyed after use. |
341 | | // If the input page_pix is nullptr, nullptr is returned. |
342 | 0 | Image TrainingSample::GetSamplePix(int padding, Image page_pix) const { |
343 | 0 | if (page_pix == nullptr) { |
344 | 0 | return nullptr; |
345 | 0 | } |
346 | 0 | int page_width = pixGetWidth(page_pix); |
347 | 0 | int page_height = pixGetHeight(page_pix); |
348 | 0 | TBOX padded_box = bounding_box(); |
349 | 0 | padded_box.pad(padding, padding); |
350 | | // Clip the padded_box to the limits of the page |
351 | 0 | TBOX page_box(0, 0, page_width, page_height); |
352 | 0 | padded_box &= page_box; |
353 | 0 | Box *box = |
354 | 0 | boxCreate(page_box.left(), page_height - page_box.top(), page_box.width(), page_box.height()); |
355 | 0 | Image sample_pix = pixClipRectangle(page_pix, box, nullptr); |
356 | 0 | boxDestroy(&box); |
357 | 0 | return sample_pix; |
358 | 0 | } |
359 | | |
360 | | } // namespace tesseract |