/src/tesseract/src/classify/trainingsample.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // Author: rays@google.com (Ray Smith) |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | // you may not use this file except in compliance with the License. |
6 | | // You may obtain a copy of the License at |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // Unless required by applicable law or agreed to in writing, software |
9 | | // distributed under the License is distributed on an "AS IS" BASIS, |
10 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
11 | | // See the License for the specific language governing permissions and |
12 | | // limitations under the License. |
13 | | // |
14 | | /////////////////////////////////////////////////////////////////////// |
15 | | |
16 | | #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_ |
17 | | #define TESSERACT_TRAINING_TRAININGSAMPLE_H_ |
18 | | |
19 | | #include "elst.h" |
20 | | #include "featdefs.h" |
21 | | #include "intfx.h" |
22 | | #include "intmatcher.h" |
23 | | #include "matrix.h" |
24 | | #include "mf.h" |
25 | | #include "mfdefs.h" |
26 | | #include "picofeat.h" |
27 | | #include "shapetable.h" |
28 | | #include "unicharset.h" |
29 | | |
30 | | struct Pix; |
31 | | |
32 | | namespace tesseract { |
33 | | |
34 | | class IntFeatureMap; |
35 | | class IntFeatureSpace; |
36 | | class ShapeTable; |
37 | | |
38 | | // Number of elements of cn_feature_ (the array size of TrainingSample::cn_feature_). |
39 | | static const int kNumCNParams = 4; |
40 | | // Number of ways to shift the features when randomizing (size of TrainingSample::kYShiftValues). |
41 | | static const int kSampleYShiftSize = 5; |
42 | | // Number of ways to scale the features when randomizing (size of TrainingSample::kScaleValues). |
43 | | static const int kSampleScaleSize = 3; |
44 | | // Total number of different ways to manipulate the features when randomizing. |
45 | | // The first and last combinations are removed to avoid an excessive |
46 | | // top movement (first) and an identity transformation (last). |
47 | | // WARNING: To avoid patterned duplication of samples, be sure to keep |
48 | | // kSampleRandomSize prime! |
49 | | // Eg with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3) |
50 | | // kSampleRandomSize is 5 * 3 - 2 = 13, which is prime. |
51 | | static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; |
52 | | // NOTE: primality of kSampleRandomSize is not checked by any code visible here; re-verify it by hand if either factor changes. |
53 | | |
54 | | class TESS_API TrainingSample : public ELIST<TrainingSample>::LINK { |
55 | | public: |
56 | | TrainingSample() |
57 | 1.93M | : class_id_(INVALID_UNICHAR_ID) |
58 | 1.93M | , font_id_(0) |
59 | 1.93M | , page_num_(0) |
60 | 1.93M | , num_features_(0) |
61 | 1.93M | , num_micro_features_(0) |
62 | 1.93M | , outline_length_(0) |
63 | 1.93M | , features_(nullptr) |
64 | 1.93M | , micro_features_(nullptr) |
65 | 1.93M | , weight_(1.0) |
66 | 1.93M | , max_dist_(0.0) |
67 | 1.93M | , sample_index_(0) |
68 | 1.93M | , features_are_indexed_(false) |
69 | 1.93M | , features_are_mapped_(false) |
70 | 1.93M | , is_error_(false) {} |
71 | | ~TrainingSample(); |
72 | | |
73 | | // Saves the given features into a TrainingSample. The features are copied, |
74 | | // so may be deleted afterwards. The caller must delete the return value after use. |
75 | | static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, |
76 | | const TBOX &bounding_box, |
77 | | const INT_FEATURE_STRUCT *features, int num_features); |
78 | | // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. |
79 | | FEATURE_STRUCT *GetCNFeature() const; |
80 | | // Constructs and returns a copy "randomized" by the method given by |
81 | | // the randomizer index. If index is out of [0, kSampleRandomSize) then |
82 | | // an exact copy is returned. |
83 | | TrainingSample *RandomizedCopy(int index) const; |
84 | | // Constructs and returns an exact copy. |
85 | | TrainingSample *Copy() const; |
86 | | |
87 | | // WARNING! Serialize/DeSerialize do not save/restore the "cache" data |
88 | | // members, which is mostly the mapped features, and the weight. |
89 | | // It is assumed these can all be reconstructed from what is saved. |
90 | | // Writes to the given file. Returns false in case of error. |
91 | | bool Serialize(FILE *fp) const; |
92 | | // Creates from the given file. Returns nullptr in case of error. |
93 | | // If swap is true, assumes a big/little-endian swap is needed. |
94 | | static TrainingSample *DeSerializeCreate(bool swap, FILE *fp); |
95 | | // Reads from the given file. Returns false in case of error. |
96 | | // If swap is true, assumes a big/little-endian swap is needed. |
97 | | bool DeSerialize(bool swap, FILE *fp); |
98 | | |
99 | | // Extracts the needed information from the CHAR_DESC_STRUCT. |
100 | | void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, |
101 | | CHAR_DESC_STRUCT *char_desc); |
102 | | |
103 | | // Sets mapped_features_ to the indexed versions of features_, computed |
104 | | // using the provided feature_space. |
105 | | void IndexFeatures(const IntFeatureSpace &feature_space); |
106 | | |
107 | | // Returns a pix representing the sample. (Int features only.) |
108 | | Image RenderToPix(const UNICHARSET *unicharset) const; |
109 | | // Displays the features in the given window with the given color. |
110 | | void DisplayFeatures(ScrollView::Color color, ScrollView *window) const; |
111 | | |
112 | | // Returns a pix of the original sample image. The pix is padded all round |
113 | | // by padding wherever possible. |
114 | | // The returned Pix must be pixDestroyed after use. NOTE(review): the declared return type is Image, not Pix* - confirm how the result is released. |
115 | | // If the input page_pix is nullptr, nullptr is returned. |
116 | | Image GetSamplePix(int padding, Image page_pix) const; |
117 | | |
118 | | // Accessors. Note: mapped_features() and indexed_features() both return mapped_features_; each first checks its own features_are_*_ flag with ASSERT_HOST. |
119 | 0 | UNICHAR_ID class_id() const { |
120 | 0 | return class_id_; |
121 | 0 | } |
122 | 0 | void set_class_id(int id) { |
123 | 0 | class_id_ = id; |
124 | 0 | } |
125 | 0 | int font_id() const { |
126 | 0 | return font_id_; |
127 | 0 | } |
128 | 0 | void set_font_id(int id) { |
129 | 0 | font_id_ = id; |
130 | 0 | } |
131 | 0 | int page_num() const { |
132 | 0 | return page_num_; |
133 | 0 | } |
134 | 0 | void set_page_num(int page) { |
135 | 0 | page_num_ = page; |
136 | 0 | } |
137 | 0 | const TBOX &bounding_box() const { |
138 | 0 | return bounding_box_; |
139 | 0 | } |
140 | 1.93M | void set_bounding_box(const TBOX &box) { |
141 | 1.93M | bounding_box_ = box; |
142 | 1.93M | } |
143 | 3.86M | uint32_t num_features() const { |
144 | 3.86M | return num_features_; |
145 | 3.86M | } |
146 | 3.86M | const INT_FEATURE_STRUCT *features() const { |
147 | 3.86M | return features_; |
148 | 3.86M | } |
149 | 0 | uint32_t num_micro_features() const { |
150 | 0 | return num_micro_features_; |
151 | 0 | } |
152 | 0 | const MicroFeature *micro_features() const { |
153 | 0 | return micro_features_; |
154 | 0 | } |
155 | 1.93M | int outline_length() const { |
156 | 1.93M | return outline_length_; |
157 | 1.93M | } |
158 | 0 | float cn_feature(int index) const { |
159 | 0 | return cn_feature_[index]; |
160 | 0 | } |
161 | 7.73M | int geo_feature(int index) const { |
162 | 7.73M | return geo_feature_[index]; |
163 | 7.73M | } |
164 | 0 | double weight() const { |
165 | 0 | return weight_; |
166 | 0 | } |
167 | 0 | void set_weight(double value) { |
168 | 0 | weight_ = value; |
169 | 0 | } |
170 | 0 | double max_dist() const { |
171 | 0 | return max_dist_; |
172 | 0 | } |
173 | 0 | void set_max_dist(double value) { |
174 | 0 | max_dist_ = value; |
175 | 0 | } |
176 | 0 | int sample_index() const { |
177 | 0 | return sample_index_; |
178 | 0 | } |
179 | 0 | void set_sample_index(int value) { |
180 | 0 | sample_index_ = value; |
181 | 0 | } |
182 | 0 | bool features_are_mapped() const { |
183 | 0 | return features_are_mapped_; |
184 | 0 | } |
185 | 0 | const std::vector<int> &mapped_features() const { |
186 | 0 | ASSERT_HOST(features_are_mapped_); |
187 | 0 | return mapped_features_; |
188 | 0 | } |
189 | 0 | const std::vector<int> &indexed_features() const { |
190 | 0 | ASSERT_HOST(features_are_indexed_); |
191 | 0 | return mapped_features_; |
192 | 0 | } |
193 | 0 | bool is_error() const { |
194 | 0 | return is_error_; |
195 | 0 | } |
196 | 0 | void set_is_error(bool value) { |
197 | 0 | is_error_ = value; |
198 | 0 | } |
199 | | |
200 | | private: |
201 | | // Unichar id that this sample represents. There obviously must be a |
202 | | // reference UNICHARSET somewhere. Usually in TrainingSampleSet. |
203 | | UNICHAR_ID class_id_; |
204 | | // Font id in which this sample was printed. Refers to a fontinfo_table_ in |
205 | | // MasterTrainer. |
206 | | int font_id_; |
207 | | // Number of page that the sample came from. |
208 | | int page_num_; |
209 | | // Bounding box of sample in original image. |
210 | | TBOX bounding_box_; |
211 | | // Number of INT_FEATURE_STRUCT in features_ array. |
212 | | uint32_t num_features_; |
213 | | // Number of MicroFeature in micro_features_ array. |
214 | | uint32_t num_micro_features_; |
215 | | // Total length of outline in the baseline normalized coordinate space. |
216 | | // See comment in WERD_RES class definition for a discussion of coordinate |
217 | | // spaces. |
218 | | int outline_length_; |
219 | | // Array of int features; num_features_ holds its element count. |
220 | | INT_FEATURE_STRUCT *features_; |
221 | | // Array of micro features; num_micro_features_ holds its element count. |
222 | | MicroFeature *micro_features_; |
223 | | // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. |
224 | | float cn_feature_[kNumCNParams]; |
225 | | // The one and only geometric feature. (Aims at replacing cn_feature_). |
226 | | // Indexed by GeoParams enum in picofeat.h |
227 | | int geo_feature_[GeoCount]; |
228 | | |
229 | | // Non-serialized cache data. |
230 | | // Weight used for boosting training. |
231 | | double weight_; |
232 | | // Maximum distance to other samples of same class/font used in computing |
233 | | // the canonical sample. |
234 | | double max_dist_; |
235 | | // Global index of this sample. |
236 | | int sample_index_; |
237 | | |
238 | | public: |
239 | | // The members below are public because they are used in training tools. |
240 | | // TODO: hide them (make them private) after refactoring. |
241 | | |
242 | | // Indexed/mapped features, as indicated by the bools below. |
243 | | std::vector<int> mapped_features_; |
244 | | bool features_are_indexed_; |
245 | | bool features_are_mapped_; |
246 | | |
247 | | private: |
248 | | // True if the last classification was an error by the current definition. |
249 | | bool is_error_; |
250 | | |
251 | | // Randomizing factors. |
252 | | static const int kYShiftValues[kSampleYShiftSize]; |
253 | | static const double kScaleValues[kSampleScaleSize]; |
254 | | }; |
255 | | |
256 | | ELISTIZEH(TrainingSample) |
257 | | |
258 | | } // namespace tesseract |
259 | | |
260 | | #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_ |