Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/classify/trainingsample.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Author: rays@google.com (Ray Smith)
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
8
// Unless required by applicable law or agreed to in writing, software
9
// distributed under the License is distributed on an "AS IS" BASIS,
10
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
// See the License for the specific language governing permissions and
12
// limitations under the License.
13
//
14
///////////////////////////////////////////////////////////////////////
15
16
#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
17
#define TESSERACT_TRAINING_TRAININGSAMPLE_H_
18
19
#include "elst.h"
20
#include "featdefs.h"
21
#include "intfx.h"
22
#include "intmatcher.h"
23
#include "matrix.h"
24
#include "mf.h"
25
#include "mfdefs.h"
26
#include "picofeat.h"
27
#include "shapetable.h"
28
#include "unicharset.h"
29
30
struct Pix;
31
32
namespace tesseract {
33
34
class IntFeatureMap;
35
class IntFeatureSpace;
36
class ShapeTable;
37
38
// Number of elements of cn_feature_.
39
static const int kNumCNParams = 4;
40
// Number of ways to shift the features when randomizing.
41
static const int kSampleYShiftSize = 5;
42
// Number of ways to scale the features when randomizing.
43
static const int kSampleScaleSize = 3;
44
// Total number of different ways to manipulate the features when randomizing.
45
// The first and last combinations are removed to avoid an excessive
46
// top movement (first) and an identity transformation (last).
47
// WARNING: To avoid patterned duplication of samples, be sure to keep
48
// kSampleRandomSize prime!
49
// E.g. with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
50
// kSampleRandomSize is 13, which is prime.
51
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
52
// ASSERT_IS_PRIME(kSampleRandomSize) !!
53
54
class TESS_API TrainingSample : public ELIST<TrainingSample>::LINK {
55
public:
56
  TrainingSample()
57
1.93M
      : class_id_(INVALID_UNICHAR_ID)
58
1.93M
      , font_id_(0)
59
1.93M
      , page_num_(0)
60
1.93M
      , num_features_(0)
61
1.93M
      , num_micro_features_(0)
62
1.93M
      , outline_length_(0)
63
1.93M
      , features_(nullptr)
64
1.93M
      , micro_features_(nullptr)
65
1.93M
      , weight_(1.0)
66
1.93M
      , max_dist_(0.0)
67
1.93M
      , sample_index_(0)
68
1.93M
      , features_are_indexed_(false)
69
1.93M
      , features_are_mapped_(false)
70
1.93M
      , is_error_(false) {}
71
  ~TrainingSample();
72
73
  // Saves the given features into a TrainingSample. The features are copied,
74
  // so may be deleted afterwards. Delete the return value after use.
75
  static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info,
76
                                          const TBOX &bounding_box,
77
                                          const INT_FEATURE_STRUCT *features, int num_features);
78
  // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
79
  FEATURE_STRUCT *GetCNFeature() const;
80
  // Constructs and returns a copy "randomized" by the method given by
81
  // the randomizer index. If index is out of [0, kSampleRandomSize) then
82
  // an exact copy is returned.
83
  TrainingSample *RandomizedCopy(int index) const;
84
  // Constructs and returns an exact copy.
85
  TrainingSample *Copy() const;
86
87
  // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
88
  // members, which are mostly the mapped features and the weight.
89
  // It is assumed these can all be reconstructed from what is saved.
90
  // Writes to the given file. Returns false in case of error.
91
  bool Serialize(FILE *fp) const;
92
  // Creates from the given file. Returns nullptr in case of error.
93
  // If swap is true, assumes a big/little-endian swap is needed.
94
  static TrainingSample *DeSerializeCreate(bool swap, FILE *fp);
95
  // Reads from the given file. Returns false in case of error.
96
  // If swap is true, assumes a big/little-endian swap is needed.
97
  bool DeSerialize(bool swap, FILE *fp);
98
99
  // Extracts the needed information from the CHAR_DESC_STRUCT.
100
  void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type,
101
                       CHAR_DESC_STRUCT *char_desc);
102
103
  // Sets mapped_features_ to the indexed versions of features_, computed
104
  // using the provided feature_space.
105
  void IndexFeatures(const IntFeatureSpace &feature_space);
106
107
  // Returns a pix representing the sample. (Int features only.)
108
  Image RenderToPix(const UNICHARSET *unicharset) const;
109
  // Displays the features in the given window with the given color.
110
  void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
111
112
  // Returns a pix of the original sample image. The pix is padded all round
113
  // by padding wherever possible.
114
  // The returned Pix must be pixDestroyed after use.
115
  // If the input page_pix is nullptr, nullptr is returned.
116
  Image GetSamplePix(int padding, Image page_pix) const;
117
118
  // Accessors.
119
0
  UNICHAR_ID class_id() const {
120
0
    return class_id_;
121
0
  }
122
0
  void set_class_id(int id) {
123
0
    class_id_ = id;
124
0
  }
125
0
  int font_id() const {
126
0
    return font_id_;
127
0
  }
128
0
  void set_font_id(int id) {
129
0
    font_id_ = id;
130
0
  }
131
0
  int page_num() const {
132
0
    return page_num_;
133
0
  }
134
0
  void set_page_num(int page) {
135
0
    page_num_ = page;
136
0
  }
137
0
  const TBOX &bounding_box() const {
138
0
    return bounding_box_;
139
0
  }
140
1.93M
  void set_bounding_box(const TBOX &box) {
141
1.93M
    bounding_box_ = box;
142
1.93M
  }
143
3.86M
  uint32_t num_features() const {
144
3.86M
    return num_features_;
145
3.86M
  }
146
3.86M
  const INT_FEATURE_STRUCT *features() const {
147
3.86M
    return features_;
148
3.86M
  }
149
0
  uint32_t num_micro_features() const {
150
0
    return num_micro_features_;
151
0
  }
152
0
  const MicroFeature *micro_features() const {
153
0
    return micro_features_;
154
0
  }
155
1.93M
  int outline_length() const {
156
1.93M
    return outline_length_;
157
1.93M
  }
158
0
  float cn_feature(int index) const {
159
0
    return cn_feature_[index];
160
0
  }
161
7.73M
  int geo_feature(int index) const {
162
7.73M
    return geo_feature_[index];
163
7.73M
  }
164
0
  double weight() const {
165
0
    return weight_;
166
0
  }
167
0
  void set_weight(double value) {
168
0
    weight_ = value;
169
0
  }
170
0
  double max_dist() const {
171
0
    return max_dist_;
172
0
  }
173
0
  void set_max_dist(double value) {
174
0
    max_dist_ = value;
175
0
  }
176
0
  int sample_index() const {
177
0
    return sample_index_;
178
0
  }
179
0
  void set_sample_index(int value) {
180
0
    sample_index_ = value;
181
0
  }
182
0
  bool features_are_mapped() const {
183
0
    return features_are_mapped_;
184
0
  }
185
0
  const std::vector<int> &mapped_features() const {
186
0
    ASSERT_HOST(features_are_mapped_);
187
0
    return mapped_features_;
188
0
  }
189
0
  const std::vector<int> &indexed_features() const {
190
0
    ASSERT_HOST(features_are_indexed_);
191
0
    return mapped_features_;
192
0
  }
193
0
  bool is_error() const {
194
0
    return is_error_;
195
0
  }
196
0
  void set_is_error(bool value) {
197
0
    is_error_ = value;
198
0
  }
199
200
private:
201
  // Unichar id that this sample represents. There obviously must be a
202
  // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
203
  UNICHAR_ID class_id_;
204
  // Font id in which this sample was printed. Refers to a fontinfo_table_ in
205
  // MasterTrainer.
206
  int font_id_;
207
  // Number of page that the sample came from.
208
  int page_num_;
209
  // Bounding box of sample in original image.
210
  TBOX bounding_box_;
211
  // Number of INT_FEATURE_STRUCT in features_ array.
212
  uint32_t num_features_;
213
  // Number of MicroFeature in micro_features_ array.
214
  uint32_t num_micro_features_;
215
  // Total length of outline in the baseline normalized coordinate space.
216
  // See comment in WERD_RES class definition for a discussion of coordinate
217
  // spaces.
218
  int outline_length_;
219
  // Array of features.
220
  INT_FEATURE_STRUCT *features_;
221
  // Array of micro-features.
222
  MicroFeature *micro_features_;
223
  // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
224
  float cn_feature_[kNumCNParams];
225
  // The one and only geometric feature. (Aims at replacing cn_feature_).
226
  // Indexed by GeoParams enum in picofeat.h
227
  int geo_feature_[GeoCount];
228
229
  // Non-serialized cache data.
230
  // Weight used for boosting training.
231
  double weight_;
232
  // Maximum distance to other samples of same class/font used in computing
233
  // the canonical sample.
234
  double max_dist_;
235
  // Global index of this sample.
236
  int sample_index_;
237
238
public:
239
  // both are used in training tools
240
  // hide after refactoring
241
242
  // Indexed/mapped features, as indicated by the bools below.
243
  std::vector<int> mapped_features_;
244
  bool features_are_indexed_;
245
  bool features_are_mapped_;
246
247
private:
248
  // True if the last classification was an error by the current definition.
249
  bool is_error_;
250
251
  // Randomizing factors.
252
  static const int kYShiftValues[kSampleYShiftSize];
253
  static const double kScaleValues[kSampleScaleSize];
254
};
255
256
ELISTIZEH(TrainingSample)
257
258
} // namespace tesseract
259
260
#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_