Coverage Report

Created: 2025-06-13 07:02

/src/tesseract/src/classify/picofeat.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 ** Filename:    picofeat.cpp
3
 ** Purpose:     Definition of pico-features.
4
 ** Author:      Dan Johnson
5
 **
6
 ** (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
18
#include "picofeat.h"
19
20
#include "classify.h"
21
#include "featdefs.h"
22
#include "fpoint.h"
23
#include "mfoutline.h"
24
#include "ocrfeatures.h"
25
#include "params.h"
26
#include "trainingsample.h"
27
28
#include <cmath>
29
#include <cstdio>
#include <limits>
30
31
namespace tesseract {
32
33
/*---------------------------------------------------------------------------
34
          Variables
35
----------------------------------------------------------------------------*/
36
37
// Length of a single pico-feature, declared here with the value 0.05.
// ConvertSegmentToPicoFeat divides each segment's length by this value to
// decide how many pico-features the segment is cut into.
double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
38
39
/*---------------------------------------------------------------------------
40
          Private Function Prototypes
41
----------------------------------------------------------------------------*/
42
// Cuts the segment from Start to End into evenly spaced pico-features and
// adds them to FeatureSet (defined later in this file).
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet);
43
44
// Walks every edge of Outline and converts each non-hidden edge into
// pico-features that are added to FeatureSet (defined later in this file).
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
45
46
// Shifts the x parameter of every pico-feature in FeatureSet so that the mean
// x becomes 0 (defined later in this file).
void NormalizePicoX(FEATURE_SET FeatureSet);
47
48
/*----------------------------------------------------------------------------
49
              Public Code
50
----------------------------------------------------------------------------*/
51
/*---------------------------------------------------------------------------*/
52
/**
53
 * Operation: Dummy for now.
54
 *
55
 * Globals:
56
 * - classify_norm_method normalization method currently specified
57
 * @param Blob blob to extract pico-features from
58
 * @return Pico-features for Blob.
59
 */
60
504
FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
61
504
  auto FeatureSet = new FEATURE_SET_STRUCT(MAX_PICO_FEATURES);
62
504
  auto Outlines = ConvertBlob(Blob);
63
504
  float XScale, YScale;
64
504
  NormalizeOutlines(Outlines, &XScale, &YScale);
65
504
  auto RemainingOutlines = Outlines;
66
1.15k
  iterate(RemainingOutlines) {
67
1.15k
    auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
68
1.15k
    ConvertToPicoFeatures2(Outline, FeatureSet);
69
1.15k
  }
70
504
  if (classify_norm_method == baseline) {
71
504
    NormalizePicoX(FeatureSet);
72
504
  }
73
504
  FreeOutlines(Outlines);
74
504
  return (FeatureSet);
75
76
504
} /* ExtractPicoFeatures */
77
78
/*----------------------------------------------------------------------------
79
              Private Code
80
----------------------------------------------------------------------------*/
81
/*---------------------------------------------------------------------------*/
82
/**
83
 * This routine converts an entire segment of an outline
84
 * into a set of pico features which are added to
85
 * FeatureSet.  The length of the segment is rounded to the
86
 * nearest whole number of pico-features.  The pico-features
87
 * are spaced evenly over the entire segment.
88
 * Results are placed in FeatureSet.
89
 * Globals:
90
 * - classify_pico_feature_length length of a single pico-feature
91
 * @param Start starting point of pico-feature
92
 * @param End ending point of pico-feature
93
 * @param FeatureSet set to add pico-feature to
94
 */
95
5.95k
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) {
96
5.95k
  float Angle;
97
5.95k
  float Length;
98
5.95k
  int NumFeatures;
99
5.95k
  FPOINT Center;
100
5.95k
  FPOINT Delta;
101
5.95k
  int i;
102
103
5.95k
  Angle = NormalizedAngleFrom(Start, End, 1.0);
104
5.95k
  Length = DistanceBetween(*Start, *End);
105
5.95k
  NumFeatures = static_cast<int>(floor(Length / classify_pico_feature_length + 0.5));
106
5.95k
  if (NumFeatures < 1) {
107
786
    NumFeatures = 1;
108
786
  }
109
110
  /* compute vector for one pico feature */
111
5.95k
  Delta.x = XDelta(*Start, *End) / NumFeatures;
112
5.95k
  Delta.y = YDelta(*Start, *End) / NumFeatures;
113
114
  /* compute position of first pico feature */
115
5.95k
  Center.x = Start->x + Delta.x / 2.0;
116
5.95k
  Center.y = Start->y + Delta.y / 2.0;
117
118
  /* compute each pico feature in segment and add to feature set */
119
33.1k
  for (i = 0; i < NumFeatures; i++) {
120
27.2k
    auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
121
27.2k
    Feature->Params[PicoFeatDir] = Angle;
122
27.2k
    Feature->Params[PicoFeatX] = Center.x;
123
27.2k
    Feature->Params[PicoFeatY] = Center.y;
124
27.2k
    AddFeature(FeatureSet, Feature);
125
126
27.2k
    Center.x += Delta.x;
127
27.2k
    Center.y += Delta.y;
128
27.2k
  }
129
5.95k
} /* ConvertSegmentToPicoFeat */
130
131
/*---------------------------------------------------------------------------*/
132
/**
133
 * This routine steps through the specified outline and cuts it
134
 * up into pieces of equal length.  These pieces become the
135
 * desired pico-features.  Each segment in the outline
136
 * is converted into an integral number of pico-features.
137
 * Results are returned in FeatureSet.
138
 *
139
 * Globals:
140
 * - classify_pico_feature_length length of features to be extracted
141
 * @param Outline outline to extract micro-features from
142
 * @param FeatureSet set of features to add pico-features to
143
 */
144
1.15k
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
145
1.15k
  MFOUTLINE Next;
146
1.15k
  MFOUTLINE First;
147
1.15k
  MFOUTLINE Current;
148
149
1.15k
  if (DegenerateOutline(Outline)) {
150
0
    return;
151
0
  }
152
153
1.15k
  First = Outline;
154
1.15k
  Current = First;
155
1.15k
  Next = NextPointAfter(Current);
156
5.95k
  do {
157
    /* note that an edge is hidden if the ending point of the edge is
158
   marked as hidden.  This situation happens because the order of
159
   the outlines is reversed when they are converted from the old
160
   format.  In the old format, a hidden edge is marked by the
161
   starting point for that edge. */
162
5.95k
    if (!(PointAt(Next)->Hidden)) {
163
5.95k
      ConvertSegmentToPicoFeat(&(PointAt(Current)->Point), &(PointAt(Next)->Point), FeatureSet);
164
5.95k
    }
165
166
5.95k
    Current = Next;
167
5.95k
    Next = NextPointAfter(Current);
168
5.95k
  } while (Current != First);
169
170
1.15k
} /* ConvertToPicoFeatures2 */
171
172
/*---------------------------------------------------------------------------*/
173
/**
174
 * This routine computes the average x position over all
175
 * of the pico-features in FeatureSet and then renormalizes
176
 * the pico-features to force this average to be the x origin
177
 * (i.e. x=0).
178
 * FeatureSet is changed.
179
 * @param FeatureSet pico-features to be normalized
180
 */
181
504
void NormalizePicoX(FEATURE_SET FeatureSet) {
182
504
  int i;
183
504
  FEATURE Feature;
184
504
  float Origin = 0.0;
185
186
27.7k
  for (i = 0; i < FeatureSet->NumFeatures; i++) {
187
27.2k
    Feature = FeatureSet->Features[i];
188
27.2k
    Origin += Feature->Params[PicoFeatX];
189
27.2k
  }
190
504
  Origin /= FeatureSet->NumFeatures;
191
192
27.7k
  for (i = 0; i < FeatureSet->NumFeatures; i++) {
193
27.2k
    Feature = FeatureSet->Features[i];
194
27.2k
    Feature->Params[PicoFeatX] -= Origin;
195
27.2k
  }
196
504
} /* NormalizePicoX */
197
198
/*---------------------------------------------------------------------------*/
199
/**
200
 * @param blob blob to extract features from
201
 * @param fx_info
202
 * @return Integer character-normalized features for blob.
203
 */
204
0
FEATURE_SET Classify::ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) {
205
0
  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
206
0
  std::vector<INT_FEATURE_STRUCT> bl_features;
207
0
  tesseract::TrainingSample *sample =
208
0
      tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
209
0
  if (sample == nullptr) {
210
0
    return nullptr;
211
0
  }
212
213
0
  uint32_t num_features = sample->num_features();
214
0
  const INT_FEATURE_STRUCT *features = sample->features();
215
0
  auto feature_set = new FEATURE_SET_STRUCT(num_features);
216
0
  for (uint32_t f = 0; f < num_features; ++f) {
217
0
    auto feature = new FEATURE_STRUCT(&IntFeatDesc);
218
0
    feature->Params[IntX] = features[f].X;
219
0
    feature->Params[IntY] = features[f].Y;
220
0
    feature->Params[IntDir] = features[f].Theta;
221
0
    AddFeature(feature_set, feature);
222
0
  }
223
0
  delete sample;
224
225
0
  return feature_set;
226
0
} /* ExtractIntCNFeatures */
227
228
/*---------------------------------------------------------------------------*/
229
/**
230
 * @param blob blob to extract features from
231
 * @param fx_info
232
 * @return Geometric (top/bottom/width) features for blob.
233
 */
234
FEATURE_SET Classify::ExtractIntGeoFeatures(const TBLOB &blob,
235
0
                                            const INT_FX_RESULT_STRUCT &fx_info) {
236
0
  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
237
0
  std::vector<INT_FEATURE_STRUCT> bl_features;
238
0
  tesseract::TrainingSample *sample =
239
0
      tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
240
0
  if (sample == nullptr) {
241
0
    return nullptr;
242
0
  }
243
244
0
  auto feature_set = new FEATURE_SET_STRUCT(1);
245
0
  auto feature = new FEATURE_STRUCT(&IntFeatDesc);
246
247
0
  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
248
0
  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
249
0
  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
250
0
  AddFeature(feature_set, feature);
251
0
  delete sample;
252
253
0
  return feature_set;
254
0
} /* ExtractIntGeoFeatures */
255
256
} // namespace tesseract.