Coverage Report

Created: 2024-02-28 06:46

/src/tesseract/src/classify/clusttool.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 ** Filename: clusttool.cpp
3
 ** Purpose:  Misc. tools for use with the clustering routines
4
 ** Author:   Dan Johnson
5
 **
6
 ** (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *****************************************************************************/
17
18
#define _USE_MATH_DEFINES // for M_PI
19
20
#include "clusttool.h"
21
22
#include <cmath>   // for M_PI, std::isnan
23
#include <locale>  // for std::locale::classic
24
#include <sstream> // for std::stringstream
25
26
namespace tesseract {
27
28
//---------------Global Data Definitions and Declarations--------------------
29
456
#define TOKENSIZE 80 ///< max size of tokens read from an input file
30
#define QUOTED_TOKENSIZE "79" ///< TOKENSIZE - 1 as a string literal; pasted into sscanf "%s" field widths, keep in sync with TOKENSIZE
31
#define MAXSAMPLESIZE 65535 ///< max num of dimensions in feature space
32
33
/**
34
 * This routine reads N floats from the specified text file
35
 * and places them into Buffer.  If Buffer is nullptr, a buffer
36
 * is created and passed back to the caller.  If EOF is
37
 * encountered before any floats can be read, nullptr is
38
 * returned.
39
 * @param fp open text file to read floats from
40
 * @param N number of floats to read
41
 * @param Buffer pointer to buffer to place floats into
42
 * @return Pointer to buffer holding floats or nullptr if EOF
43
 * @note Globals: None
44
 */
45
880
static bool ReadNFloats(TFile *fp, uint16_t N, float Buffer[]) {
46
880
  const int kMaxLineSize = 1024;
47
880
  char line[kMaxLineSize];
48
880
  if (fp->FGets(line, kMaxLineSize) == nullptr) {
49
0
    tprintf("Hit EOF in ReadNFloats!\n");
50
0
    return false;
51
0
  }
52
53
880
  std::stringstream stream(line);
54
  // Use "C" locale (needed for float values Buffer[i]).
55
880
  stream.imbue(std::locale::classic());
56
4.40k
  for (uint16_t i = 0; i < N; i++) {
57
3.52k
    float f = NAN;
58
3.52k
    stream >> f;
59
3.52k
    if (std::isnan(f)) {
60
0
      tprintf("Read of %u floats failed!\n", N);
61
0
      return false;
62
0
    }
63
3.52k
    Buffer[i] = f;
64
3.52k
  }
65
880
  return true;
66
880
}
67
68
/**
 * This routine writes a text representation of N floats from
 * an array to a file.  All of the floats are placed on one line,
 * each formatted as " %9.6f", and the line is terminated with a
 * newline (also when N is 0).
 * @param File open text file to write N floats to
 * @param N number of floats to write
 * @param Array array of floats to write; it is only read, never modified
 */
static void WriteNFloats(FILE *File, uint16_t N, const float Array[]) {
  for (int i = 0; i < N; i++) {
    fprintf(File, " %9.6f", Array[i]);
  }
  fprintf(File, "\n");
}
81
82
/**
83
 * This routine writes to the specified text file a word
84
 * which represents the ProtoStyle.  It does not append
85
 * a carriage return to the end.
86
 * @param File open text file to write prototype style to
87
 * @param ProtoStyle prototype style to write
88
 */
89
0
static void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
90
0
  switch (ProtoStyle) {
91
0
    case spherical:
92
0
      fprintf(File, "spherical");
93
0
      break;
94
0
    case elliptical:
95
0
      fprintf(File, "elliptical");
96
0
      break;
97
0
    case mixed:
98
0
      fprintf(File, "mixed");
99
0
      break;
100
0
    case automatic:
101
0
      fprintf(File, "automatic");
102
0
      break;
103
0
  }
104
0
}
105
106
/**
107
 * This routine reads a single integer from the specified
108
 * file and checks to ensure that it is between 0 and
109
 * MAXSAMPLESIZE.
110
 * @param fp open text file to read sample size from
111
 * @return Sample size
112
 * @note Globals: None
113
 */
114
4
uint16_t ReadSampleSize(TFile *fp) {
115
4
  int SampleSize = 0;
116
117
4
  const int kMaxLineSize = 100;
118
4
  char line[kMaxLineSize];
119
4
  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
120
4
  ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
121
4
  ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
122
4
  return SampleSize;
123
4
}
124
125
/**
126
 * This routine reads textual descriptions of sets of parameters
127
 * which describe the characteristics of feature dimensions.
128
 *
129
 * @param fp open text file to read N parameter descriptions from
130
 * @param N number of parameter descriptions to read
131
 * @return Pointer to an array of parameter descriptors.
132
 * @note Globals: None
133
 */
134
4
PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) {
135
4
  auto ParamDesc = new PARAM_DESC[N];
136
20
  for (int i = 0; i < N; i++) {
137
16
    const int kMaxLineSize = TOKENSIZE * 4;
138
16
    char line[kMaxLineSize];
139
16
    ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
140
16
    std::istringstream stream(line);
141
    // Use "C" locale (needed for float values Min, Max).
142
16
    stream.imbue(std::locale::classic());
143
16
    std::string linear_token;
144
16
    stream >> linear_token;
145
16
    std::string essential_token;
146
16
    stream >> essential_token;
147
16
    stream >> ParamDesc[i].Min;
148
16
    stream >> ParamDesc[i].Max;
149
16
    ASSERT_HOST(!stream.fail());
150
16
    ParamDesc[i].Circular = (linear_token[0] == 'c');
151
16
    ParamDesc[i].NonEssential = (essential_token[0] != 'e');
152
16
    ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
153
16
    ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
154
16
    ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
155
16
  }
156
4
  return (ParamDesc);
157
4
}
158
159
/**
160
 * This routine reads a textual description of a prototype from
161
 * the specified file.
162
 *
163
 * @param fp open text file to read prototype from
164
 * @param N number of dimensions used in prototype
165
 * @return List of prototypes
166
 * @note Globals: None
167
 */
168
440
PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) {
169
440
  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
170
440
  int SampleCount;
171
440
  int i;
172
173
440
  const int kMaxLineSize = TOKENSIZE * 4;
174
440
  char line[kMaxLineSize];
175
440
  if (fp->FGets(line, kMaxLineSize) == nullptr ||
176
440
      sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", sig_token, shape_token,
177
440
             &SampleCount) != 3) {
178
0
    tprintf("Invalid prototype: %s\n", line);
179
0
    return nullptr;
180
0
  }
181
440
  auto Proto = new PROTOTYPE;
182
440
  Proto->Cluster = nullptr;
183
440
  Proto->Significant = (sig_token[0] == 's');
184
185
440
  switch (shape_token[0]) {
186
0
    case 's':
187
0
      Proto->Style = spherical;
188
0
      break;
189
440
    case 'e':
190
440
      Proto->Style = elliptical;
191
440
      break;
192
0
    case 'a':
193
0
      Proto->Style = automatic;
194
0
      break;
195
0
    default:
196
0
      tprintf("Invalid prototype style specification:%s\n", shape_token);
197
0
      Proto->Style = elliptical;
198
440
  }
199
200
440
  ASSERT_HOST(SampleCount >= 0);
201
440
  Proto->NumSamples = SampleCount;
202
203
440
  Proto->Mean.resize(N);
204
440
  ReadNFloats(fp, N, &Proto->Mean[0]);
205
206
440
  switch (Proto->Style) {
207
0
    case spherical:
208
0
      ReadNFloats(fp, 1, &(Proto->Variance.Spherical));
209
0
      Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
210
0
      Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast<float>(N));
211
0
      Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
212
0
      Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
213
0
      Proto->Distrib.clear();
214
0
      break;
215
440
    case elliptical:
216
440
      Proto->Variance.Elliptical = new float[N];
217
440
      ReadNFloats(fp, N, Proto->Variance.Elliptical);
218
440
      Proto->Magnitude.Elliptical = new float[N];
219
440
      Proto->Weight.Elliptical = new float[N];
220
440
      Proto->TotalMagnitude = 1.0;
221
2.20k
      for (i = 0; i < N; i++) {
222
1.76k
        Proto->Magnitude.Elliptical[i] = 1.0f / sqrt(2.0f * M_PI * Proto->Variance.Elliptical[i]);
223
1.76k
        Proto->Weight.Elliptical[i] = 1.0f / Proto->Variance.Elliptical[i];
224
1.76k
        Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
225
1.76k
      }
226
440
      Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
227
440
      Proto->Distrib.clear();
228
440
      break;
229
0
    default:
230
0
      delete Proto;
231
0
      tprintf("Invalid prototype style\n");
232
0
      return nullptr;
233
440
  }
234
440
  return Proto;
235
440
}
236
237
/**
238
 * This routine writes an array of dimension descriptors to
239
 * the specified text file.
240
 * @param File open text file to write param descriptors to
241
 * @param N number of param descriptors to write
242
 * @param ParamDesc array of param descriptors to write
243
 */
244
0
void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) {
245
0
  int i;
246
247
0
  for (i = 0; i < N; i++) {
248
0
    if (ParamDesc[i].Circular) {
249
0
      fprintf(File, "circular ");
250
0
    } else {
251
0
      fprintf(File, "linear   ");
252
0
    }
253
254
0
    if (ParamDesc[i].NonEssential) {
255
0
      fprintf(File, "non-essential ");
256
0
    } else {
257
0
      fprintf(File, "essential     ");
258
0
    }
259
260
0
    fprintf(File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
261
0
  }
262
0
}
263
264
/**
265
 * This routine writes a textual description of a prototype
266
 * to the specified text file.
267
 * @param File open text file to write prototype to
268
 * @param N number of dimensions in feature space
269
 * @param Proto prototype to write out
270
 */
271
0
void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) {
272
0
  int i;
273
274
0
  if (Proto->Significant) {
275
0
    fprintf(File, "significant   ");
276
0
  } else {
277
0
    fprintf(File, "insignificant ");
278
0
  }
279
0
  WriteProtoStyle(File, static_cast<PROTOSTYLE>(Proto->Style));
280
0
  fprintf(File, "%6d\n\t", Proto->NumSamples);
281
0
  WriteNFloats(File, N, &Proto->Mean[0]);
282
0
  fprintf(File, "\t");
283
284
0
  switch (Proto->Style) {
285
0
    case spherical:
286
0
      WriteNFloats(File, 1, &(Proto->Variance.Spherical));
287
0
      break;
288
0
    case elliptical:
289
0
      WriteNFloats(File, N, Proto->Variance.Elliptical);
290
0
      break;
291
0
    case mixed:
292
0
      for (i = 0; i < N; i++) {
293
0
        switch (Proto->Distrib[i]) {
294
0
          case normal:
295
0
            fprintf(File, " %9s", "normal");
296
0
            break;
297
0
          case uniform:
298
0
            fprintf(File, " %9s", "uniform");
299
0
            break;
300
0
          case D_random:
301
0
            fprintf(File, " %9s", "random");
302
0
            break;
303
0
          case DISTRIBUTION_COUNT:
304
0
            ASSERT_HOST(!"Distribution count not allowed!");
305
0
        }
306
0
      }
307
0
      fprintf(File, "\n\t");
308
0
      WriteNFloats(File, N, Proto->Variance.Elliptical);
309
0
  }
310
0
}
311
312
} // namespace tesseract