Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/classify/adaptive.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 ** Filename:    adaptive.c
3
 ** Purpose:     Adaptive matcher.
4
 ** Author:      Dan Johnson
5
 **
6
 ** (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
18
#include "adaptive.h"
19
20
#include "classify.h"
21
22
#include <cassert>
23
#include <cstdio>
24
25
namespace tesseract {
26
27
/*----------------------------------------------------------------------------
28
              Public Code
29
----------------------------------------------------------------------------*/
30
/*---------------------------------------------------------------------------*/
31
/**
32
 * This routine adds a new adapted class to an existing
33
 * set of adapted templates.
34
 *
35
 * @param Templates set of templates to add new class to
36
 * @param Class new class to add to templates
37
 * @param ClassId class id to associate with new class
38
 *
39
 * @note Globals: none
40
 */
41
3.15M
void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId) {
42
3.15M
  assert(Templates != nullptr);
43
3.15M
  assert(Class != nullptr);
44
3.15M
  assert(LegalClassId(ClassId));
45
3.15M
  assert(UnusedClassIdIn(Templates->Templates, ClassId));
46
3.15M
  assert(Class->NumPermConfigs == 0);
47
48
3.15M
  auto IntClass = new INT_CLASS_STRUCT(1, 1);
49
3.15M
  AddIntClass(Templates->Templates, ClassId, IntClass);
50
51
3.15M
  assert(Templates->Class[ClassId] == nullptr);
52
3.15M
  Templates->Class[ClassId] = Class;
53
54
3.15M
} /* AddAdaptedClass */
55
56
/*---------------------------------------------------------------------------*/
57
58
204
/// Destructor: releases the Ambigs array held by this permanent config.
PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() {
  delete[] this->Ambigs;
}
61
62
/// Builds an empty adapted class: both counters are zero, the permanent
/// proto/config bit vectors are allocated and cleared, the temp-proto list
/// is NIL_LIST and every config slot is null.
ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT()
    : NumPermConfigs(0)
    , MaxNumTimesSeen(0)
    , PermProtos(NewBitVector(MAX_NUM_PROTOS))
    , PermConfigs(NewBitVector(MAX_NUM_CONFIGS))
    , TempProtos(NIL_LIST) {
  // Clear the freshly allocated bit vectors explicitly.
  zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
  zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS));

  // Mark every config slot as unused.
  int ConfigId = 0;
  while (ConfigId < MAX_NUM_CONFIGS) {
    TempConfigFor(this, ConfigId) = nullptr;
    ++ConfigId;
  }
}
75
76
3.15M
/// Destructor: deletes every non-null config (permanent or temporary,
/// as indicated by ConfigIsPermanent), frees both permanent bit vectors
/// and deletes each TEMP_PROTO_STRUCT stored in the TempProtos list.
ADAPT_CLASS_STRUCT::~ADAPT_CLASS_STRUCT() {
  // Configs must be released before PermConfigs is freed, because
  // ConfigIsPermanent is consulted for every slot.
  for (int ConfigId = 0; ConfigId < MAX_NUM_CONFIGS; ++ConfigId) {
    if (ConfigIsPermanent(this, ConfigId)) {
      if (PermConfigFor(this, ConfigId) != nullptr) {
        delete PermConfigFor(this, ConfigId);
      }
    } else {
      if (TempConfigFor(this, ConfigId) != nullptr) {
        delete TempConfigFor(this, ConfigId);
      }
    }
  }

  FreeBitVector(PermProtos);
  FreeBitVector(PermConfigs);

  // Delete each payload, then advance with pop().
  // NOTE(review): pop() presumably releases the head list node and returns
  // the remainder of the list - confirm against the list implementation.
  for (auto Node = TempProtos; Node != nullptr; Node = pop(Node)) {
    delete reinterpret_cast<TEMP_PROTO_STRUCT *>(Node->node);
  }
}
92
93
/// Constructor for adapted templates.
94
/// Add an empty class for each char in unicharset to the newly created templates.
95
6.57k
ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) {
96
6.57k
  Templates = new INT_TEMPLATES_STRUCT;
97
6.57k
  NumPermClasses = 0;
98
6.57k
  NumNonEmptyClasses = 0;
99
100
  /* Insert an empty class for each unichar id in unicharset */
101
215M
  for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
102
215M
    Class[i] = nullptr;
103
215M
    if (i < unicharset.size()) {
104
3.15M
      AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i);
105
3.15M
    }
106
215M
  }
107
6.57k
}
108
109
6.57k
/// Destructor: deletes the adapted class for each of the
/// Templates->NumClasses ids, then deletes the integer templates.
ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() {
  unsigned ClassId = 0;
  while (ClassId < Templates->NumClasses) {
    delete Class[ClassId];
    ++ClassId;
  }
  // Templates is read by the loop above, so it is released last.
  delete Templates;
}
115
116
// Returns FontinfoId of the given config of the given adapted class.
117
53.4M
int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) {
118
53.4M
  return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
119
53.4M
                                             : TempConfigFor(Class, ConfigId)->FontinfoId);
120
53.4M
}
121
122
/// This constructor allocates and returns a new temporary config.
123
///
124
/// @param MaxProtoId  max id of any proto in new config
125
/// @param FontinfoId font information from pre-trained templates
126
TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) :
127
1.45k
  NumTimesSeen(1),
128
1.45k
  ProtoVectorSize(WordsInVectorOfSize(maxProtoId + 1)),
129
1.45k
  MaxProtoId(maxProtoId),
130
1.45k
  Protos(NewBitVector(maxProtoId + 1)),
131
1.45k
  FontinfoId(fontinfoId) {
132
1.45k
  zero_all_bits(Protos, ProtoVectorSize);
133
1.45k
}
134
135
1.41k
/// Destructor: frees the proto bit vector held by this temporary config.
TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() {
  FreeBitVector(this->Protos);
}
138
139
/*---------------------------------------------------------------------------*/
140
/**
141
 * This routine prints a summary of the adapted templates
142
 *  in Templates to File.
143
 *
144
 * @param File    open text file to print Templates to
145
 * @param Templates adapted templates to print to File
146
 *
147
 * @note Globals: none
148
 */
149
0
void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
150
0
  INT_CLASS_STRUCT *IClass;
151
0
  ADAPT_CLASS_STRUCT *AClass;
152
153
0
  fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
154
0
  fprintf(File, "Num classes = %d;  Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
155
0
          Templates->NumPermClasses);
156
0
  fprintf(File, "   Id  NC NPC  NP NPP\n");
157
0
  fprintf(File, "------------------------\n");
158
159
0
  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
160
0
    IClass = Templates->Templates->Class[i];
161
0
    AClass = Templates->Class[i];
162
0
    if (!IsEmptyAdaptedClass(AClass)) {
163
0
      fprintf(File, "%5u  %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
164
0
              AClass->NumPermConfigs, IClass->NumProtos,
165
0
              IClass->NumProtos - AClass->TempProtos->size());
166
0
    }
167
0
  }
168
0
  fprintf(File, "\n");
169
170
0
} /* PrintAdaptedTemplates */
171
172
/*---------------------------------------------------------------------------*/
173
/**
174
 * Read an adapted class description from file and return
175
 * a ptr to the adapted class.
176
 *
177
 * @param fp open file to read adapted class from
178
 * @return Ptr to new adapted class.
179
 *
180
 * @note Globals: none
181
 */
182
0
ADAPT_CLASS_STRUCT *ReadAdaptedClass(TFile *fp) {
183
0
  int NumTempProtos;
184
0
  int NumConfigs;
185
0
  int i;
186
0
  ADAPT_CLASS_STRUCT *Class;
187
188
  /* first read high level adapted class structure */
189
0
  Class = new ADAPT_CLASS_STRUCT;
190
0
  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
191
192
  /* then read in the definitions of the permanent protos and configs */
193
0
  Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
194
0
  Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
195
0
  fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
196
0
  fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
197
198
  /* then read in the list of temporary protos */
199
0
  fp->FRead(&NumTempProtos, sizeof(int), 1);
200
0
  Class->TempProtos = NIL_LIST;
201
0
  for (i = 0; i < NumTempProtos; i++) {
202
0
    auto TempProto = new TEMP_PROTO_STRUCT;
203
0
    fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
204
0
    Class->TempProtos = push_last(Class->TempProtos, TempProto);
205
0
  }
206
207
  /* then read in the adapted configs */
208
0
  fp->FRead(&NumConfigs, sizeof(int), 1);
209
0
  for (i = 0; i < NumConfigs; i++) {
210
0
    if (test_bit(Class->PermConfigs, i)) {
211
0
      Class->Config[i].Perm = ReadPermConfig(fp);
212
0
    } else {
213
0
      Class->Config[i].Temp = ReadTempConfig(fp);
214
0
    }
215
0
  }
216
217
0
  return (Class);
218
219
0
} /* ReadAdaptedClass */
220
221
/*---------------------------------------------------------------------------*/
222
/**
223
 * Read a set of adapted templates from file and return
224
 * a ptr to the templates.
225
 *
226
 * @param fp open text file to read adapted templates from
227
 * @return Ptr to adapted templates read from file.
228
 *
229
 * @note Globals: none
230
 */
231
0
ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) {
232
0
  auto Templates = new ADAPT_TEMPLATES_STRUCT;
233
234
  /* first read the high level adaptive template struct */
235
0
  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
236
237
  /* then read in the basic integer templates */
238
0
  Templates->Templates = ReadIntTemplates(fp);
239
240
  /* then read in the adaptive info for each class */
241
0
  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
242
0
    Templates->Class[i] = ReadAdaptedClass(fp);
243
0
  }
244
0
  return (Templates);
245
246
0
} /* ReadAdaptedTemplates */
247
248
/*---------------------------------------------------------------------------*/
249
/**
250
 * Read a permanent configuration description from file
251
 * and return a ptr to it.
252
 *
253
 * @param fp open file to read permanent config from
254
 * @return Ptr to new permanent configuration description.
255
 *
256
 * @note Globals: none
257
 */
258
0
PERM_CONFIG_STRUCT *ReadPermConfig(TFile *fp) {
259
0
  auto Config = new PERM_CONFIG_STRUCT;
260
0
  uint8_t NumAmbigs;
261
0
  fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
262
0
  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
263
0
  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
264
0
  Config->Ambigs[NumAmbigs] = -1;
265
0
  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
266
267
0
  return (Config);
268
269
0
} /* ReadPermConfig */
270
271
/*---------------------------------------------------------------------------*/
272
/**
273
 * Read a temporary configuration description from file
274
 * and return a ptr to it.
275
 *
276
 * @param fp open file to read temporary config from
277
 * @return Ptr to new temporary configuration description.
278
 *
279
 * @note Globals: none
280
 */
281
0
TEMP_CONFIG_STRUCT *ReadTempConfig(TFile *fp) {
282
0
  auto Config = new TEMP_CONFIG_STRUCT;
283
0
  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
284
285
0
  Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
286
0
  fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
287
288
0
  return (Config);
289
290
0
} /* ReadTempConfig */
291
292
/*---------------------------------------------------------------------------*/
293
/**
294
 * This routine writes a binary representation of Class
295
 * to File.
296
 *
297
 * @param File    open file to write Class to
298
 * @param Class   adapted class to write to File
299
 * @param NumConfigs  number of configs in Class
300
 *
301
 * @note Globals: none
302
 */
303
0
void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
304
  /* first write high level adapted class structure */
305
0
  fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
306
307
  /* then write out the definitions of the permanent protos and configs */
308
0
  fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
309
0
  fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
310
311
  /* then write out the list of temporary protos */
312
0
  uint32_t NumTempProtos = Class->TempProtos->size();
313
0
  fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
314
0
  auto TempProtos = Class->TempProtos;
315
0
  iterate(TempProtos) {
316
0
    void *proto = TempProtos->node;
317
0
    fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
318
0
  }
319
320
  /* then write out the adapted configs */
321
0
  fwrite(&NumConfigs, sizeof(int), 1, File);
322
0
  for (int i = 0; i < NumConfigs; i++) {
323
0
    if (test_bit(Class->PermConfigs, i)) {
324
0
      WritePermConfig(File, Class->Config[i].Perm);
325
0
    } else {
326
0
      WriteTempConfig(File, Class->Config[i].Temp);
327
0
    }
328
0
  }
329
330
0
} /* WriteAdaptedClass */
331
332
/*---------------------------------------------------------------------------*/
333
/**
334
 * This routine saves Templates to File in a binary format.
335
 *
336
 * @param File    open text file to write Templates to
337
 * @param Templates set of adapted templates to write to File
338
 *
339
 * @note Globals: none
340
 */
341
0
void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
342
  /* first write the high level adaptive template struct */
343
0
  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
344
345
  /* then write out the basic integer templates */
346
0
  WriteIntTemplates(File, Templates->Templates, unicharset);
347
348
  /* then write out the adaptive info for each class */
349
0
  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
350
0
    WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
351
0
  }
352
0
} /* WriteAdaptedTemplates */
353
354
/*---------------------------------------------------------------------------*/
355
/**
356
 * This routine writes a binary representation of a
357
 * permanent configuration to File.
358
 *
359
 * @param File  open file to write Config to
360
 * @param Config  permanent config to write to File
361
 *
362
 * @note Globals: none
363
 */
364
0
void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config) {
365
0
  uint8_t NumAmbigs = 0;
366
367
0
  assert(Config != nullptr);
368
0
  while (Config->Ambigs[NumAmbigs] > 0) {
369
0
    ++NumAmbigs;
370
0
  }
371
372
0
  fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
373
0
  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
374
0
  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
375
0
} /* WritePermConfig */
376
377
/*---------------------------------------------------------------------------*/
378
/**
379
 * This routine writes a binary representation of a
380
 * temporary configuration to File.
381
 *
382
 * @param File  open file to write Config to
383
 * @param Config  temporary config to write to File
384
 *
385
 * @note Globals: none
386
 */
387
0
void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config) {
388
0
  assert(Config != nullptr);
389
390
0
  fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
391
0
  fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
392
393
0
} /* WriteTempConfig */
394
395
} // namespace tesseract