/src/tesseract/src/classify/adaptive.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | ** Filename: adaptive.cpp |
3 | | ** Purpose: Adaptive matcher. |
4 | | ** Author: Dan Johnson |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | |
18 | | #include "adaptive.h" |
19 | | |
20 | | #include "classify.h" |
21 | | |
22 | | #include <cassert> |
23 | | #include <cstdio> |
24 | | |
25 | | namespace tesseract { |
26 | | |
27 | | /*---------------------------------------------------------------------------- |
28 | | Public Code |
29 | | ----------------------------------------------------------------------------*/ |
30 | | /*---------------------------------------------------------------------------*/ |
31 | | /** |
32 | | * This routine adds a new adapted class to an existing |
33 | | * set of adapted templates. |
34 | | * |
35 | | * @param Templates set of templates to add new class to |
36 | | * @param Class new class to add to templates |
37 | | * @param ClassId class id to associate with new class |
38 | | * |
39 | | * @note Globals: none |
40 | | */ |
41 | 3.15M | void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId) { |
42 | 3.15M | assert(Templates != nullptr); |
43 | 3.15M | assert(Class != nullptr); |
44 | 3.15M | assert(LegalClassId(ClassId)); |
45 | 3.15M | assert(UnusedClassIdIn(Templates->Templates, ClassId)); |
46 | 3.15M | assert(Class->NumPermConfigs == 0); |
47 | | |
48 | 3.15M | auto IntClass = new INT_CLASS_STRUCT(1, 1); |
49 | 3.15M | AddIntClass(Templates->Templates, ClassId, IntClass); |
50 | | |
51 | 3.15M | assert(Templates->Class[ClassId] == nullptr); |
52 | 3.15M | Templates->Class[ClassId] = Class; |
53 | | |
54 | 3.15M | } /* AddAdaptedClass */ |
55 | | |
56 | | /*---------------------------------------------------------------------------*/ |
57 | | |
58 | 204 | PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() { |
59 | 204 | delete[] Ambigs; |
60 | 204 | } |
61 | | |
62 | | ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() : |
63 | 3.15M | NumPermConfigs(0), |
64 | 3.15M | MaxNumTimesSeen(0), |
65 | 3.15M | PermProtos(NewBitVector(MAX_NUM_PROTOS)), |
66 | 3.15M | PermConfigs(NewBitVector(MAX_NUM_CONFIGS)), |
67 | 3.15M | TempProtos(NIL_LIST) { |
68 | 3.15M | zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS)); |
69 | 3.15M | zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS)); |
70 | | |
71 | 205M | for (int i = 0; i < MAX_NUM_CONFIGS; i++) { |
72 | 202M | TempConfigFor(this, i) = nullptr; |
73 | 202M | } |
74 | 3.15M | } |
75 | | |
76 | 3.15M | ADAPT_CLASS_STRUCT::~ADAPT_CLASS_STRUCT() { |
77 | 205M | for (int i = 0; i < MAX_NUM_CONFIGS; i++) { |
78 | 201M | if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) { |
79 | 204 | delete PermConfigFor(this, i); |
80 | 201M | } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) { |
81 | 1.18k | delete TempConfigFor(this, i); |
82 | 1.18k | } |
83 | 201M | } |
84 | 3.15M | FreeBitVector(PermProtos); |
85 | 3.15M | FreeBitVector(PermConfigs); |
86 | 3.15M | auto list = TempProtos; |
87 | 3.16M | while (list != nullptr) { |
88 | 11.4k | delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node); |
89 | 11.4k | list = pop(list); |
90 | 11.4k | } |
91 | 3.15M | } |
92 | | |
93 | | /// Constructor for adapted templates. |
94 | | /// Add an empty class for each char in unicharset to the newly created templates. |
95 | 6.57k | ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) { |
96 | 6.57k | Templates = new INT_TEMPLATES_STRUCT; |
97 | 6.57k | NumPermClasses = 0; |
98 | 6.57k | NumNonEmptyClasses = 0; |
99 | | |
100 | | /* Insert an empty class for each unichar id in unicharset */ |
101 | 215M | for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) { |
102 | 215M | Class[i] = nullptr; |
103 | 215M | if (i < unicharset.size()) { |
104 | 3.15M | AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i); |
105 | 3.15M | } |
106 | 215M | } |
107 | 6.57k | } |
108 | | |
109 | 6.57k | ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() { |
110 | 3.16M | for (unsigned i = 0; i < (Templates)->NumClasses; i++) { |
111 | 3.15M | delete Class[i]; |
112 | 3.15M | } |
113 | 6.57k | delete Templates; |
114 | 6.57k | } |
115 | | |
116 | | // Returns FontinfoId of the given config of the given adapted class. |
117 | 53.4M | int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) { |
118 | 53.4M | return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId |
119 | 53.4M | : TempConfigFor(Class, ConfigId)->FontinfoId); |
120 | 53.4M | } |
121 | | |
122 | | /// This constructor allocates and returns a new temporary config. |
123 | | /// |
124 | | /// @param MaxProtoId max id of any proto in new config |
125 | | /// @param FontinfoId font information from pre-trained templates |
126 | | TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) : |
127 | 1.45k | NumTimesSeen(1), |
128 | 1.45k | ProtoVectorSize(WordsInVectorOfSize(maxProtoId + 1)), |
129 | 1.45k | MaxProtoId(maxProtoId), |
130 | 1.45k | Protos(NewBitVector(maxProtoId + 1)), |
131 | 1.45k | FontinfoId(fontinfoId) { |
132 | 1.45k | zero_all_bits(Protos, ProtoVectorSize); |
133 | 1.45k | } |
134 | | |
135 | 1.41k | TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() { |
136 | 1.41k | FreeBitVector(Protos); |
137 | 1.41k | } |
138 | | |
139 | | /*---------------------------------------------------------------------------*/ |
140 | | /** |
141 | | * This routine prints a summary of the adapted templates |
142 | | * in Templates to File. |
143 | | * |
144 | | * @param File open text file to print Templates to |
145 | | * @param Templates adapted templates to print to File |
146 | | * |
147 | | * @note Globals: none |
148 | | */ |
149 | 0 | void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) { |
150 | 0 | INT_CLASS_STRUCT *IClass; |
151 | 0 | ADAPT_CLASS_STRUCT *AClass; |
152 | |
|
153 | 0 | fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); |
154 | 0 | fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses, |
155 | 0 | Templates->NumPermClasses); |
156 | 0 | fprintf(File, " Id NC NPC NP NPP\n"); |
157 | 0 | fprintf(File, "------------------------\n"); |
158 | |
|
159 | 0 | for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) { |
160 | 0 | IClass = Templates->Templates->Class[i]; |
161 | 0 | AClass = Templates->Class[i]; |
162 | 0 | if (!IsEmptyAdaptedClass(AClass)) { |
163 | 0 | fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs, |
164 | 0 | AClass->NumPermConfigs, IClass->NumProtos, |
165 | 0 | IClass->NumProtos - AClass->TempProtos->size()); |
166 | 0 | } |
167 | 0 | } |
168 | 0 | fprintf(File, "\n"); |
169 | |
|
170 | 0 | } /* PrintAdaptedTemplates */ |
171 | | |
172 | | /*---------------------------------------------------------------------------*/ |
173 | | /** |
174 | | * Read an adapted class description from file and return |
175 | | * a ptr to the adapted class. |
176 | | * |
177 | | * @param fp open file to read adapted class from |
178 | | * @return Ptr to new adapted class. |
179 | | * |
180 | | * @note Globals: none |
181 | | */ |
182 | 0 | ADAPT_CLASS_STRUCT *ReadAdaptedClass(TFile *fp) { |
183 | 0 | int NumTempProtos; |
184 | 0 | int NumConfigs; |
185 | 0 | int i; |
186 | 0 | ADAPT_CLASS_STRUCT *Class; |
187 | | |
188 | | /* first read high level adapted class structure */ |
189 | 0 | Class = new ADAPT_CLASS_STRUCT; |
190 | 0 | fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1); |
191 | | |
192 | | /* then read in the definitions of the permanent protos and configs */ |
193 | 0 | Class->PermProtos = NewBitVector(MAX_NUM_PROTOS); |
194 | 0 | Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS); |
195 | 0 | fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS)); |
196 | 0 | fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS)); |
197 | | |
198 | | /* then read in the list of temporary protos */ |
199 | 0 | fp->FRead(&NumTempProtos, sizeof(int), 1); |
200 | 0 | Class->TempProtos = NIL_LIST; |
201 | 0 | for (i = 0; i < NumTempProtos; i++) { |
202 | 0 | auto TempProto = new TEMP_PROTO_STRUCT; |
203 | 0 | fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1); |
204 | 0 | Class->TempProtos = push_last(Class->TempProtos, TempProto); |
205 | 0 | } |
206 | | |
207 | | /* then read in the adapted configs */ |
208 | 0 | fp->FRead(&NumConfigs, sizeof(int), 1); |
209 | 0 | for (i = 0; i < NumConfigs; i++) { |
210 | 0 | if (test_bit(Class->PermConfigs, i)) { |
211 | 0 | Class->Config[i].Perm = ReadPermConfig(fp); |
212 | 0 | } else { |
213 | 0 | Class->Config[i].Temp = ReadTempConfig(fp); |
214 | 0 | } |
215 | 0 | } |
216 | |
|
217 | 0 | return (Class); |
218 | |
|
219 | 0 | } /* ReadAdaptedClass */ |
220 | | |
221 | | /*---------------------------------------------------------------------------*/ |
222 | | /** |
223 | | * Read a set of adapted templates from file and return |
224 | | * a ptr to the templates. |
225 | | * |
226 | | * @param fp open text file to read adapted templates from |
227 | | * @return Ptr to adapted templates read from file. |
228 | | * |
229 | | * @note Globals: none |
230 | | */ |
231 | 0 | ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) { |
232 | 0 | auto Templates = new ADAPT_TEMPLATES_STRUCT; |
233 | | |
234 | | /* first read the high level adaptive template struct */ |
235 | 0 | fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); |
236 | | |
237 | | /* then read in the basic integer templates */ |
238 | 0 | Templates->Templates = ReadIntTemplates(fp); |
239 | | |
240 | | /* then read in the adaptive info for each class */ |
241 | 0 | for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) { |
242 | 0 | Templates->Class[i] = ReadAdaptedClass(fp); |
243 | 0 | } |
244 | 0 | return (Templates); |
245 | |
|
246 | 0 | } /* ReadAdaptedTemplates */ |
247 | | |
248 | | /*---------------------------------------------------------------------------*/ |
249 | | /** |
250 | | * Read a permanent configuration description from file |
251 | | * and return a ptr to it. |
252 | | * |
253 | | * @param fp open file to read permanent config from |
254 | | * @return Ptr to new permanent configuration description. |
255 | | * |
256 | | * @note Globals: none |
257 | | */ |
258 | 0 | PERM_CONFIG_STRUCT *ReadPermConfig(TFile *fp) { |
259 | 0 | auto Config = new PERM_CONFIG_STRUCT; |
260 | 0 | uint8_t NumAmbigs; |
261 | 0 | fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1); |
262 | 0 | Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1]; |
263 | 0 | fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs); |
264 | 0 | Config->Ambigs[NumAmbigs] = -1; |
265 | 0 | fp->FRead(&(Config->FontinfoId), sizeof(int), 1); |
266 | |
|
267 | 0 | return (Config); |
268 | |
|
269 | 0 | } /* ReadPermConfig */ |
270 | | |
271 | | /*---------------------------------------------------------------------------*/ |
272 | | /** |
273 | | * Read a temporary configuration description from file |
274 | | * and return a ptr to it. |
275 | | * |
276 | | * @param fp open file to read temporary config from |
277 | | * @return Ptr to new temporary configuration description. |
278 | | * |
279 | | * @note Globals: none |
280 | | */ |
281 | 0 | TEMP_CONFIG_STRUCT *ReadTempConfig(TFile *fp) { |
282 | 0 | auto Config = new TEMP_CONFIG_STRUCT; |
283 | 0 | fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1); |
284 | |
|
285 | 0 | Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG); |
286 | 0 | fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize); |
287 | |
|
288 | 0 | return (Config); |
289 | |
|
290 | 0 | } /* ReadTempConfig */ |
291 | | |
292 | | /*---------------------------------------------------------------------------*/ |
293 | | /** |
294 | | * This routine writes a binary representation of Class |
295 | | * to File. |
296 | | * |
297 | | * @param File open file to write Class to |
298 | | * @param Class adapted class to write to File |
299 | | * @param NumConfigs number of configs in Class |
300 | | * |
301 | | * @note Globals: none |
302 | | */ |
303 | 0 | void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) { |
304 | | /* first write high level adapted class structure */ |
305 | 0 | fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File); |
306 | | |
307 | | /* then write out the definitions of the permanent protos and configs */ |
308 | 0 | fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File); |
309 | 0 | fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File); |
310 | | |
311 | | /* then write out the list of temporary protos */ |
312 | 0 | uint32_t NumTempProtos = Class->TempProtos->size(); |
313 | 0 | fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File); |
314 | 0 | auto TempProtos = Class->TempProtos; |
315 | 0 | iterate(TempProtos) { |
316 | 0 | void *proto = TempProtos->node; |
317 | 0 | fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File); |
318 | 0 | } |
319 | | |
320 | | /* then write out the adapted configs */ |
321 | 0 | fwrite(&NumConfigs, sizeof(int), 1, File); |
322 | 0 | for (int i = 0; i < NumConfigs; i++) { |
323 | 0 | if (test_bit(Class->PermConfigs, i)) { |
324 | 0 | WritePermConfig(File, Class->Config[i].Perm); |
325 | 0 | } else { |
326 | 0 | WriteTempConfig(File, Class->Config[i].Temp); |
327 | 0 | } |
328 | 0 | } |
329 | |
|
330 | 0 | } /* WriteAdaptedClass */ |
331 | | |
332 | | /*---------------------------------------------------------------------------*/ |
333 | | /** |
334 | | * This routine saves Templates to File in a binary format. |
335 | | * |
336 | | * @param File open text file to write Templates to |
337 | | * @param Templates set of adapted templates to write to File |
338 | | * |
339 | | * @note Globals: none |
340 | | */ |
341 | 0 | void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) { |
342 | | /* first write the high level adaptive template struct */ |
343 | 0 | fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File); |
344 | | |
345 | | /* then write out the basic integer templates */ |
346 | 0 | WriteIntTemplates(File, Templates->Templates, unicharset); |
347 | | |
348 | | /* then write out the adaptive info for each class */ |
349 | 0 | for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) { |
350 | 0 | WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs); |
351 | 0 | } |
352 | 0 | } /* WriteAdaptedTemplates */ |
353 | | |
354 | | /*---------------------------------------------------------------------------*/ |
355 | | /** |
356 | | * This routine writes a binary representation of a |
357 | | * permanent configuration to File. |
358 | | * |
359 | | * @param File open file to write Config to |
360 | | * @param Config permanent config to write to File |
361 | | * |
362 | | * @note Globals: none |
363 | | */ |
364 | 0 | void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config) { |
365 | 0 | uint8_t NumAmbigs = 0; |
366 | |
|
367 | 0 | assert(Config != nullptr); |
368 | 0 | while (Config->Ambigs[NumAmbigs] > 0) { |
369 | 0 | ++NumAmbigs; |
370 | 0 | } |
371 | |
|
372 | 0 | fwrite(&NumAmbigs, sizeof(uint8_t), 1, File); |
373 | 0 | fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); |
374 | 0 | fwrite(&(Config->FontinfoId), sizeof(int), 1, File); |
375 | 0 | } /* WritePermConfig */ |
376 | | |
377 | | /*---------------------------------------------------------------------------*/ |
378 | | /** |
379 | | * This routine writes a binary representation of a |
380 | | * temporary configuration to File. |
381 | | * |
382 | | * @param File open file to write Config to |
383 | | * @param Config temporary config to write to File |
384 | | * |
385 | | * @note Globals: none |
386 | | */ |
387 | 0 | void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config) { |
388 | 0 | assert(Config != nullptr); |
389 | |
|
390 | 0 | fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File); |
391 | 0 | fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File); |
392 | |
|
393 | 0 | } /* WriteTempConfig */ |
394 | | |
395 | | } // namespace tesseract |