Coverage Report

Created: 2025-11-16 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/classify/shapetable.cpp
Line
Count
Source
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Author: rays@google.com (Ray Smith)
3
///////////////////////////////////////////////////////////////////////
4
// File:        shapetable.cpp
5
// Description: Class to map a classifier shape index to unicharset
6
//              indices and font indices.
7
// Author:      Ray Smith
8
//
9
// (C) Copyright 2010, Google Inc.
10
// Licensed under the Apache License, Version 2.0 (the "License");
11
// you may not use this file except in compliance with the License.
12
// You may obtain a copy of the License at
13
// http://www.apache.org/licenses/LICENSE-2.0
14
// Unless required by applicable law or agreed to in writing, software
15
// distributed under the License is distributed on an "AS IS" BASIS,
16
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
// See the License for the specific language governing permissions and
18
// limitations under the License.
19
//
20
///////////////////////////////////////////////////////////////////////
21
22
#include "shapetable.h"
23
24
#include "bitvector.h"
25
#include "fontinfo.h"
26
#include "intfeaturespace.h"
27
#include "unicharset.h"
28
#include "unicity_table.h"
29
30
#include <algorithm>
31
32
namespace tesseract {
33
34
// Helper function to get the index of the first result with the required
35
// unichar_id. If the results are sorted by rating, this will also be the
36
// best result with the required unichar_id.
37
// Returns -1 if the unichar_id is not found
38
int ShapeRating::FirstResultWithUnichar(const std::vector<ShapeRating> &results,
39
0
                                        const ShapeTable &shape_table, UNICHAR_ID unichar_id) {
40
0
  size_t r = 0;
41
0
  for (const auto &result : results) {
42
0
    const auto shape_id = result.shape_id;
43
0
    const Shape &shape = shape_table.GetShape(shape_id);
44
0
    if (shape.ContainsUnichar(unichar_id)) {
45
0
      return r;
46
0
    }
47
0
    ++r;
48
0
  }
49
0
  return -1;
50
0
}
51
52
// Helper function to get the index of the first result with the required
53
// unichar_id. If the results are sorted by rating, this will also be the
54
// best result with the required unichar_id.
55
// Returns -1 if the unichar_id is not found
56
int UnicharRating::FirstResultWithUnichar(const std::vector<UnicharRating> &results,
57
0
                                          UNICHAR_ID unichar_id) {
58
0
  size_t r = 0;
59
0
  for (const auto &result : results) {
60
0
    if (result.unichar_id == unichar_id) {
61
0
      return r;
62
0
    }
63
0
    ++r;
64
0
  }
65
0
  return -1;
66
0
}
67
68
// Writes to the given file. Returns false in case of error.
69
0
bool UnicharAndFonts::Serialize(FILE *fp) const {
70
0
  return tesseract::Serialize(fp, &unichar_id) && tesseract::Serialize(fp, font_ids);
71
0
}
72
73
// Reads from the given file. Returns false in case of error.
74
14.0k
bool UnicharAndFonts::DeSerialize(TFile *fp) {
75
14.0k
  return fp->DeSerialize(&unichar_id) && fp->DeSerialize(font_ids);
76
14.0k
}
77
78
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
79
0
int UnicharAndFonts::SortByUnicharId(const void *v1, const void *v2) {
80
0
  const auto *p1 = static_cast<const UnicharAndFonts *>(v1);
81
0
  const auto *p2 = static_cast<const UnicharAndFonts *>(v2);
82
0
  return p1->unichar_id - p2->unichar_id;
83
0
}
84
85
0
bool UnicharAndFonts::StdSortByUnicharId(const UnicharAndFonts &v1, const UnicharAndFonts &v2) {
86
0
  return v1.unichar_id < v2.unichar_id;
87
0
}
88
89
// Writes to the given file. Returns false in case of error.
90
0
bool Shape::Serialize(FILE *fp) const {
91
0
  uint8_t sorted = unichars_sorted_;
92
0
  return tesseract::Serialize(fp, &sorted) && tesseract::Serialize(fp, unichars_);
93
0
}
94
// Reads from the given file. Returns false in case of error.
95
96
14.0k
bool Shape::DeSerialize(TFile *fp) {
97
14.0k
  uint8_t sorted;
98
14.0k
  if (!fp->DeSerialize(&sorted)) {
99
0
    return false;
100
0
  }
101
14.0k
  unichars_sorted_ = sorted != 0;
102
14.0k
  return fp->DeSerialize(unichars_);
103
14.0k
}
104
105
// Adds a font_id for the given unichar_id. If the unichar_id is not
106
// in the shape, it is added.
107
0
void Shape::AddToShape(int unichar_id, int font_id) {
108
0
  for (auto &unichar : unichars_) {
109
0
    if (unichar.unichar_id == unichar_id) {
110
      // Found the unichar in the shape table.
111
0
      std::vector<int> &font_list = unichar.font_ids;
112
0
      for (int f : font_list) {
113
0
        if (f == font_id) {
114
0
          return; // Font is already there.
115
0
        }
116
0
      }
117
0
      font_list.push_back(font_id);
118
0
      return;
119
0
    }
120
0
  }
121
  // Unichar_id is not in shape, so add it to shape.
122
0
  unichars_.emplace_back(unichar_id, font_id);
123
0
  unichars_sorted_ = unichars_.size() <= 1;
124
0
}
125
126
// Adds everything in other to this.
127
0
void Shape::AddShape(const Shape &other) {
128
0
  for (const auto &unichar : other.unichars_) {
129
0
    for (auto font_id : unichar.font_ids) {
130
0
      AddToShape(unichar.unichar_id, font_id);
131
0
    }
132
0
  }
133
0
  unichars_sorted_ = unichars_.size() <= 1;
134
0
}
135
136
// Returns true if the shape contains the given unichar_id, font_id pair.
137
0
bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const {
138
0
  for (const auto &unichar : unichars_) {
139
0
    if (unichar.unichar_id == unichar_id) {
140
      // Found the unichar, so look for the font.
141
0
      auto &font_list = unichar.font_ids;
142
0
      for (int f : font_list) {
143
0
        if (f == font_id) {
144
0
          return true;
145
0
        }
146
0
      }
147
0
      return false;
148
0
    }
149
0
  }
150
0
  return false;
151
0
}
152
153
// Returns true if the shape contains the given unichar_id, ignoring font.
154
0
bool Shape::ContainsUnichar(int unichar_id) const {
155
0
  for (const auto &unichar : unichars_) {
156
0
    if (unichar.unichar_id == unichar_id) {
157
0
      return true;
158
0
    }
159
0
  }
160
0
  return false;
161
0
}
162
163
// Returns true if the shape contains the given font, ignoring unichar_id.
164
0
bool Shape::ContainsFont(int font_id) const {
165
0
  for (const auto &unichar : unichars_) {
166
0
    auto &font_list = unichar.font_ids;
167
0
    for (int f : font_list) {
168
0
      if (f == font_id) {
169
0
        return true;
170
0
      }
171
0
    }
172
0
  }
173
0
  return false;
174
0
}
175
// Returns true if the shape contains the given font properties, ignoring
176
// unichar_id.
177
0
bool Shape::ContainsFontProperties(const FontInfoTable &font_table, uint32_t properties) const {
178
0
  for (const auto &unichar : unichars_) {
179
0
    auto &font_list = unichar.font_ids;
180
0
    for (int f : font_list) {
181
0
      if (font_table.at(f).properties == properties) {
182
0
        return true;
183
0
      }
184
0
    }
185
0
  }
186
0
  return false;
187
0
}
188
// Returns true if the shape contains multiple different font properties,
189
// ignoring unichar_id.
190
0
bool Shape::ContainsMultipleFontProperties(const FontInfoTable &font_table) const {
191
0
  uint32_t properties = font_table.at(unichars_[0].font_ids[0]).properties;
192
0
  for (const auto &unichar : unichars_) {
193
0
    auto &font_list = unichar.font_ids;
194
0
    for (int f : font_list) {
195
0
      if (font_table.at(f).properties != properties) {
196
0
        return true;
197
0
      }
198
0
    }
199
0
  }
200
0
  return false;
201
0
}
202
203
// Returns true if this shape is equal to other (ignoring order of unichars
204
// and fonts).
205
0
bool Shape::operator==(const Shape &other) const {
206
0
  return IsSubsetOf(other) && other.IsSubsetOf(*this);
207
0
}
208
209
// Returns true if this is a subset (including equal) of other.
210
0
bool Shape::IsSubsetOf(const Shape &other) const {
211
0
  for (const auto &unichar : unichars_) {
212
0
    int unichar_id = unichar.unichar_id;
213
0
    const std::vector<int> &font_list = unichar.font_ids;
214
0
    for (int f : font_list) {
215
0
      if (!other.ContainsUnicharAndFont(unichar_id, f)) {
216
0
        return false;
217
0
      }
218
0
    }
219
0
  }
220
0
  return true;
221
0
}
222
223
// Returns true if the lists of unichar ids are the same in this and other,
224
// ignoring fonts.
225
// NOT const, as it will sort the unichars on demand.
226
0
bool Shape::IsEqualUnichars(Shape *other) {
227
0
  if (unichars_.size() != other->unichars_.size()) {
228
0
    return false;
229
0
  }
230
0
  if (!unichars_sorted_) {
231
0
    SortUnichars();
232
0
  }
233
0
  if (!other->unichars_sorted_) {
234
0
    other->SortUnichars();
235
0
  }
236
0
  for (unsigned c = 0; c < unichars_.size(); ++c) {
237
0
    if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) {
238
0
      return false;
239
0
    }
240
0
  }
241
0
  return true;
242
0
}
243
244
// Sorts the unichars_ vector by unichar.
245
0
void Shape::SortUnichars() {
246
0
  std::sort(unichars_.begin(), unichars_.end(), UnicharAndFonts::StdSortByUnicharId);
247
0
  unichars_sorted_ = true;
248
0
}
249
250
0
ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) {}
251
4
ShapeTable::ShapeTable(const UNICHARSET &unicharset) : unicharset_(&unicharset), num_fonts_(0) {}
252
253
// Writes to the given file. Returns false in case of error.
254
0
bool ShapeTable::Serialize(FILE *fp) const {
255
0
  return tesseract::Serialize(fp, shape_table_);
256
0
}
257
// Reads from the given file. Returns false in case of error.
258
259
4
bool ShapeTable::DeSerialize(TFile *fp) {
260
4
  if (!fp->DeSerialize(shape_table_)) {
261
0
    return false;
262
0
  }
263
4
  num_fonts_ = 0;
264
4
  return true;
265
4
}
266
267
// Returns the number of fonts used in this ShapeTable, computing it if
268
// necessary.
269
0
int ShapeTable::NumFonts() const {
270
0
  if (num_fonts_ <= 0) {
271
0
    for (auto shape_id : shape_table_) {
272
0
      const Shape &shape = *shape_id;
273
0
      for (int c = 0; c < shape.size(); ++c) {
274
0
        for (auto font_id : shape[c].font_ids) {
275
0
          if (font_id >= num_fonts_) {
276
0
            num_fonts_ = font_id + 1;
277
0
          }
278
0
        }
279
0
      }
280
0
    }
281
0
  }
282
0
  return num_fonts_;
283
0
}
284
285
// Re-indexes the class_ids in the shapetable according to the given map.
286
// Useful in conjunction with set_unicharset.
287
0
void ShapeTable::ReMapClassIds(const std::vector<int> &unicharset_map) {
288
0
  for (auto shape : shape_table_) {
289
0
    for (int c = 0; c < shape->size(); ++c) {
290
0
      shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
291
0
    }
292
0
  }
293
0
}
294
295
// Returns a string listing the classes/fonts in a shape.
296
0
std::string ShapeTable::DebugStr(unsigned shape_id) const {
297
0
  if (shape_id >= shape_table_.size()) {
298
0
    return "INVALID_UNICHAR_ID";
299
0
  }
300
0
  const Shape &shape = GetShape(shape_id);
301
0
  std::string result;
302
0
  result += "Shape" + std::to_string(shape_id);
303
0
  if (shape.size() > 100) {
304
0
    result += " Num unichars=" + std::to_string(shape.size());
305
0
    return result;
306
0
  }
307
0
  for (int c = 0; c < shape.size(); ++c) {
308
0
    result += " c_id=" + std::to_string(shape[c].unichar_id);
309
0
    result += "=";
310
0
    result += unicharset_->id_to_unichar(shape[c].unichar_id);
311
0
    if (shape.size() < 10) {
312
0
      result += ", " + std::to_string(shape[c].font_ids.size());
313
0
      result += " fonts =";
314
0
      int num_fonts = shape[c].font_ids.size();
315
0
      if (num_fonts > 10) {
316
0
        result += " " + std::to_string(shape[c].font_ids[0]);
317
0
        result += " ... " + std::to_string(shape[c].font_ids[num_fonts - 1]);
318
0
      } else {
319
0
        for (int f = 0; f < num_fonts; ++f) {
320
0
          result += " " + std::to_string(shape[c].font_ids[f]);
321
0
        }
322
0
      }
323
0
    }
324
0
  }
325
0
  return result;
326
0
}
327
328
// Returns a debug string summarizing the table.
329
0
std::string ShapeTable::SummaryStr() const {
330
0
  int max_unichars = 0;
331
0
  int num_multi_shapes = 0;
332
0
  int num_master_shapes = 0;
333
0
  for (unsigned s = 0; s < shape_table_.size(); ++s) {
334
0
    if (MasterDestinationIndex(s) != s) {
335
0
      continue;
336
0
    }
337
0
    ++num_master_shapes;
338
0
    int shape_size = GetShape(s).size();
339
0
    if (shape_size > 1) {
340
0
      ++num_multi_shapes;
341
0
    }
342
0
    if (shape_size > max_unichars) {
343
0
      max_unichars = shape_size;
344
0
    }
345
0
  }
346
0
  std::string result;
347
0
  result += "Number of shapes = " + std::to_string(num_master_shapes);
348
0
  result += " max unichars = " + std::to_string(max_unichars);
349
0
  result += " number with multiple unichars = " + std::to_string(num_multi_shapes);
350
0
  return result;
351
0
}
352
353
// Adds a new shape starting with the given unichar_id and font_id.
354
// Returns the assigned index.
355
0
unsigned ShapeTable::AddShape(int unichar_id, int font_id) {
356
0
  auto index = shape_table_.size();
357
0
  auto *shape = new Shape;
358
0
  shape->AddToShape(unichar_id, font_id);
359
0
  shape_table_.push_back(shape);
360
0
  num_fonts_ = std::max(num_fonts_, font_id + 1);
361
0
  return index;
362
0
}
363
364
// Adds a copy of the given shape unless it is already present.
365
// Returns the assigned index or index of existing shape if already present.
366
0
unsigned ShapeTable::AddShape(const Shape &other) {
367
0
  unsigned index;
368
0
  for (index = 0; index < shape_table_.size() && !(other == *shape_table_[index]); ++index) {
369
0
    continue;
370
0
  }
371
0
  if (index == shape_table_.size()) {
372
0
    auto *shape = new Shape(other);
373
0
    shape_table_.push_back(shape);
374
0
  }
375
0
  num_fonts_ = 0;
376
0
  return index;
377
0
}
378
379
// Removes the shape given by the shape index.
380
0
void ShapeTable::DeleteShape(unsigned shape_id) {
381
0
  delete shape_table_[shape_id];
382
0
  shape_table_.erase(shape_table_.begin() + shape_id);
383
0
}
384
385
// Adds a font_id to the given existing shape index for the given
386
// unichar_id. If the unichar_id is not in the shape, it is added.
387
0
void ShapeTable::AddToShape(unsigned shape_id, int unichar_id, int font_id) {
388
0
  Shape &shape = *shape_table_[shape_id];
389
0
  shape.AddToShape(unichar_id, font_id);
390
0
  num_fonts_ = std::max(num_fonts_, font_id + 1);
391
0
}
392
393
// Adds the given shape to the existing shape with the given index.
394
0
void ShapeTable::AddShapeToShape(unsigned shape_id, const Shape &other) {
395
0
  Shape &shape = *shape_table_[shape_id];
396
0
  shape.AddShape(other);
397
0
  num_fonts_ = 0;
398
0
}
399
400
// Returns the id of the shape that contains the given unichar and font.
401
// If not found, returns -1.
402
// If font_id < 0, the font_id is ignored and the first shape that matches
403
// the unichar_id is returned.
404
0
int ShapeTable::FindShape(int unichar_id, int font_id) const {
405
0
  for (unsigned s = 0; s < shape_table_.size(); ++s) {
406
0
    const Shape &shape = GetShape(s);
407
0
    for (int c = 0; c < shape.size(); ++c) {
408
0
      if (shape[c].unichar_id == unichar_id) {
409
0
        if (font_id < 0) {
410
0
          return s; // We don't care about the font.
411
0
        }
412
0
        for (auto f : shape[c].font_ids) {
413
0
          if (f == font_id) {
414
0
            return s;
415
0
          }
416
0
        }
417
0
      }
418
0
    }
419
0
  }
420
0
  return -1;
421
0
}
422
423
// Returns the first unichar_id and font_id in the given shape.
424
0
void ShapeTable::GetFirstUnicharAndFont(unsigned shape_id, int *unichar_id, int *font_id) const {
425
0
  const UnicharAndFonts &unichar_and_fonts = (*shape_table_[shape_id])[0];
426
0
  *unichar_id = unichar_and_fonts.unichar_id;
427
0
  *font_id = unichar_and_fonts.font_ids[0];
428
0
}
429
430
// Expands all the classes/fonts in the shape individually to build
431
// a ShapeTable.
432
0
int ShapeTable::BuildFromShape(const Shape &shape, const ShapeTable &master_shapes) {
433
0
  BitVector shape_map(master_shapes.NumShapes());
434
0
  for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
435
0
    for (auto font_id : shape[u_ind].font_ids) {
436
0
      int c = shape[u_ind].unichar_id;
437
0
      int master_id = master_shapes.FindShape(c, font_id);
438
0
      if (master_id >= 0) {
439
0
        shape_map.SetBit(master_id);
440
0
      } else if (FindShape(c, font_id) < 0) {
441
0
        AddShape(c, font_id);
442
0
      }
443
0
    }
444
0
  }
445
0
  int num_masters = 0;
446
0
  for (unsigned s = 0; s < master_shapes.NumShapes(); ++s) {
447
0
    if (shape_map[s]) {
448
0
      AddShape(master_shapes.GetShape(s));
449
0
      ++num_masters;
450
0
    }
451
0
  }
452
0
  return num_masters;
453
0
}
454
455
// Returns true if the shapes are already merged.
456
0
bool ShapeTable::AlreadyMerged(unsigned shape_id1, unsigned shape_id2) const {
457
0
  return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
458
0
}
459
460
// Returns true if any shape contains multiple unichars.
461
0
bool ShapeTable::AnyMultipleUnichars() const {
462
0
  auto num_shapes = NumShapes();
463
0
  for (unsigned s1 = 0; s1 < num_shapes; ++s1) {
464
0
    if (MasterDestinationIndex(s1) != s1) {
465
0
      continue;
466
0
    }
467
0
    if (GetShape(s1).size() > 1) {
468
0
      return true;
469
0
    }
470
0
  }
471
0
  return false;
472
0
}
473
474
// Returns the maximum number of unichars over all shapes.
475
1.85M
int ShapeTable::MaxNumUnichars() const {
476
1.85M
  int max_num_unichars = 0;
477
1.85M
  int num_shapes = NumShapes();
478
6.51G
  for (int s = 0; s < num_shapes; ++s) {
479
6.51G
    if (GetShape(s).size() > max_num_unichars) {
480
1.85M
      max_num_unichars = GetShape(s).size();
481
1.85M
    }
482
6.51G
  }
483
1.85M
  return max_num_unichars;
484
1.85M
}
485
486
// Merges shapes with a common unichar over the [start, end) interval.
487
// Assumes single unichar per shape.
488
0
void ShapeTable::ForceFontMerges(unsigned start, unsigned end) {
489
0
  for (unsigned s1 = start; s1 < end; ++s1) {
490
0
    if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) {
491
0
      int unichar_id = GetShape(s1)[0].unichar_id;
492
0
      for (auto s2 = s1 + 1; s2 < end; ++s2) {
493
0
        if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 &&
494
0
            unichar_id == GetShape(s2)[0].unichar_id) {
495
0
          MergeShapes(s1, s2);
496
0
        }
497
0
      }
498
0
    }
499
0
  }
500
0
  ShapeTable compacted(*unicharset_);
501
0
  compacted.AppendMasterShapes(*this, nullptr);
502
0
  *this = compacted;
503
0
}
504
505
// Returns the number of unichars in the master shape.
506
0
unsigned ShapeTable::MasterUnicharCount(unsigned shape_id) const {
507
0
  int master_id = MasterDestinationIndex(shape_id);
508
0
  return GetShape(master_id).size();
509
0
}
510
511
// Returns the sum of the font counts in the master shape.
512
0
int ShapeTable::MasterFontCount(unsigned shape_id) const {
513
0
  int master_id = MasterDestinationIndex(shape_id);
514
0
  const Shape &shape = GetShape(master_id);
515
0
  int font_count = 0;
516
0
  for (int c = 0; c < shape.size(); ++c) {
517
0
    font_count += shape[c].font_ids.size();
518
0
  }
519
0
  return font_count;
520
0
}
521
522
// Returns the number of unichars that would result from merging the shapes.
523
0
int ShapeTable::MergedUnicharCount(unsigned shape_id1, unsigned shape_id2) const {
524
  // Do it the easy way for now.
525
0
  int master_id1 = MasterDestinationIndex(shape_id1);
526
0
  int master_id2 = MasterDestinationIndex(shape_id2);
527
0
  Shape combined_shape(*shape_table_[master_id1]);
528
0
  combined_shape.AddShape(*shape_table_[master_id2]);
529
0
  return combined_shape.size();
530
0
}
531
532
// Merges two shape_ids, leaving shape_id2 marked as merged.
533
0
void ShapeTable::MergeShapes(unsigned shape_id1, unsigned shape_id2) {
534
0
  auto master_id1 = MasterDestinationIndex(shape_id1);
535
0
  auto master_id2 = MasterDestinationIndex(shape_id2);
536
  // Point master_id2 (and all merged shapes) to master_id1.
537
0
  shape_table_[master_id2]->set_destination_index(master_id1);
538
  // Add all the shapes of master_id2 to master_id1.
539
0
  shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
540
0
}
541
542
// Swaps two shape_ids.
543
0
void ShapeTable::SwapShapes(unsigned shape_id1, unsigned shape_id2) {
544
0
  Shape *tmp = shape_table_[shape_id1];
545
0
  shape_table_[shape_id1] = shape_table_[shape_id2];
546
0
  shape_table_[shape_id2] = tmp;
547
0
}
548
549
// Returns the destination of this shape, (if merged), taking into account
550
// the fact that the destination may itself have been merged.
551
0
unsigned ShapeTable::MasterDestinationIndex(unsigned shape_id) const {
552
0
  auto dest_id = shape_table_[shape_id]->destination_index();
553
0
  if (static_cast<unsigned>(dest_id) == shape_id || dest_id < 0) {
554
0
    return shape_id; // Is master already.
555
0
  }
556
0
  auto master_id = shape_table_[dest_id]->destination_index();
557
0
  if (master_id == dest_id || master_id < 0) {
558
0
    return dest_id; // Dest is the master and shape_id points to it.
559
0
  }
560
0
  master_id = MasterDestinationIndex(master_id);
561
0
  return master_id;
562
0
}
563
564
// Returns false if the unichars in neither shape is a subset of the other.
565
0
bool ShapeTable::SubsetUnichar(unsigned shape_id1, unsigned shape_id2) const {
566
0
  const Shape &shape1 = GetShape(shape_id1);
567
0
  const Shape &shape2 = GetShape(shape_id2);
568
0
  int c1, c2;
569
0
  for (c1 = 0; c1 < shape1.size(); ++c1) {
570
0
    int unichar_id1 = shape1[c1].unichar_id;
571
0
    if (!shape2.ContainsUnichar(unichar_id1)) {
572
0
      break;
573
0
    }
574
0
  }
575
0
  for (c2 = 0; c2 < shape2.size(); ++c2) {
576
0
    int unichar_id2 = shape2[c2].unichar_id;
577
0
    if (!shape1.ContainsUnichar(unichar_id2)) {
578
0
      break;
579
0
    }
580
0
  }
581
0
  return c1 == shape1.size() || c2 == shape2.size();
582
0
}
583
584
// Returns false if the unichars in neither shape is a subset of the other.
585
0
bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, unsigned shape_id) const {
586
0
  const Shape &merge1 = GetShape(merge_id1);
587
0
  const Shape &merge2 = GetShape(merge_id2);
588
0
  const Shape &shape = GetShape(shape_id);
589
0
  int cm1, cm2, cs;
590
0
  for (cs = 0; cs < shape.size(); ++cs) {
591
0
    int unichar_id = shape[cs].unichar_id;
592
0
    if (!merge1.ContainsUnichar(unichar_id) && !merge2.ContainsUnichar(unichar_id)) {
593
0
      break; // Shape is not a subset of the merge.
594
0
    }
595
0
  }
596
0
  for (cm1 = 0; cm1 < merge1.size(); ++cm1) {
597
0
    int unichar_id1 = merge1[cm1].unichar_id;
598
0
    if (!shape.ContainsUnichar(unichar_id1)) {
599
0
      break; // Merge is not a subset of shape
600
0
    }
601
0
  }
602
0
  for (cm2 = 0; cm2 < merge2.size(); ++cm2) {
603
0
    int unichar_id2 = merge2[cm2].unichar_id;
604
0
    if (!shape.ContainsUnichar(unichar_id2)) {
605
0
      break; // Merge is not a subset of shape
606
0
    }
607
0
  }
608
0
  return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size());
609
0
}
610
611
// Returns true if the unichar sets are equal between the shapes.
612
0
bool ShapeTable::EqualUnichars(unsigned shape_id1, unsigned shape_id2) const {
613
0
  const Shape &shape1 = GetShape(shape_id1);
614
0
  const Shape &shape2 = GetShape(shape_id2);
615
0
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
616
0
    int unichar_id1 = shape1[c1].unichar_id;
617
0
    if (!shape2.ContainsUnichar(unichar_id1)) {
618
0
      return false;
619
0
    }
620
0
  }
621
0
  for (int c2 = 0; c2 < shape2.size(); ++c2) {
622
0
    int unichar_id2 = shape2[c2].unichar_id;
623
0
    if (!shape1.ContainsUnichar(unichar_id2)) {
624
0
      return false;
625
0
    }
626
0
  }
627
0
  return true;
628
0
}
629
630
// Returns true if the unichar sets are equal between the shapes.
631
0
bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, unsigned shape_id) const {
632
0
  const Shape &merge1 = GetShape(merge_id1);
633
0
  const Shape &merge2 = GetShape(merge_id2);
634
0
  const Shape &shape = GetShape(shape_id);
635
0
  for (int cs = 0; cs < shape.size(); ++cs) {
636
0
    auto unichar_id = shape[cs].unichar_id;
637
0
    if (!merge1.ContainsUnichar(unichar_id) && !merge2.ContainsUnichar(unichar_id)) {
638
0
      return false; // Shape has a unichar that appears in neither merge.
639
0
    }
640
0
  }
641
0
  for (int cm1 = 0; cm1 < merge1.size(); ++cm1) {
642
0
    auto unichar_id1 = merge1[cm1].unichar_id;
643
0
    if (!shape.ContainsUnichar(unichar_id1)) {
644
0
      return false; // Merge has a unichar that is not in shape.
645
0
    }
646
0
  }
647
0
  for (int cm2 = 0; cm2 < merge2.size(); ++cm2) {
648
0
    auto unichar_id2 = merge2[cm2].unichar_id;
649
0
    if (!shape.ContainsUnichar(unichar_id2)) {
650
0
      return false; // Merge has a unichar that is not in shape.
651
0
    }
652
0
  }
653
0
  return true;
654
0
}
655
656
// Returns true if there is a common unichar between the shapes.
657
0
bool ShapeTable::CommonUnichars(unsigned shape_id1, unsigned shape_id2) const {
658
0
  const Shape &shape1 = GetShape(shape_id1);
659
0
  const Shape &shape2 = GetShape(shape_id2);
660
0
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
661
0
    auto unichar_id1 = shape1[c1].unichar_id;
662
0
    if (shape2.ContainsUnichar(unichar_id1)) {
663
0
      return true;
664
0
    }
665
0
  }
666
0
  return false;
667
0
}
668
669
// Returns true if there is a common font id between the shapes.
670
0
bool ShapeTable::CommonFont(unsigned shape_id1, unsigned shape_id2) const {
671
0
  const Shape &shape1 = GetShape(shape_id1);
672
0
  const Shape &shape2 = GetShape(shape_id2);
673
0
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
674
0
    const std::vector<int> &font_list1 = shape1[c1].font_ids;
675
0
    for (int f : font_list1) {
676
0
      if (shape2.ContainsFont(f)) {
677
0
        return true;
678
0
      }
679
0
    }
680
0
  }
681
0
  return false;
682
0
}
683
684
// Appends the master shapes from other to this.
685
// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
686
0
void ShapeTable::AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map) {
687
0
  if (shape_map != nullptr) {
688
0
    shape_map->clear();
689
0
    shape_map->resize(other.NumShapes(), -1);
690
0
  }
691
0
  for (unsigned s = 0; s < other.shape_table_.size(); ++s) {
692
0
    if (other.shape_table_[s]->destination_index() < 0) {
693
0
      int index = AddShape(*other.shape_table_[s]);
694
0
      if (shape_map != nullptr) {
695
0
        (*shape_map)[s] = index;
696
0
      }
697
0
    }
698
0
  }
699
0
}
700
701
// Returns the number of master shapes remaining after merging.
702
0
int ShapeTable::NumMasterShapes() const {
703
0
  int num_shapes = 0;
704
0
  for (auto s : shape_table_) {
705
0
    if (s->destination_index() < 0) {
706
0
      ++num_shapes;
707
0
    }
708
0
  }
709
0
  return num_shapes;
710
0
}
711
712
// Adds the unichars of the given shape_id to the vector of results. Any
713
// unichar_id that is already present just has the fonts added to the
714
// font set for that result without adding a new entry in the vector.
715
// NOTE: it is assumed that the results are given to this function in order
716
// of decreasing rating.
717
// The unichar_map vector indicates the index of the results entry containing
718
// each unichar, or -1 if the unichar is not yet included in results.
719
void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, std::vector<int> *unichar_map,
720
0
                                   std::vector<UnicharRating> *results) const {
721
0
  if (shape_rating.joined) {
722
0
    AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, results);
723
0
  }
724
0
  if (shape_rating.broken) {
725
0
    AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map, results);
726
0
  }
727
0
  const Shape &shape = GetShape(shape_rating.shape_id);
728
0
  for (int u = 0; u < shape.size(); ++u) {
729
0
    int result_index =
730
0
        AddUnicharToResults(shape[u].unichar_id, shape_rating.rating, unichar_map, results);
731
0
    for (auto font_id : shape[u].font_ids) {
732
0
      (*results)[result_index].fonts.emplace_back(font_id,
733
0
                                                  IntCastRounded(shape_rating.rating * INT16_MAX));
734
0
    }
735
0
  }
736
0
}
737
738
// Adds the given unichar_id to the results if needed, updating unichar_map
739
// and returning the index of unichar in results.
740
int ShapeTable::AddUnicharToResults(int unichar_id, float rating, std::vector<int> *unichar_map,
741
0
                                    std::vector<UnicharRating> *results) const {
742
0
  int result_index = unichar_map->at(unichar_id);
743
0
  if (result_index < 0) {
744
0
    UnicharRating result(unichar_id, rating);
745
0
    result_index = results->size();
746
0
    results->push_back(result);
747
0
    (*unichar_map)[unichar_id] = result_index;
748
0
  }
749
0
  return result_index;
750
0
}
751
752
} // namespace tesseract