Coverage Report

Created: 2026-02-14 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccstruct/stepblob.cpp
Line
Count
Source
1
/**********************************************************************
2
 * File:        stepblob.cpp  (Formerly cblob.c)
3
 * Description: Code for C_BLOB class.
4
 * Author:      Ray Smith
5
 *
6
 * (C) Copyright 1991, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
// Include automatically generated configuration file if running autoconf.
20
#ifdef HAVE_CONFIG_H
21
#  include "config_auto.h"
22
#endif
23
24
#include "stepblob.h"
25
26
#include "points.h" // for operator+=, FCOORD, ICOORD
27
28
#include <allheaders.h> // for pixCreate, pixGetDepth
29
#include <vector>       // for std::vector
30
31
namespace tesseract {
32
33
class DENORM;
34
35
// Max perimeter to width ratio for a baseline position above box bottom.
36
const double kMaxPerimeterWidthRatio = 8.0;
37
38
/**********************************************************************
39
 * position_outline
40
 *
41
 * Position the outline in the given list at the relevant place
42
 * according to its nesting.
43
 **********************************************************************/
44
static void position_outline( // put in place
45
    C_OUTLINE *outline,       // thing to place
46
    C_OUTLINE_LIST *destlist  // destination list
47
3.24M
) {
48
3.24M
  C_OUTLINE_IT it = destlist; // iterator
49
                              // iterator on children
50
3.24M
  C_OUTLINE_IT child_it = outline->child();
51
52
3.24M
  if (!it.empty()) {
53
1.93M
    do {
54
      // outline from dest list
55
1.93M
      C_OUTLINE *dest_outline = it.data(); // get destination
56
                                // encloses dest
57
1.93M
      if (*dest_outline < *outline) {
58
        // take off list
59
944
        dest_outline = it.extract();
60
        // put this in place
61
944
        it.add_after_then_move(outline);
62
        // make it a child
63
944
        child_it.add_to_end(dest_outline);
64
1.71k
        while (!it.at_last()) {
65
771
          it.forward(); // do rest of list
66
                        // check for other children
67
771
          dest_outline = it.data();
68
771
          if (*dest_outline < *outline) {
69
            // take off list
70
340
            dest_outline = it.extract();
71
340
            child_it.add_to_end(dest_outline);
72
            // make it a child
73
340
            if (it.empty()) {
74
0
              break;
75
0
            }
76
340
          }
77
771
        }
78
944
        return; // finished
79
944
      }
80
      // enclosed by dest
81
1.93M
      else if (*outline < *dest_outline) {
82
208k
        position_outline(outline, dest_outline->child());
83
        // place in child list
84
208k
        return; // finished
85
208k
      }
86
1.72M
      it.forward();
87
1.72M
    } while (!it.at_first());
88
404k
  }
89
3.03M
  it.add_to_end(outline); // at outer level
90
3.03M
}
91
92
/**********************************************************************
93
 * plot_outline_list
94
 *
95
 * Draw a list of outlines in the given colour and their children
96
 * in the child colour.
97
 **********************************************************************/
98
99
#ifndef GRAPHICS_DISABLED
100
static void plot_outline_list(     // draw outlines
101
    C_OUTLINE_LIST *list,          // outline to draw
102
    ScrollView *window,            // window to draw in
103
    ScrollView::Color colour,      // colour to use
104
    ScrollView::Color child_colour // colour of children
105
) {
106
  C_OUTLINE *outline;     // current outline
107
  C_OUTLINE_IT it = list; // iterator
108
109
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
110
    outline = it.data();
111
    // draw it
112
    outline->plot(window, colour);
113
    if (!outline->child()->empty()) {
114
      plot_outline_list(outline->child(), window, child_colour, child_colour);
115
    }
116
  }
117
}
118
// Draws the outlines in the given colour, and child_colour, normalized
119
// using the given denorm, making use of sub-pixel accurate information
120
// if available.
121
static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
122
                                     ScrollView::Color colour, ScrollView::Color child_colour,
123
                                     ScrollView *window) {
124
  C_OUTLINE_IT it(list);
125
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
126
    C_OUTLINE *outline = it.data();
127
    outline->plot_normed(denorm, colour, window);
128
    if (!outline->child()->empty()) {
129
      plot_normed_outline_list(denorm, outline->child(), child_colour, child_colour, window);
130
    }
131
  }
132
}
133
#endif
134
135
/**********************************************************************
136
 * reverse_outline_list
137
 *
138
 * Reverse a list of outlines and their children.
139
 **********************************************************************/
140
141
859k
static void reverse_outline_list(C_OUTLINE_LIST *list) {
142
859k
  C_OUTLINE_IT it = list; // iterator
143
144
877k
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
145
17.9k
    C_OUTLINE *outline = it.data();
146
17.9k
    outline->reverse(); // reverse it
147
17.9k
    outline->set_flag(COUT_INVERSE, true);
148
17.9k
    if (!outline->child()->empty()) {
149
257
      reverse_outline_list(outline->child());
150
257
    }
151
17.9k
  }
152
859k
}
153
154
/**********************************************************************
155
 * C_BLOB::C_BLOB
156
 *
157
 * Constructor to build a C_BLOB from a list of C_OUTLINEs.
158
 * The C_OUTLINEs are not copied so the source list is emptied.
159
 * The C_OUTLINEs are nested correctly in the blob.
160
 **********************************************************************/
161
162
300k
C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
163
660k
  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
164
359k
    C_OUTLINE *outline = ol_it.extract();
165
    // Position this outline in appropriate position in the hierarchy.
166
359k
    position_outline(outline, &outlines);
167
359k
  }
168
300k
  CheckInverseFlagAndDirection();
169
300k
}
170
171
// Simpler constructor to build a blob from a single outline that has
172
// already been fully initialized.
173
2.56M
C_BLOB::C_BLOB(C_OUTLINE *outline) {
174
2.56M
  C_OUTLINE_IT it(&outlines);
175
2.56M
  it.add_to_end(outline);
176
2.56M
}
177
178
// Builds a set of one or more blobs from a list of outlines.
179
// Input: one outline on outline_list contains all the others, but the
180
// nesting and order are undefined.
181
// If good_blob is true, the blob is added to good_blobs_it, unless
182
// an illegal (generation-skipping) parent-child relationship is found.
183
// If so, the parent blob goes to bad_blobs_it, and the immediate children
184
// are promoted to the top level, recursively being sent to good_blobs_it.
185
// If good_blob is false, all created blobs will go to the bad_blobs_it.
186
// Output: outline_list is empty. One or more blobs are added to
187
// good_blobs_it and/or bad_blobs_it.
188
void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
189
2.47M
                                        C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it) {
190
  // List of top-level outlines with correctly nested children.
191
2.47M
  C_OUTLINE_LIST nested_outlines;
192
5.14M
  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
193
2.67M
    C_OUTLINE *outline = ol_it.extract();
194
    // Position this outline in appropriate position in the hierarchy.
195
2.67M
    position_outline(outline, &nested_outlines);
196
2.67M
  }
197
  // Check for legal nesting and reassign as required.
198
4.94M
  for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
199
2.47M
    C_OUTLINE *outline = ol_it.extract();
200
2.47M
    bool blob_is_good = good_blob;
201
2.47M
    if (!outline->IsLegallyNested()) {
202
      // The blob is illegally nested.
203
      // Mark it bad, and add all its children to the top-level list.
204
0
      blob_is_good = false;
205
0
      ol_it.add_list_after(outline->child());
206
0
    }
207
2.47M
    auto *blob = new C_BLOB(outline);
208
    // Set inverse flag and reverse if needed.
209
2.47M
    blob->CheckInverseFlagAndDirection();
210
    // Put on appropriate list.
211
2.47M
    if (!blob_is_good && bad_blobs_it != nullptr) {
212
3.22k
      bad_blobs_it->add_after_then_move(blob);
213
2.46M
    } else {
214
2.46M
      good_blobs_it->add_after_then_move(blob);
215
2.46M
    }
216
2.47M
  }
217
2.47M
}
218
219
// Sets the COUT_INVERSE flag appropriately on the outlines and their
220
// children recursively, reversing the outlines if needed so that
221
// everything has an anticlockwise top-level.
222
2.77M
void C_BLOB::CheckInverseFlagAndDirection() {
223
2.77M
  C_OUTLINE_IT ol_it(&outlines);
224
5.60M
  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
225
2.82M
    C_OUTLINE *outline = ol_it.data();
226
2.82M
    if (outline->turn_direction() < 0) {
227
859k
      outline->reverse();
228
859k
      reverse_outline_list(outline->child());
229
859k
      outline->set_flag(COUT_INVERSE, true);
230
1.97M
    } else {
231
1.97M
      outline->set_flag(COUT_INVERSE, false);
232
1.97M
    }
233
2.82M
  }
234
2.77M
}
235
236
// Build and return a fake blob containing a single fake outline with no
237
// steps.
238
180k
C_BLOB *C_BLOB::FakeBlob(const TBOX &box) {
239
180k
  C_OUTLINE_LIST outlines;
240
180k
  C_OUTLINE::FakeOutline(box, &outlines);
241
180k
  return new C_BLOB(&outlines);
242
180k
}
243
244
/**********************************************************************
245
 * C_BLOB::bounding_box
246
 *
247
 * Return the bounding box of the blob.
248
 **********************************************************************/
249
250
92.5M
TBOX C_BLOB::bounding_box() const { // bounding box
251
  // This is a read-only iteration of the outlines.
252
92.5M
  C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST *>(&outlines);
253
92.5M
  TBOX box; // bounding box
254
255
222M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256
129M
    C_OUTLINE *outline = it.data();
257
129M
    box += outline->bounding_box();
258
129M
  }
259
92.5M
  return box;
260
92.5M
}
261
262
/**********************************************************************
263
 * C_BLOB::area
264
 *
265
 * Return the area of the blob.
266
 **********************************************************************/
267
268
2.50M
int32_t C_BLOB::area() {       // area
269
2.50M
  C_OUTLINE_IT it = &outlines; // outlines of blob
270
2.50M
  int32_t total = 0;           // total area
271
272
5.01M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
273
2.50M
    C_OUTLINE *outline = it.data();
274
2.50M
    total += outline->area();
275
2.50M
  }
276
2.50M
  return total;
277
2.50M
}
278
279
/**********************************************************************
280
 * C_BLOB::perimeter
281
 *
282
 * Return the perimeter of the top and 2nd level outlines.
283
 **********************************************************************/
284
285
2.24M
int32_t C_BLOB::perimeter() {
286
2.24M
  C_OUTLINE_IT it = &outlines; // outlines of blob
287
2.24M
  int32_t total = 0;           // total perimeter
288
289
4.49M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
290
2.24M
    C_OUTLINE *outline = it.data();
291
2.24M
    total += outline->perimeter();
292
2.24M
  }
293
2.24M
  return total;
294
2.24M
}
295
296
/**********************************************************************
297
 * C_BLOB::outer_area
298
 *
299
 * Return the area of the blob.
300
 **********************************************************************/
301
302
0
int32_t C_BLOB::outer_area() { // area
303
0
  C_OUTLINE_IT it = &outlines; // outlines of blob
304
0
  int32_t total = 0;           // total area
305
306
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
307
0
    C_OUTLINE *outline = it.data();
308
0
    total += outline->outer_area();
309
0
  }
310
0
  return total;
311
0
}
312
313
/**********************************************************************
314
 * C_BLOB::count_transitions
315
 *
316
 * Return the total x and y maxes and mins in the blob.
317
 * Child outlines are not counted.
318
 **********************************************************************/
319
320
int32_t C_BLOB::count_transitions( // area
321
    int32_t threshold              // on size
322
1.12M
) {
323
1.12M
  C_OUTLINE_IT it = &outlines; // outlines of blob
324
1.12M
  int32_t total = 0;           // total area
325
326
3.15M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
327
2.03M
    C_OUTLINE *outline = it.data();
328
2.03M
    total += outline->count_transitions(threshold);
329
2.03M
  }
330
1.12M
  return total;
331
1.12M
}
332
333
/**********************************************************************
334
 * C_BLOB::move
335
 *
336
 * Move C_BLOB by vector
337
 **********************************************************************/
338
339
void C_BLOB::move(   // reposition blob
340
    const ICOORD vec // by vector
341
0
) {
342
0
  C_OUTLINE_IT it(&outlines); // iterator
343
344
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
345
0
    it.data()->move(vec); // move each outline
346
0
  }
347
0
}
348
349
// Static helper for C_BLOB::rotate to allow recursion of child outlines.
350
0
static void RotateOutlineList(const FCOORD &rotation, C_OUTLINE_LIST *outlines) {
351
0
  C_OUTLINE_LIST new_outlines;
352
0
  C_OUTLINE_IT src_it(outlines);
353
0
  C_OUTLINE_IT dest_it(&new_outlines);
354
0
  while (!src_it.empty()) {
355
0
    C_OUTLINE *old_outline = src_it.extract();
356
0
    src_it.forward();
357
0
    auto *new_outline = new C_OUTLINE(old_outline, rotation);
358
0
    if (!old_outline->child()->empty()) {
359
0
      RotateOutlineList(rotation, old_outline->child());
360
0
      C_OUTLINE_IT child_it(new_outline->child());
361
0
      child_it.add_list_after(old_outline->child());
362
0
    }
363
0
    delete old_outline;
364
0
    dest_it.add_to_end(new_outline);
365
0
  }
366
0
  src_it.add_list_after(&new_outlines);
367
0
}
368
369
/**********************************************************************
370
 * C_BLOB::rotate
371
 *
372
 * Rotate C_BLOB by rotation.
373
 * Warning! has to rebuild all the C_OUTLINEs.
374
 **********************************************************************/
375
0
void C_BLOB::rotate(const FCOORD &rotation) {
376
0
  RotateOutlineList(rotation, &outlines);
377
0
}
378
379
// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
380
// outline list and its children.
381
2.51M
static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
382
2.51M
  C_OUTLINE_IT it(list);
383
5.09M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
384
2.58M
    C_OUTLINE *outline = it.data();
385
2.58M
    if (pix != nullptr && pixGetDepth(pix) == 8) {
386
0
      outline->ComputeEdgeOffsets(threshold, pix);
387
2.58M
    } else {
388
2.58M
      outline->ComputeBinaryOffsets();
389
2.58M
    }
390
2.58M
    if (!outline->child()->empty()) {
391
61.8k
      ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
392
61.8k
    }
393
2.58M
  }
394
2.51M
}
395
396
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
397
// if the supplied pix is 8-bit or the binary edges if nullptr.
398
2.45M
void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
399
2.45M
  ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
400
2.45M
}
401
402
// Estimates and returns the baseline position based on the shape of the
403
// outlines.
404
// We first find the minimum y-coord (y_mins) at each x-coord within the blob.
405
// If there is a run of some y or y+1 in y_mins that is longer than the total
406
// number of positions at bottom or bottom+1, subject to the additional
407
// condition that at least one side of the y/y+1 run is higher than y+1, so it
408
// is not a local minimum, then y, not the bottom, makes a good candidate
409
// baseline position for this blob. Eg
410
//   |                  ---|
411
//   |                  |
412
//   |-      -----------|        <=  Good candidate baseline position.
413
//    |-    -|
414
//     |   -|
415
//     |---|                     <=  Bottom of blob
416
2.24M
int16_t C_BLOB::EstimateBaselinePosition() {
417
2.24M
  TBOX box = bounding_box();
418
2.24M
  int left = box.left();
419
2.24M
  int width = box.width();
420
2.24M
  int bottom = box.bottom();
421
2.24M
  if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
422
847k
    return bottom; // This is only for non-CJK blobs.
423
847k
  }
424
  // Get the minimum y coordinate at each x-coordinate.
425
1.39M
  std::vector<int> y_mins(width + 1, box.top());
426
1.39M
  C_OUTLINE_IT it(&outlines);
427
2.79M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
428
1.39M
    C_OUTLINE *outline = it.data();
429
1.39M
    ICOORD pos = outline->start_pos();
430
34.0M
    for (int s = 0; s < outline->pathlength(); ++s) {
431
32.6M
      if (pos.y() < y_mins[pos.x() - left]) {
432
16.1M
        y_mins[pos.x() - left] = pos.y();
433
16.1M
      }
434
32.6M
      pos += outline->step(s);
435
32.6M
    }
436
1.39M
  }
437
  // Find the total extent of the bottom or bottom + 1.
438
1.39M
  int bottom_extent = 0;
439
10.2M
  for (int x = 0; x <= width; ++x) {
440
8.89M
    if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
441
7.46M
      ++bottom_extent;
442
7.46M
    }
443
8.89M
  }
444
  // Find the lowest run longer than the bottom extent that is not the bottom.
445
1.39M
  int best_min = box.top();
446
1.39M
  int prev_run = 0;
447
1.39M
  int prev_y = box.top();
448
1.39M
  int prev_prev_y = box.top();
449
4.03M
  for (int x = 0; x < width; x += prev_run) {
450
    // Find the length of the current run.
451
2.63M
    int y_at_x = y_mins[x];
452
2.63M
    int run = 1;
453
8.58M
    while (x + run <= width && y_mins[x + run] == y_at_x) {
454
5.95M
      ++run;
455
5.95M
    }
456
2.63M
    if (y_at_x > bottom + 1) {
457
      // Possible contender.
458
564k
      int total_run = run;
459
      // Find extent of current value or +1 to the right of x.
460
1.45M
      while (x + total_run <= width &&
461
1.36M
             (y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
462
895k
        ++total_run;
463
895k
      }
464
      // At least one end has to be higher so it is not a local max.
465
564k
      if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
466
        // If the prev_run is at y + 1, then we can add that too. There cannot
467
        // be a suitable run at y before that or we would have found it already.
468
411k
        if (prev_run > 0 && prev_y == y_at_x + 1) {
469
69.6k
          total_run += prev_run;
470
69.6k
        }
471
411k
        if (total_run > bottom_extent && y_at_x < best_min) {
472
41.6k
          best_min = y_at_x;
473
41.6k
        }
474
411k
      }
475
564k
    }
476
2.63M
    prev_run = run;
477
2.63M
    prev_prev_y = prev_y;
478
2.63M
    prev_y = y_at_x;
479
2.63M
  }
480
1.39M
  return best_min == box.top() ? bottom : best_min;
481
2.24M
}
482
483
0
static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
484
0
  C_OUTLINE_IT it(list);
485
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
486
0
    C_OUTLINE *outline = it.data();
487
0
    outline->render(left, top, pix);
488
0
    if (!outline->child()->empty()) {
489
0
      render_outline_list(outline->child(), left, top, pix);
490
0
    }
491
0
  }
492
0
}
493
494
0
static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
495
0
  C_OUTLINE_IT it(list);
496
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
497
0
    C_OUTLINE *outline = it.data();
498
0
    outline->render_outline(left, top, pix);
499
0
  }
500
0
}
501
502
// Returns a Pix rendering of the blob. pixDestroy after use.
503
0
Image C_BLOB::render() {
504
0
  TBOX box = bounding_box();
505
0
  Image pix = pixCreate(box.width(), box.height(), 1);
506
0
  render_outline_list(&outlines, box.left(), box.top(), pix);
507
0
  return pix;
508
0
}
509
510
// Returns a Pix rendering of the outline of the blob. (no fill).
511
// pixDestroy after use.
512
0
Image C_BLOB::render_outline() {
513
0
  TBOX box = bounding_box();
514
0
  Image pix = pixCreate(box.width(), box.height(), 1);
515
0
  render_outline_list_outline(&outlines, box.left(), box.top(), pix);
516
0
  return pix;
517
0
}
518
519
/**********************************************************************
520
 * C_BLOB::plot
521
 *
522
 * Draw the C_BLOB in the given colour.
523
 **********************************************************************/
524
525
#ifndef GRAPHICS_DISABLED
526
void C_BLOB::plot(ScrollView *window,               // window to draw in
527
                  ScrollView::Color blob_colour,    // main colour
528
                  ScrollView::Color child_colour) { // for holes
529
  plot_outline_list(&outlines, window, blob_colour, child_colour);
530
}
531
// Draws the blob in the given colour, and child_colour, normalized
532
// using the given denorm, making use of sub-pixel accurate information
533
// if available.
534
void C_BLOB::plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
535
                         ScrollView::Color child_colour, ScrollView *window) {
536
  plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, window);
537
}
538
#endif
539
540
} // namespace tesseract