Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/ccstruct/blobs.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * File:         blobs.cpp  (Formerly blobs.c)
4
 * Description:  Blob definition
5
 * Author:       Mark Seaman, OCR Technology
6
 *
7
 * (c) Copyright 1989, Hewlett-Packard Company.
8
 ** Licensed under the Apache License, Version 2.0 (the "License");
9
 ** you may not use this file except in compliance with the License.
10
 ** You may obtain a copy of the License at
11
 ** http://www.apache.org/licenses/LICENSE-2.0
12
 ** Unless required by applicable law or agreed to in writing, software
13
 ** distributed under the License is distributed on an "AS IS" BASIS,
14
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 ** See the License for the specific language governing permissions and
16
 ** limitations under the License.
17
 *
18
 *****************************************************************************/
19
20
/*----------------------------------------------------------------------
21
              I n c l u d e s
22
----------------------------------------------------------------------*/
23
// Include automatically generated configuration file if running autoconf.
24
#ifdef HAVE_CONFIG_H
25
#  include "config_auto.h"
26
#endif
27
28
#include "blobs.h"
29
30
#include "ccstruct.h"
31
#include "clst.h"
32
#include "linlsq.h"
33
#include "normalis.h"
34
#include "ocrblock.h"
35
#include "ocrrow.h"
36
#include "points.h"
37
#include "polyaprx.h"
38
#include "werd.h"
39
40
#include "helpers.h"
41
42
#include <algorithm>
43
44
namespace tesseract {
45
46
// A Vector representing the "vertical" direction when measuring the
47
// divisiblity of blobs into multiple blobs just by separating outlines.
48
// See divisible_blob below for the use.
49
const TPOINT kDivisibleVerticalUpright(0, 1);
50
// A vector representing the "vertical" direction for italic text for use
51
// when separating outlines. Using it actually deteriorates final accuracy,
52
// so it is only used for ApplyBoxes chopping to get a better segmentation.
53
const TPOINT kDivisibleVerticalItalic(1, 5);
54
55
/*----------------------------------------------------------------------
56
              F u n c t i o n s
57
----------------------------------------------------------------------*/
58
59
// Returns true when the two line segments cross each other.
60
// (Moved from outlines.cpp).
61
// Finds where the projected lines would cross and then checks to see if the
62
// point of intersection lies on both of the line segments. If it does
63
// then these two segments cross.
64
/* static */
65
7.56M
bool TPOINT::IsCrossed(const TPOINT &a0, const TPOINT &a1, const TPOINT &b0, const TPOINT &b1) {
66
7.56M
  TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;
67
68
7.56M
  b0a1.x = a1.x - b0.x;
69
7.56M
  b0a0.x = a0.x - b0.x;
70
7.56M
  a1b1.x = b1.x - a1.x;
71
7.56M
  b0b1.x = b1.x - b0.x;
72
7.56M
  a1a0.x = a0.x - a1.x;
73
7.56M
  b0a1.y = a1.y - b0.y;
74
7.56M
  b0a0.y = a0.y - b0.y;
75
7.56M
  a1b1.y = b1.y - a1.y;
76
7.56M
  b0b1.y = b1.y - b0.y;
77
7.56M
  a1a0.y = a0.y - a1.y;
78
79
7.56M
  int b0a1xb0b1 = b0a1.cross(b0b1);
80
7.56M
  int b0b1xb0a0 = b0b1.cross(b0a0);
81
7.56M
  int a1b1xa1a0 = a1b1.cross(a1a0);
82
  // For clarity, we want a1a0.cross(a1b0) here but we have b0a1 instead of a1b0
83
  // so use -a1b0.cross(b0a1) instead, which is the same.
84
7.56M
  int a1a0xa1b0 = -a1a0.cross(b0a1);
85
86
7.56M
  return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) &&
87
7.56M
         ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
88
7.56M
}
89
90
// Consume the circular list of EDGEPTs to make a TESSLINE.
91
2.97M
TESSLINE *TESSLINE::BuildFromOutlineList(EDGEPT *outline) {
92
2.97M
  auto *result = new TESSLINE;
93
2.97M
  result->loop = outline;
94
2.97M
  if (outline->src_outline != nullptr) {
95
    // ASSUMPTION: This function is only ever called from ApproximateOutline
96
    // and therefore either all points have a src_outline or all do not.
97
    // Just as SetupFromPos sets the vectors from the vertices, setup the
98
    // step_count members to indicate the (positive) number of original
99
    // C_OUTLINE steps to the next vertex.
100
0
    EDGEPT *pt = outline;
101
0
    do {
102
0
      pt->step_count = pt->next->start_step - pt->start_step;
103
0
      if (pt->step_count < 0) {
104
0
        pt->step_count += pt->src_outline->pathlength();
105
0
      }
106
0
      pt = pt->next;
107
0
    } while (pt != outline);
108
0
  }
109
2.97M
  result->SetupFromPos();
110
2.97M
  return result;
111
2.97M
}
112
113
// Copies the data and the outline, but leaves next untouched.
114
2.65M
void TESSLINE::CopyFrom(const TESSLINE &src) {
115
2.65M
  Clear();
116
2.65M
  topleft = src.topleft;
117
2.65M
  botright = src.botright;
118
2.65M
  start = src.start;
119
2.65M
  is_hole = src.is_hole;
120
2.65M
  if (src.loop != nullptr) {
121
2.65M
    EDGEPT *prevpt = nullptr;
122
2.65M
    EDGEPT *newpt = nullptr;
123
2.65M
    EDGEPT *srcpt = src.loop;
124
11.3M
    do {
125
11.3M
      newpt = new EDGEPT(*srcpt);
126
11.3M
      if (prevpt == nullptr) {
127
2.65M
        loop = newpt;
128
8.69M
      } else {
129
8.69M
        newpt->prev = prevpt;
130
8.69M
        prevpt->next = newpt;
131
8.69M
      }
132
11.3M
      prevpt = newpt;
133
11.3M
      srcpt = srcpt->next;
134
11.3M
    } while (srcpt != src.loop);
135
2.65M
    loop->prev = newpt;
136
2.65M
    newpt->next = loop;
137
2.65M
  }
138
2.65M
}
139
140
// Deletes owned data.
141
8.74M
void TESSLINE::Clear() {
142
8.74M
  if (loop == nullptr) {
143
3.06M
    return;
144
3.06M
  }
145
146
5.68M
  EDGEPT *this_edge = loop;
147
27.4M
  do {
148
27.4M
    EDGEPT *next_edge = this_edge->next;
149
27.4M
    delete this_edge;
150
27.4M
    this_edge = next_edge;
151
27.4M
  } while (this_edge != loop);
152
5.68M
  loop = nullptr;
153
5.68M
}
154
155
// Normalize in-place using the DENORM.
156
0
void TESSLINE::Normalize(const DENORM &denorm) {
157
0
  EDGEPT *pt = loop;
158
0
  do {
159
0
    denorm.LocalNormTransform(pt->pos, &pt->pos);
160
0
    pt = pt->next;
161
0
  } while (pt != loop);
162
0
  SetupFromPos();
163
0
}
164
165
// Rotates by the given rotation in place.
166
0
void TESSLINE::Rotate(const FCOORD rot) {
167
0
  EDGEPT *pt = loop;
168
0
  do {
169
0
    int tmp = static_cast<int>(floor(pt->pos.x * rot.x() - pt->pos.y * rot.y() + 0.5));
170
0
    pt->pos.y = static_cast<int>(floor(pt->pos.y * rot.x() + pt->pos.x * rot.y() + 0.5));
171
0
    pt->pos.x = tmp;
172
0
    pt = pt->next;
173
0
  } while (pt != loop);
174
0
  SetupFromPos();
175
0
}
176
177
// Moves by the given vec in place.
178
5.94M
void TESSLINE::Move(const ICOORD vec) {
179
5.94M
  EDGEPT *pt = loop;
180
31.8M
  do {
181
31.8M
    pt->pos.x += vec.x();
182
31.8M
    pt->pos.y += vec.y();
183
31.8M
    pt = pt->next;
184
31.8M
  } while (pt != loop);
185
5.94M
  SetupFromPos();
186
5.94M
}
187
188
// Scales by the given factor in place.
189
2.97M
void TESSLINE::Scale(float factor) {
190
2.97M
  EDGEPT *pt = loop;
191
15.9M
  do {
192
15.9M
    pt->pos.x = static_cast<int>(floor(pt->pos.x * factor + 0.5));
193
15.9M
    pt->pos.y = static_cast<int>(floor(pt->pos.y * factor + 0.5));
194
15.9M
    pt = pt->next;
195
15.9M
  } while (pt != loop);
196
2.97M
  SetupFromPos();
197
2.97M
}
198
199
// Sets up the start and vec members of the loop from the pos members.
200
11.8M
void TESSLINE::SetupFromPos() {
201
11.8M
  EDGEPT *pt = loop;
202
63.7M
  do {
203
63.7M
    pt->vec.x = pt->next->pos.x - pt->pos.x;
204
63.7M
    pt->vec.y = pt->next->pos.y - pt->pos.y;
205
63.7M
    pt = pt->next;
206
63.7M
  } while (pt != loop);
207
11.8M
  start = pt->pos;
208
11.8M
  ComputeBoundingBox();
209
11.8M
}
210
211
// Recomputes the bounding box from the points in the loop.
212
16.1M
void TESSLINE::ComputeBoundingBox() {
213
16.1M
  int minx = INT32_MAX;
214
16.1M
  int miny = INT32_MAX;
215
16.1M
  int maxx = -INT32_MAX;
216
16.1M
  int maxy = -INT32_MAX;
217
218
  // Find boundaries.
219
16.1M
  start = loop->pos;
220
16.1M
  EDGEPT *this_edge = loop;
221
99.3M
  do {
222
99.3M
    if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
223
99.3M
      if (this_edge->pos.x < minx) {
224
23.6M
        minx = this_edge->pos.x;
225
23.6M
      }
226
99.3M
      if (this_edge->pos.y < miny) {
227
40.7M
        miny = this_edge->pos.y;
228
40.7M
      }
229
99.3M
      if (this_edge->pos.x > maxx) {
230
42.0M
        maxx = this_edge->pos.x;
231
42.0M
      }
232
99.3M
      if (this_edge->pos.y > maxy) {
233
24.9M
        maxy = this_edge->pos.y;
234
24.9M
      }
235
99.3M
    }
236
99.3M
    this_edge = this_edge->next;
237
99.3M
  } while (this_edge != loop);
238
  // Reset bounds.
239
16.1M
  topleft.x = minx;
240
16.1M
  topleft.y = maxy;
241
16.1M
  botright.x = maxx;
242
16.1M
  botright.y = miny;
243
16.1M
}
244
245
// Computes the min and max cross product of the outline points with the
246
// given vec and returns the results in min_xp and max_xp. Geometrically
247
// this is the left and right edge of the outline perpendicular to the
248
// given direction, but to get the distance units correct, you would
249
// have to divide by the modulus of vec.
250
2.26M
void TESSLINE::MinMaxCrossProduct(const TPOINT vec, int *min_xp, int *max_xp) const {
251
2.26M
  *min_xp = INT32_MAX;
252
2.26M
  *max_xp = INT32_MIN;
253
2.26M
  EDGEPT *this_edge = loop;
254
15.6M
  do {
255
15.6M
    if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
256
15.6M
      int product = this_edge->pos.cross(vec);
257
15.6M
      UpdateRange(product, min_xp, max_xp);
258
15.6M
    }
259
15.6M
    this_edge = this_edge->next;
260
15.6M
  } while (this_edge != loop);
261
2.26M
}
262
263
158M
TBOX TESSLINE::bounding_box() const {
264
158M
  return TBOX(topleft.x, botright.y, botright.x, topleft.y);
265
158M
}
266
267
#ifndef GRAPHICS_DISABLED
268
void TESSLINE::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) {
269
  if (is_hole) {
270
    window->Pen(child_color);
271
  } else {
272
    window->Pen(color);
273
  }
274
  window->SetCursor(start.x, start.y);
275
  EDGEPT *pt = loop;
276
  do {
277
    bool prev_hidden = pt->IsHidden();
278
    pt = pt->next;
279
    if (prev_hidden) {
280
      window->SetCursor(pt->pos.x, pt->pos.y);
281
    } else {
282
      window->DrawTo(pt->pos.x, pt->pos.y);
283
    }
284
  } while (pt != loop);
285
}
286
#endif // !GRAPHICS_DISABLED
287
288
// Returns the first non-hidden EDGEPT that has a different src_outline to
289
// its predecessor, or, if all the same, the lowest indexed point.
290
11.2M
EDGEPT *TESSLINE::FindBestStartPt() const {
291
11.2M
  EDGEPT *best_start = loop;
292
11.2M
  int best_step = loop->start_step;
293
  // Iterate the polygon.
294
11.2M
  EDGEPT *pt = loop;
295
50.8M
  do {
296
50.8M
    if (pt->IsHidden()) {
297
823k
      continue;
298
823k
    }
299
50.0M
    if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline) {
300
823k
      return pt; // Qualifies as the best.
301
823k
    }
302
49.2M
    if (pt->start_step < best_step) {
303
0
      best_step = pt->start_step;
304
0
      best_start = pt;
305
0
    }
306
50.0M
  } while ((pt = pt->next) != loop);
307
10.4M
  return best_start;
308
11.2M
}
309
310
// Iterate the given list of outlines, converting to TESSLINE by polygonal
311
// approximation and recursively any children, returning the current tail
312
// of the resulting list of TESSLINEs.
313
static TESSLINE **ApproximateOutlineList(bool allow_detailed_fx, C_OUTLINE_LIST *outlines,
314
2.03M
                                         bool children, TESSLINE **tail) {
315
2.03M
  C_OUTLINE_IT ol_it(outlines);
316
5.00M
  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
317
2.97M
    C_OUTLINE *outline = ol_it.data();
318
2.97M
    if (outline->pathlength() > 0) {
319
2.97M
      TESSLINE *tessline = ApproximateOutline(allow_detailed_fx, outline);
320
2.97M
      tessline->is_hole = children;
321
2.97M
      *tail = tessline;
322
2.97M
      tail = &tessline->next;
323
2.97M
    }
324
2.97M
    if (!outline->child()->empty()) {
325
124k
      tail = ApproximateOutlineList(allow_detailed_fx, outline->child(), true, tail);
326
124k
    }
327
2.97M
  }
328
2.03M
  return tail;
329
2.03M
}
330
331
// Factory to build a TBLOB from a C_BLOB with polygonal approximation along
332
// the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
333
// contain pointers to the input C_OUTLINEs that enable higher-resolution
334
// feature extraction that does not use the polygonal approximation.
335
1.91M
TBLOB *TBLOB::PolygonalCopy(bool allow_detailed_fx, C_BLOB *src) {
336
1.91M
  auto *tblob = new TBLOB;
337
1.91M
  ApproximateOutlineList(allow_detailed_fx, src->out_list(), false, &tblob->outlines);
338
1.91M
  return tblob;
339
1.91M
}
340
341
// Factory builds a blob with no outlines, but copies the other member data.
342
1.21M
TBLOB *TBLOB::ShallowCopy(const TBLOB &src) {
343
1.21M
  auto *blob = new TBLOB;
344
1.21M
  blob->denorm_ = src.denorm_;
345
1.21M
  return blob;
346
1.21M
}
347
348
// Normalizes the blob for classification only if needed.
349
// (Normally this means a non-zero classify rotation.)
350
// If no Normalization is needed, then nullptr is returned, and the input blob
351
// can be used directly. Otherwise a new TBLOB is returned which must be
352
// deleted after use.
353
1.96M
TBLOB *TBLOB::ClassifyNormalizeIfNeeded() const {
354
1.96M
  TBLOB *rotated_blob = nullptr;
355
  // If necessary, copy the blob and rotate it. The rotation is always
356
  // +/- 90 degrees, as 180 was already taken care of.
357
1.96M
  if (denorm_.block() != nullptr && denorm_.block()->classify_rotation().y() != 0.0) {
358
0
    TBOX box = bounding_box();
359
0
    int x_middle = (box.left() + box.right()) / 2;
360
0
    int y_middle = (box.top() + box.bottom()) / 2;
361
0
    rotated_blob = new TBLOB(*this);
362
0
    const FCOORD &rotation = denorm_.block()->classify_rotation();
363
    // Move the rotated blob back to the same y-position so that we
364
    // can still distinguish similar glyphs with different y-position.
365
0
    float target_y =
366
0
        kBlnBaselineOffset + (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle);
367
0
    rotated_blob->Normalize(nullptr, &rotation, &denorm_, x_middle, y_middle, 1.0f, 1.0f, 0.0f,
368
0
                            target_y, denorm_.inverse(), denorm_.pix());
369
0
  }
370
1.96M
  return rotated_blob;
371
1.96M
}
372
373
// Copies the data and the outline, but leaves next untouched.
374
1.72M
void TBLOB::CopyFrom(const TBLOB &src) {
375
1.72M
  Clear();
376
1.72M
  TESSLINE *prev_outline = nullptr;
377
4.38M
  for (TESSLINE *srcline = src.outlines; srcline != nullptr; srcline = srcline->next) {
378
2.65M
    auto *new_outline = new TESSLINE(*srcline);
379
2.65M
    if (outlines == nullptr) {
380
1.72M
      outlines = new_outline;
381
1.72M
    } else {
382
929k
      prev_outline->next = new_outline;
383
929k
    }
384
2.65M
    prev_outline = new_outline;
385
2.65M
  }
386
1.72M
  denorm_ = src.denorm_;
387
1.72M
}
388
389
// Deletes owned data.
390
6.58M
void TBLOB::Clear() {
391
12.2M
  for (TESSLINE *next_outline = nullptr; outlines != nullptr; outlines = next_outline) {
392
5.68M
    next_outline = outlines->next;
393
5.68M
    delete outlines;
394
5.68M
  }
395
6.58M
}
396
397
// Sets up the built-in DENORM and normalizes the blob in-place.
398
// For parameters see DENORM::SetupNormalization, plus the inverse flag for
399
// this blob and the Pix for the full image.
400
void TBLOB::Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
401
                      float x_origin, float y_origin, float x_scale, float y_scale,
402
1.91M
                      float final_xshift, float final_yshift, bool inverse, Image pix) {
403
1.91M
  denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale,
404
1.91M
                             final_xshift, final_yshift);
405
1.91M
  denorm_.set_inverse(inverse);
406
1.91M
  denorm_.set_pix(pix);
407
  // TODO(rays) outline->Normalize is more accurate, but breaks tests due
408
  // the changes it makes. Reinstate this code with a retraining.
409
  // The reason this change is troublesome is that it normalizes for the
410
  // baseline value computed independently at each x-coord. If the baseline
411
  // is not horizontal, this introduces shear into the normalized blob, which
412
  // is useful on the rare occasions that the baseline is really curved, but
413
  // the baselines need to be stabilized the rest of the time.
414
#if 0
415
  for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) {
416
    outline->Normalize(denorm_);
417
  }
418
#else
419
1.91M
  denorm_.LocalNormBlob(this);
420
1.91M
#endif
421
1.91M
}
422
423
// Rotates by the given rotation in place.
424
0
void TBLOB::Rotate(const FCOORD rotation) {
425
0
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
426
0
    outline->Rotate(rotation);
427
0
  }
428
0
}
429
430
// Moves by the given vec in place.
431
3.82M
void TBLOB::Move(const ICOORD vec) {
432
9.76M
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
433
5.94M
    outline->Move(vec);
434
5.94M
  }
435
3.82M
}
436
437
// Scales by the given factor in place.
438
1.91M
void TBLOB::Scale(float factor) {
439
4.88M
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
440
2.97M
    outline->Scale(factor);
441
2.97M
  }
442
1.91M
}
443
444
// Recomputes the bounding boxes of the outlines.
445
1.57M
void TBLOB::ComputeBoundingBoxes() {
446
5.57M
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
447
4.00M
    outline->ComputeBoundingBox();
448
4.00M
  }
449
1.57M
}
450
451
// Returns the number of outlines.
452
0
int TBLOB::NumOutlines() const {
453
0
  int result = 0;
454
0
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
455
0
    ++result;
456
0
  }
457
0
  return result;
458
0
}
459
460
/**********************************************************************
461
 * TBLOB::bounding_box()
462
 *
463
 * Compute the bounding_box of a compound blob, defined to be the
464
 * bounding box of the union of all top-level outlines in the blob.
465
 **********************************************************************/
466
57.5M
TBOX TBLOB::bounding_box() const {
467
57.5M
  if (outlines == nullptr) {
468
7.02k
    return TBOX(0, 0, 0, 0);
469
7.02k
  }
470
57.5M
  TESSLINE *outline = outlines;
471
57.5M
  TBOX box = outline->bounding_box();
472
158M
  for (outline = outline->next; outline != nullptr; outline = outline->next) {
473
101M
    box += outline->bounding_box();
474
101M
  }
475
57.5M
  return box;
476
57.5M
}
477
478
// Finds and deletes any duplicate outlines in this blob, without deleting
479
// their EDGEPTs.
480
619k
void TBLOB::EliminateDuplicateOutlines() {
481
2.06M
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
482
1.44M
    TESSLINE *last_outline = outline;
483
5.59M
    for (TESSLINE *other_outline = outline->next; other_outline != nullptr;
484
4.15M
         last_outline = other_outline, other_outline = other_outline->next) {
485
4.15M
      if (outline->SameBox(*other_outline)) {
486
403k
        last_outline->next = other_outline->next;
487
        // This doesn't leak - the outlines share the EDGEPTs.
488
403k
        other_outline->loop = nullptr;
489
403k
        delete other_outline;
490
403k
        other_outline = last_outline;
491
        // If it is part of a cut, then it can't be a hole any more.
492
403k
        outline->is_hole = false;
493
403k
      }
494
4.15M
    }
495
1.44M
  }
496
619k
}
497
498
// Swaps the outlines of *this and next if needed to keep the centers in
499
// increasing x.
500
269k
void TBLOB::CorrectBlobOrder(TBLOB *next) {
501
269k
  TBOX box = bounding_box();
502
269k
  TBOX next_box = next->bounding_box();
503
269k
  if (box.x_middle() > next_box.x_middle()) {
504
3.62k
    std::swap(outlines, next->outlines);
505
3.62k
  }
506
269k
}
507
508
#ifndef GRAPHICS_DISABLED
509
void TBLOB::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) {
510
  for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
511
    outline->plot(window, color, child_color);
512
  }
513
}
514
#endif // !GRAPHICS_DISABLED
515
516
// Computes the center of mass and second moments for the old baseline and
517
// 2nd moment normalizations. Returns the outline length.
518
// The input denorm should be the normalizations that have been applied from
519
// the image to the current state of this TBLOB.
520
1.96M
int TBLOB::ComputeMoments(FCOORD *center, FCOORD *second_moments) const {
521
  // Compute 1st and 2nd moments of the original outline.
522
1.96M
  LLSQ accumulator;
523
1.96M
  TBOX box = bounding_box();
524
  // Iterate the outlines, accumulating edges relative the box.botleft().
525
1.96M
  CollectEdges(box, nullptr, &accumulator, nullptr, nullptr);
526
1.96M
  *center = accumulator.mean_point() + box.botleft();
527
  // The 2nd moments are just the standard deviation of the point positions.
528
1.96M
  double x2nd = sqrt(accumulator.x_variance());
529
1.96M
  double y2nd = sqrt(accumulator.y_variance());
530
1.96M
  if (x2nd < 1.0) {
531
629
    x2nd = 1.0;
532
629
  }
533
1.96M
  if (y2nd < 1.0) {
534
103
    y2nd = 1.0;
535
103
  }
536
1.96M
  second_moments->set_x(x2nd);
537
1.96M
  second_moments->set_y(y2nd);
538
1.96M
  return accumulator.count();
539
1.96M
}
540
541
// Computes the precise bounding box of the coords that are generated by
542
// GetEdgeCoords. This may be different from the bounding box of the polygon.
543
0
void TBLOB::GetPreciseBoundingBox(TBOX *precise_box) const {
544
0
  TBOX box = bounding_box();
545
0
  *precise_box = TBOX();
546
0
  CollectEdges(box, precise_box, nullptr, nullptr, nullptr);
547
0
  precise_box->move(box.botleft());
548
0
}
549
550
// Adds edges to the given vectors.
551
// For all the edge steps in all the outlines, or polygonal approximation
552
// where there are no edge steps, collects the steps into x_coords/y_coords.
553
// x_coords is a collection of the x-coords of vertical edges for each
554
// y-coord starting at box.bottom().
555
// y_coords is a collection of the y-coords of horizontal edges for each
556
// x-coord starting at box.left().
557
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
558
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
559
void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
560
0
                          std::vector<std::vector<int>> &y_coords) const {
561
0
  x_coords.clear();
562
0
  x_coords.resize(box.height());
563
0
  y_coords.clear();
564
0
  y_coords.resize(box.width());
565
0
  CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
566
  // Sort the output vectors.
567
0
  for (auto &coord : x_coords) {
568
0
    std::sort(coord.begin(), coord.end());
569
0
  }
570
0
  for (auto &coord : y_coords) {
571
0
    std::sort(coord.begin(), coord.end());
572
0
  }
573
0
}
574
575
// Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over
576
// the integer coordinate grid to properly weight long vectors.
577
26.8M
static void SegmentLLSQ(const FCOORD &pt1, const FCOORD &pt2, LLSQ *accumulator) {
578
26.8M
  FCOORD step(pt2);
579
26.8M
  step -= pt1;
580
26.8M
  int xstart = IntCastRounded(std::min(pt1.x(), pt2.x()));
581
26.8M
  int xend = IntCastRounded(std::max(pt1.x(), pt2.x()));
582
26.8M
  int ystart = IntCastRounded(std::min(pt1.y(), pt2.y()));
583
26.8M
  int yend = IntCastRounded(std::max(pt1.y(), pt2.y()));
584
26.8M
  if (xstart == xend && ystart == yend) {
585
18.5k
    return; // Nothing to do.
586
18.5k
  }
587
26.8M
  double weight = step.length() / (xend - xstart + yend - ystart);
588
  // Compute and save the y-position at the middle of each x-step.
589
493M
  for (int x = xstart; x < xend; ++x) {
590
466M
    double y = pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x();
591
466M
    accumulator->add(x + 0.5, y, weight);
592
466M
  }
593
  // Compute and save the x-position at the middle of each y-step.
594
1.11G
  for (int y = ystart; y < yend; ++y) {
595
1.09G
    double x = pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y();
596
1.09G
    accumulator->add(x, y + 0.5, weight);
597
1.09G
  }
598
26.8M
}
599
600
// Adds any edges from a single segment of outline between pt1 and pt2 to
601
// the x_coords, y_coords vectors. pt1 and pt2 should be relative to the
602
// bottom-left of the bounding box, hence indices to x_coords, y_coords
603
// are clipped to ([0,x_limit], [0,y_limit]).
604
// See GetEdgeCoords above for a description of x_coords, y_coords.
605
static void SegmentCoords(const FCOORD &pt1, const FCOORD &pt2, int x_limit, int y_limit,
606
                          std::vector<std::vector<int>> *x_coords,
607
0
                          std::vector<std::vector<int>> *y_coords) {
608
0
  FCOORD step(pt2);
609
0
  step -= pt1;
610
0
  int start = ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit);
611
0
  int end = ClipToRange(IntCastRounded(std::max(pt1.x(), pt2.x())), 0, x_limit);
612
0
  for (int x = start; x < end; ++x) {
613
0
    int y = IntCastRounded(pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x());
614
0
    (*y_coords)[x].push_back(y);
615
0
  }
616
0
  start = ClipToRange(IntCastRounded(std::min(pt1.y(), pt2.y())), 0, y_limit);
617
0
  end = ClipToRange(IntCastRounded(std::max(pt1.y(), pt2.y())), 0, y_limit);
618
0
  for (int y = start; y < end; ++y) {
619
0
    int x = IntCastRounded(pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y());
620
0
    (*x_coords)[y].push_back(x);
621
0
  }
622
0
}
623
624
// Adds any edges from a single segment of outline between pt1 and pt2 to
625
// the bbox such that it guarantees to contain anything produced by
626
// SegmentCoords.
627
0
static void SegmentBBox(const FCOORD &pt1, const FCOORD &pt2, TBOX *bbox) {
628
0
  FCOORD step(pt2);
629
0
  step -= pt1;
630
0
  int x1 = IntCastRounded(std::min(pt1.x(), pt2.x()));
631
0
  int x2 = IntCastRounded(std::max(pt1.x(), pt2.x()));
632
0
  if (x2 > x1) {
633
0
    int y1 = IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / step.x());
634
0
    int y2 = IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / step.x());
635
0
    TBOX point(x1, std::min(y1, y2), x2, std::max(y1, y2));
636
0
    *bbox += point;
637
0
  }
638
0
  int y1 = IntCastRounded(std::min(pt1.y(), pt2.y()));
639
0
  int y2 = IntCastRounded(std::max(pt1.y(), pt2.y()));
640
0
  if (y2 > y1) {
641
0
    int x1 = IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / step.y());
642
0
    int x2 = IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / step.y());
643
0
    TBOX point(std::min(x1, x2), y1, std::max(x1, x2), y2);
644
0
    *bbox += point;
645
0
  }
646
0
}
647
648
// Collects edges into the given bounding box, LLSQ accumulator and/or x_coords,
649
// y_coords vectors.
650
// For a description of x_coords/y_coords, see GetEdgeCoords above.
651
// Startpt to lastpt, inclusive, MUST have the same src_outline member,
652
// which may be nullptr. The vector from lastpt to its next is included in
653
// the accumulation. Hidden edges should be excluded by the caller.
654
// The input denorm should be the normalizations that have been applied from
655
// the image to the current state of the TBLOB from which startpt, lastpt come.
656
// box is the bounding box of the blob from which the EDGEPTs are taken and
657
// indices into x_coords, y_coords are offset by box.botleft().
658
static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm,
659
                              const TBOX &box, TBOX *bounding_box, LLSQ *accumulator,
660
                              std::vector<std::vector<int>> *x_coords,
661
5.81M
                              std::vector<std::vector<int>> *y_coords) {
662
5.81M
  const C_OUTLINE *outline = startpt->src_outline;
663
5.81M
  int x_limit = box.width() - 1;
664
5.81M
  int y_limit = box.height() - 1;
665
5.81M
  if (outline != nullptr) {
666
    // Use higher-resolution edge points stored on the outline.
667
    // The outline coordinates may not match the binary image because of the
668
    // rotation for vertical text lines, but the root_denorm IS the matching
669
    // start of the DENORM chain.
670
0
    const DENORM *root_denorm = denorm.RootDenorm();
671
0
    int step_length = outline->pathlength();
672
0
    int start_index = startpt->start_step;
673
    // Note that if this run straddles the wrap-around point of the outline,
674
    // that lastpt->start_step may have a lower index than startpt->start_step,
675
    // and we want to use an end_index that allows us to use a positive
676
    // increment, so we add step_length if necessary, but that may be beyond the
677
    // bounds of the outline steps/ due to wrap-around, so we use % step_length
678
    // everywhere, except for start_index.
679
0
    int end_index = lastpt->start_step + lastpt->step_count;
680
0
    if (end_index <= start_index) {
681
0
      end_index += step_length;
682
0
    }
683
    // pos is the integer coordinates of the binary image steps.
684
0
    ICOORD pos = outline->position_at_index(start_index);
685
0
    FCOORD origin(box.left(), box.bottom());
686
    // f_pos is a floating-point version of pos that offers improved edge
687
    // positioning using greyscale information or smoothing of edge steps.
688
0
    FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index);
689
    // pos_normed is f_pos after the appropriate normalization, and relative
690
    // to origin.
691
    // prev_normed is the previous value of pos_normed.
692
0
    FCOORD prev_normed;
693
0
    denorm.NormTransform(root_denorm, f_pos, &prev_normed);
694
0
    prev_normed -= origin;
695
0
    for (int index = start_index; index < end_index; ++index) {
696
0
      ICOORD step = outline->step(index % step_length);
697
      // Only use the point if its edge strength is positive. This excludes
698
      // points that don't provide useful information, eg
699
      // ___________
700
      //            |___________
701
      // The vertical step provides only noisy, damaging information, as even
702
      // with a greyscale image, the positioning of the edge there may be a
703
      // fictitious extrapolation, so previous processing has eliminated it.
704
0
      if (outline->edge_strength_at_index(index % step_length) > 0) {
705
0
        FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, index % step_length);
706
0
        FCOORD pos_normed;
707
0
        denorm.NormTransform(root_denorm, f_pos, &pos_normed);
708
0
        pos_normed -= origin;
709
        // Accumulate the information that is selected by the caller.
710
0
        if (bounding_box != nullptr) {
711
0
          SegmentBBox(pos_normed, prev_normed, bounding_box);
712
0
        }
713
0
        if (accumulator != nullptr) {
714
0
          SegmentLLSQ(pos_normed, prev_normed, accumulator);
715
0
        }
716
0
        if (x_coords != nullptr && y_coords != nullptr) {
717
0
          SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords, y_coords);
718
0
        }
719
0
        prev_normed = pos_normed;
720
0
      }
721
0
      pos += step;
722
0
    }
723
5.81M
  } else {
724
    // There is no outline, so we are forced to use the polygonal approximation.
725
5.81M
    const EDGEPT *endpt = lastpt->next;
726
5.81M
    const EDGEPT *pt = startpt;
727
26.8M
    do {
728
26.8M
      FCOORD next_pos(pt->next->pos.x - box.left(), pt->next->pos.y - box.bottom());
729
26.8M
      FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom());
730
26.8M
      if (bounding_box != nullptr) {
731
0
        SegmentBBox(next_pos, pos, bounding_box);
732
0
      }
733
26.8M
      if (accumulator != nullptr) {
734
26.8M
        SegmentLLSQ(next_pos, pos, accumulator);
735
26.8M
      }
736
26.8M
      if (x_coords != nullptr && y_coords != nullptr) {
737
0
        SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords);
738
0
      }
739
26.8M
    } while ((pt = pt->next) != endpt);
740
5.81M
  }
741
5.81M
}
742
743
// For all the edge steps in all the outlines, or polygonal approximation
744
// where there are no edge steps, collects the steps into the bounding_box,
745
// llsq and/or the x_coords/y_coords. Both are used in different kinds of
746
// normalization.
747
// For a description of x_coords, y_coords, see GetEdgeCoords above.
748
void TBLOB::CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
749
                         std::vector<std::vector<int>> *x_coords,
750
1.96M
                         std::vector<std::vector<int>> *y_coords) const {
751
  // Iterate the outlines.
752
7.59M
  for (const TESSLINE *ol = outlines; ol != nullptr; ol = ol->next) {
753
    // Iterate the polygon.
754
5.62M
    EDGEPT *loop_pt = ol->FindBestStartPt();
755
5.62M
    EDGEPT *pt = loop_pt;
756
5.62M
    if (pt == nullptr) {
757
0
      continue;
758
0
    }
759
6.42M
    do {
760
6.42M
      if (pt->IsHidden()) {
761
603k
        continue;
762
603k
      }
763
      // Find a run of equal src_outline.
764
5.81M
      EDGEPT *last_pt = pt;
765
26.8M
      do {
766
26.8M
        last_pt = last_pt->next;
767
26.8M
      } while (last_pt != loop_pt && !last_pt->IsHidden() &&
768
26.8M
               last_pt->src_outline == pt->src_outline);
769
5.81M
      last_pt = last_pt->prev;
770
5.81M
      CollectEdgesOfRun(pt, last_pt, denorm_, box, bounding_box, llsq, x_coords, y_coords);
771
5.81M
      pt = last_pt;
772
6.42M
    } while ((pt = pt->next) != loop_pt);
773
5.62M
  }
774
1.96M
}
775
776
// Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
777
// approximation along the way.
778
438k
TWERD *TWERD::PolygonalCopy(bool allow_detailed_fx, WERD *src) {
779
438k
  auto *tessword = new TWERD;
780
438k
  tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN);
781
438k
  C_BLOB_IT b_it(src->cblob_list());
782
2.35M
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
783
1.91M
    C_BLOB *blob = b_it.data();
784
1.91M
    TBLOB *tblob = TBLOB::PolygonalCopy(allow_detailed_fx, blob);
785
1.91M
    tessword->blobs.push_back(tblob);
786
1.91M
  }
787
438k
  return tessword;
788
438k
}
789
790
// Baseline normalizes the blobs in-place, recording the normalization in the
791
// DENORMs in the blobs.
792
void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
793
                        float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
794
438k
                        const TBOX *norm_box, DENORM *word_denorm) {
795
438k
  TBOX word_box = bounding_box();
796
438k
  if (norm_box != nullptr) {
797
0
    word_box = *norm_box;
798
0
  }
799
438k
  float word_middle = (word_box.left() + word_box.right()) / 2.0f;
800
438k
  float input_y_offset = 0.0f;
801
438k
  auto final_y_offset = static_cast<float>(kBlnBaselineOffset);
802
438k
  float scale = kBlnXHeight / x_height;
803
438k
  if (row == nullptr) {
804
0
    word_middle = word_box.left();
805
0
    input_y_offset = word_box.bottom();
806
0
    final_y_offset = 0.0f;
807
438k
  } else {
808
438k
    input_y_offset = row->base_line(word_middle) + baseline_shift;
809
438k
  }
810
1.91M
  for (auto blob : blobs) {
811
1.91M
    TBOX blob_box = blob->bounding_box();
812
1.91M
    float mid_x = (blob_box.left() + blob_box.right()) / 2.0f;
813
1.91M
    float baseline = input_y_offset;
814
1.91M
    float blob_scale = scale;
815
1.91M
    if (numeric_mode) {
816
0
      baseline = blob_box.bottom();
817
0
      blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), scale, scale * 1.5f);
818
1.91M
    } else if (row != nullptr) {
819
1.91M
      baseline = row->base_line(mid_x) + baseline_shift;
820
1.91M
    }
821
    // The image will be 8-bit grey if the input was grey or color. Note that in
822
    // a grey image 0 is black and 255 is white. If the input was binary, then
823
    // the pix will be binary and 0 is white, with 1 being black.
824
    // To tell the difference pixGetDepth() will return 8 or 1.
825
    // The inverse flag will be true iff the word has been determined to be
826
    // white on black, and is independent of whether the pix is 8 bit or 1 bit.
827
1.91M
    blob->Normalize(block, nullptr, nullptr, word_middle, baseline, blob_scale, blob_scale, 0.0f,
828
1.91M
                    final_y_offset, inverse, pix);
829
1.91M
  }
830
438k
  if (word_denorm != nullptr) {
831
438k
    word_denorm->SetupNormalization(block, nullptr, nullptr, word_middle, input_y_offset, scale,
832
438k
                                    scale, 0.0f, final_y_offset);
833
438k
    word_denorm->set_inverse(inverse);
834
438k
    word_denorm->set_pix(pix);
835
438k
  }
836
438k
}
837
838
// Copies the data and the blobs, but leaves next untouched.
839
54.4k
void TWERD::CopyFrom(const TWERD &src) {
840
54.4k
  Clear();
841
54.4k
  latin_script = src.latin_script;
842
1.19M
  for (auto blob : src.blobs) {
843
1.19M
    auto *new_blob = new TBLOB(*blob);
844
1.19M
    blobs.push_back(new_blob);
845
1.19M
  }
846
54.4k
}
847
848
// Deletes owned data.
849
722k
void TWERD::Clear() {
850
3.82M
  for (auto blob : blobs) {
851
3.82M
    delete blob;
852
3.82M
  }
853
722k
  blobs.clear();
854
722k
}
855
856
// Recomputes the bounding boxes of the blobs.
857
135k
void TWERD::ComputeBoundingBoxes() {
858
1.22M
  for (auto &blob : blobs) {
859
1.22M
    blob->ComputeBoundingBoxes();
860
1.22M
  }
861
135k
}
862
863
438k
TBOX TWERD::bounding_box() const {
864
438k
  TBOX result;
865
1.91M
  for (auto blob : blobs) {
866
1.91M
    TBOX box = blob->bounding_box();
867
1.91M
    result += box;
868
1.91M
  }
869
438k
  return result;
870
438k
}
871
872
// Merges the blobs from start to end, not including end, and deletes
873
// the blobs between start and end.
874
782
void TWERD::MergeBlobs(unsigned start, unsigned end) {
875
782
  if (end > blobs.size()) {
876
0
    end = blobs.size();
877
0
  }
878
782
  if (start >= end) {
879
0
    return; // Nothing to do.
880
0
  }
881
782
  TESSLINE *outline = blobs[start]->outlines;
882
1.56k
  for (auto i = start + 1; i < end; ++i) {
883
782
    TBLOB *next_blob = blobs[i];
884
    // Take the outlines from the next blob.
885
782
    if (outline == nullptr) {
886
0
      blobs[start]->outlines = next_blob->outlines;
887
0
      outline = blobs[start]->outlines;
888
782
    } else {
889
838
      while (outline->next != nullptr) {
890
56
        outline = outline->next;
891
56
      }
892
782
      outline->next = next_blob->outlines;
893
782
      next_blob->outlines = nullptr;
894
782
    }
895
    // Delete the next blob and move on.
896
782
    delete next_blob;
897
782
    blobs[i] = nullptr;
898
782
  }
899
  // Remove dead blobs from the vector.
900
  // TODO: optimize.
901
1.56k
  for (auto i = start + 1; i < end && start + 1 < blobs.size(); ++i) {
902
782
    blobs.erase(blobs.begin() + start + 1);
903
782
  }
904
782
}
905
906
#ifndef GRAPHICS_DISABLED
907
void TWERD::plot(ScrollView *window) {
908
  ScrollView::Color color = WERD::NextColor(ScrollView::BLACK);
909
  for (auto &blob : blobs) {
910
    blob->plot(window, color, ScrollView::BROWN);
911
    color = WERD::NextColor(color);
912
  }
913
}
914
#endif // !GRAPHICS_DISABLED
915
916
/**********************************************************************
917
 * divisible_blob
918
 *
919
 * Returns true if the blob contains multiple outlines than can be
920
 * separated using divide_blobs. Sets the location to be used in the
921
 * call to divide_blobs.
922
 **********************************************************************/
923
996k
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location) {
924
996k
  if (blob->outlines == nullptr || blob->outlines->next == nullptr) {
925
586k
    return false; // Need at least 2 outlines for it to be possible.
926
586k
  }
927
410k
  int max_gap = 0;
928
410k
  TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright;
929
1.61M
  for (TESSLINE *outline1 = blob->outlines; outline1 != nullptr; outline1 = outline1->next) {
930
1.20M
    if (outline1->is_hole) {
931
261k
      continue; // Holes do not count as separable.
932
261k
    }
933
941k
    TPOINT mid_pt1((outline1->topleft.x + outline1->botright.x) / 2,
934
941k
                   (outline1->topleft.y + outline1->botright.y) / 2);
935
941k
    int mid_prod1 = mid_pt1.cross(vertical);
936
941k
    int min_prod1, max_prod1;
937
941k
    outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
938
2.59M
    for (TESSLINE *outline2 = outline1->next; outline2 != nullptr; outline2 = outline2->next) {
939
1.65M
      if (outline2->is_hole) {
940
324k
        continue; // Holes do not count as separable.
941
324k
      }
942
1.32M
      TPOINT mid_pt2((outline2->topleft.x + outline2->botright.x) / 2,
943
1.32M
                     (outline2->topleft.y + outline2->botright.y) / 2);
944
1.32M
      int mid_prod2 = mid_pt2.cross(vertical);
945
1.32M
      int min_prod2, max_prod2;
946
1.32M
      outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2);
947
1.32M
      int mid_gap = abs(mid_prod2 - mid_prod1);
948
1.32M
      int overlap = std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2);
949
1.32M
      if (mid_gap - overlap / 4 > max_gap) {
950
285k
        max_gap = mid_gap - overlap / 4;
951
285k
        *location = mid_pt1;
952
285k
        *location += mid_pt2;
953
285k
        *location /= 2;
954
285k
      }
955
1.32M
    }
956
941k
  }
957
  // Use the y component of the vertical vector as an approximation to its
958
  // length.
959
410k
  return max_gap > vertical.y;
960
996k
}
961
962
/**********************************************************************
963
 * divide_blobs
964
 *
965
 * Create two blobs by grouping the outlines in the appropriate blob.
966
 * The outlines that are beyond the location point are moved to the
967
 * other blob.  The ones whose x location is less than that point are
968
 * retained in the original blob.
969
 **********************************************************************/
970
269k
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location) {
971
269k
  TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright;
972
269k
  TESSLINE *outline1 = nullptr;
973
269k
  TESSLINE *outline2 = nullptr;
974
975
269k
  TESSLINE *outline = blob->outlines;
976
269k
  blob->outlines = nullptr;
977
269k
  int location_prod = location.cross(vertical);
978
979
1.53M
  while (outline != nullptr) {
980
1.26M
    TPOINT mid_pt((outline->topleft.x + outline->botright.x) / 2,
981
1.26M
                  (outline->topleft.y + outline->botright.y) / 2);
982
1.26M
    int mid_prod = mid_pt.cross(vertical);
983
1.26M
    if (mid_prod < location_prod) {
984
      // Outline is in left blob.
985
626k
      if (outline1) {
986
360k
        outline1->next = outline;
987
360k
      } else {
988
265k
        blob->outlines = outline;
989
265k
      }
990
626k
      outline1 = outline;
991
642k
    } else {
992
      // Outline is in right blob.
993
642k
      if (outline2) {
994
376k
        outline2->next = outline;
995
376k
      } else {
996
266k
        other_blob->outlines = outline;
997
266k
      }
998
642k
      outline2 = outline;
999
642k
    }
1000
1.26M
    outline = outline->next;
1001
1.26M
  }
1002
1003
269k
  if (outline1) {
1004
265k
    outline1->next = nullptr;
1005
265k
  }
1006
269k
  if (outline2) {
1007
266k
    outline2->next = nullptr;
1008
266k
  }
1009
269k
}
1010
1011
} // namespace tesseract