Coverage Report

Created: 2024-02-28 06:46

/src/tesseract/src/textord/imagefind.cpp
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        imagefind.cpp
3
// Description: Function to find image and drawing regions in an image
4
//              and create a corresponding list of empty blobs.
5
// Author:      Ray Smith
6
//
7
// (C) Copyright 2008, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
#ifdef HAVE_CONFIG_H
21
#  include "config_auto.h"
22
#endif
23
24
#include "imagefind.h"
25
26
#include "colpartitiongrid.h"
27
#include "linlsq.h"
28
#include "params.h"
29
#include "statistc.h"
30
31
#include <allheaders.h>
32
33
#include <algorithm>
34
35
namespace tesseract {
36
37
static INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
38
39
// Fraction of width or height of on pixels that can be discarded from a
40
// roughly rectangular image.
41
const double kMinRectangularFraction = 0.125;
42
// Fraction of width or height to consider image completely used.
43
const double kMaxRectangularFraction = 0.75;
44
// Fraction of width or height to allow transition from kMinRectangularFraction
45
// to kMaxRectangularFraction, equivalent to a dy/dx skew.
46
const double kMaxRectangularGradient = 0.1; // About 6 degrees.
47
// Minimum image size to be worth looking for images on.
48
const int kMinImageFindSize = 100;
49
// Pixel padding for noise blobs and partitions when rendering on the image
50
// mask to encourage them to join together. Make it too big and images
51
// will fatten out too much and have to be clipped to text.
52
const int kNoisePadding = 4;
53
54
// Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
55
// stepping y+=y_step, until y=y_end. *ystart is input/output.
56
// If the number of black pixels in a row, pix_count fits this pattern:
57
// 0 or more rows with pix_count < min_count then
58
// <= mid_width rows with min_count <= pix_count <= max_count then
59
// a row with pix_count > max_count then
60
// true is returned, and *y_start = the first y with pix_count >= min_count.
61
static bool HScanForEdge(uint32_t *data, int wpl, int x_start, int x_end, int min_count,
62
0
                         int mid_width, int max_count, int y_end, int y_step, int *y_start) {
63
0
  int mid_rows = 0;
64
0
  for (int y = *y_start; y != y_end; y += y_step) {
65
    // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a
66
    // subset.
67
0
    int pix_count = 0;
68
0
    uint32_t *line = data + wpl * y;
69
0
    for (int x = x_start; x < x_end; ++x) {
70
0
      if (GET_DATA_BIT(line, x)) {
71
0
        ++pix_count;
72
0
      }
73
0
    }
74
0
    if (mid_rows == 0 && pix_count < min_count) {
75
0
      continue; // In the min phase.
76
0
    }
77
0
    if (mid_rows == 0) {
78
0
      *y_start = y; // Save the y_start where we came out of the min phase.
79
0
    }
80
0
    if (pix_count > max_count) {
81
0
      return true; // Found the pattern.
82
0
    }
83
0
    ++mid_rows;
84
0
    if (mid_rows > mid_width) {
85
0
      break; // Middle too big.
86
0
    }
87
0
  }
88
0
  return false; // Never found max_count.
89
0
}
90
91
// Scans vertically on y=[y_start,y_end), starting with x=*x_start,
92
// stepping x+=x_step, until x=x_end. *x_start is input/output.
93
// If the number of black pixels in a column, pix_count fits this pattern:
94
// 0 or more cols with pix_count < min_count then
95
// <= mid_width cols with min_count <= pix_count <= max_count then
96
// a column with pix_count > max_count then
97
// true is returned, and *x_start = the first x with pix_count >= min_count.
98
static bool VScanForEdge(uint32_t *data, int wpl, int y_start, int y_end, int min_count,
99
0
                         int mid_width, int max_count, int x_end, int x_step, int *x_start) {
100
0
  int mid_cols = 0;
101
0
  for (int x = *x_start; x != x_end; x += x_step) {
102
0
    int pix_count = 0;
103
0
    uint32_t *line = data + y_start * wpl;
104
0
    for (int y = y_start; y < y_end; ++y, line += wpl) {
105
0
      if (GET_DATA_BIT(line, x)) {
106
0
        ++pix_count;
107
0
      }
108
0
    }
109
0
    if (mid_cols == 0 && pix_count < min_count) {
110
0
      continue; // In the min phase.
111
0
    }
112
0
    if (mid_cols == 0) {
113
0
      *x_start = x; // Save the place where we came out of the min phase.
114
0
    }
115
0
    if (pix_count > max_count) {
116
0
      return true; // found the pattern.
117
0
    }
118
0
    ++mid_cols;
119
0
    if (mid_cols > mid_width) {
120
0
      break; // Middle too big.
121
0
    }
122
0
  }
123
0
  return false; // Never found max_count.
124
0
}
125
126
// Returns true if there is a rectangle in the source pix, such that all
127
// pixel rows and column slices outside of it have less than
128
// min_fraction of the pixels black, and within max_skew_gradient fraction
129
// of the pixels on the inside, there are at least max_fraction of the
130
// pixels black. In other words, the inside of the rectangle looks roughly
131
// rectangular, and the outside of it looks like extra bits.
132
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
133
// Note: the algorithm is iterative, allowing it to slice off pixels from
134
// one edge, allowing it to then slice off more pixels from another edge.
135
static bool pixNearlyRectangular(Image pix, double min_fraction, double max_fraction,
136
                                 double max_skew_gradient, int *x_start, int *y_start,
137
0
                                 int *x_end, int *y_end) {
138
0
  ASSERT_HOST(pix != nullptr);
139
0
  *x_start = 0;
140
0
  *x_end = pixGetWidth(pix);
141
0
  *y_start = 0;
142
0
  *y_end = pixGetHeight(pix);
143
144
0
  uint32_t *data = pixGetData(pix);
145
0
  int wpl = pixGetWpl(pix);
146
0
  bool any_cut = false;
147
0
  bool left_done = false;
148
0
  bool right_done = false;
149
0
  bool top_done = false;
150
0
  bool bottom_done = false;
151
0
  do {
152
0
    any_cut = false;
153
    // Find the top/bottom edges.
154
0
    int width = *x_end - *x_start;
155
0
    int min_count = static_cast<int>(width * min_fraction);
156
0
    int max_count = static_cast<int>(width * max_fraction);
157
0
    int edge_width = static_cast<int>(width * max_skew_gradient);
158
0
    if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, max_count, *y_end, 1,
159
0
                     y_start) &&
160
0
        !top_done) {
161
0
      top_done = true;
162
0
      any_cut = true;
163
0
    }
164
0
    --(*y_end);
165
0
    if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, max_count, *y_start, -1,
166
0
                     y_end) &&
167
0
        !bottom_done) {
168
0
      bottom_done = true;
169
0
      any_cut = true;
170
0
    }
171
0
    ++(*y_end);
172
173
    // Find the left/right edges.
174
0
    int height = *y_end - *y_start;
175
0
    min_count = static_cast<int>(height * min_fraction);
176
0
    max_count = static_cast<int>(height * max_fraction);
177
0
    edge_width = static_cast<int>(height * max_skew_gradient);
178
0
    if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, max_count, *x_end, 1,
179
0
                     x_start) &&
180
0
        !left_done) {
181
0
      left_done = true;
182
0
      any_cut = true;
183
0
    }
184
0
    --(*x_end);
185
0
    if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, max_count, *x_start, -1,
186
0
                     x_end) &&
187
0
        !right_done) {
188
0
      right_done = true;
189
0
      any_cut = true;
190
0
    }
191
0
    ++(*x_end);
192
0
  } while (any_cut);
193
194
  // All edges must satisfy the condition of sharp gradient in pixel density
195
  // in order for the full rectangle to be present.
196
0
  return left_done && right_done && top_done && bottom_done;
197
0
}
198
199
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
200
// analogous to pixConnComp, except that connected components which are nearly
201
// rectangular are replaced with solid rectangles.
202
// The returned boxa, pixa may be nullptr, meaning no images found.
203
// If not nullptr, they must be destroyed by the caller.
204
// Resolution of pix should match the source image (Tesseract::pix_binary_)
205
// so the output coordinate systems match.
206
static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa,
207
0
                                      Pixa **pixa) {
208
0
  *boxa = nullptr;
209
0
  *pixa = nullptr;
210
211
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
212
0
    pixa_debug->AddPix(pix, "Conncompimage");
213
0
  }
214
  // Find the individual image regions in the mask image.
215
0
  *boxa = pixConnComp(pix, pixa, 8);
216
  // Rectangularize the individual images. If a sharp edge in vertical and/or
217
  // horizontal occupancy can be found, it indicates a probably rectangular
218
  // image with unwanted bits merged on, so clip to the approximate rectangle.
219
0
  int npixes = 0;
220
0
  if (*boxa != nullptr && *pixa != nullptr) {
221
0
    npixes = pixaGetCount(*pixa);
222
0
  }
223
0
  for (int i = 0; i < npixes; ++i) {
224
0
    int x_start, x_end, y_start, y_end;
225
0
    Image img_pix = pixaGetPix(*pixa, i, L_CLONE);
226
0
    if (textord_tabfind_show_images && pixa_debug != nullptr) {
227
0
      pixa_debug->AddPix(img_pix, "A component");
228
0
    }
229
0
    if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction,
230
0
                             kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) {
231
0
      Image simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
232
0
      pixSetAll(simple_pix);
233
0
      img_pix.destroy();
234
      // pixaReplacePix takes ownership of the simple_pix.
235
0
      pixaReplacePix(*pixa, i, simple_pix, nullptr);
236
0
      img_pix = pixaGetPix(*pixa, i, L_CLONE);
237
      // Fix the box to match the new pix.
238
0
      l_int32 x, y, width, height;
239
0
      boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
240
0
      Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start);
241
0
      boxaReplaceBox(*boxa, i, simple_box);
242
0
    }
243
0
    img_pix.destroy();
244
0
  }
245
0
}
246
247
// Finds image regions within the BINARY source pix (page image) and returns
248
// the image regions as a mask image.
249
// The returned pix may be nullptr, meaning no images found.
250
// If not nullptr, it must be PixDestroyed by the caller.
251
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
252
0
Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
253
0
  auto width = pixGetWidth(pix);
254
0
  auto height = pixGetHeight(pix);
255
  // Not worth looking at small images.
256
  // Leptonica will print an error message and return nullptr if we call
257
  // pixGenHalftoneMask(pixr, nullptr, ...) with width or height < 100
258
  // for the reduced image, so we want to bypass that, too.
259
0
  if (width / 2 < kMinImageFindSize || height / 2 < kMinImageFindSize) {
260
0
    return pixCreate(width, height, 1);
261
0
  }
262
263
  // Reduce by factor 2.
264
0
  Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
265
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
266
0
    pixa_debug->AddPix(pixr, "CascadeReduced");
267
0
  }
268
269
  // Get the halftone mask directly from Leptonica.
270
0
  l_int32 ht_found = 0;
271
0
  Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
272
0
  Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
273
0
  if (pixadb) {
274
0
    Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
275
0
    if (textord_tabfind_show_images && pixa_debug != nullptr) {
276
0
      pixa_debug->AddPix(pixdb, "HalftoneMask");
277
0
    }
278
0
    pixdb.destroy();
279
0
    pixaDestroy(&pixadb);
280
0
  }
281
0
  pixr.destroy();
282
0
  if (!ht_found && pixht2 != nullptr) {
283
0
    pixht2.destroy();
284
0
  }
285
0
  if (pixht2 == nullptr) {
286
0
    return pixCreate(width, height, 1);
287
0
  }
288
289
  // Expand back up again.
290
0
  Image pixht = pixExpandReplicate(pixht2, 2);
291
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
292
0
    pixa_debug->AddPix(pixht, "HalftoneReplicated");
293
0
  }
294
0
  pixht2.destroy();
295
296
  // Fill to capture pixels near the mask edges that were missed
297
0
  Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
298
0
  pixht |= pixt;
299
0
  pixt.destroy();
300
301
  // Eliminate lines and bars that may be joined to images.
302
0
  Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
303
0
  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
304
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
305
0
    pixa_debug->AddPix(pixfinemask, "FineMask");
306
0
  }
307
0
  Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
308
0
  Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
309
0
  pixreduced.destroy();
310
0
  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
311
0
  Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
312
0
  pixreduced2.destroy();
313
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
314
0
    pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
315
0
  }
316
  // Combine the coarse and fine image masks.
317
0
  pixcoarsemask &= pixfinemask;
318
0
  pixfinemask.destroy();
319
  // Dilate a bit to make sure we get everything.
320
0
  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
321
0
  Image pixmask = pixExpandReplicate(pixcoarsemask, 16);
322
0
  pixcoarsemask.destroy();
323
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
324
0
    pixa_debug->AddPix(pixmask, "MaskDilated");
325
0
  }
326
  // And the image mask with the line and bar remover.
327
0
  pixht &= pixmask;
328
0
  pixmask.destroy();
329
0
  if (textord_tabfind_show_images && pixa_debug != nullptr) {
330
0
    pixa_debug->AddPix(pixht, "FinalMask");
331
0
  }
332
  // Make the result image the same size as the input.
333
0
  Image result = pixCreate(width, height, 1);
334
0
  result |= pixht;
335
0
  pixht.destroy();
336
0
  return result;
337
0
}
338
339
// Given an input pix, and a bounding rectangle, the sides of the rectangle
340
// are shrunk inwards until they bound any black pixels found within the
341
// original rectangle. Returns false if the rectangle contains no black
342
// pixels at all.
343
0
bool ImageFind::BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end) {
344
0
  Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start);
345
0
  Box *output_box = nullptr;
346
0
  pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
347
0
  bool result = output_box != nullptr;
348
0
  if (result) {
349
0
    l_int32 x, y, width, height;
350
0
    boxGetGeometry(output_box, &x, &y, &width, &height);
351
0
    *x_start = x;
352
0
    *y_start = y;
353
0
    *x_end = x + width;
354
0
    *y_end = y + height;
355
0
    boxDestroy(&output_box);
356
0
  }
357
0
  boxDestroy(&input_box);
358
0
  return result;
359
0
}
360
361
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
362
// of the point from the given line, defined by a pair of points in the 3-D
363
// (RGB) space, line1 and line2.
364
double ImageFind::ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2,
365
0
                                        const uint8_t *point) {
366
0
  int line_vector[kRGBRMSColors];
367
0
  int point_vector[kRGBRMSColors];
368
0
  for (int i = 0; i < kRGBRMSColors; ++i) {
369
0
    line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
370
0
    point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
371
0
  }
372
0
  line_vector[L_ALPHA_CHANNEL] = 0;
373
  // Now the cross product in 3d.
374
0
  int cross[kRGBRMSColors];
375
0
  cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] -
376
0
                     line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
377
0
  cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] -
378
0
                       line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
379
0
  cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] -
380
0
                      line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
381
0
  cross[L_ALPHA_CHANNEL] = 0;
382
  // Now the sums of the squares.
383
0
  double cross_sq = 0.0;
384
0
  double line_sq = 0.0;
385
0
  for (int j = 0; j < kRGBRMSColors; ++j) {
386
0
    cross_sq += static_cast<double>(cross[j]) * cross[j];
387
0
    line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
388
0
  }
389
0
  if (line_sq == 0.0) {
390
0
    return 0.0;
391
0
  }
392
0
  return cross_sq / line_sq; // This is the squared distance.
393
0
}
394
395
// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
396
// The following functions are responsible for cutting a polygonal image from
397
// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
398
// with DivideImageIntoParts as the master.
399
// Problem statement:
400
// We start with a single connected component from the image mask: we get
401
// a Pix of the component, and its location on the page (im_box).
402
// The objective of cutting a polygonal image from its rectangle is to avoid
403
// interfering text, but not text that completely overlaps the image.
404
//     ------------------------------      ------------------------------
405
//     |   Single input partition   |      | 1 Cut up output partitions |
406
//     |                            |      ------------------------------
407
//   Av|oid                         |    Avoid |                        |
408
//     |                            |          |________________________|
409
//  Int|erfering                    |   Interfering  |                  |
410
//     |                            |           _____|__________________|
411
//    T|ext                         |     Text |                        |
412
//     |        Text-on-image       |          |     Text-on-image      |
413
//     ------------------------------          --------------------------
414
// DivideImageIntoParts does this by building a ColPartition_LIST (not in the
415
// grid) with each ColPartition representing one of the rectangles needed,
416
// starting with a single rectangle for the whole image component, and cutting
417
// bits out of it with CutChunkFromParts as needed to avoid text. The output
418
// ColPartitions are supposed to be ordered from top to bottom.
419
420
// The problem is complicated by the fact that we have rotated the coordinate
421
// system to make text lines horizontal, so if we need to look at the component
422
// image, we have to rotate the coordinates. Throughout the functions in this
423
// section im_box is the rectangle representing the image component in the
424
// rotated page coordinates (where we are building our output ColPartitions),
425
// rotation is the rotation that we used to get there, and rerotation is the
426
// rotation required to get back to original page image coordinates.
427
// To get to coordinates in the component image, pix, we rotate the im_box,
428
// the point we want to locate, and subtract the rotated point from the top-left
429
// of the rotated im_box.
430
// im_box is therefore essential to calculating coordinates within the pix.
431
432
// Returns true if there are no black pixels in between the boxes.
433
// The im_box must represent the bounding box of the pix in tesseract
434
// coordinates, which may be negative, due to rotations to make the textlines
435
// horizontal. The boxes are rotated by rotation, which should undo such
436
// rotations, before mapping them onto the pix.
437
bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
438
0
                                    const FCOORD &rotation, Image pix) {
439
0
  TBOX search_box(box1);
440
0
  search_box += box2;
441
0
  if (box1.x_gap(box2) >= box1.y_gap(box2)) {
442
0
    if (box1.x_gap(box2) <= 0) {
443
0
      return true;
444
0
    }
445
0
    search_box.set_left(std::min(box1.right(), box2.right()));
446
0
    search_box.set_right(std::max(box1.left(), box2.left()));
447
0
  } else {
448
0
    if (box1.y_gap(box2) <= 0) {
449
0
      return true;
450
0
    }
451
0
    search_box.set_top(std::max(box1.bottom(), box2.bottom()));
452
0
    search_box.set_bottom(std::min(box1.top(), box2.top()));
453
0
  }
454
0
  return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
455
0
}
456
457
// Returns the number of pixels in box in the pix.
458
// rotation, pix and im_box are defined in the large comment above.
459
int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
460
0
                                       Image pix) {
461
  // Intersect it with the image box.
462
0
  box &= im_box; // This is in-place box intersection.
463
0
  if (box.null_box()) {
464
0
    return 0;
465
0
  }
466
0
  box.rotate(rotation);
467
0
  TBOX rotated_im_box(im_box);
468
0
  rotated_im_box.rotate(rotation);
469
0
  Image rect_pix = pixCreate(box.width(), box.height(), 1);
470
0
  pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix,
471
0
              box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top());
472
0
  l_int32 result;
473
0
  pixCountPixels(rect_pix, &result, nullptr);
474
0
  rect_pix.destroy();
475
0
  return result;
476
0
}
477
478
// The box given by slice contains some black pixels, but not necessarily
479
// over the whole box. Shrink the x bounds of slice, but not the y bounds
480
// until there is at least one black pixel in the outermost columns.
481
// rotation, rerotation, pix and im_box are defined in the large comment above.
482
static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box,
483
0
                               Image pix, TBOX *slice) {
484
0
  TBOX rotated_box(*slice);
485
0
  rotated_box.rotate(rerotation);
486
0
  TBOX rotated_im_box(im_box);
487
0
  rotated_im_box.rotate(rerotation);
488
0
  int left = rotated_box.left() - rotated_im_box.left();
489
0
  int right = rotated_box.right() - rotated_im_box.left();
490
0
  int top = rotated_im_box.top() - rotated_box.top();
491
0
  int bottom = rotated_im_box.top() - rotated_box.bottom();
492
0
  ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom);
493
0
  top = rotated_im_box.top() - top;
494
0
  bottom = rotated_im_box.top() - bottom;
495
0
  left += rotated_im_box.left();
496
0
  right += rotated_im_box.left();
497
0
  rotated_box.set_to_given_coords(left, bottom, right, top);
498
0
  rotated_box.rotate(rotation);
499
0
  slice->set_left(rotated_box.left());
500
0
  slice->set_right(rotated_box.right());
501
0
}
502
503
// The meat of cutting a polygonal image around text.
504
// This function covers the general case of cutting a box out of a box
505
// as shown:
506
// Input                               Output
507
// ------------------------------      ------------------------------
508
// |   Single input partition   |      | 1 Cut up output partitions |
509
// |                            |      ------------------------------
510
// |         ----------         |      ---------           ----------
511
// |         |  box   |         |      |   2   |   box     |    3   |
512
// |         |        |         |      |       |  is cut   |        |
513
// |         ----------         |      ---------   out     ----------
514
// |                            |      ------------------------------
515
// |                            |      |   4                        |
516
// ------------------------------      ------------------------------
517
// In the context that this function is used, at most 3 of the above output
518
// boxes will be created, as the overlapping box is never contained by the
519
// input.
520
// The above cutting operation is executed for each element of part_list that
521
// is overlapped by the input box. Each modified ColPartition is replaced
522
// in place in the list by the output of the cutting operation in the order
523
// shown above, so iff no holes are ever created, the output will be in
524
// top-to-bottom order, but in extreme cases, hole creation is possible.
525
// In such cases, the output order may cause strange block polygons.
526
// rotation, rerotation, pix and im_box are defined in the large comment above.
527
static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation,
528
0
                              const FCOORD &rerotation, Image pix, ColPartition_LIST *part_list) {
529
0
  ASSERT_HOST(!part_list->empty());
530
0
  ColPartition_IT part_it(part_list);
531
0
  do {
532
0
    ColPartition *part = part_it.data();
533
0
    TBOX part_box = part->bounding_box();
534
0
    if (part_box.overlap(box)) {
535
      // This part must be cut and replaced with the remains. There are
536
      // up to 4 pieces to be made. Start with the first one and use
537
      // add_before_stay_put. For each piece if it has no black pixels
538
      // left, just don't make the box.
539
      // Above box.
540
0
      if (box.top() < part_box.top()) {
541
0
        TBOX slice(part_box);
542
0
        slice.set_bottom(box.top());
543
0
        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
544
0
          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
545
0
          part_it.add_before_stay_put(
546
0
              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT));
547
0
        }
548
0
      }
549
      // Left of box.
550
0
      if (box.left() > part_box.left()) {
551
0
        TBOX slice(part_box);
552
0
        slice.set_right(box.left());
553
0
        if (box.top() < part_box.top()) {
554
0
          slice.set_top(box.top());
555
0
        }
556
0
        if (box.bottom() > part_box.bottom()) {
557
0
          slice.set_bottom(box.bottom());
558
0
        }
559
0
        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
560
0
          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
561
0
          part_it.add_before_stay_put(
562
0
              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT));
563
0
        }
564
0
      }
565
      // Right of box.
566
0
      if (box.right() < part_box.right()) {
567
0
        TBOX slice(part_box);
568
0
        slice.set_left(box.right());
569
0
        if (box.top() < part_box.top()) {
570
0
          slice.set_top(box.top());
571
0
        }
572
0
        if (box.bottom() > part_box.bottom()) {
573
0
          slice.set_bottom(box.bottom());
574
0
        }
575
0
        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
576
0
          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
577
0
          part_it.add_before_stay_put(
578
0
              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT));
579
0
        }
580
0
      }
581
      // Below box.
582
0
      if (box.bottom() > part_box.bottom()) {
583
0
        TBOX slice(part_box);
584
0
        slice.set_top(box.bottom());
585
0
        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
586
0
          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
587
0
          part_it.add_before_stay_put(
588
0
              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT));
589
0
        }
590
0
      }
591
0
      part->DeleteBoxes();
592
0
      delete part_it.extract();
593
0
    }
594
0
    part_it.forward();
595
0
  } while (!part_it.at_first());
596
0
}
597
598
// Starts with the bounding box of the image component and cuts it up
599
// so that it doesn't intersect text where possible.
600
// Strong fully contained horizontal text is marked as text on image,
601
// and does not cause a division of the image.
602
// For more detail see the large comment above on cutting polygonal images
603
// from a rectangle.
604
// rotation, rerotation, pix and im_box are defined in the large comment above.
605
static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation,
606
                                 const FCOORD &rerotation, Image pix,
607
0
                                 ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) {
608
  // Add the full im_box partition to the list to begin with.
609
0
  ColPartition *pix_part =
610
0
      ColPartition::FakePartition(im_box, PT_UNKNOWN, BRT_RECTIMAGE, BTFT_NONTEXT);
611
0
  ColPartition_IT part_it(part_list);
612
0
  part_it.add_after_then_move(pix_part);
613
614
0
  rectsearch->StartRectSearch(im_box);
615
0
  ColPartition *part;
616
0
  while ((part = rectsearch->NextRectSearch()) != nullptr) {
617
0
    TBOX part_box = part->bounding_box();
618
0
    if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
619
      // This image is completely covered by an existing text partition.
620
0
      for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
621
0
        ColPartition *pix_part = part_it.extract();
622
0
        pix_part->DeleteBoxes();
623
0
        delete pix_part;
624
0
      }
625
0
    } else if (part->flow() == BTFT_STRONG_CHAIN) {
626
      // Text intersects the box.
627
0
      TBOX overlap_box = part_box.intersection(im_box);
628
      // Intersect it with the image box.
629
0
      int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box, rerotation, pix);
630
0
      if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
631
        // Eat a piece out of the image.
632
        // Pad it so that pieces eaten out look decent.
633
0
        int padding = part->blob_type() == BRT_VERT_TEXT ? part_box.width() : part_box.height();
634
0
        part_box.set_top(part_box.top() + padding / 2);
635
0
        part_box.set_bottom(part_box.bottom() - padding / 2);
636
0
        CutChunkFromParts(part_box, im_box, rotation, rerotation, pix, part_list);
637
0
      } else {
638
        // Strong overlap with the black area, so call it text on image.
639
0
        part->set_flow(BTFT_TEXT_ON_IMAGE);
640
0
      }
641
0
    }
642
0
    if (part_list->empty()) {
643
0
      break;
644
0
    }
645
0
  }
646
0
}
647
648
// Search for the rightmost text that overlaps vertically and is to the left
649
// of the given box, but within the given left limit.
650
0
static int ExpandImageLeft(const TBOX &box, int left_limit, ColPartitionGrid *part_grid) {
651
0
  ColPartitionGridSearch search(part_grid);
652
0
  ColPartition *part;
653
  // Search right to left for any text that overlaps.
654
0
  search.StartSideSearch(box.left(), box.bottom(), box.top());
655
0
  while ((part = search.NextSideSearch(true)) != nullptr) {
656
0
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
657
0
      const TBOX &part_box(part->bounding_box());
658
0
      if (part_box.y_gap(box) < 0) {
659
0
        if (part_box.right() > left_limit && part_box.right() < box.left()) {
660
0
          left_limit = part_box.right();
661
0
        }
662
0
        break;
663
0
      }
664
0
    }
665
0
  }
666
0
  if (part != nullptr) {
667
    // Search for the nearest text up to the one we already found.
668
0
    TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
669
0
    search.StartRectSearch(search_box);
670
0
    while ((part = search.NextRectSearch()) != nullptr) {
671
0
      if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
672
0
        const TBOX &part_box(part->bounding_box());
673
0
        if (part_box.y_gap(box) < 0) {
674
0
          if (part_box.right() > left_limit && part_box.right() < box.left()) {
675
0
            left_limit = part_box.right();
676
0
          }
677
0
        }
678
0
      }
679
0
    }
680
0
  }
681
0
  return left_limit;
682
0
}
683
684
// Search for the leftmost text that overlaps vertically and is to the right
685
// of the given box, but within the given right limit.
686
0
static int ExpandImageRight(const TBOX &box, int right_limit, ColPartitionGrid *part_grid) {
687
0
  ColPartitionGridSearch search(part_grid);
688
0
  ColPartition *part;
689
  // Search left to right for any text that overlaps.
690
0
  search.StartSideSearch(box.right(), box.bottom(), box.top());
691
0
  while ((part = search.NextSideSearch(false)) != nullptr) {
692
0
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
693
0
      const TBOX &part_box(part->bounding_box());
694
0
      if (part_box.y_gap(box) < 0) {
695
0
        if (part_box.left() < right_limit && part_box.left() > box.right()) {
696
0
          right_limit = part_box.left();
697
0
        }
698
0
        break;
699
0
      }
700
0
    }
701
0
  }
702
0
  if (part != nullptr) {
703
    // Search for the nearest text up to the one we already found.
704
0
    TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
705
0
    search.StartRectSearch(search_box);
706
0
    while ((part = search.NextRectSearch()) != nullptr) {
707
0
      if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
708
0
        const TBOX &part_box(part->bounding_box());
709
0
        if (part_box.y_gap(box) < 0) {
710
0
          if (part_box.left() < right_limit && part_box.left() > box.right()) {
711
0
            right_limit = part_box.left();
712
0
          }
713
0
        }
714
0
      }
715
0
    }
716
0
  }
717
0
  return right_limit;
718
0
}
719
720
// Search for the topmost text that overlaps horizontally and is below
721
// the given box, but within the given bottom limit.
722
0
static int ExpandImageBottom(const TBOX &box, int bottom_limit, ColPartitionGrid *part_grid) {
723
0
  ColPartitionGridSearch search(part_grid);
724
0
  ColPartition *part;
725
  // Search right to left for any text that overlaps.
726
0
  search.StartVerticalSearch(box.left(), box.right(), box.bottom());
727
0
  while ((part = search.NextVerticalSearch(true)) != nullptr) {
728
0
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
729
0
      const TBOX &part_box(part->bounding_box());
730
0
      if (part_box.x_gap(box) < 0) {
731
0
        if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) {
732
0
          bottom_limit = part_box.top();
733
0
        }
734
0
        break;
735
0
      }
736
0
    }
737
0
  }
738
0
  if (part != nullptr) {
739
    // Search for the nearest text up to the one we already found.
740
0
    TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
741
0
    search.StartRectSearch(search_box);
742
0
    while ((part = search.NextRectSearch()) != nullptr) {
743
0
      if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
744
0
        const TBOX &part_box(part->bounding_box());
745
0
        if (part_box.x_gap(box) < 0) {
746
0
          if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) {
747
0
            bottom_limit = part_box.top();
748
0
          }
749
0
        }
750
0
      }
751
0
    }
752
0
  }
753
0
  return bottom_limit;
754
0
}
755
756
// Search for the bottommost text that overlaps horizontally and is above
757
// the given box, but within the given top limit.
758
0
static int ExpandImageTop(const TBOX &box, int top_limit, ColPartitionGrid *part_grid) {
759
0
  ColPartitionGridSearch search(part_grid);
760
0
  ColPartition *part;
761
  // Search right to left for any text that overlaps.
762
0
  search.StartVerticalSearch(box.left(), box.right(), box.top());
763
0
  while ((part = search.NextVerticalSearch(false)) != nullptr) {
764
0
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
765
0
      const TBOX &part_box(part->bounding_box());
766
0
      if (part_box.x_gap(box) < 0) {
767
0
        if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) {
768
0
          top_limit = part_box.bottom();
769
0
        }
770
0
        break;
771
0
      }
772
0
    }
773
0
  }
774
0
  if (part != nullptr) {
775
    // Search for the nearest text up to the one we already found.
776
0
    TBOX search_box(box.left(), box.top(), box.right(), top_limit);
777
0
    search.StartRectSearch(search_box);
778
0
    while ((part = search.NextRectSearch()) != nullptr) {
779
0
      if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
780
0
        const TBOX &part_box(part->bounding_box());
781
0
        if (part_box.x_gap(box) < 0) {
782
0
          if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) {
783
0
            top_limit = part_box.bottom();
784
0
          }
785
0
        }
786
0
      }
787
0
    }
788
0
  }
789
0
  return top_limit;
790
0
}
791
792
// Expands the image box in the given direction until it hits text,
793
// limiting the expansion to the given limit box, returning the result
794
// in the expanded box, and
795
// returning the increase in area resulting from the expansion.
796
static int ExpandImageDir(BlobNeighbourDir dir, const TBOX &im_box, const TBOX &limit_box,
797
0
                          ColPartitionGrid *part_grid, TBOX *expanded_box) {
798
0
  *expanded_box = im_box;
799
0
  switch (dir) {
800
0
    case BND_LEFT:
801
0
      expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(), part_grid));
802
0
      break;
803
0
    case BND_RIGHT:
804
0
      expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(), part_grid));
805
0
      break;
806
0
    case BND_ABOVE:
807
0
      expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid));
808
0
      break;
809
0
    case BND_BELOW:
810
0
      expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(), part_grid));
811
0
      break;
812
0
    default:
813
0
      return 0;
814
0
  }
815
0
  return expanded_box->area() - im_box.area();
816
0
}
817
818
// Expands the image partition into any non-text until it touches text.
819
// The expansion proceeds in the order of increasing increase in area
820
// as a heuristic to find the best rectangle by expanding in the most
821
// constrained direction first.
822
0
static void MaximalImageBoundingBox(ColPartitionGrid *part_grid, TBOX *im_box) {
823
0
  bool dunnit[BND_COUNT];
824
0
  memset(dunnit, 0, sizeof(dunnit));
825
0
  TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(), part_grid->tright().x(),
826
0
                 part_grid->tright().y());
827
0
  TBOX text_box(*im_box);
828
0
  for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
829
    // Find the direction with least area increase.
830
0
    int best_delta = -1;
831
0
    BlobNeighbourDir best_dir = BND_LEFT;
832
0
    TBOX expanded_boxes[BND_COUNT];
833
0
    for (int dir = 0; dir < BND_COUNT; ++dir) {
834
0
      auto bnd = static_cast<BlobNeighbourDir>(dir);
835
0
      if (!dunnit[bnd]) {
836
0
        TBOX expanded_box;
837
0
        int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid, &expanded_boxes[bnd]);
838
0
        if (best_delta < 0 || area_delta < best_delta) {
839
0
          best_delta = area_delta;
840
0
          best_dir = bnd;
841
0
        }
842
0
      }
843
0
    }
844
    // Run the best and remember the direction.
845
0
    dunnit[best_dir] = true;
846
0
    text_box = expanded_boxes[best_dir];
847
0
  }
848
0
  *im_box = text_box;
849
0
}
850
851
// Helper deletes the given partition but first marks up all the blobs as
852
// noise, so they get deleted later, and disowns them.
853
// If the initial type of the partition is image, then it actually deletes
854
// the blobs, as the partition owns them in that case.
855
0
static void DeletePartition(ColPartition *part) {
856
0
  BlobRegionType type = part->blob_type();
857
0
  if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
858
    // The partition owns the boxes of these types, so just delete them.
859
0
    part->DeleteBoxes(); // From a previous iteration.
860
0
  } else {
861
    // Once marked, the blobs will be swept up by TidyBlobs.
862
0
    part->set_flow(BTFT_NONTEXT);
863
0
    part->set_blob_type(BRT_NOISE);
864
0
    part->SetBlobTypes();
865
0
    part->DisownBoxes(); // Created before FindImagePartitions.
866
0
  }
867
0
  delete part;
868
0
}
869
870
// The meat of joining fragmented images and consuming ColPartitions of
871
// uncertain type.
872
// *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
873
// expanded to consume overlapping and nearby ColPartitions of uncertain type
874
// and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
875
// max_image_box. *part_ptr is NOT in the part_grid.
876
// rectsearch is already constructed on the part_grid, and is used for
877
// searching for overlapping and nearby ColPartitions.
878
// ExpandImageIntoParts is called iteratively until it returns false. Each
879
// time it absorbs the nearest non-contained candidate, and everything that
880
// is fully contained within part_ptr's bounding box.
881
// TODO(rays) what if it just eats everything inside max_image_box in one go?
882
static bool ExpandImageIntoParts(const TBOX &max_image_box, ColPartitionGridSearch *rectsearch,
883
0
                                 ColPartitionGrid *part_grid, ColPartition **part_ptr) {
884
0
  ColPartition *image_part = *part_ptr;
885
0
  TBOX im_part_box = image_part->bounding_box();
886
0
  if (textord_tabfind_show_images > 1) {
887
0
    tprintf("Searching for merge with image part:");
888
0
    im_part_box.print();
889
0
    tprintf("Text box=");
890
0
    max_image_box.print();
891
0
  }
892
0
  rectsearch->StartRectSearch(max_image_box);
893
0
  ColPartition *part;
894
0
  ColPartition *best_part = nullptr;
895
0
  int best_dist = 0;
896
0
  while ((part = rectsearch->NextRectSearch()) != nullptr) {
897
0
    if (textord_tabfind_show_images > 1) {
898
0
      tprintf("Considering merge with part:");
899
0
      part->Print();
900
0
      if (im_part_box.contains(part->bounding_box())) {
901
0
        tprintf("Fully contained\n");
902
0
      } else if (!max_image_box.contains(part->bounding_box())) {
903
0
        tprintf("Not within text box\n");
904
0
      } else if (part->flow() == BTFT_STRONG_CHAIN) {
905
0
        tprintf("Too strong text\n");
906
0
      } else {
907
0
        tprintf("Real candidate\n");
908
0
      }
909
0
    }
910
0
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_TEXT_ON_IMAGE ||
911
0
        part->blob_type() == BRT_POLYIMAGE) {
912
0
      continue;
913
0
    }
914
0
    TBOX box = part->bounding_box();
915
0
    if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
916
0
      if (im_part_box.contains(box)) {
917
        // Eat it completely.
918
0
        rectsearch->RemoveBBox();
919
0
        DeletePartition(part);
920
0
        continue;
921
0
      }
922
0
      int x_dist = std::max(0, box.x_gap(im_part_box));
923
0
      int y_dist = std::max(0, box.y_gap(im_part_box));
924
0
      int dist = x_dist * x_dist + y_dist * y_dist;
925
0
      if (dist > box.area() || dist > im_part_box.area()) {
926
0
        continue; // Not close enough.
927
0
      }
928
0
      if (best_part == nullptr || dist < best_dist) {
929
        // We keep the nearest qualifier, which is not necessarily the nearest.
930
0
        best_part = part;
931
0
        best_dist = dist;
932
0
      }
933
0
    }
934
0
  }
935
0
  if (best_part != nullptr) {
936
    // It needs expanding. We can do it without touching text.
937
0
    TBOX box = best_part->bounding_box();
938
0
    if (textord_tabfind_show_images > 1) {
939
0
      tprintf("Merging image part:");
940
0
      im_part_box.print();
941
0
      tprintf("with part:");
942
0
      box.print();
943
0
    }
944
0
    im_part_box += box;
945
0
    *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN, BRT_RECTIMAGE, BTFT_NONTEXT);
946
0
    DeletePartition(image_part);
947
0
    part_grid->RemoveBBox(best_part);
948
0
    DeletePartition(best_part);
949
0
    rectsearch->RepositionIterator();
950
0
    return true;
951
0
  }
952
0
  return false;
953
0
}
954
955
// Helper function to compute the overlap area between the box and the
956
// given list of partitions.
957
0
static int IntersectArea(const TBOX &box, ColPartition_LIST *part_list) {
958
0
  int intersect_area = 0;
959
0
  ColPartition_IT part_it(part_list);
960
  // Iterate the parts and subtract intersecting area.
961
0
  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
962
0
    ColPartition *image_part = part_it.data();
963
0
    TBOX intersect = box.intersection(image_part->bounding_box());
964
0
    intersect_area += intersect.area();
965
0
  }
966
0
  return intersect_area;
967
0
}
968
969
// part_list is a set of ColPartitions representing a polygonal image, and
970
// im_box is the union of the bounding boxes of all the parts in part_list.
971
// Tests whether part is to be consumed by the polygonal image.
972
// Returns true if part is weak text and more than half of its area is
973
// intersected by parts from the part_list, and it is contained within im_box.
974
static bool TestWeakIntersectedPart(const TBOX &im_box, ColPartition_LIST *part_list,
975
0
                                    ColPartition *part) {
976
0
  if (part->flow() < BTFT_STRONG_CHAIN) {
977
    // A weak partition intersects the box.
978
0
    const TBOX &part_box = part->bounding_box();
979
0
    if (im_box.contains(part_box)) {
980
0
      int area = part_box.area();
981
0
      int intersect_area = IntersectArea(part_box, part_list);
982
0
      if (area < 2 * intersect_area) {
983
0
        return true;
984
0
      }
985
0
    }
986
0
  }
987
0
  return false;
988
0
}
989
990
// A rectangular or polygonal image has been completed, in part_list, bounding
991
// box in im_box. We want to eliminate weak text or other uncertain partitions
992
// (basically anything that is not BRT_STRONG_CHAIN or better) from both the
993
// part_grid and the big_parts list that are contained within im_box and
994
// overlapped enough by the possibly polygonal image.
995
static void EliminateWeakParts(const TBOX &im_box, ColPartitionGrid *part_grid,
996
0
                               ColPartition_LIST *big_parts, ColPartition_LIST *part_list) {
997
0
  ColPartitionGridSearch rectsearch(part_grid);
998
0
  ColPartition *part;
999
0
  rectsearch.StartRectSearch(im_box);
1000
0
  while ((part = rectsearch.NextRectSearch()) != nullptr) {
1001
0
    if (TestWeakIntersectedPart(im_box, part_list, part)) {
1002
0
      BlobRegionType type = part->blob_type();
1003
0
      if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
1004
0
        rectsearch.RemoveBBox();
1005
0
        DeletePartition(part);
1006
0
      } else {
1007
        // The part is mostly covered, so mark it. Non-image partitions are
1008
        // kept hanging around to mark the image for pass2
1009
0
        part->set_flow(BTFT_NONTEXT);
1010
0
        part->set_blob_type(BRT_NOISE);
1011
0
        part->SetBlobTypes();
1012
0
      }
1013
0
    }
1014
0
  }
1015
0
  ColPartition_IT big_it(big_parts);
1016
0
  for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
1017
0
    part = big_it.data();
1018
0
    if (TestWeakIntersectedPart(im_box, part_list, part)) {
1019
      // Once marked, the blobs will be swept up by TidyBlobs.
1020
0
      DeletePartition(big_it.extract());
1021
0
    }
1022
0
  }
1023
0
}
1024
1025
// Helper scans for good text partitions overlapping the given box.
1026
// If there are no good text partitions overlapping an expanded box, then
1027
// the box is expanded, otherwise, the original box is returned.
1028
// If good text overlaps the box, true is returned.
1029
0
static bool ScanForOverlappingText(ColPartitionGrid *part_grid, TBOX *box) {
1030
0
  ColPartitionGridSearch rectsearch(part_grid);
1031
0
  TBOX padded_box(*box);
1032
0
  padded_box.pad(kNoisePadding, kNoisePadding);
1033
0
  rectsearch.StartRectSearch(padded_box);
1034
0
  ColPartition *part;
1035
0
  bool any_text_in_padded_rect = false;
1036
0
  while ((part = rectsearch.NextRectSearch()) != nullptr) {
1037
0
    if (part->flow() == BTFT_CHAIN || part->flow() == BTFT_STRONG_CHAIN) {
1038
      // Text intersects the box.
1039
0
      any_text_in_padded_rect = true;
1040
0
      const TBOX &part_box = part->bounding_box();
1041
0
      if (box->overlap(part_box)) {
1042
0
        return true;
1043
0
      }
1044
0
    }
1045
0
  }
1046
0
  if (!any_text_in_padded_rect) {
1047
0
    *box = padded_box;
1048
0
  }
1049
0
  return false;
1050
0
}
1051
1052
// Renders the boxes of image parts from the supplied list onto the image_pix,
1053
// except where they interfere with existing strong text in the part_grid,
1054
// and then deletes them.
1055
// Box coordinates are rotated by rerotate to match the image.
1056
static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid,
1057
0
                                    ColPartition_LIST *image_parts, Image image_pix) {
1058
0
  if (image_pix == nullptr) {
1059
0
    return;
1060
0
  }
1061
0
  int imageheight = pixGetHeight(image_pix);
1062
0
  ColPartition_IT part_it(image_parts);
1063
0
  for (; !part_it.empty(); part_it.forward()) {
1064
0
    ColPartition *part = part_it.extract();
1065
0
    TBOX part_box = part->bounding_box();
1066
0
    BlobRegionType type = part->blob_type();
1067
0
    if (!ScanForOverlappingText(part_grid, &part_box) || type == BRT_RECTIMAGE ||
1068
0
        type == BRT_POLYIMAGE) {
1069
      // Mark the box on the image.
1070
      // All coords need to be rotated to match the image.
1071
0
      part_box.rotate(rerotate);
1072
0
      int left = part_box.left();
1073
0
      int top = part_box.top();
1074
0
      pixRasterop(image_pix, left, imageheight - top, part_box.width(), part_box.height(), PIX_SET,
1075
0
                  nullptr, 0, 0);
1076
0
    }
1077
0
    DeletePartition(part);
1078
0
  }
1079
0
}
1080
1081
// Locates all the image partitions in the part_grid, that were found by a
1082
// previous call to FindImagePartitions, marks them in the image_mask,
1083
// removes them from the grid, and deletes them. This makes it possible to
1084
// call FindImagePartitions again to produce less broken-up and less
1085
// overlapping image partitions.
1086
// rerotation specifies how to rotate the partition coords to match
1087
// the image_mask, since this function is used after orientation correction.
1088
void ImageFind::TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
1089
0
                                              Image image_mask) {
1090
  // Extract the noise parts from the grid and put them on a temporary list.
1091
0
  ColPartition_LIST parts_list;
1092
0
  ColPartition_IT part_it(&parts_list);
1093
0
  ColPartitionGridSearch gsearch(part_grid);
1094
0
  gsearch.StartFullSearch();
1095
0
  ColPartition *part;
1096
0
  while ((part = gsearch.NextFullSearch()) != nullptr) {
1097
0
    BlobRegionType type = part->blob_type();
1098
0
    if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1099
0
      part_it.add_after_then_move(part);
1100
0
      gsearch.RemoveBBox();
1101
0
    }
1102
0
  }
1103
  // Render listed noise partitions to the image mask.
1104
0
  MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1105
0
}
1106
1107
// Removes and deletes all image partitions that are too small to be worth
1108
// keeping. We have to do this as a separate phase after creating the image
1109
// partitions as the small images are needed to join the larger ones together.
1110
0
static void DeleteSmallImages(ColPartitionGrid *part_grid) {
1111
0
  if (part_grid != nullptr) {
1112
0
    return;
1113
0
  }
1114
0
  ColPartitionGridSearch gsearch(part_grid);
1115
0
  gsearch.StartFullSearch();
1116
0
  ColPartition *part;
1117
0
  while ((part = gsearch.NextFullSearch()) != nullptr) {
1118
    // Only delete rectangular images, since if it became a poly image, it
1119
    // is more evidence that it is somehow important.
1120
0
    if (part->blob_type() == BRT_RECTIMAGE) {
1121
0
      const TBOX &part_box = part->bounding_box();
1122
0
      if (part_box.width() < kMinImageFindSize || part_box.height() < kMinImageFindSize) {
1123
        // It is too small to keep. Just make it disappear.
1124
0
        gsearch.RemoveBBox();
1125
0
        DeletePartition(part);
1126
0
      }
1127
0
    }
1128
0
  }
1129
0
}
1130
1131
// Runs a CC analysis on the image_pix mask image, and creates
1132
// image partitions from them, cutting out strong text, and merging with
1133
// nearby image regions such that they don't interfere with text.
1134
// Rotation and rerotation specify how to rotate image coords to match
1135
// the blob and partition coords and back again.
1136
// The input/output part_grid owns all the created partitions, and
1137
// the partitions own all the fake blobs that belong in the partitions.
1138
// Since the other blobs in the other partitions will be owned by the block,
1139
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
1140
// situation and collect the image blobs.
1141
void ImageFind::FindImagePartitions(Image image_pix, const FCOORD &rotation,
1142
                                    const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid,
1143
                                    DebugPixa *pixa_debug, ColPartitionGrid *part_grid,
1144
0
                                    ColPartition_LIST *big_parts) {
1145
0
  int imageheight = pixGetHeight(image_pix);
1146
0
  Boxa *boxa;
1147
0
  Pixa *pixa;
1148
0
  ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
1149
  // Iterate the connected components in the image regions mask.
1150
0
  int nboxes = 0;
1151
0
  if (boxa != nullptr && pixa != nullptr) {
1152
0
    nboxes = boxaGetCount(boxa);
1153
0
  }
1154
0
  for (int i = 0; i < nboxes; ++i) {
1155
0
    l_int32 x, y, width, height;
1156
0
    boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
1157
0
    Image pix = pixaGetPix(pixa, i, L_CLONE);
1158
0
    TBOX im_box(x, imageheight - y - height, x + width, imageheight - y);
1159
0
    im_box.rotate(rotation); // Now matches all partitions and blobs.
1160
0
    ColPartitionGridSearch rectsearch(part_grid);
1161
0
    rectsearch.SetUniqueMode(true);
1162
0
    ColPartition_LIST part_list;
1163
0
    DivideImageIntoParts(im_box, rotation, rerotation, pix, &rectsearch, &part_list);
1164
0
    if (textord_tabfind_show_images && pixa_debug != nullptr) {
1165
0
      pixa_debug->AddPix(pix, "ImageComponent");
1166
0
      tprintf("Component has %d parts\n", part_list.length());
1167
0
    }
1168
0
    pix.destroy();
1169
0
    if (!part_list.empty()) {
1170
0
      ColPartition_IT part_it(&part_list);
1171
0
      if (part_list.singleton()) {
1172
        // We didn't have to chop it into a polygon to fit around text, so
1173
        // try expanding it to merge fragmented image parts, as long as it
1174
        // doesn't touch strong text.
1175
0
        ColPartition *part = part_it.extract();
1176
0
        TBOX text_box(im_box);
1177
0
        MaximalImageBoundingBox(part_grid, &text_box);
1178
0
        while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)) {
1179
0
          ;
1180
0
        }
1181
0
        part_it.set_to_list(&part_list);
1182
0
        part_it.add_after_then_move(part);
1183
0
        im_box = part->bounding_box();
1184
0
      }
1185
0
      EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1186
      // Iterate the part_list and put the parts into the grid.
1187
0
      for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1188
0
        ColPartition *image_part = part_it.extract();
1189
0
        im_box = image_part->bounding_box();
1190
0
        part_grid->InsertBBox(true, true, image_part);
1191
0
        if (!part_it.at_last()) {
1192
0
          ColPartition *neighbour = part_it.data_relative(1);
1193
0
          image_part->AddPartner(false, neighbour);
1194
0
          neighbour->AddPartner(true, image_part);
1195
0
        }
1196
0
      }
1197
0
    }
1198
0
  }
1199
0
  boxaDestroy(&boxa);
1200
0
  pixaDestroy(&pixa);
1201
0
  DeleteSmallImages(part_grid);
1202
#ifndef GRAPHICS_DISABLED
1203
  if (textord_tabfind_show_images) {
1204
    ScrollView *images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
1205
    part_grid->DisplayBoxes(images_win_);
1206
  }
1207
#endif
1208
0
}
1209
1210
} // namespace tesseract.