/src/tesseract/src/textord/imagefind.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: imagefind.cpp |
3 | | // Description: Function to find image and drawing regions in an image |
4 | | // and create a corresponding list of empty blobs. |
5 | | // Author: Ray Smith |
6 | | // |
7 | | // (C) Copyright 2008, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #ifdef HAVE_CONFIG_H |
21 | | # include "config_auto.h" |
22 | | #endif |
23 | | |
24 | | #include "imagefind.h" |
25 | | |
26 | | #include "colpartitiongrid.h" |
27 | | #include "linlsq.h" |
28 | | #include "params.h" |
29 | | #include "statistc.h" |
30 | | |
31 | | #include <allheaders.h> |
32 | | |
33 | | #include <algorithm> |
34 | | |
35 | | namespace tesseract { |
36 | | |
37 | | static INT_VAR(textord_tabfind_show_images, false, "Show image blobs"); |
38 | | |
39 | | // Fraction of width or height of on pixels that can be discarded from a |
40 | | // roughly rectangular image. |
41 | | const double kMinRectangularFraction = 0.125; |
42 | | // Fraction of width or height to consider image completely used. |
43 | | const double kMaxRectangularFraction = 0.75; |
44 | | // Fraction of width or height to allow transition from kMinRectangularFraction |
45 | | // to kMaxRectangularFraction, equivalent to a dy/dx skew. |
46 | | const double kMaxRectangularGradient = 0.1; // About 6 degrees. |
47 | | // Minimum image size to be worth looking for images on. |
48 | | const int kMinImageFindSize = 100; |
49 | | // Pixel padding for noise blobs and partitions when rendering on the image |
50 | | // mask to encourage them to join together. Make it too big and images |
51 | | // will fatten out too much and have to be clipped to text. |
52 | | const int kNoisePadding = 4; |
53 | | |
54 | | // Scans horizontally on x=[x_start,x_end), starting with y=*y_start, |
55 | | // stepping y+=y_step, until y=y_end. *ystart is input/output. |
56 | | // If the number of black pixels in a row, pix_count fits this pattern: |
57 | | // 0 or more rows with pix_count < min_count then |
58 | | // <= mid_width rows with min_count <= pix_count <= max_count then |
59 | | // a row with pix_count > max_count then |
60 | | // true is returned, and *y_start = the first y with pix_count >= min_count. |
61 | | static bool HScanForEdge(uint32_t *data, int wpl, int x_start, int x_end, int min_count, |
62 | 0 | int mid_width, int max_count, int y_end, int y_step, int *y_start) { |
63 | 0 | int mid_rows = 0; |
64 | 0 | for (int y = *y_start; y != y_end; y += y_step) { |
65 | | // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a |
66 | | // subset. |
67 | 0 | int pix_count = 0; |
68 | 0 | uint32_t *line = data + wpl * y; |
69 | 0 | for (int x = x_start; x < x_end; ++x) { |
70 | 0 | if (GET_DATA_BIT(line, x)) { |
71 | 0 | ++pix_count; |
72 | 0 | } |
73 | 0 | } |
74 | 0 | if (mid_rows == 0 && pix_count < min_count) { |
75 | 0 | continue; // In the min phase. |
76 | 0 | } |
77 | 0 | if (mid_rows == 0) { |
78 | 0 | *y_start = y; // Save the y_start where we came out of the min phase. |
79 | 0 | } |
80 | 0 | if (pix_count > max_count) { |
81 | 0 | return true; // Found the pattern. |
82 | 0 | } |
83 | 0 | ++mid_rows; |
84 | 0 | if (mid_rows > mid_width) { |
85 | 0 | break; // Middle too big. |
86 | 0 | } |
87 | 0 | } |
88 | 0 | return false; // Never found max_count. |
89 | 0 | } |
90 | | |
91 | | // Scans vertically on y=[y_start,y_end), starting with x=*x_start, |
92 | | // stepping x+=x_step, until x=x_end. *x_start is input/output. |
93 | | // If the number of black pixels in a column, pix_count fits this pattern: |
94 | | // 0 or more cols with pix_count < min_count then |
95 | | // <= mid_width cols with min_count <= pix_count <= max_count then |
96 | | // a column with pix_count > max_count then |
97 | | // true is returned, and *x_start = the first x with pix_count >= min_count. |
98 | | static bool VScanForEdge(uint32_t *data, int wpl, int y_start, int y_end, int min_count, |
99 | 0 | int mid_width, int max_count, int x_end, int x_step, int *x_start) { |
100 | 0 | int mid_cols = 0; |
101 | 0 | for (int x = *x_start; x != x_end; x += x_step) { |
102 | 0 | int pix_count = 0; |
103 | 0 | uint32_t *line = data + y_start * wpl; |
104 | 0 | for (int y = y_start; y < y_end; ++y, line += wpl) { |
105 | 0 | if (GET_DATA_BIT(line, x)) { |
106 | 0 | ++pix_count; |
107 | 0 | } |
108 | 0 | } |
109 | 0 | if (mid_cols == 0 && pix_count < min_count) { |
110 | 0 | continue; // In the min phase. |
111 | 0 | } |
112 | 0 | if (mid_cols == 0) { |
113 | 0 | *x_start = x; // Save the place where we came out of the min phase. |
114 | 0 | } |
115 | 0 | if (pix_count > max_count) { |
116 | 0 | return true; // found the pattern. |
117 | 0 | } |
118 | 0 | ++mid_cols; |
119 | 0 | if (mid_cols > mid_width) { |
120 | 0 | break; // Middle too big. |
121 | 0 | } |
122 | 0 | } |
123 | 0 | return false; // Never found max_count. |
124 | 0 | } |
125 | | |
126 | | // Returns true if there is a rectangle in the source pix, such that all |
127 | | // pixel rows and column slices outside of it have less than |
128 | | // min_fraction of the pixels black, and within max_skew_gradient fraction |
129 | | // of the pixels on the inside, there are at least max_fraction of the |
130 | | // pixels black. In other words, the inside of the rectangle looks roughly |
131 | | // rectangular, and the outside of it looks like extra bits. |
132 | | // On return, the rectangle is defined by x_start, y_start, x_end and y_end. |
133 | | // Note: the algorithm is iterative, allowing it to slice off pixels from |
134 | | // one edge, allowing it to then slice off more pixels from another edge. |
135 | | static bool pixNearlyRectangular(Image pix, double min_fraction, double max_fraction, |
136 | | double max_skew_gradient, int *x_start, int *y_start, |
137 | 0 | int *x_end, int *y_end) { |
138 | 0 | ASSERT_HOST(pix != nullptr); |
139 | 0 | *x_start = 0; |
140 | 0 | *x_end = pixGetWidth(pix); |
141 | 0 | *y_start = 0; |
142 | 0 | *y_end = pixGetHeight(pix); |
143 | |
|
144 | 0 | uint32_t *data = pixGetData(pix); |
145 | 0 | int wpl = pixGetWpl(pix); |
146 | 0 | bool any_cut = false; |
147 | 0 | bool left_done = false; |
148 | 0 | bool right_done = false; |
149 | 0 | bool top_done = false; |
150 | 0 | bool bottom_done = false; |
151 | 0 | do { |
152 | 0 | any_cut = false; |
153 | | // Find the top/bottom edges. |
154 | 0 | int width = *x_end - *x_start; |
155 | 0 | int min_count = static_cast<int>(width * min_fraction); |
156 | 0 | int max_count = static_cast<int>(width * max_fraction); |
157 | 0 | int edge_width = static_cast<int>(width * max_skew_gradient); |
158 | 0 | if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, max_count, *y_end, 1, |
159 | 0 | y_start) && |
160 | 0 | !top_done) { |
161 | 0 | top_done = true; |
162 | 0 | any_cut = true; |
163 | 0 | } |
164 | 0 | --(*y_end); |
165 | 0 | if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, max_count, *y_start, -1, |
166 | 0 | y_end) && |
167 | 0 | !bottom_done) { |
168 | 0 | bottom_done = true; |
169 | 0 | any_cut = true; |
170 | 0 | } |
171 | 0 | ++(*y_end); |
172 | | |
173 | | // Find the left/right edges. |
174 | 0 | int height = *y_end - *y_start; |
175 | 0 | min_count = static_cast<int>(height * min_fraction); |
176 | 0 | max_count = static_cast<int>(height * max_fraction); |
177 | 0 | edge_width = static_cast<int>(height * max_skew_gradient); |
178 | 0 | if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, max_count, *x_end, 1, |
179 | 0 | x_start) && |
180 | 0 | !left_done) { |
181 | 0 | left_done = true; |
182 | 0 | any_cut = true; |
183 | 0 | } |
184 | 0 | --(*x_end); |
185 | 0 | if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, max_count, *x_start, -1, |
186 | 0 | x_end) && |
187 | 0 | !right_done) { |
188 | 0 | right_done = true; |
189 | 0 | any_cut = true; |
190 | 0 | } |
191 | 0 | ++(*x_end); |
192 | 0 | } while (any_cut); |
193 | | |
194 | | // All edges must satisfy the condition of sharp gradient in pixel density |
195 | | // in order for the full rectangle to be present. |
196 | 0 | return left_done && right_done && top_done && bottom_done; |
197 | 0 | } |
198 | | |
199 | | // Generates a Boxa, Pixa pair from the input binary (image mask) pix, |
200 | | // analogous to pixConnComp, except that connected components which are nearly |
201 | | // rectangular are replaced with solid rectangles. |
202 | | // The returned boxa, pixa may be nullptr, meaning no images found. |
203 | | // If not nullptr, they must be destroyed by the caller. |
204 | | // Resolution of pix should match the source image (Tesseract::pix_binary_) |
205 | | // so the output coordinate systems match. |
206 | | static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa, |
207 | 0 | Pixa **pixa) { |
208 | 0 | *boxa = nullptr; |
209 | 0 | *pixa = nullptr; |
210 | |
|
211 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
212 | 0 | pixa_debug->AddPix(pix, "Conncompimage"); |
213 | 0 | } |
214 | | // Find the individual image regions in the mask image. |
215 | 0 | *boxa = pixConnComp(pix, pixa, 8); |
216 | | // Rectangularize the individual images. If a sharp edge in vertical and/or |
217 | | // horizontal occupancy can be found, it indicates a probably rectangular |
218 | | // image with unwanted bits merged on, so clip to the approximate rectangle. |
219 | 0 | int npixes = 0; |
220 | 0 | if (*boxa != nullptr && *pixa != nullptr) { |
221 | 0 | npixes = pixaGetCount(*pixa); |
222 | 0 | } |
223 | 0 | for (int i = 0; i < npixes; ++i) { |
224 | 0 | int x_start, x_end, y_start, y_end; |
225 | 0 | Image img_pix = pixaGetPix(*pixa, i, L_CLONE); |
226 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
227 | 0 | pixa_debug->AddPix(img_pix, "A component"); |
228 | 0 | } |
229 | 0 | if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction, |
230 | 0 | kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) { |
231 | 0 | Image simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1); |
232 | 0 | pixSetAll(simple_pix); |
233 | 0 | img_pix.destroy(); |
234 | | // pixaReplacePix takes ownership of the simple_pix. |
235 | 0 | pixaReplacePix(*pixa, i, simple_pix, nullptr); |
236 | 0 | img_pix = pixaGetPix(*pixa, i, L_CLONE); |
237 | | // Fix the box to match the new pix. |
238 | 0 | l_int32 x, y, width, height; |
239 | 0 | boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); |
240 | 0 | Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start); |
241 | 0 | boxaReplaceBox(*boxa, i, simple_box); |
242 | 0 | } |
243 | 0 | img_pix.destroy(); |
244 | 0 | } |
245 | 0 | } |
246 | | |
247 | | // Finds image regions within the BINARY source pix (page image) and returns |
248 | | // the image regions as a mask image. |
249 | | // The returned pix may be nullptr, meaning no images found. |
250 | | // If not nullptr, it must be PixDestroyed by the caller. |
251 | | // If textord_tabfind_show_images, debug images are appended to pixa_debug. |
252 | 0 | Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) { |
253 | 0 | auto width = pixGetWidth(pix); |
254 | 0 | auto height = pixGetHeight(pix); |
255 | | // Not worth looking at small images. |
256 | | // Leptonica will print an error message and return nullptr if we call |
257 | | // pixGenHalftoneMask(pixr, nullptr, ...) with width or height < 100 |
258 | | // for the reduced image, so we want to bypass that, too. |
259 | 0 | if (width / 2 < kMinImageFindSize || height / 2 < kMinImageFindSize) { |
260 | 0 | return pixCreate(width, height, 1); |
261 | 0 | } |
262 | | |
263 | | // Reduce by factor 2. |
264 | 0 | Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); |
265 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
266 | 0 | pixa_debug->AddPix(pixr, "CascadeReduced"); |
267 | 0 | } |
268 | | |
269 | | // Get the halftone mask directly from Leptonica. |
270 | 0 | l_int32 ht_found = 0; |
271 | 0 | Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr; |
272 | 0 | Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb); |
273 | 0 | if (pixadb) { |
274 | 0 | Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2); |
275 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
276 | 0 | pixa_debug->AddPix(pixdb, "HalftoneMask"); |
277 | 0 | } |
278 | 0 | pixdb.destroy(); |
279 | 0 | pixaDestroy(&pixadb); |
280 | 0 | } |
281 | 0 | pixr.destroy(); |
282 | 0 | if (!ht_found && pixht2 != nullptr) { |
283 | 0 | pixht2.destroy(); |
284 | 0 | } |
285 | 0 | if (pixht2 == nullptr) { |
286 | 0 | return pixCreate(width, height, 1); |
287 | 0 | } |
288 | | |
289 | | // Expand back up again. |
290 | 0 | Image pixht = pixExpandReplicate(pixht2, 2); |
291 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
292 | 0 | pixa_debug->AddPix(pixht, "HalftoneReplicated"); |
293 | 0 | } |
294 | 0 | pixht2.destroy(); |
295 | | |
296 | | // Fill to capture pixels near the mask edges that were missed |
297 | 0 | Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); |
298 | 0 | pixht |= pixt; |
299 | 0 | pixt.destroy(); |
300 | | |
301 | | // Eliminate lines and bars that may be joined to images. |
302 | 0 | Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); |
303 | 0 | pixDilateBrick(pixfinemask, pixfinemask, 5, 5); |
304 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
305 | 0 | pixa_debug->AddPix(pixfinemask, "FineMask"); |
306 | 0 | } |
307 | 0 | Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); |
308 | 0 | Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); |
309 | 0 | pixreduced.destroy(); |
310 | 0 | pixDilateBrick(pixreduced2, pixreduced2, 5, 5); |
311 | 0 | Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8); |
312 | 0 | pixreduced2.destroy(); |
313 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
314 | 0 | pixa_debug->AddPix(pixcoarsemask, "CoarseMask"); |
315 | 0 | } |
316 | | // Combine the coarse and fine image masks. |
317 | 0 | pixcoarsemask &= pixfinemask; |
318 | 0 | pixfinemask.destroy(); |
319 | | // Dilate a bit to make sure we get everything. |
320 | 0 | pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); |
321 | 0 | Image pixmask = pixExpandReplicate(pixcoarsemask, 16); |
322 | 0 | pixcoarsemask.destroy(); |
323 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
324 | 0 | pixa_debug->AddPix(pixmask, "MaskDilated"); |
325 | 0 | } |
326 | | // And the image mask with the line and bar remover. |
327 | 0 | pixht &= pixmask; |
328 | 0 | pixmask.destroy(); |
329 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
330 | 0 | pixa_debug->AddPix(pixht, "FinalMask"); |
331 | 0 | } |
332 | | // Make the result image the same size as the input. |
333 | 0 | Image result = pixCreate(width, height, 1); |
334 | 0 | result |= pixht; |
335 | 0 | pixht.destroy(); |
336 | 0 | return result; |
337 | 0 | } |
338 | | |
339 | | // Given an input pix, and a bounding rectangle, the sides of the rectangle |
340 | | // are shrunk inwards until they bound any black pixels found within the |
341 | | // original rectangle. Returns false if the rectangle contains no black |
342 | | // pixels at all. |
343 | 0 | bool ImageFind::BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end) { |
344 | 0 | Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start); |
345 | 0 | Box *output_box = nullptr; |
346 | 0 | pixClipBoxToForeground(pix, input_box, nullptr, &output_box); |
347 | 0 | bool result = output_box != nullptr; |
348 | 0 | if (result) { |
349 | 0 | l_int32 x, y, width, height; |
350 | 0 | boxGetGeometry(output_box, &x, &y, &width, &height); |
351 | 0 | *x_start = x; |
352 | 0 | *y_start = y; |
353 | 0 | *x_end = x + width; |
354 | 0 | *y_end = y + height; |
355 | 0 | boxDestroy(&output_box); |
356 | 0 | } |
357 | 0 | boxDestroy(&input_box); |
358 | 0 | return result; |
359 | 0 | } |
360 | | |
361 | | // Given a point in 3-D (RGB) space, returns the squared Euclidean distance |
362 | | // of the point from the given line, defined by a pair of points in the 3-D |
363 | | // (RGB) space, line1 and line2. |
364 | | double ImageFind::ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, |
365 | 0 | const uint8_t *point) { |
366 | 0 | int line_vector[kRGBRMSColors]; |
367 | 0 | int point_vector[kRGBRMSColors]; |
368 | 0 | for (int i = 0; i < kRGBRMSColors; ++i) { |
369 | 0 | line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]); |
370 | 0 | point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]); |
371 | 0 | } |
372 | 0 | line_vector[L_ALPHA_CHANNEL] = 0; |
373 | | // Now the cross product in 3d. |
374 | 0 | int cross[kRGBRMSColors]; |
375 | 0 | cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] - |
376 | 0 | line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN]; |
377 | 0 | cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] - |
378 | 0 | line_vector[COLOR_RED] * point_vector[COLOR_BLUE]; |
379 | 0 | cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] - |
380 | 0 | line_vector[COLOR_GREEN] * point_vector[COLOR_RED]; |
381 | 0 | cross[L_ALPHA_CHANNEL] = 0; |
382 | | // Now the sums of the squares. |
383 | 0 | double cross_sq = 0.0; |
384 | 0 | double line_sq = 0.0; |
385 | 0 | for (int j = 0; j < kRGBRMSColors; ++j) { |
386 | 0 | cross_sq += static_cast<double>(cross[j]) * cross[j]; |
387 | 0 | line_sq += static_cast<double>(line_vector[j]) * line_vector[j]; |
388 | 0 | } |
389 | 0 | if (line_sq == 0.0) { |
390 | 0 | return 0.0; |
391 | 0 | } |
392 | 0 | return cross_sq / line_sq; // This is the squared distance. |
393 | 0 | } |
394 | | |
395 | | // ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================ |
396 | | // The following functions are responsible for cutting a polygonal image from |
397 | | // a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts |
398 | | // with DivideImageIntoParts as the master. |
399 | | // Problem statement: |
400 | | // We start with a single connected component from the image mask: we get |
401 | | // a Pix of the component, and its location on the page (im_box). |
402 | | // The objective of cutting a polygonal image from its rectangle is to avoid |
403 | | // interfering text, but not text that completely overlaps the image. |
404 | | // ------------------------------ ------------------------------ |
405 | | // | Single input partition | | 1 Cut up output partitions | |
406 | | // | | ------------------------------ |
407 | | // Av|oid | Avoid | | |
408 | | // | | |________________________| |
409 | | // Int|erfering | Interfering | | |
410 | | // | | _____|__________________| |
411 | | // T|ext | Text | | |
412 | | // | Text-on-image | | Text-on-image | |
413 | | // ------------------------------ -------------------------- |
414 | | // DivideImageIntoParts does this by building a ColPartition_LIST (not in the |
415 | | // grid) with each ColPartition representing one of the rectangles needed, |
416 | | // starting with a single rectangle for the whole image component, and cutting |
417 | | // bits out of it with CutChunkFromParts as needed to avoid text. The output |
418 | | // ColPartitions are supposed to be ordered from top to bottom. |
419 | | |
420 | | // The problem is complicated by the fact that we have rotated the coordinate |
421 | | // system to make text lines horizontal, so if we need to look at the component |
422 | | // image, we have to rotate the coordinates. Throughout the functions in this |
423 | | // section im_box is the rectangle representing the image component in the |
424 | | // rotated page coordinates (where we are building our output ColPartitions), |
425 | | // rotation is the rotation that we used to get there, and rerotation is the |
426 | | // rotation required to get back to original page image coordinates. |
427 | | // To get to coordinates in the component image, pix, we rotate the im_box, |
428 | | // the point we want to locate, and subtract the rotated point from the top-left |
429 | | // of the rotated im_box. |
430 | | // im_box is therefore essential to calculating coordinates within the pix. |
431 | | |
432 | | // Returns true if there are no black pixels in between the boxes. |
433 | | // The im_box must represent the bounding box of the pix in tesseract |
434 | | // coordinates, which may be negative, due to rotations to make the textlines |
435 | | // horizontal. The boxes are rotated by rotation, which should undo such |
436 | | // rotations, before mapping them onto the pix. |
437 | | bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, |
438 | 0 | const FCOORD &rotation, Image pix) { |
439 | 0 | TBOX search_box(box1); |
440 | 0 | search_box += box2; |
441 | 0 | if (box1.x_gap(box2) >= box1.y_gap(box2)) { |
442 | 0 | if (box1.x_gap(box2) <= 0) { |
443 | 0 | return true; |
444 | 0 | } |
445 | 0 | search_box.set_left(std::min(box1.right(), box2.right())); |
446 | 0 | search_box.set_right(std::max(box1.left(), box2.left())); |
447 | 0 | } else { |
448 | 0 | if (box1.y_gap(box2) <= 0) { |
449 | 0 | return true; |
450 | 0 | } |
451 | 0 | search_box.set_top(std::max(box1.bottom(), box2.bottom())); |
452 | 0 | search_box.set_bottom(std::min(box1.top(), box2.top())); |
453 | 0 | } |
454 | 0 | return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0; |
455 | 0 | } |
456 | | |
457 | | // Returns the number of pixels in box in the pix. |
458 | | // rotation, pix and im_box are defined in the large comment above. |
459 | | int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, |
460 | 0 | Image pix) { |
461 | | // Intersect it with the image box. |
462 | 0 | box &= im_box; // This is in-place box intersection. |
463 | 0 | if (box.null_box()) { |
464 | 0 | return 0; |
465 | 0 | } |
466 | 0 | box.rotate(rotation); |
467 | 0 | TBOX rotated_im_box(im_box); |
468 | 0 | rotated_im_box.rotate(rotation); |
469 | 0 | Image rect_pix = pixCreate(box.width(), box.height(), 1); |
470 | 0 | pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix, |
471 | 0 | box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top()); |
472 | 0 | l_int32 result; |
473 | 0 | pixCountPixels(rect_pix, &result, nullptr); |
474 | 0 | rect_pix.destroy(); |
475 | 0 | return result; |
476 | 0 | } |
477 | | |
478 | | // The box given by slice contains some black pixels, but not necessarily |
479 | | // over the whole box. Shrink the x bounds of slice, but not the y bounds |
480 | | // until there is at least one black pixel in the outermost columns. |
481 | | // rotation, rerotation, pix and im_box are defined in the large comment above. |
482 | | static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box, |
483 | 0 | Image pix, TBOX *slice) { |
484 | 0 | TBOX rotated_box(*slice); |
485 | 0 | rotated_box.rotate(rerotation); |
486 | 0 | TBOX rotated_im_box(im_box); |
487 | 0 | rotated_im_box.rotate(rerotation); |
488 | 0 | int left = rotated_box.left() - rotated_im_box.left(); |
489 | 0 | int right = rotated_box.right() - rotated_im_box.left(); |
490 | 0 | int top = rotated_im_box.top() - rotated_box.top(); |
491 | 0 | int bottom = rotated_im_box.top() - rotated_box.bottom(); |
492 | 0 | ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom); |
493 | 0 | top = rotated_im_box.top() - top; |
494 | 0 | bottom = rotated_im_box.top() - bottom; |
495 | 0 | left += rotated_im_box.left(); |
496 | 0 | right += rotated_im_box.left(); |
497 | 0 | rotated_box.set_to_given_coords(left, bottom, right, top); |
498 | 0 | rotated_box.rotate(rotation); |
499 | 0 | slice->set_left(rotated_box.left()); |
500 | 0 | slice->set_right(rotated_box.right()); |
501 | 0 | } |
502 | | |
503 | | // The meat of cutting a polygonal image around text. |
504 | | // This function covers the general case of cutting a box out of a box |
505 | | // as shown: |
506 | | // Input Output |
507 | | // ------------------------------ ------------------------------ |
508 | | // | Single input partition | | 1 Cut up output partitions | |
509 | | // | | ------------------------------ |
510 | | // | ---------- | --------- ---------- |
511 | | // | | box | | | 2 | box | 3 | |
512 | | // | | | | | | is cut | | |
513 | | // | ---------- | --------- out ---------- |
514 | | // | | ------------------------------ |
515 | | // | | | 4 | |
516 | | // ------------------------------ ------------------------------ |
517 | | // In the context that this function is used, at most 3 of the above output |
518 | | // boxes will be created, as the overlapping box is never contained by the |
519 | | // input. |
520 | | // The above cutting operation is executed for each element of part_list that |
521 | | // is overlapped by the input box. Each modified ColPartition is replaced |
522 | | // in place in the list by the output of the cutting operation in the order |
523 | | // shown above, so iff no holes are ever created, the output will be in |
524 | | // top-to-bottom order, but in extreme cases, hole creation is possible. |
525 | | // In such cases, the output order may cause strange block polygons. |
526 | | // rotation, rerotation, pix and im_box are defined in the large comment above. |
527 | | static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation, |
528 | 0 | const FCOORD &rerotation, Image pix, ColPartition_LIST *part_list) { |
529 | 0 | ASSERT_HOST(!part_list->empty()); |
530 | 0 | ColPartition_IT part_it(part_list); |
531 | 0 | do { |
532 | 0 | ColPartition *part = part_it.data(); |
533 | 0 | TBOX part_box = part->bounding_box(); |
534 | 0 | if (part_box.overlap(box)) { |
535 | | // This part must be cut and replaced with the remains. There are |
536 | | // up to 4 pieces to be made. Start with the first one and use |
537 | | // add_before_stay_put. For each piece if it has no black pixels |
538 | | // left, just don't make the box. |
539 | | // Above box. |
540 | 0 | if (box.top() < part_box.top()) { |
541 | 0 | TBOX slice(part_box); |
542 | 0 | slice.set_bottom(box.top()); |
543 | 0 | if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) { |
544 | 0 | AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); |
545 | 0 | part_it.add_before_stay_put( |
546 | 0 | ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); |
547 | 0 | } |
548 | 0 | } |
549 | | // Left of box. |
550 | 0 | if (box.left() > part_box.left()) { |
551 | 0 | TBOX slice(part_box); |
552 | 0 | slice.set_right(box.left()); |
553 | 0 | if (box.top() < part_box.top()) { |
554 | 0 | slice.set_top(box.top()); |
555 | 0 | } |
556 | 0 | if (box.bottom() > part_box.bottom()) { |
557 | 0 | slice.set_bottom(box.bottom()); |
558 | 0 | } |
559 | 0 | if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) { |
560 | 0 | AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); |
561 | 0 | part_it.add_before_stay_put( |
562 | 0 | ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); |
563 | 0 | } |
564 | 0 | } |
565 | | // Right of box. |
566 | 0 | if (box.right() < part_box.right()) { |
567 | 0 | TBOX slice(part_box); |
568 | 0 | slice.set_left(box.right()); |
569 | 0 | if (box.top() < part_box.top()) { |
570 | 0 | slice.set_top(box.top()); |
571 | 0 | } |
572 | 0 | if (box.bottom() > part_box.bottom()) { |
573 | 0 | slice.set_bottom(box.bottom()); |
574 | 0 | } |
575 | 0 | if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) { |
576 | 0 | AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); |
577 | 0 | part_it.add_before_stay_put( |
578 | 0 | ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); |
579 | 0 | } |
580 | 0 | } |
581 | | // Below box. |
582 | 0 | if (box.bottom() > part_box.bottom()) { |
583 | 0 | TBOX slice(part_box); |
584 | 0 | slice.set_top(box.bottom()); |
585 | 0 | if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) { |
586 | 0 | AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); |
587 | 0 | part_it.add_before_stay_put( |
588 | 0 | ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); |
589 | 0 | } |
590 | 0 | } |
591 | 0 | part->DeleteBoxes(); |
592 | 0 | delete part_it.extract(); |
593 | 0 | } |
594 | 0 | part_it.forward(); |
595 | 0 | } while (!part_it.at_first()); |
596 | 0 | } |
597 | | |
598 | | // Starts with the bounding box of the image component and cuts it up |
599 | | // so that it doesn't intersect text where possible. |
600 | | // Strong fully contained horizontal text is marked as text on image, |
601 | | // and does not cause a division of the image. |
602 | | // For more detail see the large comment above on cutting polygonal images |
603 | | // from a rectangle. |
604 | | // rotation, rerotation, pix and im_box are defined in the large comment above. |
605 | | static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation, |
606 | | const FCOORD &rerotation, Image pix, |
607 | 0 | ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) { |
608 | | // Add the full im_box partition to the list to begin with. |
609 | 0 | ColPartition *pix_part = |
610 | 0 | ColPartition::FakePartition(im_box, PT_UNKNOWN, BRT_RECTIMAGE, BTFT_NONTEXT); |
611 | 0 | ColPartition_IT part_it(part_list); |
612 | 0 | part_it.add_after_then_move(pix_part); |
613 | |
|
614 | 0 | rectsearch->StartRectSearch(im_box); |
615 | 0 | ColPartition *part; |
616 | 0 | while ((part = rectsearch->NextRectSearch()) != nullptr) { |
617 | 0 | TBOX part_box = part->bounding_box(); |
618 | 0 | if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) { |
619 | | // This image is completely covered by an existing text partition. |
620 | 0 | for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { |
621 | 0 | ColPartition *pix_part = part_it.extract(); |
622 | 0 | pix_part->DeleteBoxes(); |
623 | 0 | delete pix_part; |
624 | 0 | } |
625 | 0 | } else if (part->flow() == BTFT_STRONG_CHAIN) { |
626 | | // Text intersects the box. |
627 | 0 | TBOX overlap_box = part_box.intersection(im_box); |
628 | | // Intersect it with the image box. |
629 | 0 | int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box, rerotation, pix); |
630 | 0 | if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) { |
631 | | // Eat a piece out of the image. |
632 | | // Pad it so that pieces eaten out look decent. |
633 | 0 | int padding = part->blob_type() == BRT_VERT_TEXT ? part_box.width() : part_box.height(); |
634 | 0 | part_box.set_top(part_box.top() + padding / 2); |
635 | 0 | part_box.set_bottom(part_box.bottom() - padding / 2); |
636 | 0 | CutChunkFromParts(part_box, im_box, rotation, rerotation, pix, part_list); |
637 | 0 | } else { |
638 | | // Strong overlap with the black area, so call it text on image. |
639 | 0 | part->set_flow(BTFT_TEXT_ON_IMAGE); |
640 | 0 | } |
641 | 0 | } |
642 | 0 | if (part_list->empty()) { |
643 | 0 | break; |
644 | 0 | } |
645 | 0 | } |
646 | 0 | } |
647 | | |
648 | | // Search for the rightmost text that overlaps vertically and is to the left |
649 | | // of the given box, but within the given left limit. |
650 | 0 | static int ExpandImageLeft(const TBOX &box, int left_limit, ColPartitionGrid *part_grid) { |
651 | 0 | ColPartitionGridSearch search(part_grid); |
652 | 0 | ColPartition *part; |
653 | | // Search right to left for any text that overlaps. |
654 | 0 | search.StartSideSearch(box.left(), box.bottom(), box.top()); |
655 | 0 | while ((part = search.NextSideSearch(true)) != nullptr) { |
656 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
657 | 0 | const TBOX &part_box(part->bounding_box()); |
658 | 0 | if (part_box.y_gap(box) < 0) { |
659 | 0 | if (part_box.right() > left_limit && part_box.right() < box.left()) { |
660 | 0 | left_limit = part_box.right(); |
661 | 0 | } |
662 | 0 | break; |
663 | 0 | } |
664 | 0 | } |
665 | 0 | } |
666 | 0 | if (part != nullptr) { |
667 | | // Search for the nearest text up to the one we already found. |
668 | 0 | TBOX search_box(left_limit, box.bottom(), box.left(), box.top()); |
669 | 0 | search.StartRectSearch(search_box); |
670 | 0 | while ((part = search.NextRectSearch()) != nullptr) { |
671 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
672 | 0 | const TBOX &part_box(part->bounding_box()); |
673 | 0 | if (part_box.y_gap(box) < 0) { |
674 | 0 | if (part_box.right() > left_limit && part_box.right() < box.left()) { |
675 | 0 | left_limit = part_box.right(); |
676 | 0 | } |
677 | 0 | } |
678 | 0 | } |
679 | 0 | } |
680 | 0 | } |
681 | 0 | return left_limit; |
682 | 0 | } |
683 | | |
684 | | // Search for the leftmost text that overlaps vertically and is to the right |
685 | | // of the given box, but within the given right limit. |
686 | 0 | static int ExpandImageRight(const TBOX &box, int right_limit, ColPartitionGrid *part_grid) { |
687 | 0 | ColPartitionGridSearch search(part_grid); |
688 | 0 | ColPartition *part; |
689 | | // Search left to right for any text that overlaps. |
690 | 0 | search.StartSideSearch(box.right(), box.bottom(), box.top()); |
691 | 0 | while ((part = search.NextSideSearch(false)) != nullptr) { |
692 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
693 | 0 | const TBOX &part_box(part->bounding_box()); |
694 | 0 | if (part_box.y_gap(box) < 0) { |
695 | 0 | if (part_box.left() < right_limit && part_box.left() > box.right()) { |
696 | 0 | right_limit = part_box.left(); |
697 | 0 | } |
698 | 0 | break; |
699 | 0 | } |
700 | 0 | } |
701 | 0 | } |
702 | 0 | if (part != nullptr) { |
703 | | // Search for the nearest text up to the one we already found. |
704 | 0 | TBOX search_box(box.left(), box.bottom(), right_limit, box.top()); |
705 | 0 | search.StartRectSearch(search_box); |
706 | 0 | while ((part = search.NextRectSearch()) != nullptr) { |
707 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
708 | 0 | const TBOX &part_box(part->bounding_box()); |
709 | 0 | if (part_box.y_gap(box) < 0) { |
710 | 0 | if (part_box.left() < right_limit && part_box.left() > box.right()) { |
711 | 0 | right_limit = part_box.left(); |
712 | 0 | } |
713 | 0 | } |
714 | 0 | } |
715 | 0 | } |
716 | 0 | } |
717 | 0 | return right_limit; |
718 | 0 | } |
719 | | |
720 | | // Search for the topmost text that overlaps horizontally and is below |
721 | | // the given box, but within the given bottom limit. |
722 | 0 | static int ExpandImageBottom(const TBOX &box, int bottom_limit, ColPartitionGrid *part_grid) { |
723 | 0 | ColPartitionGridSearch search(part_grid); |
724 | 0 | ColPartition *part; |
725 | | // Search right to left for any text that overlaps. |
726 | 0 | search.StartVerticalSearch(box.left(), box.right(), box.bottom()); |
727 | 0 | while ((part = search.NextVerticalSearch(true)) != nullptr) { |
728 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
729 | 0 | const TBOX &part_box(part->bounding_box()); |
730 | 0 | if (part_box.x_gap(box) < 0) { |
731 | 0 | if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) { |
732 | 0 | bottom_limit = part_box.top(); |
733 | 0 | } |
734 | 0 | break; |
735 | 0 | } |
736 | 0 | } |
737 | 0 | } |
738 | 0 | if (part != nullptr) { |
739 | | // Search for the nearest text up to the one we already found. |
740 | 0 | TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom()); |
741 | 0 | search.StartRectSearch(search_box); |
742 | 0 | while ((part = search.NextRectSearch()) != nullptr) { |
743 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
744 | 0 | const TBOX &part_box(part->bounding_box()); |
745 | 0 | if (part_box.x_gap(box) < 0) { |
746 | 0 | if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) { |
747 | 0 | bottom_limit = part_box.top(); |
748 | 0 | } |
749 | 0 | } |
750 | 0 | } |
751 | 0 | } |
752 | 0 | } |
753 | 0 | return bottom_limit; |
754 | 0 | } |
755 | | |
756 | | // Search for the bottommost text that overlaps horizontally and is above |
757 | | // the given box, but within the given top limit. |
758 | 0 | static int ExpandImageTop(const TBOX &box, int top_limit, ColPartitionGrid *part_grid) { |
759 | 0 | ColPartitionGridSearch search(part_grid); |
760 | 0 | ColPartition *part; |
761 | | // Search right to left for any text that overlaps. |
762 | 0 | search.StartVerticalSearch(box.left(), box.right(), box.top()); |
763 | 0 | while ((part = search.NextVerticalSearch(false)) != nullptr) { |
764 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
765 | 0 | const TBOX &part_box(part->bounding_box()); |
766 | 0 | if (part_box.x_gap(box) < 0) { |
767 | 0 | if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) { |
768 | 0 | top_limit = part_box.bottom(); |
769 | 0 | } |
770 | 0 | break; |
771 | 0 | } |
772 | 0 | } |
773 | 0 | } |
774 | 0 | if (part != nullptr) { |
775 | | // Search for the nearest text up to the one we already found. |
776 | 0 | TBOX search_box(box.left(), box.top(), box.right(), top_limit); |
777 | 0 | search.StartRectSearch(search_box); |
778 | 0 | while ((part = search.NextRectSearch()) != nullptr) { |
779 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { |
780 | 0 | const TBOX &part_box(part->bounding_box()); |
781 | 0 | if (part_box.x_gap(box) < 0) { |
782 | 0 | if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) { |
783 | 0 | top_limit = part_box.bottom(); |
784 | 0 | } |
785 | 0 | } |
786 | 0 | } |
787 | 0 | } |
788 | 0 | } |
789 | 0 | return top_limit; |
790 | 0 | } |
791 | | |
792 | | // Expands the image box in the given direction until it hits text, |
793 | | // limiting the expansion to the given limit box, returning the result |
794 | | // in the expanded box, and |
795 | | // returning the increase in area resulting from the expansion. |
796 | | static int ExpandImageDir(BlobNeighbourDir dir, const TBOX &im_box, const TBOX &limit_box, |
797 | 0 | ColPartitionGrid *part_grid, TBOX *expanded_box) { |
798 | 0 | *expanded_box = im_box; |
799 | 0 | switch (dir) { |
800 | 0 | case BND_LEFT: |
801 | 0 | expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(), part_grid)); |
802 | 0 | break; |
803 | 0 | case BND_RIGHT: |
804 | 0 | expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(), part_grid)); |
805 | 0 | break; |
806 | 0 | case BND_ABOVE: |
807 | 0 | expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid)); |
808 | 0 | break; |
809 | 0 | case BND_BELOW: |
810 | 0 | expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(), part_grid)); |
811 | 0 | break; |
812 | 0 | default: |
813 | 0 | return 0; |
814 | 0 | } |
815 | 0 | return expanded_box->area() - im_box.area(); |
816 | 0 | } |
817 | | |
818 | | // Expands the image partition into any non-text until it touches text. |
819 | | // The expansion proceeds in the order of increasing increase in area |
820 | | // as a heuristic to find the best rectangle by expanding in the most |
821 | | // constrained direction first. |
822 | 0 | static void MaximalImageBoundingBox(ColPartitionGrid *part_grid, TBOX *im_box) { |
823 | 0 | bool dunnit[BND_COUNT]; |
824 | 0 | memset(dunnit, 0, sizeof(dunnit)); |
825 | 0 | TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(), part_grid->tright().x(), |
826 | 0 | part_grid->tright().y()); |
827 | 0 | TBOX text_box(*im_box); |
828 | 0 | for (int iteration = 0; iteration < BND_COUNT; ++iteration) { |
829 | | // Find the direction with least area increase. |
830 | 0 | int best_delta = -1; |
831 | 0 | BlobNeighbourDir best_dir = BND_LEFT; |
832 | 0 | TBOX expanded_boxes[BND_COUNT]; |
833 | 0 | for (int dir = 0; dir < BND_COUNT; ++dir) { |
834 | 0 | auto bnd = static_cast<BlobNeighbourDir>(dir); |
835 | 0 | if (!dunnit[bnd]) { |
836 | 0 | TBOX expanded_box; |
837 | 0 | int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid, &expanded_boxes[bnd]); |
838 | 0 | if (best_delta < 0 || area_delta < best_delta) { |
839 | 0 | best_delta = area_delta; |
840 | 0 | best_dir = bnd; |
841 | 0 | } |
842 | 0 | } |
843 | 0 | } |
844 | | // Run the best and remember the direction. |
845 | 0 | dunnit[best_dir] = true; |
846 | 0 | text_box = expanded_boxes[best_dir]; |
847 | 0 | } |
848 | 0 | *im_box = text_box; |
849 | 0 | } |
850 | | |
851 | | // Helper deletes the given partition but first marks up all the blobs as |
852 | | // noise, so they get deleted later, and disowns them. |
853 | | // If the initial type of the partition is image, then it actually deletes |
854 | | // the blobs, as the partition owns them in that case. |
855 | 0 | static void DeletePartition(ColPartition *part) { |
856 | 0 | BlobRegionType type = part->blob_type(); |
857 | 0 | if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { |
858 | | // The partition owns the boxes of these types, so just delete them. |
859 | 0 | part->DeleteBoxes(); // From a previous iteration. |
860 | 0 | } else { |
861 | | // Once marked, the blobs will be swept up by TidyBlobs. |
862 | 0 | part->set_flow(BTFT_NONTEXT); |
863 | 0 | part->set_blob_type(BRT_NOISE); |
864 | 0 | part->SetBlobTypes(); |
865 | 0 | part->DisownBoxes(); // Created before FindImagePartitions. |
866 | 0 | } |
867 | 0 | delete part; |
868 | 0 | } |
869 | | |
870 | | // The meat of joining fragmented images and consuming ColPartitions of |
871 | | // uncertain type. |
872 | | // *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be |
873 | | // expanded to consume overlapping and nearby ColPartitions of uncertain type |
874 | | // and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond |
875 | | // max_image_box. *part_ptr is NOT in the part_grid. |
876 | | // rectsearch is already constructed on the part_grid, and is used for |
877 | | // searching for overlapping and nearby ColPartitions. |
878 | | // ExpandImageIntoParts is called iteratively until it returns false. Each |
879 | | // time it absorbs the nearest non-contained candidate, and everything that |
880 | | // is fully contained within part_ptr's bounding box. |
881 | | // TODO(rays) what if it just eats everything inside max_image_box in one go? |
882 | | static bool ExpandImageIntoParts(const TBOX &max_image_box, ColPartitionGridSearch *rectsearch, |
883 | 0 | ColPartitionGrid *part_grid, ColPartition **part_ptr) { |
884 | 0 | ColPartition *image_part = *part_ptr; |
885 | 0 | TBOX im_part_box = image_part->bounding_box(); |
886 | 0 | if (textord_tabfind_show_images > 1) { |
887 | 0 | tprintf("Searching for merge with image part:"); |
888 | 0 | im_part_box.print(); |
889 | 0 | tprintf("Text box="); |
890 | 0 | max_image_box.print(); |
891 | 0 | } |
892 | 0 | rectsearch->StartRectSearch(max_image_box); |
893 | 0 | ColPartition *part; |
894 | 0 | ColPartition *best_part = nullptr; |
895 | 0 | int best_dist = 0; |
896 | 0 | while ((part = rectsearch->NextRectSearch()) != nullptr) { |
897 | 0 | if (textord_tabfind_show_images > 1) { |
898 | 0 | tprintf("Considering merge with part:"); |
899 | 0 | part->Print(); |
900 | 0 | if (im_part_box.contains(part->bounding_box())) { |
901 | 0 | tprintf("Fully contained\n"); |
902 | 0 | } else if (!max_image_box.contains(part->bounding_box())) { |
903 | 0 | tprintf("Not within text box\n"); |
904 | 0 | } else if (part->flow() == BTFT_STRONG_CHAIN) { |
905 | 0 | tprintf("Too strong text\n"); |
906 | 0 | } else { |
907 | 0 | tprintf("Real candidate\n"); |
908 | 0 | } |
909 | 0 | } |
910 | 0 | if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_TEXT_ON_IMAGE || |
911 | 0 | part->blob_type() == BRT_POLYIMAGE) { |
912 | 0 | continue; |
913 | 0 | } |
914 | 0 | TBOX box = part->bounding_box(); |
915 | 0 | if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) { |
916 | 0 | if (im_part_box.contains(box)) { |
917 | | // Eat it completely. |
918 | 0 | rectsearch->RemoveBBox(); |
919 | 0 | DeletePartition(part); |
920 | 0 | continue; |
921 | 0 | } |
922 | 0 | int x_dist = std::max(0, box.x_gap(im_part_box)); |
923 | 0 | int y_dist = std::max(0, box.y_gap(im_part_box)); |
924 | 0 | int dist = x_dist * x_dist + y_dist * y_dist; |
925 | 0 | if (dist > box.area() || dist > im_part_box.area()) { |
926 | 0 | continue; // Not close enough. |
927 | 0 | } |
928 | 0 | if (best_part == nullptr || dist < best_dist) { |
929 | | // We keep the nearest qualifier, which is not necessarily the nearest. |
930 | 0 | best_part = part; |
931 | 0 | best_dist = dist; |
932 | 0 | } |
933 | 0 | } |
934 | 0 | } |
935 | 0 | if (best_part != nullptr) { |
936 | | // It needs expanding. We can do it without touching text. |
937 | 0 | TBOX box = best_part->bounding_box(); |
938 | 0 | if (textord_tabfind_show_images > 1) { |
939 | 0 | tprintf("Merging image part:"); |
940 | 0 | im_part_box.print(); |
941 | 0 | tprintf("with part:"); |
942 | 0 | box.print(); |
943 | 0 | } |
944 | 0 | im_part_box += box; |
945 | 0 | *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN, BRT_RECTIMAGE, BTFT_NONTEXT); |
946 | 0 | DeletePartition(image_part); |
947 | 0 | part_grid->RemoveBBox(best_part); |
948 | 0 | DeletePartition(best_part); |
949 | 0 | rectsearch->RepositionIterator(); |
950 | 0 | return true; |
951 | 0 | } |
952 | 0 | return false; |
953 | 0 | } |
954 | | |
955 | | // Helper function to compute the overlap area between the box and the |
956 | | // given list of partitions. |
957 | 0 | static int IntersectArea(const TBOX &box, ColPartition_LIST *part_list) { |
958 | 0 | int intersect_area = 0; |
959 | 0 | ColPartition_IT part_it(part_list); |
960 | | // Iterate the parts and subtract intersecting area. |
961 | 0 | for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { |
962 | 0 | ColPartition *image_part = part_it.data(); |
963 | 0 | TBOX intersect = box.intersection(image_part->bounding_box()); |
964 | 0 | intersect_area += intersect.area(); |
965 | 0 | } |
966 | 0 | return intersect_area; |
967 | 0 | } |
968 | | |
969 | | // part_list is a set of ColPartitions representing a polygonal image, and |
970 | | // im_box is the union of the bounding boxes of all the parts in part_list. |
971 | | // Tests whether part is to be consumed by the polygonal image. |
972 | | // Returns true if part is weak text and more than half of its area is |
973 | | // intersected by parts from the part_list, and it is contained within im_box. |
974 | | static bool TestWeakIntersectedPart(const TBOX &im_box, ColPartition_LIST *part_list, |
975 | 0 | ColPartition *part) { |
976 | 0 | if (part->flow() < BTFT_STRONG_CHAIN) { |
977 | | // A weak partition intersects the box. |
978 | 0 | const TBOX &part_box = part->bounding_box(); |
979 | 0 | if (im_box.contains(part_box)) { |
980 | 0 | int area = part_box.area(); |
981 | 0 | int intersect_area = IntersectArea(part_box, part_list); |
982 | 0 | if (area < 2 * intersect_area) { |
983 | 0 | return true; |
984 | 0 | } |
985 | 0 | } |
986 | 0 | } |
987 | 0 | return false; |
988 | 0 | } |
989 | | |
990 | | // A rectangular or polygonal image has been completed, in part_list, bounding |
991 | | // box in im_box. We want to eliminate weak text or other uncertain partitions |
992 | | // (basically anything that is not BRT_STRONG_CHAIN or better) from both the |
993 | | // part_grid and the big_parts list that are contained within im_box and |
994 | | // overlapped enough by the possibly polygonal image. |
995 | | static void EliminateWeakParts(const TBOX &im_box, ColPartitionGrid *part_grid, |
996 | 0 | ColPartition_LIST *big_parts, ColPartition_LIST *part_list) { |
997 | 0 | ColPartitionGridSearch rectsearch(part_grid); |
998 | 0 | ColPartition *part; |
999 | 0 | rectsearch.StartRectSearch(im_box); |
1000 | 0 | while ((part = rectsearch.NextRectSearch()) != nullptr) { |
1001 | 0 | if (TestWeakIntersectedPart(im_box, part_list, part)) { |
1002 | 0 | BlobRegionType type = part->blob_type(); |
1003 | 0 | if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) { |
1004 | 0 | rectsearch.RemoveBBox(); |
1005 | 0 | DeletePartition(part); |
1006 | 0 | } else { |
1007 | | // The part is mostly covered, so mark it. Non-image partitions are |
1008 | | // kept hanging around to mark the image for pass2 |
1009 | 0 | part->set_flow(BTFT_NONTEXT); |
1010 | 0 | part->set_blob_type(BRT_NOISE); |
1011 | 0 | part->SetBlobTypes(); |
1012 | 0 | } |
1013 | 0 | } |
1014 | 0 | } |
1015 | 0 | ColPartition_IT big_it(big_parts); |
1016 | 0 | for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) { |
1017 | 0 | part = big_it.data(); |
1018 | 0 | if (TestWeakIntersectedPart(im_box, part_list, part)) { |
1019 | | // Once marked, the blobs will be swept up by TidyBlobs. |
1020 | 0 | DeletePartition(big_it.extract()); |
1021 | 0 | } |
1022 | 0 | } |
1023 | 0 | } |
1024 | | |
1025 | | // Helper scans for good text partitions overlapping the given box. |
1026 | | // If there are no good text partitions overlapping an expanded box, then |
1027 | | // the box is expanded, otherwise, the original box is returned. |
1028 | | // If good text overlaps the box, true is returned. |
1029 | 0 | static bool ScanForOverlappingText(ColPartitionGrid *part_grid, TBOX *box) { |
1030 | 0 | ColPartitionGridSearch rectsearch(part_grid); |
1031 | 0 | TBOX padded_box(*box); |
1032 | 0 | padded_box.pad(kNoisePadding, kNoisePadding); |
1033 | 0 | rectsearch.StartRectSearch(padded_box); |
1034 | 0 | ColPartition *part; |
1035 | 0 | bool any_text_in_padded_rect = false; |
1036 | 0 | while ((part = rectsearch.NextRectSearch()) != nullptr) { |
1037 | 0 | if (part->flow() == BTFT_CHAIN || part->flow() == BTFT_STRONG_CHAIN) { |
1038 | | // Text intersects the box. |
1039 | 0 | any_text_in_padded_rect = true; |
1040 | 0 | const TBOX &part_box = part->bounding_box(); |
1041 | 0 | if (box->overlap(part_box)) { |
1042 | 0 | return true; |
1043 | 0 | } |
1044 | 0 | } |
1045 | 0 | } |
1046 | 0 | if (!any_text_in_padded_rect) { |
1047 | 0 | *box = padded_box; |
1048 | 0 | } |
1049 | 0 | return false; |
1050 | 0 | } |
1051 | | |
1052 | | // Renders the boxes of image parts from the supplied list onto the image_pix, |
1053 | | // except where they interfere with existing strong text in the part_grid, |
1054 | | // and then deletes them. |
1055 | | // Box coordinates are rotated by rerotate to match the image. |
1056 | | static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid, |
1057 | 0 | ColPartition_LIST *image_parts, Image image_pix) { |
1058 | 0 | if (image_pix == nullptr) { |
1059 | 0 | return; |
1060 | 0 | } |
1061 | 0 | int imageheight = pixGetHeight(image_pix); |
1062 | 0 | ColPartition_IT part_it(image_parts); |
1063 | 0 | for (; !part_it.empty(); part_it.forward()) { |
1064 | 0 | ColPartition *part = part_it.extract(); |
1065 | 0 | TBOX part_box = part->bounding_box(); |
1066 | 0 | BlobRegionType type = part->blob_type(); |
1067 | 0 | if (!ScanForOverlappingText(part_grid, &part_box) || type == BRT_RECTIMAGE || |
1068 | 0 | type == BRT_POLYIMAGE) { |
1069 | | // Mark the box on the image. |
1070 | | // All coords need to be rotated to match the image. |
1071 | 0 | part_box.rotate(rerotate); |
1072 | 0 | int left = part_box.left(); |
1073 | 0 | int top = part_box.top(); |
1074 | 0 | pixRasterop(image_pix, left, imageheight - top, part_box.width(), part_box.height(), PIX_SET, |
1075 | 0 | nullptr, 0, 0); |
1076 | 0 | } |
1077 | 0 | DeletePartition(part); |
1078 | 0 | } |
1079 | 0 | } |
1080 | | |
1081 | | // Locates all the image partitions in the part_grid, that were found by a |
1082 | | // previous call to FindImagePartitions, marks them in the image_mask, |
1083 | | // removes them from the grid, and deletes them. This makes it possible to |
1084 | | // call FindImagePartitions again to produce less broken-up and less |
1085 | | // overlapping image partitions. |
1086 | | // rerotation specifies how to rotate the partition coords to match |
1087 | | // the image_mask, since this function is used after orientation correction. |
1088 | | void ImageFind::TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, |
1089 | 0 | Image image_mask) { |
1090 | | // Extract the noise parts from the grid and put them on a temporary list. |
1091 | 0 | ColPartition_LIST parts_list; |
1092 | 0 | ColPartition_IT part_it(&parts_list); |
1093 | 0 | ColPartitionGridSearch gsearch(part_grid); |
1094 | 0 | gsearch.StartFullSearch(); |
1095 | 0 | ColPartition *part; |
1096 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
1097 | 0 | BlobRegionType type = part->blob_type(); |
1098 | 0 | if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { |
1099 | 0 | part_it.add_after_then_move(part); |
1100 | 0 | gsearch.RemoveBBox(); |
1101 | 0 | } |
1102 | 0 | } |
1103 | | // Render listed noise partitions to the image mask. |
1104 | 0 | MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask); |
1105 | 0 | } |
1106 | | |
1107 | | // Removes and deletes all image partitions that are too small to be worth |
1108 | | // keeping. We have to do this as a separate phase after creating the image |
1109 | | // partitions as the small images are needed to join the larger ones together. |
1110 | 0 | static void DeleteSmallImages(ColPartitionGrid *part_grid) { |
1111 | 0 | if (part_grid != nullptr) { |
1112 | 0 | return; |
1113 | 0 | } |
1114 | 0 | ColPartitionGridSearch gsearch(part_grid); |
1115 | 0 | gsearch.StartFullSearch(); |
1116 | 0 | ColPartition *part; |
1117 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
1118 | | // Only delete rectangular images, since if it became a poly image, it |
1119 | | // is more evidence that it is somehow important. |
1120 | 0 | if (part->blob_type() == BRT_RECTIMAGE) { |
1121 | 0 | const TBOX &part_box = part->bounding_box(); |
1122 | 0 | if (part_box.width() < kMinImageFindSize || part_box.height() < kMinImageFindSize) { |
1123 | | // It is too small to keep. Just make it disappear. |
1124 | 0 | gsearch.RemoveBBox(); |
1125 | 0 | DeletePartition(part); |
1126 | 0 | } |
1127 | 0 | } |
1128 | 0 | } |
1129 | 0 | } |
1130 | | |
1131 | | // Runs a CC analysis on the image_pix mask image, and creates |
1132 | | // image partitions from them, cutting out strong text, and merging with |
1133 | | // nearby image regions such that they don't interfere with text. |
1134 | | // Rotation and rerotation specify how to rotate image coords to match |
1135 | | // the blob and partition coords and back again. |
1136 | | // The input/output part_grid owns all the created partitions, and |
1137 | | // the partitions own all the fake blobs that belong in the partitions. |
1138 | | // Since the other blobs in the other partitions will be owned by the block, |
1139 | | // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this |
1140 | | // situation and collect the image blobs. |
1141 | | void ImageFind::FindImagePartitions(Image image_pix, const FCOORD &rotation, |
1142 | | const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, |
1143 | | DebugPixa *pixa_debug, ColPartitionGrid *part_grid, |
1144 | 0 | ColPartition_LIST *big_parts) { |
1145 | 0 | int imageheight = pixGetHeight(image_pix); |
1146 | 0 | Boxa *boxa; |
1147 | 0 | Pixa *pixa; |
1148 | 0 | ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa); |
1149 | | // Iterate the connected components in the image regions mask. |
1150 | 0 | int nboxes = 0; |
1151 | 0 | if (boxa != nullptr && pixa != nullptr) { |
1152 | 0 | nboxes = boxaGetCount(boxa); |
1153 | 0 | } |
1154 | 0 | for (int i = 0; i < nboxes; ++i) { |
1155 | 0 | l_int32 x, y, width, height; |
1156 | 0 | boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height); |
1157 | 0 | Image pix = pixaGetPix(pixa, i, L_CLONE); |
1158 | 0 | TBOX im_box(x, imageheight - y - height, x + width, imageheight - y); |
1159 | 0 | im_box.rotate(rotation); // Now matches all partitions and blobs. |
1160 | 0 | ColPartitionGridSearch rectsearch(part_grid); |
1161 | 0 | rectsearch.SetUniqueMode(true); |
1162 | 0 | ColPartition_LIST part_list; |
1163 | 0 | DivideImageIntoParts(im_box, rotation, rerotation, pix, &rectsearch, &part_list); |
1164 | 0 | if (textord_tabfind_show_images && pixa_debug != nullptr) { |
1165 | 0 | pixa_debug->AddPix(pix, "ImageComponent"); |
1166 | 0 | tprintf("Component has %d parts\n", part_list.length()); |
1167 | 0 | } |
1168 | 0 | pix.destroy(); |
1169 | 0 | if (!part_list.empty()) { |
1170 | 0 | ColPartition_IT part_it(&part_list); |
1171 | 0 | if (part_list.singleton()) { |
1172 | | // We didn't have to chop it into a polygon to fit around text, so |
1173 | | // try expanding it to merge fragmented image parts, as long as it |
1174 | | // doesn't touch strong text. |
1175 | 0 | ColPartition *part = part_it.extract(); |
1176 | 0 | TBOX text_box(im_box); |
1177 | 0 | MaximalImageBoundingBox(part_grid, &text_box); |
1178 | 0 | while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)) { |
1179 | 0 | ; |
1180 | 0 | } |
1181 | 0 | part_it.set_to_list(&part_list); |
1182 | 0 | part_it.add_after_then_move(part); |
1183 | 0 | im_box = part->bounding_box(); |
1184 | 0 | } |
1185 | 0 | EliminateWeakParts(im_box, part_grid, big_parts, &part_list); |
1186 | | // Iterate the part_list and put the parts into the grid. |
1187 | 0 | for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { |
1188 | 0 | ColPartition *image_part = part_it.extract(); |
1189 | 0 | im_box = image_part->bounding_box(); |
1190 | 0 | part_grid->InsertBBox(true, true, image_part); |
1191 | 0 | if (!part_it.at_last()) { |
1192 | 0 | ColPartition *neighbour = part_it.data_relative(1); |
1193 | 0 | image_part->AddPartner(false, neighbour); |
1194 | 0 | neighbour->AddPartner(true, image_part); |
1195 | 0 | } |
1196 | 0 | } |
1197 | 0 | } |
1198 | 0 | } |
1199 | 0 | boxaDestroy(&boxa); |
1200 | 0 | pixaDestroy(&pixa); |
1201 | 0 | DeleteSmallImages(part_grid); |
1202 | | #ifndef GRAPHICS_DISABLED |
1203 | | if (textord_tabfind_show_images) { |
1204 | | ScrollView *images_win_ = part_grid->MakeWindow(1000, 400, "With Images"); |
1205 | | part_grid->DisplayBoxes(images_win_); |
1206 | | } |
1207 | | #endif |
1208 | 0 | } |
1209 | | |
1210 | | } // namespace tesseract. |