/src/leptonica/src/pageseg.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file pageseg.c |
29 | | * <pre> |
30 | | * |
31 | | * Top level page segmentation |
32 | | * l_int32 pixGetRegionsBinary() |
33 | | * |
34 | | * Halftone region extraction |
35 | | * PIX *pixGenHalftoneMask() **Deprecated wrapper** |
36 | | * PIX *pixGenerateHalftoneMask() |
37 | | * |
38 | | * Textline extraction |
39 | | * PIX *pixGenTextlineMask() |
40 | | * |
41 | | * Textblock extraction |
42 | | * PIX *pixGenTextblockMask() |
43 | | * |
44 | | * Location and extraction of page foreground; cleaning pages |
45 | | * PIX *pixCropImage() |
46 | | * static l_int32 pixMaxCompAfterVClosing() |
47 | | * static l_int32 pixFindPageInsideBlackBorder() |
48 | | * static PIX *pixRescaleForCropping() |
49 | | * PIX *pixCleanImage() |
50 | | * BOX *pixFindPageForeground() |
51 | | * |
52 | | * Extraction of characters from image with only text |
53 | | * l_int32 pixSplitIntoCharacters() |
54 | | * BOXA *pixSplitComponentWithProfile() |
55 | | * |
56 | | * Extraction of lines of text |
57 | | * PIXA *pixExtractTextlines() |
58 | | * PIXA *pixExtractRawTextlines() |
59 | | * |
60 | | * How many text columns |
61 | | * l_int32 pixCountTextColumns() |
62 | | * |
63 | | * Decision: text vs photo |
64 | | * l_int32 pixDecideIfText() |
65 | | * l_int32 pixFindThreshFgExtent() |
66 | | * |
67 | | * Decision: table vs text |
68 | | * l_int32 pixDecideIfTable() |
69 | | * Pix *pixPrepare1bpp() |
70 | | * |
71 | | * Estimate the grayscale background value |
72 | | * l_int32 pixEstimateBackground() |
73 | | * |
74 | | * Largest white or black rectangles in an image |
75 | | * l_int32 pixFindLargeRectangles() |
76 | | * l_int32 pixFindLargestRectangle() |
77 | | * |
78 | | * Generate rectangle inside connected component |
79 | | * BOX *pixFindRectangleInCC() |
80 | | * |
81 | | * Automatic photoinvert for OCR |
82 | | * PIX *pixAutoPhotoinvert() |
83 | | * </pre> |
84 | | */ |
85 | | |
86 | | #ifdef HAVE_CONFIG_H |
87 | | #include <config_auto.h> |
88 | | #endif /* HAVE_CONFIG_H */ |
89 | | |
90 | | #include <math.h> |
91 | | #include "allheaders.h" |
92 | | #include "pix_internal.h" |
93 | | |
94 | | /* These functions are not intended to work on very low-res images */ |
95 | | static const l_int32 MinWidth = 100; |
96 | | static const l_int32 MinHeight = 100; |
97 | | |
98 | | static l_ok pixMaxCompAfterVClosing(PIX *pixs, BOX **pbox); |
99 | | static l_ok pixFindPageInsideBlackBorder(PIX *pixs, BOX **pbox); |
100 | | static PIX *pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h, |
101 | | l_int32 lr_border, l_int32 tb_border, |
102 | | l_float32 maxwiden, PIX **ppixsc); |
103 | | |
104 | | /*------------------------------------------------------------------* |
105 | | * Top level page segmentation * |
106 | | *------------------------------------------------------------------*/ |
107 | | /*! |
108 | | * \brief pixGetRegionsBinary() |
109 | | * |
110 | | * \param[in] pixs 1 bpp, assumed to be 300 to 400 ppi |
111 | | * \param[out] ppixhm [optional] halftone mask |
112 | | * \param[out] ppixtm [optional] textline mask |
113 | | * \param[out] ppixtb [optional] textblock mask |
114 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
115 | | * \return 0 if OK, 1 on error |
116 | | * |
117 | | * <pre> |
118 | | * Notes: |
119 | | * (1) It is best to deskew the image before segmenting. |
120 | | * (2) Passing in %pixadb enables debug output. |
121 | | * </pre> |
122 | | */ |
123 | | l_ok |
124 | | pixGetRegionsBinary(PIX *pixs, |
125 | | PIX **ppixhm, |
126 | | PIX **ppixtm, |
127 | | PIX **ppixtb, |
128 | | PIXA *pixadb) |
129 | 0 | { |
130 | 0 | l_int32 w, h, htfound, tlfound; |
131 | 0 | PIX *pixr, *pix1, *pix2; |
132 | 0 | PIX *pixtext; /* text pixels only */ |
133 | 0 | PIX *pixhm2; /* halftone mask; 2x reduction */ |
134 | 0 | PIX *pixhm; /* halftone mask; */ |
135 | 0 | PIX *pixtm2; /* textline mask; 2x reduction */ |
136 | 0 | PIX *pixtm; /* textline mask */ |
137 | 0 | PIX *pixvws; /* vertical white space mask */ |
138 | 0 | PIX *pixtb2; /* textblock mask; 2x reduction */ |
139 | 0 | PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ |
140 | 0 | PIX *pixtb; /* textblock mask */ |
141 | |
|
142 | 0 | if (ppixhm) *ppixhm = NULL; |
143 | 0 | if (ppixtm) *ppixtm = NULL; |
144 | 0 | if (ppixtb) *ppixtb = NULL; |
145 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
146 | 0 | return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); |
147 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
148 | 0 | if (w < MinWidth || h < MinHeight) { |
149 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
150 | 0 | return 1; |
151 | 0 | } |
152 | | |
153 | | /* 2x reduce, to 150 -200 ppi */ |
154 | 0 | pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
155 | 0 | if (pixadb) pixaAddPix(pixadb, pixr, L_COPY); |
156 | | |
157 | | /* Get the halftone mask */ |
158 | 0 | pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb); |
159 | | |
160 | | /* Get the textline mask from the text pixels */ |
161 | 0 | pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb); |
162 | | |
163 | | /* Get the textblock mask from the textline mask */ |
164 | 0 | pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb); |
165 | 0 | pixDestroy(&pixr); |
166 | 0 | pixDestroy(&pixtext); |
167 | 0 | pixDestroy(&pixvws); |
168 | | |
169 | | /* Remove small components from the mask, where a small |
170 | | * component is defined as one with both width and height < 60 */ |
171 | 0 | pixtbf2 = NULL; |
172 | 0 | if (pixtb2) { |
173 | 0 | pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, |
174 | 0 | L_SELECT_IF_GTE, NULL); |
175 | 0 | pixDestroy(&pixtb2); |
176 | 0 | if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY); |
177 | 0 | } |
178 | | |
179 | | /* Expand all masks to full resolution, and do filling or |
180 | | * small dilations for better coverage. */ |
181 | 0 | pixhm = pixExpandReplicate(pixhm2, 2); |
182 | 0 | pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); |
183 | 0 | pixOr(pixhm, pixhm, pix1); |
184 | 0 | pixDestroy(&pixhm2); |
185 | 0 | pixDestroy(&pix1); |
186 | 0 | if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); |
187 | |
|
188 | 0 | pix1 = pixExpandReplicate(pixtm2, 2); |
189 | 0 | pixtm = pixDilateBrick(NULL, pix1, 3, 3); |
190 | 0 | pixDestroy(&pixtm2); |
191 | 0 | pixDestroy(&pix1); |
192 | 0 | if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY); |
193 | |
|
194 | 0 | if (pixtbf2) { |
195 | 0 | pix1 = pixExpandReplicate(pixtbf2, 2); |
196 | 0 | pixtb = pixDilateBrick(NULL, pix1, 3, 3); |
197 | 0 | pixDestroy(&pixtbf2); |
198 | 0 | pixDestroy(&pix1); |
199 | 0 | if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY); |
200 | 0 | } else { |
201 | 0 | pixtb = pixCreateTemplate(pixs); /* empty mask */ |
202 | 0 | } |
203 | | |
204 | | /* Debug: identify objects that are neither text nor halftone image */ |
205 | 0 | if (pixadb) { |
206 | 0 | pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ |
207 | 0 | pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */ |
208 | 0 | pixaAddPix(pixadb, pix2, L_INSERT); |
209 | 0 | pixDestroy(&pix1); |
210 | 0 | } |
211 | | |
212 | | /* Debug: display textline components with random colors */ |
213 | 0 | if (pixadb) { |
214 | 0 | l_int32 w, h; |
215 | 0 | BOXA *boxa; |
216 | 0 | PIXA *pixa; |
217 | 0 | boxa = pixConnComp(pixtm, &pixa, 8); |
218 | 0 | pixGetDimensions(pixtm, &w, &h, NULL); |
219 | 0 | pix1 = pixaDisplayRandomCmap(pixa, w, h); |
220 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
221 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
222 | 0 | pixaDestroy(&pixa); |
223 | 0 | boxaDestroy(&boxa); |
224 | 0 | } |
225 | | |
226 | | /* Debug: identify the outlines of each textblock */ |
227 | 0 | if (pixadb) { |
228 | 0 | PIXCMAP *cmap; |
229 | 0 | PTAA *ptaa; |
230 | 0 | ptaa = pixGetOuterBordersPtaa(pixtb); |
231 | 0 | lept_mkdir("lept/pageseg"); |
232 | 0 | ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1); |
233 | 0 | pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); |
234 | 0 | cmap = pixGetColormap(pix1); |
235 | 0 | pixcmapResetColor(cmap, 0, 130, 130, 130); |
236 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
237 | 0 | ptaaDestroy(&ptaa); |
238 | 0 | } |
239 | | |
240 | | /* Debug: get b.b. for all mask components */ |
241 | 0 | if (pixadb) { |
242 | 0 | BOXA *bahm, *batm, *batb; |
243 | 0 | bahm = pixConnComp(pixhm, NULL, 4); |
244 | 0 | batm = pixConnComp(pixtm, NULL, 4); |
245 | 0 | batb = pixConnComp(pixtb, NULL, 4); |
246 | 0 | boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm); |
247 | 0 | boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm); |
248 | 0 | boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb); |
249 | 0 | boxaDestroy(&bahm); |
250 | 0 | boxaDestroy(&batm); |
251 | 0 | boxaDestroy(&batb); |
252 | 0 | } |
253 | 0 | if (pixadb) { |
254 | 0 | pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation", |
255 | 0 | "/tmp/lept/pageseg/debug.pdf"); |
256 | 0 | L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__); |
257 | 0 | } |
258 | |
|
259 | 0 | if (ppixhm) |
260 | 0 | *ppixhm = pixhm; |
261 | 0 | else |
262 | 0 | pixDestroy(&pixhm); |
263 | 0 | if (ppixtm) |
264 | 0 | *ppixtm = pixtm; |
265 | 0 | else |
266 | 0 | pixDestroy(&pixtm); |
267 | 0 | if (ppixtb) |
268 | 0 | *ppixtb = pixtb; |
269 | 0 | else |
270 | 0 | pixDestroy(&pixtb); |
271 | |
|
272 | 0 | return 0; |
273 | 0 | } |
274 | | |
275 | | |
276 | | /*------------------------------------------------------------------* |
277 | | * Halftone region extraction * |
278 | | *------------------------------------------------------------------*/ |
279 | | /*! |
280 | | * \brief pixGenHalftoneMask() |
281 | | * |
282 | | * <pre> |
283 | | * Deprecated: |
284 | | * This wrapper avoids an ABI change with tesseract 3.0.4. |
285 | | * It should be removed when we no longer need to support 3.0.4. |
286 | | * The debug parameter is ignored (assumed 0). |
287 | | * </pre> |
288 | | */ |
289 | | PIX * |
290 | | pixGenHalftoneMask(PIX *pixs, |
291 | | PIX **ppixtext, |
292 | | l_int32 *phtfound, |
293 | | l_int32 debug) |
294 | 0 | { |
295 | 0 | return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL); |
296 | 0 | } |
297 | | |
298 | | |
299 | | /*! |
300 | | * \brief pixGenerateHalftoneMask() |
301 | | * |
302 | | * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi |
303 | | * \param[out] ppixtext [optional] text part of pixs |
304 | | * \param[out] phtfound [optional] 1 if the mask is not empty |
305 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
306 | | * \return pixd halftone mask, or NULL on error |
307 | | * |
308 | | * <pre> |
309 | | * Notes: |
310 | | * (1) This is not intended to work on small thumbnails. The |
311 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
312 | | * </pre> |
313 | | */ |
314 | | PIX * |
315 | | pixGenerateHalftoneMask(PIX *pixs, |
316 | | PIX **ppixtext, |
317 | | l_int32 *phtfound, |
318 | | PIXA *pixadb) |
319 | 0 | { |
320 | 0 | l_int32 w, h, empty; |
321 | 0 | PIX *pix1, *pix2, *pixhs, *pixhm, *pixd; |
322 | |
|
323 | 0 | if (ppixtext) *ppixtext = NULL; |
324 | 0 | if (phtfound) *phtfound = 0; |
325 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
326 | 0 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
327 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
328 | 0 | if (w < MinWidth || h < MinHeight) { |
329 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
330 | 0 | return NULL; |
331 | 0 | } |
332 | | |
333 | | /* Compute seed for halftone parts at 8x reduction */ |
334 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0); |
335 | 0 | pix2 = pixOpenBrick(NULL, pix1, 5, 5); |
336 | 0 | pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */ |
337 | 0 | pixDestroy(&pix1); |
338 | 0 | pixDestroy(&pix2); |
339 | 0 | if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY); |
340 | | |
341 | | /* Compute mask for connected regions */ |
342 | 0 | pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); |
343 | 0 | if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); |
344 | | |
345 | | /* Fill seed into mask to get halftone mask */ |
346 | 0 | pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); |
347 | 0 | if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); |
348 | |
|
349 | | #if 0 |
350 | | pixOpenBrick(pixd, pixd, 9, 9); |
351 | | #endif |
352 | | |
353 | | /* Check if mask is empty */ |
354 | 0 | pixZero(pixd, &empty); |
355 | 0 | if (phtfound && !empty) |
356 | 0 | *phtfound = 1; |
357 | | |
358 | | /* Optionally, get all pixels that are not under the halftone mask */ |
359 | 0 | if (ppixtext) { |
360 | 0 | if (empty) |
361 | 0 | *ppixtext = pixCopy(NULL, pixs); |
362 | 0 | else |
363 | 0 | *ppixtext = pixSubtract(NULL, pixs, pixd); |
364 | 0 | if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY); |
365 | 0 | } |
366 | |
|
367 | 0 | pixDestroy(&pixhs); |
368 | 0 | pixDestroy(&pixhm); |
369 | 0 | return pixd; |
370 | 0 | } |
371 | | |
372 | | |
373 | | /*------------------------------------------------------------------* |
374 | | * Textline extraction * |
375 | | *------------------------------------------------------------------*/ |
376 | | /*! |
377 | | * \brief pixGenTextlineMask() |
378 | | * |
379 | | * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi |
380 | | * \param[out] ppixvws vertical whitespace mask |
381 | | * \param[out] ptlfound [optional] 1 if the mask is not empty |
382 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
383 | | * \return pixd textline mask, or NULL on error |
384 | | * |
385 | | * <pre> |
386 | | * Notes: |
387 | | * (1) The input pixs should be deskewed. |
388 | | * (2) pixs should have no halftone pixels. |
389 | | * (3) This is not intended to work on small thumbnails. The |
390 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
391 | | * (4) Both the input image and the returned textline mask |
392 | | * are at the same resolution. |
393 | | * </pre> |
394 | | */ |
395 | | PIX * |
396 | | pixGenTextlineMask(PIX *pixs, |
397 | | PIX **ppixvws, |
398 | | l_int32 *ptlfound, |
399 | | PIXA *pixadb) |
400 | 0 | { |
401 | 0 | l_int32 w, h, empty; |
402 | 0 | PIX *pix1, *pix2, *pixvws, *pixd; |
403 | |
|
404 | 0 | if (ptlfound) *ptlfound = 0; |
405 | 0 | if (!ppixvws) |
406 | 0 | return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL); |
407 | 0 | *ppixvws = NULL; |
408 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
409 | 0 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
410 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
411 | 0 | if (w < MinWidth || h < MinHeight) { |
412 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
413 | 0 | return NULL; |
414 | 0 | } |
415 | | |
416 | | /* First we need a vertical whitespace mask. Invert the image. */ |
417 | 0 | pix1 = pixInvert(NULL, pixs); |
418 | | |
419 | | /* The whitespace mask will break textlines where there |
420 | | * is a large amount of white space below or above. |
421 | | * This can be prevented by identifying regions of the |
422 | | * inverted image that have large horizontal extent (bigger than |
423 | | * the separation between columns) and significant |
424 | | * vertical extent (bigger than the separation between |
425 | | * textlines), and subtracting this from the bg. */ |
426 | 0 | pix2 = pixMorphCompSequence(pix1, "o80.60", 0); |
427 | 0 | pixSubtract(pix1, pix1, pix2); |
428 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
429 | 0 | pixDestroy(&pix2); |
430 | | |
431 | | /* Identify vertical whitespace by opening the remaining bg. |
432 | | * o5.1 removes thin vertical bg lines and o1.200 extracts |
433 | | * long vertical bg lines. */ |
434 | 0 | pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0); |
435 | 0 | *ppixvws = pixvws; |
436 | 0 | if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY); |
437 | 0 | pixDestroy(&pix1); |
438 | | |
439 | | /* Three steps to getting text line mask: |
440 | | * (1) close the characters and words in the textlines |
441 | | * (2) open the vertical whitespace corridors back up |
442 | | * (3) small opening to remove noise */ |
443 | 0 | pix1 = pixMorphSequence(pixs, "c30.1", 0); |
444 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
445 | 0 | pixd = pixSubtract(NULL, pix1, pixvws); |
446 | 0 | pixOpenBrick(pixd, pixd, 3, 3); |
447 | 0 | if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); |
448 | 0 | pixDestroy(&pix1); |
449 | | |
450 | | /* Check if text line mask is empty */ |
451 | 0 | if (ptlfound) { |
452 | 0 | pixZero(pixd, &empty); |
453 | 0 | if (!empty) |
454 | 0 | *ptlfound = 1; |
455 | 0 | } |
456 | |
|
457 | 0 | return pixd; |
458 | 0 | } |
459 | | |
460 | | |
461 | | /*------------------------------------------------------------------* |
462 | | * Textblock extraction * |
463 | | *------------------------------------------------------------------*/ |
464 | | /*! |
465 | | * \brief pixGenTextblockMask() |
466 | | * |
467 | | * \param[in] pixs 1 bpp, textline mask, assumed to be 150 to 200 ppi |
468 | | * \param[in] pixvws vertical white space mask |
469 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
470 | | * \return pixd textblock mask, or NULL if empty or on error |
471 | | * |
472 | | * <pre> |
473 | | * Notes: |
474 | | * (1) Both the input masks (textline and vertical white space) and |
475 | | * the returned textblock mask are at the same resolution. |
476 | | * (2) This is not intended to work on small thumbnails. The |
477 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
478 | | * (3) The result is somewhat noisy, in that small "blocks" of |
479 | | * text may be included. These can be removed by post-processing, |
480 | | * using, e.g., |
481 | | * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, |
482 | | * L_SELECT_IF_GTE, NULL); |
483 | | * </pre> |
484 | | */ |
485 | | PIX * |
486 | | pixGenTextblockMask(PIX *pixs, |
487 | | PIX *pixvws, |
488 | | PIXA *pixadb) |
489 | 0 | { |
490 | 0 | l_int32 w, h, empty; |
491 | 0 | PIX *pix1, *pix2, *pix3, *pixd; |
492 | |
|
493 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
494 | 0 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
495 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
496 | 0 | if (w < MinWidth || h < MinHeight) { |
497 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
498 | 0 | return NULL; |
499 | 0 | } |
500 | 0 | if (!pixvws) |
501 | 0 | return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL); |
502 | | |
503 | | /* Join pixels vertically to make a textblock mask */ |
504 | 0 | pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); |
505 | 0 | pixZero(pix1, &empty); |
506 | 0 | if (empty) { |
507 | 0 | pixDestroy(&pix1); |
508 | 0 | L_INFO("no fg pixels in textblock mask\n", __func__); |
509 | 0 | return NULL; |
510 | 0 | } |
511 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
512 | | |
513 | | /* Solidify the textblock mask and remove noise: |
514 | | * (1) For each cc, close the blocks and dilate slightly |
515 | | * to form a solid mask. |
516 | | * (2) Small horizontal closing between components. |
517 | | * (3) Open the white space between columns, again. |
518 | | * (4) Remove small components. */ |
519 | 0 | pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL); |
520 | 0 | pixCloseSafeBrick(pix2, pix2, 10, 1); |
521 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
522 | 0 | pix3 = pixSubtract(NULL, pix2, pixvws); |
523 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
524 | 0 | pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH, |
525 | 0 | L_SELECT_IF_GTE, NULL); |
526 | 0 | if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); |
527 | |
|
528 | 0 | pixDestroy(&pix1); |
529 | 0 | pixDestroy(&pix2); |
530 | 0 | pixDestroy(&pix3); |
531 | 0 | return pixd; |
532 | 0 | } |
533 | | |
534 | | |
535 | | /*------------------------------------------------------------------* |
536 | | * Location and extraction of page foreground; cleaning pages * |
537 | | *------------------------------------------------------------------*/ |
538 | | /*! |
539 | | * \brief pixCropImage() |
540 | | * |
541 | | * \param[in] pixs full resolution (any type or depth) |
542 | | * \param[in] lr_clear full res pixels cleared at left and right sides |
543 | | * \param[in] tb_clear full res pixels cleared at top and bottom sides |
544 | | * \param[in] edgeclean parameter for removing edge noise (-1 to 15) |
545 | | * default = 0 (no removal); |
546 | | * 15 is maximally aggressive for random noise |
547 | | * -1 for aggressively removing side noise |
548 | | * -2 to extract page embedded in black background |
549 | | * \param[in] lr_border full res final "added" pixels on left and right |
550 | | * \param[in] tb_border full res final "added" pixels on top and bottom |
551 | | * \param[in] maxwiden max fractional horizontal stretch allowed |
552 | | * \param[in] printwiden 0 to skip, 1 for 8.5x11, 2 for A4 |
553 | | * \param[in] *debugfile [optional] usually is NULL |
554 | | * \param[out] *pcropbox [optional] crop box at full resolution |
555 | | * \return cropped pix, or NULL on error |
556 | | * |
557 | | * <pre> |
558 | | * Notes: |
559 | | * (1) This binarizes and crops a page image. |
560 | | * (a) Binarizes if necessary and does 2x reduction. |
561 | | * (b) Clears near the border by %lr_clear and %tb_clear full |
562 | | * resolution pixels. (This is done at 2x reduction.) |
563 | | * (c) If %edgeclean > 0, it removes isolated sets of pixels, |
564 | | * using a close/open operation of size %edgeclean + 1. |
565 | | * If %edgeclean == -1, it uses a large vertical morphological |
566 | | * close/open and the extraction of either the largest |
567 | | * resulting connected component (or the largest two components |
568 | | * if the page has 2 columns), to eliminate noise on left |
569 | | * and right sides. |
570 | | * If %edgeclean == -2, it extracts the page region from a |
571 | | * possible exterior black surround. |
572 | | * (d) Find the bounding box of remaining fg pixels and scales |
573 | | * the box up 2x back to full resolution. |
574 | | * (e) Crops the binarized image to the bounding box. |
575 | | * (f) Slightly thickens long horizontal lines. |
576 | | * (g) Rescales this image to fit within the original image, |
577 | | * less lr_border on the sides and tb_border above and below. |
578 | | * The rescaling is done isomorphically with a (possible) |
579 | | * optional additional widening. Suggest the additional |
580 | | * widening factor not exceed 1.15. |
581 | | * (h) Optionally do additional horizontal stretch if needed to |
582 | | * better fill a printed page. Default is 0 to skip; 1 to |
583 | | * widen for 8.5x11 page, 2 for A4 page. |
584 | | * Note that (b) - (d) are done at 2x reduction for efficiency. |
585 | | * (2) Side clearing must not exceed 1/6 of the dimension on that side. |
586 | | * (3) The clear and border pixel parameters must be >= 0. |
587 | | * (4) The "clear" parameters act on the input image, whereas the |
588 | | * "border" parameters act to give a white border to the final |
589 | | * image. They are not literally added, because the input and final |
590 | | * images are the same size. If the resulting images are to be |
591 | | * printed, it is useful to have border pixel parameters of at |
592 | | * least 60 at 300 ppi, to avoid losing content at the edges. |
593 | | * (5) This is not intended to work on small thumbnails. The |
594 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
595 | | * (6) Step (f) above helps with orthographically-produced music notation, |
596 | | * where the horizontal staff lines can be very thin and thus |
597 | | * subject to printer alias. |
598 | | * (7) With orthographically-produced (as opposed to scanned) images, |
599 | | * there is no scan noise, so you should skip noise removal |
600 | | * by setting %edgeclean = 0. |
601 | | * (8) If you are not concerned with printing on paper, use the |
602 | | * default value 0 for %printwiden. Widening only takes place |
603 | | * if the ratio h/w exceeds the specified paper size by 3%, |
604 | | * and the horizontal scaling factor will not exceed 1.25. |
605 | | * </pre> |
606 | | */ |
607 | | PIX * |
608 | | pixCropImage(PIX *pixs, |
609 | | l_int32 lr_clear, |
610 | | l_int32 tb_clear, |
611 | | l_int32 edgeclean, |
612 | | l_int32 lr_border, |
613 | | l_int32 tb_border, |
614 | | l_float32 maxwiden, |
615 | | l_int32 printwiden, |
616 | | const char *debugfile, |
617 | | BOX **pcropbox) |
618 | 0 | { |
619 | 0 | char cmd[64]; |
620 | 0 | l_int32 w, h, val, ret; |
621 | 0 | l_float32 r1, r2; |
622 | 0 | BOX *box1, *box2; |
623 | 0 | PIX *pix1, *pix2, *pix3, *pix4; |
624 | 0 | PIXA *pixa1; |
625 | |
|
626 | 0 | if (pcropbox) *pcropbox = NULL; |
627 | 0 | if (!pixs) |
628 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
629 | 0 | if (edgeclean > 15) { |
630 | 0 | L_WARNING("edgeclean > 15; setting to 15\n", __func__); |
631 | 0 | edgeclean = 15; |
632 | 0 | } |
633 | 0 | if (edgeclean < -1) { |
634 | 0 | lept_stderr("Using edgeclean = -2\n"); |
635 | 0 | edgeclean = -2; |
636 | 0 | } |
637 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
638 | 0 | if (w < MinWidth || h < MinHeight) { |
639 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
640 | 0 | return NULL; |
641 | 0 | } |
642 | 0 | if (lr_clear < 0) lr_clear = 0; |
643 | 0 | if (tb_clear < 0) tb_clear = 0; |
644 | 0 | if (lr_border < 0) lr_border = 0; |
645 | 0 | if (tb_border < 0) tb_border = 0; |
646 | 0 | if (lr_clear > w / 6 || tb_clear > h / 6) { |
647 | 0 | L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n", |
648 | 0 | __func__, w / 6, h / 6); |
649 | 0 | return NULL; |
650 | 0 | } |
651 | 0 | if (maxwiden > 1.15) |
652 | 0 | L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n", |
653 | 0 | __func__, maxwiden); |
654 | 0 | if (printwiden < 0 || printwiden > 2) printwiden = 0; |
655 | 0 | pixa1 = (debugfile) ? pixaCreate(5) : NULL; |
656 | 0 | if (pixa1) pixaAddPix(pixa1, pixs, L_COPY); |
657 | | |
658 | | /* Binarize if necessary and 2x reduction */ |
659 | 0 | pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1); |
660 | 0 | pix2 = pixReduceRankBinary2(pix1, 2, NULL); |
661 | | |
662 | | /* Clear out pixels near the image edges */ |
663 | 0 | pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2, |
664 | 0 | tb_clear / 2, PIX_CLR); |
665 | 0 | if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT); |
666 | | |
667 | | /* Choose one of three methods for extracting foreground pixels: |
668 | | * (1) Include all foreground pixels |
669 | | * (2) Do a morphological close/open to remove noise throughout |
670 | | * the image before finding a b.b. for remaining f.g. pixels |
671 | | * (3) Do a large vertical closing and choose the largest (by area) |
672 | | * component to avoid foreground noise on left and right sides */ |
673 | 0 | if (edgeclean == 0) { |
674 | 0 | ret = pixClipToForeground(pix2, NULL, &box1); |
675 | 0 | } else if (edgeclean > 0) { |
676 | 0 | val = edgeclean + 1; |
677 | 0 | snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val); |
678 | 0 | pix3 = pixMorphSequence(pix2, cmd, 0); |
679 | 0 | ret = pixClipToForeground(pix3, NULL, &box1); |
680 | 0 | pixDestroy(&pix3); |
681 | 0 | } else if (edgeclean == -1) { |
682 | 0 | ret = pixMaxCompAfterVClosing(pix2, &box1); |
683 | 0 | } else { /* edgeclean == -2 */ |
684 | 0 | ret = pixFindPageInsideBlackBorder(pix2, &box1); |
685 | 0 | } |
686 | 0 | pixDestroy(&pix2); |
687 | 0 | if (ret) { |
688 | 0 | L_ERROR("no returned b.b. for foreground\n", __func__); |
689 | 0 | boxDestroy(&box1); |
690 | 0 | pixDestroy(&pix1); |
691 | 0 | pixaDestroy(&pixa1); |
692 | 0 | return NULL; |
693 | 0 | } |
694 | | |
695 | | /* Transform to full resolution */ |
696 | 0 | box2 = boxTransform(box1, 0, 0, 2.0, 2.0); /* full res */ |
697 | 0 | boxDestroy(&box1); |
698 | 0 | if (pixa1) { |
699 | 0 | pix2 = pixCopy(NULL, pix1); |
700 | 0 | pixRenderBoxArb(pix2, box2, 5, 255, 0, 0); |
701 | 0 | pixaAddPix(pixa1, pix2, L_INSERT); |
702 | 0 | } |
703 | | |
704 | | /* Grab the foreground region */ |
705 | 0 | pix2 = pixClipRectangle(pix1, box2, NULL); |
706 | 0 | pixDestroy(&pix1); |
707 | | |
708 | | /* Slightly thicken long horizontal lines. This prevents loss of |
709 | | * printed thin music staff lines due to aliasing. */ |
710 | 0 | pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0); |
711 | 0 | pixOr(pix2, pix2, pix3); |
712 | 0 | pixDestroy(&pix3); |
713 | | |
714 | | /* Rescale the fg and paste into the input-sized image */ |
715 | 0 | pix3 = pixRescaleForCropping(pix2, w, h, lr_border, tb_border, |
716 | 0 | maxwiden, NULL); |
717 | 0 | pixDestroy(&pix2); |
718 | 0 | if (pixa1) { |
719 | 0 | pix2 = pixCopy(NULL, pix3); |
720 | 0 | pixaAddPix(pixa1, pix2, L_INSERT); |
721 | 0 | } |
722 | | |
723 | | /* Optionally widen image if possible, for printing on 8.5 x 11 inch |
724 | | * or A4 paper. Specifically, widen the image if the h/w asperity |
725 | | * ratio of the input image exceeds that of the selected paper by |
726 | | * more than 3%. Do not widen by more than 20%. */ |
727 | 0 | r1 = (l_float32)h / (l_float32)w; |
728 | 0 | r2 = 0.0; /* for default case */ |
729 | 0 | if (printwiden == 1) /* standard */ |
730 | 0 | r2 = r1 / 1.294; |
731 | 0 | else if (printwiden == 2) /* A4 */ |
732 | 0 | r2 = r1 / 1.414; |
733 | 0 | if (r2 > 1.03) { |
734 | 0 | r2 = L_MIN(r2, 1.20); |
735 | 0 | lept_stderr("oversize h/w ratio by factor %6.3f\n", r2); |
736 | 0 | pix4 = pixScale(pix3, r2, 1.0); |
737 | 0 | } else { |
738 | 0 | pix4 = pixClone(pix3); |
739 | 0 | } |
740 | 0 | pixDestroy(&pix3); |
741 | |
|
742 | 0 | if (pcropbox) |
743 | 0 | *pcropbox = box2; |
744 | 0 | else |
745 | 0 | boxDestroy(&box2); |
746 | 0 | if (pixa1) { |
747 | 0 | pixaAddPix(pixa1, pix4, L_COPY); |
748 | 0 | lept_stderr("Writing debug file: %s\n", debugfile); |
749 | 0 | pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile); |
750 | 0 | pixaDestroy(&pixa1); |
751 | 0 | } |
752 | 0 | return pix4; |
753 | 0 | } |
754 | | |
755 | | |
756 | | /*! |
757 | | * \brief pixMaxCompAfterVClosing() |
758 | | * |
759 | | * \param[in] pixs 1 bpp (input at 2x reduction) |
760 | | * \param[out] **pbox main region at input resolution (2x reduction) |
761 | | * \return 0 if OK, 1 on error |
762 | | * |
763 | | * <pre> |
764 | | * Notes: |
765 | | * (1) This removes foreground noise along left and right edges, |
766 | | * returning a bounding box for the remaining foreground pixels |
767 | | * at the input resolution. |
768 | | * (2) The input %pixs should be at a resolution 100 - 150 ppi. |
769 | | * (3) It does two 2x level1 rank binary reductions, followed |
770 | | * by a large vertical close/open, with a very small horizontal |
771 | | * close/open, and then a 4x expansion back to the input resolution. |
772 | | * (4) To work properly with 2-column layout, if the largest and |
773 | | * second-largest regions are comparable in size, both are included. |
774 | | * (5) This is used as an option to pixCropImage(), when given |
775 | | * an %edgecrop parameter of -1. |
776 | | * </pre> |
777 | | */ |
778 | | static l_ok |
779 | | pixMaxCompAfterVClosing(PIX *pixs, |
780 | | BOX **pbox) |
781 | 0 | { |
782 | 0 | l_int32 w1, h1, w2, h2, n, empty; |
783 | 0 | BOX *box1, *box2; |
784 | 0 | BOXA *boxa1, *boxa2; |
785 | 0 | PIX *pix1; |
786 | |
|
787 | 0 | if (!pbox) |
788 | 0 | return ERROR_INT("pbox not defined", __func__, 1); |
789 | 0 | *pbox = NULL; |
790 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
791 | 0 | return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); |
792 | | |
793 | | /* Strong vertical closing */ |
794 | 0 | pix1 = pixMorphSequence(pixs, "r11 + c3.80 + o3.80 + x4", 0); |
795 | 0 | pixZero(pix1, &empty); |
796 | 0 | if (empty) { |
797 | 0 | pixDestroy(&pix1); |
798 | 0 | return ERROR_INT("pix1 is empty", __func__, 1); |
799 | 0 | } |
800 | | |
801 | | /* Find the two c.c. with largest area. If they are not comparable |
802 | | * in area, return the bounding box of the largest; otherwise, |
803 | | * return the bounding box of both regions. */ |
804 | 0 | boxa1 = pixConnCompBB(pix1, 8); |
805 | 0 | pixDestroy(&pix1); |
806 | 0 | boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); |
807 | 0 | if ((n = boxaGetCount(boxa2)) == 1) { |
808 | 0 | *pbox = boxaGetBox(boxa2, 0, L_COPY); |
809 | 0 | } else { /* 2 or more */ |
810 | 0 | box1 = boxaGetBox(boxa2, 0, L_COPY); |
811 | 0 | box2 = boxaGetBox(boxa2, 1, L_COPY); |
812 | 0 | boxGetGeometry(box1, NULL, NULL, &w1, &h1); |
813 | 0 | boxGetGeometry(box2, NULL, NULL, &w2, &h2); |
814 | 0 | if (((l_float32)(w2 * h2) / (l_float32)(w1 * h1)) > 0.7) { |
815 | 0 | *pbox = boxBoundingRegion(box1, box2); |
816 | 0 | boxDestroy(&box1); |
817 | 0 | } else { |
818 | 0 | *pbox = box1; |
819 | 0 | } |
820 | 0 | boxDestroy(&box2); |
821 | 0 | } |
822 | 0 | boxaDestroy(&boxa1); |
823 | 0 | boxaDestroy(&boxa2); |
824 | 0 | return 0; |
825 | 0 | } |
826 | | |
827 | | |
828 | | /*! |
829 | | * \brief pixFindPageInsideBlackBorder() |
830 | | * |
831 | | * \param[in] pixs 1 bpp (input at 2x reduction) |
832 | | * \param[out] **pbox page region at input resolution (2x reduction) |
833 | | * \return 0 if OK, 1 on error |
834 | | * |
835 | | * <pre> |
836 | | * Notes: |
837 | | * (1) This extracts the page region from the image, returning a |
838 | | * bounding box for the remaining foreground pixels. It is designed |
839 | | * to work when the page is within a fairly solid black border. |
840 | | * (2) It returns a bounding box for the page region at the input res. |
841 | | * (3) The input %pixs is expected to be at a resolution 100 - 150 ppi. |
842 | | * (4) This is used as an option to pixCropImage(), when given an |
843 | | * %edgecrop parameter of -2. |
844 | | * </pre> |
845 | | */ |
846 | | static l_ok |
847 | | pixFindPageInsideBlackBorder(PIX *pixs, |
848 | | BOX **pbox) |
849 | 0 | { |
850 | 0 | l_int32 empty, x, y; |
851 | 0 | BOX *box1, *box2, *box3; |
852 | 0 | BOXA *boxa1, *boxa2; |
853 | 0 | PIX *pix1, *pix2, *pix3; |
854 | |
|
855 | 0 | if (!pbox) |
856 | 0 | return ERROR_INT("pbox not defined", __func__, 1); |
857 | 0 | *pbox = NULL; |
858 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
859 | 0 | return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); |
860 | | |
861 | | /* Reduce 4x and remove some remaining small foreground */ |
862 | 0 | pix1 = pixMorphSequence(pixs, "r22 + c5.5 + o7.7", 0); |
863 | 0 | pixZero(pix1, &empty); |
864 | 0 | if (empty) { |
865 | 0 | pixDestroy(&pix1); |
866 | 0 | return ERROR_INT("pix1 is empty", __func__, 1); |
867 | 0 | } |
868 | | |
869 | | /* Photoinvert image and Find the c.c. with largest area. */ |
870 | 0 | pixInvert(pix1, pix1); |
871 | 0 | pix2 = pixMorphSequence(pix1, "c11.11 + o11.11", 0); |
872 | 0 | pixDestroy(&pix1); |
873 | 0 | boxa1 = pixConnCompBB(pix2, 8); |
874 | 0 | pixDestroy(&pix2); |
875 | 0 | boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); |
876 | 0 | box1 = boxaGetBox(boxa2, 0, L_COPY); /* largest by area */ |
877 | 0 | boxAdjustSides(box1, box1, 5, -5, 5, -5); |
878 | 0 | box2 = boxTransform(box1, 0, 0, 4.0, 4.0); |
879 | | |
880 | | /* Crop this page from the original image and find the foreground */ |
881 | 0 | pix3 = pixClipRectangle(pixs, box2, NULL); |
882 | 0 | pixClipToForeground(pix3, NULL, &box3); |
883 | 0 | pixDestroy(&pix3); |
884 | 0 | boxGetGeometry(box2, &x, &y, NULL, NULL); |
885 | 0 | *pbox = boxTransform(box3, x, y, 1.0, 1.0); |
886 | 0 | boxaDestroy(&boxa1); |
887 | 0 | boxaDestroy(&boxa2); |
888 | 0 | boxDestroy(&box1); |
889 | 0 | boxDestroy(&box2); |
890 | 0 | boxDestroy(&box3); |
891 | 0 | return 0; |
892 | 0 | } |
893 | | |
894 | | |
895 | | /*! |
896 | | * \brief pixRescaleForCropping() |
897 | | * |
898 | | * \param[in] pixs 1 bpp |
899 | | * \param[in] w width of output image |
900 | | * \param[in] h height of output image |
901 | | * \param[in] lr_border cleared final border pixels on left and right |
902 | | * \param[in] tb_border cleared final border pixels on top and bottom |
903 | | * \param[in] maxwiden max fractional horizontal stretch allowed; >= 1.0 |
904 | | * \param[out] *ppixsc [optional] rescaled foreground region |
905 | | * \return pixd output image, or NULL on error |
906 | | * |
907 | | * <pre> |
908 | | * Notes: |
909 | | * (1) This rescales %pixs to fit maximally within an image of |
910 | | * size (w x h), under two conditions: |
911 | | * (a) the final image has cleared border regions given by the |
912 | | * input parameters %lr_border and %tb_border, and |
913 | | * (b) the input image is first isotropically scaled to fit |
914 | | * maximally within the allowed final region, and then further |
915 | | * maximally widened, subject to the constraints of the |
916 | | * cleared border and the %maxwiden parameter. |
917 | | * (2) The cleared border pixel parameters must be >= 0. |
918 | | * (3) If there is extra horizontal stretching by a factor |
919 | | * %maxwiden larger than about 1.15, the appearance may be |
920 | | * unpleasingly distorted; hence the suggestion not to exceed it. |
921 | | * </pre> |
922 | | */ |
923 | | static PIX * |
924 | | pixRescaleForCropping(PIX *pixs, |
925 | | l_int32 w, |
926 | | l_int32 h, |
927 | | l_int32 lr_border, |
928 | | l_int32 tb_border, |
929 | | l_float32 maxwiden, |
930 | | PIX **ppixsc) |
931 | 0 | { |
932 | 0 | static l_int32 first_time = TRUE; |
933 | 0 | l_int32 wi, hi, wmax, hmax, wn, wf, hf, xf; |
934 | 0 | l_float32 ratio, scaleh, scalew, scalewid; |
935 | 0 | PIX *pix1, *pixd; |
936 | |
|
937 | 0 | if (ppixsc) *ppixsc = NULL; |
938 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
939 | 0 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
940 | 0 | if (lr_border < 0) lr_border = 0; |
941 | 0 | if (tb_border < 0) tb_border = 0; |
942 | 0 | maxwiden = L_MAX(1.0, maxwiden); |
943 | 0 | if (maxwiden > 1.15) |
944 | 0 | L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n", |
945 | 0 | __func__, maxwiden); |
946 | | |
947 | | /* Rescale the foreground region. |
948 | | * First, decide if scaling is to full width or full height. |
949 | | * If scaling to full height, determine how much additional |
950 | | * width widening is possible, given the maxwiden constraint. |
951 | | * If scaling to full width, both width and height are |
952 | | * scaled isotropically. Scaling is done so that the resulting |
953 | | * foreground is maximally widened, so it can be horizontally |
954 | | * centered in an image of size (w x h), less %lr_border |
955 | | * on each side. */ |
956 | 0 | pixGetDimensions(pixs, &wi, &hi, NULL); |
957 | 0 | wmax = w - 2 * lr_border; |
958 | 0 | hmax = h - 2 * tb_border; |
959 | 0 | ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi); |
960 | 0 | if (ratio >= 1.0) { /* width can be widened after isotropic scaling */ |
961 | 0 | scaleh = (l_float32)hmax / (l_float32)hi; |
962 | 0 | wn = scaleh * wi; /* scaled but not widened */ |
963 | 0 | scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn); |
964 | 0 | scalew = scaleh * scalewid; |
965 | 0 | wf = scalew * wi; |
966 | 0 | hf = hmax; /* scale to full height */ |
967 | 0 | pix1 = pixScale(pixs, scalew, scaleh); |
968 | 0 | if (first_time == TRUE) { |
969 | 0 | lept_stderr("Width stretched by factor %5.3f\n", scalewid); |
970 | 0 | first_time = FALSE; |
971 | 0 | } |
972 | 0 | xf = (w - wf) / 2.0; |
973 | 0 | } else { /* width cannot be widened after isotropic scaling */ |
974 | 0 | scalew = (l_float32)wmax / (l_float32)wi; |
975 | 0 | pix1 = pixScale(pixs, scalew, scalew); |
976 | 0 | wf = wmax; /* scale to full width */ |
977 | 0 | hf = scalew * hi; /* no extra vertical stretching allowed */ |
978 | 0 | xf = lr_border; |
979 | 0 | } |
980 | | |
981 | | /* Paste it, horizontally centered and vertically placed as |
982 | | * high as allowed (by %tb_border) into the final page image. */ |
983 | 0 | pixd = pixCreate(w, h, 1); |
984 | 0 | pixRasterop(pixd, xf, tb_border, wf, hf, PIX_SRC, pix1, 0, 0); |
985 | |
|
986 | 0 | if (ppixsc) |
987 | 0 | *ppixsc = pix1; |
988 | 0 | else |
989 | 0 | pixDestroy(&pix1); |
990 | 0 | return pixd; |
991 | 0 | } |
992 | | |
993 | | |
994 | | /*! |
995 | | * \brief pixCleanImage() |
996 | | * |
997 | | * \param[in] pixs full resolution (any type or depth) |
998 | | * \param[in] contrast vary contrast: 1 = lightest; 10 = darkest; |
999 | | * suggest 1 unless light features are being lost |
1000 | | * \param[in] rotation cw by 90 degrees: {0,1,2,3} represent |
1001 | | * 0, 90, 180 and 270 degree cw rotations |
1002 | | * \param[in] scale 1 (no scaling) or 2 (2x upscaling) |
1003 | | * \param[in] opensize opening size of structuring element for noise |
1004 | | * removal: {0 or 1 to skip; 2, 3 for opening} |
1005 | | * \return cleaned pix, or NULL on error |
1006 | | * |
1007 | | * <pre> |
1008 | | * Notes: |
1009 | | * (1) This deskews, optionally rotates and darkens, cleans background |
1010 | | * to white, binarizes and optionally removes small noise. |
1011 | | * (2) For color and grayscale input, local background normalization is |
1012 | | * done to 200, and a threshold of 180 sets the maximum foreground |
1013 | | * value in the normalized image. |
1014 | | * (3) The %contrast parameter adjusts the binarization to avoid losing |
1015 | | * lighter input pixels. Contrast is increased as %contrast increases |
1016 | | * from 1 to 10. |
1017 | | * (4) The %scale parameter controls the thresholding to 1 bpp. Two values: |
1018 | | * 1 = threshold |
1019 | | * 2 = linear interpolated 2x upscaling before threshold. |
1020 | | * (5) The %opensize parameter is the size of a square SEL used with |
1021 | | * opening to remove small speckle noise. Allowed open sizes are 2,3. |
1022 | | * If this is to be used, try 2 before 3. |
1023 | | * (6) This does the image processing for cleanTo1bppFilesToPdf() and |
1024 | | * prog/cleanpdf.c. |
1025 | | * </pre> |
1026 | | */ |
1027 | | PIX * |
1028 | | pixCleanImage(PIX *pixs, |
1029 | | l_int32 contrast, |
1030 | | l_int32 rotation, |
1031 | | l_int32 scale, |
1032 | | l_int32 opensize) |
1033 | 0 | { |
1034 | 0 | char sequence[32]; |
1035 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
1036 | |
|
1037 | 0 | if (!pixs) |
1038 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
1039 | 0 | if (rotation < 0 || rotation > 3) { |
1040 | 0 | L_ERROR("invalid rotation = %d; rotation must be in {0,1,2,3}\n", |
1041 | 0 | __func__, rotation); |
1042 | 0 | return NULL; |
1043 | 0 | } |
1044 | 0 | if (contrast < 1 || contrast > 10) { |
1045 | 0 | L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n", |
1046 | 0 | __func__, contrast); |
1047 | 0 | return NULL; |
1048 | 0 | } |
1049 | 0 | if (scale != 1 && scale != 2) { |
1050 | 0 | L_ERROR("invalid scale = %d; scale must be 1 or 2\n", |
1051 | 0 | __func__, opensize); |
1052 | 0 | return NULL; |
1053 | 0 | } |
1054 | 0 | if (opensize > 3) { |
1055 | 0 | L_ERROR("invalid opensize = %d; opensize must be <= 3\n", |
1056 | 0 | __func__, opensize); |
1057 | 0 | return NULL; |
1058 | 0 | } |
1059 | | |
1060 | 0 | if (pixGetDepth(pixs) == 1) { |
1061 | 0 | if (rotation > 0) |
1062 | 0 | pix1 = pixRotateOrth(pixs, rotation); |
1063 | 0 | else |
1064 | 0 | pix1 = pixClone(pixs); |
1065 | 0 | pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL); |
1066 | 0 | if (scale == 2) |
1067 | 0 | pix4 = pixExpandBinaryReplicate(pix2, 2, 2); |
1068 | 0 | else /* scale == 1 */ |
1069 | 0 | pix4 = pixClone(pix2); |
1070 | 0 | } else { |
1071 | 0 | pix1 = pixConvertTo8MinMax(pixs); |
1072 | 0 | if (rotation > 0) |
1073 | 0 | pix2 = pixRotateOrth(pix1, rotation); |
1074 | 0 | else |
1075 | 0 | pix2 = pixClone(pix1); |
1076 | 0 | pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL); |
1077 | 0 | pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale); |
1078 | 0 | pixDestroy(&pix3); |
1079 | 0 | } |
1080 | |
|
1081 | 0 | if (opensize == 2 || opensize == 3) { |
1082 | 0 | snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize); |
1083 | 0 | pix5 = pixMorphSequence(pix4, sequence, 0); |
1084 | 0 | } else { |
1085 | 0 | pix5 = pixClone(pix4); |
1086 | 0 | } |
1087 | |
|
1088 | 0 | pixDestroy(&pix1); |
1089 | 0 | pixDestroy(&pix2); |
1090 | 0 | pixDestroy(&pix4); |
1091 | 0 | return pix5; |
1092 | 0 | } |
1093 | | |
1094 | | |
1095 | | /*! |
1096 | | * \brief pixFindPageForeground() |
1097 | | * |
1098 | | * \param[in] pixs full resolution (any type or depth) |
1099 | | * \param[in] threshold for binarization; typically about 128 |
1100 | | * \param[in] mindist min distance of text from border to allow |
1101 | | * cleaning near border; at 2x reduction, this |
1102 | | * should be larger than 50; typically about 70 |
1103 | | * \param[in] erasedist when conditions are satisfied, erase anything |
1104 | | * within this distance of the edge; |
1105 | | * typically 20-30 at 2x reduction |
1106 | | * \param[in] showmorph debug: set to a negative integer to show steps |
1107 | | * in generating masks; this is typically used |
1108 | | * for debugging region extraction |
1109 | | * \param[in] pixac debug: allocate outside and pass this in to |
1110 | | * accumulate results of each call to this function, |
1111 | | * which can be displayed in a mosaic or a pdf. |
1112 | | * \return box region including foreground, with some pixel noise |
1113 | | * removed, or NULL if not found |
1114 | | * |
1115 | | * <pre> |
1116 | | * Notes: |
1117 | | * (1) This doesn't simply crop to the fg. It attempts to remove |
1118 | | * pixel noise and junk at the edge of the image before cropping. |
1119 | | * The input %threshold is used if pixs is not 1 bpp. |
1120 | | * (2) This is not intended to work on small thumbnails. The |
1121 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
1122 | | * (3) Debug: set showmorph to display the intermediate image in |
1123 | | * the morphological operations on this page. |
1124 | | * (4) Debug: to get pdf output of results when called repeatedly, |
1125 | | * call with an existing pixac, which will add an image of this page, |
1126 | | * with the fg outlined. If no foreground is found, there is |
1127 | | * no output for this page image. |
1128 | | * </pre> |
1129 | | */ |
1130 | | BOX * |
1131 | | pixFindPageForeground(PIX *pixs, |
1132 | | l_int32 threshold, |
1133 | | l_int32 mindist, |
1134 | | l_int32 erasedist, |
1135 | | l_int32 showmorph, |
1136 | | PIXAC *pixac) |
1137 | 0 | { |
1138 | 0 | l_int32 flag, nbox, intersects; |
1139 | 0 | l_int32 w, h, bx, by, bw, bh, left, right, top, bottom; |
1140 | 0 | PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2; |
1141 | 0 | BOX *box, *boxfg, *boxin, *boxd; |
1142 | 0 | BOXA *ba1, *ba2; |
1143 | |
|
1144 | 0 | if (!pixs) |
1145 | 0 | return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL); |
1146 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
1147 | 0 | if (w < MinWidth || h < MinHeight) { |
1148 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
1149 | 0 | return NULL; |
1150 | 0 | } |
1151 | | |
1152 | | /* Binarize, downscale by 0.5, remove the noise to generate a seed, |
1153 | | * and do a seedfill back from the seed into those 8-connected |
1154 | | * components of the binarized image for which there was at least |
1155 | | * one seed pixel. */ |
1156 | 0 | flag = (showmorph) ? 100 : 0; |
1157 | 0 | pixb = pixConvertTo1(pixs, threshold); |
1158 | 0 | pixb2 = pixScale(pixb, 0.5, 0.5); |
1159 | 0 | pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag); |
1160 | 0 | pix1 = pixMorphSequence(pixb2, "o50.1", 0); |
1161 | 0 | pixOr(pixseed, pixseed, pix1); |
1162 | 0 | pixDestroy(&pix1); |
1163 | 0 | pix1 = pixMorphSequence(pixb2, "o1.50", 0); |
1164 | 0 | pixOr(pixseed, pixseed, pix1); |
1165 | 0 | pixDestroy(&pix1); |
1166 | 0 | pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8); |
1167 | 0 | pixm = pixRemoveBorderConnComps(pixsf, 8); |
1168 | | |
1169 | | /* Now, where is the main block of text? We want to remove noise near |
1170 | | * the edge of the image, but to do that, we have to be convinced that |
1171 | | * (1) there is noise and (2) it is far enough from the text block |
1172 | | * and close enough to the edge. For each edge, if the block |
1173 | | * is more than mindist from that edge, then clean 'erasedist' |
1174 | | * pixels from the edge. */ |
1175 | 0 | pix1 = pixMorphSequence(pixm, "c50.50", flag); |
1176 | 0 | ba1 = pixConnComp(pix1, NULL, 8); |
1177 | 0 | ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); |
1178 | 0 | pixGetDimensions(pix1, &w, &h, NULL); |
1179 | 0 | nbox = boxaGetCount(ba2); |
1180 | 0 | if (nbox > 1) { |
1181 | 0 | box = boxaGetBox(ba2, 0, L_CLONE); |
1182 | 0 | boxGetGeometry(box, &bx, &by, &bw, &bh); |
1183 | 0 | left = (bx > mindist) ? erasedist : 0; |
1184 | 0 | right = (w - bx - bw > mindist) ? erasedist : 0; |
1185 | 0 | top = (by > mindist) ? erasedist : 0; |
1186 | 0 | bottom = (h - by - bh > mindist) ? erasedist : 0; |
1187 | 0 | pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR); |
1188 | 0 | boxDestroy(&box); |
1189 | 0 | } |
1190 | 0 | pixDestroy(&pix1); |
1191 | 0 | boxaDestroy(&ba1); |
1192 | 0 | boxaDestroy(&ba2); |
1193 | | |
1194 | | /* Locate the foreground region; don't bother cropping */ |
1195 | 0 | pixClipToForeground(pixm, NULL, &boxfg); |
1196 | | |
1197 | | /* Sanity check the fg region. Make sure it's not confined |
1198 | | * to a thin boundary on the left and right sides of the image, |
1199 | | * in which case it is likely to be noise. */ |
1200 | 0 | if (boxfg) { |
1201 | 0 | boxin = boxCreate(0.1 * w, 0, 0.8 * w, h); |
1202 | 0 | boxIntersects(boxfg, boxin, &intersects); |
1203 | 0 | boxDestroy(&boxin); |
1204 | 0 | if (!intersects) boxDestroy(&boxfg); |
1205 | 0 | } |
1206 | |
|
1207 | 0 | boxd = NULL; |
1208 | 0 | if (boxfg) { |
1209 | 0 | boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */ |
1210 | 0 | boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0); |
1211 | | |
1212 | | /* Save the debug image showing the box for this page */ |
1213 | 0 | if (pixac) { |
1214 | 0 | pixg2 = pixConvert1To4Cmap(pixb); |
1215 | 0 | pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0); |
1216 | 0 | pixacompAddPix(pixac, pixg2, IFF_DEFAULT); |
1217 | 0 | pixDestroy(&pixg2); |
1218 | 0 | } |
1219 | 0 | } |
1220 | |
|
1221 | 0 | pixDestroy(&pixb); |
1222 | 0 | pixDestroy(&pixb2); |
1223 | 0 | pixDestroy(&pixseed); |
1224 | 0 | pixDestroy(&pixsf); |
1225 | 0 | pixDestroy(&pixm); |
1226 | 0 | boxDestroy(&boxfg); |
1227 | 0 | return boxd; |
1228 | 0 | } |
1229 | | |
1230 | | |
1231 | | /*------------------------------------------------------------------* |
1232 | | * Extraction of characters from image with only text * |
1233 | | *------------------------------------------------------------------*/ |
1234 | | /*! |
1235 | | * \brief pixSplitIntoCharacters() |
1236 | | * |
1237 | | * \param[in] pixs 1 bpp, contains only deskewed text |
1238 | | * \param[in] minw min component width for initial filtering; typ. 4 |
1239 | | * \param[in] minh min component height for initial filtering; typ. 4 |
1240 | | * \param[out] pboxa [optional] character bounding boxes |
1241 | | * \param[out] ppixa [optional] character images |
1242 | | * \param[out] ppixdebug [optional] showing splittings |
1243 | | * |
1244 | | * \return 0 if OK, 1 on error |
1245 | | * |
1246 | | * <pre> |
1247 | | * Notes: |
1248 | | * (1) This is a simple function that attempts to find split points |
1249 | | * based on vertical pixel profiles. |
1250 | | * (2) It should be given an image that has an arbitrary number |
1251 | | * of text characters. |
1252 | | * (3) The returned pixa includes the boxes from which the |
1253 | | * (possibly split) components are extracted. |
1254 | | * </pre> |
1255 | | */ |
1256 | | l_ok |
1257 | | pixSplitIntoCharacters(PIX *pixs, |
1258 | | l_int32 minw, |
1259 | | l_int32 minh, |
1260 | | BOXA **pboxa, |
1261 | | PIXA **ppixa, |
1262 | | PIX **ppixdebug) |
1263 | 0 | { |
1264 | 0 | l_int32 ncomp, i, xoff, yoff; |
1265 | 0 | BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad; |
1266 | 0 | BOXAA *baa; |
1267 | 0 | PIX *pix, *pix1, *pix2, *pixdb; |
1268 | 0 | PIXA *pixa1, *pixadb; |
1269 | |
|
1270 | 0 | if (pboxa) *pboxa = NULL; |
1271 | 0 | if (ppixa) *ppixa = NULL; |
1272 | 0 | if (ppixdebug) *ppixdebug = NULL; |
1273 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1274 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1275 | | |
1276 | | /* Remove the small stuff */ |
1277 | 0 | pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH, |
1278 | 0 | L_SELECT_IF_GT, NULL); |
1279 | | |
1280 | | /* Small vertical close for consolidation */ |
1281 | 0 | pix2 = pixMorphSequence(pix1, "c1.10", 0); |
1282 | 0 | pixDestroy(&pix1); |
1283 | | |
1284 | | /* Get the 8-connected components */ |
1285 | 0 | boxa1 = pixConnComp(pix2, &pixa1, 8); |
1286 | 0 | pixDestroy(&pix2); |
1287 | 0 | boxaDestroy(&boxa1); |
1288 | | |
1289 | | /* Split the components if obvious */ |
1290 | 0 | ncomp = pixaGetCount(pixa1); |
1291 | 0 | boxa2 = boxaCreate(ncomp); |
1292 | 0 | pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL; |
1293 | 0 | for (i = 0; i < ncomp; i++) { |
1294 | 0 | pix = pixaGetPix(pixa1, i, L_CLONE); |
1295 | 0 | if (ppixdebug) { |
1296 | 0 | boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb); |
1297 | 0 | if (pixdb) |
1298 | 0 | pixaAddPix(pixadb, pixdb, L_INSERT); |
1299 | 0 | } else { |
1300 | 0 | boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL); |
1301 | 0 | } |
1302 | 0 | pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL); |
1303 | 0 | boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0); |
1304 | 0 | boxaJoin(boxa2, boxat2, 0, -1); |
1305 | 0 | pixDestroy(&pix); |
1306 | 0 | boxaDestroy(&boxat1); |
1307 | 0 | boxaDestroy(&boxat2); |
1308 | 0 | } |
1309 | 0 | pixaDestroy(&pixa1); |
1310 | | |
1311 | | /* Generate the debug image */ |
1312 | 0 | if (ppixdebug) { |
1313 | 0 | if (pixaGetCount(pixadb) > 0) { |
1314 | 0 | *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500, |
1315 | 0 | 1.0, 0, 20, 1); |
1316 | 0 | } |
1317 | 0 | pixaDestroy(&pixadb); |
1318 | 0 | } |
1319 | | |
1320 | | /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */ |
1321 | 0 | baa = boxaSort2d(boxa2, NULL, 0, 0, 5); |
1322 | 0 | boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE); |
1323 | 0 | boxaaDestroy(&baa); |
1324 | 0 | boxaDestroy(&boxa2); |
1325 | | |
1326 | | /* Optionally extract the pieces from the input image */ |
1327 | 0 | if (ppixa) |
1328 | 0 | *ppixa = pixClipRectangles(pixs, boxad); |
1329 | 0 | if (pboxa) |
1330 | 0 | *pboxa = boxad; |
1331 | 0 | else |
1332 | 0 | boxaDestroy(&boxad); |
1333 | 0 | return 0; |
1334 | 0 | } |
1335 | | |
1336 | | |
1337 | | /*! |
1338 | | * \brief pixSplitComponentWithProfile() |
1339 | | * |
1340 | | * \param[in] pixs 1 bpp, exactly one connected component |
1341 | | * \param[in] delta distance used in extrema finding in a numa; typ. 10 |
1342 | | * \param[in] mindel minimum required difference between profile |
1343 | | * minimum and profile values +2 and -2 away; typ. 7 |
1344 | | * \param[out] ppixdebug [optional] debug image of splitting |
1345 | | * \return boxa of c.c. after splitting, or NULL on error |
1346 | | * |
1347 | | * <pre> |
1348 | | * Notes: |
1349 | | * (1) This will split the most obvious cases of touching characters. |
1350 | | * The split points it is searching for are narrow and deep |
1351 | | * minima in the vertical pixel projection profile, after a |
1352 | | * large vertical closing has been applied to the component. |
1353 | | * </pre> |
1354 | | */ |
1355 | | BOXA * |
1356 | | pixSplitComponentWithProfile(PIX *pixs, |
1357 | | l_int32 delta, |
1358 | | l_int32 mindel, |
1359 | | PIX **ppixdebug) |
1360 | 0 | { |
1361 | 0 | l_int32 w, h, n2, i, firstmin, xmin, xshift; |
1362 | 0 | l_int32 nmin, nleft, nright, nsplit, isplit, ncomp; |
1363 | 0 | l_int32 *array1, *array2; |
1364 | 0 | BOX *box; |
1365 | 0 | BOXA *boxad; |
1366 | 0 | NUMA *na1, *na2, *nasplit; |
1367 | 0 | PIX *pix1, *pixdb; |
1368 | |
|
1369 | 0 | if (ppixdebug) *ppixdebug = NULL; |
1370 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1371 | 0 | return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL); |
1372 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
1373 | | |
1374 | | /* Closing to consolidate characters vertically */ |
1375 | 0 | pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100); |
1376 | | |
1377 | | /* Get extrema of column projections */ |
1378 | 0 | boxad = boxaCreate(2); |
1379 | 0 | na1 = pixCountPixelsByColumn(pix1); /* w elements */ |
1380 | 0 | pixDestroy(&pix1); |
1381 | 0 | na2 = numaFindExtrema(na1, delta, NULL); |
1382 | 0 | n2 = numaGetCount(na2); |
1383 | 0 | if (n2 < 3) { /* no split possible */ |
1384 | 0 | box = boxCreate(0, 0, w, h); |
1385 | 0 | boxaAddBox(boxad, box, L_INSERT); |
1386 | 0 | numaDestroy(&na1); |
1387 | 0 | numaDestroy(&na2); |
1388 | 0 | return boxad; |
1389 | 0 | } |
1390 | | |
1391 | | /* Look for sufficiently deep and narrow minima. |
1392 | | * All minima of of interest must be surrounded by max on each |
1393 | | * side. firstmin is the index of first possible minimum. */ |
1394 | 0 | array1 = numaGetIArray(na1); |
1395 | 0 | array2 = numaGetIArray(na2); |
1396 | 0 | if (ppixdebug) numaWriteStderr(na2); |
1397 | 0 | firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2; |
1398 | 0 | nasplit = numaCreate(n2); /* will hold split locations */ |
1399 | 0 | for (i = firstmin; i < n2 - 1; i+= 2) { |
1400 | 0 | xmin = array2[i]; |
1401 | 0 | nmin = array1[xmin]; |
1402 | 0 | if (xmin + 2 >= w) break; /* no more splits possible */ |
1403 | 0 | nleft = array1[xmin - 2]; |
1404 | 0 | nright = array1[xmin + 2]; |
1405 | 0 | if (ppixdebug) { |
1406 | 0 | lept_stderr( |
1407 | 0 | "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n", |
1408 | 0 | xmin, w, nleft, nmin, nright); |
1409 | 0 | } |
1410 | 0 | if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */ |
1411 | 0 | numaAddNumber(nasplit, xmin); |
1412 | 0 | } |
1413 | 0 | nsplit = numaGetCount(nasplit); |
1414 | |
|
1415 | | #if 0 |
1416 | | if (ppixdebug && nsplit > 0) { |
1417 | | lept_mkdir("lept/split"); |
1418 | | gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL); |
1419 | | } |
1420 | | #endif |
1421 | |
|
1422 | 0 | numaDestroy(&na1); |
1423 | 0 | numaDestroy(&na2); |
1424 | 0 | LEPT_FREE(array1); |
1425 | 0 | LEPT_FREE(array2); |
1426 | |
|
1427 | 0 | if (nsplit == 0) { /* no splitting */ |
1428 | 0 | numaDestroy(&nasplit); |
1429 | 0 | box = boxCreate(0, 0, w, h); |
1430 | 0 | boxaAddBox(boxad, box, L_INSERT); |
1431 | 0 | return boxad; |
1432 | 0 | } |
1433 | | |
1434 | | /* Use split points to generate b.b. after splitting */ |
1435 | 0 | for (i = 0, xshift = 0; i < nsplit; i++) { |
1436 | 0 | numaGetIValue(nasplit, i, &isplit); |
1437 | 0 | box = boxCreate(xshift, 0, isplit - xshift, h); |
1438 | 0 | boxaAddBox(boxad, box, L_INSERT); |
1439 | 0 | xshift = isplit + 1; |
1440 | 0 | } |
1441 | 0 | box = boxCreate(xshift, 0, w - xshift, h); |
1442 | 0 | boxaAddBox(boxad, box, L_INSERT); |
1443 | 0 | numaDestroy(&nasplit); |
1444 | |
|
1445 | 0 | if (ppixdebug) { |
1446 | 0 | pixdb = pixConvertTo32(pixs); |
1447 | 0 | ncomp = boxaGetCount(boxad); |
1448 | 0 | for (i = 0; i < ncomp; i++) { |
1449 | 0 | box = boxaGetBox(boxad, i, L_CLONE); |
1450 | 0 | pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5); |
1451 | 0 | boxDestroy(&box); |
1452 | 0 | } |
1453 | 0 | *ppixdebug = pixdb; |
1454 | 0 | } |
1455 | |
|
1456 | 0 | return boxad; |
1457 | 0 | } |
1458 | | |
1459 | | |
1460 | | /*------------------------------------------------------------------* |
1461 | | * Extraction of lines of text * |
1462 | | *------------------------------------------------------------------*/ |
1463 | | /*! |
1464 | | * \brief pixExtractTextlines() |
1465 | | * |
1466 | | * \param[in] pixs any depth, assumed to have nearly horizontal text |
1467 | | * \param[in] maxw, maxh initial filtering: remove any components in pixs |
1468 | | * with components larger than maxw or maxh |
1469 | | * \param[in] minw, minh final filtering: remove extracted 'lines' |
1470 | | * with sizes smaller than minw or minh; use |
1471 | | * 0 for default. |
1472 | | * \param[in] adjw, adjh final adjustment of boxes representing each |
1473 | | * text line. If > 0, these increase the box |
1474 | | * size at each edge by this amount. |
1475 | | * \param[in] pixadb pixa for saving intermediate steps; NULL to omit |
1476 | | * \return pixa of textline images, including bounding boxes, or |
1477 | | * NULL on error |
1478 | | * |
1479 | | * <pre> |
1480 | | * Notes: |
1481 | | * (1) This function assumes that textline fragments have sufficient |
1482 | | * vertical separation and small enough skew so that a |
1483 | | * horizontal dilation sufficient to join words will not join |
1484 | | * textlines. It does not guarantee that horizontally adjacent |
1485 | | * textline fragments on the same line will be joined. |
1486 | | * (2) For images with multiple columns, it attempts to avoid joining |
1487 | | * textlines across the space between columns. If that is not |
1488 | | * a concern, you can also use pixExtractRawTextlines(), |
1489 | | * which will join them with alacrity. |
1490 | | * (3) This first removes components from pixs that are either |
1491 | | * wide (> %maxw) or tall (> %maxh). |
1492 | | * (4) A final filtering operation removes small components, such |
1493 | | * that width < %minw or height < %minh. |
1494 | | * (5) For reasonable accuracy, the resolution of pixs should be |
1495 | | * at least 100 ppi. For reasonable efficiency, the resolution |
1496 | | * should not exceed 600 ppi. |
1497 | | * (6) This can be used to determine if some region of a scanned |
1498 | | * image is horizontal text. |
1499 | | * (7) As an example, for a pix with resolution 300 ppi, a reasonable |
1500 | | * set of parameters is: |
1501 | | * pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL); |
1502 | | * The defaults minw and minh for 300 ppi are about 36 and 20, |
1503 | | * so the same result is obtained with: |
1504 | | * pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL); |
1505 | | * (8) The output pixa is composed of subimages, one for each textline, |
1506 | | * and the boxa in the pixa tells where in %pixs each textline goes. |
1507 | | * </pre> |
1508 | | */ |
1509 | | PIXA * |
1510 | | pixExtractTextlines(PIX *pixs, |
1511 | | l_int32 maxw, |
1512 | | l_int32 maxh, |
1513 | | l_int32 minw, |
1514 | | l_int32 minh, |
1515 | | l_int32 adjw, |
1516 | | l_int32 adjh, |
1517 | | PIXA *pixadb) |
1518 | 0 | { |
1519 | 0 | char buf[64]; |
1520 | 0 | l_int32 res, csize, empty; |
1521 | 0 | BOXA *boxa1, *boxa2, *boxa3; |
1522 | 0 | PIX *pix1, *pix2, *pix3; |
1523 | 0 | PIXA *pixa1, *pixa2, *pixa3; |
1524 | |
|
1525 | 0 | if (!pixs) |
1526 | 0 | return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); |
1527 | | |
1528 | | /* Binarize carefully, if necessary */ |
1529 | 0 | if (pixGetDepth(pixs) > 1) { |
1530 | 0 | pix2 = pixConvertTo8(pixs, FALSE); |
1531 | 0 | pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); |
1532 | 0 | pix1 = pixThresholdToBinary(pix3, 150); |
1533 | 0 | pixDestroy(&pix2); |
1534 | 0 | pixDestroy(&pix3); |
1535 | 0 | } else { |
1536 | 0 | pix1 = pixClone(pixs); |
1537 | 0 | } |
1538 | 0 | pixZero(pix1, &empty); |
1539 | 0 | if (empty) { |
1540 | 0 | pixDestroy(&pix1); |
1541 | 0 | L_INFO("no fg pixels in input image\n", __func__); |
1542 | 0 | return NULL; |
1543 | 0 | } |
1544 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
1545 | | |
1546 | | /* Remove any very tall or very wide connected components */ |
1547 | 0 | pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, |
1548 | 0 | L_SELECT_IF_LT, NULL); |
1549 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
1550 | 0 | pixDestroy(&pix1); |
1551 | | |
1552 | | /* Filter to solidify the text lines within the x-height region. |
1553 | | * The closing (csize) bridges gaps between words. The opening |
1554 | | * removes isolated bridges between textlines. */ |
1555 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
1556 | 0 | L_INFO("Resolution is not set: setting to 300 ppi\n", __func__); |
1557 | 0 | res = 300; |
1558 | 0 | } |
1559 | 0 | csize = L_MIN(120., 60.0 * res / 300.0); |
1560 | 0 | snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3); |
1561 | 0 | pix3 = pixMorphCompSequence(pix2, buf, 0); |
1562 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
1563 | | |
1564 | | /* Extract the connected components. These should be dilated lines */ |
1565 | 0 | boxa1 = pixConnComp(pix3, &pixa1, 4); |
1566 | 0 | if (pixadb) { |
1567 | 0 | pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); |
1568 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1569 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1570 | 0 | } |
1571 | | |
1572 | | /* Set minw, minh if default is requested */ |
1573 | 0 | minw = (minw != 0) ? minw : (l_int32)(0.12 * res); |
1574 | 0 | minh = (minh != 0) ? minh : (l_int32)(0.07 * res); |
1575 | | |
1576 | | /* Remove line components that are too small */ |
1577 | 0 | pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH, |
1578 | 0 | L_SELECT_IF_GTE, NULL); |
1579 | 0 | if (pixadb) { |
1580 | 0 | pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); |
1581 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1582 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1583 | 0 | pix1 = pixConvertTo32(pix2); |
1584 | 0 | pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0); |
1585 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1586 | 0 | } |
1587 | | |
1588 | | /* Selectively AND with the version before dilation, and save */ |
1589 | 0 | boxa2 = pixaGetBoxa(pixa2, L_CLONE); |
1590 | 0 | boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); |
1591 | 0 | pixa3 = pixClipRectangles(pix2, boxa3); |
1592 | 0 | if (pixadb) { |
1593 | 0 | pix1 = pixaDisplayRandomCmap(pixa3, 0, 0); |
1594 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1595 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1596 | 0 | } |
1597 | |
|
1598 | 0 | pixDestroy(&pix2); |
1599 | 0 | pixDestroy(&pix3); |
1600 | 0 | pixaDestroy(&pixa1); |
1601 | 0 | pixaDestroy(&pixa2); |
1602 | 0 | boxaDestroy(&boxa1); |
1603 | 0 | boxaDestroy(&boxa2); |
1604 | 0 | boxaDestroy(&boxa3); |
1605 | 0 | return pixa3; |
1606 | 0 | } |
1607 | | |
1608 | | |
1609 | | /*! |
1610 | | * \brief pixExtractRawTextlines() |
1611 | | * |
1612 | | * \param[in] pixs any depth, assumed to have nearly horizontal text |
1613 | | * \param[in] maxw, maxh initial filtering: remove any components in pixs |
1614 | | * with components larger than maxw or maxh; |
1615 | | * use 0 for default values. |
1616 | | * \param[in] adjw, adjh final adjustment of boxes representing each |
1617 | | * text line. If > 0, these increase the box |
1618 | | * size at each edge by this amount. |
1619 | | * \param[in] pixadb pixa for saving intermediate steps; NULL to omit |
1620 | | * \return pixa of textline images, including bounding boxes, or |
1621 | | * NULL on error |
1622 | | * |
1623 | | * <pre> |
1624 | | * Notes: |
1625 | | * (1) This function assumes that textlines have sufficient |
1626 | | * vertical separation and small enough skew so that a |
1627 | | * horizontal dilation sufficient to join words will not join |
1628 | | * textlines. It aggressively joins textlines across multiple |
1629 | | * columns, so if that is not desired, you must either (a) make |
1630 | | * sure that %pixs is a single column of text or (b) use instead |
1631 | | * pixExtractTextlines(), which is more conservative |
1632 | | * about joining text fragments that have vertical overlap. |
1633 | | * (2) This first removes components from pixs that are either |
1634 | | * very wide (> %maxw) or very tall (> %maxh). |
1635 | | * (3) For reasonable accuracy, the resolution of pixs should be |
1636 | | * at least 100 ppi. For reasonable efficiency, the resolution |
1637 | | * should not exceed 600 ppi. |
1638 | | * (4) This can be used to determine if some region of a scanned |
1639 | | * image is horizontal text. |
1640 | | * (5) As an example, for a pix with resolution 300 ppi, a reasonable |
1641 | | * set of parameters is: |
1642 | | * pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL); |
1643 | | * (6) The output pixa is composed of subimages, one for each textline, |
1644 | | * and the boxa in the pixa tells where in %pixs each textline goes. |
1645 | | * </pre> |
1646 | | */ |
1647 | | PIXA * |
1648 | | pixExtractRawTextlines(PIX *pixs, |
1649 | | l_int32 maxw, |
1650 | | l_int32 maxh, |
1651 | | l_int32 adjw, |
1652 | | l_int32 adjh, |
1653 | | PIXA *pixadb) |
1654 | 0 | { |
1655 | 0 | char buf[64]; |
1656 | 0 | l_int32 res, csize, empty; |
1657 | 0 | BOXA *boxa1, *boxa2, *boxa3; |
1658 | 0 | BOXAA *baa1; |
1659 | 0 | PIX *pix1, *pix2, *pix3; |
1660 | 0 | PIXA *pixa1, *pixa2; |
1661 | |
|
1662 | 0 | if (!pixs) |
1663 | 0 | return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); |
1664 | | |
1665 | | /* Set maxw, maxh if default is requested */ |
1666 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
1667 | 0 | L_INFO("Resolution is not set: setting to 300 ppi\n", __func__); |
1668 | 0 | res = 300; |
1669 | 0 | } |
1670 | 0 | maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res); |
1671 | 0 | maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res); |
1672 | | |
1673 | | /* Binarize carefully, if necessary */ |
1674 | 0 | if (pixGetDepth(pixs) > 1) { |
1675 | 0 | pix2 = pixConvertTo8(pixs, FALSE); |
1676 | 0 | pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); |
1677 | 0 | pix1 = pixThresholdToBinary(pix3, 150); |
1678 | 0 | pixDestroy(&pix2); |
1679 | 0 | pixDestroy(&pix3); |
1680 | 0 | } else { |
1681 | 0 | pix1 = pixClone(pixs); |
1682 | 0 | } |
1683 | 0 | pixZero(pix1, &empty); |
1684 | 0 | if (empty) { |
1685 | 0 | pixDestroy(&pix1); |
1686 | 0 | L_INFO("no fg pixels in input image\n", __func__); |
1687 | 0 | return NULL; |
1688 | 0 | } |
1689 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
1690 | | |
1691 | | /* Remove any very tall or very wide connected components */ |
1692 | 0 | pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, |
1693 | 0 | L_SELECT_IF_LT, NULL); |
1694 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
1695 | 0 | pixDestroy(&pix1); |
1696 | | |
1697 | | /* Filter to solidify the text lines within the x-height region. |
1698 | | * The closing (csize) bridges gaps between words. */ |
1699 | 0 | csize = L_MIN(120., 60.0 * res / 300.0); |
1700 | 0 | snprintf(buf, sizeof(buf), "c%d.1", csize); |
1701 | 0 | pix3 = pixMorphCompSequence(pix2, buf, 0); |
1702 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
1703 | | |
1704 | | /* Extract the connected components. These should be dilated lines */ |
1705 | 0 | boxa1 = pixConnComp(pix3, &pixa1, 4); |
1706 | 0 | if (pixadb) { |
1707 | 0 | pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); |
1708 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1709 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1710 | 0 | } |
1711 | | |
1712 | | /* Do a 2-d sort, and generate a bounding box for each set of text |
1713 | | * line segments that is aligned horizontally (i.e., has vertical |
1714 | | * overlap) into a box representing a single text line. */ |
1715 | 0 | baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5); |
1716 | 0 | boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2); |
1717 | 0 | if (pixadb) { |
1718 | 0 | pix1 = pixConvertTo32(pix2); |
1719 | 0 | pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0); |
1720 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1721 | 0 | } |
1722 | | |
1723 | | /* Optionally adjust the sides of each text line box, and then |
1724 | | * use the boxes to generate a pixa of the text lines. */ |
1725 | 0 | boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); |
1726 | 0 | pixa2 = pixClipRectangles(pix2, boxa3); |
1727 | 0 | if (pixadb) { |
1728 | 0 | pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); |
1729 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1730 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1731 | 0 | } |
1732 | |
|
1733 | 0 | pixDestroy(&pix2); |
1734 | 0 | pixDestroy(&pix3); |
1735 | 0 | pixaDestroy(&pixa1); |
1736 | 0 | boxaDestroy(&boxa1); |
1737 | 0 | boxaDestroy(&boxa2); |
1738 | 0 | boxaDestroy(&boxa3); |
1739 | 0 | boxaaDestroy(&baa1); |
1740 | 0 | return pixa2; |
1741 | 0 | } |
1742 | | |
1743 | | |
1744 | | /*------------------------------------------------------------------* |
1745 | | * How many text columns * |
1746 | | *------------------------------------------------------------------*/ |
1747 | | /*! |
1748 | | * \brief pixCountTextColumns() |
1749 | | * |
1750 | | * \param[in] pixs 1 bpp |
1751 | | * \param[in] deltafract fraction of (max - min) to be used in the delta |
1752 | | * for extrema finding; typ 0.3 |
1753 | | * \param[in] peakfract fraction of (max - min) to be used to threshold |
1754 | | * the peak value; typ. 0.5 |
1755 | | * \param[in] clipfract fraction of image dimension removed on each side; |
1756 | | * typ. 0.1, which leaves w and h reduced by 0.8 |
1757 | | * \param[out] pncols number of columns; -1 if not determined |
1758 | | * \param[in] pixadb [optional] pre-allocated, for showing |
1759 | | * intermediate computation; use null to skip |
1760 | | * \return 0 if OK, 1 on error |
1761 | | * |
1762 | | * <pre> |
1763 | | * Notes: |
1764 | | * (1) It is assumed that pixs has the correct resolution set. |
1765 | | * If the resolution is 0, we set to 300 and issue a warning. |
1766 | | * (2) If necessary, the image is scaled to between 37 and 75 ppi; |
1767 | | * most of the processing is done at this resolution. |
1768 | | * (3) If no text is found (essentially a blank page), |
1769 | | * this returns ncols = 0. |
1770 | | * (4) For debug output, input a pre-allocated pixa. |
1771 | | * </pre> |
1772 | | */ |
1773 | | l_ok |
1774 | | pixCountTextColumns(PIX *pixs, |
1775 | | l_float32 deltafract, |
1776 | | l_float32 peakfract, |
1777 | | l_float32 clipfract, |
1778 | | l_int32 *pncols, |
1779 | | PIXA *pixadb) |
1780 | 0 | { |
1781 | 0 | l_int32 w, h, res, i, n, npeak; |
1782 | 0 | l_float32 scalefact, redfact, minval, maxval, val4, val5, fract; |
1783 | 0 | BOX *box; |
1784 | 0 | NUMA *na1, *na2, *na3, *na4, *na5; |
1785 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
1786 | |
|
1787 | 0 | if (!pncols) |
1788 | 0 | return ERROR_INT("&ncols not defined", __func__, 1); |
1789 | 0 | *pncols = -1; /* init */ |
1790 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1791 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1792 | 0 | if (deltafract < 0.15 || deltafract > 0.75) |
1793 | 0 | L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__); |
1794 | 0 | if (peakfract < 0.25 || peakfract > 0.9) |
1795 | 0 | L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__); |
1796 | 0 | if (clipfract < 0.0 || clipfract >= 0.5) |
1797 | 0 | return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1); |
1798 | 0 | if (pixadb) pixaAddPix(pixadb, pixs, L_COPY); |
1799 | | |
1800 | | /* Scale to between 37.5 and 75 ppi */ |
1801 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
1802 | 0 | L_WARNING("resolution undefined; set to 300\n", __func__); |
1803 | 0 | pixSetResolution(pixs, 300, 300); |
1804 | 0 | res = 300; |
1805 | 0 | } |
1806 | 0 | if (res < 37) { |
1807 | 0 | L_WARNING("resolution %d very low\n", __func__, res); |
1808 | 0 | scalefact = 37.5 / res; |
1809 | 0 | pix1 = pixScale(pixs, scalefact, scalefact); |
1810 | 0 | } else { |
1811 | 0 | redfact = (l_float32)res / 37.5; |
1812 | 0 | if (redfact < 2.0) |
1813 | 0 | pix1 = pixClone(pixs); |
1814 | 0 | else if (redfact < 4.0) |
1815 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
1816 | 0 | else if (redfact < 8.0) |
1817 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0); |
1818 | 0 | else if (redfact < 16.0) |
1819 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0); |
1820 | 0 | else |
1821 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2); |
1822 | 0 | } |
1823 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
1824 | | |
1825 | | /* Crop inner 80% of image */ |
1826 | 0 | pixGetDimensions(pix1, &w, &h, NULL); |
1827 | 0 | box = boxCreate(clipfract * w, clipfract * h, |
1828 | 0 | (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h); |
1829 | 0 | pix2 = pixClipRectangle(pix1, box, NULL); |
1830 | 0 | pixGetDimensions(pix2, &w, &h, NULL); |
1831 | 0 | boxDestroy(&box); |
1832 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
1833 | | |
1834 | | /* Deskew */ |
1835 | 0 | pix3 = pixDeskew(pix2, 0); |
1836 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
1837 | | |
1838 | | /* Close to increase column counts for text */ |
1839 | 0 | pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21); |
1840 | 0 | if (pixadb) pixaAddPix(pixadb, pix4, L_COPY); |
1841 | 0 | pixInvert(pix4, pix4); |
1842 | 0 | na1 = pixCountByColumn(pix4, NULL); |
1843 | |
|
1844 | 0 | if (pixadb) { |
1845 | 0 | gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL); |
1846 | 0 | pix5 = pixRead("/tmp/lept/plot.png"); |
1847 | 0 | pixaAddPix(pixadb, pix5, L_INSERT); |
1848 | 0 | } |
1849 | | |
1850 | | /* Analyze the column counts. na4 gives the locations of |
1851 | | * the extrema in normalized units (0.0 to 1.0) across the |
1852 | | * cropped image. na5 gives the magnitude of the |
1853 | | * extrema, normalized to the dynamic range. The peaks |
1854 | | * are values that are at least peakfract of (max - min). */ |
1855 | 0 | numaGetMax(na1, &maxval, NULL); |
1856 | 0 | numaGetMin(na1, &minval, NULL); |
1857 | 0 | fract = (l_float32)(maxval - minval) / h; /* is there much at all? */ |
1858 | 0 | if (fract < 0.05) { |
1859 | 0 | L_INFO("very little content on page; 0 text columns\n", __func__); |
1860 | 0 | *pncols = 0; |
1861 | 0 | } else { |
1862 | 0 | na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3); |
1863 | 0 | na4 = numaTransform(na2, 0, 1.0 / w); |
1864 | 0 | na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval)); |
1865 | 0 | n = numaGetCount(na4); |
1866 | 0 | for (i = 0, npeak = 0; i < n; i++) { |
1867 | 0 | numaGetFValue(na4, i, &val4); |
1868 | 0 | numaGetFValue(na5, i, &val5); |
1869 | 0 | if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) { |
1870 | 0 | npeak++; |
1871 | 0 | L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5); |
1872 | 0 | } |
1873 | 0 | } |
1874 | 0 | *pncols = npeak + 1; |
1875 | 0 | numaDestroy(&na2); |
1876 | 0 | numaDestroy(&na3); |
1877 | 0 | numaDestroy(&na4); |
1878 | 0 | numaDestroy(&na5); |
1879 | 0 | } |
1880 | |
|
1881 | 0 | pixDestroy(&pix1); |
1882 | 0 | pixDestroy(&pix2); |
1883 | 0 | pixDestroy(&pix3); |
1884 | 0 | pixDestroy(&pix4); |
1885 | 0 | numaDestroy(&na1); |
1886 | 0 | return 0; |
1887 | 0 | } |
1888 | | |
1889 | | |
1890 | | /*------------------------------------------------------------------* |
1891 | | * Decision text vs photo * |
1892 | | *------------------------------------------------------------------*/ |
1893 | | /*! |
1894 | | * \brief pixDecideIfText() |
1895 | | * |
1896 | | * \param[in] pixs any depth |
1897 | | * \param[in] box [optional] if null, use entire pixs |
1898 | | * \param[out] pistext 1 if text; 0 if photo; -1 if not determined or empty |
1899 | | * \param[in] pixadb [optional] pre-allocated, for showing intermediate |
1900 | | * computation; use NULL to skip |
1901 | | * \return 0 if OK, 1 on error |
1902 | | * |
1903 | | * <pre> |
1904 | | * Notes: |
1905 | | * (1) It is assumed that pixs has the correct resolution set. |
1906 | | * If the resolution is 0, we set to 300 and issue a warning. |
1907 | | * (2) If necessary, the image is scaled to 300 ppi; most of the |
1908 | | * processing is done at this resolution. |
1909 | | * (3) Text is assumed to be in horizontal lines. |
1910 | | * (4) Because thin vertical lines are removed before filtering for |
1911 | | * text lines, this should identify tables as text. |
1912 | | * (5) If %box is null and pixs contains both text lines and line art, |
1913 | | * this function might return %istext == true. |
1914 | | * (6) If the input pixs is empty, or for some other reason the |
1915 | | * result can not be determined, return -1. |
1916 | | * (7) For debug output, input a pre-allocated pixa. |
1917 | | * </pre> |
1918 | | */ |
1919 | | l_ok |
1920 | | pixDecideIfText(PIX *pixs, |
1921 | | BOX *box, |
1922 | | l_int32 *pistext, |
1923 | | PIXA *pixadb) |
1924 | 0 | { |
1925 | 0 | l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp; |
1926 | 0 | l_float32 ratio1, ratio2; |
1927 | 0 | L_BMF *bmf; |
1928 | 0 | BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5; |
1929 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7; |
1930 | 0 | PIXA *pixa1; |
1931 | 0 | SEL *sel1; |
1932 | |
|
1933 | 0 | if (!pistext) |
1934 | 0 | return ERROR_INT("&istext not defined", __func__, 1); |
1935 | 0 | *pistext = -1; |
1936 | 0 | if (!pixs) |
1937 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
1938 | | |
1939 | | /* Crop, convert to 1 bpp, 300 ppi */ |
1940 | 0 | if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL) |
1941 | 0 | return ERROR_INT("pix1 not made", __func__, 1); |
1942 | | |
1943 | 0 | pixZero(pix1, &empty); |
1944 | 0 | if (empty) { |
1945 | 0 | pixDestroy(&pix1); |
1946 | 0 | L_INFO("pix is empty\n", __func__); |
1947 | 0 | return 0; |
1948 | 0 | } |
1949 | 0 | w = pixGetWidth(pix1); |
1950 | | |
1951 | | /* Identify and remove tall, thin vertical lines (as found in tables) |
1952 | | * that are up to 9 pixels wide. Make a hit-miss sel with an |
1953 | | * 81 pixel vertical set of hits and with 3 pairs of misses that |
1954 | | * are 10 pixels apart horizontally. It is necessary to use a |
1955 | | * hit-miss transform; if we only opened with a vertical line of |
1956 | | * hits, we would remove solid regions of pixels that are not |
1957 | | * text or vertical lines. */ |
1958 | 0 | pix2 = pixCreate(11, 81, 1); |
1959 | 0 | for (i = 0; i < 81; i++) |
1960 | 0 | pixSetPixel(pix2, 5, i, 1); |
1961 | 0 | sel1 = selCreateFromPix(pix2, 40, 5, NULL); |
1962 | 0 | selSetElement(sel1, 20, 0, SEL_MISS); |
1963 | 0 | selSetElement(sel1, 20, 10, SEL_MISS); |
1964 | 0 | selSetElement(sel1, 40, 0, SEL_MISS); |
1965 | 0 | selSetElement(sel1, 40, 10, SEL_MISS); |
1966 | 0 | selSetElement(sel1, 60, 0, SEL_MISS); |
1967 | 0 | selSetElement(sel1, 60, 10, SEL_MISS); |
1968 | 0 | pix3 = pixHMT(NULL, pix1, sel1); |
1969 | 0 | pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000); |
1970 | 0 | pix5 = pixXor(NULL, pix1, pix4); |
1971 | 0 | pixDestroy(&pix2); |
1972 | 0 | selDestroy(&sel1); |
1973 | | |
1974 | | /* Convert the text lines to separate long horizontal components */ |
1975 | 0 | pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0); |
1976 | | |
1977 | | /* Estimate the distance to the bottom of the significant region */ |
1978 | 0 | if (box) { /* use full height */ |
1979 | 0 | pixGetDimensions(pix6, NULL, &h, NULL); |
1980 | 0 | } else { /* use height of region that has text lines */ |
1981 | 0 | pixFindThreshFgExtent(pix6, 400, NULL, &h); |
1982 | 0 | } |
1983 | |
|
1984 | 0 | if (pixadb) { |
1985 | 0 | bmf = bmfCreate(NULL, 6); |
1986 | 0 | pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary", |
1987 | 0 | 0x0000ff00, L_ADD_BELOW); |
1988 | 0 | pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line", |
1989 | 0 | 0x0000ff00, L_ADD_BELOW); |
1990 | 0 | pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill", |
1991 | 0 | 0x0000ff00, L_ADD_BELOW); |
1992 | 0 | pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor", |
1993 | 0 | 0x0000ff00, L_ADD_BELOW); |
1994 | 0 | pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components", |
1995 | 0 | 0x0000ff00, L_ADD_BELOW); |
1996 | 0 | } |
1997 | | |
1998 | | /* Extract the connected components */ |
1999 | 0 | if (pixadb) { |
2000 | 0 | boxa1 = pixConnComp(pix6, &pixa1, 8); |
2001 | 0 | pix7 = pixaDisplayRandomCmap(pixa1, 0, 0); |
2002 | 0 | pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255); |
2003 | 0 | pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components", |
2004 | 0 | 0x0000ff00, L_ADD_BELOW); |
2005 | 0 | pixDestroy(&pix7); |
2006 | 0 | pixaDestroy(&pixa1); |
2007 | 0 | bmfDestroy(&bmf); |
2008 | 0 | } else { |
2009 | 0 | boxa1 = pixConnComp(pix6, NULL, 8); |
2010 | 0 | } |
2011 | | |
2012 | | /* Analyze the connected components. The following conditions |
2013 | | * at 300 ppi must be satisfied if the image is text: |
2014 | | * (1) There are no components that are wider than 400 pixels and |
2015 | | * taller than 175 pixels. |
2016 | | * (2) The second longest component is at least 60% of the |
2017 | | * (possibly cropped) image width. This catches images |
2018 | | * that don't have any significant content. |
2019 | | * (3) Of the components that are at least 40% of the length |
2020 | | * of the longest (n2), at least 80% of them must not exceed |
2021 | | * 60 pixels in height. |
2022 | | * (4) The number of those long, thin components (n3) must |
2023 | | * equal or exceed a minimum that scales linearly with the |
2024 | | * image height. |
2025 | | * Most images that are not text fail more than one of these |
2026 | | * conditions. */ |
2027 | 0 | boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL); |
2028 | 0 | boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */ |
2029 | 0 | boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH, |
2030 | 0 | L_SELECT_IF_GTE, NULL); |
2031 | 0 | boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT, |
2032 | 0 | L_SELECT_IF_LTE, NULL); |
2033 | 0 | boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH, |
2034 | 0 | L_SELECT_IF_GT, NULL); |
2035 | 0 | big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1; |
2036 | 0 | n1 = boxaGetCount(boxa1); |
2037 | 0 | n2 = boxaGetCount(boxa3); |
2038 | 0 | n3 = boxaGetCount(boxa4); |
2039 | 0 | ratio1 = (l_float32)maxw / (l_float32)w; |
2040 | 0 | ratio2 = (l_float32)n3 / (l_float32)n2; |
2041 | 0 | minlines = L_MAX(2, h / 125); |
2042 | 0 | if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines) |
2043 | 0 | *pistext = 0; |
2044 | 0 | else |
2045 | 0 | *pistext = 1; |
2046 | 0 | if (pixadb) { |
2047 | 0 | if (*pistext == 1) { |
2048 | 0 | L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, " |
2049 | 0 | "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " |
2050 | 0 | "big_comp = %d\n", __func__, n1, n2, n3, minlines, |
2051 | 0 | maxw, ratio1, h, big_comp); |
2052 | 0 | } else { |
2053 | 0 | L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, " |
2054 | 0 | "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " |
2055 | 0 | "big_comp = %d\n", __func__, n1, n2, n3, minlines, |
2056 | 0 | maxw, ratio1, h, big_comp); |
2057 | 0 | } |
2058 | 0 | } |
2059 | |
|
2060 | 0 | boxaDestroy(&boxa1); |
2061 | 0 | boxaDestroy(&boxa2); |
2062 | 0 | boxaDestroy(&boxa3); |
2063 | 0 | boxaDestroy(&boxa4); |
2064 | 0 | boxaDestroy(&boxa5); |
2065 | 0 | pixDestroy(&pix1); |
2066 | 0 | pixDestroy(&pix3); |
2067 | 0 | pixDestroy(&pix4); |
2068 | 0 | pixDestroy(&pix5); |
2069 | 0 | pixDestroy(&pix6); |
2070 | 0 | return 0; |
2071 | 0 | } |
2072 | | |
2073 | | |
2074 | | /*! |
2075 | | * \brief pixFindThreshFgExtent() |
2076 | | * |
2077 | | * \param[in] pixs 1 bpp |
2078 | | * \param[in] thresh threshold number of pixels in row |
2079 | | * \param[out] ptop [optional] location of top of region |
2080 | | * \param[out] pbot [optional] location of bottom of region |
2081 | | * \return 0 if OK, 1 on error |
2082 | | */ |
2083 | | l_ok |
2084 | | pixFindThreshFgExtent(PIX *pixs, |
2085 | | l_int32 thresh, |
2086 | | l_int32 *ptop, |
2087 | | l_int32 *pbot) |
2088 | 0 | { |
2089 | 0 | l_int32 i, n; |
2090 | 0 | l_int32 *array; |
2091 | 0 | NUMA *na; |
2092 | |
|
2093 | 0 | if (ptop) *ptop = 0; |
2094 | 0 | if (pbot) *pbot = 0; |
2095 | 0 | if (!ptop && !pbot) |
2096 | 0 | return ERROR_INT("nothing to determine", __func__, 1); |
2097 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
2098 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
2099 | | |
2100 | 0 | na = pixCountPixelsByRow(pixs, NULL); |
2101 | 0 | n = numaGetCount(na); |
2102 | 0 | array = numaGetIArray(na); |
2103 | 0 | if (ptop) { |
2104 | 0 | for (i = 0; i < n; i++) { |
2105 | 0 | if (array[i] >= thresh) { |
2106 | 0 | *ptop = i; |
2107 | 0 | break; |
2108 | 0 | } |
2109 | 0 | } |
2110 | 0 | } |
2111 | 0 | if (pbot) { |
2112 | 0 | for (i = n - 1; i >= 0; i--) { |
2113 | 0 | if (array[i] >= thresh) { |
2114 | 0 | *pbot = i; |
2115 | 0 | break; |
2116 | 0 | } |
2117 | 0 | } |
2118 | 0 | } |
2119 | 0 | LEPT_FREE(array); |
2120 | 0 | numaDestroy(&na); |
2121 | 0 | return 0; |
2122 | 0 | } |
2123 | | |
2124 | | |
2125 | | /*------------------------------------------------------------------* |
2126 | | * Decision: table vs text * |
2127 | | *------------------------------------------------------------------*/ |
2128 | | /*! |
2129 | | * \brief pixDecideIfTable() |
2130 | | * |
2131 | | * \param[in] pixs any depth, any resolution >= 75 ppi |
2132 | | * \param[in] box [optional] if null, use entire pixs |
2133 | | * \param[in] orient L_PORTRAIT_MODE, L_LANDSCAPE_MODE |
2134 | | * \param[out] pscore 0 - 4; -1 if not determined |
2135 | | * \param[in] pixadb [optional] pre-allocated, for showing intermediate |
2136 | | * computation; use NULL to skip |
2137 | | * \return 0 if OK, 1 on error |
2138 | | * |
2139 | | * <pre> |
2140 | | * Notes: |
2141 | | * (1) It is assumed that pixs has the correct resolution set. |
2142 | | * If the resolution is 0, we assume it is 300 ppi and issue a warning. |
2143 | | * (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees |
2144 | | * clockwise before being analyzed. |
2145 | | * (3) The interpretation of the returned score: |
2146 | | * -1 undetermined |
2147 | | * 0 no table |
2148 | | * 1 unlikely to have a table |
2149 | | * 2 likely to have a table |
2150 | | * 3 even more likely to have a table |
2151 | | * 4 extremely likely to have a table |
2152 | | * * Setting the condition for finding a table at score >= 2 works |
2153 | | * well, except for false positives on kanji and landscape text. |
2154 | | * * These false positives can be removed by setting the condition |
2155 | | * at score >= 3, but recall is lowered because it will not find |
2156 | | * tables without either horizontal or vertical lines. |
2157 | | * (4) Most of the processing takes place at 75 ppi. |
2158 | | * (5) Internally, three numbers are determined, for horizontal and |
2159 | | * vertical fg lines, and for vertical bg lines. From these, |
2160 | | * four tests are made to decide if there is a table occupying |
2161 | | * a significant part of the image. |
2162 | | * (6) Images have arbitrary content and would be likely to trigger |
2163 | | * this detector, so they are checked for first, and if found, |
2164 | | * return with a 0 (no table) score. |
2165 | | * (7) Musical scores (tablature) are likely to trigger the detector. |
2166 | | * (8) Tables of content with more than 2 columns are likely to |
2167 | | * trigger the detector. |
2168 | | * (9) For debug output, input a pre-allocated pixa. |
2169 | | * </pre> |
2170 | | */ |
2171 | | l_ok |
2172 | | pixDecideIfTable(PIX *pixs, |
2173 | | BOX *box, |
2174 | | l_int32 orient, |
2175 | | l_int32 *pscore, |
2176 | | PIXA *pixadb) |
2177 | 0 | { |
2178 | 0 | l_int32 empty, nhb, nvb, nvw, score, htfound; |
2179 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; |
2180 | |
|
2181 | 0 | if (!pscore) |
2182 | 0 | return ERROR_INT("&score not defined", __func__, 1); |
2183 | 0 | *pscore = -1; |
2184 | 0 | if (!pixs) |
2185 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
2186 | | |
2187 | | /* Check if there is an image region. First convert to 1 bpp |
2188 | | * at 175 ppi. If an image is found, assume there is no table. */ |
2189 | 0 | pix1 = pixPrepare1bpp(pixs, box, 0.1, 175); |
2190 | 0 | pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL); |
2191 | 0 | if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
2192 | 0 | pixDestroy(&pix1); |
2193 | 0 | pixDestroy(&pix2); |
2194 | 0 | if (htfound) { |
2195 | 0 | *pscore = 0; |
2196 | 0 | L_INFO("pix has an image region\n", __func__); |
2197 | 0 | return 0; |
2198 | 0 | } |
2199 | | |
2200 | | /* Crop, convert to 1 bpp, 75 ppi */ |
2201 | 0 | if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL) |
2202 | 0 | return ERROR_INT("pix1 not made", __func__, 1); |
2203 | | |
2204 | 0 | pixZero(pix1, &empty); |
2205 | 0 | if (empty) { |
2206 | 0 | *pscore = 0; |
2207 | 0 | pixDestroy(&pix1); |
2208 | 0 | L_INFO("pix is empty\n", __func__); |
2209 | 0 | return 0; |
2210 | 0 | } |
2211 | | |
2212 | | /* The 2x2 dilation on 75 ppi makes these two approaches very similar: |
2213 | | * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution |
2214 | | * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); |
2215 | | * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution |
2216 | | * pix2 = pixDilateBrick(NULL, pix1, 2, 2); |
2217 | | * But (2) is more efficient if the input image to pixPrepare1bpp() |
2218 | | * is not at 300 ppi. */ |
2219 | 0 | pix2 = pixDilateBrick(NULL, pix1, 2, 2); |
2220 | | |
2221 | | /* Deskew both horizontally and vertically; rotate by 90 |
2222 | | * degrees if in landscape mode. */ |
2223 | 0 | pix3 = pixDeskewBoth(pix2, 1); |
2224 | 0 | if (pixadb) { |
2225 | 0 | pixaAddPix(pixadb, pix2, L_COPY); |
2226 | 0 | pixaAddPix(pixadb, pix3, L_COPY); |
2227 | 0 | } |
2228 | 0 | if (orient == L_LANDSCAPE_MODE) |
2229 | 0 | pix4 = pixRotate90(pix3, 1); |
2230 | 0 | else |
2231 | 0 | pix4 = pixClone(pix3); |
2232 | 0 | pixDestroy(&pix1); |
2233 | 0 | pixDestroy(&pix2); |
2234 | 0 | pixDestroy(&pix3); |
2235 | 0 | pix1 = pixClone(pix4); |
2236 | 0 | pixDestroy(&pix4); |
2237 | | |
2238 | | /* Look for horizontal and vertical lines */ |
2239 | 0 | pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0); |
2240 | 0 | pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8); |
2241 | 0 | pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0); |
2242 | 0 | pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8); |
2243 | 0 | pix6 = pixOr(NULL, pix3, pix5); |
2244 | 0 | if (pixadb) { |
2245 | 0 | pixaAddPix(pixadb, pix2, L_COPY); |
2246 | 0 | pixaAddPix(pixadb, pix4, L_COPY); |
2247 | 0 | pixaAddPix(pixadb, pix3, L_COPY); |
2248 | 0 | pixaAddPix(pixadb, pix5, L_COPY); |
2249 | 0 | pixaAddPix(pixadb, pix6, L_COPY); |
2250 | 0 | } |
2251 | 0 | pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */ |
2252 | 0 | pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */ |
2253 | | |
2254 | | /* Remove the lines */ |
2255 | 0 | pixSubtract(pix1, pix1, pix6); |
2256 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
2257 | | |
2258 | | /* Remove noise pixels */ |
2259 | 0 | pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0); |
2260 | 0 | if (pixadb) pixaAddPix(pixadb, pix7, L_COPY); |
2261 | | |
2262 | | /* Look for vertical white space. Invert to convert white bg |
2263 | | * to fg. Use a single rank-1 2x reduction, which closes small |
2264 | | * fg holes, for the final processing at 37.5 ppi. |
2265 | | * The vertical opening is then about 3 inches on a 300 ppi image. |
2266 | | * We also remove vertical whitespace that is less than 5 pixels |
2267 | | * wide at this resolution (about 0.1 inches) */ |
2268 | 0 | pixInvert(pix7, pix7); |
2269 | 0 | pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0); |
2270 | 0 | pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH, |
2271 | 0 | L_SELECT_IF_GTE, NULL); |
2272 | 0 | pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */ |
2273 | 0 | if (pixadb) { |
2274 | 0 | pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT); |
2275 | 0 | pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT); |
2276 | 0 | } |
2277 | | |
2278 | | /* Require at least 2 of the following 4 conditions for a table. |
2279 | | * Some tables do not have black (fg) lines, and for those we |
2280 | | * require more than 6 long vertical whitespace (bg) lines. */ |
2281 | 0 | score = 0; |
2282 | 0 | if (nhb > 1) score++; |
2283 | 0 | if (nvb > 2) score++; |
2284 | 0 | if (nvw > 3) score++; |
2285 | 0 | if (nvw > 6) score++; |
2286 | 0 | *pscore = score; |
2287 | |
|
2288 | 0 | pixDestroy(&pix1); |
2289 | 0 | pixDestroy(&pix2); |
2290 | 0 | pixDestroy(&pix3); |
2291 | 0 | pixDestroy(&pix4); |
2292 | 0 | pixDestroy(&pix5); |
2293 | 0 | pixDestroy(&pix6); |
2294 | 0 | pixDestroy(&pix7); |
2295 | 0 | pixDestroy(&pix8); |
2296 | 0 | pixDestroy(&pix9); |
2297 | 0 | return 0; |
2298 | 0 | } |
2299 | | |
2300 | | |
2301 | | /*! |
2302 | | * \brief pixPrepare1bpp() |
2303 | | * |
2304 | | * \param[in] pixs any depth |
2305 | | * \param[in] box [optional] if null, use entire pixs |
2306 | | * \param[in] cropfract fraction to be removed from the boundary; |
2307 | | * use 0.0 to retain the entire image |
2308 | | * \param[in] outres desired resolution of output image; if the |
2309 | | * input image resolution is not set, assume |
2310 | | * 300 ppi; use 0 to skip scaling. |
2311 | | * \return pixd if OK, NULL on error |
2312 | | * |
2313 | | * <pre> |
2314 | | * Notes: |
2315 | | * (1) This handles some common pre-processing operations, |
2316 | | * where the page segmentation algorithm takes a 1 bpp image. |
2317 | | * </pre> |
2318 | | */ |
2319 | | PIX * |
2320 | | pixPrepare1bpp(PIX *pixs, |
2321 | | BOX *box, |
2322 | | l_float32 cropfract, |
2323 | | l_int32 outres) |
2324 | 0 | { |
2325 | 0 | l_int32 w, h, res; |
2326 | 0 | l_float32 factor; |
2327 | 0 | BOX *box1; |
2328 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
2329 | |
|
2330 | 0 | if (!pixs) |
2331 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
2332 | | |
2333 | | /* Crop the image. If no box is given, use %cropfract to remove |
2334 | | * pixels near the image boundary; this helps avoid false |
2335 | | * negatives from noise that is often found there. */ |
2336 | 0 | if (box) { |
2337 | 0 | pix1 = pixClipRectangle(pixs, box, NULL); |
2338 | 0 | } else { |
2339 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
2340 | 0 | box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h), |
2341 | 0 | (l_int32)((1.0 - 2 * cropfract) * w), |
2342 | 0 | (l_int32)((1.0 - 2 * cropfract) * h)); |
2343 | 0 | pix1 = pixClipRectangle(pixs, box1, NULL); |
2344 | 0 | boxDestroy(&box1); |
2345 | 0 | } |
2346 | | |
2347 | | /* Convert to 1 bpp with adaptive background cleaning */ |
2348 | 0 | if (pixGetDepth(pixs) > 1) { |
2349 | 0 | pix2 = pixConvertTo8(pix1, 0); |
2350 | 0 | pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160); |
2351 | 0 | pixDestroy(&pix1); |
2352 | 0 | pixDestroy(&pix2); |
2353 | 0 | if (!pix3) { |
2354 | 0 | L_INFO("pix cleaning failed\n", __func__); |
2355 | 0 | return NULL; |
2356 | 0 | } |
2357 | 0 | pix4 = pixThresholdToBinary(pix3, 200); |
2358 | 0 | pixDestroy(&pix3); |
2359 | 0 | } else { |
2360 | 0 | pix4 = pixClone(pix1); |
2361 | 0 | pixDestroy(&pix1); |
2362 | 0 | } |
2363 | | |
2364 | | /* Scale the image to the requested output resolution; |
2365 | | do not scale if %outres <= 0 */ |
2366 | 0 | if (outres <= 0) |
2367 | 0 | return pix4; |
2368 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
2369 | 0 | L_WARNING("Resolution is not set: using 300 ppi\n", __func__); |
2370 | 0 | res = 300; |
2371 | 0 | } |
2372 | 0 | if (res != outres) { |
2373 | 0 | factor = (l_float32)outres / (l_float32)res; |
2374 | 0 | pix5 = pixScale(pix4, factor, factor); |
2375 | 0 | } else { |
2376 | 0 | pix5 = pixClone(pix4); |
2377 | 0 | } |
2378 | 0 | pixDestroy(&pix4); |
2379 | 0 | return pix5; |
2380 | 0 | } |
2381 | | |
2382 | | |
2383 | | /*------------------------------------------------------------------* |
2384 | | * Estimate the grayscale background value * |
2385 | | *------------------------------------------------------------------*/ |
2386 | | /*! |
2387 | | * \brief pixEstimateBackground() |
2388 | | * |
2389 | | * \param[in] pixs 8 bpp, with or without colormap |
2390 | | * \param[in] darkthresh pixels below this value are never considered |
2391 | | * part of the background; typ. 70; use 0 to skip |
2392 | | * \param[in] edgecrop fraction of half-width on each side, and of |
2393 | | * half-height at top and bottom, that are cropped |
2394 | | * \param[out] pbg estimated background, or 0 on error |
2395 | | * \return 0 if OK, 1 on error |
2396 | | * |
2397 | | * <pre> |
2398 | | * Notes: |
2399 | | * (1) Caller should check that return bg value is > 0. |
2400 | | * </pre> |
2401 | | */ |
2402 | | l_ok |
2403 | | pixEstimateBackground(PIX *pixs, |
2404 | | l_int32 darkthresh, |
2405 | | l_float32 edgecrop, |
2406 | | l_int32 *pbg) |
2407 | 0 | { |
2408 | 0 | l_int32 w, h, sampling; |
2409 | 0 | l_float32 fbg; |
2410 | 0 | BOX *box; |
2411 | 0 | PIX *pix1, *pix2, *pixm; |
2412 | |
|
2413 | 0 | if (!pbg) |
2414 | 0 | return ERROR_INT("&bg not defined", __func__, 1); |
2415 | 0 | *pbg = 0; |
2416 | 0 | if (!pixs || pixGetDepth(pixs) != 8) |
2417 | 0 | return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1); |
2418 | 0 | if (darkthresh > 128) |
2419 | 0 | L_WARNING("darkthresh unusually large\n", __func__); |
2420 | 0 | if (edgecrop < 0.0 || edgecrop >= 1.0) |
2421 | 0 | return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1); |
2422 | | |
2423 | 0 | pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); |
2424 | 0 | pixGetDimensions(pix1, &w, &h, NULL); |
2425 | | |
2426 | | /* Optionally crop inner part of image */ |
2427 | 0 | if (edgecrop > 0.0) { |
2428 | 0 | box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h, |
2429 | 0 | (1.0 - edgecrop) * w, (1.0 - edgecrop) * h); |
2430 | 0 | pix2 = pixClipRectangle(pix1, box, NULL); |
2431 | 0 | boxDestroy(&box); |
2432 | 0 | } else { |
2433 | 0 | pix2 = pixClone(pix1); |
2434 | 0 | } |
2435 | | |
2436 | | /* We will use no more than 50K samples */ |
2437 | 0 | sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5)); |
2438 | | |
2439 | | /* Optionally make a mask over all pixels lighter than %darkthresh */ |
2440 | 0 | pixm = NULL; |
2441 | 0 | if (darkthresh > 0) { |
2442 | 0 | pixm = pixThresholdToBinary(pix2, darkthresh); |
2443 | 0 | pixInvert(pixm, pixm); |
2444 | 0 | } |
2445 | |
|
2446 | 0 | pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL); |
2447 | 0 | *pbg = (l_int32)(fbg + 0.5); |
2448 | 0 | pixDestroy(&pix1); |
2449 | 0 | pixDestroy(&pix2); |
2450 | 0 | pixDestroy(&pixm); |
2451 | 0 | return 0; |
2452 | 0 | } |
2453 | | |
2454 | | |
2455 | | /*---------------------------------------------------------------------* |
2456 | | * Largest white or black rectangles in an image * |
2457 | | *---------------------------------------------------------------------*/ |
2458 | | /*! |
2459 | | * \brief pixFindLargeRectangles() |
2460 | | * |
2461 | | * \param[in] pixs 1 bpp |
2462 | | * \param[in] polarity 0 within background, 1 within foreground |
2463 | | * \param[in] nrect number of rectangles to be found |
2464 | | * \param[out] pboxa largest rectangles, sorted by decreasing area |
2465 | | * \param[in,out] ppixdb optional return output with rectangles drawn on it |
2466 | | * \return 0 if OK, 1 on error |
2467 | | * |
2468 | | * <pre> |
2469 | | * Notes: |
2470 | | * (1) This does a greedy search to find the largest rectangles, |
2471 | | * either black or white and without overlaps, in %pix. |
2472 | | * (2) See pixFindLargestRectangle(), which is called multiple |
2473 | | * times, for details. On each call, the largest rectangle |
2474 | | * found is painted, so that none of its pixels can be |
2475 | | * used later, before calling it again. |
2476 | | * (3) This function is surprisingly fast. Although |
2477 | | * pixFindLargestRectangle() runs at about 50 MPix/sec, when it |
2478 | | * is run multiple times by pixFindLargeRectangles(), it processes |
2479 | | * at 150 - 250 MPix/sec, and the time is approximately linear |
2480 | | * in %nrect. For example, for a 1 MPix image, searching for |
2481 | | * the largest 50 boxes takes about 0.2 seconds. |
2482 | | * </pre> |
2483 | | */ |
2484 | | l_ok |
2485 | | pixFindLargeRectangles(PIX *pixs, |
2486 | | l_int32 polarity, |
2487 | | l_int32 nrect, |
2488 | | BOXA **pboxa, |
2489 | | PIX **ppixdb) |
2490 | 0 | { |
2491 | 0 | l_int32 i, op, bx, by, bw, bh; |
2492 | 0 | BOX *box; |
2493 | 0 | BOXA *boxa; |
2494 | 0 | PIX *pix; |
2495 | |
|
2496 | 0 | if (ppixdb) *ppixdb = NULL; |
2497 | 0 | if (!pboxa) |
2498 | 0 | return ERROR_INT("&boxa not defined", __func__, 1); |
2499 | 0 | *pboxa = NULL; |
2500 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
2501 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
2502 | 0 | if (polarity != 0 && polarity != 1) |
2503 | 0 | return ERROR_INT("invalid polarity", __func__, 1); |
2504 | 0 | if (nrect > 1000) { |
2505 | 0 | L_WARNING("large num rectangles = %d requested; using 1000\n", |
2506 | 0 | __func__, nrect); |
2507 | 0 | nrect = 1000; |
2508 | 0 | } |
2509 | |
|
2510 | 0 | pix = pixCopy(NULL, pixs); |
2511 | 0 | boxa = boxaCreate(nrect); |
2512 | 0 | *pboxa = boxa; |
2513 | | |
2514 | | /* Sequentially find largest rectangle and fill with opposite color */ |
2515 | 0 | for (i = 0; i < nrect; i++) { |
2516 | 0 | if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) { |
2517 | 0 | boxDestroy(&box); |
2518 | 0 | L_ERROR("failure in pixFindLargestRectangle\n", __func__); |
2519 | 0 | break; |
2520 | 0 | } |
2521 | 0 | boxaAddBox(boxa, box, L_INSERT); |
2522 | 0 | op = (polarity == 0) ? PIX_SET : PIX_CLR; |
2523 | 0 | boxGetGeometry(box, &bx, &by, &bw, &bh); |
2524 | 0 | pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0); |
2525 | 0 | } |
2526 | |
|
2527 | 0 | if (ppixdb) |
2528 | 0 | *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3); |
2529 | |
|
2530 | 0 | pixDestroy(&pix); |
2531 | 0 | return 0; |
2532 | 0 | } |
2533 | | |
2534 | | |
2535 | | /*! |
2536 | | * \brief pixFindLargestRectangle() |
2537 | | * |
2538 | | * \param[in] pixs 1 bpp |
2539 | | * \param[in] polarity 0 within background, 1 within foreground |
2540 | | * \param[out] pbox largest area rectangle |
2541 | | * \param[in,out] ppixdb optional return output with rectangle drawn on it |
2542 | | * \return 0 if OK, 1 on error |
2543 | | * |
2544 | | * <pre> |
2545 | | * Notes: |
2546 | | * (1) This is a simple and elegant solution to a problem in |
2547 | | * computational geometry that at first appears to be quite |
2548 | | * difficult: what is the largest rectangle that can be |
2549 | | * placed in the image, covering only pixels of one polarity |
2550 | | * (bg or fg)? The solution is O(n), where n is the number |
2551 | | * of pixels in the image, and it requires nothing more than |
2552 | | * using a simple recursion relation in a single sweep of the image. |
2553 | | * (2) In a sweep from UL to LR with left-to-right being the fast |
2554 | | * direction, calculate the largest white rectangle at (x, y), |
2555 | | * using previously calculated values at pixels #1 and #2: |
2556 | | * #1: (x, y - 1) |
2557 | | * #2: (x - 1, y) |
2558 | | * We also need the most recent "black" pixels that were seen |
2559 | | * in the current row and column. |
2560 | | * Consider the largest area. There are only two possibilities: |
2561 | | * (a) Min(w(1), horizdist) * (h(1) + 1) |
2562 | | * (b) Min(h(2), vertdist) * (w(2) + 1) |
2563 | | * where |
2564 | | * horizdist: the distance from the rightmost "black" pixel seen |
2565 | | * in the current row across to the current pixel |
2566 | | * vertdist: the distance from the lowest "black" pixel seen |
2567 | | * in the current column down to the current pixel |
2568 | | * and we choose the Max of (a) and (b). |
2569 | | * (3) To convince yourself that these recursion relations are correct, |
2570 | | * it helps to draw the maximum rectangles at #1 and #2. |
2571 | | * Then for #1, you try to extend the rectangle down one line, |
2572 | | * so that the height is h(1) + 1. Do you get the full |
2573 | | * width of #1, w(1)? It depends on where the black pixels are |
2574 | | * in the current row. You know the final width is bounded by w(1) |
2575 | | * and w(2) + 1, but the actual value depends on the distribution |
2576 | | * of black pixels in the current row that are at a distance |
2577 | | * from the current pixel that is between these limits. |
2578 | | * We call that value "horizdist", and the area is then given |
2579 | | * by the expression (a) above. Using similar reasoning for #2, |
2580 | | * where you attempt to extend the rectangle to the right |
2581 | | * by 1 pixel, you arrive at (b). The largest rectangle is |
2582 | | * then found by taking the Max. |
2583 | | * </pre> |
2584 | | */ |
2585 | | l_ok |
2586 | | pixFindLargestRectangle(PIX *pixs, |
2587 | | l_int32 polarity, |
2588 | | BOX **pbox, |
2589 | | PIX **ppixdb) |
2590 | 0 | { |
2591 | 0 | l_int32 i, j, w, h, d, wpls, val; |
2592 | 0 | l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2; |
2593 | 0 | l_int32 xmax, ymax; /* LR corner of the largest rectangle */ |
2594 | 0 | l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg; |
2595 | 0 | l_int32 *lowestfg; |
2596 | 0 | l_uint32 *datas, *lines; |
2597 | 0 | l_uint32 **linew, **lineh; |
2598 | 0 | BOX *box; |
2599 | 0 | PIX *pixw, *pixh; /* keeps the width and height for the largest */ |
2600 | | /* rectangles whose LR corner is located there. */ |
2601 | |
|
2602 | 0 | if (ppixdb) *ppixdb = NULL; |
2603 | 0 | if (!pbox) |
2604 | 0 | return ERROR_INT("&box not defined", __func__, 1); |
2605 | 0 | *pbox = NULL; |
2606 | 0 | if (!pixs) |
2607 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
2608 | 0 | pixGetDimensions(pixs, &w, &h, &d); |
2609 | 0 | if (d != 1) |
2610 | 0 | return ERROR_INT("pixs not 1 bpp", __func__, 1); |
2611 | 0 | if (polarity != 0 && polarity != 1) |
2612 | 0 | return ERROR_INT("invalid polarity", __func__, 1); |
2613 | | |
2614 | | /* Initialize lowest "fg" seen so far for each column */ |
2615 | 0 | lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); |
2616 | 0 | for (i = 0; i < w; i++) |
2617 | 0 | lowestfg[i] = -1; |
2618 | | |
2619 | | /* The combination (val ^ polarity) is the color for which we |
2620 | | * are searching for the maximum rectangle. For polarity == 0, |
2621 | | * we search in the bg (white). */ |
2622 | 0 | pixw = pixCreate(w, h, 32); /* stores width */ |
2623 | 0 | pixh = pixCreate(w, h, 32); /* stores height */ |
2624 | 0 | linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL); |
2625 | 0 | lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL); |
2626 | 0 | datas = pixGetData(pixs); |
2627 | 0 | wpls = pixGetWpl(pixs); |
2628 | 0 | maxarea = xmax = ymax = wmax = hmax = 0; |
2629 | 0 | for (i = 0; i < h; i++) { |
2630 | 0 | lines = datas + i * wpls; |
2631 | 0 | prevfg = -1; |
2632 | 0 | for (j = 0; j < w; j++) { |
2633 | 0 | val = GET_DATA_BIT(lines, j); |
2634 | 0 | if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. */ |
2635 | 0 | if (i == 0 && j == 0) { |
2636 | 0 | wp = hp = 1; |
2637 | 0 | } else if (i == 0) { |
2638 | 0 | wp = linew[i][j - 1] + 1; |
2639 | 0 | hp = 1; |
2640 | 0 | } else if (j == 0) { |
2641 | 0 | wp = 1; |
2642 | 0 | hp = lineh[i - 1][j] + 1; |
2643 | 0 | } else { |
2644 | | /* Expand #1 prev rectangle down */ |
2645 | 0 | w1 = linew[i - 1][j]; |
2646 | 0 | h1 = lineh[i - 1][j]; |
2647 | 0 | horizdist = j - prevfg; |
2648 | 0 | wmin = L_MIN(w1, horizdist); /* width of new rectangle */ |
2649 | 0 | area1 = wmin * (h1 + 1); |
2650 | | |
2651 | | /* Expand #2 prev rectangle to right */ |
2652 | 0 | w2 = linew[i][j - 1]; |
2653 | 0 | h2 = lineh[i][j - 1]; |
2654 | 0 | vertdist = i - lowestfg[j]; |
2655 | 0 | hmin = L_MIN(h2, vertdist); /* height of new rectangle */ |
2656 | 0 | area2 = hmin * (w2 + 1); |
2657 | |
|
2658 | 0 | if (area1 > area2) { |
2659 | 0 | wp = wmin; |
2660 | 0 | hp = h1 + 1; |
2661 | 0 | } else { |
2662 | 0 | wp = w2 + 1; |
2663 | 0 | hp = hmin; |
2664 | 0 | } |
2665 | 0 | } |
2666 | 0 | } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */ |
2667 | 0 | prevfg = j; |
2668 | 0 | lowestfg[j] = i; |
2669 | 0 | wp = hp = 0; |
2670 | 0 | } |
2671 | 0 | linew[i][j] = wp; |
2672 | 0 | lineh[i][j] = hp; |
2673 | 0 | if (wp * hp > maxarea) { |
2674 | 0 | maxarea = wp * hp; |
2675 | 0 | xmax = j; |
2676 | 0 | ymax = i; |
2677 | 0 | wmax = wp; |
2678 | 0 | hmax = hp; |
2679 | 0 | } |
2680 | 0 | } |
2681 | 0 | } |
2682 | | |
2683 | | /* Translate from LR corner to Box coords (UL corner, w, h) */ |
2684 | 0 | box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax); |
2685 | 0 | *pbox = box; |
2686 | |
|
2687 | 0 | if (ppixdb) { |
2688 | 0 | *ppixdb = pixConvertTo8(pixs, TRUE); |
2689 | 0 | pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0); |
2690 | 0 | } |
2691 | |
|
2692 | 0 | LEPT_FREE(linew); |
2693 | 0 | LEPT_FREE(lineh); |
2694 | 0 | LEPT_FREE(lowestfg); |
2695 | 0 | pixDestroy(&pixw); |
2696 | 0 | pixDestroy(&pixh); |
2697 | 0 | return 0; |
2698 | 0 | } |
2699 | | |
2700 | | |
2701 | | /*---------------------------------------------------------------------* |
2702 | | * Generate rectangle inside connected component * |
2703 | | *---------------------------------------------------------------------*/ |
2704 | | /*! |
2705 | | * \brief pixFindRectangleInCC() |
2706 | | * |
2707 | | * \param[in] pixs 1 bpp, with sufficient closings to make the fg be |
2708 | | * a single c.c. that is a convex hull |
2709 | | * \param[in] boxs [optional] if NULL, %pixs should be a minimum |
2710 | | * container of a single c.c. |
2711 | | * \param[in] fract first and all consecutive lines found must be at |
2712 | | * least this fraction of the fast scan dimension |
2713 | | * \param[in] dir L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of |
2714 | | * fast scan |
2715 | | * \param[in] select L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, |
2716 | | * L_LARGEST_AREA, L_SMALEST_AREA |
2717 | | * \param[in] debug if 1, generates output pdf showing intermediate |
2718 | | * computation and final result |
2719 | | * \return box of included rectangle, or NULL on error |
2720 | | * |
2721 | | * <pre> |
2722 | | * Notes: |
2723 | | * (1) Computation is similar to pixFindLargestRectangle(), but allows |
2724 | | * a different set of results to choose from. |
2725 | | * (2) Select the fast scan direction. Then, scanning in the slow |
2726 | | * direction, find the longest run of ON pixels in the fast |
2727 | | * scan direction and look for the first run that is longer |
2728 | | * than %fract of the dimension. Continue until a shorter run |
2729 | | * is found. This generates a box of ON pixels fitting into the c.c. |
2730 | | * (3) Do this from both slow scan directions and use %select to get |
2731 | | * a resulting box from these two. |
2732 | | * (4) The extracted rectangle is not necessarily the largest that |
2733 | | * can fit in the c.c. To get that, use pixFindLargestRectangle(). |
2734 | | */ |
2735 | | BOX * |
2736 | | pixFindRectangleInCC(PIX *pixs, |
2737 | | BOX *boxs, |
2738 | | l_float32 fract, |
2739 | | l_int32 dir, |
2740 | | l_int32 select, |
2741 | | l_int32 debug) |
2742 | 0 | { |
2743 | 0 | l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res; |
2744 | 0 | l_int32 xfirst, xlast, xstart, yfirst, ylast, length; |
2745 | 0 | BOX *box1, *box2, *box3, *box4, *box5; |
2746 | 0 | PIX *pix1, *pix2, *pixdb1, *pixdb2; |
2747 | 0 | PIXA *pixadb; |
2748 | |
|
2749 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
2750 | 0 | return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
2751 | 0 | if (fract <= 0.0 || fract > 1.0) |
2752 | 0 | return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL); |
2753 | 0 | if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL) |
2754 | 0 | return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL); |
2755 | 0 | if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION && |
2756 | 0 | select != L_LARGEST_AREA && select != L_SMALLEST_AREA) |
2757 | 0 | return (BOX *)ERROR_PTR("invalid select", __func__, NULL); |
2758 | | |
2759 | | /* Extract the c.c. if necessary */ |
2760 | 0 | x = y = 0; |
2761 | 0 | if (boxs) { |
2762 | 0 | pix1 = pixClipRectangle(pixs, boxs, NULL); |
2763 | 0 | boxGetGeometry(boxs, &x, &y, NULL, NULL); |
2764 | 0 | } else { |
2765 | 0 | pix1 = pixClone(pixs); |
2766 | 0 | } |
2767 | | |
2768 | | /* All fast scans are horizontal; rotate 90 deg cw if necessary */ |
2769 | 0 | if (dir == L_SCAN_VERTICAL) |
2770 | 0 | pix2 = pixRotate90(pix1, 1); |
2771 | 0 | else /* L_SCAN_HORIZONTAL */ |
2772 | 0 | pix2 = pixClone(pix1); |
2773 | 0 | pixGetDimensions(pix2, &w, &h, NULL); |
2774 | |
|
2775 | 0 | pixadb = (debug) ? pixaCreate(0) : NULL; |
2776 | 0 | pixdb1 = NULL; |
2777 | 0 | if (pixadb) { |
2778 | 0 | lept_mkdir("lept/rect"); |
2779 | 0 | pixaAddPix(pixadb, pix1, L_CLONE); |
2780 | 0 | pixdb1 = pixConvertTo32(pix2); |
2781 | 0 | } |
2782 | 0 | pixDestroy(&pix1); |
2783 | | |
2784 | | /* Scanning down, find the first scanline with a long enough run. |
2785 | | * That run goes from (xfirst, yfirst) to (xlast, yfirst). */ |
2786 | 0 | found = FALSE; |
2787 | 0 | for (i = 0; i < h; i++) { |
2788 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2789 | 0 | if (length >= (l_int32)(fract * w + 0.5)) { |
2790 | 0 | yfirst = i; |
2791 | 0 | xfirst = xstart; |
2792 | 0 | xlast = xfirst + length - 1; |
2793 | 0 | found = TRUE; |
2794 | 0 | break; |
2795 | 0 | } |
2796 | 0 | } |
2797 | 0 | if (!found) { |
2798 | 0 | L_WARNING("no run of sufficient size was found\n", __func__); |
2799 | 0 | pixDestroy(&pix2); |
2800 | 0 | pixDestroy(&pixdb1); |
2801 | 0 | pixaDestroy(&pixadb); |
2802 | 0 | return NULL; |
2803 | 0 | } |
2804 | | |
2805 | | /* Continue down until the condition fails */ |
2806 | 0 | w1 = xlast - xfirst + 1; |
2807 | 0 | h1 = h - yfirst; /* init */ |
2808 | 0 | ylast = h - 1; /* init */ |
2809 | 0 | for (i = yfirst + 1; i < h; i++) { |
2810 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2811 | 0 | if (xstart > xfirst || (xstart + length - 1 < xlast) || |
2812 | 0 | i == h - 1) { |
2813 | 0 | ylast = i - 1; |
2814 | 0 | h1 = ylast - yfirst + 1; |
2815 | 0 | break; |
2816 | 0 | } |
2817 | 0 | } |
2818 | 0 | box1 = boxCreate(xfirst, yfirst, w1, h1); |
2819 | | |
2820 | | /* Scanning up, find the first scanline with a long enough run. |
2821 | | * That run goes from (xfirst, ylast) to (xlast, ylast). */ |
2822 | 0 | for (i = h - 1; i >= 0; i--) { |
2823 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2824 | 0 | if (length >= (l_int32)(fract * w + 0.5)) { |
2825 | 0 | ylast = i; |
2826 | 0 | xfirst = xstart; |
2827 | 0 | xlast = xfirst + length - 1; |
2828 | 0 | break; |
2829 | 0 | } |
2830 | 0 | } |
2831 | | |
2832 | | /* Continue up until the condition fails */ |
2833 | 0 | w2 = xlast - xfirst + 1; |
2834 | 0 | h2 = ylast + 1; /* initialize */ |
2835 | 0 | for (i = ylast - 1; i >= 0; i--) { |
2836 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2837 | 0 | if (xstart > xfirst || (xstart + length - 1 < xlast) || |
2838 | 0 | i == 0) { |
2839 | 0 | yfirst = i + 1; |
2840 | 0 | h2 = ylast - yfirst + 1; |
2841 | 0 | break; |
2842 | 0 | } |
2843 | 0 | } |
2844 | 0 | box2 = boxCreate(xfirst, yfirst, w2, h2); |
2845 | 0 | pixDestroy(&pix2); |
2846 | |
|
2847 | 0 | if (pixadb) { |
2848 | 0 | pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0); |
2849 | 0 | pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0); |
2850 | 0 | pixaAddPix(pixadb, pixdb1, L_INSERT); |
2851 | 0 | } |
2852 | | |
2853 | | /* Select the final result from the two boxes */ |
2854 | 0 | if (select == L_GEOMETRIC_UNION) |
2855 | 0 | box3 = boxBoundingRegion(box1, box2); |
2856 | 0 | else if (select == L_GEOMETRIC_INTERSECTION) |
2857 | 0 | box3 = boxOverlapRegion(box1, box2); |
2858 | 0 | else if (select == L_LARGEST_AREA) |
2859 | 0 | box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2); |
2860 | 0 | else /* select == L_SMALLEST_AREA) */ |
2861 | 0 | box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2); |
2862 | 0 | boxDestroy(&box1); |
2863 | 0 | boxDestroy(&box2); |
2864 | | |
2865 | | /* Rotate the box 90 degrees ccw if necessary */ |
2866 | 0 | box4 = NULL; |
2867 | 0 | if (box3) { |
2868 | 0 | if (dir == L_SCAN_VERTICAL) |
2869 | 0 | box4 = boxRotateOrth(box3, w, h, 3); |
2870 | 0 | else |
2871 | 0 | box4 = boxCopy(box3); |
2872 | 0 | } |
2873 | | |
2874 | | /* Transform back to global coordinates if %boxs exists */ |
2875 | 0 | box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL; |
2876 | 0 | boxDestroy(&box3); |
2877 | 0 | boxDestroy(&box4); |
2878 | | |
2879 | | /* Debug output */ |
2880 | 0 | if (pixadb) { |
2881 | 0 | pixdb1 = pixConvertTo8(pixs, 0); |
2882 | 0 | pixAddConstantGray(pixdb1, 190); |
2883 | 0 | pixdb2 = pixConvertTo32(pixdb1); |
2884 | 0 | if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255); |
2885 | 0 | pixaAddPix(pixadb, pixdb2, L_INSERT); |
2886 | 0 | res = pixGetXRes(pixs); |
2887 | 0 | L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__); |
2888 | 0 | pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL, |
2889 | 0 | "/tmp/lept/rect/fitrect.pdf"); |
2890 | 0 | pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2); |
2891 | 0 | pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG); |
2892 | 0 | pixDestroy(&pix1); |
2893 | 0 | pixDestroy(&pixdb1); |
2894 | 0 | pixaDestroy(&pixadb); |
2895 | 0 | } |
2896 | |
|
2897 | 0 | return box5; |
2898 | 0 | } |
2899 | | |
2900 | | /*------------------------------------------------------------------* |
2901 | | * Automatic photoinvert for OCR * |
2902 | | *------------------------------------------------------------------*/ |
2903 | | /*! |
2904 | | * \brief pixAutoPhotoinvert() |
2905 | | * |
2906 | | * \param[in] pixs any depth, colormap ok |
2907 | | * \param[in] thresh binarization threshold; use 0 for default |
2908 | | * \param[out] ppixm [optional] image regions to be inverted |
2909 | | * \param[out] pixadb [optional] debug; input NULL to skip |
2910 | | * \return pixd 1 bpp image to be sent to OCR, or NULL on error |
2911 | | * |
2912 | | * <pre> |
2913 | | * Notes: |
2914 | | * (1) A 1 bpp image is returned, where pixels in image regions are |
2915 | | * photo-inverted. |
2916 | | * (2) If there is light text with a dark background, this will |
2917 | | * identify the region and photoinvert the pixels there if |
2918 | | * there are at least 60% fg pixels in the region. |
2919 | | * (3) For debug output, input a (typically empty) %pixadb. |
2920 | | * </pre> |
2921 | | */ |
2922 | | PIX * |
2923 | | pixAutoPhotoinvert(PIX *pixs, |
2924 | | l_int32 thresh, |
2925 | | PIX **ppixm, |
2926 | | PIXA *pixadb) |
2927 | 0 | { |
2928 | 0 | l_int32 i, n, empty, x, y, w, h; |
2929 | 0 | l_float32 fgfract; |
2930 | 0 | BOX *box1; |
2931 | 0 | BOXA *boxa1; |
2932 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
2933 | |
|
2934 | 0 | if (ppixm) *ppixm = NULL; |
2935 | 0 | if (!pixs) |
2936 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
2937 | 0 | if (thresh == 0) thresh = 128; |
2938 | |
|
2939 | 0 | if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL) |
2940 | 0 | return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL); |
2941 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
2942 | | |
2943 | | /* Identify regions for photo-inversion: |
2944 | | * (1) Start with the halftone mask. |
2945 | | * (2) Eliminate ordinary text and halftones in the mask. |
2946 | | * (3) Some regions of inverted text may have been removed in |
2947 | | * steps (1) and (2). Conditionally fill holes in the mask, |
2948 | | * but do not fill out to the bounding rect. */ |
2949 | 0 | pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb); |
2950 | 0 | pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* remove noise */ |
2951 | 0 | pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0); |
2952 | 0 | if (pixadb) { |
2953 | 0 | pixaAddPix(pixadb, pix2, L_CLONE); |
2954 | 0 | pixaAddPix(pixadb, pix3, L_CLONE); |
2955 | 0 | pixaAddPix(pixadb, pix4, L_COPY); |
2956 | 0 | } |
2957 | 0 | pixDestroy(&pix2); |
2958 | 0 | pixDestroy(&pix3); |
2959 | 0 | pixZero(pix4, &empty); |
2960 | 0 | if (empty) { |
2961 | 0 | pixDestroy(&pix4); |
2962 | 0 | return pix1; |
2963 | 0 | } |
2964 | | |
2965 | | /* Examine each component and validate the inversion. |
2966 | | * Require at least 60% of pixels under each component to be FG. */ |
2967 | 0 | boxa1 = pixConnCompBB(pix4, 8); |
2968 | 0 | n = boxaGetCount(boxa1); |
2969 | 0 | for (i = 0; i < n; i++) { |
2970 | 0 | box1 = boxaGetBox(boxa1, i, L_COPY); |
2971 | 0 | pix5 = pixClipRectangle(pix1, box1, NULL); |
2972 | 0 | pixForegroundFraction(pix5, &fgfract); |
2973 | 0 | if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract); |
2974 | 0 | boxGetGeometry(box1, &x, &y, &w, &h); |
2975 | 0 | if (fgfract < 0.6) /* erase from the mask */ |
2976 | 0 | pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0); |
2977 | 0 | pixDestroy(&pix5); |
2978 | 0 | boxDestroy(&box1); |
2979 | 0 | } |
2980 | 0 | boxaDestroy(&boxa1); |
2981 | 0 | pixZero(pix4, &empty); |
2982 | 0 | if (empty) { |
2983 | 0 | pixDestroy(&pix4); |
2984 | 0 | return pix1; |
2985 | 0 | } |
2986 | | |
2987 | | /* Combine pixels of the photo-inverted pix with the binarized input */ |
2988 | 0 | pix5 = pixInvert(NULL, pix1); |
2989 | 0 | pixCombineMasked(pix1, pix5, pix4); |
2990 | |
|
2991 | 0 | if (pixadb) { |
2992 | 0 | pixaAddPix(pixadb, pix5, L_CLONE); |
2993 | 0 | pixaAddPix(pixadb, pix1, L_COPY); |
2994 | 0 | } |
2995 | 0 | pixDestroy(&pix5); |
2996 | 0 | if (ppixm) |
2997 | 0 | *ppixm = pix4; |
2998 | 0 | else |
2999 | 0 | pixDestroy(&pix4); |
3000 | 0 | return pix1; |
3001 | 0 | } |