/src/leptonica/src/pageseg.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file pageseg.c |
29 | | * <pre> |
30 | | * |
31 | | * Top level page segmentation |
32 | | * l_int32 pixGetRegionsBinary() |
33 | | * |
34 | | * Halftone region extraction |
35 | | * PIX *pixGenHalftoneMask() **Deprecated wrapper** |
36 | | * PIX *pixGenerateHalftoneMask() |
37 | | |
38 | | * |
39 | | * Textline extraction |
40 | | * PIX *pixGenTextlineMask() |
41 | | * |
42 | | * Textblock extraction |
43 | | * PIX *pixGenTextblockMask() |
44 | | * |
45 | | * Location and extraction of page foreground; cleaning pages |
46 | | * PIX *pixCropImage() |
47 | | * static l_int32 pixMaxCompAfterVClosing() |
48 | | * static PIX *pixRescaleForCropping() |
49 | | * PIX *pixCleanImage() |
50 | | * BOX *pixFindPageForeground() |
51 | | * |
52 | | * Extraction of characters from image with only text |
53 | | * l_int32 pixSplitIntoCharacters() |
54 | | * BOXA *pixSplitComponentWithProfile() |
55 | | * |
56 | | * Extraction of lines of text |
57 | | * PIXA *pixExtractTextlines() |
58 | | * PIXA *pixExtractRawTextlines() |
59 | | * |
60 | | * How many text columns |
61 | | * l_int32 pixCountTextColumns() |
62 | | * |
63 | | * Decision: text vs photo |
64 | | * l_int32 pixDecideIfText() |
65 | | * l_int32 pixFindThreshFgExtent() |
66 | | * |
67 | | * Decision: table vs text |
68 | | * l_int32 pixDecideIfTable() |
69 | | * Pix *pixPrepare1bpp() |
70 | | * |
71 | | * Estimate the grayscale background value |
72 | | * l_int32 pixEstimateBackground() |
73 | | * |
74 | | * Largest white or black rectangles in an image |
75 | | * l_int32 pixFindLargeRectangles() |
76 | | * l_int32 pixFindLargestRectangle() |
77 | | * |
78 | | * Generate rectangle inside connected component |
79 | | * BOX *pixFindRectangleInCC() |
80 | | * |
81 | | * Automatic photoinvert for OCR |
82 | | * PIX *pixAutoPhotoinvert() |
83 | | * </pre> |
84 | | */ |
85 | | |
86 | | #ifdef HAVE_CONFIG_H |
87 | | #include <config_auto.h> |
88 | | #endif /* HAVE_CONFIG_H */ |
89 | | |
90 | | #include <math.h> |
91 | | #include "allheaders.h" |
92 | | #include "pix_internal.h" |
93 | | |
94 | | /* These functions are not intended to work on very low-res images: the size checks in this file reject a pix whose width is < MinWidth or whose height is < MinHeight */ |
95 | | static const l_int32 MinWidth = 100; |
96 | | static const l_int32 MinHeight = 100; |
97 | | |
98 | | static PIX *pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h, |
99 | | l_int32 lr_border, l_int32 tb_border, |
100 | | l_float32 maxwiden, PIX **ppixsc); |
101 | | |
102 | | /*------------------------------------------------------------------* |
103 | | * Top level page segmentation * |
104 | | *------------------------------------------------------------------*/ |
105 | | /*! |
106 | | * \brief pixGetRegionsBinary() - segment a binary page image into halftone, textline and textblock masks |
107 | | * |
108 | | * \param[in] pixs 1 bpp, assumed to be 300 to 400 ppi |
109 | | * \param[out] ppixhm [optional] halftone mask |
110 | | * \param[out] ppixtm [optional] textline mask |
111 | | * \param[out] ppixtb [optional] textblock mask; an empty mask (created with pixCreateTemplate(pixs)) if no filtered textblock mask is available |
112 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
113 | | * \return 0 if OK, 1 on error (pixs undefined, not 1 bpp, or smaller than MinWidth x MinHeight) |
114 | | * |
115 | | * <pre> |
116 | | * Notes: |
117 | | * (1) It is best to deskew the image before segmenting. Masks that are not requested (NULL output pointer) are destroyed before returning. |
118 | | * (2) Passing in %pixadb enables debug output: intermediate images are added to %pixadb, and ptaa, boxa and pdf files are written under /tmp/lept/pageseg/. |
119 | | * </pre> |
120 | | */ |
121 | | l_ok |
122 | | pixGetRegionsBinary(PIX *pixs, |
123 | | PIX **ppixhm, |
124 | | PIX **ppixtm, |
125 | | PIX **ppixtb, |
126 | | PIXA *pixadb) |
127 | 3.19k | { |
128 | 3.19k | l_int32 w, h, htfound, tlfound; |
129 | 3.19k | PIX *pixr, *pix1, *pix2; |
130 | 3.19k | PIX *pixtext; /* text pixels only */ |
131 | 3.19k | PIX *pixhm2; /* halftone mask; 2x reduction */ |
132 | 3.19k | PIX *pixhm; /* halftone mask; expanded 2x from pixhm2 */ |
133 | 3.19k | PIX *pixtm2; /* textline mask; 2x reduction */ |
134 | 3.19k | PIX *pixtm; /* textline mask; expanded 2x from pixtm2 and dilated */ |
135 | 3.19k | PIX *pixvws; /* vertical white space mask */ |
136 | 3.19k | PIX *pixtb2; /* textblock mask; 2x reduction */ |
137 | 3.19k | PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ |
138 | 3.19k | PIX *pixtb; /* textblock mask; expanded 2x from pixtbf2 and dilated, or an empty template */ |
139 | | |
140 | 3.19k | if (ppixhm) *ppixhm = NULL; |
141 | 3.19k | if (ppixtm) *ppixtm = NULL; |
142 | 3.19k | if (ppixtb) *ppixtb = NULL; |
143 | 3.19k | if (!pixs || pixGetDepth(pixs) != 1) |
144 | 371 | return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); |
145 | 2.82k | pixGetDimensions(pixs, &w, &h, NULL); |
146 | 2.82k | if (w < MinWidth || h < MinHeight) { |
147 | 1.79k | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
148 | 1.79k | return 1; |
149 | 1.79k | } |
150 | | |
151 | | /* 2x reduce, to 150 - 200 ppi */ |
152 | 1.02k | pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
153 | 1.02k | if (pixadb) pixaAddPix(pixadb, pixr, L_COPY); |
154 | | |
155 | | /* Get the halftone mask */ |
156 | 1.02k | pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb); |
157 | | |
158 | | /* Get the textline mask from the text pixels */ |
159 | 1.02k | pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb); |
160 | | |
161 | | /* Get the textblock mask from the textline mask */ |
162 | 1.02k | pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb); |
163 | 1.02k | pixDestroy(&pixr); |
164 | 1.02k | pixDestroy(&pixtext); |
165 | 1.02k | pixDestroy(&pixvws); |
166 | | |
167 | | /* Remove small components from the mask, where a small |
168 | | * component is defined as one with both width and height < 60. |
169 | | * pixtbf2 stays NULL when no textblock mask was generated. */ |
170 | 1.02k | pixtbf2 = NULL; |
171 | 1.02k | if (pixtb2) { |
172 | 504 | pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, |
173 | 504 | L_SELECT_IF_GTE, NULL); |
174 | 504 | pixDestroy(&pixtb2); |
175 | 504 | if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY); |
176 | 504 | } |
177 | | |
178 | | /* Expand all masks to full resolution, and do filling or |
179 | | * small dilations for better coverage. */ |
180 | 1.02k | pixhm = pixExpandReplicate(pixhm2, 2); |
181 | 1.02k | pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); |
182 | 1.02k | pixOr(pixhm, pixhm, pix1); |
183 | 1.02k | pixDestroy(&pixhm2); |
184 | 1.02k | pixDestroy(&pix1); |
185 | 1.02k | if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); |
186 | | |
187 | 1.02k | pix1 = pixExpandReplicate(pixtm2, 2); |
188 | 1.02k | pixtm = pixDilateBrick(NULL, pix1, 3, 3); |
189 | 1.02k | pixDestroy(&pixtm2); |
190 | 1.02k | pixDestroy(&pix1); |
191 | 1.02k | if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY); |
192 | | |
193 | 1.02k | if (pixtbf2) { |
194 | 504 | pix1 = pixExpandReplicate(pixtbf2, 2); |
195 | 504 | pixtb = pixDilateBrick(NULL, pix1, 3, 3); |
196 | 504 | pixDestroy(&pixtbf2); |
197 | 504 | pixDestroy(&pix1); |
198 | 504 | if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY); |
199 | 521 | } else { |
200 | 521 | pixtb = pixCreateTemplate(pixs); /* empty mask */ |
201 | 521 | } |
202 | | |
203 | | /* Debug: identify objects that are neither text nor halftone image */ |
204 | 1.02k | if (pixadb) { |
205 | 1.02k | pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ |
206 | 1.02k | pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */ |
207 | 1.02k | pixaAddPix(pixadb, pix2, L_INSERT); |
208 | 1.02k | pixDestroy(&pix1); |
209 | 1.02k | } |
210 | | |
211 | | /* Debug: display textline components with random colors. The w and h declared in this block shadow the function-level w and h. */ |
212 | 1.02k | if (pixadb) { |
213 | 1.02k | l_int32 w, h; |
214 | 1.02k | BOXA *boxa; |
215 | 1.02k | PIXA *pixa; |
216 | 1.02k | boxa = pixConnComp(pixtm, &pixa, 8); |
217 | 1.02k | pixGetDimensions(pixtm, &w, &h, NULL); |
218 | 1.02k | pix1 = pixaDisplayRandomCmap(pixa, w, h); |
219 | 1.02k | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
220 | 1.02k | pixaAddPix(pixadb, pix1, L_INSERT); |
221 | 1.02k | pixaDestroy(&pixa); |
222 | 1.02k | boxaDestroy(&boxa); |
223 | 1.02k | } |
224 | | |
225 | | /* Debug: identify the outlines of each textblock; the outlines are also written to /tmp/lept/pageseg/tb_outlines.ptaa */ |
226 | 1.02k | if (pixadb) { |
227 | 1.02k | PIXCMAP *cmap; |
228 | 1.02k | PTAA *ptaa; |
229 | 1.02k | ptaa = pixGetOuterBordersPtaa(pixtb); |
230 | 1.02k | lept_mkdir("lept/pageseg"); |
231 | 1.02k | ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1); |
232 | 1.02k | pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); |
233 | 1.02k | cmap = pixGetColormap(pix1); |
234 | 1.02k | pixcmapResetColor(cmap, 0, 130, 130, 130); |
235 | 1.02k | pixaAddPix(pixadb, pix1, L_INSERT); |
236 | 1.02k | ptaaDestroy(&ptaa); |
237 | 1.02k | } |
238 | | |
239 | | /* Debug: get b.b. for all mask components and write them to boxa files, then write all collected debug images to a pdf */ |
240 | 1.02k | if (pixadb) { |
241 | 1.02k | BOXA *bahm, *batm, *batb; |
242 | 1.02k | bahm = pixConnComp(pixhm, NULL, 4); |
243 | 1.02k | batm = pixConnComp(pixtm, NULL, 4); |
244 | 1.02k | batb = pixConnComp(pixtb, NULL, 4); |
245 | 1.02k | boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm); |
246 | 1.02k | boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm); |
247 | 1.02k | boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb); |
248 | 1.02k | boxaDestroy(&bahm); |
249 | 1.02k | boxaDestroy(&batm); |
250 | 1.02k | boxaDestroy(&batb); |
251 | 1.02k | } |
252 | 1.02k | if (pixadb) { |
253 | 1.02k | pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation", |
254 | 1.02k | "/tmp/lept/pageseg/debug.pdf"); |
255 | 1.02k | L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__); |
256 | 1.02k | } |
257 | | |
258 | 1.02k | if (ppixhm) |
259 | 1.02k | *ppixhm = pixhm; |
260 | 0 | else |
261 | 0 | pixDestroy(&pixhm); |
262 | 1.02k | if (ppixtm) |
263 | 1.02k | *ppixtm = pixtm; |
264 | 0 | else |
265 | 0 | pixDestroy(&pixtm); |
266 | 1.02k | if (ppixtb) |
267 | 1.02k | *ppixtb = pixtb; |
268 | 0 | else |
269 | 0 | pixDestroy(&pixtb); |
270 | | |
271 | 1.02k | return 0; |
272 | 2.82k | } |
272 | | |
273 | | |
274 | | /*------------------------------------------------------------------* |
275 | | * Halftone region extraction * |
276 | | *------------------------------------------------------------------*/ |
277 | | /*! |
278 | | * \brief pixGenHalftoneMask() - deprecated wrapper for pixGenerateHalftoneMask() |
279 | | * |
280 | | * <pre> |
281 | | * Deprecated: |
282 | | * This wrapper avoids an ABI change with tesseract 3.0.4. |
283 | | * It should be removed when we no longer need to support 3.0.4. |
284 | | * The debug parameter is ignored (assumed 0); the call is forwarded to pixGenerateHalftoneMask() with pixadb = NULL. |
285 | | * </pre> |
286 | | */ |
287 | | PIX * |
288 | | pixGenHalftoneMask(PIX *pixs, |
289 | | PIX **ppixtext, |
290 | | l_int32 *phtfound, |
291 | | l_int32 debug) |
292 | 0 | { |
293 | 0 | return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL); |
294 | 0 | } |
295 | | |
296 | | |
297 | | /*! |
298 | | * \brief pixGenerateHalftoneMask() - build a mask over the halftone regions of a binary image |
299 | | * |
300 | | * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi |
301 | | * \param[out] ppixtext [optional] text part of pixs: pixs with the mask pixels subtracted, or a copy of pixs if the mask is empty |
302 | | * \param[out] phtfound [optional] 1 if the mask is not empty |
303 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
304 | | * \return pixd halftone mask, or NULL on error |
305 | | * |
306 | | * <pre> |
307 | | * Notes: |
308 | | * (1) This is not intended to work on small thumbnails. The |
309 | | * dimensions of pixs must be at least MinWidth x MinHeight; otherwise NULL is returned. |
310 | | * </pre> |
311 | | */ |
312 | | PIX * |
313 | | pixGenerateHalftoneMask(PIX *pixs, |
314 | | PIX **ppixtext, |
315 | | l_int32 *phtfound, |
316 | | PIXA *pixadb) |
317 | 4.21k | { |
318 | 4.21k | l_int32 w, h, empty; |
319 | 4.21k | PIX *pix1, *pix2, *pixhs, *pixhm, *pixd; |
320 | | |
321 | 4.21k | if (ppixtext) *ppixtext = NULL; |
322 | 4.21k | if (phtfound) *phtfound = 0; |
323 | 4.21k | if (!pixs || pixGetDepth(pixs) != 1) |
324 | 295 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
325 | 3.92k | pixGetDimensions(pixs, &w, &h, NULL); |
326 | 3.92k | if (w < MinWidth || h < MinHeight) { |
327 | 2.81k | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
328 | 2.81k | return NULL; |
329 | 2.81k | } |
330 | | |
331 | | /* Compute seed for halftone parts at 8x reduction (presumably counted relative to the full-resolution page, with pixs itself already a 2x reduction -- TODO confirm) */ |
332 | 1.11k | pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0); |
333 | 1.11k | pix2 = pixOpenBrick(NULL, pix1, 5, 5); |
334 | 1.11k | pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */ |
335 | 1.11k | pixDestroy(&pix1); |
336 | 1.11k | pixDestroy(&pix2); |
337 | 1.11k | if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY); |
338 | | |
339 | | /* Compute mask for connected regions */ |
340 | 1.11k | pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); |
341 | 1.11k | if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); |
342 | | |
343 | | /* Fill seed into mask to get halftone mask */ |
344 | 1.11k | pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); |
345 | 1.11k | if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); |
346 | | |
347 | | #if 0 |
348 | | pixOpenBrick(pixd, pixd, 9, 9); |
349 | | #endif |
350 | | |
351 | | /* Check if mask is empty; empty is also used below to choose how the text image is made */ |
352 | 1.11k | pixZero(pixd, &empty); |
353 | 1.11k | if (phtfound && !empty) |
354 | 127 | *phtfound = 1; |
355 | | |
356 | | /* Optionally, get all pixels that are not under the halftone mask */ |
357 | 1.11k | if (ppixtext) { |
358 | 551 | if (empty) |
359 | 470 | *ppixtext = pixCopy(NULL, pixs); |
360 | 81 | else |
361 | 81 | *ppixtext = pixSubtract(NULL, pixs, pixd); |
362 | 551 | if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY); |
363 | 551 | } |
364 | | |
365 | 1.11k | pixDestroy(&pixhs); |
366 | 1.11k | pixDestroy(&pixhm); |
367 | 1.11k | return pixd; |
368 | 3.92k | } |
369 | | |
370 | | |
371 | | /*------------------------------------------------------------------* |
372 | | * Textline extraction * |
373 | | *------------------------------------------------------------------*/ |
374 | | /*! |
375 | | * \brief pixGenTextlineMask() - build a textline mask and a vertical whitespace mask from a binary text image |
376 | | * |
377 | | * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi |
378 | | * \param[out] ppixvws vertical whitespace mask; required (NULL is an error); set to the generated mask, which is not destroyed here |
379 | | * \param[out] ptlfound [optional] 1 if the mask is not empty |
380 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
381 | | * \return pixd textline mask, or NULL on error |
382 | | * |
383 | | * <pre> |
384 | | * Notes: |
385 | | * (1) The input pixs should be deskewed. |
386 | | * (2) pixs should have no halftone pixels. |
387 | | * (3) This is not intended to work on small thumbnails. The |
388 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
389 | | * (4) Both the input image and the returned textline mask |
390 | | * are at the same resolution. |
391 | | * </pre> |
392 | | */ |
393 | | PIX * |
394 | | pixGenTextlineMask(PIX *pixs, |
395 | | PIX **ppixvws, |
396 | | l_int32 *ptlfound, |
397 | | PIXA *pixadb) |
398 | 1.02k | { |
399 | 1.02k | l_int32 w, h, empty; |
400 | 1.02k | PIX *pix1, *pix2, *pixvws, *pixd; |
401 | | |
402 | 1.02k | if (ptlfound) *ptlfound = 0; |
403 | 1.02k | if (!ppixvws) |
404 | 0 | return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL); |
405 | 1.02k | *ppixvws = NULL; |
406 | 1.02k | if (!pixs || pixGetDepth(pixs) != 1) |
407 | 474 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
408 | 551 | pixGetDimensions(pixs, &w, &h, NULL); |
409 | 551 | if (w < MinWidth || h < MinHeight) { |
410 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
411 | 0 | return NULL; |
412 | 0 | } |
413 | | |
414 | | /* First we need a vertical whitespace mask. Invert the image. */ |
415 | 551 | pix1 = pixInvert(NULL, pixs); |
416 | | |
417 | | /* The whitespace mask will break textlines where there |
418 | | * is a large amount of white space below or above. |
419 | | * This can be prevented by identifying regions of the |
420 | | * inverted image that have large horizontal extent (bigger than |
421 | | * the separation between columns) and significant |
422 | | * vertical extent (bigger than the separation between |
423 | | * textlines), and subtracting this from the bg. */ |
424 | 551 | pix2 = pixMorphCompSequence(pix1, "o80.60", 0); |
425 | 551 | pixSubtract(pix1, pix1, pix2); |
426 | 551 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
427 | 551 | pixDestroy(&pix2); |
428 | | |
429 | | /* Identify vertical whitespace by opening the remaining bg. |
430 | | * o5.1 removes thin vertical bg lines and o1.200 extracts |
431 | | * long vertical bg lines. The result is handed back through ppixvws. */ |
432 | 551 | pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0); |
433 | 551 | *ppixvws = pixvws; |
434 | 551 | if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY); |
435 | 551 | pixDestroy(&pix1); |
436 | | |
437 | | /* Three steps to getting text line mask: |
438 | | * (1) close the characters and words in the textlines |
439 | | * (2) open the vertical whitespace corridors back up |
440 | | * (3) small opening to remove noise */ |
441 | 551 | pix1 = pixMorphSequence(pixs, "c30.1", 0); |
442 | 551 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
443 | 551 | pixd = pixSubtract(NULL, pix1, pixvws); |
444 | 551 | pixOpenBrick(pixd, pixd, 3, 3); |
445 | 551 | if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); |
446 | 551 | pixDestroy(&pix1); |
447 | | |
448 | | /* Check if text line mask is empty; only done when the caller asked for ptlfound */ |
449 | 551 | if (ptlfound) { |
450 | 551 | pixZero(pixd, &empty); |
451 | 551 | if (!empty) |
452 | 522 | *ptlfound = 1; |
453 | 551 | } |
454 | | |
455 | 551 | return pixd; |
456 | 551 | } |
457 | | |
458 | | |
459 | | /*------------------------------------------------------------------* |
460 | | * Textblock extraction * |
461 | | *------------------------------------------------------------------*/ |
462 | | /*! |
463 | | * \brief pixGenTextblockMask() - build a textblock mask from a textline mask and a vertical whitespace mask |
464 | | * |
465 | | * \param[in] pixs 1 bpp, textline mask, assumed to be 150 to 200 ppi |
466 | | * \param[in] pixvws vertical white space mask; required (NULL is an error) |
467 | | * \param[in] pixadb input for collecting debug pix; use NULL to skip |
468 | | * \return pixd textblock mask, or NULL if empty or on error |
469 | | * |
470 | | * <pre> |
471 | | * Notes: |
472 | | * (1) Both the input masks (textline and vertical white space) and |
473 | | * the returned textblock mask are at the same resolution. |
474 | | * (2) This is not intended to work on small thumbnails. The |
475 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
476 | | * (3) The result is somewhat noisy, in that small "blocks" of |
477 | | * text may be included. These can be removed by post-processing, |
478 | | * using, e.g., |
479 | | * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, |
480 | | * L_SELECT_IF_GTE, NULL); |
481 | | * </pre> |
482 | | */ |
483 | | PIX * |
484 | | pixGenTextblockMask(PIX *pixs, |
485 | | PIX *pixvws, |
486 | | PIXA *pixadb) |
487 | 1.02k | { |
488 | 1.02k | l_int32 w, h, empty; |
489 | 1.02k | PIX *pix1, *pix2, *pix3, *pixd; |
490 | | |
491 | 1.02k | if (!pixs || pixGetDepth(pixs) != 1) |
492 | 474 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
493 | 551 | pixGetDimensions(pixs, &w, &h, NULL); |
494 | 551 | if (w < MinWidth || h < MinHeight) { |
495 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
496 | 0 | return NULL; |
497 | 0 | } |
498 | 551 | if (!pixvws) |
499 | 0 | return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL); |
500 | | |
501 | | /* Join pixels vertically to make a textblock mask; if the result has no fg pixels, return NULL with an info message rather than an error */ |
502 | 551 | pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); |
503 | 551 | pixZero(pix1, &empty); |
504 | 551 | if (empty) { |
505 | 47 | pixDestroy(&pix1); |
506 | 47 | L_INFO("no fg pixels in textblock mask\n", __func__); |
507 | 47 | return NULL; |
508 | 47 | } |
509 | 504 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
510 | | |
511 | | /* Solidify the textblock mask and remove noise: |
512 | | * (1) For each cc, close the blocks and dilate slightly |
513 | | * to form a solid mask. |
514 | | * (2) Small horizontal closing between components. |
515 | | * (3) Open the white space between columns, again. |
516 | | * (4) Remove small components (presumably keeping those with width >= 25 and height >= 5, going by the arguments -- TODO confirm). */ |
517 | 504 | pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL); |
518 | 504 | pixCloseSafeBrick(pix2, pix2, 10, 1); |
519 | 504 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
520 | 504 | pix3 = pixSubtract(NULL, pix2, pixvws); |
521 | 504 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
522 | 504 | pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH, |
523 | 504 | L_SELECT_IF_GTE, NULL); |
524 | 504 | if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); |
525 | | |
526 | 504 | pixDestroy(&pix1); |
527 | 504 | pixDestroy(&pix2); |
528 | 504 | pixDestroy(&pix3); |
529 | 504 | return pixd; |
530 | 551 | } |
531 | | |
532 | | |
533 | | /*------------------------------------------------------------------* |
534 | | * Location and extraction of page foreground; cleaning pages * |
535 | | *------------------------------------------------------------------*/ |
536 | | /*! |
537 | | * \brief pixCropImage() - binarize a page image, crop it to its foreground, and rescale the result into a w x h frame |
538 | | * |
539 | | * \param[in] pixs full resolution (any type or depth) |
540 | | * \param[in] lr_clear full res pixels cleared at left and right sides |
541 | | * \param[in] tb_clear full res pixels cleared at top and bottom sides |
542 | | * \param[in] edgeclean parameter for removing edge noise (-1 to 15); values > 15 are set to 15 with a warning |
543 | | * default = 0 (no removal); |
544 | | * 15 is maximally aggressive for random noise |
545 | | * -1 for aggressively removing side noise |
546 | | * \param[in] lr_border full res final "added" pixels on left and right |
547 | | * \param[in] tb_border full res final "added" pixels on top and bottom |
548 | | * \param[in] maxwiden max fractional horizontal stretch allowed; a warning is issued if > 1.15 |
549 | | * \param[in] *debugfile [optional] usually is NULL; if given, the debug images are written to this file as a pdf |
550 | | * \param[out] *pcropbox [optional] crop box at full resolution |
551 | | * \return cropped pix, or NULL on error |
552 | | * |
553 | | * <pre> |
554 | | * Notes: |
555 | | * (1) This binarizes and crops a page image. |
556 | | * (a) Binarizes if necessary and does 2x reduction. |
557 | | * (b) Clears near the border by %lr_clear/2 and %tb_clear/2 pixels |
558 | | * (c) If %edgeclean > 0, it removes isolated sets of pixels, |
559 | | * using a close/open operation of size %edgeclean + 1. |
560 | | * If %edgeclean < 0, it uses a large vertical morphological |
561 | | * closing and the extraction of the largest resulting |
562 | | * connected component to eliminate noise on left and right sides. |
563 | | * (d) Find the bounding box of remaining fg pixels and scales |
564 | | * the box up 2x back to full resolution. |
565 | | * (e) Crops the binarized image to the bounding box. |
566 | | * (f) Slightly thickens long horizontal lines. |
567 | | * (g) Rescales this image to fit within the original image |
568 | | * less lr_border on the sides and tb_border above and below. |
569 | | * The rescaling is done isomorphically with a (possible) |
570 | | * optional additional widening. Suggest the additional |
571 | | * widening factor not exceed 1.15. |
572 | | * Note that (b) - (d) are done at 2x reduction for efficiency. |
573 | | * (2) Side clearing must not exceed 1/6 of the dimension on that side; otherwise NULL is returned. |
574 | | * (3) The clear and border pixel parameters must be >= 0; negative values are reset to 0. |
575 | | * (4) The "clear" parameters act on the input image, whereas the |
576 | | * "border" parameters act to give a white border to the final |
577 | | * image. They are not literally added, because the input and final |
578 | | * images are the same size. If the resulting images are to be |
579 | | * printed, it is useful to have border pixel parameters of at |
580 | | * least 60 at 300 ppi, to avoid losing content at the edges. |
581 | | * (5) This is not intended to work on small thumbnails. The |
582 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
583 | | * (6) Step (f) above helps with orthographically-produced music notation, |
584 | | * where the horizontal staff lines can be very thin and thus |
585 | | * subject to printer alias. |
586 | | * </pre> |
587 | | */ |
588 | | PIX * |
589 | | pixCropImage(PIX *pixs, |
590 | | l_int32 lr_clear, |
591 | | l_int32 tb_clear, |
592 | | l_int32 edgeclean, |
593 | | l_int32 lr_border, |
594 | | l_int32 tb_border, |
595 | | l_float32 maxwiden, |
596 | | const char *debugfile, |
597 | | BOX **pcropbox) |
598 | 0 | { |
599 | 0 | char cmd[64]; |
600 | 0 | l_int32 w, h, val, ret; |
601 | 0 | BOX *box1, *box2; |
602 | 0 | PIX *pix1, *pix2, *pix3; |
603 | 0 | PIXA *pixa1; |
604 |

605 | 0 | if (pcropbox) *pcropbox = NULL; |
606 | 0 | if (!pixs) |
607 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
608 | 0 | if (edgeclean > 15) { |
609 | 0 | L_WARNING("edgeclean > 15; setting to 15\n", __func__); |
610 | 0 | edgeclean = 15; |
611 | 0 | } |
612 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
613 | 0 | if (w < MinWidth || h < MinHeight) { |
614 | 0 | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
615 | 0 | return NULL; |
616 | 0 | } |
617 | 0 | if (lr_clear < 0) lr_clear = 0; |
618 | 0 | if (tb_clear < 0) tb_clear = 0; |
619 | 0 | if (lr_border < 0) lr_border = 0; |
620 | 0 | if (tb_border < 0) tb_border = 0; |
621 | 0 | if (lr_clear > w / 6 || tb_clear > h / 6) { |
622 | 0 | L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n", |
623 | 0 | __func__, w / 6, h / 6); |
624 | 0 | return NULL; |
625 | 0 | } |
626 | 0 | if (maxwiden > 1.15) |
627 | 0 | L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n", |
628 | 0 | __func__, maxwiden); |
629 | 0 | pixa1 = (debugfile) ? pixaCreate(5) : NULL; |
630 | 0 | if (pixa1) pixaAddPix(pixa1, pixs, L_COPY); |
631 | | |
632 | | /* Binarize if necessary and 2x reduction; pix1 is the full-res binary image, pix2 its 2x reduction */ |
633 | 0 | pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1); |
634 | 0 | pix2 = pixReduceRankBinary2(pix1, 2, NULL); |
635 | | |
636 | | /* Clear out border pixels; the clear values are halved because pix2 is at 2x reduction */ |
637 | 0 | pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2, |
638 | 0 | tb_clear / 2, PIX_CLR); |
639 | 0 | if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT); |
640 | | |
641 | | /* Choose one of three methods for extracting foreground pixels: |
642 | | * (1) Include all foreground pixels |
643 | | * (2) Do a morphological close/open to remove noise throughout |
644 | | * the image before finding a b.b. for remaining f.g. pixels |
645 | | * (3) Do a large vertical closing and choose the largest (by area) |
646 | | * component to avoid foreground noise on left and right sides */ |
647 | 0 | if (edgeclean == 0) { |
648 | 0 | ret = pixClipToForeground(pix2, NULL, &box1); |
649 | 0 | } else if (edgeclean > 0) { |
650 | 0 | val = edgeclean + 1; |
651 | 0 | snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val); |
652 | 0 | pix3 = pixMorphSequence(pix2, cmd, 0); |
653 | 0 | ret = pixClipToForeground(pix3, NULL, &box1); |
654 | 0 | pixDestroy(&pix3); |
655 | 0 | } else { /* edgeclean < 0 */ |
656 | 0 | ret = pixMaxCompAfterVClosing(pix2, &box1); |
657 | 0 | } |
658 | 0 | pixDestroy(&pix2); |
659 | 0 | if (ret) { |
660 | 0 | L_ERROR("no returned b.b. for foreground\n", __func__); |
661 | 0 | pixDestroy(&pix1); |
662 | 0 | pixaDestroy(&pixa1); |
663 | 0 | return NULL; |
664 | 0 | } |
665 | | |
666 | | /* Transform to full resolution */ |
667 | 0 | box2 = boxTransform(box1, 0, 0, 2.0, 2.0); /* full res */ |
668 | 0 | boxDestroy(&box1); |
669 | 0 | if (pixa1) { |
670 | 0 | pix2 = pixCopy(NULL, pix1); |
671 | 0 | pixRenderBoxArb(pix2, box2, 5, 255, 0, 0); |
672 | 0 | pixaAddPix(pixa1, pix2, L_INSERT); |
673 | 0 | } |
674 | | |
675 | | /* Grab the foreground region */ |
676 | 0 | pix2 = pixClipRectangle(pix1, box2, NULL); |
677 | 0 | pixDestroy(&pix1); |
678 | | |
679 | | /* Slightly thicken long horizontal lines. This prevents loss of |
680 | | * printed thin music staff lines due to aliasing. */ |
681 | 0 | pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0); |
682 | 0 | pixOr(pix2, pix2, pix3); |
683 | 0 | pixDestroy(&pix3); |
684 | | |
685 | | /* Rescale the fg and paste into the final image. NOTE(review): when pixa1 is in use, a copy of pix3 is inserted here and pix3 is added again (L_COPY) before the pdf is written -- confirm the duplicate debug page is intended. */ |
686 | 0 | pix3 = pixRescaleForCropping(pix2, w, h, lr_border, tb_border, |
687 | 0 | maxwiden, NULL); |
688 | 0 | pixDestroy(&pix2); |
689 | 0 | if (pixa1) { |
690 | 0 | pix2 = pixCopy(NULL, pix3); |
691 | 0 | pixaAddPix(pixa1, pix2, L_INSERT); |
692 | 0 | } |
693 |

694 | 0 | if (pcropbox) |
695 | 0 | *pcropbox = box2; |
696 | 0 | else |
697 | 0 | boxDestroy(&box2); |
698 | 0 | if (pixa1) { |
699 | 0 | pixaAddPix(pixa1, pix3, L_COPY); |
700 | 0 | lept_stderr("Writing debug file: %s\n", debugfile); |
701 | 0 | pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile); |
702 | 0 | pixaDestroy(&pixa1); |
703 | 0 | } |
704 | 0 | return pix3; |
705 | 0 | } |
706 | | |
707 | | |
708 | | /*! |
709 | | * \brief pixMaxCompAfterVClosing() - bounding box of the largest component after a strong vertical close/open |
710 | | * |
711 | | * \param[in] pixs 1 bpp (input at 2x reduction) |
712 | | * \param[out] **pbox main region at input resolution (2x reduction); required, set to NULL on error |
713 | | * \return 0 if OK, 1 on error |
714 | | * |
715 | | * <pre> |
716 | | * Notes: |
717 | | * (1) This removes foreground noise along left and right edges, |
718 | | * returning a bounding box for the remaining foreground pixels |
719 | | * at the input resolution. |
720 | | * (2) The input %pixs should be at a resolution 100 - 150 ppi. |
721 | | * (3) It does two 2x level1 rank binary reductions, followed |
722 | | * by a large vertical close/open, and then a 4x expansion |
723 | | * back to the input resolution. |
724 | | * (4) It is used as an option to pixCropImage(), when given |
725 | | * a negative %edgeclean parameter. NOTE(review): the file header lists this function as static, but the definition below has no static qualifier -- confirm the intended linkage. |
726 | | * </pre> |
727 | | */ |
728 | | l_int32 |
729 | | pixMaxCompAfterVClosing(PIX *pixs, |
730 | | BOX **pbox) |
731 | 0 | { |
732 | 0 | l_int32 w, h, i, n, maxindex, maxarea, empty; |
733 | 0 | BOXA *boxa1; |
734 | 0 | PIX *pix1; |
735 |

736 | 0 | if (!pbox) |
737 | 0 | return ERROR_INT("pbox not defined", __func__, 1); |
738 | 0 | *pbox = NULL; |
739 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
740 | 0 | return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); |
741 | | |
742 | | /* Strong vertical closing. NOTE(review): the early return taken below when pix1 is empty does not call pixDestroy(&pix1), whereas the normal path does -- looks like a leak; confirm. */ |
743 | 0 | pix1 = pixMorphSequence(pixs, "r11 + c1.50 + o1.50 + x4", 0); |
744 | 0 | pixZero(pix1, &empty); |
745 | 0 | if (empty) |
746 | 0 | return ERROR_INT("pix1 is empty", __func__, 1); |
747 | | |
748 | | /* Find the c.c. with largest area and return its bounding box; area is taken as w * h of each box, and the first box with the largest area wins */ |
749 | 0 | boxa1 = pixConnCompBB(pix1, 8); |
750 | 0 | pixDestroy(&pix1); |
751 | 0 | n = boxaGetCount(boxa1); |
752 | 0 | maxindex = 0; |
753 | 0 | maxarea = 0; |
754 | 0 | for (i = 0; i < n; i++) { |
755 | 0 | boxaGetBoxGeometry(boxa1, i, NULL, NULL, &w, &h); |
756 | 0 | if (w * h > maxarea) { |
757 | 0 | maxarea = w * h; |
758 | 0 | maxindex = i; |
759 | 0 | } |
760 | 0 | } |
761 | 0 | *pbox = boxaGetBox(boxa1, maxindex, L_COPY); |
762 | 0 | boxaDestroy(&boxa1); |
763 | 0 | return 0; |
764 | 0 | } |
765 | | |
766 | | |
767 | | /*! |
768 | | * \brief pixRescaleForCropping() |
769 | | * |
770 | | * \param[in] pixs 1 bpp |
771 | | * \param[in] w width of output lmage |
772 | | * \param[in] h height of output lmage |
773 | | * \param[in] lr_border cleared final border pixels on left and right |
774 | | * \param[in] tb_border cleared final border pixels on top and bottom |
775 | | * \param[in] maxwiden max fractional horizontal stretch allowed; >= 1.0 |
776 | | * \param[out] *ppixsc [optional] rescaled foreground region |
777 | | * \return pixd output image, or NULL on error |
778 | | * |
779 | | * <pre> |
780 | | * Notes: |
781 | | * (1) This rescales %pixs to fit maximally within an image of |
782 | | * size (w x h), under two conditions: |
783 | | * (a) the final image has cleared border regions given by the |
784 | | * input parameters %lr_border and %tb_border, and |
785 | | * (b) the input image is first isotropically scaled to fit |
786 | | * maximally within the allowed final region, and then further |
787 | | * maxiximally widened, subject to the constraints of the |
788 | | * cleared border and the %maxwiden parameter. |
789 | | * (2) The cleared border pixel parameters must be >= 0. |
790 | | * (3) If there is extra horizontal stretching by a factor |
791 | | * %maxwiden larger than about 1.15, the appearance may be |
792 | | * unpleasingly distorted; hence the suggestion not to exceed it. |
793 | | * </pre> |
794 | | */ |
795 | | static PIX * |
796 | | pixRescaleForCropping(PIX *pixs, |
797 | | l_int32 w, |
798 | | l_int32 h, |
799 | | l_int32 lr_border, |
800 | | l_int32 tb_border, |
801 | | l_float32 maxwiden, |
802 | | PIX **ppixsc) |
803 | 0 | { |
804 | 0 | static l_int32 first_time = TRUE; |
805 | 0 | l_int32 wi, hi, wmax, hmax, wn, wf, hf, xf; |
806 | 0 | l_float32 ratio, scaleh, scalew, scalewid; |
807 | 0 | PIX *pix1, *pixd; |
808 | |
|
809 | 0 | if (ppixsc) *ppixsc = NULL; |
810 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
811 | 0 | return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
812 | 0 | if (lr_border < 0) lr_border = 0; |
813 | 0 | if (tb_border < 0) tb_border = 0; |
814 | 0 | maxwiden = L_MAX(1.0, maxwiden); |
815 | 0 | if (maxwiden > 1.15) |
816 | 0 | L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n", |
817 | 0 | __func__, maxwiden); |
818 | | |
819 | | /* Rescale the foreground region. |
820 | | * First, decide if scaling is to full width or full height. |
821 | | * If scaling to full height, determine how much additional |
822 | | * width widening is possible, given the maxwiden constraint. |
823 | | * If scaling to full width, both width and height are |
824 | | * scaled isotropically. Scaling is done so that the resulting |
825 | | * foreground is maximally widened, so it can be horizontally |
826 | | * centered in an image of size (w x h), less %lr_border |
827 | | * on each side. */ |
828 | 0 | pixGetDimensions(pixs, &wi, &hi, NULL); |
829 | 0 | wmax = w - 2 * lr_border; |
830 | 0 | hmax = h - 2 * tb_border; |
831 | 0 | ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi); |
832 | 0 | if (ratio >= 1) { /* width can be widened after isotropic scaling */ |
833 | 0 | scaleh = (l_float32)hmax / (l_float32)hi; |
834 | 0 | wn = scaleh * wi; /* scaled but not widened */ |
835 | 0 | scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn); |
836 | 0 | scalew = scaleh * scalewid; |
837 | 0 | wf = scalew * wi; |
838 | 0 | hf = hmax; /* scale to full height */ |
839 | 0 | pix1 = pixScale(pixs, scalew, scaleh); |
840 | 0 | if (first_time == TRUE) { |
841 | 0 | lept_stderr("Width stretched by factor %5.3f\n", scalewid); |
842 | 0 | first_time = FALSE; |
843 | 0 | } |
844 | 0 | xf = (w - wf) / 2.0; |
845 | 0 | } else { /* width cannot be widened after isotropic scaling */ |
846 | 0 | scalew = (l_float32)wmax / (l_float32)wi; |
847 | 0 | pix1 = pixScale(pixs, scalew, scalew); |
848 | 0 | wf = wmax; /* scale to full width */ |
849 | 0 | hf = scalew * hi; /* no extra vertical stretching allowed */ |
850 | 0 | xf = lr_border; |
851 | 0 | } |
852 | | |
853 | | /* Paste it, horizontally centered and vertically placed as |
854 | | * high as allowed (by %tb_border) into the final page image. */ |
855 | 0 | pixd = pixCreate(w, h, 1); |
856 | 0 | pixRasterop(pixd, xf, tb_border, wf, hf, PIX_SRC, pix1, 0, 0); |
857 | |
|
858 | 0 | if (ppixsc) |
859 | 0 | *ppixsc = pix1; |
860 | 0 | else |
861 | 0 | pixDestroy(&pix1); |
862 | 0 | return pixd; |
863 | 0 | } |
864 | | |
865 | | |
866 | | /*! |
867 | | * \brief pixCleanImage() |
868 | | * |
869 | | * \param[in] pixs full resolution (any type or depth) |
870 | | * \param[in] contrast vary contrast: 1 = lightest; 10 = darkest; |
871 | | * suggest 1 unless light features are being lost |
872 | | * \param[in] rotation cw by 90 degrees: {0,1,2,3} represent |
873 | | * 0, 90, 180 and 270 degree cw rotations |
874 | | * \param[in] scale 1 (no scaling) or 2 (2x upscaling) |
875 | | * \param[in] opensize opening size of structuring element for noise |
876 | | * removal: {0 or 1 to skip; 2, 3 for opening} |
877 | | * \return cleaned pix, or NULL on error |
878 | | * |
879 | | * <pre> |
880 | | * Notes: |
881 | | * (1) This deskews, optionally rotates and darkens, cleans background |
882 | | * to white, binarizes and optionally removes small noise. |
883 | | * (2) For color and grayscale input, local background normalization is |
884 | | * done to 200, and a threshold of 180 sets the maximum foreground |
885 | | * value in the normalized image. |
886 | | * (3) The %contrast parameter adjusts the binarization to avoid losing |
887 | | * lighter input pixels. Contrast is increased as %contrast increases |
888 | | * from 1 to 10. |
889 | | * (4) The %scale parameter controls the thresholding to 1 bpp. Two values: |
890 | | * 1 = threshold |
891 | | * 2 = linear interpolated 2x upscaling before threshold. |
892 | | * (5) The #opensize parameter is the size of a square SEL used with |
893 | | * opening to remove small speckle noise. Allowed open sizes are 2,3. |
894 | | * If this is to be used, try 2 before 3. |
895 | | * (6) This does the image processing for cleanTo1bppFilesToPdf() and |
896 | | * prog/cleanpdf.c. |
897 | | * </pre> |
898 | | */ |
899 | | PIX * |
900 | | pixCleanImage(PIX *pixs, |
901 | | l_int32 contrast, |
902 | | l_int32 rotation, |
903 | | l_int32 scale, |
904 | | l_int32 opensize) |
905 | 0 | { |
906 | 0 | char sequence[32]; |
907 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
908 | |
|
909 | 0 | if (!pixs) |
910 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
911 | 0 | if (rotation < 0 || rotation > 3) { |
912 | 0 | L_ERROR("invalid rotation = %d; rotation must be in {0,1,2,3}\n", |
913 | 0 | __func__, rotation); |
914 | 0 | return NULL; |
915 | 0 | } |
916 | 0 | if (contrast < 1 || contrast > 10) { |
917 | 0 | L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n", |
918 | 0 | __func__, contrast); |
919 | 0 | return NULL; |
920 | 0 | } |
921 | 0 | if (scale != 1 && scale != 2) { |
922 | 0 | L_ERROR("invalid scale = %d; scale must be 1 or 2\n", |
923 | 0 | __func__, opensize); |
924 | 0 | return NULL; |
925 | 0 | } |
926 | 0 | if (opensize > 3) { |
927 | 0 | L_ERROR("invalid opensize = %d; opensize must be <= 3\n", |
928 | 0 | __func__, opensize); |
929 | 0 | return NULL; |
930 | 0 | } |
931 | | |
932 | 0 | if (pixGetDepth(pixs) == 1) { |
933 | 0 | if (rotation > 0) |
934 | 0 | pix1 = pixRotateOrth(pixs, rotation); |
935 | 0 | else |
936 | 0 | pix1 = pixClone(pixs); |
937 | 0 | pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL); |
938 | 0 | if (scale == 2) |
939 | 0 | pix4 = pixExpandBinaryReplicate(pix2, 2, 2); |
940 | 0 | else /* scale == 1 */ |
941 | 0 | pix4 = pixClone(pix2); |
942 | 0 | } else { |
943 | 0 | pix1 = pixConvertTo8MinMax(pixs); |
944 | 0 | if (rotation > 0) |
945 | 0 | pix2 = pixRotateOrth(pix1, rotation); |
946 | 0 | else |
947 | 0 | pix2 = pixClone(pix1); |
948 | 0 | pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL); |
949 | 0 | pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale); |
950 | 0 | pixDestroy(&pix3); |
951 | 0 | } |
952 | |
|
953 | 0 | if (opensize == 2 || opensize == 3) { |
954 | 0 | snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize); |
955 | 0 | pix5 = pixMorphSequence(pix4, sequence, 0); |
956 | 0 | } else { |
957 | 0 | pix5 = pixClone(pix4); |
958 | 0 | } |
959 | |
|
960 | 0 | pixDestroy(&pix1); |
961 | 0 | pixDestroy(&pix2); |
962 | 0 | pixDestroy(&pix4); |
963 | 0 | return pix5; |
964 | 0 | } |
965 | | |
966 | | |
967 | | /*! |
968 | | * \brief pixFindPageForeground() |
969 | | * |
970 | | * \param[in] pixs full resolution (any type or depth) |
971 | | * \param[in] threshold for binarization; typically about 128 |
972 | | * \param[in] mindist min distance of text from border to allow |
973 | | * cleaning near border; at 2x reduction, this |
974 | | * should be larger than 50; typically about 70 |
975 | | * \param[in] erasedist when conditions are satisfied, erase anything |
976 | | * within this distance of the edge; |
977 | | * typically 20-30 at 2x reduction |
978 | | * \param[in] showmorph debug: set to a negative integer to show steps |
979 | | * in generating masks; this is typically used |
980 | | * for debugging region extraction |
981 | | * \param[in] pixac debug: allocate outside and pass this in to |
982 | | * accumulate results of each call to this function, |
983 | | * which can be displayed in a mosaic or a pdf. |
984 | | * \return box region including foreground, with some pixel noise |
985 | | * removed, or NULL if not found |
986 | | * |
987 | | * <pre> |
988 | | * Notes: |
989 | | * (1) This doesn't simply crop to the fg. It attempts to remove |
990 | | * pixel noise and junk at the edge of the image before cropping. |
991 | | * The input %threshold is used if pixs is not 1 bpp. |
992 | | * (2) This is not intended to work on small thumbnails. The |
993 | | * dimensions of pixs must be at least MinWidth x MinHeight. |
994 | | * (3) Debug: set showmorph to display the intermediate image in |
995 | | * the morphological operations on this page. |
996 | | * (4) Debug: to get pdf output of results when called repeatedly, |
997 | | * call with an existing pixac, which will add an image of this page, |
998 | | * with the fg outlined. If no foreground is found, there is |
999 | | * no output for this page image. |
1000 | | * </pre> |
1001 | | */ |
1002 | | BOX * |
1003 | | pixFindPageForeground(PIX *pixs, |
1004 | | l_int32 threshold, |
1005 | | l_int32 mindist, |
1006 | | l_int32 erasedist, |
1007 | | l_int32 showmorph, |
1008 | | PIXAC *pixac) |
1009 | 3.19k | { |
1010 | 3.19k | l_int32 flag, nbox, intersects; |
1011 | 3.19k | l_int32 w, h, bx, by, bw, bh, left, right, top, bottom; |
1012 | 3.19k | PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2; |
1013 | 3.19k | BOX *box, *boxfg, *boxin, *boxd; |
1014 | 3.19k | BOXA *ba1, *ba2; |
1015 | | |
1016 | 3.19k | if (!pixs) |
1017 | 0 | return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL); |
1018 | 3.19k | pixGetDimensions(pixs, &w, &h, NULL); |
1019 | 3.19k | if (w < MinWidth || h < MinHeight) { |
1020 | 2.14k | L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); |
1021 | 2.14k | return NULL; |
1022 | 2.14k | } |
1023 | | |
1024 | | /* Binarize, downscale by 0.5, remove the noise to generate a seed, |
1025 | | * and do a seedfill back from the seed into those 8-connected |
1026 | | * components of the binarized image for which there was at least |
1027 | | * one seed pixel. */ |
1028 | 1.04k | flag = (showmorph) ? 100 : 0; |
1029 | 1.04k | pixb = pixConvertTo1(pixs, threshold); |
1030 | 1.04k | pixb2 = pixScale(pixb, 0.5, 0.5); |
1031 | 1.04k | pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag); |
1032 | 1.04k | pix1 = pixMorphSequence(pixb2, "o50.1", 0); |
1033 | 1.04k | pixOr(pixseed, pixseed, pix1); |
1034 | 1.04k | pixDestroy(&pix1); |
1035 | 1.04k | pix1 = pixMorphSequence(pixb2, "o1.50", 0); |
1036 | 1.04k | pixOr(pixseed, pixseed, pix1); |
1037 | 1.04k | pixDestroy(&pix1); |
1038 | 1.04k | pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8); |
1039 | 1.04k | pixm = pixRemoveBorderConnComps(pixsf, 8); |
1040 | | |
1041 | | /* Now, where is the main block of text? We want to remove noise near |
1042 | | * the edge of the image, but to do that, we have to be convinced that |
1043 | | * (1) there is noise and (2) it is far enough from the text block |
1044 | | * and close enough to the edge. For each edge, if the block |
1045 | | * is more than mindist from that edge, then clean 'erasedist' |
1046 | | * pixels from the edge. */ |
1047 | 1.04k | pix1 = pixMorphSequence(pixm, "c50.50", flag); |
1048 | 1.04k | ba1 = pixConnComp(pix1, NULL, 8); |
1049 | 1.04k | ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); |
1050 | 1.04k | pixGetDimensions(pix1, &w, &h, NULL); |
1051 | 1.04k | nbox = boxaGetCount(ba2); |
1052 | 1.04k | if (nbox > 1) { |
1053 | 318 | box = boxaGetBox(ba2, 0, L_CLONE); |
1054 | 318 | boxGetGeometry(box, &bx, &by, &bw, &bh); |
1055 | 318 | left = (bx > mindist) ? erasedist : 0; |
1056 | 318 | right = (w - bx - bw > mindist) ? erasedist : 0; |
1057 | 318 | top = (by > mindist) ? erasedist : 0; |
1058 | 318 | bottom = (h - by - bh > mindist) ? erasedist : 0; |
1059 | 318 | pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR); |
1060 | 318 | boxDestroy(&box); |
1061 | 318 | } |
1062 | 1.04k | pixDestroy(&pix1); |
1063 | 1.04k | boxaDestroy(&ba1); |
1064 | 1.04k | boxaDestroy(&ba2); |
1065 | | |
1066 | | /* Locate the foreground region; don't bother cropping */ |
1067 | 1.04k | pixClipToForeground(pixm, NULL, &boxfg); |
1068 | | |
1069 | | /* Sanity check the fg region. Make sure it's not confined |
1070 | | * to a thin boundary on the left and right sides of the image, |
1071 | | * in which case it is likely to be noise. */ |
1072 | 1.04k | if (boxfg) { |
1073 | 948 | boxin = boxCreate(0.1 * w, 0, 0.8 * w, h); |
1074 | 948 | boxIntersects(boxfg, boxin, &intersects); |
1075 | 948 | boxDestroy(&boxin); |
1076 | 948 | if (!intersects) boxDestroy(&boxfg); |
1077 | 948 | } |
1078 | | |
1079 | 1.04k | boxd = NULL; |
1080 | 1.04k | if (boxfg) { |
1081 | 929 | boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */ |
1082 | 929 | boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0); |
1083 | | |
1084 | | /* Save the debug image showing the box for this page */ |
1085 | 929 | if (pixac) { |
1086 | 0 | pixg2 = pixConvert1To4Cmap(pixb); |
1087 | 0 | pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0); |
1088 | 0 | pixacompAddPix(pixac, pixg2, IFF_DEFAULT); |
1089 | 0 | pixDestroy(&pixg2); |
1090 | 0 | } |
1091 | 929 | } |
1092 | | |
1093 | 1.04k | pixDestroy(&pixb); |
1094 | 1.04k | pixDestroy(&pixb2); |
1095 | 1.04k | pixDestroy(&pixseed); |
1096 | 1.04k | pixDestroy(&pixsf); |
1097 | 1.04k | pixDestroy(&pixm); |
1098 | 1.04k | boxDestroy(&boxfg); |
1099 | 1.04k | return boxd; |
1100 | 3.19k | } |
1101 | | |
1102 | | |
1103 | | /*------------------------------------------------------------------* |
1104 | | * Extraction of characters from image with only text * |
1105 | | *------------------------------------------------------------------*/ |
1106 | | /*! |
1107 | | * \brief pixSplitIntoCharacters() |
1108 | | * |
1109 | | * \param[in] pixs 1 bpp, contains only deskewed text |
1110 | | * \param[in] minw min component width for initial filtering; typ. 4 |
1111 | | * \param[in] minh min component height for initial filtering; typ. 4 |
1112 | | * \param[out] pboxa [optional] character bounding boxes |
1113 | | * \param[out] ppixa [optional] character images |
1114 | | * \param[out] ppixdebug [optional] showing splittings |
1115 | | * |
1116 | | * \return 0 if OK, 1 on error |
1117 | | * |
1118 | | * <pre> |
1119 | | * Notes: |
1120 | | * (1) This is a simple function that attempts to find split points |
1121 | | * based on vertical pixel profiles. |
1122 | | * (2) It should be given an image that has an arbitrary number |
1123 | | * of text characters. |
1124 | | * (3) The returned pixa includes the boxes from which the |
1125 | | * (possibly split) components are extracted. |
1126 | | * </pre> |
1127 | | */ |
1128 | | l_ok |
1129 | | pixSplitIntoCharacters(PIX *pixs, |
1130 | | l_int32 minw, |
1131 | | l_int32 minh, |
1132 | | BOXA **pboxa, |
1133 | | PIXA **ppixa, |
1134 | | PIX **ppixdebug) |
1135 | 3.19k | { |
1136 | 3.19k | l_int32 ncomp, i, xoff, yoff; |
1137 | 3.19k | BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad; |
1138 | 3.19k | BOXAA *baa; |
1139 | 3.19k | PIX *pix, *pix1, *pix2, *pixdb; |
1140 | 3.19k | PIXA *pixa1, *pixadb; |
1141 | | |
1142 | 3.19k | if (pboxa) *pboxa = NULL; |
1143 | 3.19k | if (ppixa) *ppixa = NULL; |
1144 | 3.19k | if (ppixdebug) *ppixdebug = NULL; |
1145 | 3.19k | if (!pixs || pixGetDepth(pixs) != 1) |
1146 | 371 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1147 | | |
1148 | | /* Remove the small stuff */ |
1149 | 2.82k | pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH, |
1150 | 2.82k | L_SELECT_IF_GT, NULL); |
1151 | | |
1152 | | /* Small vertical close for consolidation */ |
1153 | 2.82k | pix2 = pixMorphSequence(pix1, "c1.10", 0); |
1154 | 2.82k | pixDestroy(&pix1); |
1155 | | |
1156 | | /* Get the 8-connected components */ |
1157 | 2.82k | boxa1 = pixConnComp(pix2, &pixa1, 8); |
1158 | 2.82k | pixDestroy(&pix2); |
1159 | 2.82k | boxaDestroy(&boxa1); |
1160 | | |
1161 | | /* Split the components if obvious */ |
1162 | 2.82k | ncomp = pixaGetCount(pixa1); |
1163 | 2.82k | boxa2 = boxaCreate(ncomp); |
1164 | 2.82k | pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL; |
1165 | 156k | for (i = 0; i < ncomp; i++) { |
1166 | 153k | pix = pixaGetPix(pixa1, i, L_CLONE); |
1167 | 153k | if (ppixdebug) { |
1168 | 153k | boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb); |
1169 | 153k | if (pixdb) |
1170 | 29.3k | pixaAddPix(pixadb, pixdb, L_INSERT); |
1171 | 153k | } else { |
1172 | 0 | boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL); |
1173 | 0 | } |
1174 | 153k | pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL); |
1175 | 153k | boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0); |
1176 | 153k | boxaJoin(boxa2, boxat2, 0, -1); |
1177 | 153k | pixDestroy(&pix); |
1178 | 153k | boxaDestroy(&boxat1); |
1179 | 153k | boxaDestroy(&boxat2); |
1180 | 153k | } |
1181 | 2.82k | pixaDestroy(&pixa1); |
1182 | | |
1183 | | /* Generate the debug image */ |
1184 | 2.82k | if (ppixdebug) { |
1185 | 2.82k | if (pixaGetCount(pixadb) > 0) { |
1186 | 1.15k | *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500, |
1187 | 1.15k | 1.0, 0, 20, 1); |
1188 | 1.15k | } |
1189 | 2.82k | pixaDestroy(&pixadb); |
1190 | 2.82k | } |
1191 | | |
1192 | | /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */ |
1193 | 2.82k | baa = boxaSort2d(boxa2, NULL, 0, 0, 5); |
1194 | 2.82k | boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE); |
1195 | 2.82k | boxaaDestroy(&baa); |
1196 | 2.82k | boxaDestroy(&boxa2); |
1197 | | |
1198 | | /* Optionally extract the pieces from the input image */ |
1199 | 2.82k | if (ppixa) |
1200 | 2.82k | *ppixa = pixClipRectangles(pixs, boxad); |
1201 | 2.82k | if (pboxa) |
1202 | 2.82k | *pboxa = boxad; |
1203 | 0 | else |
1204 | 0 | boxaDestroy(&boxad); |
1205 | 2.82k | return 0; |
1206 | 3.19k | } |
1207 | | |
1208 | | |
1209 | | /*! |
1210 | | * \brief pixSplitComponentWithProfile() |
1211 | | * |
1212 | | * \param[in] pixs 1 bpp, exactly one connected component |
1213 | | * \param[in] delta distance used in extrema finding in a numa; typ. 10 |
1214 | | * \param[in] mindel minimum required difference between profile |
1215 | | * minimum and profile values +2 and -2 away; typ. 7 |
1216 | | * \param[out] ppixdebug [optional] debug image of splitting |
1217 | | * \return boxa of c.c. after splitting, or NULL on error |
1218 | | * |
1219 | | * <pre> |
1220 | | * Notes: |
1221 | | * (1) This will split the most obvious cases of touching characters. |
1222 | | * The split points it is searching for are narrow and deep |
1223 | | * minimima in the vertical pixel projection profile, after a |
1224 | | * large vertical closing has been applied to the component. |
1225 | | * </pre> |
1226 | | */ |
1227 | | BOXA * |
1228 | | pixSplitComponentWithProfile(PIX *pixs, |
1229 | | l_int32 delta, |
1230 | | l_int32 mindel, |
1231 | | PIX **ppixdebug) |
1232 | 153k | { |
1233 | 153k | l_int32 w, h, n2, i, firstmin, xmin, xshift; |
1234 | 153k | l_int32 nmin, nleft, nright, nsplit, isplit, ncomp; |
1235 | 153k | l_int32 *array1, *array2; |
1236 | 153k | BOX *box; |
1237 | 153k | BOXA *boxad; |
1238 | 153k | NUMA *na1, *na2, *nasplit; |
1239 | 153k | PIX *pix1, *pixdb; |
1240 | | |
1241 | 153k | if (ppixdebug) *ppixdebug = NULL; |
1242 | 153k | if (!pixs || pixGetDepth(pixs) != 1) |
1243 | 0 | return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL); |
1244 | 153k | pixGetDimensions(pixs, &w, &h, NULL); |
1245 | | |
1246 | | /* Closing to consolidate characters vertically */ |
1247 | 153k | pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100); |
1248 | | |
1249 | | /* Get extrema of column projections */ |
1250 | 153k | boxad = boxaCreate(2); |
1251 | 153k | na1 = pixCountPixelsByColumn(pix1); /* w elements */ |
1252 | 153k | pixDestroy(&pix1); |
1253 | 153k | na2 = numaFindExtrema(na1, delta, NULL); |
1254 | 153k | n2 = numaGetCount(na2); |
1255 | 153k | if (n2 < 3) { /* no split possible */ |
1256 | 119k | box = boxCreate(0, 0, w, h); |
1257 | 119k | boxaAddBox(boxad, box, L_INSERT); |
1258 | 119k | numaDestroy(&na1); |
1259 | 119k | numaDestroy(&na2); |
1260 | 119k | return boxad; |
1261 | 119k | } |
1262 | | |
1263 | | /* Look for sufficiently deep and narrow minima. |
1264 | | * All minima of of interest must be surrounded by max on each |
1265 | | * side. firstmin is the index of first possible minimum. */ |
1266 | 33.8k | array1 = numaGetIArray(na1); |
1267 | 33.8k | array2 = numaGetIArray(na2); |
1268 | 33.8k | if (ppixdebug) numaWriteStderr(na2); |
1269 | 33.8k | firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2; |
1270 | 33.8k | nasplit = numaCreate(n2); /* will hold split locations */ |
1271 | 130k | for (i = firstmin; i < n2 - 1; i+= 2) { |
1272 | 96.6k | xmin = array2[i]; |
1273 | 96.6k | nmin = array1[xmin]; |
1274 | 96.6k | if (xmin + 2 >= w) break; /* no more splits possible */ |
1275 | 96.6k | nleft = array1[xmin - 2]; |
1276 | 96.6k | nright = array1[xmin + 2]; |
1277 | 96.6k | if (ppixdebug) { |
1278 | 96.6k | lept_stderr( |
1279 | 96.6k | "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n", |
1280 | 96.6k | xmin, w, nleft, nmin, nright); |
1281 | 96.6k | } |
1282 | 96.6k | if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */ |
1283 | 74.6k | numaAddNumber(nasplit, xmin); |
1284 | 96.6k | } |
1285 | 33.8k | nsplit = numaGetCount(nasplit); |
1286 | | |
1287 | | #if 0 |
1288 | | if (ppixdebug && nsplit > 0) { |
1289 | | lept_mkdir("lept/split"); |
1290 | | gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL); |
1291 | | } |
1292 | | #endif |
1293 | | |
1294 | 33.8k | numaDestroy(&na1); |
1295 | 33.8k | numaDestroy(&na2); |
1296 | 33.8k | LEPT_FREE(array1); |
1297 | 33.8k | LEPT_FREE(array2); |
1298 | | |
1299 | 33.8k | if (nsplit == 0) { /* no splitting */ |
1300 | 4.44k | numaDestroy(&nasplit); |
1301 | 4.44k | box = boxCreate(0, 0, w, h); |
1302 | 4.44k | boxaAddBox(boxad, box, L_INSERT); |
1303 | 4.44k | return boxad; |
1304 | 4.44k | } |
1305 | | |
1306 | | /* Use split points to generate b.b. after splitting */ |
1307 | 104k | for (i = 0, xshift = 0; i < nsplit; i++) { |
1308 | 74.6k | numaGetIValue(nasplit, i, &isplit); |
1309 | 74.6k | box = boxCreate(xshift, 0, isplit - xshift, h); |
1310 | 74.6k | boxaAddBox(boxad, box, L_INSERT); |
1311 | 74.6k | xshift = isplit + 1; |
1312 | 74.6k | } |
1313 | 29.3k | box = boxCreate(xshift, 0, w - xshift, h); |
1314 | 29.3k | boxaAddBox(boxad, box, L_INSERT); |
1315 | 29.3k | numaDestroy(&nasplit); |
1316 | | |
1317 | 29.3k | if (ppixdebug) { |
1318 | 29.3k | pixdb = pixConvertTo32(pixs); |
1319 | 29.3k | ncomp = boxaGetCount(boxad); |
1320 | 133k | for (i = 0; i < ncomp; i++) { |
1321 | 104k | box = boxaGetBox(boxad, i, L_CLONE); |
1322 | 104k | pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5); |
1323 | 104k | boxDestroy(&box); |
1324 | 104k | } |
1325 | 29.3k | *ppixdebug = pixdb; |
1326 | 29.3k | } |
1327 | | |
1328 | 29.3k | return boxad; |
1329 | 33.8k | } |
1330 | | |
1331 | | |
1332 | | /*------------------------------------------------------------------* |
1333 | | * Extraction of lines of text * |
1334 | | *------------------------------------------------------------------*/ |
1335 | | /*! |
1336 | | * \brief pixExtractTextlines() |
1337 | | * |
1338 | | * \param[in] pixs any depth, assumed to have nearly horizontal text |
1339 | | * \param[in] maxw, maxh initial filtering: remove any components in pixs |
1340 | | * with components larger than maxw or maxh |
1341 | | * \param[in] minw, minh final filtering: remove extracted 'lines' |
1342 | | * with sizes smaller than minw or minh; use |
1343 | | * 0 for default. |
1344 | | * \param[in] adjw, adjh final adjustment of boxes representing each |
1345 | | * text line. If > 0, these increase the box |
1346 | | * size at each edge by this amount. |
1347 | | * \param[in] pixadb pixa for saving intermediate steps; NULL to omit |
1348 | | * \return pixa of textline images, including bounding boxes, or |
1349 | | * NULL on error |
1350 | | * |
1351 | | * <pre> |
1352 | | * Notes: |
1353 | | * (1) This function assumes that textline fragments have sufficient |
1354 | | * vertical separation and small enough skew so that a |
1355 | | * horizontal dilation sufficient to join words will not join |
1356 | | * textlines. It does not guarantee that horizontally adjacent |
1357 | | * textline fragments on the same line will be joined. |
1358 | | * (2) For images with multiple columns, it attempts to avoid joining |
1359 | | * textlines across the space between columns. If that is not |
1360 | | * a concern, you can also use pixExtractRawTextlines(), |
1361 | | * which will join them with alacrity. |
1362 | | * (3) This first removes components from pixs that are either |
1363 | | * wide (> %maxw) or tall (> %maxh). |
1364 | | * (4) A final filtering operation removes small components, such |
1365 | | * that width < %minw or height < %minh. |
1366 | | * (5) For reasonable accuracy, the resolution of pixs should be |
1367 | | * at least 100 ppi. For reasonable efficiency, the resolution |
1368 | | * should not exceed 600 ppi. |
1369 | | * (6) This can be used to determine if some region of a scanned |
1370 | | * image is horizontal text. |
1371 | | * (7) As an example, for a pix with resolution 300 ppi, a reasonable |
1372 | | * set of parameters is: |
1373 | | * pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL); |
1374 | | * The defaults minw and minh for 300 ppi are about 36 and 20, |
1375 | | * so the same result is obtained with: |
1376 | | * pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL); |
1377 | | * (8) The output pixa is composed of subimages, one for each textline, |
1378 | | * and the boxa in the pixa tells where in %pixs each textline goes. |
1379 | | * </pre> |
1380 | | */ |
1381 | | PIXA * |
1382 | | pixExtractTextlines(PIX *pixs, |
1383 | | l_int32 maxw, |
1384 | | l_int32 maxh, |
1385 | | l_int32 minw, |
1386 | | l_int32 minh, |
1387 | | l_int32 adjw, |
1388 | | l_int32 adjh, |
1389 | | PIXA *pixadb) |
1390 | 0 | { |
1391 | 0 | char buf[64]; |
1392 | 0 | l_int32 res, csize, empty; |
1393 | 0 | BOXA *boxa1, *boxa2, *boxa3; |
1394 | 0 | PIX *pix1, *pix2, *pix3; |
1395 | 0 | PIXA *pixa1, *pixa2, *pixa3; |
1396 | |
|
1397 | 0 | if (!pixs) |
1398 | 0 | return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); |
1399 | | |
1400 | | /* Binarize carefully, if necessary */ |
1401 | 0 | if (pixGetDepth(pixs) > 1) { |
1402 | 0 | pix2 = pixConvertTo8(pixs, FALSE); |
1403 | 0 | pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); |
1404 | 0 | pix1 = pixThresholdToBinary(pix3, 150); |
1405 | 0 | pixDestroy(&pix2); |
1406 | 0 | pixDestroy(&pix3); |
1407 | 0 | } else { |
1408 | 0 | pix1 = pixClone(pixs); |
1409 | 0 | } |
1410 | 0 | pixZero(pix1, &empty); |
1411 | 0 | if (empty) { |
1412 | 0 | pixDestroy(&pix1); |
1413 | 0 | L_INFO("no fg pixels in input image\n", __func__); |
1414 | 0 | return NULL; |
1415 | 0 | } |
1416 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
1417 | | |
1418 | | /* Remove any very tall or very wide connected components */ |
1419 | 0 | pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, |
1420 | 0 | L_SELECT_IF_LT, NULL); |
1421 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
1422 | 0 | pixDestroy(&pix1); |
1423 | | |
1424 | | /* Filter to solidify the text lines within the x-height region. |
1425 | | * The closing (csize) bridges gaps between words. The opening |
1426 | | * removes isolated bridges between textlines. */ |
1427 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
1428 | 0 | L_INFO("Resolution is not set: setting to 300 ppi\n", __func__); |
1429 | 0 | res = 300; |
1430 | 0 | } |
1431 | 0 | csize = L_MIN(120., 60.0 * res / 300.0); |
1432 | 0 | snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3); |
1433 | 0 | pix3 = pixMorphCompSequence(pix2, buf, 0); |
1434 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
1435 | | |
1436 | | /* Extract the connected components. These should be dilated lines */ |
1437 | 0 | boxa1 = pixConnComp(pix3, &pixa1, 4); |
1438 | 0 | if (pixadb) { |
1439 | 0 | pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); |
1440 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1441 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1442 | 0 | } |
1443 | | |
1444 | | /* Set minw, minh if default is requested */ |
1445 | 0 | minw = (minw != 0) ? minw : (l_int32)(0.12 * res); |
1446 | 0 | minh = (minh != 0) ? minh : (l_int32)(0.07 * res); |
1447 | | |
1448 | | /* Remove line components that are too small */ |
1449 | 0 | pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH, |
1450 | 0 | L_SELECT_IF_GTE, NULL); |
1451 | 0 | if (pixadb) { |
1452 | 0 | pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); |
1453 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1454 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1455 | 0 | pix1 = pixConvertTo32(pix2); |
1456 | 0 | pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0); |
1457 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1458 | 0 | } |
1459 | | |
1460 | | /* Selectively AND with the version before dilation, and save */ |
1461 | 0 | boxa2 = pixaGetBoxa(pixa2, L_CLONE); |
1462 | 0 | boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); |
1463 | 0 | pixa3 = pixClipRectangles(pix2, boxa3); |
1464 | 0 | if (pixadb) { |
1465 | 0 | pix1 = pixaDisplayRandomCmap(pixa3, 0, 0); |
1466 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1467 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1468 | 0 | } |
1469 | |
|
1470 | 0 | pixDestroy(&pix2); |
1471 | 0 | pixDestroy(&pix3); |
1472 | 0 | pixaDestroy(&pixa1); |
1473 | 0 | pixaDestroy(&pixa2); |
1474 | 0 | boxaDestroy(&boxa1); |
1475 | 0 | boxaDestroy(&boxa2); |
1476 | 0 | boxaDestroy(&boxa3); |
1477 | 0 | return pixa3; |
1478 | 0 | } |
1479 | | |
1480 | | |
1481 | | /*! |
1482 | | * \brief pixExtractRawTextlines() |
1483 | | * |
1484 | | * \param[in] pixs any depth, assumed to have nearly horizontal text |
1485 | | * \param[in] maxw, maxh initial filtering: remove any components in pixs |
1486 | | * with components larger than maxw or maxh; |
1487 | | * use 0 for default values. |
1488 | | * \param[in] adjw, adjh final adjustment of boxes representing each |
1489 | | * text line. If > 0, these increase the box |
1490 | | * size at each edge by this amount. |
1491 | | * \param[in] pixadb pixa for saving intermediate steps; NULL to omit |
1492 | | * \return pixa of textline images, including bounding boxes, or |
1493 | | * NULL on error |
1494 | | * |
1495 | | * <pre> |
1496 | | * Notes: |
1497 | | * (1) This function assumes that textlines have sufficient |
1498 | | * vertical separation and small enough skew so that a |
1499 | | * horizontal dilation sufficient to join words will not join |
1500 | | * textlines. It aggressively joins textlines across multiple |
1501 | | * columns, so if that is not desired, you must either (a) make |
1502 | | * sure that %pixs is a single column of text or (b) use instead |
1503 | | * pixExtractTextlines(), which is more conservative |
1504 | | * about joining text fragments that have vertical overlap. |
1505 | | * (2) This first removes components from pixs that are either |
1506 | | * very wide (> %maxw) or very tall (> %maxh). |
1507 | | * (3) For reasonable accuracy, the resolution of pixs should be |
1508 | | * at least 100 ppi. For reasonable efficiency, the resolution |
1509 | | * should not exceed 600 ppi. |
1510 | | * (4) This can be used to determine if some region of a scanned |
1511 | | * image is horizontal text. |
1512 | | * (5) As an example, for a pix with resolution 300 ppi, a reasonable |
1513 | | * set of parameters is: |
1514 | | * pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL); |
1515 | | * (6) The output pixa is composed of subimages, one for each textline, |
1516 | | * and the boxa in the pixa tells where in %pixs each textline goes. |
1517 | | * </pre> |
1518 | | */ |
1519 | | PIXA * |
1520 | | pixExtractRawTextlines(PIX *pixs, |
1521 | | l_int32 maxw, |
1522 | | l_int32 maxh, |
1523 | | l_int32 adjw, |
1524 | | l_int32 adjh, |
1525 | | PIXA *pixadb) |
1526 | 0 | { |
1527 | 0 | char buf[64]; |
1528 | 0 | l_int32 res, csize, empty; |
1529 | 0 | BOXA *boxa1, *boxa2, *boxa3; |
1530 | 0 | BOXAA *baa1; |
1531 | 0 | PIX *pix1, *pix2, *pix3; |
1532 | 0 | PIXA *pixa1, *pixa2; |
1533 | |
|
1534 | 0 | if (!pixs) |
1535 | 0 | return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); |
1536 | | |
1537 | | /* Set maxw, maxh if default is requested */ |
1538 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
1539 | 0 | L_INFO("Resolution is not set: setting to 300 ppi\n", __func__); |
1540 | 0 | res = 300; |
1541 | 0 | } |
1542 | 0 | maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res); |
1543 | 0 | maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res); |
1544 | | |
1545 | | /* Binarize carefully, if necessary */ |
1546 | 0 | if (pixGetDepth(pixs) > 1) { |
1547 | 0 | pix2 = pixConvertTo8(pixs, FALSE); |
1548 | 0 | pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); |
1549 | 0 | pix1 = pixThresholdToBinary(pix3, 150); |
1550 | 0 | pixDestroy(&pix2); |
1551 | 0 | pixDestroy(&pix3); |
1552 | 0 | } else { |
1553 | 0 | pix1 = pixClone(pixs); |
1554 | 0 | } |
1555 | 0 | pixZero(pix1, &empty); |
1556 | 0 | if (empty) { |
1557 | 0 | pixDestroy(&pix1); |
1558 | 0 | L_INFO("no fg pixels in input image\n", __func__); |
1559 | 0 | return NULL; |
1560 | 0 | } |
1561 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
1562 | | |
1563 | | /* Remove any very tall or very wide connected components */ |
1564 | 0 | pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, |
1565 | 0 | L_SELECT_IF_LT, NULL); |
1566 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
1567 | 0 | pixDestroy(&pix1); |
1568 | | |
1569 | | /* Filter to solidify the text lines within the x-height region. |
1570 | | * The closing (csize) bridges gaps between words. */ |
1571 | 0 | csize = L_MIN(120., 60.0 * res / 300.0); |
1572 | 0 | snprintf(buf, sizeof(buf), "c%d.1", csize); |
1573 | 0 | pix3 = pixMorphCompSequence(pix2, buf, 0); |
1574 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
1575 | | |
1576 | | /* Extract the connected components. These should be dilated lines */ |
1577 | 0 | boxa1 = pixConnComp(pix3, &pixa1, 4); |
1578 | 0 | if (pixadb) { |
1579 | 0 | pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); |
1580 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1581 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1582 | 0 | } |
1583 | | |
1584 | | /* Do a 2-d sort, and generate a bounding box for each set of text |
1585 | | * line segments that is aligned horizontally (i.e., has vertical |
1586 | | * overlap) into a box representing a single text line. */ |
1587 | 0 | baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5); |
1588 | 0 | boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2); |
1589 | 0 | if (pixadb) { |
1590 | 0 | pix1 = pixConvertTo32(pix2); |
1591 | 0 | pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0); |
1592 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1593 | 0 | } |
1594 | | |
1595 | | /* Optionally adjust the sides of each text line box, and then |
1596 | | * use the boxes to generate a pixa of the text lines. */ |
1597 | 0 | boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); |
1598 | 0 | pixa2 = pixClipRectangles(pix2, boxa3); |
1599 | 0 | if (pixadb) { |
1600 | 0 | pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); |
1601 | 0 | pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); |
1602 | 0 | pixaAddPix(pixadb, pix1, L_INSERT); |
1603 | 0 | } |
1604 | |
|
1605 | 0 | pixDestroy(&pix2); |
1606 | 0 | pixDestroy(&pix3); |
1607 | 0 | pixaDestroy(&pixa1); |
1608 | 0 | boxaDestroy(&boxa1); |
1609 | 0 | boxaDestroy(&boxa2); |
1610 | 0 | boxaDestroy(&boxa3); |
1611 | 0 | boxaaDestroy(&baa1); |
1612 | 0 | return pixa2; |
1613 | 0 | } |
1614 | | |
1615 | | |
1616 | | /*------------------------------------------------------------------* |
1617 | | * How many text columns * |
1618 | | *------------------------------------------------------------------*/ |
1619 | | /*! |
1620 | | * \brief pixCountTextColumns() |
1621 | | * |
1622 | | * \param[in] pixs 1 bpp |
1623 | | * \param[in] deltafract fraction of (max - min) to be used in the delta |
1624 | | * for extrema finding; typ 0.3 |
1625 | | * \param[in] peakfract fraction of (max - min) to be used to threshold |
1626 | | * the peak value; typ. 0.5 |
1627 | | * \param[in] clipfract fraction of image dimension removed on each side; |
1628 | | * typ. 0.1, which leaves w and h reduced by 0.8 |
1629 | | * \param[out] pncols number of columns; -1 if not determined |
1630 | | * \param[in] pixadb [optional] pre-allocated, for showing |
1631 | | * intermediate computation; use null to skip |
1632 | | * \return 0 if OK, 1 on error |
1633 | | * |
1634 | | * <pre> |
1635 | | * Notes: |
1636 | | * (1) It is assumed that pixs has the correct resolution set. |
1637 | | * If the resolution is 0, we set to 300 and issue a warning. |
1638 | | * (2) If necessary, the image is scaled to between 37 and 75 ppi; |
1639 | | * most of the processing is done at this resolution. |
1640 | | * (3) If no text is found (essentially a blank page), |
1641 | | * this returns ncols = 0. |
1642 | | * (4) For debug output, input a pre-allocated pixa. |
1643 | | * </pre> |
1644 | | */ |
1645 | | l_ok |
1646 | | pixCountTextColumns(PIX *pixs, |
1647 | | l_float32 deltafract, |
1648 | | l_float32 peakfract, |
1649 | | l_float32 clipfract, |
1650 | | l_int32 *pncols, |
1651 | | PIXA *pixadb) |
1652 | 0 | { |
1653 | 0 | l_int32 w, h, res, i, n, npeak; |
1654 | 0 | l_float32 scalefact, redfact, minval, maxval, val4, val5, fract; |
1655 | 0 | BOX *box; |
1656 | 0 | NUMA *na1, *na2, *na3, *na4, *na5; |
1657 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
1658 | |
|
1659 | 0 | if (!pncols) |
1660 | 0 | return ERROR_INT("&ncols not defined", __func__, 1); |
1661 | 0 | *pncols = -1; /* init */ |
1662 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1663 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1664 | 0 | if (deltafract < 0.15 || deltafract > 0.75) |
1665 | 0 | L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__); |
1666 | 0 | if (peakfract < 0.25 || peakfract > 0.9) |
1667 | 0 | L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__); |
1668 | 0 | if (clipfract < 0.0 || clipfract >= 0.5) |
1669 | 0 | return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1); |
1670 | 0 | if (pixadb) pixaAddPix(pixadb, pixs, L_COPY); |
1671 | | |
1672 | | /* Scale to between 37.5 and 75 ppi */ |
1673 | 0 | if ((res = pixGetXRes(pixs)) == 0) { |
1674 | 0 | L_WARNING("resolution undefined; set to 300\n", __func__); |
1675 | 0 | pixSetResolution(pixs, 300, 300); |
1676 | 0 | res = 300; |
1677 | 0 | } |
1678 | 0 | if (res < 37) { |
1679 | 0 | L_WARNING("resolution %d very low\n", __func__, res); |
1680 | 0 | scalefact = 37.5 / res; |
1681 | 0 | pix1 = pixScale(pixs, scalefact, scalefact); |
1682 | 0 | } else { |
1683 | 0 | redfact = (l_float32)res / 37.5; |
1684 | 0 | if (redfact < 2.0) |
1685 | 0 | pix1 = pixClone(pixs); |
1686 | 0 | else if (redfact < 4.0) |
1687 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
1688 | 0 | else if (redfact < 8.0) |
1689 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0); |
1690 | 0 | else if (redfact < 16.0) |
1691 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0); |
1692 | 0 | else |
1693 | 0 | pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2); |
1694 | 0 | } |
1695 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
1696 | | |
1697 | | /* Crop inner 80% of image */ |
1698 | 0 | pixGetDimensions(pix1, &w, &h, NULL); |
1699 | 0 | box = boxCreate(clipfract * w, clipfract * h, |
1700 | 0 | (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h); |
1701 | 0 | pix2 = pixClipRectangle(pix1, box, NULL); |
1702 | 0 | pixGetDimensions(pix2, &w, &h, NULL); |
1703 | 0 | boxDestroy(&box); |
1704 | 0 | if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
1705 | | |
1706 | | /* Deskew */ |
1707 | 0 | pix3 = pixDeskew(pix2, 0); |
1708 | 0 | if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); |
1709 | | |
1710 | | /* Close to increase column counts for text */ |
1711 | 0 | pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21); |
1712 | 0 | if (pixadb) pixaAddPix(pixadb, pix4, L_COPY); |
1713 | 0 | pixInvert(pix4, pix4); |
1714 | 0 | na1 = pixCountByColumn(pix4, NULL); |
1715 | |
|
1716 | 0 | if (pixadb) { |
1717 | 0 | gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL); |
1718 | 0 | pix5 = pixRead("/tmp/lept/plot.png"); |
1719 | 0 | pixaAddPix(pixadb, pix5, L_INSERT); |
1720 | 0 | } |
1721 | | |
1722 | | /* Analyze the column counts. na4 gives the locations of |
1723 | | * the extrema in normalized units (0.0 to 1.0) across the |
1724 | | * cropped image. na5 gives the magnitude of the |
1725 | | * extrema, normalized to the dynamic range. The peaks |
1726 | | * are values that are at least peakfract of (max - min). */ |
1727 | 0 | numaGetMax(na1, &maxval, NULL); |
1728 | 0 | numaGetMin(na1, &minval, NULL); |
1729 | 0 | fract = (l_float32)(maxval - minval) / h; /* is there much at all? */ |
1730 | 0 | if (fract < 0.05) { |
1731 | 0 | L_INFO("very little content on page; 0 text columns\n", __func__); |
1732 | 0 | *pncols = 0; |
1733 | 0 | } else { |
1734 | 0 | na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3); |
1735 | 0 | na4 = numaTransform(na2, 0, 1.0 / w); |
1736 | 0 | na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval)); |
1737 | 0 | n = numaGetCount(na4); |
1738 | 0 | for (i = 0, npeak = 0; i < n; i++) { |
1739 | 0 | numaGetFValue(na4, i, &val4); |
1740 | 0 | numaGetFValue(na5, i, &val5); |
1741 | 0 | if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) { |
1742 | 0 | npeak++; |
1743 | 0 | L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5); |
1744 | 0 | } |
1745 | 0 | } |
1746 | 0 | *pncols = npeak + 1; |
1747 | 0 | numaDestroy(&na2); |
1748 | 0 | numaDestroy(&na3); |
1749 | 0 | numaDestroy(&na4); |
1750 | 0 | numaDestroy(&na5); |
1751 | 0 | } |
1752 | |
|
1753 | 0 | pixDestroy(&pix1); |
1754 | 0 | pixDestroy(&pix2); |
1755 | 0 | pixDestroy(&pix3); |
1756 | 0 | pixDestroy(&pix4); |
1757 | 0 | numaDestroy(&na1); |
1758 | 0 | return 0; |
1759 | 0 | } |
1760 | | |
1761 | | |
1762 | | /*------------------------------------------------------------------* |
1763 | | * Decision text vs photo * |
1764 | | *------------------------------------------------------------------*/ |
1765 | | /*! |
1766 | | * \brief pixDecideIfText() |
1767 | | * |
1768 | | * \param[in] pixs any depth |
1769 | | * \param[in] box [optional] if null, use entire pixs |
1770 | | * \param[out] pistext 1 if text; 0 if photo; -1 if not determined or empty |
1771 | | * \param[in] pixadb [optional] pre-allocated, for showing intermediate |
1772 | | * computation; use NULL to skip |
1773 | | * \return 0 if OK, 1 on error |
1774 | | * |
1775 | | * <pre> |
1776 | | * Notes: |
1777 | | * (1) It is assumed that pixs has the correct resolution set. |
1778 | | * If the resolution is 0, we set to 300 and issue a warning. |
1779 | | * (2) If necessary, the image is scaled to 300 ppi; most of the |
1780 | | * processing is done at this resolution. |
1781 | | * (3) Text is assumed to be in horizontal lines. |
1782 | | * (4) Because thin vertical lines are removed before filtering for |
1783 | | * text lines, this should identify tables as text. |
1784 | | * (5) If %box is null and pixs contains both text lines and line art, |
1785 | | * this function might return %istext == true. |
1786 | | * (6) If the input pixs is empty, or for some other reason the |
1787 | | * result can not be determined, return -1. |
1788 | | * (7) For debug output, input a pre-allocated pixa. |
1789 | | * </pre> |
1790 | | */ |
1791 | | l_ok |
1792 | | pixDecideIfText(PIX *pixs, |
1793 | | BOX *box, |
1794 | | l_int32 *pistext, |
1795 | | PIXA *pixadb) |
1796 | 0 | { |
1797 | 0 | l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp; |
1798 | 0 | l_float32 ratio1, ratio2; |
1799 | 0 | L_BMF *bmf; |
1800 | 0 | BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5; |
1801 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7; |
1802 | 0 | PIXA *pixa1; |
1803 | 0 | SEL *sel1; |
1804 | |
|
1805 | 0 | if (!pistext) |
1806 | 0 | return ERROR_INT("&istext not defined", __func__, 1); |
1807 | 0 | *pistext = -1; |
1808 | 0 | if (!pixs) |
1809 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
1810 | | |
1811 | | /* Crop, convert to 1 bpp, 300 ppi */ |
1812 | 0 | if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL) |
1813 | 0 | return ERROR_INT("pix1 not made", __func__, 1); |
1814 | | |
1815 | 0 | pixZero(pix1, &empty); |
1816 | 0 | if (empty) { |
1817 | 0 | pixDestroy(&pix1); |
1818 | 0 | L_INFO("pix is empty\n", __func__); |
1819 | 0 | return 0; |
1820 | 0 | } |
1821 | 0 | w = pixGetWidth(pix1); |
1822 | | |
1823 | | /* Identify and remove tall, thin vertical lines (as found in tables) |
1824 | | * that are up to 9 pixels wide. Make a hit-miss sel with an |
1825 | | * 81 pixel vertical set of hits and with 3 pairs of misses that |
1826 | | * are 10 pixels apart horizontally. It is necessary to use a |
1827 | | * hit-miss transform; if we only opened with a vertical line of |
1828 | | * hits, we would remove solid regions of pixels that are not |
1829 | | * text or vertical lines. */ |
1830 | 0 | pix2 = pixCreate(11, 81, 1); |
1831 | 0 | for (i = 0; i < 81; i++) |
1832 | 0 | pixSetPixel(pix2, 5, i, 1); |
1833 | 0 | sel1 = selCreateFromPix(pix2, 40, 5, NULL); |
1834 | 0 | selSetElement(sel1, 20, 0, SEL_MISS); |
1835 | 0 | selSetElement(sel1, 20, 10, SEL_MISS); |
1836 | 0 | selSetElement(sel1, 40, 0, SEL_MISS); |
1837 | 0 | selSetElement(sel1, 40, 10, SEL_MISS); |
1838 | 0 | selSetElement(sel1, 60, 0, SEL_MISS); |
1839 | 0 | selSetElement(sel1, 60, 10, SEL_MISS); |
1840 | 0 | pix3 = pixHMT(NULL, pix1, sel1); |
1841 | 0 | pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000); |
1842 | 0 | pix5 = pixXor(NULL, pix1, pix4); |
1843 | 0 | pixDestroy(&pix2); |
1844 | 0 | selDestroy(&sel1); |
1845 | | |
1846 | | /* Convert the text lines to separate long horizontal components */ |
1847 | 0 | pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0); |
1848 | | |
1849 | | /* Estimate the distance to the bottom of the significant region */ |
1850 | 0 | if (box) { /* use full height */ |
1851 | 0 | pixGetDimensions(pix6, NULL, &h, NULL); |
1852 | 0 | } else { /* use height of region that has text lines */ |
1853 | 0 | pixFindThreshFgExtent(pix6, 400, NULL, &h); |
1854 | 0 | } |
1855 | |
|
1856 | 0 | if (pixadb) { |
1857 | 0 | bmf = bmfCreate(NULL, 6); |
1858 | 0 | pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary", |
1859 | 0 | 0x0000ff00, L_ADD_BELOW); |
1860 | 0 | pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line", |
1861 | 0 | 0x0000ff00, L_ADD_BELOW); |
1862 | 0 | pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill", |
1863 | 0 | 0x0000ff00, L_ADD_BELOW); |
1864 | 0 | pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor", |
1865 | 0 | 0x0000ff00, L_ADD_BELOW); |
1866 | 0 | pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components", |
1867 | 0 | 0x0000ff00, L_ADD_BELOW); |
1868 | 0 | } |
1869 | | |
1870 | | /* Extract the connected components */ |
1871 | 0 | if (pixadb) { |
1872 | 0 | boxa1 = pixConnComp(pix6, &pixa1, 8); |
1873 | 0 | pix7 = pixaDisplayRandomCmap(pixa1, 0, 0); |
1874 | 0 | pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255); |
1875 | 0 | pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components", |
1876 | 0 | 0x0000ff00, L_ADD_BELOW); |
1877 | 0 | pixDestroy(&pix7); |
1878 | 0 | pixaDestroy(&pixa1); |
1879 | 0 | bmfDestroy(&bmf); |
1880 | 0 | } else { |
1881 | 0 | boxa1 = pixConnComp(pix6, NULL, 8); |
1882 | 0 | } |
1883 | | |
1884 | | /* Analyze the connected components. The following conditions |
1885 | | * at 300 ppi must be satisfied if the image is text: |
1886 | | * (1) There are no components that are wider than 400 pixels and |
1887 | | * taller than 175 pixels. |
1888 | | * (2) The second longest component is at least 60% of the |
1889 | | * (possibly cropped) image width. This catches images |
1890 | | * that don't have any significant content. |
1891 | | * (3) Of the components that are at least 40% of the length |
1892 | | * of the longest (n2), at least 80% of them must not exceed |
1893 | | * 60 pixels in height. |
1894 | | * (4) The number of those long, thin components (n3) must |
1895 | | * equal or exceed a minimum that scales linearly with the |
1896 | | * image height. |
1897 | | * Most images that are not text fail more than one of these |
1898 | | * conditions. */ |
1899 | 0 | boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL); |
1900 | 0 | boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */ |
1901 | 0 | boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH, |
1902 | 0 | L_SELECT_IF_GTE, NULL); |
1903 | 0 | boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT, |
1904 | 0 | L_SELECT_IF_LTE, NULL); |
1905 | 0 | boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH, |
1906 | 0 | L_SELECT_IF_GT, NULL); |
1907 | 0 | big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1; |
1908 | 0 | n1 = boxaGetCount(boxa1); |
1909 | 0 | n2 = boxaGetCount(boxa3); |
1910 | 0 | n3 = boxaGetCount(boxa4); |
1911 | 0 | ratio1 = (l_float32)maxw / (l_float32)w; |
1912 | 0 | ratio2 = (l_float32)n3 / (l_float32)n2; |
1913 | 0 | minlines = L_MAX(2, h / 125); |
1914 | 0 | if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines) |
1915 | 0 | *pistext = 0; |
1916 | 0 | else |
1917 | 0 | *pistext = 1; |
1918 | 0 | if (pixadb) { |
1919 | 0 | if (*pistext == 1) { |
1920 | 0 | L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, " |
1921 | 0 | "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " |
1922 | 0 | "big_comp = %d\n", __func__, n1, n2, n3, minlines, |
1923 | 0 | maxw, ratio1, h, big_comp); |
1924 | 0 | } else { |
1925 | 0 | L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, " |
1926 | 0 | "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " |
1927 | 0 | "big_comp = %d\n", __func__, n1, n2, n3, minlines, |
1928 | 0 | maxw, ratio1, h, big_comp); |
1929 | 0 | } |
1930 | 0 | } |
1931 | |
|
1932 | 0 | boxaDestroy(&boxa1); |
1933 | 0 | boxaDestroy(&boxa2); |
1934 | 0 | boxaDestroy(&boxa3); |
1935 | 0 | boxaDestroy(&boxa4); |
1936 | 0 | boxaDestroy(&boxa5); |
1937 | 0 | pixDestroy(&pix1); |
1938 | 0 | pixDestroy(&pix3); |
1939 | 0 | pixDestroy(&pix4); |
1940 | 0 | pixDestroy(&pix5); |
1941 | 0 | pixDestroy(&pix6); |
1942 | 0 | return 0; |
1943 | 0 | } |
1944 | | |
1945 | | |
1946 | | /*! |
1947 | | * \brief pixFindThreshFgExtent() |
1948 | | * |
1949 | | * \param[in] pixs 1 bpp |
1950 | | * \param[in] thresh threshold number of pixels in row |
1951 | | * \param[out] ptop [optional] location of top of region |
1952 | | * \param[out] pbot [optional] location of bottom of region |
1953 | | * \return 0 if OK, 1 on error |
1954 | | */ |
1955 | | l_ok |
1956 | | pixFindThreshFgExtent(PIX *pixs, |
1957 | | l_int32 thresh, |
1958 | | l_int32 *ptop, |
1959 | | l_int32 *pbot) |
1960 | 0 | { |
1961 | 0 | l_int32 i, n; |
1962 | 0 | l_int32 *array; |
1963 | 0 | NUMA *na; |
1964 | |
|
1965 | 0 | if (ptop) *ptop = 0; |
1966 | 0 | if (pbot) *pbot = 0; |
1967 | 0 | if (!ptop && !pbot) |
1968 | 0 | return ERROR_INT("nothing to determine", __func__, 1); |
1969 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1970 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1971 | | |
1972 | 0 | na = pixCountPixelsByRow(pixs, NULL); |
1973 | 0 | n = numaGetCount(na); |
1974 | 0 | array = numaGetIArray(na); |
1975 | 0 | if (ptop) { |
1976 | 0 | for (i = 0; i < n; i++) { |
1977 | 0 | if (array[i] >= thresh) { |
1978 | 0 | *ptop = i; |
1979 | 0 | break; |
1980 | 0 | } |
1981 | 0 | } |
1982 | 0 | } |
1983 | 0 | if (pbot) { |
1984 | 0 | for (i = n - 1; i >= 0; i--) { |
1985 | 0 | if (array[i] >= thresh) { |
1986 | 0 | *pbot = i; |
1987 | 0 | break; |
1988 | 0 | } |
1989 | 0 | } |
1990 | 0 | } |
1991 | 0 | LEPT_FREE(array); |
1992 | 0 | numaDestroy(&na); |
1993 | 0 | return 0; |
1994 | 0 | } |
1995 | | |
1996 | | |
1997 | | /*------------------------------------------------------------------* |
1998 | | * Decision: table vs text * |
1999 | | *------------------------------------------------------------------*/ |
2000 | | /*! |
2001 | | * \brief pixDecideIfTable() |
2002 | | * |
2003 | | * \param[in] pixs any depth, any resolution >= 75 ppi |
2004 | | * \param[in] box [optional] if null, use entire pixs |
2005 | | * \param[in] orient L_PORTRAIT_MODE, L_LANDSCAPE_MODE |
2006 | | * \param[out] pscore 0 - 4; -1 if not determined |
2007 | | * \param[in] pixadb [optional] pre-allocated, for showing intermediate |
2008 | | * computation; use NULL to skip |
2009 | | * \return 0 if OK, 1 on error |
2010 | | * |
2011 | | * <pre> |
2012 | | * Notes: |
2013 | | * (1) It is assumed that pixs has the correct resolution set. |
2014 | | * If the resolution is 0, we assume it is 300 ppi and issue a warning. |
2015 | | * (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees |
2016 | | * clockwise before being analyzed. |
2017 | | * (3) The interpretation of the returned score: |
2018 | | * -1 undetermined |
2019 | | * 0 no table |
2020 | | * 1 unlikely to have a table |
2021 | | * 2 likely to have a table |
2022 | | * 3 even more likely to have a table |
2023 | | * 4 extremely likely to have a table |
2024 | | * * Setting the condition for finding a table at score >= 2 works |
2025 | | * well, except for false positives on kanji and landscape text. |
2026 | | * * These false positives can be removed by setting the condition |
2027 | | * at score >= 3, but recall is lowered because it will not find |
2028 | | * tables without either horizontal or vertical lines. |
2029 | | * (4) Most of the processing takes place at 75 ppi. |
2030 | | * (5) Internally, three numbers are determined, for horizontal and |
2031 | | * vertical fg lines, and for vertical bg lines. From these, |
2032 | | * four tests are made to decide if there is a table occupying |
2033 | | * a significant part of the image. |
2034 | | * (6) Images have arbitrary content and would be likely to trigger |
2035 | | * this detector, so they are checked for first, and if found, |
2036 | | * return with a 0 (no table) score. |
2037 | | * (7) Musical scores (tablature) are likely to trigger the detector. |
2038 | | * (8) Tables of content with more than 2 columns are likely to |
2039 | | * trigger the detector. |
2040 | | * (9) For debug output, input a pre-allocated pixa. |
2041 | | * </pre> |
2042 | | */ |
2043 | | l_ok |
2044 | | pixDecideIfTable(PIX *pixs, |
2045 | | BOX *box, |
2046 | | l_int32 orient, |
2047 | | l_int32 *pscore, |
2048 | | PIXA *pixadb) |
2049 | 3.19k | { |
2050 | 3.19k | l_int32 empty, nhb, nvb, nvw, score, htfound; |
2051 | 3.19k | PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; |
2052 | | |
2053 | 3.19k | if (!pscore) |
2054 | 0 | return ERROR_INT("&score not defined", __func__, 1); |
2055 | 3.19k | *pscore = -1; |
2056 | 3.19k | if (!pixs) |
2057 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
2058 | | |
2059 | | /* Check if there is an image region. First convert to 1 bpp |
2060 | | * at 175 ppi. If an image is found, assume there is no table. */ |
2061 | 3.19k | pix1 = pixPrepare1bpp(pixs, box, 0.1, 175); |
2062 | 3.19k | pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL); |
2063 | 3.19k | if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY); |
2064 | 3.19k | pixDestroy(&pix1); |
2065 | 3.19k | pixDestroy(&pix2); |
2066 | 3.19k | if (htfound) { |
2067 | 46 | *pscore = 0; |
2068 | 46 | L_INFO("pix has an image region\n", __func__); |
2069 | 46 | return 0; |
2070 | 46 | } |
2071 | | |
2072 | | /* Crop, convert to 1 bpp, 75 ppi */ |
2073 | 3.14k | if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL) |
2074 | 488 | return ERROR_INT("pix1 not made", __func__, 1); |
2075 | | |
2076 | 2.65k | pixZero(pix1, &empty); |
2077 | 2.65k | if (empty) { |
2078 | 97 | *pscore = 0; |
2079 | 97 | pixDestroy(&pix1); |
2080 | 97 | L_INFO("pix is empty\n", __func__); |
2081 | 97 | return 0; |
2082 | 97 | } |
2083 | | |
2084 | | /* The 2x2 dilation on 75 ppi makes these two approaches very similar: |
2085 | | * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution |
2086 | | * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); |
2087 | | * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution |
2088 | | * pix2 = pixDilateBrick(NULL, pix1, 2, 2); |
2089 | | * But (2) is more efficient if the input image to pixPrepare1bpp() |
2090 | | * is not at 300 ppi. */ |
2091 | 2.56k | pix2 = pixDilateBrick(NULL, pix1, 2, 2); |
2092 | | |
2093 | | /* Deskew both horizontally and vertically; rotate by 90 |
2094 | | * degrees if in landscape mode. */ |
2095 | 2.56k | pix3 = pixDeskewBoth(pix2, 1); |
2096 | 2.56k | if (pixadb) { |
2097 | 2.56k | pixaAddPix(pixadb, pix2, L_COPY); |
2098 | 2.56k | pixaAddPix(pixadb, pix3, L_COPY); |
2099 | 2.56k | } |
2100 | 2.56k | if (orient == L_LANDSCAPE_MODE) |
2101 | 0 | pix4 = pixRotate90(pix3, 1); |
2102 | 2.56k | else |
2103 | 2.56k | pix4 = pixClone(pix3); |
2104 | 2.56k | pixDestroy(&pix1); |
2105 | 2.56k | pixDestroy(&pix2); |
2106 | 2.56k | pixDestroy(&pix3); |
2107 | 2.56k | pix1 = pixClone(pix4); |
2108 | 2.56k | pixDestroy(&pix4); |
2109 | | |
2110 | | /* Look for horizontal and vertical lines */ |
2111 | 2.56k | pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0); |
2112 | 2.56k | pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8); |
2113 | 2.56k | pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0); |
2114 | 2.56k | pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8); |
2115 | 2.56k | pix6 = pixOr(NULL, pix3, pix5); |
2116 | 2.56k | if (pixadb) { |
2117 | 2.56k | pixaAddPix(pixadb, pix2, L_COPY); |
2118 | 2.56k | pixaAddPix(pixadb, pix4, L_COPY); |
2119 | 2.56k | pixaAddPix(pixadb, pix3, L_COPY); |
2120 | 2.56k | pixaAddPix(pixadb, pix5, L_COPY); |
2121 | 2.56k | pixaAddPix(pixadb, pix6, L_COPY); |
2122 | 2.56k | } |
2123 | 2.56k | pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */ |
2124 | 2.56k | pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */ |
2125 | | |
2126 | | /* Remove the lines */ |
2127 | 2.56k | pixSubtract(pix1, pix1, pix6); |
2128 | 2.56k | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
2129 | | |
2130 | | /* Remove noise pixels */ |
2131 | 2.56k | pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0); |
2132 | 2.56k | if (pixadb) pixaAddPix(pixadb, pix7, L_COPY); |
2133 | | |
2134 | | /* Look for vertical white space. Invert to convert white bg |
2135 | | * to fg. Use a single rank-1 2x reduction, which closes small |
2136 | | * fg holes, for the final processing at 37.5 ppi. |
2137 | | * The vertical opening is then about 3 inches on a 300 ppi image. |
2138 | | * We also remove vertical whitespace that is less than 5 pixels |
2139 | | * wide at this resolution (about 0.1 inches) */ |
2140 | 2.56k | pixInvert(pix7, pix7); |
2141 | 2.56k | pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0); |
2142 | 2.56k | pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH, |
2143 | 2.56k | L_SELECT_IF_GTE, NULL); |
2144 | 2.56k | pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */ |
2145 | 2.56k | if (pixadb) { |
2146 | 2.56k | pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT); |
2147 | 2.56k | pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT); |
2148 | 2.56k | } |
2149 | | |
2150 | | /* Require at least 2 of the following 4 conditions for a table. |
2151 | | * Some tables do not have black (fg) lines, and for those we |
2152 | | * require more than 6 long vertical whitespace (bg) lines. */ |
2153 | 2.56k | score = 0; |
2154 | 2.56k | if (nhb > 1) score++; |
2155 | 2.56k | if (nvb > 2) score++; |
2156 | 2.56k | if (nvw > 3) score++; |
2157 | 2.56k | if (nvw > 6) score++; |
2158 | 2.56k | *pscore = score; |
2159 | | |
2160 | 2.56k | pixDestroy(&pix1); |
2161 | 2.56k | pixDestroy(&pix2); |
2162 | 2.56k | pixDestroy(&pix3); |
2163 | 2.56k | pixDestroy(&pix4); |
2164 | 2.56k | pixDestroy(&pix5); |
2165 | 2.56k | pixDestroy(&pix6); |
2166 | 2.56k | pixDestroy(&pix7); |
2167 | 2.56k | pixDestroy(&pix8); |
2168 | 2.56k | pixDestroy(&pix9); |
2169 | 2.56k | return 0; |
2170 | 2.65k | } |
2171 | | |
2172 | | |
2173 | | /*! |
2174 | | * \brief pixPrepare1bpp() |
2175 | | * |
2176 | | * \param[in] pixs any depth |
2177 | | * \param[in] box [optional] if null, use entire pixs |
2178 | | * \param[in] cropfract fraction to be removed from the boundary; |
2179 | | * use 0.0 to retain the entire image |
2180 | | * \param[in] outres desired resolution of output image; if the |
2181 | | * input image resolution is not set, assume |
2182 | | * 300 ppi; use 0 to skip scaling. |
2183 | | * \return pixd if OK, NULL on error |
2184 | | * |
2185 | | * <pre> |
2186 | | * Notes: |
2187 | | * (1) This handles some common pre-processing operations, |
2188 | | * where the page segmentation algorithm takes a 1 bpp image. |
2189 | | * </pre> |
2190 | | */ |
2191 | | PIX * |
2192 | | pixPrepare1bpp(PIX *pixs, |
2193 | | BOX *box, |
2194 | | l_float32 cropfract, |
2195 | | l_int32 outres) |
2196 | 6.33k | { |
2197 | 6.33k | l_int32 w, h, res; |
2198 | 6.33k | l_float32 factor; |
2199 | 6.33k | BOX *box1; |
2200 | 6.33k | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
2201 | | |
2202 | 6.33k | if (!pixs) |
2203 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
2204 | | |
2205 | | /* Crop the image. If no box is given, use %cropfract to remove |
2206 | | * pixels near the image boundary; this helps avoid false |
2207 | | * negatives from noise that is often found there. */ |
2208 | 6.33k | if (box) { |
2209 | 0 | pix1 = pixClipRectangle(pixs, box, NULL); |
2210 | 6.33k | } else { |
2211 | 6.33k | pixGetDimensions(pixs, &w, &h, NULL); |
2212 | 6.33k | box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h), |
2213 | 6.33k | (l_int32)((1.0 - 2 * cropfract) * w), |
2214 | 6.33k | (l_int32)((1.0 - 2 * cropfract) * h)); |
2215 | 6.33k | pix1 = pixClipRectangle(pixs, box1, NULL); |
2216 | 6.33k | boxDestroy(&box1); |
2217 | 6.33k | } |
2218 | | |
2219 | | /* Convert to 1 bpp with adaptive background cleaning */ |
2220 | 6.33k | if (pixGetDepth(pixs) > 1) { |
2221 | 737 | pix2 = pixConvertTo8(pix1, 0); |
2222 | 737 | pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160); |
2223 | 737 | pixDestroy(&pix1); |
2224 | 737 | pixDestroy(&pix2); |
2225 | 737 | if (!pix3) { |
2226 | 46 | L_INFO("pix cleaning failed\n", __func__); |
2227 | 46 | return NULL; |
2228 | 46 | } |
2229 | 691 | pix4 = pixThresholdToBinary(pix3, 200); |
2230 | 691 | pixDestroy(&pix3); |
2231 | 5.60k | } else { |
2232 | 5.60k | pix4 = pixClone(pix1); |
2233 | 5.60k | pixDestroy(&pix1); |
2234 | 5.60k | } |
2235 | | |
2236 | | /* Scale the image to the requested output resolution; |
2237 | | do not scale if %outres <= 0 */ |
2238 | 6.29k | if (outres <= 0) |
2239 | 0 | return pix4; |
2240 | 6.29k | if ((res = pixGetXRes(pixs)) == 0) { |
2241 | 6.29k | L_WARNING("Resolution is not set: using 300 ppi\n", __func__); |
2242 | 6.29k | res = 300; |
2243 | 6.29k | } |
2244 | 6.29k | if (res != outres) { |
2245 | 6.29k | factor = (l_float32)outres / (l_float32)res; |
2246 | 6.29k | pix5 = pixScale(pix4, factor, factor); |
2247 | 6.29k | } else { |
2248 | 0 | pix5 = pixClone(pix4); |
2249 | 0 | } |
2250 | 6.29k | pixDestroy(&pix4); |
2251 | 6.29k | return pix5; |
2252 | 6.29k | } |
2253 | | |
2254 | | |
2255 | | /*------------------------------------------------------------------* |
2256 | | * Estimate the grayscale background value * |
2257 | | *------------------------------------------------------------------*/ |
2258 | | /*! |
2259 | | * \brief pixEstimateBackground() |
2260 | | * |
2261 | | * \param[in] pixs 8 bpp, with or without colormap |
2262 | | * \param[in] darkthresh pixels below this value are never considered |
2263 | | * part of the background; typ. 70; use 0 to skip |
2264 | | * \param[in] edgecrop fraction of half-width on each side, and of |
2265 | | * half-height at top and bottom, that are cropped |
2266 | | * \param[out] pbg estimated background, or 0 on error |
2267 | | * \return 0 if OK, 1 on error |
2268 | | * |
2269 | | * <pre> |
2270 | | * Notes: |
2271 | | * (1) Caller should check that return bg value is > 0. |
2272 | | * </pre> |
2273 | | */ |
2274 | | l_ok |
2275 | | pixEstimateBackground(PIX *pixs, |
2276 | | l_int32 darkthresh, |
2277 | | l_float32 edgecrop, |
2278 | | l_int32 *pbg) |
2279 | 0 | { |
2280 | 0 | l_int32 w, h, sampling; |
2281 | 0 | l_float32 fbg; |
2282 | 0 | BOX *box; |
2283 | 0 | PIX *pix1, *pix2, *pixm; |
2284 | |
|
2285 | 0 | if (!pbg) |
2286 | 0 | return ERROR_INT("&bg not defined", __func__, 1); |
2287 | 0 | *pbg = 0; |
2288 | 0 | if (!pixs || pixGetDepth(pixs) != 8) |
2289 | 0 | return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1); |
2290 | 0 | if (darkthresh > 128) |
2291 | 0 | L_WARNING("darkthresh unusually large\n", __func__); |
2292 | 0 | if (edgecrop < 0.0 || edgecrop >= 1.0) |
2293 | 0 | return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1); |
2294 | | |
2295 | 0 | pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); |
2296 | 0 | pixGetDimensions(pix1, &w, &h, NULL); |
2297 | | |
2298 | | /* Optionally crop inner part of image */ |
2299 | 0 | if (edgecrop > 0.0) { |
2300 | 0 | box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h, |
2301 | 0 | (1.0 - edgecrop) * w, (1.0 - edgecrop) * h); |
2302 | 0 | pix2 = pixClipRectangle(pix1, box, NULL); |
2303 | 0 | boxDestroy(&box); |
2304 | 0 | } else { |
2305 | 0 | pix2 = pixClone(pix1); |
2306 | 0 | } |
2307 | | |
2308 | | /* We will use no more than 50K samples */ |
2309 | 0 | sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5)); |
2310 | | |
2311 | | /* Optionally make a mask over all pixels lighter than %darkthresh */ |
2312 | 0 | pixm = NULL; |
2313 | 0 | if (darkthresh > 0) { |
2314 | 0 | pixm = pixThresholdToBinary(pix2, darkthresh); |
2315 | 0 | pixInvert(pixm, pixm); |
2316 | 0 | } |
2317 | |
|
2318 | 0 | pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL); |
2319 | 0 | *pbg = (l_int32)(fbg + 0.5); |
2320 | 0 | pixDestroy(&pix1); |
2321 | 0 | pixDestroy(&pix2); |
2322 | 0 | pixDestroy(&pixm); |
2323 | 0 | return 0; |
2324 | 0 | } |
2325 | | |
2326 | | |
2327 | | /*---------------------------------------------------------------------* |
2328 | | * Largest white or black rectangles in an image * |
2329 | | *---------------------------------------------------------------------*/ |
2330 | | /*! |
2331 | | * \brief pixFindLargeRectangles() |
2332 | | * |
2333 | | * \param[in] pixs 1 bpp |
2334 | | * \param[in] polarity 0 within background, 1 within foreground |
2335 | | * \param[in] nrect number of rectangles to be found |
2336 | | * \param[out] pboxa largest rectangles, sorted by decreasing area |
2337 | | * \param[in,out] ppixdb optional return output with rectangles drawn on it |
2338 | | * \return 0 if OK, 1 on error |
2339 | | * |
2340 | | * <pre> |
2341 | | * Notes: |
2342 | | * (1) This does a greedy search to find the largest rectangles, |
2343 | | * either black or white and without overlaps, in %pix. |
2344 | | * (2) See pixFindLargestRectangle(), which is called multiple |
2345 | | * times, for details. On each call, the largest rectangle |
2346 | | * found is painted, so that none of its pixels can be |
2347 | | * used later, before calling it again. |
2348 | | * (3) This function is surprisingly fast. Although |
2349 | | * pixFindLargestRectangle() runs at about 50 MPix/sec, when it |
2350 | | * is run multiple times by pixFindLargeRectangles(), it processes |
2351 | | * at 150 - 250 MPix/sec, and the time is approximately linear |
2352 | | * in %nrect. For example, for a 1 MPix image, searching for |
2353 | | * the largest 50 boxes takes about 0.2 seconds. |
2354 | | * </pre> |
2355 | | */ |
2356 | | l_ok |
2357 | | pixFindLargeRectangles(PIX *pixs, |
2358 | | l_int32 polarity, |
2359 | | l_int32 nrect, |
2360 | | BOXA **pboxa, |
2361 | | PIX **ppixdb) |
2362 | 0 | { |
2363 | 0 | l_int32 i, op, bx, by, bw, bh; |
2364 | 0 | BOX *box; |
2365 | 0 | BOXA *boxa; |
2366 | 0 | PIX *pix; |
2367 | |
|
2368 | 0 | if (ppixdb) *ppixdb = NULL; |
2369 | 0 | if (!pboxa) |
2370 | 0 | return ERROR_INT("&boxa not defined", __func__, 1); |
2371 | 0 | *pboxa = NULL; |
2372 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
2373 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
2374 | 0 | if (polarity != 0 && polarity != 1) |
2375 | 0 | return ERROR_INT("invalid polarity", __func__, 1); |
2376 | 0 | if (nrect > 1000) { |
2377 | 0 | L_WARNING("large num rectangles = %d requested; using 1000\n", |
2378 | 0 | __func__, nrect); |
2379 | 0 | nrect = 1000; |
2380 | 0 | } |
2381 | |
|
2382 | 0 | pix = pixCopy(NULL, pixs); |
2383 | 0 | boxa = boxaCreate(nrect); |
2384 | 0 | *pboxa = boxa; |
2385 | | |
2386 | | /* Sequentially find largest rectangle and fill with opposite color */ |
2387 | 0 | for (i = 0; i < nrect; i++) { |
2388 | 0 | if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) { |
2389 | 0 | boxDestroy(&box); |
2390 | 0 | L_ERROR("failure in pixFindLargestRectangle\n", __func__); |
2391 | 0 | break; |
2392 | 0 | } |
2393 | 0 | boxaAddBox(boxa, box, L_INSERT); |
2394 | 0 | op = (polarity == 0) ? PIX_SET : PIX_CLR; |
2395 | 0 | boxGetGeometry(box, &bx, &by, &bw, &bh); |
2396 | 0 | pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0); |
2397 | 0 | } |
2398 | |
|
2399 | 0 | if (ppixdb) |
2400 | 0 | *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3); |
2401 | |
|
2402 | 0 | pixDestroy(&pix); |
2403 | 0 | return 0; |
2404 | 0 | } |
2405 | | |
2406 | | |
2407 | | /*! |
2408 | | * \brief pixFindLargestRectangle() |
2409 | | * |
2410 | | * \param[in] pixs 1 bpp |
2411 | | * \param[in] polarity 0 within background, 1 within foreground |
2412 | | * \param[out] pbox largest area rectangle |
2413 | | * \param[in,out] ppixdb optional return output with rectangle drawn on it |
2414 | | * \return 0 if OK, 1 on error |
2415 | | * |
2416 | | * <pre> |
2417 | | * Notes: |
2418 | | * (1) This is a simple and elegant solution to a problem in |
2419 | | * computational geometry that at first appears to be quite |
2420 | | * difficult: what is the largest rectangle that can be |
2421 | | * placed in the image, covering only pixels of one polarity |
2422 | | * (bg or fg)? The solution is O(n), where n is the number |
2423 | | * of pixels in the image, and it requires nothing more than |
2424 | | * using a simple recursion relation in a single sweep of the image. |
2425 | | * (2) In a sweep from UL to LR with left-to-right being the fast |
2426 | | * direction, calculate the largest white rectangle at (x, y), |
2427 | | * using previously calculated values at pixels #1 and #2: |
2428 | | * #1: (x, y - 1) |
2429 | | * #2: (x - 1, y) |
2430 | | * We also need the most recent "black" pixels that were seen |
2431 | | * in the current row and column. |
2432 | | * Consider the largest area. There are only two possibilities: |
2433 | | * (a) Min(w(1), horizdist) * (h(1) + 1) |
2434 | | * (b) Min(h(2), vertdist) * (w(2) + 1) |
2435 | | * where |
2436 | | * horizdist: the distance from the rightmost "black" pixel seen |
2437 | | * in the current row across to the current pixel |
2438 | | * vertdist: the distance from the lowest "black" pixel seen |
2439 | | * in the current column down to the current pixel |
2440 | | * and we choose the Max of (a) and (b). |
2441 | | * (3) To convince yourself that these recursion relations are correct, |
2442 | | * it helps to draw the maximum rectangles at #1 and #2. |
2443 | | * Then for #1, you try to extend the rectangle down one line, |
2444 | | * so that the height is h(1) + 1. Do you get the full |
2445 | | * width of #1, w(1)? It depends on where the black pixels are |
2446 | | * in the current row. You know the final width is bounded by w(1) |
2447 | | * and w(2) + 1, but the actual value depends on the distribution |
2448 | | * of black pixels in the current row that are at a distance |
2449 | | * from the current pixel that is between these limits. |
2450 | | * We call that value "horizdist", and the area is then given |
2451 | | * by the expression (a) above. Using similar reasoning for #2, |
2452 | | * where you attempt to extend the rectangle to the right |
2453 | | * by 1 pixel, you arrive at (b). The largest rectangle is |
2454 | | * then found by taking the Max. |
2455 | | * </pre> |
2456 | | */ |
2457 | | l_ok |
2458 | | pixFindLargestRectangle(PIX *pixs, |
2459 | | l_int32 polarity, |
2460 | | BOX **pbox, |
2461 | | PIX **ppixdb) |
2462 | 0 | { |
2463 | 0 | l_int32 i, j, w, h, d, wpls, val; |
2464 | 0 | l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2; |
2465 | 0 | l_int32 xmax, ymax; /* LR corner of the largest rectangle */ |
2466 | 0 | l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg; |
2467 | 0 | l_int32 *lowestfg; |
2468 | 0 | l_uint32 *datas, *lines; |
2469 | 0 | l_uint32 **linew, **lineh; |
2470 | 0 | BOX *box; |
2471 | 0 | PIX *pixw, *pixh; /* keeps the width and height for the largest */ |
2472 | | /* rectangles whose LR corner is located there. */ |
2473 | |
|
2474 | 0 | if (ppixdb) *ppixdb = NULL; |
2475 | 0 | if (!pbox) |
2476 | 0 | return ERROR_INT("&box not defined", __func__, 1); |
2477 | 0 | *pbox = NULL; |
2478 | 0 | if (!pixs) |
2479 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
2480 | 0 | pixGetDimensions(pixs, &w, &h, &d); |
2481 | 0 | if (d != 1) |
2482 | 0 | return ERROR_INT("pixs not 1 bpp", __func__, 1); |
2483 | 0 | if (polarity != 0 && polarity != 1) |
2484 | 0 | return ERROR_INT("invalid polarity", __func__, 1); |
2485 | | |
2486 | | /* Initialize lowest "fg" seen so far for each column */ |
2487 | 0 | lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); |
2488 | 0 | for (i = 0; i < w; i++) |
2489 | 0 | lowestfg[i] = -1; |
2490 | | |
2491 | | /* The combination (val ^ polarity) is the color for which we |
2492 | | * are searching for the maximum rectangle. For polarity == 0, |
2493 | | * we search in the bg (white). */ |
2494 | 0 | pixw = pixCreate(w, h, 32); /* stores width */ |
2495 | 0 | pixh = pixCreate(w, h, 32); /* stores height */ |
2496 | 0 | linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL); |
2497 | 0 | lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL); |
2498 | 0 | datas = pixGetData(pixs); |
2499 | 0 | wpls = pixGetWpl(pixs); |
2500 | 0 | maxarea = xmax = ymax = wmax = hmax = 0; |
2501 | 0 | for (i = 0; i < h; i++) { |
2502 | 0 | lines = datas + i * wpls; |
2503 | 0 | prevfg = -1; |
2504 | 0 | for (j = 0; j < w; j++) { |
2505 | 0 | val = GET_DATA_BIT(lines, j); |
2506 | 0 | if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. */ |
2507 | 0 | if (i == 0 && j == 0) { |
2508 | 0 | wp = hp = 1; |
2509 | 0 | } else if (i == 0) { |
2510 | 0 | wp = linew[i][j - 1] + 1; |
2511 | 0 | hp = 1; |
2512 | 0 | } else if (j == 0) { |
2513 | 0 | wp = 1; |
2514 | 0 | hp = lineh[i - 1][j] + 1; |
2515 | 0 | } else { |
2516 | | /* Expand #1 prev rectangle down */ |
2517 | 0 | w1 = linew[i - 1][j]; |
2518 | 0 | h1 = lineh[i - 1][j]; |
2519 | 0 | horizdist = j - prevfg; |
2520 | 0 | wmin = L_MIN(w1, horizdist); /* width of new rectangle */ |
2521 | 0 | area1 = wmin * (h1 + 1); |
2522 | | |
2523 | | /* Expand #2 prev rectangle to right */ |
2524 | 0 | w2 = linew[i][j - 1]; |
2525 | 0 | h2 = lineh[i][j - 1]; |
2526 | 0 | vertdist = i - lowestfg[j]; |
2527 | 0 | hmin = L_MIN(h2, vertdist); /* height of new rectangle */ |
2528 | 0 | area2 = hmin * (w2 + 1); |
2529 | |
|
2530 | 0 | if (area1 > area2) { |
2531 | 0 | wp = wmin; |
2532 | 0 | hp = h1 + 1; |
2533 | 0 | } else { |
2534 | 0 | wp = w2 + 1; |
2535 | 0 | hp = hmin; |
2536 | 0 | } |
2537 | 0 | } |
2538 | 0 | } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */ |
2539 | 0 | prevfg = j; |
2540 | 0 | lowestfg[j] = i; |
2541 | 0 | wp = hp = 0; |
2542 | 0 | } |
2543 | 0 | linew[i][j] = wp; |
2544 | 0 | lineh[i][j] = hp; |
2545 | 0 | if (wp * hp > maxarea) { |
2546 | 0 | maxarea = wp * hp; |
2547 | 0 | xmax = j; |
2548 | 0 | ymax = i; |
2549 | 0 | wmax = wp; |
2550 | 0 | hmax = hp; |
2551 | 0 | } |
2552 | 0 | } |
2553 | 0 | } |
2554 | | |
2555 | | /* Translate from LR corner to Box coords (UL corner, w, h) */ |
2556 | 0 | box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax); |
2557 | 0 | *pbox = box; |
2558 | |
|
2559 | 0 | if (ppixdb) { |
2560 | 0 | *ppixdb = pixConvertTo8(pixs, TRUE); |
2561 | 0 | pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0); |
2562 | 0 | } |
2563 | |
|
2564 | 0 | LEPT_FREE(linew); |
2565 | 0 | LEPT_FREE(lineh); |
2566 | 0 | LEPT_FREE(lowestfg); |
2567 | 0 | pixDestroy(&pixw); |
2568 | 0 | pixDestroy(&pixh); |
2569 | 0 | return 0; |
2570 | 0 | } |
2571 | | |
2572 | | |
2573 | | /*---------------------------------------------------------------------* |
2574 | | * Generate rectangle inside connected component * |
2575 | | *---------------------------------------------------------------------*/ |
2576 | | /*! |
2577 | | * \brief pixFindRectangleInCC() |
2578 | | * |
2579 | | * \param[in] pixs 1 bpp, with sufficient closings to make the fg be |
2580 | | * a single c.c. that is a convex hull |
2581 | | * \param[in] boxs [optional] if NULL, %pixs should be a minimum |
2582 | | * container of a single c.c. |
2583 | | * \param[in] fract first and all consecutive lines found must be at |
2584 | | * least this fraction of the fast scan dimension |
2585 | | * \param[in] dir L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of |
2586 | | * fast scan |
2587 | | * \param[in] select L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, |
2588 | | * L_LARGEST_AREA, L_SMALEST_AREA |
2589 | | * \param[in] debug if 1, generates output pdf showing intermediate |
2590 | | * computation and final result |
2591 | | * \return box of included rectangle, or NULL on error |
2592 | | * |
2593 | | * <pre> |
2594 | | * Notes: |
2595 | | * (1) Computation is similar to pixFindLargestRectangle(), but allows |
2596 | | * a different set of results to choose from. |
2597 | | * (2) Select the fast scan direction. Then, scanning in the slow |
2598 | | * direction, find the longest run of ON pixels in the fast |
2599 | | * scan direction and look for the first run that is longer |
2600 | | * than %fract of the dimension. Continue until a shorter run |
2601 | | * is found. This generates a box of ON pixels fitting into the c.c. |
2602 | | * (3) Do this from both slow scan directions and use %select to get |
2603 | | * a resulting box from these two. |
2604 | | * (4) The extracted rectangle is not necessarily the largest that |
2605 | | * can fit in the c.c. To get that, use pixFindLargestRectangle(). |
2606 | | */ |
2607 | | BOX * |
2608 | | pixFindRectangleInCC(PIX *pixs, |
2609 | | BOX *boxs, |
2610 | | l_float32 fract, |
2611 | | l_int32 dir, |
2612 | | l_int32 select, |
2613 | | l_int32 debug) |
2614 | 0 | { |
2615 | 0 | l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res; |
2616 | 0 | l_int32 xfirst, xlast, xstart, yfirst, ylast, length; |
2617 | 0 | BOX *box1, *box2, *box3, *box4, *box5; |
2618 | 0 | PIX *pix1, *pix2, *pixdb1, *pixdb2; |
2619 | 0 | PIXA *pixadb; |
2620 | |
|
2621 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
2622 | 0 | return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); |
2623 | 0 | if (fract <= 0.0 || fract > 1.0) |
2624 | 0 | return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL); |
2625 | 0 | if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL) |
2626 | 0 | return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL); |
2627 | 0 | if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION && |
2628 | 0 | select != L_LARGEST_AREA && select != L_SMALLEST_AREA) |
2629 | 0 | return (BOX *)ERROR_PTR("invalid select", __func__, NULL); |
2630 | | |
2631 | | /* Extract the c.c. if necessary */ |
2632 | 0 | x = y = 0; |
2633 | 0 | if (boxs) { |
2634 | 0 | pix1 = pixClipRectangle(pixs, boxs, NULL); |
2635 | 0 | boxGetGeometry(boxs, &x, &y, NULL, NULL); |
2636 | 0 | } else { |
2637 | 0 | pix1 = pixClone(pixs); |
2638 | 0 | } |
2639 | | |
2640 | | /* All fast scans are horizontal; rotate 90 deg cw if necessary */ |
2641 | 0 | if (dir == L_SCAN_VERTICAL) |
2642 | 0 | pix2 = pixRotate90(pix1, 1); |
2643 | 0 | else /* L_SCAN_HORIZONTAL */ |
2644 | 0 | pix2 = pixClone(pix1); |
2645 | 0 | pixGetDimensions(pix2, &w, &h, NULL); |
2646 | |
|
2647 | 0 | pixadb = (debug) ? pixaCreate(0) : NULL; |
2648 | 0 | pixdb1 = NULL; |
2649 | 0 | if (pixadb) { |
2650 | 0 | lept_mkdir("lept/rect"); |
2651 | 0 | pixaAddPix(pixadb, pix1, L_CLONE); |
2652 | 0 | pixdb1 = pixConvertTo32(pix2); |
2653 | 0 | } |
2654 | 0 | pixDestroy(&pix1); |
2655 | | |
2656 | | /* Scanning down, find the first scanline with a long enough run. |
2657 | | * That run goes from (xfirst, yfirst) to (xlast, yfirst). */ |
2658 | 0 | found = FALSE; |
2659 | 0 | for (i = 0; i < h; i++) { |
2660 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2661 | 0 | if (length >= (l_int32)(fract * w + 0.5)) { |
2662 | 0 | yfirst = i; |
2663 | 0 | xfirst = xstart; |
2664 | 0 | xlast = xfirst + length - 1; |
2665 | 0 | found = TRUE; |
2666 | 0 | break; |
2667 | 0 | } |
2668 | 0 | } |
2669 | 0 | if (!found) { |
2670 | 0 | L_WARNING("no run of sufficient size was found\n", __func__); |
2671 | 0 | pixDestroy(&pix2); |
2672 | 0 | pixDestroy(&pixdb1); |
2673 | 0 | pixaDestroy(&pixadb); |
2674 | 0 | return NULL; |
2675 | 0 | } |
2676 | | |
2677 | | /* Continue down until the condition fails */ |
2678 | 0 | w1 = xlast - xfirst + 1; |
2679 | 0 | h1 = h - yfirst; /* init */ |
2680 | 0 | ylast = h - 1; /* init */ |
2681 | 0 | for (i = yfirst + 1; i < h; i++) { |
2682 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2683 | 0 | if (xstart > xfirst || (xstart + length - 1 < xlast) || |
2684 | 0 | i == h - 1) { |
2685 | 0 | ylast = i - 1; |
2686 | 0 | h1 = ylast - yfirst + 1; |
2687 | 0 | break; |
2688 | 0 | } |
2689 | 0 | } |
2690 | 0 | box1 = boxCreate(xfirst, yfirst, w1, h1); |
2691 | | |
2692 | | /* Scanning up, find the first scanline with a long enough run. |
2693 | | * That run goes from (xfirst, ylast) to (xlast, ylast). */ |
2694 | 0 | for (i = h - 1; i >= 0; i--) { |
2695 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2696 | 0 | if (length >= (l_int32)(fract * w + 0.5)) { |
2697 | 0 | ylast = i; |
2698 | 0 | xfirst = xstart; |
2699 | 0 | xlast = xfirst + length - 1; |
2700 | 0 | break; |
2701 | 0 | } |
2702 | 0 | } |
2703 | | |
2704 | | /* Continue up until the condition fails */ |
2705 | 0 | w2 = xlast - xfirst + 1; |
2706 | 0 | h2 = ylast + 1; /* initialize */ |
2707 | 0 | for (i = ylast - 1; i >= 0; i--) { |
2708 | 0 | pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); |
2709 | 0 | if (xstart > xfirst || (xstart + length - 1 < xlast) || |
2710 | 0 | i == 0) { |
2711 | 0 | yfirst = i + 1; |
2712 | 0 | h2 = ylast - yfirst + 1; |
2713 | 0 | break; |
2714 | 0 | } |
2715 | 0 | } |
2716 | 0 | box2 = boxCreate(xfirst, yfirst, w2, h2); |
2717 | 0 | pixDestroy(&pix2); |
2718 | |
|
2719 | 0 | if (pixadb) { |
2720 | 0 | pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0); |
2721 | 0 | pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0); |
2722 | 0 | pixaAddPix(pixadb, pixdb1, L_INSERT); |
2723 | 0 | } |
2724 | | |
2725 | | /* Select the final result from the two boxes */ |
2726 | 0 | if (select == L_GEOMETRIC_UNION) |
2727 | 0 | box3 = boxBoundingRegion(box1, box2); |
2728 | 0 | else if (select == L_GEOMETRIC_INTERSECTION) |
2729 | 0 | box3 = boxOverlapRegion(box1, box2); |
2730 | 0 | else if (select == L_LARGEST_AREA) |
2731 | 0 | box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2); |
2732 | 0 | else /* select == L_SMALLEST_AREA) */ |
2733 | 0 | box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2); |
2734 | 0 | boxDestroy(&box1); |
2735 | 0 | boxDestroy(&box2); |
2736 | | |
2737 | | /* Rotate the box 90 degrees ccw if necessary */ |
2738 | 0 | box4 = NULL; |
2739 | 0 | if (box3) { |
2740 | 0 | if (dir == L_SCAN_VERTICAL) |
2741 | 0 | box4 = boxRotateOrth(box3, w, h, 3); |
2742 | 0 | else |
2743 | 0 | box4 = boxCopy(box3); |
2744 | 0 | } |
2745 | | |
2746 | | /* Transform back to global coordinates if %boxs exists */ |
2747 | 0 | box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL; |
2748 | 0 | boxDestroy(&box3); |
2749 | 0 | boxDestroy(&box4); |
2750 | | |
2751 | | /* Debug output */ |
2752 | 0 | if (pixadb) { |
2753 | 0 | pixdb1 = pixConvertTo8(pixs, 0); |
2754 | 0 | pixAddConstantGray(pixdb1, 190); |
2755 | 0 | pixdb2 = pixConvertTo32(pixdb1); |
2756 | 0 | if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255); |
2757 | 0 | pixaAddPix(pixadb, pixdb2, L_INSERT); |
2758 | 0 | res = pixGetXRes(pixs); |
2759 | 0 | L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__); |
2760 | 0 | pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL, |
2761 | 0 | "/tmp/lept/rect/fitrect.pdf"); |
2762 | 0 | pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2); |
2763 | 0 | pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG); |
2764 | 0 | pixDestroy(&pix1); |
2765 | 0 | pixDestroy(&pixdb1); |
2766 | 0 | pixaDestroy(&pixadb); |
2767 | 0 | } |
2768 | |
|
2769 | 0 | return box5; |
2770 | 0 | } |
2771 | | |
2772 | | /*------------------------------------------------------------------* |
2773 | | * Automatic photoinvert for OCR * |
2774 | | *------------------------------------------------------------------*/ |
2775 | | /*! |
2776 | | * \brief pixAutoPhotoinvert() |
2777 | | * |
2778 | | * \param[in] pixs any depth, colormap ok |
2779 | | * \param[in] thresh binarization threshold; use 0 for default |
2780 | | * \param[out] ppixm [optional] image regions to be inverted |
2781 | | * \param[out] pixadb [optional] debug; input NULL to skip |
2782 | | * \return pixd 1 bpp image to be sent to OCR, or NULL on error |
2783 | | * |
2784 | | * <pre> |
2785 | | * Notes: |
2786 | | * (1) A 1 bpp image is returned, where pixels in image regions are |
2787 | | * photo-inverted. |
2788 | | * (2) If there is light text with a dark background, this will |
2789 | | * identify the region and photoinvert the pixels there if |
2790 | | * there are at least 60% fg pixels in the region. |
2791 | | * (3) For debug output, input a (typically empty) %pixadb. |
2792 | | * </pre> |
2793 | | */ |
2794 | | PIX * |
2795 | | pixAutoPhotoinvert(PIX *pixs, |
2796 | | l_int32 thresh, |
2797 | | PIX **ppixm, |
2798 | | PIXA *pixadb) |
2799 | 0 | { |
2800 | 0 | l_int32 i, n, empty, x, y, w, h; |
2801 | 0 | l_float32 fgfract; |
2802 | 0 | BOX *box1; |
2803 | 0 | BOXA *boxa1; |
2804 | 0 | PIX *pix1, *pix2, *pix3, *pix4, *pix5; |
2805 | |
|
2806 | 0 | if (ppixm) *ppixm = NULL; |
2807 | 0 | if (!pixs) |
2808 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
2809 | 0 | if (thresh == 0) thresh = 128; |
2810 | |
|
2811 | 0 | if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL) |
2812 | 0 | return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL); |
2813 | 0 | if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); |
2814 | | |
2815 | | /* Identify regions for photo-inversion: |
2816 | | * (1) Start with the halftone mask. |
2817 | | * (2) Eliminate ordinary text and halftones in the mask. |
2818 | | * (3) Some regions of inverted text may have been removed in |
2819 | | * steps (1) and (2). Conditionally fill holes in the mask, |
2820 | | * but do not fill out to the bounding rect. */ |
2821 | 0 | pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb); |
2822 | 0 | pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* remove noise */ |
2823 | 0 | pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0); |
2824 | 0 | if (pixadb) { |
2825 | 0 | pixaAddPix(pixadb, pix2, L_CLONE); |
2826 | 0 | pixaAddPix(pixadb, pix3, L_CLONE); |
2827 | 0 | pixaAddPix(pixadb, pix4, L_COPY); |
2828 | 0 | } |
2829 | 0 | pixDestroy(&pix2); |
2830 | 0 | pixDestroy(&pix3); |
2831 | 0 | pixZero(pix4, &empty); |
2832 | 0 | if (empty) { |
2833 | 0 | pixDestroy(&pix4); |
2834 | 0 | return pix1; |
2835 | 0 | } |
2836 | | |
2837 | | /* Examine each component and validate the inversion. |
2838 | | * Require at least 60% of pixels under each component to be FG. */ |
2839 | 0 | boxa1 = pixConnCompBB(pix4, 8); |
2840 | 0 | n = boxaGetCount(boxa1); |
2841 | 0 | for (i = 0; i < n; i++) { |
2842 | 0 | box1 = boxaGetBox(boxa1, i, L_COPY); |
2843 | 0 | pix5 = pixClipRectangle(pix1, box1, NULL); |
2844 | 0 | pixForegroundFraction(pix5, &fgfract); |
2845 | 0 | if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract); |
2846 | 0 | boxGetGeometry(box1, &x, &y, &w, &h); |
2847 | 0 | if (fgfract < 0.6) /* erase from the mask */ |
2848 | 0 | pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0); |
2849 | 0 | pixDestroy(&pix5); |
2850 | 0 | boxDestroy(&box1); |
2851 | 0 | } |
2852 | 0 | boxaDestroy(&boxa1); |
2853 | 0 | pixZero(pix4, &empty); |
2854 | 0 | if (empty) { |
2855 | 0 | pixDestroy(&pix4); |
2856 | 0 | return pix1; |
2857 | 0 | } |
2858 | | |
2859 | | /* Combine pixels of the photo-inverted pix with the binarized input */ |
2860 | 0 | pix5 = pixInvert(NULL, pix1); |
2861 | 0 | pixCombineMasked(pix1, pix5, pix4); |
2862 | |
|
2863 | 0 | if (pixadb) { |
2864 | 0 | pixaAddPix(pixadb, pix5, L_CLONE); |
2865 | 0 | pixaAddPix(pixadb, pix1, L_COPY); |
2866 | 0 | } |
2867 | 0 | pixDestroy(&pix5); |
2868 | 0 | if (ppixm) |
2869 | 0 | *ppixm = pix4; |
2870 | 0 | else |
2871 | 0 | pixDestroy(&pix4); |
2872 | 0 | return pix1; |
2873 | 0 | } |