Coverage Report

Created: 2024-06-18 06:05

/src/leptonica/src/pageseg.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  pageseg.c
29
 * <pre>
30
 *
31
 *      Top level page segmentation
32
 *          l_int32   pixGetRegionsBinary()
33
 *
34
 *      Halftone region extraction
35
 *          PIX      *pixGenHalftoneMask()    **Deprecated wrapper**
36
 *          PIX      *pixGenerateHalftoneMask()
37
38
 *
39
 *      Textline extraction
40
 *          PIX      *pixGenTextlineMask()
41
 *
42
 *      Textblock extraction
43
 *          PIX      *pixGenTextblockMask()
44
 *
45
 *      Location and extraction of page foreground; cleaning pages
46
 *          PIX            *pixCropImage()
47
 *          static l_int32  pixMaxCompAfterVClosing()
48
 *          static PIX     *pixRescaleForCropping()
49
 *          PIX            *pixCleanImage()
50
 *          BOX            *pixFindPageForeground()
51
 *
52
 *      Extraction of characters from image with only text
53
 *          l_int32   pixSplitIntoCharacters()
54
 *          BOXA     *pixSplitComponentWithProfile()
55
 *
56
 *      Extraction of lines of text
57
 *          PIXA     *pixExtractTextlines()
58
 *          PIXA     *pixExtractRawTextlines()
59
 *
60
 *      How many text columns
61
 *          l_int32   pixCountTextColumns()
62
 *
63
 *      Decision: text vs photo
64
 *          l_int32   pixDecideIfText()
65
 *          l_int32   pixFindThreshFgExtent()
66
 *
67
 *      Decision: table vs text
68
 *          l_int32   pixDecideIfTable()
69
 *          Pix      *pixPrepare1bpp()
70
 *
71
 *      Estimate the grayscale background value
72
 *          l_int32   pixEstimateBackground()
73
 *
74
 *      Largest white or black rectangles in an image
75
 *          l_int32   pixFindLargeRectangles()
76
 *          l_int32   pixFindLargestRectangle()
77
 *
78
 *      Generate rectangle inside connected component
79
 *          BOX      *pixFindRectangleInCC()
80
 *
81
 *      Automatic photoinvert for OCR
82
 *          PIX      *pixAutoPhotoinvert()
83
 * </pre>
84
 */
85
86
#ifdef HAVE_CONFIG_H
87
#include <config_auto.h>
88
#endif  /* HAVE_CONFIG_H */
89
90
#include <math.h>
91
#include "allheaders.h"
92
#include "pix_internal.h"
93
94
    /* These functions are not intended to work on very low-res images */
95
/* Smallest accepted image width, in pixels.  The functions in this file
 * that check dimensions log "pix too small" and fail when w < MinWidth. */
static const l_int32  MinWidth = 100;
96
/* Smallest accepted image height, in pixels; checked together with
 * MinWidth (the test is  w < MinWidth || h < MinHeight). */
static const l_int32  MinHeight = 100;
97
98
/* Forward declaration: pixCropImage() calls this static helper before
 * its definition appears further down in the file. */
static PIX *pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h,
99
                                  l_int32 lr_border, l_int32 tb_border,
100
                                  l_float32 maxwiden, PIX **ppixsc);
101
102
/*------------------------------------------------------------------*
103
 *                     Top level page segmentation                  *
104
 *------------------------------------------------------------------*/
105
/*!
106
 * \brief   pixGetRegionsBinary()
107
 *
108
 * \param[in]    pixs      1 bpp, assumed to be 300 to 400 ppi
109
 * \param[out]   ppixhm    [optional] halftone mask
110
 * \param[out]   ppixtm    [optional] textline mask
111
 * \param[out]   ppixtb    [optional] textblock mask
112
 * \param[in]    pixadb    input for collecting debug pix; use NULL to skip
113
 * \return  0 if OK, 1 on error
114
 *
115
 * <pre>
116
 * Notes:
117
 *      (1) It is best to deskew the image before segmenting.
118
 *      (2) Passing in %pixadb enables debug output.
119
 * </pre>
120
 */
121
l_ok
122
pixGetRegionsBinary(PIX   *pixs,
123
                    PIX  **ppixhm,
124
                    PIX  **ppixtm,
125
                    PIX  **ppixtb,
126
                    PIXA  *pixadb)
127
3.19k
{
128
3.19k
l_int32  w, h, htfound, tlfound;
129
3.19k
PIX     *pixr, *pix1, *pix2;
130
3.19k
PIX     *pixtext;  /* text pixels only */
131
3.19k
PIX     *pixhm2;   /* halftone mask; 2x reduction */
132
3.19k
PIX     *pixhm;    /* halftone mask;  */
133
3.19k
PIX     *pixtm2;   /* textline mask; 2x reduction */
134
3.19k
PIX     *pixtm;    /* textline mask */
135
3.19k
PIX     *pixvws;   /* vertical white space mask */
136
3.19k
PIX     *pixtb2;   /* textblock mask; 2x reduction */
137
3.19k
PIX     *pixtbf2;  /* textblock mask; 2x reduction; small comps filtered */
138
3.19k
PIX     *pixtb;    /* textblock mask */
139
140
3.19k
    if (ppixhm) *ppixhm = NULL;
141
3.19k
    if (ppixtm) *ppixtm = NULL;
142
3.19k
    if (ppixtb) *ppixtb = NULL;
143
3.19k
    if (!pixs || pixGetDepth(pixs) != 1)
144
371
        return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
145
2.82k
    pixGetDimensions(pixs, &w, &h, NULL);
146
2.82k
    if (w < MinWidth || h < MinHeight) {
147
1.79k
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
148
1.79k
        return 1;
149
1.79k
    }
150
151
        /* 2x reduce, to 150 -200 ppi */
152
1.02k
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
153
1.02k
    if (pixadb) pixaAddPix(pixadb, pixr, L_COPY);
154
155
        /* Get the halftone mask */
156
1.02k
    pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb);
157
158
        /* Get the textline mask from the text pixels */
159
1.02k
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb);
160
161
        /* Get the textblock mask from the textline mask */
162
1.02k
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb);
163
1.02k
    pixDestroy(&pixr);
164
1.02k
    pixDestroy(&pixtext);
165
1.02k
    pixDestroy(&pixvws);
166
167
        /* Remove small components from the mask, where a small
168
         * component is defined as one with both width and height < 60 */
169
1.02k
    pixtbf2 = NULL;
170
1.02k
    if (pixtb2) {
171
504
        pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
172
504
                                  L_SELECT_IF_GTE, NULL);
173
504
        pixDestroy(&pixtb2);
174
504
        if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY);
175
504
    }
176
177
        /* Expand all masks to full resolution, and do filling or
178
         * small dilations for better coverage. */
179
1.02k
    pixhm = pixExpandReplicate(pixhm2, 2);
180
1.02k
    pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
181
1.02k
    pixOr(pixhm, pixhm, pix1);
182
1.02k
    pixDestroy(&pixhm2);
183
1.02k
    pixDestroy(&pix1);
184
1.02k
    if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
185
186
1.02k
    pix1 = pixExpandReplicate(pixtm2, 2);
187
1.02k
    pixtm = pixDilateBrick(NULL, pix1, 3, 3);
188
1.02k
    pixDestroy(&pixtm2);
189
1.02k
    pixDestroy(&pix1);
190
1.02k
    if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY);
191
192
1.02k
    if (pixtbf2) {
193
504
        pix1 = pixExpandReplicate(pixtbf2, 2);
194
504
        pixtb = pixDilateBrick(NULL, pix1, 3, 3);
195
504
        pixDestroy(&pixtbf2);
196
504
        pixDestroy(&pix1);
197
504
        if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY);
198
521
    } else {
199
521
        pixtb = pixCreateTemplate(pixs);  /* empty mask */
200
521
    }
201
202
        /* Debug: identify objects that are neither text nor halftone image */
203
1.02k
    if (pixadb) {
204
1.02k
        pix1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
205
1.02k
        pix2 = pixSubtract(NULL, pix1, pixhm);  /* remove halftone pixels */
206
1.02k
        pixaAddPix(pixadb, pix2, L_INSERT);
207
1.02k
        pixDestroy(&pix1);
208
1.02k
    }
209
210
        /* Debug: display textline components with random colors */
211
1.02k
    if (pixadb) {
212
1.02k
        l_int32  w, h;
213
1.02k
        BOXA    *boxa;
214
1.02k
        PIXA    *pixa;
215
1.02k
        boxa = pixConnComp(pixtm, &pixa, 8);
216
1.02k
        pixGetDimensions(pixtm, &w, &h, NULL);
217
1.02k
        pix1 = pixaDisplayRandomCmap(pixa, w, h);
218
1.02k
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
219
1.02k
        pixaAddPix(pixadb, pix1, L_INSERT);
220
1.02k
        pixaDestroy(&pixa);
221
1.02k
        boxaDestroy(&boxa);
222
1.02k
    }
223
224
        /* Debug: identify the outlines of each textblock */
225
1.02k
    if (pixadb) {
226
1.02k
        PIXCMAP  *cmap;
227
1.02k
        PTAA     *ptaa;
228
1.02k
        ptaa = pixGetOuterBordersPtaa(pixtb);
229
1.02k
        lept_mkdir("lept/pageseg");
230
1.02k
        ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1);
231
1.02k
        pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
232
1.02k
        cmap = pixGetColormap(pix1);
233
1.02k
        pixcmapResetColor(cmap, 0, 130, 130, 130);
234
1.02k
        pixaAddPix(pixadb, pix1, L_INSERT);
235
1.02k
        ptaaDestroy(&ptaa);
236
1.02k
    }
237
238
        /* Debug: get b.b. for all mask components */
239
1.02k
    if (pixadb) {
240
1.02k
        BOXA  *bahm, *batm, *batb;
241
1.02k
        bahm = pixConnComp(pixhm, NULL, 4);
242
1.02k
        batm = pixConnComp(pixtm, NULL, 4);
243
1.02k
        batb = pixConnComp(pixtb, NULL, 4);
244
1.02k
        boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm);
245
1.02k
        boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm);
246
1.02k
        boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb);
247
1.02k
        boxaDestroy(&bahm);
248
1.02k
        boxaDestroy(&batm);
249
1.02k
        boxaDestroy(&batb);
250
1.02k
    }
251
1.02k
    if (pixadb) {
252
1.02k
        pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation",
253
1.02k
                         "/tmp/lept/pageseg/debug.pdf");
254
1.02k
        L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__);
255
1.02k
    }
256
257
1.02k
    if (ppixhm)
258
1.02k
        *ppixhm = pixhm;
259
0
    else
260
0
        pixDestroy(&pixhm);
261
1.02k
    if (ppixtm)
262
1.02k
        *ppixtm = pixtm;
263
0
    else
264
0
        pixDestroy(&pixtm);
265
1.02k
    if (ppixtb)
266
1.02k
        *ppixtb = pixtb;
267
0
    else
268
0
        pixDestroy(&pixtb);
269
270
1.02k
    return 0;
271
2.82k
}
272
273
274
/*------------------------------------------------------------------*
275
 *                    Halftone region extraction                    *
276
 *------------------------------------------------------------------*/
277
/*!
278
 * \brief   pixGenHalftoneMask()
279
 *
280
 * <pre>
281
 * Deprecated:
282
 *   This wrapper avoids an ABI change with tesseract 3.0.4.
283
 *   It should be removed when we no longer need to support 3.0.4.
284
 *   The debug parameter is ignored (assumed 0).
285
 * </pre>
286
 */
287
PIX *
288
pixGenHalftoneMask(PIX      *pixs,
289
                   PIX     **ppixtext,
290
                   l_int32  *phtfound,
291
                   l_int32   debug)
292
0
{
293
0
    return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL);
294
0
}
295
296
297
/*!
298
 * \brief   pixGenerateHalftoneMask()
299
 *
300
 * \param[in]    pixs      1 bpp, assumed to be 150 to 200 ppi
301
 * \param[out]   ppixtext  [optional] text part of pixs
302
 * \param[out]   phtfound  [optional] 1 if the mask is not empty
303
 * \param[in]    pixadb    input for collecting debug pix; use NULL to skip
304
 * \return  pixd halftone mask, or NULL on error
305
 *
306
 * <pre>
307
 * Notes:
308
 *      (1) This is not intended to work on small thumbnails.  The
309
 *          dimensions of pixs must be at least MinWidth x MinHeight.
310
 * </pre>
311
 */
312
PIX *
313
pixGenerateHalftoneMask(PIX      *pixs,
314
                        PIX     **ppixtext,
315
                        l_int32  *phtfound,
316
                        PIXA     *pixadb)
317
4.21k
{
318
4.21k
l_int32  w, h, empty;
319
4.21k
PIX     *pix1, *pix2, *pixhs, *pixhm, *pixd;
320
321
4.21k
    if (ppixtext) *ppixtext = NULL;
322
4.21k
    if (phtfound) *phtfound = 0;
323
4.21k
    if (!pixs || pixGetDepth(pixs) != 1)
324
295
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
325
3.92k
    pixGetDimensions(pixs, &w, &h, NULL);
326
3.92k
    if (w < MinWidth || h < MinHeight) {
327
2.81k
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
328
2.81k
        return NULL;
329
2.81k
    }
330
331
        /* Compute seed for halftone parts at 8x reduction */
332
1.11k
    pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0);
333
1.11k
    pix2 = pixOpenBrick(NULL, pix1, 5, 5);
334
1.11k
    pixhs = pixExpandReplicate(pix2, 4);  /* back to 2x reduction */
335
1.11k
    pixDestroy(&pix1);
336
1.11k
    pixDestroy(&pix2);
337
1.11k
    if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY);
338
339
        /* Compute mask for connected regions */
340
1.11k
    pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
341
1.11k
    if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
342
343
        /* Fill seed into mask to get halftone mask */
344
1.11k
    pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
345
1.11k
    if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
346
347
#if 0
348
    pixOpenBrick(pixd, pixd, 9, 9);
349
#endif
350
351
        /* Check if mask is empty */
352
1.11k
    pixZero(pixd, &empty);
353
1.11k
    if (phtfound && !empty)
354
127
        *phtfound = 1;
355
356
        /* Optionally, get all pixels that are not under the halftone mask */
357
1.11k
    if (ppixtext) {
358
551
        if (empty)
359
470
            *ppixtext = pixCopy(NULL, pixs);
360
81
        else
361
81
            *ppixtext = pixSubtract(NULL, pixs, pixd);
362
551
        if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY);
363
551
    }
364
365
1.11k
    pixDestroy(&pixhs);
366
1.11k
    pixDestroy(&pixhm);
367
1.11k
    return pixd;
368
3.92k
}
369
370
371
/*------------------------------------------------------------------*
372
 *                         Textline extraction                      *
373
 *------------------------------------------------------------------*/
374
/*!
375
 * \brief   pixGenTextlineMask()
376
 *
377
 * \param[in]    pixs      1 bpp, assumed to be 150 to 200 ppi
378
 * \param[out]   ppixvws   vertical whitespace mask
379
 * \param[out]   ptlfound  [optional] 1 if the mask is not empty
380
 * \param[in]    pixadb    input for collecting debug pix; use NULL to skip
381
 * \return  pixd textline mask, or NULL on error
382
 *
383
 * <pre>
384
 * Notes:
385
 *      (1) The input pixs should be deskewed.
386
 *      (2) pixs should have no halftone pixels.
387
 *      (3) This is not intended to work on small thumbnails.  The
388
 *          dimensions of pixs must be at least MinWidth x MinHeight.
389
 *      (4) Both the input image and the returned textline mask
390
 *          are at the same resolution.
391
 * </pre>
392
 */
393
PIX *
394
pixGenTextlineMask(PIX      *pixs,
395
                   PIX     **ppixvws,
396
                   l_int32  *ptlfound,
397
                   PIXA     *pixadb)
398
1.02k
{
399
1.02k
l_int32  w, h, empty;
400
1.02k
PIX     *pix1, *pix2, *pixvws, *pixd;
401
402
1.02k
    if (ptlfound) *ptlfound = 0;
403
1.02k
    if (!ppixvws)
404
0
        return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL);
405
1.02k
    *ppixvws = NULL;
406
1.02k
    if (!pixs || pixGetDepth(pixs) != 1)
407
474
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
408
551
    pixGetDimensions(pixs, &w, &h, NULL);
409
551
    if (w < MinWidth || h < MinHeight) {
410
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
411
0
        return NULL;
412
0
    }
413
414
        /* First we need a vertical whitespace mask.  Invert the image. */
415
551
    pix1 = pixInvert(NULL, pixs);
416
417
        /* The whitespace mask will break textlines where there
418
         * is a large amount of white space below or above.
419
         * This can be prevented by identifying regions of the
420
         * inverted image that have large horizontal extent (bigger than
421
         * the separation between columns) and significant
422
         * vertical extent (bigger than the separation between
423
         * textlines), and subtracting this from the bg. */
424
551
    pix2 = pixMorphCompSequence(pix1, "o80.60", 0);
425
551
    pixSubtract(pix1, pix1, pix2);
426
551
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
427
551
    pixDestroy(&pix2);
428
429
        /* Identify vertical whitespace by opening the remaining bg.
430
         * o5.1 removes thin vertical bg lines and o1.200 extracts
431
         * long vertical bg lines. */
432
551
    pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0);
433
551
    *ppixvws = pixvws;
434
551
    if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY);
435
551
    pixDestroy(&pix1);
436
437
        /* Three steps to getting text line mask:
438
         *   (1) close the characters and words in the textlines
439
         *   (2) open the vertical whitespace corridors back up
440
         *   (3) small opening to remove noise    */
441
551
    pix1 = pixMorphSequence(pixs, "c30.1", 0);
442
551
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
443
551
    pixd = pixSubtract(NULL, pix1, pixvws);
444
551
    pixOpenBrick(pixd, pixd, 3, 3);
445
551
    if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
446
551
    pixDestroy(&pix1);
447
448
        /* Check if text line mask is empty */
449
551
    if (ptlfound) {
450
551
        pixZero(pixd, &empty);
451
551
        if (!empty)
452
522
            *ptlfound = 1;
453
551
    }
454
455
551
    return pixd;
456
551
}
457
458
459
/*------------------------------------------------------------------*
460
 *                       Textblock extraction                       *
461
 *------------------------------------------------------------------*/
462
/*!
463
 * \brief   pixGenTextblockMask()
464
 *
465
 * \param[in]    pixs     1 bpp, textline mask, assumed to be 150 to 200 ppi
466
 * \param[in]    pixvws   vertical white space mask
467
 * \param[in]    pixadb   input for collecting debug pix; use NULL to skip
468
 * \return  pixd textblock mask, or NULL if empty or on error
469
 *
470
 * <pre>
471
 * Notes:
472
 *      (1) Both the input masks (textline and vertical white space) and
473
 *          the returned textblock mask are at the same resolution.
474
 *      (2) This is not intended to work on small thumbnails.  The
475
 *          dimensions of pixs must be at least MinWidth x MinHeight.
476
 *      (3) The result is somewhat noisy, in that small "blocks" of
477
 *          text may be included.  These can be removed by post-processing,
478
 *          using, e.g.,
479
 *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
480
 *                             L_SELECT_IF_GTE, NULL);
481
 * </pre>
482
 */
483
PIX *
484
pixGenTextblockMask(PIX   *pixs,
485
                    PIX   *pixvws,
486
                    PIXA  *pixadb)
487
1.02k
{
488
1.02k
l_int32  w, h, empty;
489
1.02k
PIX     *pix1, *pix2, *pix3, *pixd;
490
491
1.02k
    if (!pixs || pixGetDepth(pixs) != 1)
492
474
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
493
551
    pixGetDimensions(pixs, &w, &h, NULL);
494
551
    if (w < MinWidth || h < MinHeight) {
495
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
496
0
        return NULL;
497
0
    }
498
551
    if (!pixvws)
499
0
        return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL);
500
501
        /* Join pixels vertically to make a textblock mask */
502
551
    pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
503
551
    pixZero(pix1, &empty);
504
551
    if (empty) {
505
47
        pixDestroy(&pix1);
506
47
        L_INFO("no fg pixels in textblock mask\n", __func__);
507
47
        return NULL;
508
47
    }
509
504
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
510
511
        /* Solidify the textblock mask and remove noise:
512
         *   (1) For each cc, close the blocks and dilate slightly
513
         *       to form a solid mask.
514
         *   (2) Small horizontal closing between components.
515
         *   (3) Open the white space between columns, again.
516
         *   (4) Remove small components. */
517
504
    pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL);
518
504
    pixCloseSafeBrick(pix2, pix2, 10, 1);
519
504
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
520
504
    pix3 = pixSubtract(NULL, pix2, pixvws);
521
504
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
522
504
    pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH,
523
504
                            L_SELECT_IF_GTE, NULL);
524
504
    if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
525
526
504
    pixDestroy(&pix1);
527
504
    pixDestroy(&pix2);
528
504
    pixDestroy(&pix3);
529
504
    return pixd;
530
551
}
531
532
533
/*------------------------------------------------------------------*
534
 *    Location and extraction of page foreground; cleaning pages    *
535
 *------------------------------------------------------------------*/
536
/*!
537
 * \brief   pixCropImage()
538
 *
539
 * \param[in]    pixs        full resolution (any type or depth)
540
 * \param[in]    lr_clear    full res pixels cleared at left and right sides
541
 * \param[in]    tb_clear    full res pixels cleared at top and bottom sides
542
 * \param[in]    edgeclean   parameter for removing edge noise (-1 to 15)
543
 *                           default = 0 (no removal);
544
 *                           15 is maximally aggressive for random noise
545
 *                           -1 for aggressively removing side noise
546
 * \param[in]    lr_border   full res final "added" pixels on left and right
547
 * \param[in]    tb_border   full res final "added" pixels on top and bottom
548
 * \param[in]    maxwiden    max fractional horizontal stretch allowed
549
 * \param[in]   *debugfile   [optional] usually is NULL
550
 * \param[out]  *pcropbox    [optional] crop box at full resolution
551
 * \return  cropped pix, or NULL on error
552
 *
553
 * <pre>
554
 * Notes:
555
 *      (1) This binarizes and crops a page image.
556
 *          (a) Binarizes if necessary and does 2x reduction.
557
 *          (b) Clears near the border by %lr_clear/2 and %tb_clear/2 pixels
558
 *          (c) If %edgeclean > 0, it removes isolated sets of pixels,
559
 *              using a close/open operation of size %edgeclean + 1.
560
 *              If %edgeclean < 0, it uses a large vertical morphological
561
 *              closing and the extraction of the largest resulting
562
 *              connected component to eliminate noise on left and right sides.
563
 *          (d) Find the bounding box of remaining fg pixels and scales
564
 *              the box up 2x back to full resolution.
565
 *          (e) Crops the binarized image to the bounding box.
566
 *          (f) Slightly thickens long horizontal lines.
567
 *          (g) Rescales this image to fit within the original image
568
 *              less lr_border on the sides and tb_border above and below.
569
 *              The rescaling is done isomorphically with a (possible)
570
 *              optional additional widening.  Suggest the additional
571
 *              widening factor not exceed 1.15.
572
 *          Note that (b) - (d) are done at 2x reduction for efficiency.
573
 *      (2) Side clearing must not exceed 1/6 of the dimension on that side.
574
 *      (3) The clear and border pixel parameters must be >= 0.
575
 *      (4) The "clear" parameters act on the input image, whereas the
576
 *          "border" parameters act to give a white border to the final
577
 *          image.  They are not literally added, because the input and final
578
 *          images are the same size.  If the resulting images are to be
579
 *          printed, it is useful to have border pixel parameters of at
580
 *          least 60 at 300 ppi, to avoid losing content at the edges.
581
 *      (5) This is not intended to work on small thumbnails.  The
582
 *          dimensions of pixs must be at least MinWidth x MinHeight.
583
 *      (6) Step (f) above helps with orthographically-produced music notation,
584
 *          where the horizontal staff lines can be very thin and thus
585
 *          subject to printer alias.
586
 * </pre>
587
 */
588
PIX *
589
pixCropImage(PIX         *pixs,
590
             l_int32      lr_clear,
591
             l_int32      tb_clear,
592
             l_int32      edgeclean,
593
             l_int32      lr_border,
594
             l_int32      tb_border,
595
             l_float32    maxwiden,
596
             const char  *debugfile,
597
             BOX        **pcropbox)
598
0
{
599
0
char     cmd[64];
600
0
l_int32  w, h, val, ret;
601
0
BOX     *box1, *box2;
602
0
PIX     *pix1, *pix2, *pix3;
603
0
PIXA    *pixa1;
604
605
0
    if (pcropbox) *pcropbox = NULL;
606
0
    if (!pixs)
607
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
608
0
    if (edgeclean > 15) {
609
0
        L_WARNING("edgeclean > 15; setting to 15\n", __func__);
610
0
        edgeclean = 15;
611
0
    }
612
0
    pixGetDimensions(pixs, &w, &h, NULL);
613
0
    if (w < MinWidth || h < MinHeight) {
614
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
615
0
        return NULL;
616
0
    }
617
0
    if (lr_clear < 0) lr_clear = 0;
618
0
    if (tb_clear < 0) tb_clear = 0;
619
0
    if (lr_border < 0) lr_border = 0;
620
0
    if (tb_border < 0) tb_border = 0;
621
0
    if (lr_clear > w / 6 || tb_clear > h / 6) {
622
0
        L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n",
623
0
                __func__, w / 6, h / 6);
624
0
        return NULL;
625
0
    }
626
0
    if (maxwiden > 1.15)
627
0
        L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
628
0
                  __func__, maxwiden);
629
0
    pixa1 = (debugfile) ? pixaCreate(5) : NULL;
630
0
    if (pixa1) pixaAddPix(pixa1, pixs, L_COPY);
631
632
        /* Binarize if necessary and 2x reduction */
633
0
    pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1);
634
0
    pix2 = pixReduceRankBinary2(pix1, 2, NULL);
635
636
        /* Clear out border pixels */
637
0
    pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2,
638
0
                        tb_clear / 2, PIX_CLR);
639
0
    if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT);
640
641
        /* Choose one of three methods for extracting foreground pixels:
642
         * (1) Include all foreground pixels
643
         * (2) Do a morphological close/open to remove noise throughout
644
         *     the image before finding a b.b. for remaining f.g. pixels
645
         * (3) Do a large vertical closing and choose the largest (by area)
646
         *     component to avoid foreground noise on left and right sides */
647
0
    if (edgeclean == 0) {
648
0
        ret = pixClipToForeground(pix2, NULL, &box1);
649
0
    } else if (edgeclean > 0) {
650
0
        val = edgeclean + 1;
651
0
        snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val);
652
0
        pix3 = pixMorphSequence(pix2, cmd, 0);
653
0
        ret = pixClipToForeground(pix3, NULL, &box1);
654
0
        pixDestroy(&pix3);
655
0
    } else {  /* edgeclean < 0) */
656
0
        ret = pixMaxCompAfterVClosing(pix2, &box1);
657
0
    }
658
0
    pixDestroy(&pix2);
659
0
    if (ret) {
660
0
        L_ERROR("no returned b.b. for foreground\n", __func__);
661
0
        pixDestroy(&pix1);
662
0
        pixaDestroy(&pixa1);
663
0
        return NULL;
664
0
    }
665
666
        /* Transform to full resolution */
667
0
    box2 = boxTransform(box1, 0, 0, 2.0, 2.0);  /* full res */
668
0
    boxDestroy(&box1);
669
0
    if (pixa1) {
670
0
        pix2 = pixCopy(NULL, pix1);
671
0
        pixRenderBoxArb(pix2, box2, 5, 255, 0, 0);
672
0
        pixaAddPix(pixa1, pix2, L_INSERT);
673
0
    }
674
675
        /* Grab the foreground region */
676
0
    pix2 = pixClipRectangle(pix1, box2, NULL);
677
0
    pixDestroy(&pix1);
678
679
        /* Slightly thicken long horizontal lines.  This prevents loss of
680
         * printed thin music staff lines due to aliasing. */
681
0
    pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0);
682
0
    pixOr(pix2, pix2, pix3);
683
0
    pixDestroy(&pix3);
684
685
        /* Rescale the fg and paste into the final image */
686
0
    pix3 = pixRescaleForCropping(pix2,  w, h, lr_border, tb_border,
687
0
                                 maxwiden, NULL);
688
0
    pixDestroy(&pix2);
689
0
    if (pixa1) {
690
0
        pix2 = pixCopy(NULL, pix3);
691
0
        pixaAddPix(pixa1, pix2, L_INSERT);
692
0
    }
693
694
0
    if (pcropbox)
695
0
        *pcropbox = box2;
696
0
    else
697
0
        boxDestroy(&box2);
698
0
    if (pixa1) {
699
0
       pixaAddPix(pixa1, pix3, L_COPY);
700
0
       lept_stderr("Writing debug file: %s\n", debugfile);
701
0
       pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile);
702
0
       pixaDestroy(&pixa1);
703
0
    }
704
0
    return pix3;
705
0
}
706
707
708
/*!
709
 * \brief   pixMaxCompAfterVClosing()
710
 *
711
 * \param[in]    pixs        1 bpp (input at 2x reduction)
712
 * \param[out]  **pbox       main region at input resolution (2x reduction)
713
 * \return  0 if OK, 1 on error
714
 *
715
 * <pre>
716
 * Notes:
717
 *      (1) This removes foreground noise along left and right edges,
718
 *          returning a bounding box for the remaining foreground pixels
719
 *          at the input resolution.
720
 *      (2) The input %pixs should be at a resolution 100 - 150 ppi.
721
 *      (3) It does two 2x level1 rank binary reductions, followed
722
 *          by a large vertical close/open, and then a 4x expansion
723
 *          back to the input resolution.
724
 *      (4) It is used as an option to pixCropImage(), when given
725
 *          a negative %edgecrop parameter.
726
 * </pre>
727
 */
728
l_int32
729
pixMaxCompAfterVClosing(PIX   *pixs,
730
                        BOX  **pbox)
731
0
{
732
0
l_int32  w, h, i, n, maxindex, maxarea, empty;
733
0
BOXA    *boxa1;
734
0
PIX     *pix1;
735
736
0
    if (!pbox)
737
0
        return ERROR_INT("pbox not defined", __func__, 1);
738
0
    *pbox = NULL;
739
0
    if (!pixs || pixGetDepth(pixs) != 1)
740
0
        return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
741
742
        /* Strong vertical closing */
743
0
    pix1 = pixMorphSequence(pixs, "r11 + c1.50 + o1.50 + x4", 0);
744
0
    pixZero(pix1, &empty);
745
0
    if (empty)
746
0
        return ERROR_INT("pix1 is empty", __func__, 1);
747
748
        /* Find the c.c. with largest area and return its bounding box */
749
0
    boxa1 = pixConnCompBB(pix1, 8);
750
0
    pixDestroy(&pix1);
751
0
    n = boxaGetCount(boxa1);
752
0
    maxindex = 0;
753
0
    maxarea = 0;
754
0
    for (i = 0; i < n; i++) {
755
0
        boxaGetBoxGeometry(boxa1, i, NULL, NULL, &w, &h);
756
0
        if (w * h > maxarea) {
757
0
            maxarea = w * h;
758
0
            maxindex = i;
759
0
        }
760
0
    }
761
0
    *pbox = boxaGetBox(boxa1, maxindex, L_COPY);
762
0
    boxaDestroy(&boxa1);
763
0
    return 0; 
764
0
}
765
766
767
/*!
768
 * \brief   pixRescaleForCropping()
769
 *
770
 * \param[in]    pixs        1 bpp
771
 * \param[in]    w           width of output image
772
 * \param[in]    h           height of output image
773
 * \param[in]    lr_border   cleared final border pixels on left and right
774
 * \param[in]    tb_border   cleared final border pixels on top and bottom
775
 * \param[in]    maxwiden    max fractional horizontal stretch allowed; >= 1.0
776
 * \param[out]  *ppixsc      [optional] rescaled foreground region
777
 * \return  pixd  output image, or NULL on error
778
 *
779
 * <pre>
780
 * Notes:
781
 *      (1) This rescales %pixs to fit maximally within an image of
782
 *          size (w x h), under two conditions:
783
 *          (a) the final image has cleared border regions given by the
784
 *              input parameters %lr_border and %tb_border, and
785
 *          (b) the input image is first isotropically scaled to fit
786
 *              maximally within the allowed final region, and then further
787
 *              maximally widened, subject to the constraints of the
788
 *              cleared border and the %maxwiden parameter.
789
 *      (2) The cleared border pixel parameters must be >= 0.
790
 *      (3) If there is extra horizontal stretching by a factor
791
 *          %maxwiden larger than about 1.15, the appearance may be
792
 *          unpleasingly distorted; hence the suggestion not to exceed it.
793
 * </pre>
794
 */
795
static PIX *
796
pixRescaleForCropping(PIX       *pixs,
797
                      l_int32    w,
798
                      l_int32    h,
799
                      l_int32    lr_border,
800
                      l_int32    tb_border,
801
                      l_float32  maxwiden,
802
                      PIX      **ppixsc)
803
0
{
804
0
static l_int32  first_time = TRUE;
805
0
l_int32         wi, hi, wmax, hmax, wn, wf, hf, xf;
806
0
l_float32       ratio, scaleh, scalew, scalewid;
807
0
PIX            *pix1, *pixd;
808
809
0
    if (ppixsc) *ppixsc = NULL;
810
0
    if (!pixs || pixGetDepth(pixs) != 1)
811
0
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
812
0
    if (lr_border < 0) lr_border = 0;
813
0
    if (tb_border < 0) tb_border = 0;
814
0
    maxwiden = L_MAX(1.0, maxwiden);
815
0
    if (maxwiden > 1.15)
816
0
        L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
817
0
                  __func__, maxwiden);
818
819
        /* Rescale the foreground region.
820
         * First, decide if scaling is to full width or full height.
821
         * If scaling to full height, determine how much additional
822
         * width widening is possible, given the maxwiden constraint.
823
         * If scaling to full width, both width and height are
824
         * scaled isotropically.  Scaling is done so that the resulting
825
         * foreground is maximally widened, so it can be horizontally
826
         * centered in an image of size (w x h), less %lr_border
827
         * on each side. */
828
0
    pixGetDimensions(pixs, &wi, &hi, NULL);
829
0
    wmax = w - 2 * lr_border;
830
0
    hmax = h - 2 * tb_border;
831
0
    ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi);
832
0
    if (ratio >= 1) {  /* width can be widened after isotropic scaling */
833
0
        scaleh = (l_float32)hmax / (l_float32)hi;
834
0
        wn = scaleh * wi;  /* scaled but not widened */
835
0
        scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn);
836
0
        scalew = scaleh * scalewid;
837
0
        wf = scalew * wi;
838
0
        hf = hmax;  /* scale to full height */
839
0
        pix1 = pixScale(pixs, scalew, scaleh);
840
0
        if (first_time == TRUE) {
841
0
            lept_stderr("Width stretched by factor %5.3f\n", scalewid);
842
0
            first_time = FALSE;
843
0
        }
844
0
        xf = (w - wf) / 2.0;
845
0
    } else {  /* width cannot be widened after isotropic scaling */
846
0
        scalew = (l_float32)wmax / (l_float32)wi;
847
0
        pix1 = pixScale(pixs, scalew, scalew);
848
0
        wf = wmax;  /* scale to full width */
849
0
        hf = scalew * hi;  /* no extra vertical stretching allowed */
850
0
        xf = lr_border;
851
0
    }
852
853
        /* Paste it, horizontally centered and vertically placed as
854
         * high as allowed (by %tb_border) into the final page image. */
855
0
    pixd = pixCreate(w, h, 1);
856
0
    pixRasterop(pixd, xf, tb_border, wf, hf, PIX_SRC, pix1, 0, 0);
857
858
0
    if (ppixsc)
859
0
        *ppixsc = pix1;
860
0
    else
861
0
        pixDestroy(&pix1);
862
0
    return pixd;
863
0
}
864
865
866
/*!
867
 * \brief   pixCleanImage()
868
 *
869
 * \param[in]    pixs        full resolution (any type or depth)
870
 * \param[in]    contrast    vary contrast: 1 = lightest; 10 = darkest;
871
 *                           suggest 1 unless light features are being lost
872
 * \param[in]    rotation    cw by 90 degrees: {0,1,2,3} represent
873
 *                           0, 90, 180 and 270 degree cw rotations
874
 * \param[in]    scale       1 (no scaling) or 2 (2x upscaling)
875
 * \param[in]    opensize    opening size of structuring element for noise
876
 *                           removal: {0 or 1 to skip; 2, 3 for opening}
877
 * \return  cleaned pix, or NULL on error
878
 *
879
 * <pre>
880
 * Notes:
881
 *    (1) This deskews, optionally rotates and darkens, cleans background
882
 *        to white, binarizes and optionally removes small noise.
883
 *    (2) For color and grayscale input, local background normalization is
884
 *        done to 200, and a threshold of 180 sets the maximum foreground
885
 *        value in the normalized image.
886
 *    (3) The %contrast parameter adjusts the binarization to avoid losing
887
 *        lighter input pixels.  Contrast is increased as %contrast increases
888
 *        from 1 to 10.
889
 *    (4) The %scale parameter controls the thresholding to 1 bpp. Two values:
890
 *            1 = threshold
891
 *            2 = linear interpolated 2x upscaling before threshold.
892
 *    (5) The #opensize parameter is the size of a square SEL used with
893
 *        opening to remove small speckle noise.  Allowed open sizes are 2,3.
894
 *        If this is to be used, try 2 before 3.
895
 *    (6) This does the image processing for cleanTo1bppFilesToPdf() and
896
 *        prog/cleanpdf.c.
897
 * </pre>
898
 */
899
PIX *
900
pixCleanImage(PIX         *pixs,
901
              l_int32      contrast,
902
              l_int32      rotation,
903
              l_int32      scale,
904
              l_int32      opensize)
905
0
{
906
0
char  sequence[32];
907
0
PIX  *pix1, *pix2, *pix3, *pix4, *pix5;
908
909
0
    if (!pixs)
910
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
911
0
    if (rotation < 0 || rotation > 3) {
912
0
        L_ERROR("invalid rotation = %d; rotation must be in  {0,1,2,3}\n",
913
0
                __func__, rotation);
914
0
        return NULL;
915
0
    }
916
0
    if (contrast < 1 || contrast > 10) {
917
0
        L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n",
918
0
                __func__, contrast);
919
0
        return NULL;
920
0
    }
921
0
    if (scale != 1 && scale != 2) {
922
0
        L_ERROR("invalid scale = %d; scale must be 1 or 2\n",
923
0
                __func__, opensize);
924
0
        return NULL;
925
0
    }
926
0
    if (opensize > 3) {
927
0
        L_ERROR("invalid opensize = %d; opensize must be <= 3\n",
928
0
                __func__, opensize);
929
0
        return NULL;
930
0
    }
931
932
0
    if (pixGetDepth(pixs) == 1) {
933
0
        if (rotation > 0)
934
0
            pix1 = pixRotateOrth(pixs, rotation);
935
0
        else
936
0
            pix1 = pixClone(pixs);
937
0
        pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL);
938
0
        if (scale == 2)
939
0
            pix4 = pixExpandBinaryReplicate(pix2, 2, 2);
940
0
        else  /* scale == 1 */
941
0
            pix4 = pixClone(pix2);
942
0
    } else {
943
0
        pix1 = pixConvertTo8MinMax(pixs);
944
0
        if (rotation > 0)
945
0
            pix2 = pixRotateOrth(pix1, rotation);
946
0
        else
947
0
            pix2 = pixClone(pix1);
948
0
        pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL);
949
0
        pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale);
950
0
        pixDestroy(&pix3);
951
0
    }
952
953
0
    if (opensize == 2 || opensize == 3) {
954
0
        snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize);
955
0
        pix5 = pixMorphSequence(pix4, sequence, 0);
956
0
    } else {
957
0
        pix5 = pixClone(pix4);
958
0
    }
959
960
0
    pixDestroy(&pix1);
961
0
    pixDestroy(&pix2);
962
0
    pixDestroy(&pix4);
963
0
    return pix5;
964
0
}
965
966
967
/*!
968
 * \brief   pixFindPageForeground()
969
 *
970
 * \param[in]    pixs       full resolution (any type or depth)
971
 * \param[in]    threshold  for binarization; typically about 128
972
 * \param[in]    mindist    min distance of text from border to allow
973
 *                          cleaning near border; at 2x reduction, this
974
 *                          should be larger than 50; typically about 70
975
 * \param[in]    erasedist  when conditions are satisfied, erase anything
976
 *                          within this distance of the edge;
977
 *                          typically 20-30 at 2x reduction
978
 * \param[in]    showmorph  debug: set to a negative integer to show steps
979
 *                          in generating masks; this is typically used
980
 *                          for debugging region extraction
981
 * \param[in]    pixac      debug: allocate outside and pass this in to
982
 *                          accumulate results of each call to this function,
983
 *                          which can be displayed in a mosaic or a pdf.
984
 * \return  box region including foreground, with some pixel noise
985
 *                   removed, or NULL if not found
986
 *
987
 * <pre>
988
 * Notes:
989
 *      (1) This doesn't simply crop to the fg.  It attempts to remove
990
 *          pixel noise and junk at the edge of the image before cropping.
991
 *          The input %threshold is used if pixs is not 1 bpp.
992
 *      (2) This is not intended to work on small thumbnails.  The
993
 *          dimensions of pixs must be at least MinWidth x MinHeight.
994
 *      (3) Debug: set showmorph to display the intermediate image in
995
 *          the morphological operations on this page.
996
 *      (4) Debug: to get pdf output of results when called repeatedly,
997
 *          call with an existing pixac, which will add an image of this page,
998
 *          with the fg outlined.  If no foreground is found, there is
999
 *          no output for this page image.
1000
 * </pre>
1001
 */
1002
BOX *
1003
pixFindPageForeground(PIX     *pixs,
1004
                      l_int32  threshold,
1005
                      l_int32  mindist,
1006
                      l_int32  erasedist,
1007
                      l_int32  showmorph,
1008
                      PIXAC   *pixac)
1009
3.19k
{
1010
3.19k
l_int32  flag, nbox, intersects;
1011
3.19k
l_int32  w, h, bx, by, bw, bh, left, right, top, bottom;
1012
3.19k
PIX     *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
1013
3.19k
BOX     *box, *boxfg, *boxin, *boxd;
1014
3.19k
BOXA    *ba1, *ba2;
1015
1016
3.19k
    if (!pixs)
1017
0
        return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL);
1018
3.19k
    pixGetDimensions(pixs, &w, &h, NULL);
1019
3.19k
    if (w < MinWidth || h < MinHeight) {
1020
2.14k
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
1021
2.14k
        return NULL;
1022
2.14k
    }
1023
1024
        /* Binarize, downscale by 0.5, remove the noise to generate a seed,
1025
         * and do a seedfill back from the seed into those 8-connected
1026
         * components of the binarized image for which there was at least
1027
         * one seed pixel. */
1028
1.04k
    flag = (showmorph) ? 100 : 0;
1029
1.04k
    pixb = pixConvertTo1(pixs, threshold);
1030
1.04k
    pixb2 = pixScale(pixb, 0.5, 0.5);
1031
1.04k
    pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag);
1032
1.04k
    pix1 = pixMorphSequence(pixb2, "o50.1", 0);
1033
1.04k
    pixOr(pixseed, pixseed, pix1);
1034
1.04k
    pixDestroy(&pix1);
1035
1.04k
    pix1 = pixMorphSequence(pixb2, "o1.50", 0);
1036
1.04k
    pixOr(pixseed, pixseed, pix1);
1037
1.04k
    pixDestroy(&pix1);
1038
1.04k
    pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
1039
1.04k
    pixm = pixRemoveBorderConnComps(pixsf, 8);
1040
1041
        /* Now, where is the main block of text?  We want to remove noise near
1042
         * the edge of the image, but to do that, we have to be convinced that
1043
         * (1) there is noise and (2) it is far enough from the text block
1044
         * and close enough to the edge.  For each edge, if the block
1045
         * is more than mindist from that edge, then clean 'erasedist'
1046
         * pixels from the edge. */
1047
1.04k
    pix1 = pixMorphSequence(pixm, "c50.50", flag);
1048
1.04k
    ba1 = pixConnComp(pix1, NULL, 8);
1049
1.04k
    ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
1050
1.04k
    pixGetDimensions(pix1, &w, &h, NULL);
1051
1.04k
    nbox = boxaGetCount(ba2);
1052
1.04k
    if (nbox > 1) {
1053
318
        box = boxaGetBox(ba2, 0, L_CLONE);
1054
318
        boxGetGeometry(box, &bx, &by, &bw, &bh);
1055
318
        left = (bx > mindist) ? erasedist : 0;
1056
318
        right = (w - bx - bw > mindist) ? erasedist : 0;
1057
318
        top = (by > mindist) ? erasedist : 0;
1058
318
        bottom = (h - by - bh > mindist) ? erasedist : 0;
1059
318
        pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
1060
318
        boxDestroy(&box);
1061
318
    }
1062
1.04k
    pixDestroy(&pix1);
1063
1.04k
    boxaDestroy(&ba1);
1064
1.04k
    boxaDestroy(&ba2);
1065
1066
        /* Locate the foreground region; don't bother cropping */
1067
1.04k
    pixClipToForeground(pixm, NULL, &boxfg);
1068
1069
        /* Sanity check the fg region.  Make sure it's not confined
1070
         * to a thin boundary on the left and right sides of the image,
1071
         * in which case it is likely to be noise. */
1072
1.04k
    if (boxfg) {
1073
948
        boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
1074
948
        boxIntersects(boxfg, boxin, &intersects);
1075
948
        boxDestroy(&boxin);
1076
948
        if (!intersects) boxDestroy(&boxfg);
1077
948
    }
1078
1079
1.04k
    boxd = NULL;
1080
1.04k
    if (boxfg) {
1081
929
        boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2);  /* tiny expansion */
1082
929
        boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);
1083
1084
            /* Save the debug image showing the box for this page */
1085
929
        if (pixac) {
1086
0
            pixg2 = pixConvert1To4Cmap(pixb);
1087
0
            pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
1088
0
            pixacompAddPix(pixac, pixg2, IFF_DEFAULT);
1089
0
            pixDestroy(&pixg2);
1090
0
        }
1091
929
    }
1092
1093
1.04k
    pixDestroy(&pixb);
1094
1.04k
    pixDestroy(&pixb2);
1095
1.04k
    pixDestroy(&pixseed);
1096
1.04k
    pixDestroy(&pixsf);
1097
1.04k
    pixDestroy(&pixm);
1098
1.04k
    boxDestroy(&boxfg);
1099
1.04k
    return boxd;
1100
3.19k
}
1101
1102
1103
/*------------------------------------------------------------------*
1104
 *         Extraction of characters from image with only text       *
1105
 *------------------------------------------------------------------*/
1106
/*!
1107
 * \brief   pixSplitIntoCharacters()
1108
 *
1109
 * \param[in]    pixs      1 bpp, contains only deskewed text
1110
 * \param[in]    minw      min component width for initial filtering; typ. 4
1111
 * \param[in]    minh      min component height for initial filtering; typ. 4
1112
 * \param[out]   pboxa     [optional] character bounding boxes
1113
 * \param[out]   ppixa     [optional] character images
1114
 * \param[out]   ppixdebug [optional] showing splittings
1115
 *
1116
 * \return  0 if OK, 1 on error
1117
 *
1118
 * <pre>
1119
 * Notes:
1120
 *      (1) This is a simple function that attempts to find split points
1121
 *          based on vertical pixel profiles.
1122
 *      (2) It should be given an image that has an arbitrary number
1123
 *          of text characters.
1124
 *      (3) The returned pixa includes the boxes from which the
1125
 *          (possibly split) components are extracted.
1126
 * </pre>
1127
 */
1128
l_ok
1129
pixSplitIntoCharacters(PIX     *pixs,
1130
                       l_int32  minw,
1131
                       l_int32  minh,
1132
                       BOXA   **pboxa,
1133
                       PIXA   **ppixa,
1134
                       PIX    **ppixdebug)
1135
3.19k
{
1136
3.19k
l_int32  ncomp, i, xoff, yoff;
1137
3.19k
BOXA   *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
1138
3.19k
BOXAA  *baa;
1139
3.19k
PIX    *pix, *pix1, *pix2, *pixdb;
1140
3.19k
PIXA   *pixa1, *pixadb;
1141
1142
3.19k
    if (pboxa) *pboxa = NULL;
1143
3.19k
    if (ppixa) *ppixa = NULL;
1144
3.19k
    if (ppixdebug) *ppixdebug = NULL;
1145
3.19k
    if (!pixs || pixGetDepth(pixs) != 1)
1146
371
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1147
1148
        /* Remove the small stuff */
1149
2.82k
    pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
1150
2.82k
                           L_SELECT_IF_GT, NULL);
1151
1152
        /* Small vertical close for consolidation */
1153
2.82k
    pix2 = pixMorphSequence(pix1, "c1.10", 0);
1154
2.82k
    pixDestroy(&pix1);
1155
1156
        /* Get the 8-connected components */
1157
2.82k
    boxa1 = pixConnComp(pix2, &pixa1, 8);
1158
2.82k
    pixDestroy(&pix2);
1159
2.82k
    boxaDestroy(&boxa1);
1160
1161
        /* Split the components if obvious */
1162
2.82k
    ncomp = pixaGetCount(pixa1);
1163
2.82k
    boxa2 = boxaCreate(ncomp);
1164
2.82k
    pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
1165
156k
    for (i = 0; i < ncomp; i++) {
1166
153k
        pix = pixaGetPix(pixa1, i, L_CLONE);
1167
153k
        if (ppixdebug) {
1168
153k
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
1169
153k
            if (pixdb)
1170
29.3k
                pixaAddPix(pixadb, pixdb, L_INSERT);
1171
153k
        } else {
1172
0
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
1173
0
        }
1174
153k
        pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
1175
153k
        boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
1176
153k
        boxaJoin(boxa2, boxat2, 0, -1);
1177
153k
        pixDestroy(&pix);
1178
153k
        boxaDestroy(&boxat1);
1179
153k
        boxaDestroy(&boxat2);
1180
153k
    }
1181
2.82k
    pixaDestroy(&pixa1);
1182
1183
        /* Generate the debug image */
1184
2.82k
    if (ppixdebug) {
1185
2.82k
        if (pixaGetCount(pixadb) > 0) {
1186
1.15k
            *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
1187
1.15k
                                                1.0, 0, 20, 1);
1188
1.15k
        }
1189
2.82k
        pixaDestroy(&pixadb);
1190
2.82k
    }
1191
1192
        /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
1193
2.82k
    baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
1194
2.82k
    boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
1195
2.82k
    boxaaDestroy(&baa);
1196
2.82k
    boxaDestroy(&boxa2);
1197
1198
        /* Optionally extract the pieces from the input image */
1199
2.82k
    if (ppixa)
1200
2.82k
        *ppixa = pixClipRectangles(pixs, boxad);
1201
2.82k
    if (pboxa)
1202
2.82k
        *pboxa = boxad;
1203
0
    else
1204
0
        boxaDestroy(&boxad);
1205
2.82k
    return 0;
1206
3.19k
}
1207
1208
1209
/*!
1210
 * \brief   pixSplitComponentWithProfile()
1211
 *
1212
 * \param[in]    pixs       1 bpp, exactly one connected component
1213
 * \param[in]    delta      distance used in extrema finding in a numa; typ. 10
1214
 * \param[in]    mindel     minimum required difference between profile
1215
 *                          minimum and profile values +2 and -2 away; typ. 7
1216
 * \param[out]   ppixdebug  [optional] debug image of splitting
1217
 * \return  boxa of c.c. after splitting, or NULL on error
1218
 *
1219
 * <pre>
1220
 * Notes:
1221
 *      (1) This will split the most obvious cases of touching characters.
1222
 *          The split points it is searching for are narrow and deep
1223
 *          minimima in the vertical pixel projection profile, after a
1224
 *          large vertical closing has been applied to the component.
1225
 * </pre>
1226
 */
1227
BOXA *
1228
pixSplitComponentWithProfile(PIX     *pixs,
1229
                             l_int32  delta,
1230
                             l_int32  mindel,
1231
                             PIX    **ppixdebug)
1232
153k
{
1233
153k
l_int32   w, h, n2, i, firstmin, xmin, xshift;
1234
153k
l_int32   nmin, nleft, nright, nsplit, isplit, ncomp;
1235
153k
l_int32  *array1, *array2;
1236
153k
BOX      *box;
1237
153k
BOXA     *boxad;
1238
153k
NUMA     *na1, *na2, *nasplit;
1239
153k
PIX      *pix1, *pixdb;
1240
1241
153k
    if (ppixdebug) *ppixdebug = NULL;
1242
153k
    if (!pixs || pixGetDepth(pixs) != 1)
1243
0
        return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL);
1244
153k
    pixGetDimensions(pixs, &w, &h, NULL);
1245
1246
        /* Closing to consolidate characters vertically */
1247
153k
    pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);
1248
1249
        /* Get extrema of column projections */
1250
153k
    boxad = boxaCreate(2);
1251
153k
    na1 = pixCountPixelsByColumn(pix1);  /* w elements */
1252
153k
    pixDestroy(&pix1);
1253
153k
    na2 = numaFindExtrema(na1, delta, NULL);
1254
153k
    n2 = numaGetCount(na2);
1255
153k
    if (n2 < 3) {  /* no split possible */
1256
119k
        box = boxCreate(0, 0, w, h);
1257
119k
        boxaAddBox(boxad, box, L_INSERT);
1258
119k
        numaDestroy(&na1);
1259
119k
        numaDestroy(&na2);
1260
119k
        return boxad;
1261
119k
    }
1262
1263
        /* Look for sufficiently deep and narrow minima.
1264
         * All minima of of interest must be surrounded by max on each
1265
         * side.  firstmin is the index of first possible minimum. */
1266
33.8k
    array1 = numaGetIArray(na1);
1267
33.8k
    array2 = numaGetIArray(na2);
1268
33.8k
    if (ppixdebug) numaWriteStderr(na2);
1269
33.8k
    firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
1270
33.8k
    nasplit = numaCreate(n2);  /* will hold split locations */
1271
130k
    for (i = firstmin; i < n2 - 1; i+= 2) {
1272
96.6k
        xmin = array2[i];
1273
96.6k
        nmin = array1[xmin];
1274
96.6k
        if (xmin + 2 >= w) break;  /* no more splits possible */
1275
96.6k
        nleft = array1[xmin - 2];
1276
96.6k
        nright = array1[xmin + 2];
1277
96.6k
        if (ppixdebug) {
1278
96.6k
            lept_stderr(
1279
96.6k
                "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
1280
96.6k
                xmin, w, nleft, nmin, nright);
1281
96.6k
        }
1282
96.6k
        if (nleft - nmin >= mindel && nright - nmin >= mindel)  /* split */
1283
74.6k
            numaAddNumber(nasplit, xmin);
1284
96.6k
    }
1285
33.8k
    nsplit = numaGetCount(nasplit);
1286
1287
#if 0
1288
    if (ppixdebug && nsplit > 0) {
1289
        lept_mkdir("lept/split");
1290
        gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL);
1291
    }
1292
#endif
1293
1294
33.8k
    numaDestroy(&na1);
1295
33.8k
    numaDestroy(&na2);
1296
33.8k
    LEPT_FREE(array1);
1297
33.8k
    LEPT_FREE(array2);
1298
1299
33.8k
    if (nsplit == 0) {  /* no splitting */
1300
4.44k
        numaDestroy(&nasplit);
1301
4.44k
        box = boxCreate(0, 0, w, h);
1302
4.44k
        boxaAddBox(boxad, box, L_INSERT);
1303
4.44k
        return boxad;
1304
4.44k
    }
1305
1306
        /* Use split points to generate b.b. after splitting */
1307
104k
    for (i = 0, xshift = 0; i < nsplit; i++) {
1308
74.6k
        numaGetIValue(nasplit, i, &isplit);
1309
74.6k
        box = boxCreate(xshift, 0, isplit - xshift, h);
1310
74.6k
        boxaAddBox(boxad, box, L_INSERT);
1311
74.6k
        xshift = isplit + 1;
1312
74.6k
    }
1313
29.3k
    box = boxCreate(xshift, 0, w - xshift, h);
1314
29.3k
    boxaAddBox(boxad, box, L_INSERT);
1315
29.3k
    numaDestroy(&nasplit);
1316
1317
29.3k
    if (ppixdebug) {
1318
29.3k
        pixdb = pixConvertTo32(pixs);
1319
29.3k
        ncomp = boxaGetCount(boxad);
1320
133k
        for (i = 0; i < ncomp; i++) {
1321
104k
            box = boxaGetBox(boxad, i, L_CLONE);
1322
104k
            pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
1323
104k
            boxDestroy(&box);
1324
104k
        }
1325
29.3k
        *ppixdebug = pixdb;
1326
29.3k
    }
1327
1328
29.3k
    return boxad;
1329
33.8k
}
1330
1331
1332
/*------------------------------------------------------------------*
1333
 *                    Extraction of lines of text                   *
1334
 *------------------------------------------------------------------*/
1335
/*!
1336
 * \brief   pixExtractTextlines()
1337
 *
1338
 * \param[in]    pixs        any depth, assumed to have nearly horizontal text
1339
 * \param[in]    maxw, maxh  initial filtering: remove any components in pixs
1340
 *                           with components larger than maxw or maxh
1341
 * \param[in]    minw, minh  final filtering: remove extracted 'lines'
1342
 *                           with sizes smaller than minw or minh; use
1343
 *                           0 for default.
1344
 * \param[in]    adjw, adjh  final adjustment of boxes representing each
1345
 *                           text line.  If > 0, these increase the box
1346
 *                           size at each edge by this amount.
1347
 * \param[in]    pixadb      pixa for saving intermediate steps; NULL to omit
1348
 * \return  pixa of textline images, including bounding boxes, or
1349
 *                    NULL on error
1350
 *
1351
 * <pre>
1352
 * Notes:
1353
 *      (1) This function assumes that textline fragments have sufficient
1354
 *          vertical separation and small enough skew so that a
1355
 *          horizontal dilation sufficient to join words will not join
1356
 *          textlines.  It does not guarantee that horizontally adjacent
1357
 *          textline fragments on the same line will be joined.
1358
 *      (2) For images with multiple columns, it attempts to avoid joining
1359
 *          textlines across the space between columns.  If that is not
1360
 *          a concern, you can also use pixExtractRawTextlines(),
1361
 *          which will join them with alacrity.
1362
 *      (3) This first removes components from pixs that are either
1363
 *          wide (> %maxw) or tall (> %maxh).
1364
 *      (4) A final filtering operation removes small components, such
1365
 *          that width < %minw or height < %minh.
1366
 *      (5) For reasonable accuracy, the resolution of pixs should be
1367
 *          at least 100 ppi.  For reasonable efficiency, the resolution
1368
 *          should not exceed 600 ppi.
1369
 *      (6) This can be used to determine if some region of a scanned
1370
 *          image is horizontal text.
1371
 *      (7) As an example, for a pix with resolution 300 ppi, a reasonable
1372
 *          set of parameters is:
1373
 *             pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
1374
 *          The defaults minw and minh for 300 ppi are about 36 and 20,
1375
 *          so the same result is obtained with:
1376
 *             pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
1377
 *      (8) The output pixa is composed of subimages, one for each textline,
1378
 *          and the boxa in the pixa tells where in %pixs each textline goes.
1379
 * </pre>
1380
 */
1381
PIXA *
1382
pixExtractTextlines(PIX     *pixs,
1383
                    l_int32  maxw,
1384
                    l_int32  maxh,
1385
                    l_int32  minw,
1386
                    l_int32  minh,
1387
                    l_int32  adjw,
1388
                    l_int32  adjh,
1389
                    PIXA    *pixadb)
1390
0
{
1391
0
char     buf[64];
1392
0
l_int32  res, csize, empty;
1393
0
BOXA    *boxa1, *boxa2, *boxa3;
1394
0
PIX     *pix1, *pix2, *pix3;
1395
0
PIXA    *pixa1, *pixa2, *pixa3;
1396
1397
0
    if (!pixs)
1398
0
        return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1399
1400
        /* Binarize carefully, if necessary */
1401
0
    if (pixGetDepth(pixs) > 1) {
1402
0
        pix2 = pixConvertTo8(pixs, FALSE);
1403
0
        pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1404
0
        pix1 = pixThresholdToBinary(pix3, 150);
1405
0
        pixDestroy(&pix2);
1406
0
        pixDestroy(&pix3);
1407
0
    } else {
1408
0
        pix1 = pixClone(pixs);
1409
0
    }
1410
0
    pixZero(pix1, &empty);
1411
0
    if (empty) {
1412
0
        pixDestroy(&pix1);
1413
0
        L_INFO("no fg pixels in input image\n", __func__);
1414
0
        return NULL;
1415
0
    }
1416
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1417
1418
        /* Remove any very tall or very wide connected components */
1419
0
    pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1420
0
                           L_SELECT_IF_LT, NULL);
1421
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1422
0
    pixDestroy(&pix1);
1423
1424
        /* Filter to solidify the text lines within the x-height region.
1425
         * The closing (csize) bridges gaps between words.  The opening
1426
         * removes isolated bridges between textlines. */
1427
0
    if ((res = pixGetXRes(pixs)) == 0) {
1428
0
        L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1429
0
        res = 300;
1430
0
    }
1431
0
    csize = L_MIN(120., 60.0 * res / 300.0);
1432
0
    snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3);
1433
0
    pix3 = pixMorphCompSequence(pix2, buf, 0);
1434
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1435
1436
        /* Extract the connected components.  These should be dilated lines */
1437
0
    boxa1 = pixConnComp(pix3, &pixa1, 4);
1438
0
    if (pixadb) {
1439
0
        pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1440
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1441
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1442
0
    }
1443
1444
        /* Set minw, minh if default is requested */
1445
0
    minw = (minw != 0) ? minw : (l_int32)(0.12 * res);
1446
0
    minh = (minh != 0) ? minh : (l_int32)(0.07 * res);
1447
1448
        /* Remove line components that are too small */
1449
0
    pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH,
1450
0
                           L_SELECT_IF_GTE, NULL);
1451
0
    if (pixadb) {
1452
0
        pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1453
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1454
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1455
0
        pix1 = pixConvertTo32(pix2);
1456
0
        pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0);
1457
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1458
0
    }
1459
1460
        /* Selectively AND with the version before dilation, and save */
1461
0
    boxa2 = pixaGetBoxa(pixa2, L_CLONE);
1462
0
    boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1463
0
    pixa3 = pixClipRectangles(pix2, boxa3);
1464
0
    if (pixadb) {
1465
0
        pix1 = pixaDisplayRandomCmap(pixa3, 0, 0);
1466
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1467
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1468
0
    }
1469
1470
0
    pixDestroy(&pix2);
1471
0
    pixDestroy(&pix3);
1472
0
    pixaDestroy(&pixa1);
1473
0
    pixaDestroy(&pixa2);
1474
0
    boxaDestroy(&boxa1);
1475
0
    boxaDestroy(&boxa2);
1476
0
    boxaDestroy(&boxa3);
1477
0
    return pixa3;
1478
0
}
1479
1480
1481
/*!
1482
 * \brief   pixExtractRawTextlines()
1483
 *
1484
 * \param[in]    pixs        any depth, assumed to have nearly horizontal text
1485
 * \param[in]    maxw, maxh  initial filtering: remove any components in pixs
1486
 *                           with components larger than maxw or maxh;
1487
 *                           use 0 for default values.
1488
 * \param[in]    adjw, adjh  final adjustment of boxes representing each
1489
 *                           text line.  If > 0, these increase the box
1490
 *                           size at each edge by this amount.
1491
 * \param[in]    pixadb      pixa for saving intermediate steps; NULL to omit
1492
 * \return  pixa of textline images, including bounding boxes, or
1493
 *                    NULL on error
1494
 *
1495
 * <pre>
1496
 * Notes:
1497
 *      (1) This function assumes that textlines have sufficient
1498
 *          vertical separation and small enough skew so that a
1499
 *          horizontal dilation sufficient to join words will not join
1500
 *          textlines.  It aggressively joins textlines across multiple
1501
 *          columns, so if that is not desired, you must either (a) make
1502
 *          sure that %pixs is a single column of text or (b) use instead
1503
 *          pixExtractTextlines(), which is more conservative
1504
 *          about joining text fragments that have vertical overlap.
1505
 *      (2) This first removes components from pixs that are either
1506
 *          very wide (> %maxw) or very tall (> %maxh).
1507
 *      (3) For reasonable accuracy, the resolution of pixs should be
1508
 *          at least 100 ppi.  For reasonable efficiency, the resolution
1509
 *          should not exceed 600 ppi.
1510
 *      (4) This can be used to determine if some region of a scanned
1511
 *          image is horizontal text.
1512
 *      (5) As an example, for a pix with resolution 300 ppi, a reasonable
1513
 *          set of parameters is:
1514
 *             pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
1515
 *      (6) The output pixa is composed of subimages, one for each textline,
1516
 *          and the boxa in the pixa tells where in %pixs each textline goes.
1517
 * </pre>
1518
 */
1519
PIXA *
1520
pixExtractRawTextlines(PIX     *pixs,
1521
                       l_int32  maxw,
1522
                       l_int32  maxh,
1523
                       l_int32  adjw,
1524
                       l_int32  adjh,
1525
                       PIXA    *pixadb)
1526
0
{
1527
0
char     buf[64];
1528
0
l_int32  res, csize, empty;
1529
0
BOXA    *boxa1, *boxa2, *boxa3;
1530
0
BOXAA   *baa1;
1531
0
PIX     *pix1, *pix2, *pix3;
1532
0
PIXA    *pixa1, *pixa2;
1533
1534
0
    if (!pixs)
1535
0
        return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1536
1537
        /* Set maxw, maxh if default is requested */
1538
0
    if ((res = pixGetXRes(pixs)) == 0) {
1539
0
        L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1540
0
        res = 300;
1541
0
    }
1542
0
    maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res);
1543
0
    maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res);
1544
1545
        /* Binarize carefully, if necessary */
1546
0
    if (pixGetDepth(pixs) > 1) {
1547
0
        pix2 = pixConvertTo8(pixs, FALSE);
1548
0
        pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1549
0
        pix1 = pixThresholdToBinary(pix3, 150);
1550
0
        pixDestroy(&pix2);
1551
0
        pixDestroy(&pix3);
1552
0
    } else {
1553
0
        pix1 = pixClone(pixs);
1554
0
    }
1555
0
    pixZero(pix1, &empty);
1556
0
    if (empty) {
1557
0
        pixDestroy(&pix1);
1558
0
        L_INFO("no fg pixels in input image\n", __func__);
1559
0
        return NULL;
1560
0
    }
1561
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1562
1563
        /* Remove any very tall or very wide connected components */
1564
0
    pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1565
0
                           L_SELECT_IF_LT, NULL);
1566
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1567
0
    pixDestroy(&pix1);
1568
1569
        /* Filter to solidify the text lines within the x-height region.
1570
         * The closing (csize) bridges gaps between words. */
1571
0
    csize = L_MIN(120., 60.0 * res / 300.0);
1572
0
    snprintf(buf, sizeof(buf), "c%d.1", csize);
1573
0
    pix3 = pixMorphCompSequence(pix2, buf, 0);
1574
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1575
1576
        /* Extract the connected components.  These should be dilated lines */
1577
0
    boxa1 = pixConnComp(pix3, &pixa1, 4);
1578
0
    if (pixadb) {
1579
0
        pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1580
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1581
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1582
0
    }
1583
1584
        /* Do a 2-d sort, and generate a bounding box for each set of text
1585
         * line segments that is aligned horizontally (i.e., has vertical
1586
         * overlap) into a box representing a single text line. */
1587
0
    baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5);
1588
0
    boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2);
1589
0
    if (pixadb) {
1590
0
        pix1 = pixConvertTo32(pix2);
1591
0
        pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0);
1592
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1593
0
    }
1594
1595
        /* Optionally adjust the sides of each text line box, and then
1596
         * use the boxes to generate a pixa of the text lines. */
1597
0
    boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1598
0
    pixa2 = pixClipRectangles(pix2, boxa3);
1599
0
    if (pixadb) {
1600
0
        pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1601
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1602
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1603
0
    }
1604
1605
0
    pixDestroy(&pix2);
1606
0
    pixDestroy(&pix3);
1607
0
    pixaDestroy(&pixa1);
1608
0
    boxaDestroy(&boxa1);
1609
0
    boxaDestroy(&boxa2);
1610
0
    boxaDestroy(&boxa3);
1611
0
    boxaaDestroy(&baa1);
1612
0
    return pixa2;
1613
0
}
1614
1615
1616
/*------------------------------------------------------------------*
1617
 *                      How many text columns                       *
1618
 *------------------------------------------------------------------*/
1619
/*!
1620
 * \brief   pixCountTextColumns()
1621
 *
1622
 * \param[in]    pixs        1 bpp
1623
 * \param[in]    deltafract  fraction of (max - min) to be used in the delta
1624
 *                           for extrema finding; typ 0.3
1625
 * \param[in]    peakfract   fraction of (max - min) to be used to threshold
1626
 *                            the peak value; typ. 0.5
1627
 * \param[in]    clipfract   fraction of image dimension removed on each side;
1628
 *                           typ. 0.1, which leaves w and h reduced by 0.8
1629
 * \param[out]   pncols      number of columns; -1 if not determined
1630
 * \param[in]    pixadb      [optional] pre-allocated, for showing
1631
 *                           intermediate computation; use null to skip
1632
 * \return  0 if OK, 1 on error
1633
 *
1634
 * <pre>
1635
 * Notes:
1636
 *      (1) It is assumed that pixs has the correct resolution set.
1637
 *          If the resolution is 0, we set to 300 and issue a warning.
1638
 *      (2) If necessary, the image is scaled to between 37 and 75 ppi;
1639
 *          most of the processing is done at this resolution.
1640
 *      (3) If no text is found (essentially a blank page),
1641
 *          this returns ncols = 0.
1642
 *      (4) For debug output, input a pre-allocated pixa.
1643
 * </pre>
1644
 */
1645
l_ok
1646
pixCountTextColumns(PIX       *pixs,
1647
                    l_float32  deltafract,
1648
                    l_float32  peakfract,
1649
                    l_float32  clipfract,
1650
                    l_int32   *pncols,
1651
                    PIXA      *pixadb)
1652
0
{
1653
0
l_int32    w, h, res, i, n, npeak;
1654
0
l_float32  scalefact, redfact, minval, maxval, val4, val5, fract;
1655
0
BOX       *box;
1656
0
NUMA      *na1, *na2, *na3, *na4, *na5;
1657
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5;
1658
1659
0
    if (!pncols)
1660
0
        return ERROR_INT("&ncols not defined", __func__, 1);
1661
0
    *pncols = -1;  /* init */
1662
0
    if (!pixs || pixGetDepth(pixs) != 1)
1663
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1664
0
    if (deltafract < 0.15 || deltafract > 0.75)
1665
0
        L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__);
1666
0
    if (peakfract < 0.25 || peakfract > 0.9)
1667
0
        L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__);
1668
0
    if (clipfract < 0.0 || clipfract >= 0.5)
1669
0
        return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1);
1670
0
    if (pixadb) pixaAddPix(pixadb, pixs, L_COPY);
1671
1672
        /* Scale to between 37.5 and 75 ppi */
1673
0
    if ((res = pixGetXRes(pixs)) == 0) {
1674
0
        L_WARNING("resolution undefined; set to 300\n", __func__);
1675
0
        pixSetResolution(pixs, 300, 300);
1676
0
        res = 300;
1677
0
    }
1678
0
    if (res < 37) {
1679
0
        L_WARNING("resolution %d very low\n", __func__, res);
1680
0
        scalefact = 37.5 / res;
1681
0
        pix1 = pixScale(pixs, scalefact, scalefact);
1682
0
    } else {
1683
0
        redfact = (l_float32)res / 37.5;
1684
0
        if (redfact < 2.0)
1685
0
            pix1 = pixClone(pixs);
1686
0
        else if (redfact < 4.0)
1687
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1688
0
        else if (redfact < 8.0)
1689
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0);
1690
0
        else if (redfact < 16.0)
1691
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0);
1692
0
        else
1693
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2);
1694
0
    }
1695
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1696
1697
        /* Crop inner 80% of image */
1698
0
    pixGetDimensions(pix1, &w, &h, NULL);
1699
0
    box = boxCreate(clipfract * w, clipfract * h,
1700
0
                    (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h);
1701
0
    pix2 = pixClipRectangle(pix1, box, NULL);
1702
0
    pixGetDimensions(pix2, &w, &h, NULL);
1703
0
    boxDestroy(&box);
1704
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1705
1706
        /* Deskew */
1707
0
    pix3 = pixDeskew(pix2, 0);
1708
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1709
1710
        /* Close to increase column counts for text */
1711
0
    pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21);
1712
0
    if (pixadb) pixaAddPix(pixadb, pix4, L_COPY);
1713
0
    pixInvert(pix4, pix4);
1714
0
    na1 = pixCountByColumn(pix4, NULL);
1715
1716
0
    if (pixadb) {
1717
0
        gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL);
1718
0
        pix5 = pixRead("/tmp/lept/plot.png");
1719
0
        pixaAddPix(pixadb, pix5, L_INSERT);
1720
0
    }
1721
1722
        /* Analyze the column counts.  na4 gives the locations of
1723
         * the extrema in normalized units (0.0 to 1.0) across the
1724
         * cropped image.  na5 gives the magnitude of the
1725
         * extrema, normalized to the dynamic range.  The peaks
1726
         * are values that are at least peakfract of (max - min). */
1727
0
    numaGetMax(na1, &maxval, NULL);
1728
0
    numaGetMin(na1, &minval, NULL);
1729
0
    fract = (l_float32)(maxval - minval) / h;  /* is there much at all? */
1730
0
    if (fract < 0.05) {
1731
0
        L_INFO("very little content on page; 0 text columns\n", __func__);
1732
0
        *pncols = 0;
1733
0
    } else {
1734
0
        na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3);
1735
0
        na4 = numaTransform(na2, 0, 1.0 / w);
1736
0
        na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval));
1737
0
        n = numaGetCount(na4);
1738
0
        for (i = 0, npeak = 0; i < n; i++) {
1739
0
            numaGetFValue(na4, i, &val4);
1740
0
            numaGetFValue(na5, i, &val5);
1741
0
            if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) {
1742
0
                npeak++;
1743
0
                L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5);
1744
0
            }
1745
0
        }
1746
0
        *pncols = npeak + 1;
1747
0
        numaDestroy(&na2);
1748
0
        numaDestroy(&na3);
1749
0
        numaDestroy(&na4);
1750
0
        numaDestroy(&na5);
1751
0
    }
1752
1753
0
    pixDestroy(&pix1);
1754
0
    pixDestroy(&pix2);
1755
0
    pixDestroy(&pix3);
1756
0
    pixDestroy(&pix4);
1757
0
    numaDestroy(&na1);
1758
0
    return 0;
1759
0
}
1760
1761
1762
/*------------------------------------------------------------------*
1763
 *                      Decision text vs photo                      *
1764
 *------------------------------------------------------------------*/
1765
/*!
1766
 * \brief   pixDecideIfText()
1767
 *
1768
 * \param[in]    pixs     any depth
1769
 * \param[in]    box      [optional]  if null, use entire pixs
1770
 * \param[out]   pistext  1 if text; 0 if photo; -1 if not determined or empty
1771
 * \param[in]    pixadb   [optional] pre-allocated, for showing intermediate
1772
 *                        computation; use NULL to skip
1773
 * \return  0 if OK, 1 on error
1774
 *
1775
 * <pre>
1776
 * Notes:
1777
 *      (1) It is assumed that pixs has the correct resolution set.
1778
 *          If the resolution is 0, we set to 300 and issue a warning.
1779
 *      (2) If necessary, the image is scaled to 300 ppi; most of the
1780
 *          processing is done at this resolution.
1781
 *      (3) Text is assumed to be in horizontal lines.
1782
 *      (4) Because thin vertical lines are removed before filtering for
1783
 *          text lines, this should identify tables as text.
1784
 *      (5) If %box is null and pixs contains both text lines and line art,
1785
 *          this function might return %istext == true.
1786
 *      (6) If the input pixs is empty, or for some other reason the
1787
 *          result can not be determined, return -1.
1788
 *      (7) For debug output, input a pre-allocated pixa.
1789
 * </pre>
1790
 */
1791
l_ok
1792
pixDecideIfText(PIX      *pixs,
1793
                BOX      *box,
1794
                l_int32  *pistext,
1795
                PIXA     *pixadb)
1796
0
{
1797
0
l_int32    i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp;
1798
0
l_float32  ratio1, ratio2;
1799
0
L_BMF     *bmf;
1800
0
BOXA      *boxa1, *boxa2, *boxa3, *boxa4, *boxa5;
1801
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7;
1802
0
PIXA      *pixa1;
1803
0
SEL       *sel1;
1804
1805
0
    if (!pistext)
1806
0
        return ERROR_INT("&istext not defined", __func__, 1);
1807
0
    *pistext = -1;
1808
0
    if (!pixs)
1809
0
        return ERROR_INT("pixs not defined", __func__, 1);
1810
1811
        /* Crop, convert to 1 bpp, 300 ppi */
1812
0
    if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL)
1813
0
        return ERROR_INT("pix1 not made", __func__, 1);
1814
1815
0
    pixZero(pix1, &empty);
1816
0
    if (empty) {
1817
0
        pixDestroy(&pix1);
1818
0
        L_INFO("pix is empty\n", __func__);
1819
0
        return 0;
1820
0
    }
1821
0
    w = pixGetWidth(pix1);
1822
1823
        /* Identify and remove tall, thin vertical lines (as found in tables)
1824
         * that are up to 9 pixels wide.  Make a hit-miss sel with an
1825
         * 81 pixel vertical set of hits and with 3 pairs of misses that
1826
         * are 10 pixels apart horizontally.  It is necessary to use a
1827
         * hit-miss transform; if we only opened with a vertical line of
1828
         * hits, we would remove solid regions of pixels that are not
1829
         * text or vertical lines. */
1830
0
    pix2 = pixCreate(11, 81, 1);
1831
0
    for (i = 0; i < 81; i++)
1832
0
        pixSetPixel(pix2, 5, i, 1);
1833
0
    sel1 = selCreateFromPix(pix2, 40, 5, NULL);
1834
0
    selSetElement(sel1, 20, 0, SEL_MISS);
1835
0
    selSetElement(sel1, 20, 10, SEL_MISS);
1836
0
    selSetElement(sel1, 40, 0, SEL_MISS);
1837
0
    selSetElement(sel1, 40, 10, SEL_MISS);
1838
0
    selSetElement(sel1, 60, 0, SEL_MISS);
1839
0
    selSetElement(sel1, 60, 10, SEL_MISS);
1840
0
    pix3 = pixHMT(NULL, pix1, sel1);
1841
0
    pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000);
1842
0
    pix5 = pixXor(NULL, pix1, pix4);
1843
0
    pixDestroy(&pix2);
1844
0
    selDestroy(&sel1);
1845
1846
        /* Convert the text lines to separate long horizontal components */
1847
0
    pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0);
1848
1849
        /* Estimate the distance to the bottom of the significant region */
1850
0
    if (box) {  /* use full height */
1851
0
        pixGetDimensions(pix6, NULL, &h, NULL);
1852
0
    } else {  /* use height of region that has text lines */
1853
0
        pixFindThreshFgExtent(pix6, 400, NULL, &h);
1854
0
    }
1855
1856
0
    if (pixadb) {
1857
0
        bmf = bmfCreate(NULL, 6);
1858
0
        pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary",
1859
0
                           0x0000ff00, L_ADD_BELOW);
1860
0
        pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line",
1861
0
                           0x0000ff00, L_ADD_BELOW);
1862
0
        pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill",
1863
0
                           0x0000ff00, L_ADD_BELOW);
1864
0
        pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor",
1865
0
                           0x0000ff00, L_ADD_BELOW);
1866
0
        pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components",
1867
0
                           0x0000ff00, L_ADD_BELOW);
1868
0
    }
1869
1870
        /* Extract the connected components */
1871
0
    if (pixadb) {
1872
0
        boxa1 = pixConnComp(pix6, &pixa1, 8);
1873
0
        pix7 = pixaDisplayRandomCmap(pixa1, 0, 0);
1874
0
        pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255);
1875
0
        pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components",
1876
0
                           0x0000ff00, L_ADD_BELOW);
1877
0
        pixDestroy(&pix7);
1878
0
        pixaDestroy(&pixa1);
1879
0
        bmfDestroy(&bmf);
1880
0
    } else {
1881
0
        boxa1 = pixConnComp(pix6, NULL, 8);
1882
0
    }
1883
1884
        /* Analyze the connected components.  The following conditions
1885
         * at 300 ppi must be satisfied if the image is text:
1886
         * (1) There are no components that are wider than 400 pixels and
1887
         *     taller than 175 pixels.
1888
         * (2) The second longest component is at least 60% of the
1889
         *     (possibly cropped) image width.  This catches images
1890
         *     that don't have any significant content.
1891
         * (3) Of the components that are at least 40% of the length
1892
         *     of the longest (n2), at least 80% of them must not exceed
1893
         *     60 pixels in height.
1894
         * (4) The number of those long, thin components (n3) must
1895
         *     equal or exceed a minimum that scales linearly with the
1896
         *     image height.
1897
         * Most images that are not text fail more than one of these
1898
         * conditions. */
1899
0
    boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL);
1900
0
    boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL);  /* 2nd longest */
1901
0
    boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH,
1902
0
                             L_SELECT_IF_GTE, NULL);
1903
0
    boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT,
1904
0
                             L_SELECT_IF_LTE, NULL);
1905
0
    boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH,
1906
0
                             L_SELECT_IF_GT, NULL);
1907
0
    big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1;
1908
0
    n1 = boxaGetCount(boxa1);
1909
0
    n2 = boxaGetCount(boxa3);
1910
0
    n3 = boxaGetCount(boxa4);
1911
0
    ratio1 = (l_float32)maxw / (l_float32)w;
1912
0
    ratio2 = (l_float32)n3 / (l_float32)n2;
1913
0
    minlines = L_MAX(2, h / 125);
1914
0
    if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines)
1915
0
        *pistext = 0;
1916
0
    else
1917
0
        *pistext = 1;
1918
0
    if (pixadb) {
1919
0
        if (*pistext == 1) {
1920
0
            L_INFO("This is text: \n  n1 = %d, n2 = %d, n3 = %d, "
1921
0
                   "minlines = %d\n  maxw = %d, ratio1 = %4.2f, h = %d, "
1922
0
                   "big_comp = %d\n", __func__, n1, n2, n3, minlines,
1923
0
                   maxw, ratio1, h, big_comp);
1924
0
        } else {
1925
0
            L_INFO("This is not text: \n  n1 = %d, n2 = %d, n3 = %d, "
1926
0
                   "minlines = %d\n  maxw = %d, ratio1 = %4.2f, h = %d, "
1927
0
                   "big_comp = %d\n", __func__, n1, n2, n3, minlines,
1928
0
                   maxw, ratio1, h, big_comp);
1929
0
        }
1930
0
    }
1931
1932
0
    boxaDestroy(&boxa1);
1933
0
    boxaDestroy(&boxa2);
1934
0
    boxaDestroy(&boxa3);
1935
0
    boxaDestroy(&boxa4);
1936
0
    boxaDestroy(&boxa5);
1937
0
    pixDestroy(&pix1);
1938
0
    pixDestroy(&pix3);
1939
0
    pixDestroy(&pix4);
1940
0
    pixDestroy(&pix5);
1941
0
    pixDestroy(&pix6);
1942
0
    return 0;
1943
0
}
1944
1945
1946
/*!
1947
 * \brief   pixFindThreshFgExtent()
1948
 *
1949
 * \param[in]    pixs     1 bpp
1950
 * \param[in]    thresh   threshold number of pixels in row
1951
 * \param[out]   ptop     [optional] location of top of region
1952
 * \param[out]   pbot     [optional] location of bottom of region
1953
 * \return  0 if OK, 1 on error
1954
 */
1955
l_ok
1956
pixFindThreshFgExtent(PIX      *pixs,
1957
                      l_int32   thresh,
1958
                      l_int32  *ptop,
1959
                      l_int32  *pbot)
1960
0
{
1961
0
l_int32   i, n;
1962
0
l_int32  *array;
1963
0
NUMA     *na;
1964
1965
0
    if (ptop) *ptop = 0;
1966
0
    if (pbot) *pbot = 0;
1967
0
    if (!ptop && !pbot)
1968
0
        return ERROR_INT("nothing to determine", __func__, 1);
1969
0
    if (!pixs || pixGetDepth(pixs) != 1)
1970
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1971
1972
0
    na = pixCountPixelsByRow(pixs, NULL);
1973
0
    n = numaGetCount(na);
1974
0
    array = numaGetIArray(na);
1975
0
    if (ptop) {
1976
0
        for (i = 0; i < n; i++) {
1977
0
            if (array[i] >= thresh) {
1978
0
                *ptop = i;
1979
0
                break;
1980
0
            }
1981
0
        }
1982
0
    }
1983
0
    if (pbot) {
1984
0
        for (i = n - 1; i >= 0; i--) {
1985
0
            if (array[i] >= thresh) {
1986
0
                *pbot = i;
1987
0
                break;
1988
0
            }
1989
0
        }
1990
0
    }
1991
0
    LEPT_FREE(array);
1992
0
    numaDestroy(&na);
1993
0
    return 0;
1994
0
}
1995
1996
1997
/*------------------------------------------------------------------*
1998
 *                     Decision: table vs text                      *
1999
 *------------------------------------------------------------------*/
2000
/*!
2001
 * \brief   pixDecideIfTable()
2002
 *
2003
 * \param[in]    pixs      any depth, any resolution >= 75 ppi
2004
 * \param[in]    box       [optional] if null, use entire pixs
2005
 * \param[in]    orient    L_PORTRAIT_MODE, L_LANDSCAPE_MODE
2006
 * \param[out]   pscore    0 - 4; -1 if not determined
2007
 * \param[in]    pixadb    [optional] pre-allocated, for showing intermediate
2008
 *                         computation; use NULL to skip
2009
 * \return  0 if OK, 1 on error
2010
 *
2011
 * <pre>
2012
 * Notes:
2013
 *      (1) It is assumed that pixs has the correct resolution set.
2014
 *          If the resolution is 0, we assume it is 300 ppi and issue a warning.
2015
 *      (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
2016
 *          clockwise before being analyzed.
2017
 *      (3) The interpretation of the returned score:
2018
 *            -1     undetermined
2019
 *             0     no table
2020
 *             1     unlikely to have a table
2021
 *             2     likely to have a table
2022
 *             3     even more likely to have a table
2023
 *             4     extremely likely to have a table
2024
 *          * Setting the condition for finding a table at score >= 2 works
2025
 *            well, except for false positives on kanji and landscape text.
2026
 *          * These false positives can be removed by setting the condition
2027
 *            at score >= 3, but recall is lowered because it will not find
2028
 *            tables without either horizontal or vertical lines.
2029
 *      (4) Most of the processing takes place at 75 ppi.
2030
 *      (5) Internally, three numbers are determined, for horizontal and
2031
 *          vertical fg lines, and for vertical bg lines.  From these,
2032
 *          four tests are made to decide if there is a table occupying
2033
 *          a significant part of the image.
2034
 *      (6) Images have arbitrary content and would be likely to trigger
2035
 *          this detector, so they are checked for first, and if found,
2036
 *          return with a 0 (no table) score.
2037
 *      (7) Musical scores (tablature) are likely to trigger the detector.
2038
 *      (8) Tables of content with more than 2 columns are likely to
2039
 *          trigger the detector.
2040
 *      (9) For debug output, input a pre-allocated pixa.
2041
 * </pre>
2042
 */
2043
l_ok
2044
pixDecideIfTable(PIX      *pixs,
2045
                 BOX      *box,
2046
                 l_int32   orient,
2047
                 l_int32  *pscore,
2048
                 PIXA     *pixadb)
2049
3.19k
{
2050
3.19k
l_int32  empty, nhb, nvb, nvw, score, htfound;
2051
3.19k
PIX     *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
2052
2053
3.19k
    if (!pscore)
2054
0
        return ERROR_INT("&score not defined", __func__, 1);
2055
3.19k
    *pscore = -1;
2056
3.19k
    if (!pixs)
2057
0
        return ERROR_INT("pixs not defined", __func__, 1);
2058
2059
        /* Check if there is an image region.  First convert to 1 bpp
2060
         * at 175 ppi.  If an image is found, assume there is no table.  */
2061
3.19k
    pix1 = pixPrepare1bpp(pixs, box, 0.1, 175);
2062
3.19k
    pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL);
2063
3.19k
    if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY);
2064
3.19k
    pixDestroy(&pix1);
2065
3.19k
    pixDestroy(&pix2);
2066
3.19k
    if (htfound) {
2067
46
        *pscore = 0;
2068
46
        L_INFO("pix has an image region\n", __func__);
2069
46
        return 0;
2070
46
    }
2071
2072
        /* Crop, convert to 1 bpp, 75 ppi */
2073
3.14k
    if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL)
2074
488
        return ERROR_INT("pix1 not made", __func__, 1);
2075
2076
2.65k
    pixZero(pix1, &empty);
2077
2.65k
    if (empty) {
2078
97
        *pscore = 0;
2079
97
        pixDestroy(&pix1);
2080
97
        L_INFO("pix is empty\n", __func__);
2081
97
        return 0;
2082
97
    }
2083
2084
        /* The 2x2 dilation on 75 ppi makes these two approaches very similar:
2085
         * (1) pix1 = pixPrepare1bpp(..., 300);  // 300 ppi resolution
2086
         *     pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
2087
         * (2) pix1 = pixPrepare1bpp(..., 75);  // 75 ppi resolution
2088
         *     pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2089
         * But (2) is more efficient if the input image to pixPrepare1bpp()
2090
         * is not at 300 ppi.   */
2091
2.56k
    pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2092
2093
        /* Deskew both horizontally and vertically; rotate by 90
2094
         * degrees if in landscape mode. */
2095
2.56k
    pix3 = pixDeskewBoth(pix2, 1);
2096
2.56k
    if (pixadb) {
2097
2.56k
        pixaAddPix(pixadb, pix2, L_COPY);
2098
2.56k
        pixaAddPix(pixadb, pix3, L_COPY);
2099
2.56k
    }
2100
2.56k
    if (orient == L_LANDSCAPE_MODE)
2101
0
        pix4 = pixRotate90(pix3, 1);
2102
2.56k
    else
2103
2.56k
        pix4 = pixClone(pix3);
2104
2.56k
    pixDestroy(&pix1);
2105
2.56k
    pixDestroy(&pix2);
2106
2.56k
    pixDestroy(&pix3);
2107
2.56k
    pix1 = pixClone(pix4);
2108
2.56k
    pixDestroy(&pix4);
2109
2110
        /* Look for horizontal and vertical lines */
2111
2.56k
    pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0);
2112
2.56k
    pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8);
2113
2.56k
    pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0);
2114
2.56k
    pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8);
2115
2.56k
    pix6 = pixOr(NULL, pix3, pix5);
2116
2.56k
    if (pixadb) {
2117
2.56k
        pixaAddPix(pixadb, pix2, L_COPY);
2118
2.56k
        pixaAddPix(pixadb, pix4, L_COPY);
2119
2.56k
        pixaAddPix(pixadb, pix3, L_COPY);
2120
2.56k
        pixaAddPix(pixadb, pix5, L_COPY);
2121
2.56k
        pixaAddPix(pixadb, pix6, L_COPY);
2122
2.56k
    }
2123
2.56k
    pixCountConnComp(pix2, 8, &nhb);  /* number of horizontal black lines */
2124
2.56k
    pixCountConnComp(pix4, 8, &nvb);  /* number of vertical black lines */
2125
2126
        /* Remove the lines */
2127
2.56k
    pixSubtract(pix1, pix1, pix6);
2128
2.56k
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2129
2130
        /* Remove noise pixels */
2131
2.56k
    pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0);
2132
2.56k
    if (pixadb) pixaAddPix(pixadb, pix7, L_COPY);
2133
2134
        /* Look for vertical white space.  Invert to convert white bg
2135
         * to fg.  Use a single rank-1 2x reduction, which closes small
2136
         * fg holes, for the final processing at 37.5 ppi.
2137
         * The vertical opening is then about 3 inches on a 300 ppi image.
2138
         * We also remove vertical whitespace that is less than 5 pixels
2139
         * wide at this resolution (about 0.1 inches) */
2140
2.56k
    pixInvert(pix7, pix7);
2141
2.56k
    pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0);
2142
2.56k
    pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH,
2143
2.56k
                           L_SELECT_IF_GTE, NULL);
2144
2.56k
    pixCountConnComp(pix9, 8, &nvw);  /* number of vertical white lines */
2145
2.56k
    if (pixadb) {
2146
2.56k
        pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT);
2147
2.56k
        pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT);
2148
2.56k
    }
2149
2150
        /* Require at least 2 of the following 4 conditions for a table.
2151
         * Some tables do not have black (fg) lines, and for those we
2152
         * require more than 6 long vertical whitespace (bg) lines.  */
2153
2.56k
    score = 0;
2154
2.56k
    if (nhb > 1) score++;
2155
2.56k
    if (nvb > 2) score++;
2156
2.56k
    if (nvw > 3) score++;
2157
2.56k
    if (nvw > 6) score++;
2158
2.56k
    *pscore = score;
2159
2160
2.56k
    pixDestroy(&pix1);
2161
2.56k
    pixDestroy(&pix2);
2162
2.56k
    pixDestroy(&pix3);
2163
2.56k
    pixDestroy(&pix4);
2164
2.56k
    pixDestroy(&pix5);
2165
2.56k
    pixDestroy(&pix6);
2166
2.56k
    pixDestroy(&pix7);
2167
2.56k
    pixDestroy(&pix8);
2168
2.56k
    pixDestroy(&pix9);
2169
2.56k
    return 0;
2170
2.65k
}
2171
2172
2173
/*!
2174
 * \brief   pixPrepare1bpp()
2175
 *
2176
 * \param[in]    pixs       any depth
2177
 * \param[in]    box        [optional] if null, use entire pixs
2178
 * \param[in]    cropfract  fraction to be removed from the boundary;
2179
 *                          use 0.0 to retain the entire image
2180
 * \param[in]    outres     desired resolution of output image; if the
2181
 *                          input image resolution is not set, assume
2182
 *                          300 ppi; use 0 to skip scaling.
2183
 * \return  pixd if OK, NULL on error
2184
 *
2185
 * <pre>
2186
 * Notes:
2187
 *      (1) This handles some common pre-processing operations,
2188
 *          where the page segmentation algorithm takes a 1 bpp image.
2189
 * </pre>
2190
 */
2191
PIX *
2192
pixPrepare1bpp(PIX       *pixs,
2193
               BOX       *box,
2194
               l_float32  cropfract,
2195
               l_int32    outres)
2196
6.33k
{
2197
6.33k
l_int32    w, h, res;
2198
6.33k
l_float32  factor;
2199
6.33k
BOX       *box1;
2200
6.33k
PIX       *pix1, *pix2, *pix3, *pix4, *pix5;
2201
2202
6.33k
    if (!pixs)
2203
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2204
2205
        /* Crop the image.  If no box is given, use %cropfract to remove
2206
         * pixels near the image boundary; this helps avoid false
2207
         * negatives from noise that is often found there. */
2208
6.33k
    if (box) {
2209
0
        pix1 = pixClipRectangle(pixs, box, NULL);
2210
6.33k
    } else {
2211
6.33k
        pixGetDimensions(pixs, &w, &h, NULL);
2212
6.33k
        box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h),
2213
6.33k
                         (l_int32)((1.0 - 2 * cropfract) * w),
2214
6.33k
                         (l_int32)((1.0 - 2 * cropfract) * h));
2215
6.33k
        pix1 = pixClipRectangle(pixs, box1, NULL);
2216
6.33k
        boxDestroy(&box1);
2217
6.33k
    }
2218
2219
        /* Convert to 1 bpp with adaptive background cleaning */
2220
6.33k
    if (pixGetDepth(pixs) > 1) {
2221
737
        pix2 = pixConvertTo8(pix1, 0);
2222
737
        pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160);
2223
737
        pixDestroy(&pix1);
2224
737
        pixDestroy(&pix2);
2225
737
        if (!pix3) {
2226
46
            L_INFO("pix cleaning failed\n", __func__);
2227
46
            return NULL;
2228
46
        }
2229
691
        pix4 = pixThresholdToBinary(pix3, 200);
2230
691
        pixDestroy(&pix3);
2231
5.60k
    } else {
2232
5.60k
        pix4 = pixClone(pix1);
2233
5.60k
        pixDestroy(&pix1);
2234
5.60k
    }
2235
2236
        /* Scale the image to the requested output resolution;
2237
           do not scale if %outres <= 0 */
2238
6.29k
    if (outres <= 0)
2239
0
        return pix4;
2240
6.29k
    if ((res = pixGetXRes(pixs)) == 0) {
2241
6.29k
        L_WARNING("Resolution is not set: using 300 ppi\n", __func__);
2242
6.29k
        res = 300;
2243
6.29k
    }
2244
6.29k
    if (res != outres) {
2245
6.29k
        factor = (l_float32)outres / (l_float32)res;
2246
6.29k
        pix5 = pixScale(pix4, factor, factor);
2247
6.29k
    } else {
2248
0
        pix5 = pixClone(pix4);
2249
0
    }
2250
6.29k
    pixDestroy(&pix4);
2251
6.29k
    return pix5;
2252
6.29k
}
2253
2254
2255
/*------------------------------------------------------------------*
2256
 *               Estimate the grayscale background value            *
2257
 *------------------------------------------------------------------*/
2258
/*!
2259
 * \brief   pixEstimateBackground()
2260
 *
2261
 * \param[in]    pixs         8 bpp, with or without colormap
2262
 * \param[in]    darkthresh   pixels below this value are never considered
2263
 *                            part of the background; typ. 70; use 0 to skip
2264
 * \param[in]    edgecrop     fraction of half-width on each side, and of
2265
 *                            half-height at top and bottom, that are cropped
2266
 * \param[out]   pbg          estimated background, or 0 on error
2267
 * \return  0 if OK, 1 on error
2268
 *
2269
 * <pre>
2270
 * Notes:
2271
 *      (1) Caller should check that return bg value is > 0.
2272
 * </pre>
2273
 */
2274
l_ok
2275
pixEstimateBackground(PIX       *pixs,
2276
                      l_int32    darkthresh,
2277
                      l_float32  edgecrop,
2278
                      l_int32   *pbg)
2279
0
{
2280
0
l_int32    w, h, sampling;
2281
0
l_float32  fbg;
2282
0
BOX       *box;
2283
0
PIX       *pix1, *pix2, *pixm;
2284
2285
0
    if (!pbg)
2286
0
        return ERROR_INT("&bg not defined", __func__, 1);
2287
0
    *pbg = 0;
2288
0
    if (!pixs || pixGetDepth(pixs) != 8)
2289
0
        return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1);
2290
0
    if (darkthresh > 128)
2291
0
        L_WARNING("darkthresh unusually large\n", __func__);
2292
0
    if (edgecrop < 0.0 || edgecrop >= 1.0)
2293
0
        return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1);
2294
2295
0
    pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
2296
0
    pixGetDimensions(pix1, &w, &h, NULL);
2297
2298
        /* Optionally crop inner part of image */
2299
0
    if (edgecrop > 0.0) {
2300
0
        box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h,
2301
0
                        (1.0 - edgecrop) * w, (1.0 - edgecrop) * h);
2302
0
        pix2 = pixClipRectangle(pix1, box, NULL);
2303
0
        boxDestroy(&box);
2304
0
    } else {
2305
0
        pix2 = pixClone(pix1);
2306
0
    }
2307
2308
        /* We will use no more than 50K samples */
2309
0
    sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5));
2310
2311
        /* Optionally make a mask over all pixels lighter than %darkthresh */
2312
0
    pixm = NULL;
2313
0
    if (darkthresh > 0) {
2314
0
        pixm = pixThresholdToBinary(pix2, darkthresh);
2315
0
        pixInvert(pixm, pixm);
2316
0
    }
2317
2318
0
    pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL);
2319
0
    *pbg = (l_int32)(fbg + 0.5);
2320
0
    pixDestroy(&pix1);
2321
0
    pixDestroy(&pix2);
2322
0
    pixDestroy(&pixm);
2323
0
    return 0;
2324
0
}
2325
2326
2327
/*---------------------------------------------------------------------*
2328
 *             Largest white or black rectangles in an image           *
2329
 *---------------------------------------------------------------------*/
2330
/*!
2331
 * \brief   pixFindLargeRectangles()
2332
 *
2333
 * \param[in]    pixs       1 bpp
2334
 * \param[in]    polarity   0 within background, 1 within foreground
2335
 * \param[in]    nrect      number of rectangles to be found
2336
 * \param[out]   pboxa      largest rectangles, sorted by decreasing area
2337
 * \param[in,out]  ppixdb   optional return output with rectangles drawn on it
2338
 * \return  0 if OK, 1 on error
2339
 *
2340
 * <pre>
2341
 * Notes:
2342
 *      (1) This does a greedy search to find the largest rectangles,
2343
 *          either black or white and without overlaps, in %pix.
2344
 *      (2) See pixFindLargestRectangle(), which is called multiple
2345
 *          times, for details.  On each call, the largest rectangle
2346
 *          found is painted, so that none of its pixels can be
2347
 *          used later, before calling it again.
2348
 *      (3) This function is surprisingly fast.  Although
2349
 *          pixFindLargestRectangle() runs at about 50 MPix/sec, when it
2350
 *          is run multiple times by pixFindLargeRectangles(), it processes
2351
 *          at 150 - 250 MPix/sec, and the time is approximately linear
2352
 *          in %nrect.  For example, for a 1 MPix image, searching for
2353
 *          the largest 50 boxes takes about 0.2 seconds.
2354
 * </pre>
2355
 */
2356
l_ok
2357
pixFindLargeRectangles(PIX          *pixs,
2358
                       l_int32       polarity,
2359
                       l_int32       nrect,
2360
                       BOXA        **pboxa,
2361
                       PIX         **ppixdb)
2362
0
{
2363
0
l_int32  i, op, bx, by, bw, bh;
2364
0
BOX     *box;
2365
0
BOXA    *boxa;
2366
0
PIX     *pix;
2367
2368
0
    if (ppixdb) *ppixdb = NULL;
2369
0
    if (!pboxa)
2370
0
        return ERROR_INT("&boxa not defined", __func__, 1);
2371
0
    *pboxa = NULL;
2372
0
    if (!pixs || pixGetDepth(pixs) != 1)
2373
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
2374
0
    if (polarity != 0 && polarity != 1)
2375
0
        return ERROR_INT("invalid polarity", __func__, 1);
2376
0
    if (nrect > 1000) {
2377
0
        L_WARNING("large num rectangles = %d requested; using 1000\n",
2378
0
                  __func__, nrect);
2379
0
        nrect = 1000;
2380
0
    }
2381
2382
0
    pix = pixCopy(NULL, pixs);
2383
0
    boxa = boxaCreate(nrect);
2384
0
    *pboxa = boxa;
2385
2386
        /* Sequentially find largest rectangle and fill with opposite color */
2387
0
    for (i = 0; i < nrect; i++) {
2388
0
        if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) {
2389
0
            boxDestroy(&box);
2390
0
            L_ERROR("failure in pixFindLargestRectangle\n", __func__);
2391
0
            break;
2392
0
        }
2393
0
        boxaAddBox(boxa, box, L_INSERT);
2394
0
        op = (polarity == 0) ? PIX_SET : PIX_CLR;
2395
0
        boxGetGeometry(box, &bx, &by, &bw, &bh);
2396
0
        pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0);
2397
0
    }
2398
2399
0
    if (ppixdb)
2400
0
        *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3);
2401
2402
0
    pixDestroy(&pix);
2403
0
    return 0;
2404
0
}
2405
2406
2407
/*!
2408
 * \brief   pixFindLargestRectangle()
2409
 *
2410
 * \param[in]    pixs       1 bpp
2411
 * \param[in]    polarity   0 within background, 1 within foreground
2412
 * \param[out]   pbox       largest area rectangle
2413
 * \param[in,out]  ppixdb   optional return output with rectangle drawn on it
2414
 * \return  0 if OK, 1 on error
2415
 *
2416
 * <pre>
2417
 * Notes:
2418
 *      (1) This is a simple and elegant solution to a problem in
2419
 *          computational geometry that at first appears to be quite
2420
 *          difficult: what is the largest rectangle that can be
2421
 *          placed in the image, covering only pixels of one polarity
2422
 *          (bg or fg)?  The solution is O(n), where n is the number
2423
 *          of pixels in the image, and it requires nothing more than
2424
 *          using a simple recursion relation in a single sweep of the image.
2425
 *      (2) In a sweep from UL to LR with left-to-right being the fast
2426
 *          direction, calculate the largest white rectangle at (x, y),
2427
 *          using previously calculated values at pixels #1 and #2:
2428
 *             #1:    (x, y - 1)
2429
 *             #2:    (x - 1, y)
2430
 *          We also need the most recent "black" pixels that were seen
2431
 *          in the current row and column.
2432
 *          Consider the largest area.  There are only two possibilities:
2433
 *             (a)  Min(w(1), horizdist) * (h(1) + 1)
2434
 *             (b)  Min(h(2), vertdist) * (w(2) + 1)
2435
 *          where
2436
 *             horizdist: the distance from the rightmost "black" pixel seen
2437
 *                        in the current row across to the current pixel
2438
 *             vertdist: the distance from the lowest "black" pixel seen
2439
 *                       in the current column down to the current pixel
2440
 *          and we choose the Max of (a) and (b).
2441
 *      (3) To convince yourself that these recursion relations are correct,
2442
 *          it helps to draw the maximum rectangles at #1 and #2.
2443
 *          Then for #1, you try to extend the rectangle down one line,
2444
 *          so that the height is h(1) + 1.  Do you get the full
2445
 *          width of #1, w(1)?  It depends on where the black pixels are
2446
 *          in the current row.  You know the final width is bounded by w(1)
2447
 *          and w(2) + 1, but the actual value depends on the distribution
2448
 *          of black pixels in the current row that are at a distance
2449
 *          from the current pixel that is between these limits.
2450
 *          We call that value "horizdist", and the area is then given
2451
 *          by the expression (a) above.  Using similar reasoning for #2,
2452
 *          where you attempt to extend the rectangle to the right
2453
 *          by 1 pixel, you arrive at (b).  The largest rectangle is
2454
 *          then found by taking the Max.
2455
 * </pre>
2456
 */
2457
l_ok
2458
pixFindLargestRectangle(PIX         *pixs,
2459
                        l_int32      polarity,
2460
                        BOX        **pbox,
2461
                        PIX        **ppixdb)
2462
0
{
2463
0
l_int32    i, j, w, h, d, wpls, val;
2464
0
l_int32    wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2;
2465
0
l_int32    xmax, ymax;  /* LR corner of the largest rectangle */
2466
0
l_int32    maxarea, wmax, hmax, vertdist, horizdist, prevfg;
2467
0
l_int32   *lowestfg;
2468
0
l_uint32  *datas, *lines;
2469
0
l_uint32 **linew, **lineh;
2470
0
BOX       *box;
2471
0
PIX       *pixw, *pixh;  /* keeps the width and height for the largest */
2472
                         /* rectangles whose LR corner is located there. */
2473
2474
0
    if (ppixdb) *ppixdb = NULL;
2475
0
    if (!pbox)
2476
0
        return ERROR_INT("&box not defined", __func__, 1);
2477
0
    *pbox = NULL;
2478
0
    if (!pixs)
2479
0
        return ERROR_INT("pixs not defined", __func__, 1);
2480
0
    pixGetDimensions(pixs, &w, &h, &d);
2481
0
    if (d != 1)
2482
0
        return ERROR_INT("pixs not 1 bpp", __func__, 1);
2483
0
    if (polarity != 0 && polarity != 1)
2484
0
        return ERROR_INT("invalid polarity", __func__, 1);
2485
2486
        /* Initialize lowest "fg" seen so far for each column */
2487
0
    lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
2488
0
    for (i = 0; i < w; i++)
2489
0
        lowestfg[i] = -1;
2490
2491
        /* The combination (val ^ polarity) is the color for which we
2492
         * are searching for the maximum rectangle.  For polarity == 0,
2493
         * we search in the bg (white). */
2494
0
    pixw = pixCreate(w, h, 32);  /* stores width */
2495
0
    pixh = pixCreate(w, h, 32);  /* stores height */
2496
0
    linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL);
2497
0
    lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL);
2498
0
    datas = pixGetData(pixs);
2499
0
    wpls = pixGetWpl(pixs);
2500
0
    maxarea = xmax = ymax = wmax = hmax = 0;
2501
0
    for (i = 0; i < h; i++) {
2502
0
        lines = datas + i * wpls;
2503
0
        prevfg = -1;
2504
0
        for (j = 0; j < w; j++) {
2505
0
            val = GET_DATA_BIT(lines, j);
2506
0
            if ((val ^ polarity) == 0) {  /* bg (0) if polarity == 0, etc. */
2507
0
                if (i == 0 && j == 0) {
2508
0
                    wp = hp = 1;
2509
0
                } else if (i == 0) {
2510
0
                    wp = linew[i][j - 1] + 1;
2511
0
                    hp = 1;
2512
0
                } else if (j == 0) {
2513
0
                    wp = 1;
2514
0
                    hp = lineh[i - 1][j] + 1;
2515
0
                } else {
2516
                        /* Expand #1 prev rectangle down */
2517
0
                    w1 = linew[i - 1][j];
2518
0
                    h1 = lineh[i - 1][j];
2519
0
                    horizdist = j - prevfg;
2520
0
                    wmin = L_MIN(w1, horizdist);  /* width of new rectangle */
2521
0
                    area1 = wmin * (h1 + 1);
2522
2523
                        /* Expand #2 prev rectangle to right */
2524
0
                    w2 = linew[i][j - 1];
2525
0
                    h2 = lineh[i][j - 1];
2526
0
                    vertdist = i - lowestfg[j];
2527
0
                    hmin = L_MIN(h2, vertdist);  /* height of new rectangle */
2528
0
                    area2 = hmin * (w2 + 1);
2529
2530
0
                    if (area1 > area2) {
2531
0
                         wp = wmin;
2532
0
                         hp = h1 + 1;
2533
0
                    } else {
2534
0
                         wp = w2 + 1;
2535
0
                         hp = hmin;
2536
0
                    }
2537
0
                }
2538
0
            } else {  /* fg (1) if polarity == 0; bg (0) if polarity == 1 */
2539
0
                prevfg = j;
2540
0
                lowestfg[j] = i;
2541
0
                wp = hp = 0;
2542
0
            }
2543
0
            linew[i][j] = wp;
2544
0
            lineh[i][j] = hp;
2545
0
            if (wp * hp > maxarea) {
2546
0
                maxarea = wp * hp;
2547
0
                xmax = j;
2548
0
                ymax = i;
2549
0
                wmax = wp;
2550
0
                hmax = hp;
2551
0
            }
2552
0
        }
2553
0
    }
2554
2555
        /* Translate from LR corner to Box coords (UL corner, w, h) */
2556
0
    box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax);
2557
0
    *pbox = box;
2558
2559
0
    if (ppixdb) {
2560
0
        *ppixdb = pixConvertTo8(pixs, TRUE);
2561
0
        pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0);
2562
0
    }
2563
2564
0
    LEPT_FREE(linew);
2565
0
    LEPT_FREE(lineh);
2566
0
    LEPT_FREE(lowestfg);
2567
0
    pixDestroy(&pixw);
2568
0
    pixDestroy(&pixh);
2569
0
    return 0;
2570
0
}
2571
2572
2573
/*---------------------------------------------------------------------*
2574
 *            Generate rectangle inside connected component            *
2575
 *---------------------------------------------------------------------*/
2576
/*!
2577
 * \brief   pixFindRectangleInCC()
2578
 *
2579
 * \param[in]    pixs     1 bpp, with sufficient closings to make the fg be
2580
 *                        a single c.c. that is a convex hull
2581
 * \param[in]    boxs     [optional] if NULL, %pixs should be a minimum
2582
 *                        container of a single c.c.
2583
 * \param[in]    fract    first and all consecutive lines found must be at
2584
 *                        least this fraction of the fast scan dimension
2585
 * \param[in]    dir      L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of
2586
 *                        fast scan
2587
 * \param[in]    select   L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
2588
 *                        L_LARGEST_AREA, L_SMALEST_AREA
2589
 * \param[in]    debug    if 1, generates output pdf showing intermediate
2590
 *                        computation and final result
2591
 * \return  box  of included rectangle, or NULL on error
2592
 *
2593
 * <pre>
2594
 * Notes:
2595
 *      (1) Computation is similar to pixFindLargestRectangle(), but allows
2596
 *          a different set of results to choose from.
2597
 *      (2) Select the fast scan direction.  Then, scanning in the slow
2598
 *          direction, find the longest run of ON pixels in the fast
2599
 *          scan direction and look for the first run that is longer
2600
 *          than %fract of the dimension.  Continue until a shorter run
2601
 *          is found.  This generates a box of ON pixels fitting into the c.c.
2602
 *      (3) Do this from both slow scan directions and use %select to get
2603
 *          a resulting box from these two.
2604
 *      (4) The extracted rectangle is not necessarily the largest that
2605
 *          can fit in the c.c.  To get that, use pixFindLargestRectangle().
2606
 */
2607
BOX *
2608
pixFindRectangleInCC(PIX       *pixs,
2609
                     BOX       *boxs,
2610
                     l_float32  fract,
2611
                     l_int32    dir,
2612
                     l_int32    select,
2613
                     l_int32    debug)
2614
0
{
2615
0
l_int32  x, y, i, w, h, w1, h1, w2, h2, found, res;
2616
0
l_int32  xfirst, xlast, xstart, yfirst, ylast, length;
2617
0
BOX     *box1, *box2, *box3, *box4, *box5;
2618
0
PIX     *pix1, *pix2, *pixdb1, *pixdb2;
2619
0
PIXA    *pixadb;
2620
2621
0
    if (!pixs || pixGetDepth(pixs) != 1)
2622
0
        return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
2623
0
    if (fract <= 0.0 || fract > 1.0)
2624
0
        return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
2625
0
    if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
2626
0
        return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
2627
0
    if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
2628
0
        select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
2629
0
        return (BOX *)ERROR_PTR("invalid select", __func__, NULL);
2630
2631
        /* Extract the c.c. if necessary */
2632
0
    x = y = 0;
2633
0
    if (boxs) {
2634
0
        pix1 = pixClipRectangle(pixs, boxs, NULL);
2635
0
        boxGetGeometry(boxs, &x, &y, NULL, NULL);
2636
0
    } else {
2637
0
        pix1 = pixClone(pixs);
2638
0
    }
2639
2640
        /* All fast scans are horizontal; rotate 90 deg cw if necessary */
2641
0
    if (dir == L_SCAN_VERTICAL)
2642
0
        pix2 = pixRotate90(pix1, 1);
2643
0
    else  /* L_SCAN_HORIZONTAL */
2644
0
        pix2 = pixClone(pix1);
2645
0
    pixGetDimensions(pix2, &w, &h, NULL);
2646
2647
0
    pixadb = (debug) ? pixaCreate(0) : NULL;
2648
0
    pixdb1 = NULL;
2649
0
    if (pixadb) {
2650
0
        lept_mkdir("lept/rect");
2651
0
        pixaAddPix(pixadb, pix1, L_CLONE);
2652
0
        pixdb1 = pixConvertTo32(pix2);
2653
0
    }
2654
0
    pixDestroy(&pix1);
2655
2656
        /* Scanning down, find the first scanline with a long enough run.
2657
         * That run goes from (xfirst, yfirst) to (xlast, yfirst).  */
2658
0
    found = FALSE;
2659
0
    for (i = 0; i < h; i++) {
2660
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2661
0
        if (length >= (l_int32)(fract * w + 0.5)) {
2662
0
            yfirst = i;
2663
0
            xfirst = xstart;
2664
0
            xlast = xfirst + length - 1;
2665
0
            found = TRUE;
2666
0
            break;
2667
0
        }
2668
0
    }
2669
0
    if (!found) {
2670
0
        L_WARNING("no run of sufficient size was found\n", __func__);
2671
0
        pixDestroy(&pix2);
2672
0
        pixDestroy(&pixdb1);
2673
0
        pixaDestroy(&pixadb);
2674
0
        return NULL;
2675
0
    }
2676
2677
         /* Continue down until the condition fails */
2678
0
    w1 = xlast - xfirst + 1;
2679
0
    h1 = h - yfirst;  /* init */
2680
0
    ylast = h - 1;  /* init */
2681
0
    for (i = yfirst + 1; i < h; i++) {
2682
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2683
0
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2684
0
            i == h - 1) {
2685
0
            ylast = i - 1;
2686
0
            h1 = ylast - yfirst + 1;
2687
0
            break;
2688
0
        }
2689
0
    }
2690
0
    box1 = boxCreate(xfirst, yfirst, w1, h1);
2691
2692
        /* Scanning up, find the first scanline with a long enough run.
2693
         * That run goes from (xfirst, ylast) to (xlast, ylast).  */
2694
0
    for (i = h - 1; i >= 0; i--) {
2695
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2696
0
        if (length >= (l_int32)(fract * w + 0.5)) {
2697
0
            ylast = i;
2698
0
            xfirst = xstart;
2699
0
            xlast = xfirst + length - 1;
2700
0
            break;
2701
0
        }
2702
0
    }
2703
2704
         /* Continue up until the condition fails */
2705
0
    w2 = xlast - xfirst + 1;
2706
0
    h2 = ylast + 1;  /* initialize */
2707
0
    for (i = ylast - 1; i >= 0; i--) {
2708
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2709
0
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2710
0
            i == 0) {
2711
0
            yfirst = i + 1;
2712
0
            h2 = ylast - yfirst + 1;
2713
0
            break;
2714
0
        }
2715
0
    }
2716
0
    box2 = boxCreate(xfirst, yfirst, w2, h2);
2717
0
    pixDestroy(&pix2);
2718
2719
0
    if (pixadb) {
2720
0
        pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
2721
0
        pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
2722
0
        pixaAddPix(pixadb, pixdb1, L_INSERT);
2723
0
    }
2724
2725
        /* Select the final result from the two boxes */
2726
0
    if (select == L_GEOMETRIC_UNION)
2727
0
        box3 = boxBoundingRegion(box1, box2);
2728
0
    else if (select == L_GEOMETRIC_INTERSECTION)
2729
0
        box3 = boxOverlapRegion(box1, box2);
2730
0
    else if (select == L_LARGEST_AREA)
2731
0
        box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2732
0
    else  /* select == L_SMALLEST_AREA) */
2733
0
        box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2734
0
    boxDestroy(&box1);
2735
0
    boxDestroy(&box2);
2736
2737
        /* Rotate the box 90 degrees ccw if necessary */
2738
0
    box4 = NULL;
2739
0
    if (box3) {
2740
0
        if (dir == L_SCAN_VERTICAL)
2741
0
            box4 = boxRotateOrth(box3, w, h, 3);
2742
0
        else
2743
0
            box4 = boxCopy(box3);
2744
0
    }
2745
2746
        /* Transform back to global coordinates if %boxs exists */
2747
0
    box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
2748
0
    boxDestroy(&box3);
2749
0
    boxDestroy(&box4);
2750
2751
        /* Debug output */
2752
0
    if (pixadb) {
2753
0
        pixdb1 = pixConvertTo8(pixs, 0);
2754
0
        pixAddConstantGray(pixdb1, 190);
2755
0
        pixdb2 = pixConvertTo32(pixdb1);
2756
0
        if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
2757
0
        pixaAddPix(pixadb, pixdb2, L_INSERT);
2758
0
        res = pixGetXRes(pixs);
2759
0
        L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
2760
0
        pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
2761
0
                        "/tmp/lept/rect/fitrect.pdf");
2762
0
        pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
2763
0
        pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
2764
0
        pixDestroy(&pix1);
2765
0
        pixDestroy(&pixdb1);
2766
0
        pixaDestroy(&pixadb);
2767
0
    }
2768
2769
0
    return box5;
2770
0
}
2771
2772
/*------------------------------------------------------------------*
2773
 *                    Automatic photoinvert for OCR                 *
2774
 *------------------------------------------------------------------*/
2775
/*!
2776
 * \brief   pixAutoPhotoinvert()
2777
 *
2778
 * \param[in]    pixs       any depth, colormap ok
2779
 * \param[in]    thresh     binarization threshold; use 0 for default
2780
 * \param[out]   ppixm      [optional] image regions to be inverted
2781
 * \param[out]   pixadb     [optional] debug; input NULL to skip
2782
 * \return  pixd   1 bpp image to be sent to OCR, or NULL on error
2783
 *
2784
 * <pre>
2785
 * Notes:
2786
 *      (1) A 1 bpp image is returned, where pixels in image regions are
2787
 *          photo-inverted.
2788
 *      (2) If there is light text with a dark background, this will
2789
 *          identify the region and photoinvert the pixels there if
2790
 *          there are at least 60% fg pixels in the region.
2791
 *      (3) For debug output, input a (typically empty) %pixadb.
2792
 * </pre>
2793
 */
2794
PIX *
2795
pixAutoPhotoinvert(PIX       *pixs,
2796
                   l_int32    thresh,
2797
                   PIX      **ppixm,
2798
                   PIXA      *pixadb)
2799
0
{
2800
0
l_int32    i, n, empty, x, y, w, h;
2801
0
l_float32  fgfract;
2802
0
BOX       *box1;
2803
0
BOXA      *boxa1;
2804
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5;
2805
2806
0
    if (ppixm) *ppixm = NULL;
2807
0
    if (!pixs)
2808
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2809
0
    if (thresh == 0) thresh = 128;
2810
2811
0
    if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL)
2812
0
        return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
2813
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2814
2815
        /* Identify regions for photo-inversion:
2816
         * (1) Start with the halftone mask.
2817
         * (2) Eliminate ordinary text and halftones in the mask.
2818
         * (3) Some regions of inverted text may have been removed in
2819
         *     steps (1) and (2).  Conditionally fill holes in the mask,
2820
         *     but do not fill out to the bounding rect. */
2821
0
    pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb);
2822
0
    pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0);  /* remove noise */
2823
0
    pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0);
2824
0
    if (pixadb) {
2825
0
        pixaAddPix(pixadb, pix2, L_CLONE);
2826
0
        pixaAddPix(pixadb, pix3, L_CLONE);
2827
0
        pixaAddPix(pixadb, pix4, L_COPY);
2828
0
    }
2829
0
    pixDestroy(&pix2);
2830
0
    pixDestroy(&pix3);
2831
0
    pixZero(pix4, &empty);
2832
0
    if (empty) {
2833
0
        pixDestroy(&pix4);
2834
0
        return pix1;
2835
0
    }
2836
2837
        /* Examine each component and validate the inversion.
2838
         * Require at least 60% of pixels under each component to be FG. */
2839
0
    boxa1 = pixConnCompBB(pix4, 8);
2840
0
    n = boxaGetCount(boxa1);
2841
0
    for (i = 0; i < n; i++) {
2842
0
        box1 = boxaGetBox(boxa1, i, L_COPY);
2843
0
        pix5 = pixClipRectangle(pix1, box1, NULL);
2844
0
        pixForegroundFraction(pix5, &fgfract);
2845
0
        if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract);
2846
0
        boxGetGeometry(box1, &x, &y, &w, &h);
2847
0
        if (fgfract < 0.6)  /* erase from the mask */
2848
0
            pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0);
2849
0
        pixDestroy(&pix5);
2850
0
        boxDestroy(&box1);
2851
0
    }
2852
0
    boxaDestroy(&boxa1);
2853
0
    pixZero(pix4, &empty);
2854
0
    if (empty) {
2855
0
        pixDestroy(&pix4);
2856
0
        return pix1;
2857
0
    }
2858
2859
        /* Combine pixels of the photo-inverted pix with the binarized input */
2860
0
    pix5 = pixInvert(NULL, pix1);
2861
0
    pixCombineMasked(pix1, pix5, pix4);
2862
2863
0
    if (pixadb) {
2864
0
        pixaAddPix(pixadb, pix5, L_CLONE);
2865
0
        pixaAddPix(pixadb, pix1, L_COPY);
2866
0
    }
2867
0
    pixDestroy(&pix5);
2868
0
    if (ppixm)
2869
0
        *ppixm = pix4;
2870
0
    else
2871
0
        pixDestroy(&pix4);
2872
0
    return pix1;
2873
0
}