Coverage Report

Created: 2025-07-12 06:44

/src/leptonica/src/pageseg.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  pageseg.c
29
 * <pre>
30
 *
31
 *      Top level page segmentation
32
 *          l_int32   pixGetRegionsBinary()
33
 *
34
 *      Halftone region extraction
35
 *          PIX      *pixGenHalftoneMask()    **Deprecated wrapper**
36
 *          PIX      *pixGenerateHalftoneMask()
37
 *
38
 *      Textline extraction
39
 *          PIX      *pixGenTextlineMask()
40
 *
41
 *      Textblock extraction
42
 *          PIX      *pixGenTextblockMask()
43
 *
44
 *      Location and extraction of page foreground; cleaning pages
45
 *          PIX            *pixCropImage()
46
 *          static l_int32  pixMaxCompAfterVClosing()
47
 *          static l_int32  pixFindPageInsideBlackBorder()
48
 *          static PIX     *pixRescaleForCropping()
49
 *          PIX            *pixCleanImage()
50
 *          BOX            *pixFindPageForeground()
51
 *
52
 *      Extraction of characters from image with only text
53
 *          l_int32   pixSplitIntoCharacters()
54
 *          BOXA     *pixSplitComponentWithProfile()
55
 *
56
 *      Extraction of lines of text
57
 *          PIXA     *pixExtractTextlines()
58
 *          PIXA     *pixExtractRawTextlines()
59
 *
60
 *      How many text columns
61
 *          l_int32   pixCountTextColumns()
62
 *
63
 *      Decision: text vs photo
64
 *          l_int32   pixDecideIfText()
65
 *          l_int32   pixFindThreshFgExtent()
66
 *
67
 *      Decision: table vs text
68
 *          l_int32   pixDecideIfTable()
69
 *          Pix      *pixPrepare1bpp()
70
 *
71
 *      Estimate the grayscale background value
72
 *          l_int32   pixEstimateBackground()
73
 *
74
 *      Largest white or black rectangles in an image
75
 *          l_int32   pixFindLargeRectangles()
76
 *          l_int32   pixFindLargestRectangle()
77
 *
78
 *      Generate rectangle inside connected component
79
 *          BOX      *pixFindRectangleInCC()
80
 *
81
 *      Automatic photoinvert for OCR
82
 *          PIX      *pixAutoPhotoinvert()
83
 * </pre>
84
 */
85
86
#ifdef HAVE_CONFIG_H
87
#include <config_auto.h>
88
#endif  /* HAVE_CONFIG_H */
89
90
#include <math.h>
91
#include "allheaders.h"
92
#include "pix_internal.h"
93
94
    /* These functions are not intended to work on very low-res images */
95
static const l_int32  MinWidth = 100;
96
static const l_int32  MinHeight = 100;
97
98
static l_ok pixMaxCompAfterVClosing(PIX *pixs, BOX **pbox);
99
static l_ok pixFindPageInsideBlackBorder(PIX *pixs, BOX **pbox);
100
static PIX *pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h,
101
                                  l_int32 lr_border, l_int32 tb_border,
102
                                  l_float32 maxwiden, PIX **ppixsc);
103
104
/*------------------------------------------------------------------*
105
 *                     Top level page segmentation                  *
106
 *------------------------------------------------------------------*/
107
/*!
108
 * \brief   pixGetRegionsBinary()
109
 *
110
 * \param[in]    pixs      1 bpp, assumed to be 300 to 400 ppi
111
 * \param[out]   ppixhm    [optional] halftone mask
112
 * \param[out]   ppixtm    [optional] textline mask
113
 * \param[out]   ppixtb    [optional] textblock mask
114
 * \param[in]    pixadb    input for collecting debug pix; use NULL to skip
115
 * \return  0 if OK, 1 on error
116
 *
117
 * <pre>
118
 * Notes:
119
 *      (1) It is best to deskew the image before segmenting.
120
 *      (2) Passing in %pixadb enables debug output.
121
 * </pre>
122
 */
123
l_ok
124
pixGetRegionsBinary(PIX   *pixs,
125
                    PIX  **ppixhm,
126
                    PIX  **ppixtm,
127
                    PIX  **ppixtb,
128
                    PIXA  *pixadb)
129
0
{
130
0
l_int32  w, h, htfound, tlfound;
131
0
PIX     *pixr, *pix1, *pix2;
132
0
PIX     *pixtext;  /* text pixels only */
133
0
PIX     *pixhm2;   /* halftone mask; 2x reduction */
134
0
PIX     *pixhm;    /* halftone mask;  */
135
0
PIX     *pixtm2;   /* textline mask; 2x reduction */
136
0
PIX     *pixtm;    /* textline mask */
137
0
PIX     *pixvws;   /* vertical white space mask */
138
0
PIX     *pixtb2;   /* textblock mask; 2x reduction */
139
0
PIX     *pixtbf2;  /* textblock mask; 2x reduction; small comps filtered */
140
0
PIX     *pixtb;    /* textblock mask */
141
142
0
    if (ppixhm) *ppixhm = NULL;
143
0
    if (ppixtm) *ppixtm = NULL;
144
0
    if (ppixtb) *ppixtb = NULL;
145
0
    if (!pixs || pixGetDepth(pixs) != 1)
146
0
        return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
147
0
    pixGetDimensions(pixs, &w, &h, NULL);
148
0
    if (w < MinWidth || h < MinHeight) {
149
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
150
0
        return 1;
151
0
    }
152
153
        /* 2x reduce, to 150 -200 ppi */
154
0
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
155
0
    if (pixadb) pixaAddPix(pixadb, pixr, L_COPY);
156
157
        /* Get the halftone mask */
158
0
    pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb);
159
160
        /* Get the textline mask from the text pixels */
161
0
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb);
162
163
        /* Get the textblock mask from the textline mask */
164
0
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb);
165
0
    pixDestroy(&pixr);
166
0
    pixDestroy(&pixtext);
167
0
    pixDestroy(&pixvws);
168
169
        /* Remove small components from the mask, where a small
170
         * component is defined as one with both width and height < 60 */
171
0
    pixtbf2 = NULL;
172
0
    if (pixtb2) {
173
0
        pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
174
0
                                  L_SELECT_IF_GTE, NULL);
175
0
        pixDestroy(&pixtb2);
176
0
        if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY);
177
0
    }
178
179
        /* Expand all masks to full resolution, and do filling or
180
         * small dilations for better coverage. */
181
0
    pixhm = pixExpandReplicate(pixhm2, 2);
182
0
    pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
183
0
    pixOr(pixhm, pixhm, pix1);
184
0
    pixDestroy(&pixhm2);
185
0
    pixDestroy(&pix1);
186
0
    if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
187
188
0
    pix1 = pixExpandReplicate(pixtm2, 2);
189
0
    pixtm = pixDilateBrick(NULL, pix1, 3, 3);
190
0
    pixDestroy(&pixtm2);
191
0
    pixDestroy(&pix1);
192
0
    if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY);
193
194
0
    if (pixtbf2) {
195
0
        pix1 = pixExpandReplicate(pixtbf2, 2);
196
0
        pixtb = pixDilateBrick(NULL, pix1, 3, 3);
197
0
        pixDestroy(&pixtbf2);
198
0
        pixDestroy(&pix1);
199
0
        if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY);
200
0
    } else {
201
0
        pixtb = pixCreateTemplate(pixs);  /* empty mask */
202
0
    }
203
204
        /* Debug: identify objects that are neither text nor halftone image */
205
0
    if (pixadb) {
206
0
        pix1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
207
0
        pix2 = pixSubtract(NULL, pix1, pixhm);  /* remove halftone pixels */
208
0
        pixaAddPix(pixadb, pix2, L_INSERT);
209
0
        pixDestroy(&pix1);
210
0
    }
211
212
        /* Debug: display textline components with random colors */
213
0
    if (pixadb) {
214
0
        l_int32  w, h;
215
0
        BOXA    *boxa;
216
0
        PIXA    *pixa;
217
0
        boxa = pixConnComp(pixtm, &pixa, 8);
218
0
        pixGetDimensions(pixtm, &w, &h, NULL);
219
0
        pix1 = pixaDisplayRandomCmap(pixa, w, h);
220
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
221
0
        pixaAddPix(pixadb, pix1, L_INSERT);
222
0
        pixaDestroy(&pixa);
223
0
        boxaDestroy(&boxa);
224
0
    }
225
226
        /* Debug: identify the outlines of each textblock */
227
0
    if (pixadb) {
228
0
        PIXCMAP  *cmap;
229
0
        PTAA     *ptaa;
230
0
        ptaa = pixGetOuterBordersPtaa(pixtb);
231
0
        lept_mkdir("lept/pageseg");
232
0
        ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1);
233
0
        pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
234
0
        cmap = pixGetColormap(pix1);
235
0
        pixcmapResetColor(cmap, 0, 130, 130, 130);
236
0
        pixaAddPix(pixadb, pix1, L_INSERT);
237
0
        ptaaDestroy(&ptaa);
238
0
    }
239
240
        /* Debug: get b.b. for all mask components */
241
0
    if (pixadb) {
242
0
        BOXA  *bahm, *batm, *batb;
243
0
        bahm = pixConnComp(pixhm, NULL, 4);
244
0
        batm = pixConnComp(pixtm, NULL, 4);
245
0
        batb = pixConnComp(pixtb, NULL, 4);
246
0
        boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm);
247
0
        boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm);
248
0
        boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb);
249
0
        boxaDestroy(&bahm);
250
0
        boxaDestroy(&batm);
251
0
        boxaDestroy(&batb);
252
0
    }
253
0
    if (pixadb) {
254
0
        pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation",
255
0
                         "/tmp/lept/pageseg/debug.pdf");
256
0
        L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__);
257
0
    }
258
259
0
    if (ppixhm)
260
0
        *ppixhm = pixhm;
261
0
    else
262
0
        pixDestroy(&pixhm);
263
0
    if (ppixtm)
264
0
        *ppixtm = pixtm;
265
0
    else
266
0
        pixDestroy(&pixtm);
267
0
    if (ppixtb)
268
0
        *ppixtb = pixtb;
269
0
    else
270
0
        pixDestroy(&pixtb);
271
272
0
    return 0;
273
0
}
274
275
276
/*------------------------------------------------------------------*
277
 *                    Halftone region extraction                    *
278
 *------------------------------------------------------------------*/
279
/*!
280
 * \brief   pixGenHalftoneMask()
281
 *
282
 * <pre>
283
 * Deprecated:
284
 *   This wrapper avoids an ABI change with tesseract 3.0.4.
285
 *   It should be removed when we no longer need to support 3.0.4.
286
 *   The debug parameter is ignored (assumed 0).
287
 * </pre>
288
 */
289
PIX *
290
pixGenHalftoneMask(PIX      *pixs,
291
                   PIX     **ppixtext,
292
                   l_int32  *phtfound,
293
                   l_int32   debug)
294
0
{
295
0
    return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL);
296
0
}
297
298
299
/*!
300
 * \brief   pixGenerateHalftoneMask()
301
 *
302
 * \param[in]    pixs      1 bpp, assumed to be 150 to 200 ppi
303
 * \param[out]   ppixtext  [optional] text part of pixs
304
 * \param[out]   phtfound  [optional] 1 if the mask is not empty
305
 * \param[in]    pixadb    input for collecting debug pix; use NULL to skip
306
 * \return  pixd halftone mask, or NULL on error
307
 *
308
 * <pre>
309
 * Notes:
310
 *      (1) This is not intended to work on small thumbnails.  The
311
 *          dimensions of pixs must be at least MinWidth x MinHeight.
312
 * </pre>
313
 */
314
PIX *
315
pixGenerateHalftoneMask(PIX      *pixs,
316
                        PIX     **ppixtext,
317
                        l_int32  *phtfound,
318
                        PIXA     *pixadb)
319
0
{
320
0
l_int32  w, h, empty;
321
0
PIX     *pix1, *pix2, *pixhs, *pixhm, *pixd;
322
323
0
    if (ppixtext) *ppixtext = NULL;
324
0
    if (phtfound) *phtfound = 0;
325
0
    if (!pixs || pixGetDepth(pixs) != 1)
326
0
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
327
0
    pixGetDimensions(pixs, &w, &h, NULL);
328
0
    if (w < MinWidth || h < MinHeight) {
329
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
330
0
        return NULL;
331
0
    }
332
333
        /* Compute seed for halftone parts at 8x reduction */
334
0
    pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0);
335
0
    pix2 = pixOpenBrick(NULL, pix1, 5, 5);
336
0
    pixhs = pixExpandReplicate(pix2, 4);  /* back to 2x reduction */
337
0
    pixDestroy(&pix1);
338
0
    pixDestroy(&pix2);
339
0
    if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY);
340
341
        /* Compute mask for connected regions */
342
0
    pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
343
0
    if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
344
345
        /* Fill seed into mask to get halftone mask */
346
0
    pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
347
0
    if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
348
349
#if 0
350
    pixOpenBrick(pixd, pixd, 9, 9);
351
#endif
352
353
        /* Check if mask is empty */
354
0
    pixZero(pixd, &empty);
355
0
    if (phtfound && !empty)
356
0
        *phtfound = 1;
357
358
        /* Optionally, get all pixels that are not under the halftone mask */
359
0
    if (ppixtext) {
360
0
        if (empty)
361
0
            *ppixtext = pixCopy(NULL, pixs);
362
0
        else
363
0
            *ppixtext = pixSubtract(NULL, pixs, pixd);
364
0
        if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY);
365
0
    }
366
367
0
    pixDestroy(&pixhs);
368
0
    pixDestroy(&pixhm);
369
0
    return pixd;
370
0
}
371
372
373
/*------------------------------------------------------------------*
374
 *                         Textline extraction                      *
375
 *------------------------------------------------------------------*/
376
/*!
377
 * \brief   pixGenTextlineMask()
378
 *
379
 * \param[in]    pixs      1 bpp, assumed to be 150 to 200 ppi
380
 * \param[out]   ppixvws   vertical whitespace mask
381
 * \param[out]   ptlfound  [optional] 1 if the mask is not empty
382
 * \param[in]    pixadb    input for collecting debug pix; use NULL to skip
383
 * \return  pixd textline mask, or NULL on error
384
 *
385
 * <pre>
386
 * Notes:
387
 *      (1) The input pixs should be deskewed.
388
 *      (2) pixs should have no halftone pixels.
389
 *      (3) This is not intended to work on small thumbnails.  The
390
 *          dimensions of pixs must be at least MinWidth x MinHeight.
391
 *      (4) Both the input image and the returned textline mask
392
 *          are at the same resolution.
393
 * </pre>
394
 */
395
PIX *
396
pixGenTextlineMask(PIX      *pixs,
397
                   PIX     **ppixvws,
398
                   l_int32  *ptlfound,
399
                   PIXA     *pixadb)
400
0
{
401
0
l_int32  w, h, empty;
402
0
PIX     *pix1, *pix2, *pixvws, *pixd;
403
404
0
    if (ptlfound) *ptlfound = 0;
405
0
    if (!ppixvws)
406
0
        return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL);
407
0
    *ppixvws = NULL;
408
0
    if (!pixs || pixGetDepth(pixs) != 1)
409
0
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
410
0
    pixGetDimensions(pixs, &w, &h, NULL);
411
0
    if (w < MinWidth || h < MinHeight) {
412
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
413
0
        return NULL;
414
0
    }
415
416
        /* First we need a vertical whitespace mask.  Invert the image. */
417
0
    pix1 = pixInvert(NULL, pixs);
418
419
        /* The whitespace mask will break textlines where there
420
         * is a large amount of white space below or above.
421
         * This can be prevented by identifying regions of the
422
         * inverted image that have large horizontal extent (bigger than
423
         * the separation between columns) and significant
424
         * vertical extent (bigger than the separation between
425
         * textlines), and subtracting this from the bg. */
426
0
    pix2 = pixMorphCompSequence(pix1, "o80.60", 0);
427
0
    pixSubtract(pix1, pix1, pix2);
428
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
429
0
    pixDestroy(&pix2);
430
431
        /* Identify vertical whitespace by opening the remaining bg.
432
         * o5.1 removes thin vertical bg lines and o1.200 extracts
433
         * long vertical bg lines. */
434
0
    pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0);
435
0
    *ppixvws = pixvws;
436
0
    if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY);
437
0
    pixDestroy(&pix1);
438
439
        /* Three steps to getting text line mask:
440
         *   (1) close the characters and words in the textlines
441
         *   (2) open the vertical whitespace corridors back up
442
         *   (3) small opening to remove noise    */
443
0
    pix1 = pixMorphSequence(pixs, "c30.1", 0);
444
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
445
0
    pixd = pixSubtract(NULL, pix1, pixvws);
446
0
    pixOpenBrick(pixd, pixd, 3, 3);
447
0
    if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
448
0
    pixDestroy(&pix1);
449
450
        /* Check if text line mask is empty */
451
0
    if (ptlfound) {
452
0
        pixZero(pixd, &empty);
453
0
        if (!empty)
454
0
            *ptlfound = 1;
455
0
    }
456
457
0
    return pixd;
458
0
}
459
460
461
/*------------------------------------------------------------------*
462
 *                       Textblock extraction                       *
463
 *------------------------------------------------------------------*/
464
/*!
465
 * \brief   pixGenTextblockMask()
466
 *
467
 * \param[in]    pixs     1 bpp, textline mask, assumed to be 150 to 200 ppi
468
 * \param[in]    pixvws   vertical white space mask
469
 * \param[in]    pixadb   input for collecting debug pix; use NULL to skip
470
 * \return  pixd textblock mask, or NULL if empty or on error
471
 *
472
 * <pre>
473
 * Notes:
474
 *      (1) Both the input masks (textline and vertical white space) and
475
 *          the returned textblock mask are at the same resolution.
476
 *      (2) This is not intended to work on small thumbnails.  The
477
 *          dimensions of pixs must be at least MinWidth x MinHeight.
478
 *      (3) The result is somewhat noisy, in that small "blocks" of
479
 *          text may be included.  These can be removed by post-processing,
480
 *          using, e.g.,
481
 *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
482
 *                             L_SELECT_IF_GTE, NULL);
483
 * </pre>
484
 */
485
PIX *
486
pixGenTextblockMask(PIX   *pixs,
487
                    PIX   *pixvws,
488
                    PIXA  *pixadb)
489
0
{
490
0
l_int32  w, h, empty;
491
0
PIX     *pix1, *pix2, *pix3, *pixd;
492
493
0
    if (!pixs || pixGetDepth(pixs) != 1)
494
0
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
495
0
    pixGetDimensions(pixs, &w, &h, NULL);
496
0
    if (w < MinWidth || h < MinHeight) {
497
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
498
0
        return NULL;
499
0
    }
500
0
    if (!pixvws)
501
0
        return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL);
502
503
        /* Join pixels vertically to make a textblock mask */
504
0
    pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
505
0
    pixZero(pix1, &empty);
506
0
    if (empty) {
507
0
        pixDestroy(&pix1);
508
0
        L_INFO("no fg pixels in textblock mask\n", __func__);
509
0
        return NULL;
510
0
    }
511
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
512
513
        /* Solidify the textblock mask and remove noise:
514
         *   (1) For each cc, close the blocks and dilate slightly
515
         *       to form a solid mask.
516
         *   (2) Small horizontal closing between components.
517
         *   (3) Open the white space between columns, again.
518
         *   (4) Remove small components. */
519
0
    pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL);
520
0
    pixCloseSafeBrick(pix2, pix2, 10, 1);
521
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
522
0
    pix3 = pixSubtract(NULL, pix2, pixvws);
523
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
524
0
    pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH,
525
0
                            L_SELECT_IF_GTE, NULL);
526
0
    if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
527
528
0
    pixDestroy(&pix1);
529
0
    pixDestroy(&pix2);
530
0
    pixDestroy(&pix3);
531
0
    return pixd;
532
0
}
533
534
535
/*------------------------------------------------------------------*
536
 *    Location and extraction of page foreground; cleaning pages    *
537
 *------------------------------------------------------------------*/
538
/*!
539
 * \brief   pixCropImage()
540
 *
541
 * \param[in]    pixs        full resolution (any type or depth)
542
 * \param[in]    lr_clear    full res pixels cleared at left and right sides
543
 * \param[in]    tb_clear    full res pixels cleared at top and bottom sides
544
 * \param[in]    edgeclean   parameter for removing edge noise (-2 to 15)
545
 *                           default = 0 (no removal);
546
 *                           15 is maximally aggressive for random noise
547
 *                           -1 for aggressively removing side noise
548
 *                           -2 to extract page embedded in black background
549
 * \param[in]    lr_border   full res final "added" pixels on left and right
550
 * \param[in]    tb_border   full res final "added" pixels on top and bottom
551
 * \param[in]    maxwiden    max fractional horizontal stretch allowed
552
 * \param[in]    printwiden  0 to skip, 1 for 8.5x11, 2 for A4
553
 * \param[in]   *debugfile   [optional] usually is NULL
554
 * \param[out]  *pcropbox    [optional] crop box at full resolution
555
 * \return  cropped pix, or NULL on error
556
 *
557
 * <pre>
558
 * Notes:
559
 *      (1) This binarizes and crops a page image.
560
 *          (a) Binarizes if necessary and does 2x reduction.
561
 *          (b) Clears near the border by %lr_clear and %tb_clear full
562
 *              resolution pixels.  (This is done at 2x reduction.)
563
 *          (c) If %edgeclean > 0, it removes isolated sets of pixels,
564
 *              using a close/open operation of size %edgeclean + 1.
565
 *              If %edgeclean == -1, it uses a large vertical morphological
566
 *              close/open and the extraction of either the largest
567
 *              resulting connected component (or the largest two components
568
 *              if the page has 2 columns), to eliminate noise on left
569
 *              and right sides.
570
 *              If %edgeclean == -2, it extracts the page region from a
571
 *              possible exterior black surround.
572
 *          (d) Find the bounding box of remaining fg pixels and scales
573
 *              the box up 2x back to full resolution.
574
 *          (e) Crops the binarized image to the bounding box.
575
 *          (f) Slightly thickens long horizontal lines.
576
 *          (g) Rescales this image to fit within the original image,
577
 *              less lr_border on the sides and tb_border above and below.
578
 *              The rescaling is done isomorphically with a (possible)
579
 *              optional additional widening.  Suggest the additional
580
 *              widening factor not exceed 1.15.
581
 *          (h) Optionally do additional horizontal stretch if needed to
582
 *              better fill a printed page.  Default is 0 to skip; 1 to
583
 *              widen for 8.5x11 page, 2 for A4 page.
584
 *          Note that (b) - (d) are done at 2x reduction for efficiency.
585
 *      (2) Side clearing must not exceed 1/6 of the dimension on that side.
586
 *      (3) The clear and border pixel parameters must be >= 0.
587
 *      (4) The "clear" parameters act on the input image, whereas the
588
 *          "border" parameters act to give a white border to the final
589
 *          image.  They are not literally added, because the input and final
590
 *          images are the same size.  If the resulting images are to be
591
 *          printed, it is useful to have border pixel parameters of at
592
 *          least 60 at 300 ppi, to avoid losing content at the edges.
593
 *      (5) This is not intended to work on small thumbnails.  The
594
 *          dimensions of pixs must be at least MinWidth x MinHeight.
595
 *      (6) Step (f) above helps with orthographically-produced music notation,
596
 *          where the horizontal staff lines can be very thin and thus
597
 *          subject to printer alias.
598
 *      (7) With orthographically-produced (as opposed to scanned) images,
599
 *          there is no scan noise, so you should skip noise removal
600
 *          by setting %edgeclean = 0.
601
 *      (8) If you are not concerned with printing on paper, use the
602
 *          default value 0 for %printwiden.  Widening only takes place
603
 *          if the ratio h/w exceeds the specified paper size by 3%,
604
 *          and the horizontal scaling factor will not exceed 1.20.
605
 * </pre>
606
 */
607
PIX *
608
pixCropImage(PIX         *pixs,
609
             l_int32      lr_clear,
610
             l_int32      tb_clear,
611
             l_int32      edgeclean,
612
             l_int32      lr_border,
613
             l_int32      tb_border,
614
             l_float32    maxwiden,
615
             l_int32      printwiden,
616
             const char  *debugfile,
617
             BOX        **pcropbox)
618
0
{
619
0
char       cmd[64];
620
0
l_int32    w, h, val, ret;
621
0
l_float32  r1, r2;
622
0
BOX       *box1, *box2;
623
0
PIX       *pix1, *pix2, *pix3, *pix4;
624
0
PIXA      *pixa1;
625
626
0
    if (pcropbox) *pcropbox = NULL;
627
0
    if (!pixs)
628
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
629
0
    if (edgeclean > 15) {
630
0
        L_WARNING("edgeclean > 15; setting to 15\n", __func__);
631
0
        edgeclean = 15;
632
0
    }
633
0
    if (edgeclean < -1) {
634
0
        lept_stderr("Using edgeclean = -2\n");
635
0
        edgeclean = -2;
636
0
    }
637
0
    pixGetDimensions(pixs, &w, &h, NULL);
638
0
    if (w < MinWidth || h < MinHeight) {
639
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
640
0
        return NULL;
641
0
    }
642
0
    if (lr_clear < 0) lr_clear = 0;
643
0
    if (tb_clear < 0) tb_clear = 0;
644
0
    if (lr_border < 0) lr_border = 0;
645
0
    if (tb_border < 0) tb_border = 0;
646
0
    if (lr_clear > w / 6 || tb_clear > h / 6) {
647
0
        L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n",
648
0
                __func__, w / 6, h / 6);
649
0
        return NULL;
650
0
    }
651
0
    if (maxwiden > 1.15)
652
0
        L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
653
0
                  __func__, maxwiden);
654
0
    if (printwiden < 0 || printwiden > 2) printwiden = 0;
655
0
    pixa1 = (debugfile) ? pixaCreate(5) : NULL;
656
0
    if (pixa1) pixaAddPix(pixa1, pixs, L_COPY);
657
658
        /* Binarize if necessary and 2x reduction */
659
0
    pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1);
660
0
    pix2 = pixReduceRankBinary2(pix1, 2, NULL);
661
662
        /* Clear out pixels near the image edges */
663
0
    pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2,
664
0
                        tb_clear / 2, PIX_CLR);
665
0
    if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT);
666
667
        /* Choose one of three methods for extracting foreground pixels:
668
         * (1) Include all foreground pixels
669
         * (2) Do a morphological close/open to remove noise throughout
670
         *     the image before finding a b.b. for remaining f.g. pixels
671
         * (3) Do a large vertical closing and choose the largest (by area)
672
         *     component to avoid foreground noise on left and right sides */
673
0
    if (edgeclean == 0) {
674
0
        ret = pixClipToForeground(pix2, NULL, &box1);
675
0
    } else if (edgeclean > 0) {
676
0
        val = edgeclean + 1;
677
0
        snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val);
678
0
        pix3 = pixMorphSequence(pix2, cmd, 0);
679
0
        ret = pixClipToForeground(pix3, NULL, &box1);
680
0
        pixDestroy(&pix3);
681
0
    } else if (edgeclean == -1) {
682
0
        ret = pixMaxCompAfterVClosing(pix2, &box1);
683
0
    } else {  /* edgeclean == -2 */
684
0
        ret = pixFindPageInsideBlackBorder(pix2, &box1);
685
0
    }
686
0
    pixDestroy(&pix2);
687
0
    if (ret) {
688
0
        L_ERROR("no returned b.b. for foreground\n", __func__);
689
0
        boxDestroy(&box1);
690
0
        pixDestroy(&pix1);
691
0
        pixaDestroy(&pixa1);
692
0
        return NULL;
693
0
    }
694
695
        /* Transform to full resolution */
696
0
    box2 = boxTransform(box1, 0, 0, 2.0, 2.0);  /* full res */
697
0
    boxDestroy(&box1);
698
0
    if (pixa1) {
699
0
        pix2 = pixCopy(NULL, pix1);
700
0
        pixRenderBoxArb(pix2, box2, 5, 255, 0, 0);
701
0
        pixaAddPix(pixa1, pix2, L_INSERT);
702
0
    }
703
704
        /* Grab the foreground region */
705
0
    pix2 = pixClipRectangle(pix1, box2, NULL);
706
0
    pixDestroy(&pix1);
707
708
        /* Slightly thicken long horizontal lines.  This prevents loss of
709
         * printed thin music staff lines due to aliasing. */
710
0
    pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0);
711
0
    pixOr(pix2, pix2, pix3);
712
0
    pixDestroy(&pix3);
713
714
        /* Rescale the fg and paste into the input-sized image */
715
0
    pix3 = pixRescaleForCropping(pix2,  w, h, lr_border, tb_border,
716
0
                                 maxwiden, NULL);
717
0
    pixDestroy(&pix2);
718
0
    if (pixa1) {
719
0
        pix2 = pixCopy(NULL, pix3);
720
0
        pixaAddPix(pixa1, pix2, L_INSERT);
721
0
    }
722
723
        /* Optionally widen image if possible, for printing on 8.5 x 11 inch
724
         * or A4 paper.  Specifically, widen the image if the h/w asperity
725
         * ratio of the input image exceeds that of the selected paper by
726
         * more than 3%.  Do not widen by more than 20%.  */
727
0
    r1 = (l_float32)h / (l_float32)w;
728
0
    r2 = 0.0;  /* for default case */
729
0
    if (printwiden == 1)  /* standard */
730
0
        r2 = r1 / 1.294;
731
0
    else if (printwiden == 2)  /* A4 */
732
0
        r2 = r1 / 1.414;
733
0
    if (r2 > 1.03) {
734
0
        r2 = L_MIN(r2, 1.20);
735
0
        lept_stderr("oversize h/w ratio by factor %6.3f\n", r2);
736
0
        pix4 = pixScale(pix3, r2, 1.0);
737
0
    } else {
738
0
        pix4 = pixClone(pix3);
739
0
    }
740
0
    pixDestroy(&pix3);
741
742
0
    if (pcropbox)
743
0
        *pcropbox = box2;
744
0
    else
745
0
        boxDestroy(&box2);
746
0
    if (pixa1) {
747
0
       pixaAddPix(pixa1, pix4, L_COPY);
748
0
       lept_stderr("Writing debug file: %s\n", debugfile);
749
0
       pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile);
750
0
       pixaDestroy(&pixa1);
751
0
    }
752
0
    return pix4;
753
0
}
754
755
756
/*!
757
 * \brief   pixMaxCompAfterVClosing()
758
 *
759
 * \param[in]    pixs        1 bpp (input at 2x reduction)
760
 * \param[out]  **pbox       main region at input resolution (2x reduction)
761
 * \return  0 if OK, 1 on error
762
 *
763
 * <pre>
764
 * Notes:
765
 *      (1) This removes foreground noise along left and right edges,
766
 *          returning a bounding box for the remaining foreground pixels
767
 *          at the input resolution.
768
 *      (2) The input %pixs should be at a resolution 100 - 150 ppi.
769
 *      (3) It does two 2x level1 rank binary reductions, followed
770
 *          by a large vertical close/open, with a very small horizontal
771
 *          close/open, and then a 4x expansion back to the input resolution.
772
 *      (4) To work properly with 2-column layout, if the largest and
773
 *          second-largest regions are comparable in size, both are included.
774
 *      (5) This is used as an option to pixCropImage(), when given
775
 *          an %edgeclean parameter of -1.
776
 * </pre>
777
 */
778
static l_ok
779
pixMaxCompAfterVClosing(PIX   *pixs,
780
                        BOX  **pbox)
781
0
{
782
0
l_int32  w1, h1, w2, h2, n, empty;
783
0
BOX     *box1, *box2;
784
0
BOXA    *boxa1, *boxa2;
785
0
PIX     *pix1;
786
787
0
    if (!pbox)
788
0
        return ERROR_INT("pbox not defined", __func__, 1);
789
0
    *pbox = NULL;
790
0
    if (!pixs || pixGetDepth(pixs) != 1)
791
0
        return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
792
793
        /* Strong vertical closing */
794
0
    pix1 = pixMorphSequence(pixs, "r11 + c3.80 + o3.80 + x4", 0);
795
0
    pixZero(pix1, &empty);
796
0
    if (empty) {
797
0
        pixDestroy(&pix1);
798
0
        return ERROR_INT("pix1 is empty", __func__, 1);
799
0
    }
800
801
        /* Find the two c.c. with largest area. If they are not comparable
802
         * in area, return the bounding box of the largest; otherwise,
803
         * return the bounding box of both regions. */
804
0
    boxa1 = pixConnCompBB(pix1, 8);
805
0
    pixDestroy(&pix1);
806
0
    boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
807
0
    if ((n = boxaGetCount(boxa2)) == 1) {
808
0
        *pbox = boxaGetBox(boxa2, 0, L_COPY);
809
0
    } else {  /* 2 or more */
810
0
        box1 = boxaGetBox(boxa2, 0, L_COPY);
811
0
        box2 = boxaGetBox(boxa2, 1, L_COPY);
812
0
        boxGetGeometry(box1, NULL, NULL, &w1, &h1);
813
0
        boxGetGeometry(box2, NULL, NULL, &w2, &h2);
814
0
        if (((l_float32)(w2 * h2) / (l_float32)(w1 * h1)) > 0.7) {
815
0
            *pbox = boxBoundingRegion(box1, box2);
816
0
            boxDestroy(&box1);
817
0
        } else {
818
0
            *pbox = box1;
819
0
        }
820
0
        boxDestroy(&box2);
821
0
    }
822
0
    boxaDestroy(&boxa1);
823
0
    boxaDestroy(&boxa2);
824
0
    return 0; 
825
0
}
826
827
828
/*!
829
 * \brief   pixFindPageInsideBlackBorder()
830
 *
831
 * \param[in]    pixs        1 bpp (input at 2x reduction)
832
 * \param[out]  **pbox       page region at input resolution (2x reduction)
833
 * \return  0 if OK, 1 on error
834
 *
835
 * <pre>
836
 * Notes:
837
 *      (1) This extracts the page region from the image, returning a
838
 *          bounding box for the remaining foreground pixels.  It is designed
839
 *          to work when the page is within a fairly solid black border.
840
 *      (2) It returns a bounding box for the page region at the input res.
841
 *      (3) The input %pixs is expected to be at a resolution 100 - 150 ppi.
842
 *      (4) This is used as an option to pixCropImage(), when given an
843
 *          %edgecrop parameter of -2.
844
 * </pre>
845
 */
846
static l_ok
847
pixFindPageInsideBlackBorder(PIX   *pixs,
848
                             BOX  **pbox)
849
0
{
850
0
l_int32  empty, x, y;
851
0
BOX     *box1, *box2, *box3;
852
0
BOXA    *boxa1, *boxa2;
853
0
PIX     *pix1, *pix2, *pix3;
854
855
0
    if (!pbox)
856
0
        return ERROR_INT("pbox not defined", __func__, 1);
857
0
    *pbox = NULL;
858
0
    if (!pixs || pixGetDepth(pixs) != 1)
859
0
        return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
860
861
        /* Reduce 4x and remove some remaining small foreground */
862
0
    pix1 = pixMorphSequence(pixs, "r22 + c5.5 + o7.7", 0);
863
0
    pixZero(pix1, &empty);
864
0
    if (empty) {
865
0
        pixDestroy(&pix1);
866
0
        return ERROR_INT("pix1 is empty", __func__, 1);
867
0
    }
868
869
        /* Photoinvert image and Find the c.c. with largest area. */
870
0
    pixInvert(pix1, pix1);
871
0
    pix2 = pixMorphSequence(pix1, "c11.11 + o11.11", 0);
872
0
    pixDestroy(&pix1);
873
0
    boxa1 = pixConnCompBB(pix2, 8);
874
0
    pixDestroy(&pix2);
875
0
    boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
876
0
    box1 = boxaGetBox(boxa2, 0, L_COPY);  /* largest by area */
877
0
    boxAdjustSides(box1, box1, 5, -5, 5, -5);
878
0
    box2 = boxTransform(box1, 0, 0, 4.0, 4.0);
879
880
        /* Crop this page from the original image and find the foreground */
881
0
    pix3 = pixClipRectangle(pixs, box2, NULL);
882
0
    pixClipToForeground(pix3, NULL, &box3);
883
0
    pixDestroy(&pix3);
884
0
    boxGetGeometry(box2, &x, &y, NULL, NULL);
885
0
    *pbox = boxTransform(box3, x, y, 1.0, 1.0);
886
0
    boxaDestroy(&boxa1);
887
0
    boxaDestroy(&boxa2);
888
0
    boxDestroy(&box1);
889
0
    boxDestroy(&box2);
890
0
    boxDestroy(&box3);
891
0
    return 0;
892
0
}
893
894
895
/*!
896
 * \brief   pixRescaleForCropping()
897
 *
898
 * \param[in]    pixs        1 bpp
899
 * \param[in]    w           width of output lmage
900
 * \param[in]    h           height of output lmage
901
 * \param[in]    lr_border   cleared final border pixels on left and right
902
 * \param[in]    tb_border   cleared final border pixels on top and bottom
903
 * \param[in]    maxwiden    max fractional horizontal stretch allowed; >= 1.0
904
 * \param[out]  *ppixsc      [optional] rescaled foreground region
905
 * \return  pixd  output image, or NULL on error
906
 *
907
 * <pre>
908
 * Notes:
909
 *      (1) This rescales %pixs to fit maximally within an image of
910
 *          size (w x h), under two conditions:
911
 *          (a) the final image has cleared border regions given by the
912
 *              input parameters %lr_border and %tb_border, and
913
 *          (b) the input image is first isotropically scaled to fit
914
 *              maximally within the allowed final region, and then further
915
 *              maxiximally widened, subject to the constraints of the
916
 *              cleared border and the %maxwiden parameter.
917
 *      (2) The cleared border pixel parameters must be >= 0.
918
 *      (3) If there is extra horizontal stretching by a factor
919
 *          %maxwiden larger than about 1.15, the appearance may be
920
 *          unpleasingly distorted; hence the suggestion not to exceed it.
921
 * </pre>
922
 */
923
static PIX *
924
pixRescaleForCropping(PIX       *pixs,
925
                      l_int32    w,
926
                      l_int32    h,
927
                      l_int32    lr_border,
928
                      l_int32    tb_border,
929
                      l_float32  maxwiden,
930
                      PIX      **ppixsc)
931
0
{
932
0
static l_int32  first_time = TRUE;
933
0
l_int32         wi, hi, wmax, hmax, wn, wf, hf, xf;
934
0
l_float32       ratio, scaleh, scalew, scalewid;
935
0
PIX            *pix1, *pixd;
936
937
0
    if (ppixsc) *ppixsc = NULL;
938
0
    if (!pixs || pixGetDepth(pixs) != 1)
939
0
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
940
0
    if (lr_border < 0) lr_border = 0;
941
0
    if (tb_border < 0) tb_border = 0;
942
0
    maxwiden = L_MAX(1.0, maxwiden);
943
0
    if (maxwiden > 1.15)
944
0
        L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
945
0
                  __func__, maxwiden);
946
947
        /* Rescale the foreground region.
948
         * First, decide if scaling is to full width or full height.
949
         * If scaling to full height, determine how much additional
950
         * width widening is possible, given the maxwiden constraint.
951
         * If scaling to full width, both width and height are
952
         * scaled isotropically.  Scaling is done so that the resulting
953
         * foreground is maximally widened, so it can be horizontally
954
         * centered in an image of size (w x h), less %lr_border
955
         * on each side. */
956
0
    pixGetDimensions(pixs, &wi, &hi, NULL);
957
0
    wmax = w - 2 * lr_border;
958
0
    hmax = h - 2 * tb_border;
959
0
    ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi);
960
0
    if (ratio >= 1.0) {  /* width can be widened after isotropic scaling */
961
0
        scaleh = (l_float32)hmax / (l_float32)hi;
962
0
        wn = scaleh * wi;  /* scaled but not widened */
963
0
        scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn);
964
0
        scalew = scaleh * scalewid;
965
0
        wf = scalew * wi;
966
0
        hf = hmax;  /* scale to full height */
967
0
        pix1 = pixScale(pixs, scalew, scaleh);
968
0
        if (first_time == TRUE) {
969
0
            lept_stderr("Width stretched by factor %5.3f\n", scalewid);
970
0
            first_time = FALSE;
971
0
        }
972
0
        xf = (w - wf) / 2.0;
973
0
    } else {  /* width cannot be widened after isotropic scaling */
974
0
        scalew = (l_float32)wmax / (l_float32)wi;
975
0
        pix1 = pixScale(pixs, scalew, scalew);
976
0
        wf = wmax;  /* scale to full width */
977
0
        hf = scalew * hi;  /* no extra vertical stretching allowed */
978
0
        xf = lr_border;
979
0
    }
980
981
        /* Paste it, horizontally centered and vertically placed as
982
         * high as allowed (by %tb_border) into the final page image. */
983
0
    pixd = pixCreate(w, h, 1);
984
0
    pixRasterop(pixd, xf, tb_border, wf, hf, PIX_SRC, pix1, 0, 0);
985
986
0
    if (ppixsc)
987
0
        *ppixsc = pix1;
988
0
    else
989
0
        pixDestroy(&pix1);
990
0
    return pixd;
991
0
}
992
993
994
/*!
995
 * \brief   pixCleanImage()
996
 *
997
 * \param[in]    pixs        full resolution (any type or depth)
998
 * \param[in]    contrast    vary contrast: 1 = lightest; 10 = darkest;
999
 *                           suggest 1 unless light features are being lost
1000
 * \param[in]    rotation    cw by 90 degrees: {0,1,2,3} represent
1001
 *                           0, 90, 180 and 270 degree cw rotations
1002
 * \param[in]    scale       1 (no scaling) or 2 (2x upscaling)
1003
 * \param[in]    opensize    opening size of structuring element for noise
1004
 *                           removal: {0 or 1 to skip; 2, 3 for opening}
1005
 * \return  cleaned pix, or NULL on error
1006
 *
1007
 * <pre>
1008
 * Notes:
1009
 *    (1) This deskews, optionally rotates and darkens, cleans background
1010
 *        to white, binarizes and optionally removes small noise.
1011
 *    (2) For color and grayscale input, local background normalization is
1012
 *        done to 200, and a threshold of 180 sets the maximum foreground
1013
 *        value in the normalized image.
1014
 *    (3) The %contrast parameter adjusts the binarization to avoid losing
1015
 *        lighter input pixels.  Contrast is increased as %contrast increases
1016
 *        from 1 to 10.
1017
 *    (4) The %scale parameter controls the thresholding to 1 bpp. Two values:
1018
 *            1 = threshold
1019
 *            2 = linear interpolated 2x upscaling before threshold.
1020
 *    (5) The #opensize parameter is the size of a square SEL used with
1021
 *        opening to remove small speckle noise.  Allowed open sizes are 2,3.
1022
 *        If this is to be used, try 2 before 3.
1023
 *    (6) This does the image processing for cleanTo1bppFilesToPdf() and
1024
 *        prog/cleanpdf.c.
1025
 * </pre>
1026
 */
1027
PIX *
1028
pixCleanImage(PIX         *pixs,
1029
              l_int32      contrast,
1030
              l_int32      rotation,
1031
              l_int32      scale,
1032
              l_int32      opensize)
1033
0
{
1034
0
char  sequence[32];
1035
0
PIX  *pix1, *pix2, *pix3, *pix4, *pix5;
1036
1037
0
    if (!pixs)
1038
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
1039
0
    if (rotation < 0 || rotation > 3) {
1040
0
        L_ERROR("invalid rotation = %d; rotation must be in  {0,1,2,3}\n",
1041
0
                __func__, rotation);
1042
0
        return NULL;
1043
0
    }
1044
0
    if (contrast < 1 || contrast > 10) {
1045
0
        L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n",
1046
0
                __func__, contrast);
1047
0
        return NULL;
1048
0
    }
1049
0
    if (scale != 1 && scale != 2) {
1050
0
        L_ERROR("invalid scale = %d; scale must be 1 or 2\n",
1051
0
                __func__, opensize);
1052
0
        return NULL;
1053
0
    }
1054
0
    if (opensize > 3) {
1055
0
        L_ERROR("invalid opensize = %d; opensize must be <= 3\n",
1056
0
                __func__, opensize);
1057
0
        return NULL;
1058
0
    }
1059
1060
0
    if (pixGetDepth(pixs) == 1) {
1061
0
        if (rotation > 0)
1062
0
            pix1 = pixRotateOrth(pixs, rotation);
1063
0
        else
1064
0
            pix1 = pixClone(pixs);
1065
0
        pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL);
1066
0
        if (scale == 2)
1067
0
            pix4 = pixExpandBinaryReplicate(pix2, 2, 2);
1068
0
        else  /* scale == 1 */
1069
0
            pix4 = pixClone(pix2);
1070
0
    } else {
1071
0
        pix1 = pixConvertTo8MinMax(pixs);
1072
0
        if (rotation > 0)
1073
0
            pix2 = pixRotateOrth(pix1, rotation);
1074
0
        else
1075
0
            pix2 = pixClone(pix1);
1076
0
        pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL);
1077
0
        pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale);
1078
0
        pixDestroy(&pix3);
1079
0
    }
1080
1081
0
    if (opensize == 2 || opensize == 3) {
1082
0
        snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize);
1083
0
        pix5 = pixMorphSequence(pix4, sequence, 0);
1084
0
    } else {
1085
0
        pix5 = pixClone(pix4);
1086
0
    }
1087
1088
0
    pixDestroy(&pix1);
1089
0
    pixDestroy(&pix2);
1090
0
    pixDestroy(&pix4);
1091
0
    return pix5;
1092
0
}
1093
1094
1095
/*!
1096
 * \brief   pixFindPageForeground()
1097
 *
1098
 * \param[in]    pixs       full resolution (any type or depth)
1099
 * \param[in]    threshold  for binarization; typically about 128
1100
 * \param[in]    mindist    min distance of text from border to allow
1101
 *                          cleaning near border; at 2x reduction, this
1102
 *                          should be larger than 50; typically about 70
1103
 * \param[in]    erasedist  when conditions are satisfied, erase anything
1104
 *                          within this distance of the edge;
1105
 *                          typically 20-30 at 2x reduction
1106
 * \param[in]    showmorph  debug: set to a negative integer to show steps
1107
 *                          in generating masks; this is typically used
1108
 *                          for debugging region extraction
1109
 * \param[in]    pixac      debug: allocate outside and pass this in to
1110
 *                          accumulate results of each call to this function,
1111
 *                          which can be displayed in a mosaic or a pdf.
1112
 * \return  box region including foreground, with some pixel noise
1113
 *                   removed, or NULL if not found
1114
 *
1115
 * <pre>
1116
 * Notes:
1117
 *      (1) This doesn't simply crop to the fg.  It attempts to remove
1118
 *          pixel noise and junk at the edge of the image before cropping.
1119
 *          The input %threshold is used if pixs is not 1 bpp.
1120
 *      (2) This is not intended to work on small thumbnails.  The
1121
 *          dimensions of pixs must be at least MinWidth x MinHeight.
1122
 *      (3) Debug: set showmorph to display the intermediate image in
1123
 *          the morphological operations on this page.
1124
 *      (4) Debug: to get pdf output of results when called repeatedly,
1125
 *          call with an existing pixac, which will add an image of this page,
1126
 *          with the fg outlined.  If no foreground is found, there is
1127
 *          no output for this page image.
1128
 * </pre>
1129
 */
1130
BOX *
1131
pixFindPageForeground(PIX     *pixs,
1132
                      l_int32  threshold,
1133
                      l_int32  mindist,
1134
                      l_int32  erasedist,
1135
                      l_int32  showmorph,
1136
                      PIXAC   *pixac)
1137
0
{
1138
0
l_int32  flag, nbox, intersects;
1139
0
l_int32  w, h, bx, by, bw, bh, left, right, top, bottom;
1140
0
PIX     *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
1141
0
BOX     *box, *boxfg, *boxin, *boxd;
1142
0
BOXA    *ba1, *ba2;
1143
1144
0
    if (!pixs)
1145
0
        return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL);
1146
0
    pixGetDimensions(pixs, &w, &h, NULL);
1147
0
    if (w < MinWidth || h < MinHeight) {
1148
0
        L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
1149
0
        return NULL;
1150
0
    }
1151
1152
        /* Binarize, downscale by 0.5, remove the noise to generate a seed,
1153
         * and do a seedfill back from the seed into those 8-connected
1154
         * components of the binarized image for which there was at least
1155
         * one seed pixel. */
1156
0
    flag = (showmorph) ? 100 : 0;
1157
0
    pixb = pixConvertTo1(pixs, threshold);
1158
0
    pixb2 = pixScale(pixb, 0.5, 0.5);
1159
0
    pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag);
1160
0
    pix1 = pixMorphSequence(pixb2, "o50.1", 0);
1161
0
    pixOr(pixseed, pixseed, pix1);
1162
0
    pixDestroy(&pix1);
1163
0
    pix1 = pixMorphSequence(pixb2, "o1.50", 0);
1164
0
    pixOr(pixseed, pixseed, pix1);
1165
0
    pixDestroy(&pix1);
1166
0
    pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
1167
0
    pixm = pixRemoveBorderConnComps(pixsf, 8);
1168
1169
        /* Now, where is the main block of text?  We want to remove noise near
1170
         * the edge of the image, but to do that, we have to be convinced that
1171
         * (1) there is noise and (2) it is far enough from the text block
1172
         * and close enough to the edge.  For each edge, if the block
1173
         * is more than mindist from that edge, then clean 'erasedist'
1174
         * pixels from the edge. */
1175
0
    pix1 = pixMorphSequence(pixm, "c50.50", flag);
1176
0
    ba1 = pixConnComp(pix1, NULL, 8);
1177
0
    ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
1178
0
    pixGetDimensions(pix1, &w, &h, NULL);
1179
0
    nbox = boxaGetCount(ba2);
1180
0
    if (nbox > 1) {
1181
0
        box = boxaGetBox(ba2, 0, L_CLONE);
1182
0
        boxGetGeometry(box, &bx, &by, &bw, &bh);
1183
0
        left = (bx > mindist) ? erasedist : 0;
1184
0
        right = (w - bx - bw > mindist) ? erasedist : 0;
1185
0
        top = (by > mindist) ? erasedist : 0;
1186
0
        bottom = (h - by - bh > mindist) ? erasedist : 0;
1187
0
        pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
1188
0
        boxDestroy(&box);
1189
0
    }
1190
0
    pixDestroy(&pix1);
1191
0
    boxaDestroy(&ba1);
1192
0
    boxaDestroy(&ba2);
1193
1194
        /* Locate the foreground region; don't bother cropping */
1195
0
    pixClipToForeground(pixm, NULL, &boxfg);
1196
1197
        /* Sanity check the fg region.  Make sure it's not confined
1198
         * to a thin boundary on the left and right sides of the image,
1199
         * in which case it is likely to be noise. */
1200
0
    if (boxfg) {
1201
0
        boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
1202
0
        boxIntersects(boxfg, boxin, &intersects);
1203
0
        boxDestroy(&boxin);
1204
0
        if (!intersects) boxDestroy(&boxfg);
1205
0
    }
1206
1207
0
    boxd = NULL;
1208
0
    if (boxfg) {
1209
0
        boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2);  /* tiny expansion */
1210
0
        boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);
1211
1212
            /* Save the debug image showing the box for this page */
1213
0
        if (pixac) {
1214
0
            pixg2 = pixConvert1To4Cmap(pixb);
1215
0
            pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
1216
0
            pixacompAddPix(pixac, pixg2, IFF_DEFAULT);
1217
0
            pixDestroy(&pixg2);
1218
0
        }
1219
0
    }
1220
1221
0
    pixDestroy(&pixb);
1222
0
    pixDestroy(&pixb2);
1223
0
    pixDestroy(&pixseed);
1224
0
    pixDestroy(&pixsf);
1225
0
    pixDestroy(&pixm);
1226
0
    boxDestroy(&boxfg);
1227
0
    return boxd;
1228
0
}
1229
1230
1231
/*------------------------------------------------------------------*
1232
 *         Extraction of characters from image with only text       *
1233
 *------------------------------------------------------------------*/
1234
/*!
1235
 * \brief   pixSplitIntoCharacters()
1236
 *
1237
 * \param[in]    pixs      1 bpp, contains only deskewed text
1238
 * \param[in]    minw      min component width for initial filtering; typ. 4
1239
 * \param[in]    minh      min component height for initial filtering; typ. 4
1240
 * \param[out]   pboxa     [optional] character bounding boxes
1241
 * \param[out]   ppixa     [optional] character images
1242
 * \param[out]   ppixdebug [optional] showing splittings
1243
 *
1244
 * \return  0 if OK, 1 on error
1245
 *
1246
 * <pre>
1247
 * Notes:
1248
 *      (1) This is a simple function that attempts to find split points
1249
 *          based on vertical pixel profiles.
1250
 *      (2) It should be given an image that has an arbitrary number
1251
 *          of text characters.
1252
 *      (3) The returned pixa includes the boxes from which the
1253
 *          (possibly split) components are extracted.
1254
 * </pre>
1255
 */
1256
l_ok
1257
pixSplitIntoCharacters(PIX     *pixs,
1258
                       l_int32  minw,
1259
                       l_int32  minh,
1260
                       BOXA   **pboxa,
1261
                       PIXA   **ppixa,
1262
                       PIX    **ppixdebug)
1263
0
{
1264
0
l_int32  ncomp, i, xoff, yoff;
1265
0
BOXA   *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
1266
0
BOXAA  *baa;
1267
0
PIX    *pix, *pix1, *pix2, *pixdb;
1268
0
PIXA   *pixa1, *pixadb;
1269
1270
0
    if (pboxa) *pboxa = NULL;
1271
0
    if (ppixa) *ppixa = NULL;
1272
0
    if (ppixdebug) *ppixdebug = NULL;
1273
0
    if (!pixs || pixGetDepth(pixs) != 1)
1274
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1275
1276
        /* Remove the small stuff */
1277
0
    pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
1278
0
                           L_SELECT_IF_GT, NULL);
1279
1280
        /* Small vertical close for consolidation */
1281
0
    pix2 = pixMorphSequence(pix1, "c1.10", 0);
1282
0
    pixDestroy(&pix1);
1283
1284
        /* Get the 8-connected components */
1285
0
    boxa1 = pixConnComp(pix2, &pixa1, 8);
1286
0
    pixDestroy(&pix2);
1287
0
    boxaDestroy(&boxa1);
1288
1289
        /* Split the components if obvious */
1290
0
    ncomp = pixaGetCount(pixa1);
1291
0
    boxa2 = boxaCreate(ncomp);
1292
0
    pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
1293
0
    for (i = 0; i < ncomp; i++) {
1294
0
        pix = pixaGetPix(pixa1, i, L_CLONE);
1295
0
        if (ppixdebug) {
1296
0
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
1297
0
            if (pixdb)
1298
0
                pixaAddPix(pixadb, pixdb, L_INSERT);
1299
0
        } else {
1300
0
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
1301
0
        }
1302
0
        pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
1303
0
        boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
1304
0
        boxaJoin(boxa2, boxat2, 0, -1);
1305
0
        pixDestroy(&pix);
1306
0
        boxaDestroy(&boxat1);
1307
0
        boxaDestroy(&boxat2);
1308
0
    }
1309
0
    pixaDestroy(&pixa1);
1310
1311
        /* Generate the debug image */
1312
0
    if (ppixdebug) {
1313
0
        if (pixaGetCount(pixadb) > 0) {
1314
0
            *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
1315
0
                                                1.0, 0, 20, 1);
1316
0
        }
1317
0
        pixaDestroy(&pixadb);
1318
0
    }
1319
1320
        /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
1321
0
    baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
1322
0
    boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
1323
0
    boxaaDestroy(&baa);
1324
0
    boxaDestroy(&boxa2);
1325
1326
        /* Optionally extract the pieces from the input image */
1327
0
    if (ppixa)
1328
0
        *ppixa = pixClipRectangles(pixs, boxad);
1329
0
    if (pboxa)
1330
0
        *pboxa = boxad;
1331
0
    else
1332
0
        boxaDestroy(&boxad);
1333
0
    return 0;
1334
0
}
1335
1336
1337
/*!
1338
 * \brief   pixSplitComponentWithProfile()
1339
 *
1340
 * \param[in]    pixs       1 bpp, exactly one connected component
1341
 * \param[in]    delta      distance used in extrema finding in a numa; typ. 10
1342
 * \param[in]    mindel     minimum required difference between profile
1343
 *                          minimum and profile values +2 and -2 away; typ. 7
1344
 * \param[out]   ppixdebug  [optional] debug image of splitting
1345
 * \return  boxa of c.c. after splitting, or NULL on error
1346
 *
1347
 * <pre>
1348
 * Notes:
1349
 *      (1) This will split the most obvious cases of touching characters.
1350
 *          The split points it is searching for are narrow and deep
1351
 *          minimima in the vertical pixel projection profile, after a
1352
 *          large vertical closing has been applied to the component.
1353
 * </pre>
1354
 */
1355
BOXA *
1356
pixSplitComponentWithProfile(PIX     *pixs,
1357
                             l_int32  delta,
1358
                             l_int32  mindel,
1359
                             PIX    **ppixdebug)
1360
0
{
1361
0
l_int32   w, h, n2, i, firstmin, xmin, xshift;
1362
0
l_int32   nmin, nleft, nright, nsplit, isplit, ncomp;
1363
0
l_int32  *array1, *array2;
1364
0
BOX      *box;
1365
0
BOXA     *boxad;
1366
0
NUMA     *na1, *na2, *nasplit;
1367
0
PIX      *pix1, *pixdb;
1368
1369
0
    if (ppixdebug) *ppixdebug = NULL;
1370
0
    if (!pixs || pixGetDepth(pixs) != 1)
1371
0
        return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL);
1372
0
    pixGetDimensions(pixs, &w, &h, NULL);
1373
1374
        /* Closing to consolidate characters vertically */
1375
0
    pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);
1376
1377
        /* Get extrema of column projections */
1378
0
    boxad = boxaCreate(2);
1379
0
    na1 = pixCountPixelsByColumn(pix1);  /* w elements */
1380
0
    pixDestroy(&pix1);
1381
0
    na2 = numaFindExtrema(na1, delta, NULL);
1382
0
    n2 = numaGetCount(na2);
1383
0
    if (n2 < 3) {  /* no split possible */
1384
0
        box = boxCreate(0, 0, w, h);
1385
0
        boxaAddBox(boxad, box, L_INSERT);
1386
0
        numaDestroy(&na1);
1387
0
        numaDestroy(&na2);
1388
0
        return boxad;
1389
0
    }
1390
1391
        /* Look for sufficiently deep and narrow minima.
1392
         * All minima of of interest must be surrounded by max on each
1393
         * side.  firstmin is the index of first possible minimum. */
1394
0
    array1 = numaGetIArray(na1);
1395
0
    array2 = numaGetIArray(na2);
1396
0
    if (ppixdebug) numaWriteStderr(na2);
1397
0
    firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
1398
0
    nasplit = numaCreate(n2);  /* will hold split locations */
1399
0
    for (i = firstmin; i < n2 - 1; i+= 2) {
1400
0
        xmin = array2[i];
1401
0
        nmin = array1[xmin];
1402
0
        if (xmin + 2 >= w) break;  /* no more splits possible */
1403
0
        nleft = array1[xmin - 2];
1404
0
        nright = array1[xmin + 2];
1405
0
        if (ppixdebug) {
1406
0
            lept_stderr(
1407
0
                "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
1408
0
                xmin, w, nleft, nmin, nright);
1409
0
        }
1410
0
        if (nleft - nmin >= mindel && nright - nmin >= mindel)  /* split */
1411
0
            numaAddNumber(nasplit, xmin);
1412
0
    }
1413
0
    nsplit = numaGetCount(nasplit);
1414
1415
#if 0
1416
    if (ppixdebug && nsplit > 0) {
1417
        lept_mkdir("lept/split");
1418
        gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL);
1419
    }
1420
#endif
1421
1422
0
    numaDestroy(&na1);
1423
0
    numaDestroy(&na2);
1424
0
    LEPT_FREE(array1);
1425
0
    LEPT_FREE(array2);
1426
1427
0
    if (nsplit == 0) {  /* no splitting */
1428
0
        numaDestroy(&nasplit);
1429
0
        box = boxCreate(0, 0, w, h);
1430
0
        boxaAddBox(boxad, box, L_INSERT);
1431
0
        return boxad;
1432
0
    }
1433
1434
        /* Use split points to generate b.b. after splitting */
1435
0
    for (i = 0, xshift = 0; i < nsplit; i++) {
1436
0
        numaGetIValue(nasplit, i, &isplit);
1437
0
        box = boxCreate(xshift, 0, isplit - xshift, h);
1438
0
        boxaAddBox(boxad, box, L_INSERT);
1439
0
        xshift = isplit + 1;
1440
0
    }
1441
0
    box = boxCreate(xshift, 0, w - xshift, h);
1442
0
    boxaAddBox(boxad, box, L_INSERT);
1443
0
    numaDestroy(&nasplit);
1444
1445
0
    if (ppixdebug) {
1446
0
        pixdb = pixConvertTo32(pixs);
1447
0
        ncomp = boxaGetCount(boxad);
1448
0
        for (i = 0; i < ncomp; i++) {
1449
0
            box = boxaGetBox(boxad, i, L_CLONE);
1450
0
            pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
1451
0
            boxDestroy(&box);
1452
0
        }
1453
0
        *ppixdebug = pixdb;
1454
0
    }
1455
1456
0
    return boxad;
1457
0
}
1458
1459
1460
/*------------------------------------------------------------------*
1461
 *                    Extraction of lines of text                   *
1462
 *------------------------------------------------------------------*/
1463
/*!
1464
 * \brief   pixExtractTextlines()
1465
 *
1466
 * \param[in]    pixs        any depth, assumed to have nearly horizontal text
1467
 * \param[in]    maxw, maxh  initial filtering: remove any components in pixs
1468
 *                           with components larger than maxw or maxh
1469
 * \param[in]    minw, minh  final filtering: remove extracted 'lines'
1470
 *                           with sizes smaller than minw or minh; use
1471
 *                           0 for default.
1472
 * \param[in]    adjw, adjh  final adjustment of boxes representing each
1473
 *                           text line.  If > 0, these increase the box
1474
 *                           size at each edge by this amount.
1475
 * \param[in]    pixadb      pixa for saving intermediate steps; NULL to omit
1476
 * \return  pixa of textline images, including bounding boxes, or
1477
 *                    NULL on error
1478
 *
1479
 * <pre>
1480
 * Notes:
1481
 *      (1) This function assumes that textline fragments have sufficient
1482
 *          vertical separation and small enough skew so that a
1483
 *          horizontal dilation sufficient to join words will not join
1484
 *          textlines.  It does not guarantee that horizontally adjacent
1485
 *          textline fragments on the same line will be joined.
1486
 *      (2) For images with multiple columns, it attempts to avoid joining
1487
 *          textlines across the space between columns.  If that is not
1488
 *          a concern, you can also use pixExtractRawTextlines(),
1489
 *          which will join them with alacrity.
1490
 *      (3) This first removes components from pixs that are either
1491
 *          wide (> %maxw) or tall (> %maxh).
1492
 *      (4) A final filtering operation removes small components, such
1493
 *          that width < %minw or height < %minh.
1494
 *      (5) For reasonable accuracy, the resolution of pixs should be
1495
 *          at least 100 ppi.  For reasonable efficiency, the resolution
1496
 *          should not exceed 600 ppi.
1497
 *      (6) This can be used to determine if some region of a scanned
1498
 *          image is horizontal text.
1499
 *      (7) As an example, for a pix with resolution 300 ppi, a reasonable
1500
 *          set of parameters is:
1501
 *             pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
1502
 *          The defaults minw and minh for 300 ppi are about 36 and 20,
1503
 *          so the same result is obtained with:
1504
 *             pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
1505
 *      (8) The output pixa is composed of subimages, one for each textline,
1506
 *          and the boxa in the pixa tells where in %pixs each textline goes.
1507
 * </pre>
1508
 */
1509
PIXA *
1510
pixExtractTextlines(PIX     *pixs,
1511
                    l_int32  maxw,
1512
                    l_int32  maxh,
1513
                    l_int32  minw,
1514
                    l_int32  minh,
1515
                    l_int32  adjw,
1516
                    l_int32  adjh,
1517
                    PIXA    *pixadb)
1518
0
{
1519
0
char     buf[64];
1520
0
l_int32  res, csize, empty;
1521
0
BOXA    *boxa1, *boxa2, *boxa3;
1522
0
PIX     *pix1, *pix2, *pix3;
1523
0
PIXA    *pixa1, *pixa2, *pixa3;
1524
1525
0
    if (!pixs)
1526
0
        return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1527
1528
        /* Binarize carefully, if necessary */
1529
0
    if (pixGetDepth(pixs) > 1) {
1530
0
        pix2 = pixConvertTo8(pixs, FALSE);
1531
0
        pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1532
0
        pix1 = pixThresholdToBinary(pix3, 150);
1533
0
        pixDestroy(&pix2);
1534
0
        pixDestroy(&pix3);
1535
0
    } else {
1536
0
        pix1 = pixClone(pixs);
1537
0
    }
1538
0
    pixZero(pix1, &empty);
1539
0
    if (empty) {
1540
0
        pixDestroy(&pix1);
1541
0
        L_INFO("no fg pixels in input image\n", __func__);
1542
0
        return NULL;
1543
0
    }
1544
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1545
1546
        /* Remove any very tall or very wide connected components */
1547
0
    pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1548
0
                           L_SELECT_IF_LT, NULL);
1549
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1550
0
    pixDestroy(&pix1);
1551
1552
        /* Filter to solidify the text lines within the x-height region.
1553
         * The closing (csize) bridges gaps between words.  The opening
1554
         * removes isolated bridges between textlines. */
1555
0
    if ((res = pixGetXRes(pixs)) == 0) {
1556
0
        L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1557
0
        res = 300;
1558
0
    }
1559
0
    csize = L_MIN(120., 60.0 * res / 300.0);
1560
0
    snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3);
1561
0
    pix3 = pixMorphCompSequence(pix2, buf, 0);
1562
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1563
1564
        /* Extract the connected components.  These should be dilated lines */
1565
0
    boxa1 = pixConnComp(pix3, &pixa1, 4);
1566
0
    if (pixadb) {
1567
0
        pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1568
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1569
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1570
0
    }
1571
1572
        /* Set minw, minh if default is requested */
1573
0
    minw = (minw != 0) ? minw : (l_int32)(0.12 * res);
1574
0
    minh = (minh != 0) ? minh : (l_int32)(0.07 * res);
1575
1576
        /* Remove line components that are too small */
1577
0
    pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH,
1578
0
                           L_SELECT_IF_GTE, NULL);
1579
0
    if (pixadb) {
1580
0
        pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1581
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1582
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1583
0
        pix1 = pixConvertTo32(pix2);
1584
0
        pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0);
1585
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1586
0
    }
1587
1588
        /* Selectively AND with the version before dilation, and save */
1589
0
    boxa2 = pixaGetBoxa(pixa2, L_CLONE);
1590
0
    boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1591
0
    pixa3 = pixClipRectangles(pix2, boxa3);
1592
0
    if (pixadb) {
1593
0
        pix1 = pixaDisplayRandomCmap(pixa3, 0, 0);
1594
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1595
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1596
0
    }
1597
1598
0
    pixDestroy(&pix2);
1599
0
    pixDestroy(&pix3);
1600
0
    pixaDestroy(&pixa1);
1601
0
    pixaDestroy(&pixa2);
1602
0
    boxaDestroy(&boxa1);
1603
0
    boxaDestroy(&boxa2);
1604
0
    boxaDestroy(&boxa3);
1605
0
    return pixa3;
1606
0
}
1607
1608
1609
/*!
1610
 * \brief   pixExtractRawTextlines()
1611
 *
1612
 * \param[in]    pixs        any depth, assumed to have nearly horizontal text
1613
 * \param[in]    maxw, maxh  initial filtering: remove any components in pixs
1614
 *                           with components larger than maxw or maxh;
1615
 *                           use 0 for default values.
1616
 * \param[in]    adjw, adjh  final adjustment of boxes representing each
1617
 *                           text line.  If > 0, these increase the box
1618
 *                           size at each edge by this amount.
1619
 * \param[in]    pixadb      pixa for saving intermediate steps; NULL to omit
1620
 * \return  pixa of textline images, including bounding boxes, or
1621
 *                    NULL on error
1622
 *
1623
 * <pre>
1624
 * Notes:
1625
 *      (1) This function assumes that textlines have sufficient
1626
 *          vertical separation and small enough skew so that a
1627
 *          horizontal dilation sufficient to join words will not join
1628
 *          textlines.  It aggressively joins textlines across multiple
1629
 *          columns, so if that is not desired, you must either (a) make
1630
 *          sure that %pixs is a single column of text or (b) use instead
1631
 *          pixExtractTextlines(), which is more conservative
1632
 *          about joining text fragments that have vertical overlap.
1633
 *      (2) This first removes components from pixs that are either
1634
 *          very wide (> %maxw) or very tall (> %maxh).
1635
 *      (3) For reasonable accuracy, the resolution of pixs should be
1636
 *          at least 100 ppi.  For reasonable efficiency, the resolution
1637
 *          should not exceed 600 ppi.
1638
 *      (4) This can be used to determine if some region of a scanned
1639
 *          image is horizontal text.
1640
 *      (5) As an example, for a pix with resolution 300 ppi, a reasonable
1641
 *          set of parameters is:
1642
 *             pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
1643
 *      (6) The output pixa is composed of subimages, one for each textline,
1644
 *          and the boxa in the pixa tells where in %pixs each textline goes.
1645
 * </pre>
1646
 */
1647
PIXA *
1648
pixExtractRawTextlines(PIX     *pixs,
1649
                       l_int32  maxw,
1650
                       l_int32  maxh,
1651
                       l_int32  adjw,
1652
                       l_int32  adjh,
1653
                       PIXA    *pixadb)
1654
0
{
1655
0
char     buf[64];
1656
0
l_int32  res, csize, empty;
1657
0
BOXA    *boxa1, *boxa2, *boxa3;
1658
0
BOXAA   *baa1;
1659
0
PIX     *pix1, *pix2, *pix3;
1660
0
PIXA    *pixa1, *pixa2;
1661
1662
0
    if (!pixs)
1663
0
        return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1664
1665
        /* Set maxw, maxh if default is requested */
1666
0
    if ((res = pixGetXRes(pixs)) == 0) {
1667
0
        L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1668
0
        res = 300;
1669
0
    }
1670
0
    maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res);
1671
0
    maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res);
1672
1673
        /* Binarize carefully, if necessary */
1674
0
    if (pixGetDepth(pixs) > 1) {
1675
0
        pix2 = pixConvertTo8(pixs, FALSE);
1676
0
        pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1677
0
        pix1 = pixThresholdToBinary(pix3, 150);
1678
0
        pixDestroy(&pix2);
1679
0
        pixDestroy(&pix3);
1680
0
    } else {
1681
0
        pix1 = pixClone(pixs);
1682
0
    }
1683
0
    pixZero(pix1, &empty);
1684
0
    if (empty) {
1685
0
        pixDestroy(&pix1);
1686
0
        L_INFO("no fg pixels in input image\n", __func__);
1687
0
        return NULL;
1688
0
    }
1689
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1690
1691
        /* Remove any very tall or very wide connected components */
1692
0
    pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1693
0
                           L_SELECT_IF_LT, NULL);
1694
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1695
0
    pixDestroy(&pix1);
1696
1697
        /* Filter to solidify the text lines within the x-height region.
1698
         * The closing (csize) bridges gaps between words. */
1699
0
    csize = L_MIN(120., 60.0 * res / 300.0);
1700
0
    snprintf(buf, sizeof(buf), "c%d.1", csize);
1701
0
    pix3 = pixMorphCompSequence(pix2, buf, 0);
1702
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1703
1704
        /* Extract the connected components.  These should be dilated lines */
1705
0
    boxa1 = pixConnComp(pix3, &pixa1, 4);
1706
0
    if (pixadb) {
1707
0
        pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1708
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1709
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1710
0
    }
1711
1712
        /* Do a 2-d sort, and generate a bounding box for each set of text
1713
         * line segments that is aligned horizontally (i.e., has vertical
1714
         * overlap) into a box representing a single text line. */
1715
0
    baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5);
1716
0
    boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2);
1717
0
    if (pixadb) {
1718
0
        pix1 = pixConvertTo32(pix2);
1719
0
        pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0);
1720
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1721
0
    }
1722
1723
        /* Optionally adjust the sides of each text line box, and then
1724
         * use the boxes to generate a pixa of the text lines. */
1725
0
    boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1726
0
    pixa2 = pixClipRectangles(pix2, boxa3);
1727
0
    if (pixadb) {
1728
0
        pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1729
0
        pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1730
0
        pixaAddPix(pixadb, pix1, L_INSERT);
1731
0
    }
1732
1733
0
    pixDestroy(&pix2);
1734
0
    pixDestroy(&pix3);
1735
0
    pixaDestroy(&pixa1);
1736
0
    boxaDestroy(&boxa1);
1737
0
    boxaDestroy(&boxa2);
1738
0
    boxaDestroy(&boxa3);
1739
0
    boxaaDestroy(&baa1);
1740
0
    return pixa2;
1741
0
}
1742
1743
1744
/*------------------------------------------------------------------*
1745
 *                      How many text columns                       *
1746
 *------------------------------------------------------------------*/
1747
/*!
1748
 * \brief   pixCountTextColumns()
1749
 *
1750
 * \param[in]    pixs        1 bpp
1751
 * \param[in]    deltafract  fraction of (max - min) to be used in the delta
1752
 *                           for extrema finding; typ 0.3
1753
 * \param[in]    peakfract   fraction of (max - min) to be used to threshold
1754
 *                            the peak value; typ. 0.5
1755
 * \param[in]    clipfract   fraction of image dimension removed on each side;
1756
 *                           typ. 0.1, which leaves w and h reduced by 0.8
1757
 * \param[out]   pncols      number of columns; -1 if not determined
1758
 * \param[in]    pixadb      [optional] pre-allocated, for showing
1759
 *                           intermediate computation; use null to skip
1760
 * \return  0 if OK, 1 on error
1761
 *
1762
 * <pre>
1763
 * Notes:
1764
 *      (1) It is assumed that pixs has the correct resolution set.
1765
 *          If the resolution is 0, we set to 300 and issue a warning.
1766
 *      (2) If necessary, the image is scaled to between 37 and 75 ppi;
1767
 *          most of the processing is done at this resolution.
1768
 *      (3) If no text is found (essentially a blank page),
1769
 *          this returns ncols = 0.
1770
 *      (4) For debug output, input a pre-allocated pixa.
1771
 * </pre>
1772
 */
1773
l_ok
1774
pixCountTextColumns(PIX       *pixs,
1775
                    l_float32  deltafract,
1776
                    l_float32  peakfract,
1777
                    l_float32  clipfract,
1778
                    l_int32   *pncols,
1779
                    PIXA      *pixadb)
1780
0
{
1781
0
l_int32    w, h, res, i, n, npeak;
1782
0
l_float32  scalefact, redfact, minval, maxval, val4, val5, fract;
1783
0
BOX       *box;
1784
0
NUMA      *na1, *na2, *na3, *na4, *na5;
1785
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5;
1786
1787
0
    if (!pncols)
1788
0
        return ERROR_INT("&ncols not defined", __func__, 1);
1789
0
    *pncols = -1;  /* init */
1790
0
    if (!pixs || pixGetDepth(pixs) != 1)
1791
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1792
0
    if (deltafract < 0.15 || deltafract > 0.75)
1793
0
        L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__);
1794
0
    if (peakfract < 0.25 || peakfract > 0.9)
1795
0
        L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__);
1796
0
    if (clipfract < 0.0 || clipfract >= 0.5)
1797
0
        return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1);
1798
0
    if (pixadb) pixaAddPix(pixadb, pixs, L_COPY);
1799
1800
        /* Scale to between 37.5 and 75 ppi */
1801
0
    if ((res = pixGetXRes(pixs)) == 0) {
1802
0
        L_WARNING("resolution undefined; set to 300\n", __func__);
1803
0
        pixSetResolution(pixs, 300, 300);
1804
0
        res = 300;
1805
0
    }
1806
0
    if (res < 37) {
1807
0
        L_WARNING("resolution %d very low\n", __func__, res);
1808
0
        scalefact = 37.5 / res;
1809
0
        pix1 = pixScale(pixs, scalefact, scalefact);
1810
0
    } else {
1811
0
        redfact = (l_float32)res / 37.5;
1812
0
        if (redfact < 2.0)
1813
0
            pix1 = pixClone(pixs);
1814
0
        else if (redfact < 4.0)
1815
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1816
0
        else if (redfact < 8.0)
1817
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0);
1818
0
        else if (redfact < 16.0)
1819
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0);
1820
0
        else
1821
0
            pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2);
1822
0
    }
1823
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1824
1825
        /* Crop inner 80% of image */
1826
0
    pixGetDimensions(pix1, &w, &h, NULL);
1827
0
    box = boxCreate(clipfract * w, clipfract * h,
1828
0
                    (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h);
1829
0
    pix2 = pixClipRectangle(pix1, box, NULL);
1830
0
    pixGetDimensions(pix2, &w, &h, NULL);
1831
0
    boxDestroy(&box);
1832
0
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1833
1834
        /* Deskew */
1835
0
    pix3 = pixDeskew(pix2, 0);
1836
0
    if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1837
1838
        /* Close to increase column counts for text */
1839
0
    pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21);
1840
0
    if (pixadb) pixaAddPix(pixadb, pix4, L_COPY);
1841
0
    pixInvert(pix4, pix4);
1842
0
    na1 = pixCountByColumn(pix4, NULL);
1843
1844
0
    if (pixadb) {
1845
0
        gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL);
1846
0
        pix5 = pixRead("/tmp/lept/plot.png");
1847
0
        pixaAddPix(pixadb, pix5, L_INSERT);
1848
0
    }
1849
1850
        /* Analyze the column counts.  na4 gives the locations of
1851
         * the extrema in normalized units (0.0 to 1.0) across the
1852
         * cropped image.  na5 gives the magnitude of the
1853
         * extrema, normalized to the dynamic range.  The peaks
1854
         * are values that are at least peakfract of (max - min). */
1855
0
    numaGetMax(na1, &maxval, NULL);
1856
0
    numaGetMin(na1, &minval, NULL);
1857
0
    fract = (l_float32)(maxval - minval) / h;  /* is there much at all? */
1858
0
    if (fract < 0.05) {
1859
0
        L_INFO("very little content on page; 0 text columns\n", __func__);
1860
0
        *pncols = 0;
1861
0
    } else {
1862
0
        na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3);
1863
0
        na4 = numaTransform(na2, 0, 1.0 / w);
1864
0
        na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval));
1865
0
        n = numaGetCount(na4);
1866
0
        for (i = 0, npeak = 0; i < n; i++) {
1867
0
            numaGetFValue(na4, i, &val4);
1868
0
            numaGetFValue(na5, i, &val5);
1869
0
            if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) {
1870
0
                npeak++;
1871
0
                L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5);
1872
0
            }
1873
0
        }
1874
0
        *pncols = npeak + 1;
1875
0
        numaDestroy(&na2);
1876
0
        numaDestroy(&na3);
1877
0
        numaDestroy(&na4);
1878
0
        numaDestroy(&na5);
1879
0
    }
1880
1881
0
    pixDestroy(&pix1);
1882
0
    pixDestroy(&pix2);
1883
0
    pixDestroy(&pix3);
1884
0
    pixDestroy(&pix4);
1885
0
    numaDestroy(&na1);
1886
0
    return 0;
1887
0
}
1888
1889
1890
/*------------------------------------------------------------------*
1891
 *                      Decision text vs photo                      *
1892
 *------------------------------------------------------------------*/
1893
/*!
1894
 * \brief   pixDecideIfText()
1895
 *
1896
 * \param[in]    pixs     any depth
1897
 * \param[in]    box      [optional]  if null, use entire pixs
1898
 * \param[out]   pistext  1 if text; 0 if photo; -1 if not determined or empty
1899
 * \param[in]    pixadb   [optional] pre-allocated, for showing intermediate
1900
 *                        computation; use NULL to skip
1901
 * \return  0 if OK, 1 on error
1902
 *
1903
 * <pre>
1904
 * Notes:
1905
 *      (1) It is assumed that pixs has the correct resolution set.
1906
 *          If the resolution is 0, we set to 300 and issue a warning.
1907
 *      (2) If necessary, the image is scaled to 300 ppi; most of the
1908
 *          processing is done at this resolution.
1909
 *      (3) Text is assumed to be in horizontal lines.
1910
 *      (4) Because thin vertical lines are removed before filtering for
1911
 *          text lines, this should identify tables as text.
1912
 *      (5) If %box is null and pixs contains both text lines and line art,
1913
 *          this function might return %istext == true.
1914
 *      (6) If the input pixs is empty, or for some other reason the
1915
 *          result can not be determined, return -1.
1916
 *      (7) For debug output, input a pre-allocated pixa.
1917
 * </pre>
1918
 */
1919
l_ok
1920
pixDecideIfText(PIX      *pixs,
1921
                BOX      *box,
1922
                l_int32  *pistext,
1923
                PIXA     *pixadb)
1924
0
{
1925
0
l_int32    i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp;
1926
0
l_float32  ratio1, ratio2;
1927
0
L_BMF     *bmf;
1928
0
BOXA      *boxa1, *boxa2, *boxa3, *boxa4, *boxa5;
1929
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7;
1930
0
PIXA      *pixa1;
1931
0
SEL       *sel1;
1932
1933
0
    if (!pistext)
1934
0
        return ERROR_INT("&istext not defined", __func__, 1);
1935
0
    *pistext = -1;
1936
0
    if (!pixs)
1937
0
        return ERROR_INT("pixs not defined", __func__, 1);
1938
1939
        /* Crop, convert to 1 bpp, 300 ppi */
1940
0
    if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL)
1941
0
        return ERROR_INT("pix1 not made", __func__, 1);
1942
1943
0
    pixZero(pix1, &empty);
1944
0
    if (empty) {
1945
0
        pixDestroy(&pix1);
1946
0
        L_INFO("pix is empty\n", __func__);
1947
0
        return 0;
1948
0
    }
1949
0
    w = pixGetWidth(pix1);
1950
1951
        /* Identify and remove tall, thin vertical lines (as found in tables)
1952
         * that are up to 9 pixels wide.  Make a hit-miss sel with an
1953
         * 81 pixel vertical set of hits and with 3 pairs of misses that
1954
         * are 10 pixels apart horizontally.  It is necessary to use a
1955
         * hit-miss transform; if we only opened with a vertical line of
1956
         * hits, we would remove solid regions of pixels that are not
1957
         * text or vertical lines. */
1958
0
    pix2 = pixCreate(11, 81, 1);
1959
0
    for (i = 0; i < 81; i++)
1960
0
        pixSetPixel(pix2, 5, i, 1);
1961
0
    sel1 = selCreateFromPix(pix2, 40, 5, NULL);
1962
0
    selSetElement(sel1, 20, 0, SEL_MISS);
1963
0
    selSetElement(sel1, 20, 10, SEL_MISS);
1964
0
    selSetElement(sel1, 40, 0, SEL_MISS);
1965
0
    selSetElement(sel1, 40, 10, SEL_MISS);
1966
0
    selSetElement(sel1, 60, 0, SEL_MISS);
1967
0
    selSetElement(sel1, 60, 10, SEL_MISS);
1968
0
    pix3 = pixHMT(NULL, pix1, sel1);
1969
0
    pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000);
1970
0
    pix5 = pixXor(NULL, pix1, pix4);
1971
0
    pixDestroy(&pix2);
1972
0
    selDestroy(&sel1);
1973
1974
        /* Convert the text lines to separate long horizontal components */
1975
0
    pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0);
1976
1977
        /* Estimate the distance to the bottom of the significant region */
1978
0
    if (box) {  /* use full height */
1979
0
        pixGetDimensions(pix6, NULL, &h, NULL);
1980
0
    } else {  /* use height of region that has text lines */
1981
0
        pixFindThreshFgExtent(pix6, 400, NULL, &h);
1982
0
    }
1983
1984
0
    if (pixadb) {
1985
0
        bmf = bmfCreate(NULL, 6);
1986
0
        pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary",
1987
0
                           0x0000ff00, L_ADD_BELOW);
1988
0
        pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line",
1989
0
                           0x0000ff00, L_ADD_BELOW);
1990
0
        pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill",
1991
0
                           0x0000ff00, L_ADD_BELOW);
1992
0
        pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor",
1993
0
                           0x0000ff00, L_ADD_BELOW);
1994
0
        pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components",
1995
0
                           0x0000ff00, L_ADD_BELOW);
1996
0
    }
1997
1998
        /* Extract the connected components */
1999
0
    if (pixadb) {
2000
0
        boxa1 = pixConnComp(pix6, &pixa1, 8);
2001
0
        pix7 = pixaDisplayRandomCmap(pixa1, 0, 0);
2002
0
        pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255);
2003
0
        pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components",
2004
0
                           0x0000ff00, L_ADD_BELOW);
2005
0
        pixDestroy(&pix7);
2006
0
        pixaDestroy(&pixa1);
2007
0
        bmfDestroy(&bmf);
2008
0
    } else {
2009
0
        boxa1 = pixConnComp(pix6, NULL, 8);
2010
0
    }
2011
2012
        /* Analyze the connected components.  The following conditions
2013
         * at 300 ppi must be satisfied if the image is text:
2014
         * (1) There are no components that are wider than 400 pixels and
2015
         *     taller than 175 pixels.
2016
         * (2) The second longest component is at least 60% of the
2017
         *     (possibly cropped) image width.  This catches images
2018
         *     that don't have any significant content.
2019
         * (3) Of the components that are at least 40% of the length
2020
         *     of the longest (n2), at least 80% of them must not exceed
2021
         *     60 pixels in height.
2022
         * (4) The number of those long, thin components (n3) must
2023
         *     equal or exceed a minimum that scales linearly with the
2024
         *     image height.
2025
         * Most images that are not text fail more than one of these
2026
         * conditions. */
2027
0
    boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL);
2028
0
    boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL);  /* 2nd longest */
2029
0
    boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH,
2030
0
                             L_SELECT_IF_GTE, NULL);
2031
0
    boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT,
2032
0
                             L_SELECT_IF_LTE, NULL);
2033
0
    boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH,
2034
0
                             L_SELECT_IF_GT, NULL);
2035
0
    big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1;
2036
0
    n1 = boxaGetCount(boxa1);
2037
0
    n2 = boxaGetCount(boxa3);
2038
0
    n3 = boxaGetCount(boxa4);
2039
0
    ratio1 = (l_float32)maxw / (l_float32)w;
2040
0
    ratio2 = (l_float32)n3 / (l_float32)n2;
2041
0
    minlines = L_MAX(2, h / 125);
2042
0
    if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines)
2043
0
        *pistext = 0;
2044
0
    else
2045
0
        *pistext = 1;
2046
0
    if (pixadb) {
2047
0
        if (*pistext == 1) {
2048
0
            L_INFO("This is text: \n  n1 = %d, n2 = %d, n3 = %d, "
2049
0
                   "minlines = %d\n  maxw = %d, ratio1 = %4.2f, h = %d, "
2050
0
                   "big_comp = %d\n", __func__, n1, n2, n3, minlines,
2051
0
                   maxw, ratio1, h, big_comp);
2052
0
        } else {
2053
0
            L_INFO("This is not text: \n  n1 = %d, n2 = %d, n3 = %d, "
2054
0
                   "minlines = %d\n  maxw = %d, ratio1 = %4.2f, h = %d, "
2055
0
                   "big_comp = %d\n", __func__, n1, n2, n3, minlines,
2056
0
                   maxw, ratio1, h, big_comp);
2057
0
        }
2058
0
    }
2059
2060
0
    boxaDestroy(&boxa1);
2061
0
    boxaDestroy(&boxa2);
2062
0
    boxaDestroy(&boxa3);
2063
0
    boxaDestroy(&boxa4);
2064
0
    boxaDestroy(&boxa5);
2065
0
    pixDestroy(&pix1);
2066
0
    pixDestroy(&pix3);
2067
0
    pixDestroy(&pix4);
2068
0
    pixDestroy(&pix5);
2069
0
    pixDestroy(&pix6);
2070
0
    return 0;
2071
0
}
2072
2073
2074
/*!
2075
 * \brief   pixFindThreshFgExtent()
2076
 *
2077
 * \param[in]    pixs     1 bpp
2078
 * \param[in]    thresh   threshold number of pixels in row
2079
 * \param[out]   ptop     [optional] location of top of region
2080
 * \param[out]   pbot     [optional] location of bottom of region
2081
 * \return  0 if OK, 1 on error
2082
 */
2083
l_ok
2084
pixFindThreshFgExtent(PIX      *pixs,
2085
                      l_int32   thresh,
2086
                      l_int32  *ptop,
2087
                      l_int32  *pbot)
2088
0
{
2089
0
l_int32   i, n;
2090
0
l_int32  *array;
2091
0
NUMA     *na;
2092
2093
0
    if (ptop) *ptop = 0;
2094
0
    if (pbot) *pbot = 0;
2095
0
    if (!ptop && !pbot)
2096
0
        return ERROR_INT("nothing to determine", __func__, 1);
2097
0
    if (!pixs || pixGetDepth(pixs) != 1)
2098
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
2099
2100
0
    na = pixCountPixelsByRow(pixs, NULL);
2101
0
    n = numaGetCount(na);
2102
0
    array = numaGetIArray(na);
2103
0
    if (ptop) {
2104
0
        for (i = 0; i < n; i++) {
2105
0
            if (array[i] >= thresh) {
2106
0
                *ptop = i;
2107
0
                break;
2108
0
            }
2109
0
        }
2110
0
    }
2111
0
    if (pbot) {
2112
0
        for (i = n - 1; i >= 0; i--) {
2113
0
            if (array[i] >= thresh) {
2114
0
                *pbot = i;
2115
0
                break;
2116
0
            }
2117
0
        }
2118
0
    }
2119
0
    LEPT_FREE(array);
2120
0
    numaDestroy(&na);
2121
0
    return 0;
2122
0
}
2123
2124
2125
/*------------------------------------------------------------------*
2126
 *                     Decision: table vs text                      *
2127
 *------------------------------------------------------------------*/
2128
/*!
2129
 * \brief   pixDecideIfTable()
2130
 *
2131
 * \param[in]    pixs      any depth, any resolution >= 75 ppi
2132
 * \param[in]    box       [optional] if null, use entire pixs
2133
 * \param[in]    orient    L_PORTRAIT_MODE, L_LANDSCAPE_MODE
2134
 * \param[out]   pscore    0 - 4; -1 if not determined
2135
 * \param[in]    pixadb    [optional] pre-allocated, for showing intermediate
2136
 *                         computation; use NULL to skip
2137
 * \return  0 if OK, 1 on error
2138
 *
2139
 * <pre>
2140
 * Notes:
2141
 *      (1) It is assumed that pixs has the correct resolution set.
2142
 *          If the resolution is 0, we assume it is 300 ppi and issue a warning.
2143
 *      (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
2144
 *          clockwise before being analyzed.
2145
 *      (3) The interpretation of the returned score:
2146
 *            -1     undetermined
2147
 *             0     no table
2148
 *             1     unlikely to have a table
2149
 *             2     likely to have a table
2150
 *             3     even more likely to have a table
2151
 *             4     extremely likely to have a table
2152
 *          * Setting the condition for finding a table at score >= 2 works
2153
 *            well, except for false positives on kanji and landscape text.
2154
 *          * These false positives can be removed by setting the condition
2155
 *            at score >= 3, but recall is lowered because it will not find
2156
 *            tables without either horizontal or vertical lines.
2157
 *      (4) Most of the processing takes place at 75 ppi.
2158
 *      (5) Internally, three numbers are determined, for horizontal and
2159
 *          vertical fg lines, and for vertical bg lines.  From these,
2160
 *          four tests are made to decide if there is a table occupying
2161
 *          a significant part of the image.
2162
 *      (6) Images have arbitrary content and would be likely to trigger
2163
 *          this detector, so they are checked for first, and if found,
2164
 *          return with a 0 (no table) score.
2165
 *      (7) Musical scores (tablature) are likely to trigger the detector.
2166
 *      (8) Tables of content with more than 2 columns are likely to
2167
 *          trigger the detector.
2168
 *      (9) For debug output, input a pre-allocated pixa.
2169
 * </pre>
2170
 */
2171
l_ok
2172
pixDecideIfTable(PIX      *pixs,
2173
                 BOX      *box,
2174
                 l_int32   orient,
2175
                 l_int32  *pscore,
2176
                 PIXA     *pixadb)
2177
0
{
2178
0
l_int32  empty, nhb, nvb, nvw, score, htfound;
2179
0
PIX     *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
2180
2181
0
    if (!pscore)
2182
0
        return ERROR_INT("&score not defined", __func__, 1);
2183
0
    *pscore = -1;
2184
0
    if (!pixs)
2185
0
        return ERROR_INT("pixs not defined", __func__, 1);
2186
2187
        /* Check if there is an image region.  First convert to 1 bpp
2188
         * at 175 ppi.  If an image is found, assume there is no table.  */
2189
0
    pix1 = pixPrepare1bpp(pixs, box, 0.1, 175);
2190
0
    pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL);
2191
0
    if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY);
2192
0
    pixDestroy(&pix1);
2193
0
    pixDestroy(&pix2);
2194
0
    if (htfound) {
2195
0
        *pscore = 0;
2196
0
        L_INFO("pix has an image region\n", __func__);
2197
0
        return 0;
2198
0
    }
2199
2200
        /* Crop, convert to 1 bpp, 75 ppi */
2201
0
    if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL)
2202
0
        return ERROR_INT("pix1 not made", __func__, 1);
2203
2204
0
    pixZero(pix1, &empty);
2205
0
    if (empty) {
2206
0
        *pscore = 0;
2207
0
        pixDestroy(&pix1);
2208
0
        L_INFO("pix is empty\n", __func__);
2209
0
        return 0;
2210
0
    }
2211
2212
        /* The 2x2 dilation on 75 ppi makes these two approaches very similar:
2213
         * (1) pix1 = pixPrepare1bpp(..., 300);  // 300 ppi resolution
2214
         *     pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
2215
         * (2) pix1 = pixPrepare1bpp(..., 75);  // 75 ppi resolution
2216
         *     pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2217
         * But (2) is more efficient if the input image to pixPrepare1bpp()
2218
         * is not at 300 ppi.   */
2219
0
    pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2220
2221
        /* Deskew both horizontally and vertically; rotate by 90
2222
         * degrees if in landscape mode. */
2223
0
    pix3 = pixDeskewBoth(pix2, 1);
2224
0
    if (pixadb) {
2225
0
        pixaAddPix(pixadb, pix2, L_COPY);
2226
0
        pixaAddPix(pixadb, pix3, L_COPY);
2227
0
    }
2228
0
    if (orient == L_LANDSCAPE_MODE)
2229
0
        pix4 = pixRotate90(pix3, 1);
2230
0
    else
2231
0
        pix4 = pixClone(pix3);
2232
0
    pixDestroy(&pix1);
2233
0
    pixDestroy(&pix2);
2234
0
    pixDestroy(&pix3);
2235
0
    pix1 = pixClone(pix4);
2236
0
    pixDestroy(&pix4);
2237
2238
        /* Look for horizontal and vertical lines */
2239
0
    pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0);
2240
0
    pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8);
2241
0
    pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0);
2242
0
    pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8);
2243
0
    pix6 = pixOr(NULL, pix3, pix5);
2244
0
    if (pixadb) {
2245
0
        pixaAddPix(pixadb, pix2, L_COPY);
2246
0
        pixaAddPix(pixadb, pix4, L_COPY);
2247
0
        pixaAddPix(pixadb, pix3, L_COPY);
2248
0
        pixaAddPix(pixadb, pix5, L_COPY);
2249
0
        pixaAddPix(pixadb, pix6, L_COPY);
2250
0
    }
2251
0
    pixCountConnComp(pix2, 8, &nhb);  /* number of horizontal black lines */
2252
0
    pixCountConnComp(pix4, 8, &nvb);  /* number of vertical black lines */
2253
2254
        /* Remove the lines */
2255
0
    pixSubtract(pix1, pix1, pix6);
2256
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2257
2258
        /* Remove noise pixels */
2259
0
    pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0);
2260
0
    if (pixadb) pixaAddPix(pixadb, pix7, L_COPY);
2261
2262
        /* Look for vertical white space.  Invert to convert white bg
2263
         * to fg.  Use a single rank-1 2x reduction, which closes small
2264
         * fg holes, for the final processing at 37.5 ppi.
2265
         * The vertical opening is then about 3 inches on a 300 ppi image.
2266
         * We also remove vertical whitespace that is less than 5 pixels
2267
         * wide at this resolution (about 0.1 inches) */
2268
0
    pixInvert(pix7, pix7);
2269
0
    pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0);
2270
0
    pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH,
2271
0
                           L_SELECT_IF_GTE, NULL);
2272
0
    pixCountConnComp(pix9, 8, &nvw);  /* number of vertical white lines */
2273
0
    if (pixadb) {
2274
0
        pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT);
2275
0
        pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT);
2276
0
    }
2277
2278
        /* Require at least 2 of the following 4 conditions for a table.
2279
         * Some tables do not have black (fg) lines, and for those we
2280
         * require more than 6 long vertical whitespace (bg) lines.  */
2281
0
    score = 0;
2282
0
    if (nhb > 1) score++;
2283
0
    if (nvb > 2) score++;
2284
0
    if (nvw > 3) score++;
2285
0
    if (nvw > 6) score++;
2286
0
    *pscore = score;
2287
2288
0
    pixDestroy(&pix1);
2289
0
    pixDestroy(&pix2);
2290
0
    pixDestroy(&pix3);
2291
0
    pixDestroy(&pix4);
2292
0
    pixDestroy(&pix5);
2293
0
    pixDestroy(&pix6);
2294
0
    pixDestroy(&pix7);
2295
0
    pixDestroy(&pix8);
2296
0
    pixDestroy(&pix9);
2297
0
    return 0;
2298
0
}
2299
2300
2301
/*!
2302
 * \brief   pixPrepare1bpp()
2303
 *
2304
 * \param[in]    pixs       any depth
2305
 * \param[in]    box        [optional] if null, use entire pixs
2306
 * \param[in]    cropfract  fraction to be removed from the boundary;
2307
 *                          use 0.0 to retain the entire image
2308
 * \param[in]    outres     desired resolution of output image; if the
2309
 *                          input image resolution is not set, assume
2310
 *                          300 ppi; use 0 to skip scaling.
2311
 * \return  pixd if OK, NULL on error
2312
 *
2313
 * <pre>
2314
 * Notes:
2315
 *      (1) This handles some common pre-processing operations,
2316
 *          where the page segmentation algorithm takes a 1 bpp image.
2317
 * </pre>
2318
 */
2319
PIX *
2320
pixPrepare1bpp(PIX       *pixs,
2321
               BOX       *box,
2322
               l_float32  cropfract,
2323
               l_int32    outres)
2324
0
{
2325
0
l_int32    w, h, res;
2326
0
l_float32  factor;
2327
0
BOX       *box1;
2328
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5;
2329
2330
0
    if (!pixs)
2331
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2332
2333
        /* Crop the image.  If no box is given, use %cropfract to remove
2334
         * pixels near the image boundary; this helps avoid false
2335
         * negatives from noise that is often found there. */
2336
0
    if (box) {
2337
0
        pix1 = pixClipRectangle(pixs, box, NULL);
2338
0
    } else {
2339
0
        pixGetDimensions(pixs, &w, &h, NULL);
2340
0
        box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h),
2341
0
                         (l_int32)((1.0 - 2 * cropfract) * w),
2342
0
                         (l_int32)((1.0 - 2 * cropfract) * h));
2343
0
        pix1 = pixClipRectangle(pixs, box1, NULL);
2344
0
        boxDestroy(&box1);
2345
0
    }
2346
2347
        /* Convert to 1 bpp with adaptive background cleaning */
2348
0
    if (pixGetDepth(pixs) > 1) {
2349
0
        pix2 = pixConvertTo8(pix1, 0);
2350
0
        pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160);
2351
0
        pixDestroy(&pix1);
2352
0
        pixDestroy(&pix2);
2353
0
        if (!pix3) {
2354
0
            L_INFO("pix cleaning failed\n", __func__);
2355
0
            return NULL;
2356
0
        }
2357
0
        pix4 = pixThresholdToBinary(pix3, 200);
2358
0
        pixDestroy(&pix3);
2359
0
    } else {
2360
0
        pix4 = pixClone(pix1);
2361
0
        pixDestroy(&pix1);
2362
0
    }
2363
2364
        /* Scale the image to the requested output resolution;
2365
           do not scale if %outres <= 0 */
2366
0
    if (outres <= 0)
2367
0
        return pix4;
2368
0
    if ((res = pixGetXRes(pixs)) == 0) {
2369
0
        L_WARNING("Resolution is not set: using 300 ppi\n", __func__);
2370
0
        res = 300;
2371
0
    }
2372
0
    if (res != outres) {
2373
0
        factor = (l_float32)outres / (l_float32)res;
2374
0
        pix5 = pixScale(pix4, factor, factor);
2375
0
    } else {
2376
0
        pix5 = pixClone(pix4);
2377
0
    }
2378
0
    pixDestroy(&pix4);
2379
0
    return pix5;
2380
0
}
2381
2382
2383
/*------------------------------------------------------------------*
2384
 *               Estimate the grayscale background value            *
2385
 *------------------------------------------------------------------*/
2386
/*!
2387
 * \brief   pixEstimateBackground()
2388
 *
2389
 * \param[in]    pixs         8 bpp, with or without colormap
2390
 * \param[in]    darkthresh   pixels below this value are never considered
2391
 *                            part of the background; typ. 70; use 0 to skip
2392
 * \param[in]    edgecrop     fraction of half-width on each side, and of
2393
 *                            half-height at top and bottom, that are cropped
2394
 * \param[out]   pbg          estimated background, or 0 on error
2395
 * \return  0 if OK, 1 on error
2396
 *
2397
 * <pre>
2398
 * Notes:
2399
 *      (1) Caller should check that return bg value is > 0.
2400
 * </pre>
2401
 */
2402
l_ok
2403
pixEstimateBackground(PIX       *pixs,
2404
                      l_int32    darkthresh,
2405
                      l_float32  edgecrop,
2406
                      l_int32   *pbg)
2407
0
{
2408
0
l_int32    w, h, sampling;
2409
0
l_float32  fbg;
2410
0
BOX       *box;
2411
0
PIX       *pix1, *pix2, *pixm;
2412
2413
0
    if (!pbg)
2414
0
        return ERROR_INT("&bg not defined", __func__, 1);
2415
0
    *pbg = 0;
2416
0
    if (!pixs || pixGetDepth(pixs) != 8)
2417
0
        return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1);
2418
0
    if (darkthresh > 128)
2419
0
        L_WARNING("darkthresh unusually large\n", __func__);
2420
0
    if (edgecrop < 0.0 || edgecrop >= 1.0)
2421
0
        return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1);
2422
2423
0
    pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
2424
0
    pixGetDimensions(pix1, &w, &h, NULL);
2425
2426
        /* Optionally crop inner part of image */
2427
0
    if (edgecrop > 0.0) {
2428
0
        box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h,
2429
0
                        (1.0 - edgecrop) * w, (1.0 - edgecrop) * h);
2430
0
        pix2 = pixClipRectangle(pix1, box, NULL);
2431
0
        boxDestroy(&box);
2432
0
    } else {
2433
0
        pix2 = pixClone(pix1);
2434
0
    }
2435
2436
        /* We will use no more than 50K samples */
2437
0
    sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5));
2438
2439
        /* Optionally make a mask over all pixels lighter than %darkthresh */
2440
0
    pixm = NULL;
2441
0
    if (darkthresh > 0) {
2442
0
        pixm = pixThresholdToBinary(pix2, darkthresh);
2443
0
        pixInvert(pixm, pixm);
2444
0
    }
2445
2446
0
    pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL);
2447
0
    *pbg = (l_int32)(fbg + 0.5);
2448
0
    pixDestroy(&pix1);
2449
0
    pixDestroy(&pix2);
2450
0
    pixDestroy(&pixm);
2451
0
    return 0;
2452
0
}
2453
2454
2455
/*---------------------------------------------------------------------*
2456
 *             Largest white or black rectangles in an image           *
2457
 *---------------------------------------------------------------------*/
2458
/*!
2459
 * \brief   pixFindLargeRectangles()
2460
 *
2461
 * \param[in]    pixs       1 bpp
2462
 * \param[in]    polarity   0 within background, 1 within foreground
2463
 * \param[in]    nrect      number of rectangles to be found
2464
 * \param[out]   pboxa      largest rectangles, sorted by decreasing area
2465
 * \param[in,out]  ppixdb   optional return output with rectangles drawn on it
2466
 * \return  0 if OK, 1 on error
2467
 *
2468
 * <pre>
2469
 * Notes:
2470
 *      (1) This does a greedy search to find the largest rectangles,
2471
 *          either black or white and without overlaps, in %pix.
2472
 *      (2) See pixFindLargestRectangle(), which is called multiple
2473
 *          times, for details.  On each call, the largest rectangle
2474
 *          found is painted, so that none of its pixels can be
2475
 *          used later, before calling it again.
2476
 *      (3) This function is surprisingly fast.  Although
2477
 *          pixFindLargestRectangle() runs at about 50 MPix/sec, when it
2478
 *          is run multiple times by pixFindLargeRectangles(), it processes
2479
 *          at 150 - 250 MPix/sec, and the time is approximately linear
2480
 *          in %nrect.  For example, for a 1 MPix image, searching for
2481
 *          the largest 50 boxes takes about 0.2 seconds.
2482
 * </pre>
2483
 */
2484
l_ok
2485
pixFindLargeRectangles(PIX          *pixs,
2486
                       l_int32       polarity,
2487
                       l_int32       nrect,
2488
                       BOXA        **pboxa,
2489
                       PIX         **ppixdb)
2490
0
{
2491
0
l_int32  i, op, bx, by, bw, bh;
2492
0
BOX     *box;
2493
0
BOXA    *boxa;
2494
0
PIX     *pix;
2495
2496
0
    if (ppixdb) *ppixdb = NULL;
2497
0
    if (!pboxa)
2498
0
        return ERROR_INT("&boxa not defined", __func__, 1);
2499
0
    *pboxa = NULL;
2500
0
    if (!pixs || pixGetDepth(pixs) != 1)
2501
0
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
2502
0
    if (polarity != 0 && polarity != 1)
2503
0
        return ERROR_INT("invalid polarity", __func__, 1);
2504
0
    if (nrect > 1000) {
2505
0
        L_WARNING("large num rectangles = %d requested; using 1000\n",
2506
0
                  __func__, nrect);
2507
0
        nrect = 1000;
2508
0
    }
2509
2510
0
    pix = pixCopy(NULL, pixs);
2511
0
    boxa = boxaCreate(nrect);
2512
0
    *pboxa = boxa;
2513
2514
        /* Sequentially find largest rectangle and fill with opposite color */
2515
0
    for (i = 0; i < nrect; i++) {
2516
0
        if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) {
2517
0
            boxDestroy(&box);
2518
0
            L_ERROR("failure in pixFindLargestRectangle\n", __func__);
2519
0
            break;
2520
0
        }
2521
0
        boxaAddBox(boxa, box, L_INSERT);
2522
0
        op = (polarity == 0) ? PIX_SET : PIX_CLR;
2523
0
        boxGetGeometry(box, &bx, &by, &bw, &bh);
2524
0
        pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0);
2525
0
    }
2526
2527
0
    if (ppixdb)
2528
0
        *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3);
2529
2530
0
    pixDestroy(&pix);
2531
0
    return 0;
2532
0
}
2533
2534
2535
/*!
2536
 * \brief   pixFindLargestRectangle()
2537
 *
2538
 * \param[in]    pixs       1 bpp
2539
 * \param[in]    polarity   0 within background, 1 within foreground
2540
 * \param[out]   pbox       largest area rectangle
2541
 * \param[in,out]  ppixdb   optional return output with rectangle drawn on it
2542
 * \return  0 if OK, 1 on error
2543
 *
2544
 * <pre>
2545
 * Notes:
2546
 *      (1) This is a simple and elegant solution to a problem in
2547
 *          computational geometry that at first appears to be quite
2548
 *          difficult: what is the largest rectangle that can be
2549
 *          placed in the image, covering only pixels of one polarity
2550
 *          (bg or fg)?  The solution is O(n), where n is the number
2551
 *          of pixels in the image, and it requires nothing more than
2552
 *          using a simple recursion relation in a single sweep of the image.
2553
 *      (2) In a sweep from UL to LR with left-to-right being the fast
2554
 *          direction, calculate the largest white rectangle at (x, y),
2555
 *          using previously calculated values at pixels #1 and #2:
2556
 *             #1:    (x, y - 1)
2557
 *             #2:    (x - 1, y)
2558
 *          We also need the most recent "black" pixels that were seen
2559
 *          in the current row and column.
2560
 *          Consider the largest area.  There are only two possibilities:
2561
 *             (a)  Min(w(1), horizdist) * (h(1) + 1)
2562
 *             (b)  Min(h(2), vertdist) * (w(2) + 1)
2563
 *          where
2564
 *             horizdist: the distance from the rightmost "black" pixel seen
2565
 *                        in the current row across to the current pixel
2566
 *             vertdist: the distance from the lowest "black" pixel seen
2567
 *                       in the current column down to the current pixel
2568
 *          and we choose the Max of (a) and (b).
2569
 *      (3) To convince yourself that these recursion relations are correct,
2570
 *          it helps to draw the maximum rectangles at #1 and #2.
2571
 *          Then for #1, you try to extend the rectangle down one line,
2572
 *          so that the height is h(1) + 1.  Do you get the full
2573
 *          width of #1, w(1)?  It depends on where the black pixels are
2574
 *          in the current row.  You know the final width is bounded by w(1)
2575
 *          and w(2) + 1, but the actual value depends on the distribution
2576
 *          of black pixels in the current row that are at a distance
2577
 *          from the current pixel that is between these limits.
2578
 *          We call that value "horizdist", and the area is then given
2579
 *          by the expression (a) above.  Using similar reasoning for #2,
2580
 *          where you attempt to extend the rectangle to the right
2581
 *          by 1 pixel, you arrive at (b).  The largest rectangle is
2582
 *          then found by taking the Max.
2583
 * </pre>
2584
 */
2585
l_ok
2586
pixFindLargestRectangle(PIX         *pixs,
2587
                        l_int32      polarity,
2588
                        BOX        **pbox,
2589
                        PIX        **ppixdb)
2590
0
{
2591
0
l_int32    i, j, w, h, d, wpls, val;
2592
0
l_int32    wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2;
2593
0
l_int32    xmax, ymax;  /* LR corner of the largest rectangle */
2594
0
l_int32    maxarea, wmax, hmax, vertdist, horizdist, prevfg;
2595
0
l_int32   *lowestfg;
2596
0
l_uint32  *datas, *lines;
2597
0
l_uint32 **linew, **lineh;
2598
0
BOX       *box;
2599
0
PIX       *pixw, *pixh;  /* keeps the width and height for the largest */
2600
                         /* rectangles whose LR corner is located there. */
2601
2602
0
    if (ppixdb) *ppixdb = NULL;
2603
0
    if (!pbox)
2604
0
        return ERROR_INT("&box not defined", __func__, 1);
2605
0
    *pbox = NULL;
2606
0
    if (!pixs)
2607
0
        return ERROR_INT("pixs not defined", __func__, 1);
2608
0
    pixGetDimensions(pixs, &w, &h, &d);
2609
0
    if (d != 1)
2610
0
        return ERROR_INT("pixs not 1 bpp", __func__, 1);
2611
0
    if (polarity != 0 && polarity != 1)
2612
0
        return ERROR_INT("invalid polarity", __func__, 1);
2613
2614
        /* Initialize lowest "fg" seen so far for each column */
2615
0
    lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
2616
0
    for (i = 0; i < w; i++)
2617
0
        lowestfg[i] = -1;
2618
2619
        /* The combination (val ^ polarity) is the color for which we
2620
         * are searching for the maximum rectangle.  For polarity == 0,
2621
         * we search in the bg (white). */
2622
0
    pixw = pixCreate(w, h, 32);  /* stores width */
2623
0
    pixh = pixCreate(w, h, 32);  /* stores height */
2624
0
    linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL);
2625
0
    lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL);
2626
0
    datas = pixGetData(pixs);
2627
0
    wpls = pixGetWpl(pixs);
2628
0
    maxarea = xmax = ymax = wmax = hmax = 0;
2629
0
    for (i = 0; i < h; i++) {
2630
0
        lines = datas + i * wpls;
2631
0
        prevfg = -1;
2632
0
        for (j = 0; j < w; j++) {
2633
0
            val = GET_DATA_BIT(lines, j);
2634
0
            if ((val ^ polarity) == 0) {  /* bg (0) if polarity == 0, etc. */
2635
0
                if (i == 0 && j == 0) {
2636
0
                    wp = hp = 1;
2637
0
                } else if (i == 0) {
2638
0
                    wp = linew[i][j - 1] + 1;
2639
0
                    hp = 1;
2640
0
                } else if (j == 0) {
2641
0
                    wp = 1;
2642
0
                    hp = lineh[i - 1][j] + 1;
2643
0
                } else {
2644
                        /* Expand #1 prev rectangle down */
2645
0
                    w1 = linew[i - 1][j];
2646
0
                    h1 = lineh[i - 1][j];
2647
0
                    horizdist = j - prevfg;
2648
0
                    wmin = L_MIN(w1, horizdist);  /* width of new rectangle */
2649
0
                    area1 = wmin * (h1 + 1);
2650
2651
                        /* Expand #2 prev rectangle to right */
2652
0
                    w2 = linew[i][j - 1];
2653
0
                    h2 = lineh[i][j - 1];
2654
0
                    vertdist = i - lowestfg[j];
2655
0
                    hmin = L_MIN(h2, vertdist);  /* height of new rectangle */
2656
0
                    area2 = hmin * (w2 + 1);
2657
2658
0
                    if (area1 > area2) {
2659
0
                         wp = wmin;
2660
0
                         hp = h1 + 1;
2661
0
                    } else {
2662
0
                         wp = w2 + 1;
2663
0
                         hp = hmin;
2664
0
                    }
2665
0
                }
2666
0
            } else {  /* fg (1) if polarity == 0; bg (0) if polarity == 1 */
2667
0
                prevfg = j;
2668
0
                lowestfg[j] = i;
2669
0
                wp = hp = 0;
2670
0
            }
2671
0
            linew[i][j] = wp;
2672
0
            lineh[i][j] = hp;
2673
0
            if (wp * hp > maxarea) {
2674
0
                maxarea = wp * hp;
2675
0
                xmax = j;
2676
0
                ymax = i;
2677
0
                wmax = wp;
2678
0
                hmax = hp;
2679
0
            }
2680
0
        }
2681
0
    }
2682
2683
        /* Translate from LR corner to Box coords (UL corner, w, h) */
2684
0
    box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax);
2685
0
    *pbox = box;
2686
2687
0
    if (ppixdb) {
2688
0
        *ppixdb = pixConvertTo8(pixs, TRUE);
2689
0
        pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0);
2690
0
    }
2691
2692
0
    LEPT_FREE(linew);
2693
0
    LEPT_FREE(lineh);
2694
0
    LEPT_FREE(lowestfg);
2695
0
    pixDestroy(&pixw);
2696
0
    pixDestroy(&pixh);
2697
0
    return 0;
2698
0
}
2699
2700
2701
/*---------------------------------------------------------------------*
2702
 *            Generate rectangle inside connected component            *
2703
 *---------------------------------------------------------------------*/
2704
/*!
2705
 * \brief   pixFindRectangleInCC()
2706
 *
2707
 * \param[in]    pixs     1 bpp, with sufficient closings to make the fg be
2708
 *                        a single c.c. that is a convex hull
2709
 * \param[in]    boxs     [optional] if NULL, %pixs should be a minimum
2710
 *                        container of a single c.c.
2711
 * \param[in]    fract    first and all consecutive lines found must be at
2712
 *                        least this fraction of the fast scan dimension
2713
 * \param[in]    dir      L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of
2714
 *                        fast scan
2715
 * \param[in]    select   L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
2716
 *                        L_LARGEST_AREA, L_SMALEST_AREA
2717
 * \param[in]    debug    if 1, generates output pdf showing intermediate
2718
 *                        computation and final result
2719
 * \return  box  of included rectangle, or NULL on error
2720
 *
2721
 * <pre>
2722
 * Notes:
2723
 *      (1) Computation is similar to pixFindLargestRectangle(), but allows
2724
 *          a different set of results to choose from.
2725
 *      (2) Select the fast scan direction.  Then, scanning in the slow
2726
 *          direction, find the longest run of ON pixels in the fast
2727
 *          scan direction and look for the first run that is longer
2728
 *          than %fract of the dimension.  Continue until a shorter run
2729
 *          is found.  This generates a box of ON pixels fitting into the c.c.
2730
 *      (3) Do this from both slow scan directions and use %select to get
2731
 *          a resulting box from these two.
2732
 *      (4) The extracted rectangle is not necessarily the largest that
2733
 *          can fit in the c.c.  To get that, use pixFindLargestRectangle().
2734
 */
2735
BOX *
2736
pixFindRectangleInCC(PIX       *pixs,
2737
                     BOX       *boxs,
2738
                     l_float32  fract,
2739
                     l_int32    dir,
2740
                     l_int32    select,
2741
                     l_int32    debug)
2742
0
{
2743
0
l_int32  x, y, i, w, h, w1, h1, w2, h2, found, res;
2744
0
l_int32  xfirst, xlast, xstart, yfirst, ylast, length;
2745
0
BOX     *box1, *box2, *box3, *box4, *box5;
2746
0
PIX     *pix1, *pix2, *pixdb1, *pixdb2;
2747
0
PIXA    *pixadb;
2748
2749
0
    if (!pixs || pixGetDepth(pixs) != 1)
2750
0
        return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
2751
0
    if (fract <= 0.0 || fract > 1.0)
2752
0
        return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
2753
0
    if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
2754
0
        return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
2755
0
    if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
2756
0
        select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
2757
0
        return (BOX *)ERROR_PTR("invalid select", __func__, NULL);
2758
2759
        /* Extract the c.c. if necessary */
2760
0
    x = y = 0;
2761
0
    if (boxs) {
2762
0
        pix1 = pixClipRectangle(pixs, boxs, NULL);
2763
0
        boxGetGeometry(boxs, &x, &y, NULL, NULL);
2764
0
    } else {
2765
0
        pix1 = pixClone(pixs);
2766
0
    }
2767
2768
        /* All fast scans are horizontal; rotate 90 deg cw if necessary */
2769
0
    if (dir == L_SCAN_VERTICAL)
2770
0
        pix2 = pixRotate90(pix1, 1);
2771
0
    else  /* L_SCAN_HORIZONTAL */
2772
0
        pix2 = pixClone(pix1);
2773
0
    pixGetDimensions(pix2, &w, &h, NULL);
2774
2775
0
    pixadb = (debug) ? pixaCreate(0) : NULL;
2776
0
    pixdb1 = NULL;
2777
0
    if (pixadb) {
2778
0
        lept_mkdir("lept/rect");
2779
0
        pixaAddPix(pixadb, pix1, L_CLONE);
2780
0
        pixdb1 = pixConvertTo32(pix2);
2781
0
    }
2782
0
    pixDestroy(&pix1);
2783
2784
        /* Scanning down, find the first scanline with a long enough run.
2785
         * That run goes from (xfirst, yfirst) to (xlast, yfirst).  */
2786
0
    found = FALSE;
2787
0
    for (i = 0; i < h; i++) {
2788
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2789
0
        if (length >= (l_int32)(fract * w + 0.5)) {
2790
0
            yfirst = i;
2791
0
            xfirst = xstart;
2792
0
            xlast = xfirst + length - 1;
2793
0
            found = TRUE;
2794
0
            break;
2795
0
        }
2796
0
    }
2797
0
    if (!found) {
2798
0
        L_WARNING("no run of sufficient size was found\n", __func__);
2799
0
        pixDestroy(&pix2);
2800
0
        pixDestroy(&pixdb1);
2801
0
        pixaDestroy(&pixadb);
2802
0
        return NULL;
2803
0
    }
2804
2805
         /* Continue down until the condition fails */
2806
0
    w1 = xlast - xfirst + 1;
2807
0
    h1 = h - yfirst;  /* init */
2808
0
    ylast = h - 1;  /* init */
2809
0
    for (i = yfirst + 1; i < h; i++) {
2810
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2811
0
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2812
0
            i == h - 1) {
2813
0
            ylast = i - 1;
2814
0
            h1 = ylast - yfirst + 1;
2815
0
            break;
2816
0
        }
2817
0
    }
2818
0
    box1 = boxCreate(xfirst, yfirst, w1, h1);
2819
2820
        /* Scanning up, find the first scanline with a long enough run.
2821
         * That run goes from (xfirst, ylast) to (xlast, ylast).  */
2822
0
    for (i = h - 1; i >= 0; i--) {
2823
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2824
0
        if (length >= (l_int32)(fract * w + 0.5)) {
2825
0
            ylast = i;
2826
0
            xfirst = xstart;
2827
0
            xlast = xfirst + length - 1;
2828
0
            break;
2829
0
        }
2830
0
    }
2831
2832
         /* Continue up until the condition fails */
2833
0
    w2 = xlast - xfirst + 1;
2834
0
    h2 = ylast + 1;  /* initialize */
2835
0
    for (i = ylast - 1; i >= 0; i--) {
2836
0
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2837
0
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2838
0
            i == 0) {
2839
0
            yfirst = i + 1;
2840
0
            h2 = ylast - yfirst + 1;
2841
0
            break;
2842
0
        }
2843
0
    }
2844
0
    box2 = boxCreate(xfirst, yfirst, w2, h2);
2845
0
    pixDestroy(&pix2);
2846
2847
0
    if (pixadb) {
2848
0
        pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
2849
0
        pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
2850
0
        pixaAddPix(pixadb, pixdb1, L_INSERT);
2851
0
    }
2852
2853
        /* Select the final result from the two boxes */
2854
0
    if (select == L_GEOMETRIC_UNION)
2855
0
        box3 = boxBoundingRegion(box1, box2);
2856
0
    else if (select == L_GEOMETRIC_INTERSECTION)
2857
0
        box3 = boxOverlapRegion(box1, box2);
2858
0
    else if (select == L_LARGEST_AREA)
2859
0
        box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2860
0
    else  /* select == L_SMALLEST_AREA) */
2861
0
        box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2862
0
    boxDestroy(&box1);
2863
0
    boxDestroy(&box2);
2864
2865
        /* Rotate the box 90 degrees ccw if necessary */
2866
0
    box4 = NULL;
2867
0
    if (box3) {
2868
0
        if (dir == L_SCAN_VERTICAL)
2869
0
            box4 = boxRotateOrth(box3, w, h, 3);
2870
0
        else
2871
0
            box4 = boxCopy(box3);
2872
0
    }
2873
2874
        /* Transform back to global coordinates if %boxs exists */
2875
0
    box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
2876
0
    boxDestroy(&box3);
2877
0
    boxDestroy(&box4);
2878
2879
        /* Debug output */
2880
0
    if (pixadb) {
2881
0
        pixdb1 = pixConvertTo8(pixs, 0);
2882
0
        pixAddConstantGray(pixdb1, 190);
2883
0
        pixdb2 = pixConvertTo32(pixdb1);
2884
0
        if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
2885
0
        pixaAddPix(pixadb, pixdb2, L_INSERT);
2886
0
        res = pixGetXRes(pixs);
2887
0
        L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
2888
0
        pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
2889
0
                        "/tmp/lept/rect/fitrect.pdf");
2890
0
        pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
2891
0
        pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
2892
0
        pixDestroy(&pix1);
2893
0
        pixDestroy(&pixdb1);
2894
0
        pixaDestroy(&pixadb);
2895
0
    }
2896
2897
0
    return box5;
2898
0
}
2899
2900
/*------------------------------------------------------------------*
2901
 *                    Automatic photoinvert for OCR                 *
2902
 *------------------------------------------------------------------*/
2903
/*!
2904
 * \brief   pixAutoPhotoinvert()
2905
 *
2906
 * \param[in]    pixs       any depth, colormap ok
2907
 * \param[in]    thresh     binarization threshold; use 0 for default
2908
 * \param[out]   ppixm      [optional] image regions to be inverted
2909
 * \param[out]   pixadb     [optional] debug; input NULL to skip
2910
 * \return  pixd   1 bpp image to be sent to OCR, or NULL on error
2911
 *
2912
 * <pre>
2913
 * Notes:
2914
 *      (1) A 1 bpp image is returned, where pixels in image regions are
2915
 *          photo-inverted.
2916
 *      (2) If there is light text with a dark background, this will
2917
 *          identify the region and photoinvert the pixels there if
2918
 *          there are at least 60% fg pixels in the region.
2919
 *      (3) For debug output, input a (typically empty) %pixadb.
2920
 * </pre>
2921
 */
2922
PIX *
2923
pixAutoPhotoinvert(PIX       *pixs,
2924
                   l_int32    thresh,
2925
                   PIX      **ppixm,
2926
                   PIXA      *pixadb)
2927
0
{
2928
0
l_int32    i, n, empty, x, y, w, h;
2929
0
l_float32  fgfract;
2930
0
BOX       *box1;
2931
0
BOXA      *boxa1;
2932
0
PIX       *pix1, *pix2, *pix3, *pix4, *pix5;
2933
2934
0
    if (ppixm) *ppixm = NULL;
2935
0
    if (!pixs)
2936
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2937
0
    if (thresh == 0) thresh = 128;
2938
2939
0
    if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL)
2940
0
        return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
2941
0
    if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2942
2943
        /* Identify regions for photo-inversion:
2944
         * (1) Start with the halftone mask.
2945
         * (2) Eliminate ordinary text and halftones in the mask.
2946
         * (3) Some regions of inverted text may have been removed in
2947
         *     steps (1) and (2).  Conditionally fill holes in the mask,
2948
         *     but do not fill out to the bounding rect. */
2949
0
    pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb);
2950
0
    pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0);  /* remove noise */
2951
0
    pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0);
2952
0
    if (pixadb) {
2953
0
        pixaAddPix(pixadb, pix2, L_CLONE);
2954
0
        pixaAddPix(pixadb, pix3, L_CLONE);
2955
0
        pixaAddPix(pixadb, pix4, L_COPY);
2956
0
    }
2957
0
    pixDestroy(&pix2);
2958
0
    pixDestroy(&pix3);
2959
0
    pixZero(pix4, &empty);
2960
0
    if (empty) {
2961
0
        pixDestroy(&pix4);
2962
0
        return pix1;
2963
0
    }
2964
2965
        /* Examine each component and validate the inversion.
2966
         * Require at least 60% of pixels under each component to be FG. */
2967
0
    boxa1 = pixConnCompBB(pix4, 8);
2968
0
    n = boxaGetCount(boxa1);
2969
0
    for (i = 0; i < n; i++) {
2970
0
        box1 = boxaGetBox(boxa1, i, L_COPY);
2971
0
        pix5 = pixClipRectangle(pix1, box1, NULL);
2972
0
        pixForegroundFraction(pix5, &fgfract);
2973
0
        if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract);
2974
0
        boxGetGeometry(box1, &x, &y, &w, &h);
2975
0
        if (fgfract < 0.6)  /* erase from the mask */
2976
0
            pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0);
2977
0
        pixDestroy(&pix5);
2978
0
        boxDestroy(&box1);
2979
0
    }
2980
0
    boxaDestroy(&boxa1);
2981
0
    pixZero(pix4, &empty);
2982
0
    if (empty) {
2983
0
        pixDestroy(&pix4);
2984
0
        return pix1;
2985
0
    }
2986
2987
        /* Combine pixels of the photo-inverted pix with the binarized input */
2988
0
    pix5 = pixInvert(NULL, pix1);
2989
0
    pixCombineMasked(pix1, pix5, pix4);
2990
2991
0
    if (pixadb) {
2992
0
        pixaAddPix(pixadb, pix5, L_CLONE);
2993
0
        pixaAddPix(pixadb, pix1, L_COPY);
2994
0
    }
2995
0
    pixDestroy(&pix5);
2996
0
    if (ppixm)
2997
0
        *ppixm = pix4;
2998
0
    else
2999
0
        pixDestroy(&pix4);
3000
0
    return pix1;
3001
0
}