Coverage Report

Created: 2024-06-18 06:05

/src/leptonica/src/compare.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file compare.c
29
 * <pre>
30
 *
31
 *      Test for pix equality
32
 *           l_int32     pixEqual()
33
 *           l_int32     pixEqualWithAlpha()
34
 *           l_int32     pixEqualWithCmap()
35
 *           l_int32     cmapEqual()
36
 *           l_int32     pixUsesCmapColor()
37
 *
38
 *      Binary correlation
39
 *           l_int32     pixCorrelationBinary()
40
 *
41
 *      Difference of two images of same size
42
 *           l_int32     pixDisplayDiff()
43
 *           l_int32     pixDisplayDiffBinary()
44
 *           l_int32     pixCompareBinary()
45
 *           l_int32     pixCompareGrayOrRGB()
46
 *           l_int32     pixCompareGray()
47
 *           l_int32     pixCompareRGB()
48
 *           l_int32     pixCompareTiled()
49
 *
50
 *      Other measures of the difference of two images of the same size
51
 *           NUMA       *pixCompareRankDifference()
52
 *           l_int32     pixTestForSimilarity()
53
 *           l_int32     pixGetDifferenceStats()
54
 *           NUMA       *pixGetDifferenceHistogram()
55
 *           l_int32     pixGetPerceptualDiff()
56
 *           l_int32     pixGetPSNR()
57
 *
58
 *      Comparison of photo regions by histogram
59
 *           l_int32     pixaComparePhotoRegionsByHisto()  -- top-level
60
 *           l_int32     pixComparePhotoRegionsByHisto()  -- top-level for 2
61
 *           l_int32     pixGenPhotoHistos()
62
 *           PIX        *pixPadToCenterCentroid()
63
 *           l_int32     pixCentroid8()
64
 *           l_int32     pixDecideIfPhotoImage()
65
 *       static l_int32  findHistoGridDimensions()
66
 *           l_int32     compareTilesByHisto()
67
 *
68
 *           l_int32     pixCompareGrayByHisto()  -- top-level for 2
69
 *       static l_int32  pixCompareTilesByHisto()
70
 *           l_int32     pixCropAlignedToCentroid()
71
 *
72
 *           l_uint8    *l_compressGrayHistograms()
73
 *           NUMAA      *l_uncompressGrayHistograms()
74
 *
75
 *      Translated images at the same resolution
76
 *           l_int32     pixCompareWithTranslation()
77
 *           l_int32     pixBestCorrelation()
78
 *
79
 *  For comparing images using tiled histograms, essentially all the
80
 *  computation goes into deciding if a region of an image is a photo,
81
 *  whether that photo region is amenable to similarity measurements
82
 *  using histograms, and finally the calculation of the gray histograms
83
 *  for each of the tiled regions.  The actual comparison is essentially
84
 *  instantaneous.  Therefore, with a large number of images to compare
85
 *  with each other, it is important to first calculate the histograms
86
 *  for each image.  Then the comparisons, which go as the square of the
87
 *  number of images, actually takes no time.
88
 *
89
 *  A high level function that takes a pixa of images and does
90
 *  all comparisons, pixaComparePhotosByHisto(), uses this split
91
 *  approach.  It pads the images so that the centroid is in the center,
92
 *  which will allow the tiles to be better aligned.
93
 *
94
 *  For testing purposes, two functions are given that do all the work
95
 *  to compare just two photo regions:
96
 *    *  pixComparePhotoRegionsByHisto() uses the split approach, qualifying
97
 *       the images first with pixGenPhotoHistos(), and then comparing
98
 *       with compareTilesByHisto().
99
 *    *  pixCompareGrayByHisto() aligns the two images by centroid
100
 *       and calls pixCompareTilesByHisto() to generate the histograms
101
 *       and do the comparison.
102
 *
103
 * </pre>
104
 */
105
106
#ifdef HAVE_CONFIG_H
107
#include <config_auto.h>
108
#endif  /* HAVE_CONFIG_H */
109
110
#include <string.h>
111
#include <math.h>
112
#include "allheaders.h"
113
114
    /* Small enough to consider equal to 0.0, for plot output */
115
static const l_float32  TINY = 0.00001f;
116
117
static l_ok findHistoGridDimensions(l_int32 n, l_int32 w, l_int32 h,
118
                                    l_int32 *pnx, l_int32 *pny, l_int32 debug);
119
static l_ok pixCompareTilesByHisto(PIX *pix1, PIX *pix2, l_int32 maxgray,
120
                                   l_int32 factor, l_int32 n,
121
                                   l_float32 *pscore, PIXA *pixadebug);
122
123
/*------------------------------------------------------------------*
124
 *                        Test for pix equality                     *
125
 *------------------------------------------------------------------*/
126
/*!
127
 * \brief   pixEqual()
128
 *
129
 * \param[in]    pix1
130
 * \param[in]    pix2
131
 * \param[out]   psame  1 if same; 0 if different
132
 * \return  0 if OK; 1 on error
133
 *
134
 * <pre>
135
 * Notes:
136
 *      (1) Equality is defined as having the same pixel values for
137
 *          each respective image pixel.
138
 *      (2) This works on two pix of any depth.  If one or both pix
139
 *          have a colormap, the depths can be different and the
140
 *          two pix can still be equal.
141
 *      (3) This ignores the alpha component for 32 bpp images.
142
 *      (4) If both pix have colormaps and the depths are equal,
143
 *          use the pixEqualWithCmap() function, which does a fast
144
 *          comparison if the colormaps are identical and a relatively
145
 *          slow comparison otherwise.
146
 *      (5) In all other cases, any existing colormaps must first be
147
 *          removed before doing pixel comparison.  After the colormaps
148
 *          are removed, the resulting two images must have the same depth.
149
 *          The "lowest common denominator" is RGB, but this is only
150
 *          chosen when necessary, or when both have colormaps but
151
 *          different depths.
152
 *      (6) For images without colormaps that are not 32 bpp, all bits
153
 *          in the image part of the data array must be identical.
154
 * </pre>
155
 */
156
l_ok
157
pixEqual(PIX      *pix1,
158
         PIX      *pix2,
159
         l_int32  *psame)
160
0
{
161
0
    return pixEqualWithAlpha(pix1, pix2, 0, psame);
162
0
}
163
164
165
/*!
166
 * \brief   pixEqualWithAlpha()
167
 *
168
 * \param[in]    pix1
169
 * \param[in]    pix2
170
 * \param[in]    use_alpha   1 to compare alpha in RGBA; 0 to ignore
171
 * \param[out]   psame       1 if same; 0 if different
172
 * \return  0 if OK; 1 on error
173
 *
174
 * <pre>
175
 * Notes:
176
 *      (1) See notes in pixEqual().
177
 *      (2) This is more general than pixEqual(), in that for 32 bpp
178
 *          RGBA images, where spp = 4, you can optionally include
179
 *          the alpha component in the comparison.
180
 * </pre>
181
 */
182
l_ok
183
pixEqualWithAlpha(PIX      *pix1,
184
                  PIX      *pix2,
185
                  l_int32   use_alpha,
186
                  l_int32  *psame)
187
1.41k
{
188
1.41k
l_int32    w1, h1, d1, w2, h2, d2, wpl1, wpl2;
189
1.41k
l_int32    spp1, spp2, i, j, color, mismatch, opaque;
190
1.41k
l_int32    fullwords, linebits, endbits;
191
1.41k
l_uint32   endmask, wordmask;
192
1.41k
l_uint32  *data1, *data2, *line1, *line2;
193
1.41k
PIX       *pixs1, *pixs2, *pixt1, *pixt2, *pixalpha;
194
1.41k
PIXCMAP   *cmap1, *cmap2;
195
196
1.41k
    if (!psame)
197
0
        return ERROR_INT("psame not defined", __func__, 1);
198
1.41k
    *psame = 0;  /* init to not equal */
199
1.41k
    if (!pix1 || !pix2)
200
0
        return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
201
1.41k
    pixGetDimensions(pix1, &w1, &h1, &d1);
202
1.41k
    pixGetDimensions(pix2, &w2, &h2, &d2);
203
1.41k
    if (w1 != w2 || h1 != h2) {
204
0
        L_INFO("pix sizes differ\n", __func__);
205
0
        return 0;
206
0
    }
207
208
        /* Suppose the use_alpha flag is true.
209
         * If only one of two 32 bpp images has spp == 4, we call that
210
         * a "mismatch" of the alpha component.  In the case of a mismatch,
211
         * if the 4 bpp pix does not have all alpha components opaque (255),
212
         * the images are not-equal.  However if they are all opaque,
213
         * this image is equivalent to spp == 3, so we allow the
214
         * comparison to go forward, testing only for the RGB equality. */
215
1.41k
    spp1 = pixGetSpp(pix1);
216
1.41k
    spp2 = pixGetSpp(pix2);
217
1.41k
    mismatch = 0;
218
1.41k
    if (use_alpha && d1 == 32 && d2 == 32) {
219
51
        mismatch = ((spp1 == 4 && spp2 != 4) || (spp1 != 4 && spp2 == 4));
220
51
        if (mismatch) {
221
0
            pixalpha = (spp1 == 4) ? pix1 : pix2;
222
0
            pixAlphaIsOpaque(pixalpha, &opaque);
223
0
            if (!opaque) {
224
0
                L_INFO("just one pix has a non-opaque alpha layer\n", __func__);
225
0
                return 0;
226
0
            }
227
0
        }
228
51
    }
229
230
1.41k
    cmap1 = pixGetColormap(pix1);
231
1.41k
    cmap2 = pixGetColormap(pix2);
232
1.41k
    if (!cmap1 && !cmap2 && (d1 != d2) && (d1 == 32 || d2 == 32)) {
233
0
        L_INFO("no colormaps, pix depths unequal, and one of them is RGB\n",
234
0
               __func__);
235
0
        return 0;
236
0
    }
237
238
1.41k
    if (cmap1 && cmap2 && (d1 == d2))   /* use special function */
239
327
        return pixEqualWithCmap(pix1, pix2, psame);
240
241
        /* Must remove colormaps if they exist, and in the process
242
         * end up with the resulting images having the same depth. */
243
1.08k
    if (cmap1 && !cmap2) {
244
0
        pixUsesCmapColor(pix1, &color);
245
0
        if (color && d2 <= 8)  /* can't be equal */
246
0
            return 0;
247
0
        if (d2 < 8)
248
0
            pixs2 = pixConvertTo8(pix2, FALSE);
249
0
        else
250
0
            pixs2 = pixClone(pix2);
251
0
        if (d2 <= 8)
252
0
            pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_GRAYSCALE);
253
0
        else
254
0
            pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR);
255
1.08k
    } else if (!cmap1 && cmap2) {
256
0
        pixUsesCmapColor(pix2, &color);
257
0
        if (color && d1 <= 8)  /* can't be equal */
258
0
            return 0;
259
0
        if (d1 < 8)
260
0
            pixs1 = pixConvertTo8(pix1, FALSE);
261
0
        else
262
0
            pixs1 = pixClone(pix1);
263
0
        if (d1 <= 8)
264
0
            pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_GRAYSCALE);
265
0
        else
266
0
            pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR);
267
1.08k
    } else if (cmap1 && cmap2) {  /* depths not equal; use rgb */
268
0
        pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR);
269
0
        pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR);
270
1.08k
    } else {  /* no colormaps */
271
1.08k
        pixs1 = pixClone(pix1);
272
1.08k
        pixs2 = pixClone(pix2);
273
1.08k
    }
274
275
        /* OK, we have no colormaps, but the depths may still be different */
276
1.08k
    d1 = pixGetDepth(pixs1);
277
1.08k
    d2 = pixGetDepth(pixs2);
278
1.08k
    if (d1 != d2) {
279
0
        if (d1 == 16 || d2 == 16) {
280
0
            L_INFO("one pix is 16 bpp\n", __func__);
281
0
            pixDestroy(&pixs1);
282
0
            pixDestroy(&pixs2);
283
0
            return 0;
284
0
        }
285
0
        pixt1 = pixConvertLossless(pixs1, 8);
286
0
        pixt2 = pixConvertLossless(pixs2, 8);
287
0
        if (!pixt1 || !pixt2) {
288
0
            L_INFO("failure to convert to 8 bpp\n", __func__);
289
0
            pixDestroy(&pixs1);
290
0
            pixDestroy(&pixs2);
291
0
            pixDestroy(&pixt1);
292
0
            pixDestroy(&pixt2);
293
0
            return 0;
294
0
        }
295
1.08k
    } else {
296
1.08k
        pixt1 = pixClone(pixs1);
297
1.08k
        pixt2 = pixClone(pixs2);
298
1.08k
    }
299
1.08k
    pixDestroy(&pixs1);
300
1.08k
    pixDestroy(&pixs2);
301
302
        /* No colormaps, equal depths; do pixel comparisons */
303
1.08k
    d1 = pixGetDepth(pixt1);
304
1.08k
    d2 = pixGetDepth(pixt2);
305
1.08k
    wpl1 = pixGetWpl(pixt1);
306
1.08k
    wpl2 = pixGetWpl(pixt2);
307
1.08k
    data1 = pixGetData(pixt1);
308
1.08k
    data2 = pixGetData(pixt2);
309
310
1.08k
    if (d1 == 32) {  /* test either RGB or RGBA pixels */
311
51
        if (use_alpha && !mismatch)
312
51
            wordmask = (spp1 == 3) ? 0xffffff00 : 0xffffffff;
313
0
        else
314
0
            wordmask = 0xffffff00;
315
3.48k
        for (i = 0; i < h1; i++) {
316
3.43k
            line1 = data1 + wpl1 * i;
317
3.43k
            line2 = data2 + wpl2 * i;
318
9.47k
            for (j = 0; j < wpl1; j++) {
319
6.03k
                if ((*line1 ^ *line2) & wordmask) {
320
0
                    pixDestroy(&pixt1);
321
0
                    pixDestroy(&pixt2);
322
0
                    return 0;
323
0
                }
324
6.03k
                line1++;
325
6.03k
                line2++;
326
6.03k
            }
327
3.43k
        }
328
1.03k
    } else {  /* all bits count */
329
1.03k
        linebits = d1 * w1;
330
1.03k
        fullwords = linebits / 32;
331
1.03k
        endbits = linebits & 31;
332
1.03k
        endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits));
333
90.8k
        for (i = 0; i < h1; i++) {
334
89.8k
            line1 = data1 + wpl1 * i;
335
89.8k
            line2 = data2 + wpl2 * i;
336
610k
            for (j = 0; j < fullwords; j++) {
337
520k
                if (*line1 ^ *line2) {
338
0
                    pixDestroy(&pixt1);
339
0
                    pixDestroy(&pixt2);
340
0
                    return 0;
341
0
                }
342
520k
                line1++;
343
520k
                line2++;
344
520k
            }
345
89.8k
            if (endbits) {
346
73.6k
                if ((*line1 ^ *line2) & endmask) {
347
0
                    pixDestroy(&pixt1);
348
0
                    pixDestroy(&pixt2);
349
0
                    return 0;
350
0
                }
351
73.6k
            }
352
89.8k
        }
353
1.03k
    }
354
355
1.08k
    pixDestroy(&pixt1);
356
1.08k
    pixDestroy(&pixt2);
357
1.08k
    *psame = 1;
358
1.08k
    return 0;
359
1.08k
}
360
361
362
/*!
363
 * \brief   pixEqualWithCmap()
364
 *
365
 * \param[in]    pix1
366
 * \param[in]    pix2
367
 * \param[out]   psame
368
 * \return  0 if OK, 1 on error
369
 *
370
 * <pre>
371
 * Notes:
372
 *      (1) This returns same = TRUE if the images have identical content.
373
 *      (2) Both pix must have a colormap, and be of equal size and depth.
374
 *          If these conditions are not satisfied, it is not an error;
375
 *          the returned result is same = FALSE.
376
 *      (3) We then check whether the colormaps are the same; if so,
377
 *          the comparison proceeds 32 bits at a time.
378
 *      (4) If the colormaps are different, the comparison is done by
379
 *          slow brute force.
380
 * </pre>
381
 */
382
l_ok
383
pixEqualWithCmap(PIX      *pix1,
384
                 PIX      *pix2,
385
                 l_int32  *psame)
386
327
{
387
327
l_int32    d, w, h, wpl1, wpl2, i, j, linebits, fullwords, endbits;
388
327
l_int32    rval1, rval2, gval1, gval2, bval1, bval2, samecmaps;
389
327
l_uint32   endmask, val1, val2;
390
327
l_uint32  *data1, *data2, *line1, *line2;
391
327
PIXCMAP   *cmap1, *cmap2;
392
393
327
    if (!psame)
394
0
        return ERROR_INT("&same not defined", __func__, 1);
395
327
    *psame = 0;
396
327
    if (!pix1)
397
0
        return ERROR_INT("pix1 not defined", __func__, 1);
398
327
    if (!pix2)
399
0
        return ERROR_INT("pix2 not defined", __func__, 1);
400
401
327
    if (pixSizesEqual(pix1, pix2) == 0)
402
0
        return 0;
403
327
    cmap1 = pixGetColormap(pix1);
404
327
    cmap2 = pixGetColormap(pix2);
405
327
    if (!cmap1 || !cmap2) {
406
0
        L_INFO("both images don't have colormap\n", __func__);
407
0
        return 0;
408
0
    }
409
327
    pixGetDimensions(pix1, &w, &h, &d);
410
327
    if (d != 1 && d != 2 && d != 4 && d != 8) {
411
0
        L_INFO("pix depth not in {1, 2, 4, 8}\n", __func__);
412
0
        return 0;
413
0
    }
414
415
327
    cmapEqual(cmap1, cmap2, 3, &samecmaps);
416
327
    if (samecmaps == TRUE) {  /* colormaps are identical; compare by words */
417
327
        linebits = d * w;
418
327
        wpl1 = pixGetWpl(pix1);
419
327
        wpl2 = pixGetWpl(pix2);
420
327
        data1 = pixGetData(pix1);
421
327
        data2 = pixGetData(pix2);
422
327
        fullwords = linebits / 32;
423
327
        endbits = linebits & 31;
424
327
        endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits));
425
7.37k
        for (i = 0; i < h; i++) {
426
7.04k
            line1 = data1 + wpl1 * i;
427
7.04k
            line2 = data2 + wpl2 * i;
428
23.3k
            for (j = 0; j < fullwords; j++) {
429
16.3k
                if (*line1 ^ *line2)
430
0
                    return 0;
431
16.3k
                line1++;
432
16.3k
                line2++;
433
16.3k
            }
434
7.04k
            if (endbits) {
435
3.72k
                if ((*line1 ^ *line2) & endmask)
436
0
                    return 0;
437
3.72k
            }
438
7.04k
        }
439
327
        *psame = 1;
440
327
        return 0;
441
327
    }
442
443
        /* Colormaps aren't identical; compare pixel by pixel */
444
0
    for (i = 0; i < h; i++) {
445
0
        for (j = 0; j < w; j++) {
446
0
            pixGetPixel(pix1, j, i, &val1);
447
0
            pixGetPixel(pix2, j, i, &val2);
448
0
            pixcmapGetColor(cmap1, val1, &rval1, &gval1, &bval1);
449
0
            pixcmapGetColor(cmap2, val2, &rval2, &gval2, &bval2);
450
0
            if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2)
451
0
                return 0;
452
0
        }
453
0
    }
454
455
0
    *psame = 1;
456
0
    return 0;
457
0
}
458
459
460
/*!
461
 * \brief   cmapEqual()
462
 *
463
 * \param[in]    cmap1
464
 * \param[in]    cmap2
465
 * \param[in]    ncomps  3 for RGB, 4 for RGBA
466
 * \param[out]   psame
467
 * \return  0 if OK, 1 on error
468
 *
469
 * <pre>
470
 * Notes:
471
 *      (1) This returns %same = TRUE if the colormaps have identical entries.
472
 *      (2) If %ncomps == 4, the alpha components of the colormaps are also
473
 *          compared.
474
 * </pre>
475
 */
476
l_ok
477
cmapEqual(PIXCMAP  *cmap1,
478
          PIXCMAP  *cmap2,
479
          l_int32   ncomps,
480
          l_int32  *psame)
481
327
{
482
327
l_int32  n1, n2, i, rval1, rval2, gval1, gval2, bval1, bval2, aval1, aval2;
483
484
327
    if (!psame)
485
0
        return ERROR_INT("&same not defined", __func__, 1);
486
327
    *psame = FALSE;
487
327
    if (!cmap1)
488
0
        return ERROR_INT("cmap1 not defined", __func__, 1);
489
327
    if (!cmap2)
490
0
        return ERROR_INT("cmap2 not defined", __func__, 1);
491
327
    if (ncomps != 3 && ncomps != 4)
492
0
        return ERROR_INT("ncomps not 3 or 4", __func__, 1);
493
494
327
    n1 = pixcmapGetCount(cmap1);
495
327
    n2 = pixcmapGetCount(cmap2);
496
327
    if (n1 != n2) {
497
0
        L_INFO("colormap sizes are different\n", __func__);
498
0
        return 0;
499
0
    }
500
501
3.97k
    for (i = 0; i < n1; i++) {
502
3.65k
        pixcmapGetRGBA(cmap1, i, &rval1, &gval1, &bval1, &aval1);
503
3.65k
        pixcmapGetRGBA(cmap2, i, &rval2, &gval2, &bval2, &aval2);
504
3.65k
        if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2)
505
0
            return 0;
506
3.65k
        if (ncomps == 4 && aval1 != aval2)
507
0
            return 0;
508
3.65k
    }
509
327
    *psame = TRUE;
510
327
    return 0;
511
327
}
512
513
514
/*!
515
 * \brief   pixUsesCmapColor()
516
 *
517
 * \param[in]    pixs     any depth, colormap
518
 * \param[out]   pcolor   TRUE if color found
519
 * \return  0 if OK, 1 on error
520
 *
521
 * <pre>
522
 * Notes:
523
 *      (1) This returns color = TRUE if three things are obtained:
524
 *          (a) the pix has a colormap
525
 *          (b) the colormap has at least one color entry
526
 *          (c) a color entry is actually used
527
 *      (2) It is used in pixEqual() for comparing two images, in a
528
 *          situation where it is required to know if the colormap
529
 *          has color entries that are actually used in the image.
530
 * </pre>
531
 */
532
l_ok
533
pixUsesCmapColor(PIX      *pixs,
534
                 l_int32  *pcolor)
535
0
{
536
0
l_int32   n, i, rval, gval, bval, numpix;
537
0
NUMA     *na;
538
0
PIXCMAP  *cmap;
539
540
0
    if (!pcolor)
541
0
        return ERROR_INT("&color not defined", __func__, 1);
542
0
    *pcolor = 0;
543
0
    if (!pixs)
544
0
        return ERROR_INT("pixs not defined", __func__, 1);
545
546
0
    if ((cmap = pixGetColormap(pixs)) == NULL)
547
0
        return 0;
548
549
0
    pixcmapHasColor(cmap, pcolor);
550
0
    if (*pcolor == 0)  /* no color */
551
0
        return 0;
552
553
        /* The cmap has color entries.  Are they used? */
554
0
    na = pixGetGrayHistogram(pixs, 1);
555
0
    n = pixcmapGetCount(cmap);
556
0
    for (i = 0; i < n; i++) {
557
0
        pixcmapGetColor(cmap, i, &rval, &gval, &bval);
558
0
        numaGetIValue(na, i, &numpix);
559
0
        if ((rval != gval || rval != bval) && numpix) {  /* color found! */
560
0
            *pcolor = 1;
561
0
            break;
562
0
        }
563
0
    }
564
0
    numaDestroy(&na);
565
566
0
    return 0;
567
0
}
568
569
570
/*------------------------------------------------------------------*
571
 *                          Binary correlation                      *
572
 *------------------------------------------------------------------*/
573
/*!
574
 * \brief   pixCorrelationBinary()
575
 *
576
 * \param[in]    pix1    1 bpp
577
 * \param[in]    pix2    1 bpp
578
 * \param[out]   pval    correlation
579
 * \return  0 if OK; 1 on error
580
 *
581
 * <pre>
582
 * Notes:
583
 *      (1) The correlation is a number between 0.0 and 1.0,
584
 *          based on foreground similarity:
585
 *                           (|1 AND 2|)**2
586
 *            correlation =  --------------
587
 *                             |1| * |2|
588
 *          where |x| is the count of foreground pixels in image x.
589
 *          If the images are identical, this is 1.0.
590
 *          If they have no fg pixels in common, this is 0.0.
591
 *          If one or both images have no fg pixels, the correlation is 0.0.
592
 *      (2) Typically the two images are of equal size, but this
593
 *          is not enforced.  Instead, the UL corners are aligned.
594
 * </pre>
595
 */
596
l_ok
597
pixCorrelationBinary(PIX        *pix1,
598
                     PIX        *pix2,
599
                     l_float32  *pval)
600
0
{
601
0
l_int32   count1, count2, countn;
602
0
l_int32  *tab8;
603
0
PIX      *pixn;
604
605
0
    if (!pval)
606
0
        return ERROR_INT("&pval not defined", __func__, 1);
607
0
    *pval = 0.0;
608
0
    if (!pix1)
609
0
        return ERROR_INT("pix1 not defined", __func__, 1);
610
0
    if (!pix2)
611
0
        return ERROR_INT("pix2 not defined", __func__, 1);
612
613
0
    tab8 = makePixelSumTab8();
614
0
    pixCountPixels(pix1, &count1, tab8);
615
0
    pixCountPixels(pix2, &count2, tab8);
616
0
    if (count1 == 0 || count2 == 0) {
617
0
        LEPT_FREE(tab8);
618
0
        return 0;
619
0
    }
620
0
    pixn = pixAnd(NULL, pix1, pix2);
621
0
    pixCountPixels(pixn, &countn, tab8);
622
0
    *pval = (l_float32)countn * (l_float32)countn /
623
0
              ((l_float32)count1 * (l_float32)count2);
624
0
    LEPT_FREE(tab8);
625
0
    pixDestroy(&pixn);
626
0
    return 0;
627
0
}
628
629
630
/*------------------------------------------------------------------*
631
 *                   Difference of two images                       *
632
 *------------------------------------------------------------------*/
633
/*!
634
 * \brief   pixDisplayDiff()
635
 *
636
 * \param[in]    pix1       any depth
637
 * \param[in]    pix2       any depth
638
 * \param[in]    showall    1 to display input images; 0 to only display result
639
 * \param[in]    mindiff    min difference to identify pixel
640
 * \param[in]    diffcolor  color of pixel indicating difference >= mindiff
641
 * \return  pixd  32 bpp rgb, or NULL on error
642
 *
643
 * <pre>
644
 * Notes:
645
 *      (1) This aligns the UL corners of pix1 and pix2, crops to the
646
 *          overlapping pixels, and shows which pixels have a significant
647
 *          difference in value.
648
 *      (2) Requires %pix1 and %pix2 to have the same depth.
649
 *      (3) If rgb, a pixel is identified as different if any component
650
 *          values of the corresponding pixels equals or exceeds %mindiff.
651
 *      (4) %diffcolor is in format 0xrrggbbaa.
652
 *      (5) If %pix1 and %pix2 are 1 bpp, ignores %mindiff and %diffcolor,
653
 *          and uses the result of pixDisplayDiffBinary().
654
 * </pre>
655
 */
656
PIX *
657
pixDisplayDiff(PIX      *pix1,
658
               PIX      *pix2,
659
               l_int32   showall,
660
               l_int32   mindiff,
661
               l_uint32  diffcolor)
662
0
{
663
0
l_int32    i, j, w1, h1, d1, w2, h2, d2, minw, minh, wpl1, wpl2, wpl3;
664
0
l_int32    rval1, gval1, bval1, rval2, gval2, bval2;
665
0
l_uint32   val1, val2;
666
0
l_uint32  *data1, *data2, *data3, *line1, *line2, *line3;
667
0
PIX       *pix3 = NULL, *pix4 = NULL, *pixd;
668
0
PIXA      *pixa1;
669
670
0
    if (!pix1 || !pix2)
671
0
        return (PIX *)ERROR_PTR("pix1, pix2 not both defined", __func__, NULL);
672
0
    pixGetDimensions(pix1, &w1, &h1, &d1);
673
0
    pixGetDimensions(pix2, &w2, &h2, &d2);
674
0
    if (d1 != d2)
675
0
        return (PIX *)ERROR_PTR("unequal depths", __func__, NULL);
676
0
    if (mindiff <= 0)
677
0
        return (PIX *)ERROR_PTR("mindiff must be > 0", __func__, NULL);
678
679
0
    if (d1 == 1) {
680
0
        pix3 = pixDisplayDiffBinary(pix1, pix2);
681
0
        pixd = pixConvertTo32(pix3); 
682
0
        pixDestroy(&pix3);
683
0
    } else {
684
0
        minw = L_MIN(w1, w2);
685
0
        minh = L_MIN(h1, h2);
686
0
        pix3 = pixConvertTo32(pix1);
687
0
        pix4 = pixConvertTo32(pix2);
688
0
        pixd = pixCreate(minw, minh, 32);
689
0
        pixRasterop(pixd, 0, 0, minw, minh, PIX_SRC, pix3, 0, 0);
690
0
        data1 = pixGetData(pix3);
691
0
        wpl1 = pixGetWpl(pix3);
692
0
        data2 = pixGetData(pix4);
693
0
        wpl2 = pixGetWpl(pix4);
694
0
        data3 = pixGetData(pixd);
695
0
        wpl3 = pixGetWpl(pixd);
696
0
        for (i = 0; i < minh; i++) {
697
0
            line1 = data1 + i * wpl1;
698
0
            line2 = data2 + i * wpl2;
699
0
            line3 = data3 + i * wpl3;
700
0
            for (j = 0; j < minw; j++) {
701
0
                val1 = GET_DATA_FOUR_BYTES(line1, j);
702
0
                val2 = GET_DATA_FOUR_BYTES(line2, j);
703
0
                extractRGBValues(val1, &rval1, &gval1, &bval1);
704
0
                extractRGBValues(val2, &rval2, &gval2, &bval2);
705
0
                if (L_ABS(rval1 - rval2) >= mindiff ||
706
0
                    L_ABS(gval1 - gval2) >= mindiff ||
707
0
                    L_ABS(bval1 - bval2) >= mindiff)
708
0
                    SET_DATA_FOUR_BYTES(line3, j, diffcolor);
709
0
            }
710
0
        }
711
0
    }
712
                
713
0
    if (showall) {
714
0
        pixa1 = pixaCreate(3);
715
0
        if (d1 == 1) {
716
0
            pixaAddPix(pixa1, pix1, L_COPY);
717
0
            pixaAddPix(pixa1, pix2, L_COPY);
718
0
        } else {
719
0
            pixaAddPix(pixa1, pix3, L_INSERT);
720
0
            pixaAddPix(pixa1, pix4, L_INSERT);
721
0
        }
722
0
        pixaAddPix(pixa1, pixd, L_INSERT);  /* save diff image */
723
0
        pixd = pixaDisplayTiledInColumns(pixa1, 2, 1.0, 30, 2);  /* all 3 */
724
0
        pixaDestroy(&pixa1);
725
0
    } else if (d1 != 1) {
726
0
        pixDestroy(&pix3);
727
0
        pixDestroy(&pix4);
728
0
    }
729
0
    return pixd;
730
0
}
731
732
733
/*!
734
 * \brief   pixDisplayDiffBinary()
735
 *
736
 * \param[in]    pix1    1 bpp
737
 * \param[in]    pix2    1 bpp
738
 * \return  pixd 4 bpp cmapped, or NULL on error
739
 *
740
 * <pre>
741
 * Notes:
742
 *      (1) This gives a color representation of the difference between
743
 *          pix1 and pix2.  The color difference depends on the order.
744
 *          The pixels in pixd have 4 colors:
745
 *           * unchanged:  black (on), white (off)
746
 *           * on in pix1, off in pix2: red
747
 *           * on in pix2, off in pix1: green
748
 *      (2) This aligns the UL corners of pix1 and pix2, and crops
749
 *          to the overlapping pixels.
750
 * </pre>
751
 */
752
PIX *
753
pixDisplayDiffBinary(PIX  *pix1,
754
                     PIX  *pix2)
755
0
{
756
0
l_int32   w1, h1, d1, w2, h2, d2, minw, minh;
757
0
PIX      *pixt, *pixd;
758
0
PIXCMAP  *cmap;
759
760
0
    if (!pix1 || !pix2)
761
0
        return (PIX *)ERROR_PTR("pix1, pix2 not both defined", __func__, NULL);
762
0
    pixGetDimensions(pix1, &w1, &h1, &d1);
763
0
    pixGetDimensions(pix2, &w2, &h2, &d2);
764
0
    if (d1 != 1 || d2 != 1)
765
0
        return (PIX *)ERROR_PTR("pix1 and pix2 not 1 bpp", __func__, NULL);
766
0
    minw = L_MIN(w1, w2);
767
0
    minh = L_MIN(h1, h2);
768
769
0
    pixd = pixCreate(minw, minh, 4);
770
0
    cmap = pixcmapCreate(4);
771
0
    pixcmapAddColor(cmap, 255, 255, 255);  /* initialized to white */
772
0
    pixcmapAddColor(cmap, 0, 0, 0);
773
0
    pixcmapAddColor(cmap, 255, 0, 0);
774
0
    pixcmapAddColor(cmap, 0, 255, 0);
775
0
    pixSetColormap(pixd, cmap);
776
777
0
    pixt = pixAnd(NULL, pix1, pix2);
778
0
    pixPaintThroughMask(pixd, pixt, 0, 0, 0x0);  /* black */
779
0
    pixSubtract(pixt, pix1, pix2);
780
0
    pixPaintThroughMask(pixd, pixt, 0, 0, 0xff000000);  /* red */
781
0
    pixSubtract(pixt, pix2, pix1);
782
0
    pixPaintThroughMask(pixd, pixt, 0, 0, 0x00ff0000);  /* green */
783
0
    pixDestroy(&pixt);
784
0
    return pixd;
785
0
}
786
787
788
/*!
789
 * \brief   pixCompareBinary()
790
 *
791
 * \param[in]    pix1       1 bpp
792
 * \param[in]    pix2       1 bpp
793
 * \param[in]    comptype   L_COMPARE_XOR, L_COMPARE_SUBTRACT
794
 * \param[out]   pfract     fraction of pixels that are different
795
 * \param[out]   ppixdiff   [optional] pix of difference
796
 * \return  0 if OK; 1 on error
797
 *
798
 * <pre>
799
 * Notes:
800
 *      (1) The two images are aligned at the UL corner, and do not
801
 *          need to be the same size.
802
 *      (2) If using L_COMPARE_SUBTRACT, pix2 is subtracted from pix1.
803
 *      (3) The total number of pixels is determined by pix1.
804
 *      (4) On error, the returned fraction is 1.0.
805
 * </pre>
806
 */
807
l_ok
808
pixCompareBinary(PIX        *pix1,
809
                 PIX        *pix2,
810
                 l_int32     comptype,
811
                 l_float32  *pfract,
812
                 PIX       **ppixdiff)
813
1.41k
{
814
1.41k
l_int32   w, h, count;
815
1.41k
PIX      *pixt;
816
817
1.41k
    if (ppixdiff) *ppixdiff = NULL;
818
1.41k
    if (!pfract)
819
0
        return ERROR_INT("&pfract not defined", __func__, 1);
820
1.41k
    *pfract = 1.0;  /* initialize to max difference */
821
1.41k
    if (!pix1 || pixGetDepth(pix1) != 1)
822
758
        return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1);
823
652
    if (!pix2 || pixGetDepth(pix2) != 1)
824
0
        return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1);
825
652
    if (comptype != L_COMPARE_XOR && comptype != L_COMPARE_SUBTRACT)
826
0
        return ERROR_INT("invalid comptype", __func__, 1);
827
828
652
    if (comptype == L_COMPARE_XOR)
829
326
        pixt = pixXor(NULL, pix1, pix2);
830
326
    else  /* comptype == L_COMPARE_SUBTRACT) */
831
326
        pixt = pixSubtract(NULL, pix1, pix2);
832
652
    pixCountPixels(pixt, &count, NULL);
833
652
    pixGetDimensions(pix1, &w, &h, NULL);
834
652
    *pfract = (l_float32)(count) / (l_float32)(w * h);
835
836
652
    if (ppixdiff)
837
652
        *ppixdiff = pixt;
838
0
    else
839
0
        pixDestroy(&pixt);
840
652
    return 0;
841
652
}
842
843
844
/*!
845
 * \brief   pixCompareGrayOrRGB()
846
 *
847
 * \param[in]    pix1      2,4,8,16 bpp gray, 32 bpp rgb, or colormapped
848
 * \param[in]    pix2      2,4,8,16 bpp gray, 32 bpp rgb, or colormapped
849
 * \param[in]    comptype  L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF
850
 * \param[in]    plottype  gplot plot output type, or 0 for no plot
851
 * \param[out]   psame     [optional] 1 if pixel values are identical
852
 * \param[out]   pdiff     [optional] average difference
853
 * \param[out]   prmsdiff  [optional] rms of difference
854
 * \param[out]   ppixdiff  [optional] pix of difference
855
 * \return  0 if OK; 1 on error
856
 *
857
 * <pre>
858
 * Notes:
859
 *      (1) The two images are aligned at the UL corner, and do not
860
 *          need to be the same size.  If they are not the same size,
861
 *          the comparison will be made over overlapping pixels.
862
 *      (2) If there is a colormap, it is removed and the result
863
 *          is either gray or RGB depending on the colormap.
864
 *      (3) If RGB, each component is compared separately.
865
 *      (4) If type is L_COMPARE_ABS_DIFF, pix2 is subtracted from pix1
866
 *          and the absolute value is taken.
867
 *      (5) If type is L_COMPARE_SUBTRACT, pix2 is subtracted from pix1
868
 *          and the result is clipped to 0.
869
 *      (6) The plot output types are specified in gplot.h.
870
 *          Use 0 if no difference plot is to be made.
871
 *      (7) If the images are pixelwise identical, no difference
872
 *          plot is made, even if requested.  The result (TRUE or FALSE)
873
 *          is optionally returned in the parameter 'same'.
874
 *      (8) The average difference (either subtracting or absolute value)
875
 *          is optionally returned in the parameter 'diff'.
876
 *      (9) The RMS difference is optionally returned in the
877
 *          parameter 'rmsdiff'.  For RGB, we return the average of
878
 *          the RMS differences for each of the components.
879
 *     (10) Because pixel values are compared, pix1 and pix2 can be equal when:
880
 *          * they are both gray with different depth
881
 *          * one is colormapped and the other is not
882
 *          * they are both colormapped and have different size colormaps
883
 * </pre>
884
 */
885
l_ok
886
pixCompareGrayOrRGB(PIX        *pix1,
887
                    PIX        *pix2,
888
                    l_int32     comptype,
889
                    l_int32     plottype,
890
                    l_int32    *psame,
891
                    l_float32  *pdiff,
892
                    l_float32  *prmsdiff,
893
                    PIX       **ppixdiff)
894
0
{
895
0
l_int32  retval, d1, d2;
896
0
PIX     *pixt1, *pixt2, *pixs1, *pixs2;
897
898
0
    if (psame) *psame = 0;
899
0
    if (pdiff) *pdiff = 255.0;
900
0
    if (prmsdiff) *prmsdiff = 255.0;
901
0
    if (ppixdiff) *ppixdiff = NULL;
902
0
    if (!pix1 || pixGetDepth(pix1) == 1)
903
0
        return ERROR_INT("pix1 not defined or 1 bpp", __func__, 1);
904
0
    if (!pix2 || pixGetDepth(pix2) == 1)
905
0
        return ERROR_INT("pix2 not defined or 1 bpp", __func__, 1);
906
0
    if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF)
907
0
        return ERROR_INT("invalid comptype", __func__, 1);
908
0
    if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS)
909
0
        return ERROR_INT("invalid plottype", __func__, 1);
910
911
0
    pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
912
0
    pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC);
913
0
    d1 = pixGetDepth(pixt1);
914
0
    d2 = pixGetDepth(pixt2);
915
0
    if (d1 < 8)
916
0
        pixs1 = pixConvertTo8(pixt1, FALSE);
917
0
    else
918
0
        pixs1 = pixClone(pixt1);
919
0
    if (d2 < 8)
920
0
        pixs2 = pixConvertTo8(pixt2, FALSE);
921
0
    else
922
0
        pixs2 = pixClone(pixt2);
923
0
    pixDestroy(&pixt1);
924
0
    pixDestroy(&pixt2);
925
0
    d1 = pixGetDepth(pixs1);
926
0
    d2 = pixGetDepth(pixs2);
927
0
    if (d1 != d2) {
928
0
        pixDestroy(&pixs1);
929
0
        pixDestroy(&pixs2);
930
0
        return ERROR_INT("intrinsic depths are not equal", __func__, 1);
931
0
    }
932
933
0
    if (d1 == 8 || d1 == 16)
934
0
        retval = pixCompareGray(pixs1, pixs2, comptype, plottype, psame,
935
0
                                pdiff, prmsdiff, ppixdiff);
936
0
    else  /* d1 == 32 */
937
0
        retval = pixCompareRGB(pixs1, pixs2, comptype, plottype, psame,
938
0
                               pdiff, prmsdiff, ppixdiff);
939
0
    pixDestroy(&pixs1);
940
0
    pixDestroy(&pixs2);
941
0
    return retval;
942
0
}
943
944
945
/*!
946
 * \brief   pixCompareGray()
947
 *
948
 * \param[in]    pix1       8 or 16 bpp, not cmapped
949
 * \param[in]    pix2       8 or 16 bpp, not cmapped
950
 * \param[in]    comptype   L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF
951
 * \param[in]    plottype   gplot plot output type, or 0 for no plot
952
 * \param[out]   psame      [optional] 1 if pixel values are identical
953
 * \param[out]   pdiff      [optional] average difference
954
 * \param[out]   prmsdiff   [optional] rms of difference
955
 * \param[out]   ppixdiff   [optional] pix of difference
956
 * \return  0 if OK; 1 on error
957
 *
958
 * <pre>
959
 * Notes:
960
 *      (1) See pixCompareGrayOrRGB() for details.
961
 *      (2) Use pixCompareGrayOrRGB() if the input pix are colormapped.
962
 *      (3) Note: setting %plottype > 0 can result in writing named
963
 *                output files.
964
 * </pre>
965
 */
966
l_ok
967
pixCompareGray(PIX        *pix1,
968
               PIX        *pix2,
969
               l_int32     comptype,
970
               l_int32     plottype,
971
               l_int32    *psame,
972
               l_float32  *pdiff,
973
               l_float32  *prmsdiff,
974
               PIX       **ppixdiff)
975
1.67k
{
976
1.67k
char            buf[64];
977
1.67k
static l_atomic index = 0;
978
1.67k
l_int32         d1, d2, same, first, last;
979
1.67k
GPLOT          *gplot;
980
1.67k
NUMA           *na, *nac;
981
1.67k
PIX            *pixt;
982
983
1.67k
    if (psame) *psame = 0;
984
1.67k
    if (pdiff) *pdiff = 255.0;
985
1.67k
    if (prmsdiff) *prmsdiff = 255.0;
986
1.67k
    if (ppixdiff) *ppixdiff = NULL;
987
1.67k
    if (!pix1)
988
0
        return ERROR_INT("pix1 not defined", __func__, 1);
989
1.67k
    if (!pix2)
990
0
        return ERROR_INT("pix2 not defined", __func__, 1);
991
1.67k
    d1 = pixGetDepth(pix1);
992
1.67k
    d2 = pixGetDepth(pix2);
993
1.67k
    if ((d1 != d2) || (d1 != 8 && d1 != 16))
994
0
        return ERROR_INT("depths unequal or not 8 or 16 bpp", __func__, 1);
995
1.67k
    if (pixGetColormap(pix1) || pixGetColormap(pix2))
996
0
        return ERROR_INT("pix1 and/or pix2 are colormapped", __func__, 1);
997
1.67k
    if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF)
998
0
        return ERROR_INT("invalid comptype", __func__, 1);
999
1.67k
    if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS)
1000
0
        return ERROR_INT("invalid plottype", __func__, 1);
1001
1002
1.67k
    lept_mkdir("lept/comp");
1003
1004
1.67k
    if (comptype == L_COMPARE_SUBTRACT)
1005
1.67k
        pixt = pixSubtractGray(NULL, pix1, pix2);
1006
0
    else  /* comptype == L_COMPARE_ABS_DIFF) */
1007
0
        pixt = pixAbsDifference(pix1, pix2);
1008
1009
1.67k
    pixZero(pixt, &same);
1010
1.67k
    if (same)
1011
1.67k
        L_INFO("Images are pixel-wise identical\n", __func__);
1012
1.67k
    if (psame) *psame = same;
1013
1014
1.67k
    if (pdiff)
1015
0
        pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_MEAN_ABSVAL, pdiff);
1016
1017
        /* Don't bother to plot if the images are the same */
1018
1.67k
    if (plottype && !same) {
1019
0
        L_INFO("Images differ: output plots will be generated\n", __func__);
1020
0
        na = pixGetGrayHistogram(pixt, 1);
1021
0
        numaGetNonzeroRange(na, TINY, &first, &last);
1022
0
        nac = numaClipToInterval(na, 0, last);
1023
0
        snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d", index);
1024
0
        gplot = gplotCreate(buf, plottype,
1025
0
                            "Pixel Difference Histogram", "diff val",
1026
0
                            "number of pixels");
1027
0
        gplotAddPlot(gplot, NULL, nac, GPLOT_LINES, "gray");
1028
0
        gplotMakeOutput(gplot);
1029
0
        gplotDestroy(&gplot);
1030
0
        snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d.png",
1031
0
                 index++);
1032
0
        l_fileDisplay(buf, 100, 100, 1.0);
1033
0
        numaDestroy(&na);
1034
0
        numaDestroy(&nac);
1035
0
    }
1036
1037
1.67k
    if (ppixdiff)
1038
1.67k
        *ppixdiff = pixCopy(NULL, pixt);
1039
1040
1.67k
    if (prmsdiff) {
1041
0
        if (comptype == L_COMPARE_SUBTRACT) {  /* wrong type for rms diff */
1042
0
            pixDestroy(&pixt);
1043
0
            pixt = pixAbsDifference(pix1, pix2);
1044
0
        }
1045
0
        pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, prmsdiff);
1046
0
    }
1047
1048
1.67k
    pixDestroy(&pixt);
1049
1.67k
    return 0;
1050
1.67k
}
1051
1052
1053
/*!
1054
 * \brief   pixCompareRGB()
1055
 *
1056
 * \param[in]    pix1       32 bpp rgb
1057
 * \param[in]    pix2       32 bpp rgb
1058
 * \param[in]    comptype   L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF
1059
 * \param[in]    plottype   gplot plot output type, or 0 for no plot
1060
 * \param[out]   psame      [optional] 1 if pixel values are identical
1061
 * \param[out]   pdiff      [optional] average difference
1062
 * \param[out]   prmsdiff   [optional] rms of difference
1063
 * \param[out]   ppixdiff   [optional] pix of difference
1064
 * \return  0 if OK; 1 on error
1065
 *
1066
 * <pre>
1067
 * Notes:
1068
 *      (1) See pixCompareGrayOrRGB() for details.
1069
 *      (2) Note: setting %plottype > 0 can result in writing named
1070
 *                output files.
1071
 * </pre>
1072
 */
1073
l_ok
1074
pixCompareRGB(PIX        *pix1,
1075
              PIX        *pix2,
1076
              l_int32     comptype,
1077
              l_int32     plottype,
1078
              l_int32    *psame,
1079
              l_float32  *pdiff,
1080
              l_float32  *prmsdiff,
1081
              PIX       **ppixdiff)
1082
506
{
1083
506
char            buf[64];
1084
506
static l_atomic index = 0;
1085
506
l_int32         rsame, gsame, bsame, same, first, rlast, glast, blast, last;
1086
506
l_float32       rdiff, gdiff, bdiff;
1087
506
GPLOT          *gplot;
1088
506
NUMA           *nar, *nag, *nab, *narc, *nagc, *nabc;
1089
506
PIX            *pixr1, *pixr2, *pixg1, *pixg2, *pixb1, *pixb2;
1090
506
PIX            *pixr, *pixg, *pixb;
1091
1092
506
    if (psame) *psame = 0;
1093
506
    if (pdiff) *pdiff = 0.0;
1094
506
    if (prmsdiff) *prmsdiff = 0.0;
1095
506
    if (ppixdiff) *ppixdiff = NULL;
1096
506
    if (!pix1 || pixGetDepth(pix1) != 32)
1097
0
        return ERROR_INT("pix1 not defined or not 32 bpp", __func__, 1);
1098
506
    if (!pix2 || pixGetDepth(pix2) != 32)
1099
0
        return ERROR_INT("pix2 not defined or not ew bpp", __func__, 1);
1100
506
    if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF)
1101
0
        return ERROR_INT("invalid comptype", __func__, 1);
1102
506
    if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS)
1103
0
        return ERROR_INT("invalid plottype", __func__, 1);
1104
1105
506
    lept_mkdir("lept/comp");
1106
1107
506
    pixr1 = pixGetRGBComponent(pix1, COLOR_RED);
1108
506
    pixr2 = pixGetRGBComponent(pix2, COLOR_RED);
1109
506
    pixg1 = pixGetRGBComponent(pix1, COLOR_GREEN);
1110
506
    pixg2 = pixGetRGBComponent(pix2, COLOR_GREEN);
1111
506
    pixb1 = pixGetRGBComponent(pix1, COLOR_BLUE);
1112
506
    pixb2 = pixGetRGBComponent(pix2, COLOR_BLUE);
1113
506
    if (comptype == L_COMPARE_SUBTRACT) {
1114
506
        pixr = pixSubtractGray(NULL, pixr1, pixr2);
1115
506
        pixg = pixSubtractGray(NULL, pixg1, pixg2);
1116
506
        pixb = pixSubtractGray(NULL, pixb1, pixb2);
1117
506
    } else { /* comptype == L_COMPARE_ABS_DIFF) */
1118
0
        pixr = pixAbsDifference(pixr1, pixr2);
1119
0
        pixg = pixAbsDifference(pixg1, pixg2);
1120
0
        pixb = pixAbsDifference(pixb1, pixb2);
1121
0
    }
1122
1123
506
    pixZero(pixr, &rsame);
1124
506
    pixZero(pixg, &gsame);
1125
506
    pixZero(pixb, &bsame);
1126
506
    same = rsame && gsame && bsame;
1127
506
    if (same)
1128
506
        L_INFO("Images are pixel-wise identical\n", __func__);
1129
506
    if (psame) *psame = same;
1130
1131
506
    if (pdiff) {
1132
0
        pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_MEAN_ABSVAL, &rdiff);
1133
0
        pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_MEAN_ABSVAL, &gdiff);
1134
0
        pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_MEAN_ABSVAL, &bdiff);
1135
0
        *pdiff = (rdiff + gdiff + bdiff) / 3.0;
1136
0
    }
1137
1138
        /* Don't bother to plot if the images are the same */
1139
506
    if (plottype && !same) {
1140
0
        L_INFO("Images differ: output plots will be generated\n", __func__);
1141
0
        nar = pixGetGrayHistogram(pixr, 1);
1142
0
        nag = pixGetGrayHistogram(pixg, 1);
1143
0
        nab = pixGetGrayHistogram(pixb, 1);
1144
0
        numaGetNonzeroRange(nar, TINY, &first, &rlast);
1145
0
        numaGetNonzeroRange(nag, TINY, &first, &glast);
1146
0
        numaGetNonzeroRange(nab, TINY, &first, &blast);
1147
0
        last = L_MAX(rlast, glast);
1148
0
        last = L_MAX(last, blast);
1149
0
        narc = numaClipToInterval(nar, 0, last);
1150
0
        nagc = numaClipToInterval(nag, 0, last);
1151
0
        nabc = numaClipToInterval(nab, 0, last);
1152
0
        snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d", index);
1153
0
        gplot = gplotCreate(buf, plottype,
1154
0
                            "Pixel Difference Histogram", "diff val",
1155
0
                            "number of pixels");
1156
0
        gplotAddPlot(gplot, NULL, narc, GPLOT_LINES, "red");
1157
0
        gplotAddPlot(gplot, NULL, nagc, GPLOT_LINES, "green");
1158
0
        gplotAddPlot(gplot, NULL, nabc, GPLOT_LINES, "blue");
1159
0
        gplotMakeOutput(gplot);
1160
0
        gplotDestroy(&gplot);
1161
0
        snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d.png",
1162
0
                 index++);
1163
0
        l_fileDisplay(buf, 100, 100, 1.0);
1164
0
        numaDestroy(&nar);
1165
0
        numaDestroy(&nag);
1166
0
        numaDestroy(&nab);
1167
0
        numaDestroy(&narc);
1168
0
        numaDestroy(&nagc);
1169
0
        numaDestroy(&nabc);
1170
0
    }
1171
1172
506
    if (ppixdiff)
1173
506
        *ppixdiff = pixCreateRGBImage(pixr, pixg, pixb);
1174
1175
506
    if (prmsdiff) {
1176
0
        if (comptype == L_COMPARE_SUBTRACT) {
1177
0
            pixDestroy(&pixr);
1178
0
            pixDestroy(&pixg);
1179
0
            pixDestroy(&pixb);
1180
0
            pixr = pixAbsDifference(pixr1, pixr2);
1181
0
            pixg = pixAbsDifference(pixg1, pixg2);
1182
0
            pixb = pixAbsDifference(pixb1, pixb2);
1183
0
        }
1184
0
        pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &rdiff);
1185
0
        pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &gdiff);
1186
0
        pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &bdiff);
1187
0
        *prmsdiff = (rdiff + gdiff + bdiff) / 3.0;
1188
0
    }
1189
1190
506
    pixDestroy(&pixr1);
1191
506
    pixDestroy(&pixr2);
1192
506
    pixDestroy(&pixg1);
1193
506
    pixDestroy(&pixg2);
1194
506
    pixDestroy(&pixb1);
1195
506
    pixDestroy(&pixb2);
1196
506
    pixDestroy(&pixr);
1197
506
    pixDestroy(&pixg);
1198
506
    pixDestroy(&pixb);
1199
506
    return 0;
1200
506
}
1201
1202
1203
/*!
1204
 * \brief   pixCompareTiled()
1205
 *
1206
 * \param[in]    pix1       8 bpp or 32 bpp rgb
1207
 * \param[in]    pix2       8 bpp 32 bpp rgb
1208
 * \param[in]    sx, sy     tile size; must be > 1 in each dimension
1209
 * \param[in]    type       L_MEAN_ABSVAL or L_ROOT_MEAN_SQUARE
1210
 * \param[out]   ppixdiff   pix of difference
1211
 * \return  0 if OK; 1 on error
1212
 *
1213
 * <pre>
1214
 * Notes:
1215
 *      (1) With L_MEAN_ABSVAL, we compute for each tile the
1216
 *          average abs value of the pixel component difference between
1217
 *          the two (aligned) images.  With L_ROOT_MEAN_SQUARE, we
1218
 *          compute instead the rms difference over all components.
1219
 *      (2) The two input pix must be the same depth.  Comparison is made
1220
 *          using UL corner alignment.
1221
 *      (3) For 32 bpp, the distance between corresponding tiles
1222
 *          is found by averaging the measured difference over all three
1223
 *          components of each pixel in the tile.
1224
 *      (4) The result, pixdiff, contains one pixel for each source tile.
1225
 * </pre>
1226
 */
1227
l_ok
1228
pixCompareTiled(PIX     *pix1,
1229
                PIX     *pix2,
1230
                l_int32  sx,
1231
                l_int32  sy,
1232
                l_int32  type,
1233
                PIX    **ppixdiff)
1234
0
{
1235
0
l_int32    d1, d2, w, h;
1236
0
PIX       *pixt, *pixr, *pixg, *pixb;
1237
0
PIX       *pixrdiff, *pixgdiff, *pixbdiff;
1238
0
PIXACC    *pixacc;
1239
1240
0
    if (!ppixdiff)
1241
0
        return ERROR_INT("&pixdiff not defined", __func__, 1);
1242
0
    *ppixdiff = NULL;
1243
0
    if (!pix1)
1244
0
        return ERROR_INT("pix1 not defined", __func__, 1);
1245
0
    if (!pix2)
1246
0
        return ERROR_INT("pix2 not defined", __func__, 1);
1247
0
    d1 = pixGetDepth(pix1);
1248
0
    d2 = pixGetDepth(pix2);
1249
0
    if (d1 != d2)
1250
0
        return ERROR_INT("depths not equal", __func__, 1);
1251
0
    if (d1 != 8 && d1 != 32)
1252
0
        return ERROR_INT("pix1 not 8 or 32 bpp", __func__, 1);
1253
0
    if (d2 != 8 && d2 != 32)
1254
0
        return ERROR_INT("pix2 not 8 or 32 bpp", __func__, 1);
1255
0
    if (sx < 2 || sy < 2)
1256
0
        return ERROR_INT("sx and sy not both > 1", __func__, 1);
1257
0
    if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE)
1258
0
        return ERROR_INT("invalid type", __func__, 1);
1259
1260
0
    pixt = pixAbsDifference(pix1, pix2);
1261
0
    if (d1 == 8) {
1262
0
        *ppixdiff = pixGetAverageTiled(pixt, sx, sy, type);
1263
0
    } else {  /* d1 == 32 */
1264
0
        pixr = pixGetRGBComponent(pixt, COLOR_RED);
1265
0
        pixg = pixGetRGBComponent(pixt, COLOR_GREEN);
1266
0
        pixb = pixGetRGBComponent(pixt, COLOR_BLUE);
1267
0
        pixrdiff = pixGetAverageTiled(pixr, sx, sy, type);
1268
0
        pixgdiff = pixGetAverageTiled(pixg, sx, sy, type);
1269
0
        pixbdiff = pixGetAverageTiled(pixb, sx, sy, type);
1270
0
        pixGetDimensions(pixrdiff, &w, &h, NULL);
1271
0
        pixacc = pixaccCreate(w, h, 0);
1272
0
        pixaccAdd(pixacc, pixrdiff);
1273
0
        pixaccAdd(pixacc, pixgdiff);
1274
0
        pixaccAdd(pixacc, pixbdiff);
1275
0
        pixaccMultConst(pixacc, 1.f / 3.f);
1276
0
        *ppixdiff = pixaccFinal(pixacc, 8);
1277
0
        pixDestroy(&pixr);
1278
0
        pixDestroy(&pixg);
1279
0
        pixDestroy(&pixb);
1280
0
        pixDestroy(&pixrdiff);
1281
0
        pixDestroy(&pixgdiff);
1282
0
        pixDestroy(&pixbdiff);
1283
0
        pixaccDestroy(&pixacc);
1284
0
    }
1285
0
    pixDestroy(&pixt);
1286
0
    return 0;
1287
0
}
1288
1289
1290
/*------------------------------------------------------------------*
1291
 *            Other measures of the difference of two images        *
1292
 *------------------------------------------------------------------*/
1293
/*!
1294
 * \brief   pixCompareRankDifference()
1295
 *
1296
 * \param[in]    pix1      8 bpp gray or 32 bpp rgb, or colormapped
1297
 * \param[in]    pix2      8 bpp gray or 32 bpp rgb, or colormapped
1298
 * \param[in]    factor    subsampling factor; use 0 or 1 for no subsampling
1299
 * \return  narank      numa of rank difference, or NULL on error
1300
 *
1301
 * <pre>
1302
 * Notes:
1303
 *      (1) This answers the question: if the pixel values in each
1304
 *          component are compared by absolute difference, for
1305
 *          any value of difference, what is the fraction of
1306
 *          pixel pairs that have a difference of this magnitude
1307
 *          or greater.  For a difference of 0, the fraction is 1.0.
1308
 *          In this sense, it is a mapping from pixel difference to
1309
 *          rank order of difference.
1310
 *      (2) The two images are aligned at the UL corner, and do not
1311
 *          need to be the same size.  If they are not the same size,
1312
 *          the comparison will be made over overlapping pixels.
1313
 *      (3) If there is a colormap, it is removed and the result
1314
 *          is either gray or RGB depending on the colormap.
1315
 *      (4) If RGB, pixel differences for each component are aggregated
1316
 *          into a single histogram.
1317
 * </pre>
1318
 */
1319
NUMA *
1320
pixCompareRankDifference(PIX     *pix1,
1321
                         PIX     *pix2,
1322
                         l_int32  factor)
1323
0
{
1324
0
l_int32     i;
1325
0
l_float32  *array1, *array2;
1326
0
NUMA       *nah, *nan, *nad;
1327
1328
0
    if (!pix1)
1329
0
        return (NUMA *)ERROR_PTR("pix1 not defined", __func__, NULL);
1330
0
    if (!pix2)
1331
0
        return (NUMA *)ERROR_PTR("pix2 not defined", __func__, NULL);
1332
1333
0
    if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL)
1334
0
        return (NUMA *)ERROR_PTR("na not made", __func__, NULL);
1335
1336
0
    nan = numaNormalizeHistogram(nah, 1.0);
1337
0
    array1 = numaGetFArray(nan, L_NOCOPY);
1338
1339
0
    nad = numaCreate(256);
1340
0
    numaSetCount(nad, 256);  /* all initialized to 0.0 */
1341
0
    array2 = numaGetFArray(nad, L_NOCOPY);
1342
1343
        /* Do rank accumulation on normalized histo of diffs */
1344
0
    array2[0] = 1.0;
1345
0
    for (i = 1; i < 256; i++)
1346
0
        array2[i] = array2[i - 1] - array1[i - 1];
1347
1348
0
    numaDestroy(&nah);
1349
0
    numaDestroy(&nan);
1350
0
    return nad;
1351
0
}
1352
1353
1354
/*!
1355
 * \brief   pixTestForSimilarity()
1356
 *
1357
 * \param[in]    pix1         8 bpp gray or 32 bpp rgb, or colormapped
1358
 * \param[in]    pix2         8 bpp gray or 32 bpp rgb, or colormapped
1359
 * \param[in]    factor       subsampling factor; use 0 or 1 for no subsampling
1360
 * \param[in]    mindiff      minimum pixel difference to be counted; > 0
1361
 * \param[in]    maxfract     maximum fraction of pixels allowed to have
1362
 *                            diff greater than or equal to mindiff
1363
 * \param[in]    maxave       maximum average difference of pixels allowed for
1364
 *                            pixels with diff greater than or equal to
1365
 *                            mindiff, after subtracting mindiff
1366
 * \param[out]   psimilar     1 if similar, 0 otherwise
1367
 * \param[in]    details      use 1 to give normalized histogram and other data
1368
 * \return  0 if OK, 1 on error
1369
 *
1370
 * <pre>
1371
 * Notes:
1372
 *      (1) This takes 2 pix that are the same size and determines using
1373
 *          3 input parameters if they are "similar".  The first parameter
1374
 *          %mindiff establishes a criterion of pixel-to-pixel similarity:
1375
 *          two pixels are not similar if their difference in value is
1376
 *          at least mindiff.  Then %maxfract and %maxave are thresholds
1377
 *          on the number and distribution of dissimilar pixels
1378
 *          allowed for the two pix to be similar.   If the pix are
1379
 *          to be similar, neither threshold can be exceeded.
1380
 *      (2) In setting the %maxfract and %maxave thresholds, you have
1381
 *          these options:
1382
 *            (a) Base the comparison only on %maxfract.  Then set
1383
 *                %maxave = 0.0 or 256.0.  (If 0, we always ignore it.)
1384
 *            (b) Base the comparison only on %maxave.  Then set
1385
 *                %maxfract = 1.0.
1386
 *            (c) Base the comparison on both thresholds.
1387
 *      (3) Example of values that can be expected at mindiff = 15 when
1388
 *          comparing lossless png encoding with jpeg encoding, q=75:
1389
 *             (smoothish bg)       fractdiff = 0.01, avediff = 2.5
1390
 *             (natural scene)      fractdiff = 0.13, avediff = 3.5
1391
 *          To identify these images as 'similar', select maxfract
1392
 *          and maxave to be upper bounds of what you expect.
1393
 *      (4) See pixGetDifferenceStats() for a discussion of why we subtract
1394
 *          mindiff from the computed average diff of the nonsimilar pixels
1395
 *          to get the 'avediff' returned by that function.
1396
 *      (5) If there is a colormap, it is removed and the result
1397
 *          is either gray or RGB depending on the colormap.
1398
 *      (6) If RGB, the maximum difference between pixel components is
1399
 *          saved in the histogram.
1400
 * </pre>
1401
 */
1402
l_ok
1403
pixTestForSimilarity(PIX       *pix1,
1404
                     PIX       *pix2,
1405
                     l_int32    factor,
1406
                     l_int32    mindiff,
1407
                     l_float32  maxfract,
1408
                     l_float32  maxave,
1409
                     l_int32   *psimilar,
1410
                     l_int32    details)
1411
0
{
1412
0
l_float32   fractdiff, avediff;
1413
1414
0
    if (!psimilar)
1415
0
        return ERROR_INT("&similar not defined", __func__, 1);
1416
0
    *psimilar = 0;
1417
0
    if (!pix1)
1418
0
        return ERROR_INT("pix1 not defined", __func__, 1);
1419
0
    if (!pix2)
1420
0
        return ERROR_INT("pix2 not defined", __func__, 1);
1421
0
    if (pixSizesEqual(pix1, pix2) == 0)
1422
0
        return ERROR_INT("pix sizes not equal", __func__, 1);
1423
0
    if (mindiff <= 0)
1424
0
        return ERROR_INT("mindiff must be > 0", __func__, 1);
1425
1426
0
    if (pixGetDifferenceStats(pix1, pix2, factor, mindiff,
1427
0
                              &fractdiff, &avediff, details))
1428
0
        return ERROR_INT("diff stats not found", __func__, 1);
1429
1430
0
    if (maxave <= 0.0) maxave = 256.0;
1431
0
    if (fractdiff <= maxfract && avediff <= maxave)
1432
0
        *psimilar = 1;
1433
0
    return 0;
1434
0
}
1435
1436
1437
/*!
1438
 * \brief   pixGetDifferenceStats()
1439
 *
1440
 * \param[in]    pix1        8 bpp gray or 32 bpp rgb, or colormapped
1441
 * \param[in]    pix2        8 bpp gray or 32 bpp rgb, or colormapped
1442
 * \param[in]    factor      subsampling factor; use 0 or 1 for no subsampling
1443
 * \param[in]    mindiff     minimum pixel difference to be counted; > 0
1444
 * \param[out]   pfractdiff  fraction of pixels with diff greater than or
1445
 *                           equal to mindiff
1446
 * \param[out]   pavediff    average difference of pixels with diff greater
1447
 *                           than or equal to mindiff, less mindiff
1448
 * \param[in]    details     use 1 to give normalized histogram and other data
1449
 * \return  0 if OK, 1 on error
1450
 *
1451
 * <pre>
1452
 * Notes:
1453
 *      (1) This takes a threshold %mindiff and describes the difference
1454
 *          between two images in terms of two numbers:
1455
 *            (a) the fraction of pixels, %fractdiff, whose difference
1456
 *                equals or exceeds the threshold %mindiff, and
1457
 *            (b) the average value %avediff of the difference in pixel value
1458
 *                for the pixels in the set given by (a), after you subtract
1459
 *                %mindiff.  The reason for subtracting %mindiff is that
1460
 *                you then get a useful measure for the rate of falloff
1461
 *                of the distribution for larger differences.  For example,
1462
 *                if %mindiff = 10 and you find that %avediff = 2.5, it
1463
 *                says that of the pixels with diff > 10, the average of
1464
 *                their diffs is just mindiff + 2.5 = 12.5.  This is a
1465
 *                fast falloff in the histogram with increasing difference.
1466
 *      (2) The two images are aligned at the UL corner, and do not
1467
 *          need to be the same size.  If they are not the same size,
1468
 *          the comparison will be made over overlapping pixels.
1469
 *      (3) If there is a colormap, it is removed and the result
1470
 *          is either gray or RGB depending on the colormap.
1471
 *      (4) If RGB, the maximum difference between pixel components is
1472
 *          saved in the histogram.
1473
 *      (5) Set %details == 1 to see the difference histogram and get
1474
 *          an output that shows for each value of %mindiff, what are the
1475
 *          minimum values required for fractdiff and avediff in order
1476
 *          that the two pix will be considered similar.
1477
 * </pre>
1478
 */
1479
l_ok
1480
pixGetDifferenceStats(PIX        *pix1,
1481
                      PIX        *pix2,
1482
                      l_int32     factor,
1483
                      l_int32     mindiff,
1484
                      l_float32  *pfractdiff,
1485
                      l_float32  *pavediff,
1486
                      l_int32     details)
1487
0
{
1488
0
l_int32     i, first, last, diff;
1489
0
l_float32   fract, ave;
1490
0
l_float32  *array;
1491
0
NUMA       *nah, *nan, *nac;
1492
1493
0
    if (pfractdiff) *pfractdiff = 0.0;
1494
0
    if (pavediff) *pavediff = 0.0;
1495
0
    if (!pfractdiff)
1496
0
        return ERROR_INT("&fractdiff not defined", __func__, 1);
1497
0
    if (!pavediff)
1498
0
        return ERROR_INT("&avediff not defined", __func__, 1);
1499
0
    if (!pix1)
1500
0
        return ERROR_INT("pix1 not defined", __func__, 1);
1501
0
    if (!pix2)
1502
0
        return ERROR_INT("pix2 not defined", __func__, 1);
1503
0
    if (mindiff <= 0)
1504
0
        return ERROR_INT("mindiff must be > 0", __func__, 1);
1505
1506
0
    if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL)
1507
0
        return ERROR_INT("na not made", __func__, 1);
1508
1509
0
    if ((nan = numaNormalizeHistogram(nah, 1.0)) == NULL) {
1510
0
        numaDestroy(&nah);
1511
0
        return ERROR_INT("nan not made", __func__, 1);
1512
0
    }
1513
0
    array = numaGetFArray(nan, L_NOCOPY);
1514
1515
0
    if (details) {
1516
0
        lept_mkdir("lept/comp");
1517
0
        numaGetNonzeroRange(nan, 0.0, &first, &last);
1518
0
        nac = numaClipToInterval(nan, first, last);
1519
0
        gplotSimple1(nac, GPLOT_PNG, "/tmp/lept/comp/histo",
1520
0
                     "Difference histogram");
1521
0
        l_fileDisplay("/tmp/lept/comp/histo.png", 500, 0, 1.0);
1522
0
        lept_stderr("\nNonzero values in normalized histogram:");
1523
0
        numaWriteStderr(nac);
1524
0
        numaDestroy(&nac);
1525
0
        lept_stderr(" Mindiff      fractdiff      avediff\n");
1526
0
        lept_stderr(" -----------------------------------\n");
1527
0
        for (diff = 1; diff < L_MIN(2 * mindiff, last); diff++) {
1528
0
            fract = 0.0;
1529
0
            ave = 0.0;
1530
0
            for (i = diff; i <= last; i++) {
1531
0
                fract += array[i];
1532
0
                ave += (l_float32)i * array[i];
1533
0
            }
1534
0
            ave = (fract == 0.0) ? 0.0 : ave / fract;
1535
0
            ave -= diff;
1536
0
            lept_stderr("%5d         %7.4f        %7.4f\n",
1537
0
                        diff, fract, ave);
1538
0
        }
1539
0
        lept_stderr(" -----------------------------------\n");
1540
0
    }
1541
1542
0
    fract = 0.0;
1543
0
    ave = 0.0;
1544
0
    for (i = mindiff; i < 256; i++) {
1545
0
      fract += array[i];
1546
0
      ave += (l_float32)i * array[i];
1547
0
    }
1548
0
    ave = (fract == 0.0) ? 0.0 : ave / fract;
1549
0
    ave -= mindiff;
1550
1551
0
    *pfractdiff = fract;
1552
0
    *pavediff = ave;
1553
1554
0
    numaDestroy(&nah);
1555
0
    numaDestroy(&nan);
1556
0
    return 0;
1557
0
}
1558
1559
1560
/*!
1561
 * \brief   pixGetDifferenceHistogram()
1562
 *
1563
 * \param[in]    pix1      8 bpp gray or 32 bpp rgb, or colormapped
1564
 * \param[in]    pix2      8 bpp gray or 32 bpp rgb, or colormapped
1565
 * \param[in]    factor    subsampling factor; use 0 or 1 for no subsampling
1566
 * \return  na     Numa of histogram of differences, or NULL on error
1567
 *
1568
 * <pre>
1569
 * Notes:
1570
 *      (1) The two images are aligned at the UL corner, and do not
1571
 *          need to be the same size.  If they are not the same size,
1572
 *          the comparison will be made over overlapping pixels.
1573
 *      (2) If there is a colormap, it is removed and the result
1574
 *          is either gray or RGB depending on the colormap.
1575
 *      (3) If RGB, the maximum difference between pixel components is
1576
 *          saved in the histogram.
1577
 * </pre>
1578
 */
1579
NUMA *
1580
pixGetDifferenceHistogram(PIX     *pix1,
1581
                          PIX     *pix2,
1582
                          l_int32  factor)
1583
0
{
1584
0
l_int32     w1, h1, d1, w2, h2, d2, w, h, wpl1, wpl2;
1585
0
l_int32     i, j, val, val1, val2;
1586
0
l_int32     rval1, rval2, gval1, gval2, bval1, bval2;
1587
0
l_int32     rdiff, gdiff, bdiff, maxdiff;
1588
0
l_uint32   *data1, *data2, *line1, *line2;
1589
0
l_float32  *array;
1590
0
NUMA       *na;
1591
0
PIX        *pixt1, *pixt2;
1592
1593
0
    if (!pix1)
1594
0
        return (NUMA *)ERROR_PTR("pix1 not defined", __func__, NULL);
1595
0
    if (!pix2)
1596
0
        return (NUMA *)ERROR_PTR("pix2 not defined", __func__, NULL);
1597
0
    d1 = pixGetDepth(pix1);
1598
0
    d2 = pixGetDepth(pix2);
1599
0
    if (d1 == 16 || d2 == 16)
1600
0
        return (NUMA *)ERROR_PTR("d == 16 not supported", __func__, NULL);
1601
0
    if (d1 < 8 && !pixGetColormap(pix1))
1602
0
        return (NUMA *)ERROR_PTR("pix1 depth < 8 bpp and not cmapped",
1603
0
                                 __func__, NULL);
1604
0
    if (d2 < 8 && !pixGetColormap(pix2))
1605
0
        return (NUMA *)ERROR_PTR("pix2 depth < 8 bpp and not cmapped",
1606
0
                                 __func__, NULL);
1607
0
    pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
1608
0
    pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC);
1609
0
    pixGetDimensions(pixt1, &w1, &h1, &d1);
1610
0
    pixGetDimensions(pixt2, &w2, &h2, &d2);
1611
0
    if (d1 != d2) {
1612
0
        pixDestroy(&pixt1);
1613
0
        pixDestroy(&pixt2);
1614
0
        return (NUMA *)ERROR_PTR("pix depths not equal", __func__, NULL);
1615
0
    }
1616
0
    if (factor < 1) factor = 1;
1617
1618
0
    na = numaCreate(256);
1619
0
    numaSetCount(na, 256);  /* all initialized to 0.0 */
1620
0
    array = numaGetFArray(na, L_NOCOPY);
1621
0
    w = L_MIN(w1, w2);
1622
0
    h = L_MIN(h1, h2);
1623
0
    data1 = pixGetData(pixt1);
1624
0
    data2 = pixGetData(pixt2);
1625
0
    wpl1 = pixGetWpl(pixt1);
1626
0
    wpl2 = pixGetWpl(pixt2);
1627
0
    if (d1 == 8) {
1628
0
        for (i = 0; i < h; i += factor) {
1629
0
            line1 = data1 + i * wpl1;
1630
0
            line2 = data2 + i * wpl2;
1631
0
            for (j = 0; j < w; j += factor) {
1632
0
                val1 = GET_DATA_BYTE(line1, j);
1633
0
                val2 = GET_DATA_BYTE(line2, j);
1634
0
                val = L_ABS(val1 - val2);
1635
0
                array[val]++;
1636
0
            }
1637
0
        }
1638
0
    } else {  /* d1 == 32 */
1639
0
        for (i = 0; i < h; i += factor) {
1640
0
            line1 = data1 + i * wpl1;
1641
0
            line2 = data2 + i * wpl2;
1642
0
            for (j = 0; j < w; j += factor) {
1643
0
                extractRGBValues(line1[j], &rval1, &gval1, &bval1);
1644
0
                extractRGBValues(line2[j], &rval2, &gval2, &bval2);
1645
0
                rdiff = L_ABS(rval1 - rval2);
1646
0
                gdiff = L_ABS(gval1 - gval2);
1647
0
                bdiff = L_ABS(bval1 - bval2);
1648
0
                maxdiff = L_MAX(rdiff, gdiff);
1649
0
                maxdiff = L_MAX(maxdiff, bdiff);
1650
0
                array[maxdiff]++;
1651
0
            }
1652
0
        }
1653
0
    }
1654
1655
0
    pixDestroy(&pixt1);
1656
0
    pixDestroy(&pixt2);
1657
0
    return na;
1658
0
}
1659
1660
1661
/*!
1662
 * \brief   pixGetPerceptualDiff()
1663
 *
1664
 * \param[in]    pixs1       8 bpp gray or 32 bpp rgb, or colormapped
1665
 * \param[in]    pixs2       8 bpp gray or 32 bpp rgb, or colormapped
1666
 * \param[in]    sampling    subsampling factor; use 0 or 1 for no subsampling
1667
 * \param[in]    dilation    size of grayscale or color Sel; odd
1668
 * \param[in]    mindiff     minimum pixel difference to be counted; > 0
1669
 * \param[out]   pfract      fraction of pixels with diff greater than mindiff
1670
 * \param[out]   ppixdiff1   [optional] showing difference (gray or color)
1671
 * \param[out]   ppixdiff2   [optional] showing pixels of sufficient diff
1672
 * \return  0 if OK, 1 on error
1673
 *
1674
 * <pre>
1675
 * Notes:
1676
 *      (1) This takes 2 pix and determines, using 2 input parameters:
1677
 *           * %dilation specifies the amount of grayscale or color
1678
 *             dilation to apply to the images, to compensate for
1679
 *             a small amount of misregistration.  A typical number might
1680
 *             be 5, which uses a 5x5 Sel.  Grayscale dilation expands
1681
 *             lighter pixels into darker pixel regions.
1682
 *           * %mindiff determines the threshold on the difference in
1683
 *             pixel values to be counted -- two pixels are not similar
1684
 *             if their difference in value is at least %mindiff.  For
1685
 *             color pixels, we use the maximum component difference.
1686
 *      (2) The pixelwise comparison is always done with the UL corners
1687
 *          aligned.  The sizes of pix1 and pix2 need not be the same,
1688
 *          although in practice it can be useful to scale to the same size.
1689
 *      (3) If there is a colormap, it is removed and the result
1690
 *          is either gray or RGB depending on the colormap.
1691
 *      (4) Two optional diff images can be retrieved (typ. for debugging):
1692
 *           pixdiff1: the gray or color difference
1693
 *           pixdiff2: thresholded to 1 bpp for pixels exceeding %mindiff
1694
 *      (5) The returned value of fract can be compared to some threshold,
1695
 *          which is application dependent.
1696
 *      (6) This method is in analogy to the two-sided hausdorff transform,
1697
 *          except here it is for d > 1.  For d == 1 (see pixRankHaustest()),
1698
 *          we verify that when one pix1 is dilated, it covers at least a
1699
 *          given fraction of the pixels in pix2, and v.v.; in that
1700
 *          case, the two pix are sufficiently similar.  Here, we
1701
 *          do an analogous thing: subtract the dilated pix1 from pix2 to
1702
 *          get a 1-sided hausdorff-like transform.  Then do it the
1703
 *          other way.  Take the component-wise max of the two results,
1704
 *          and threshold to get the fraction of pixels with a difference
1705
 *          below the threshold.
1706
 * </pre>
1707
 */
1708
l_ok
1709
pixGetPerceptualDiff(PIX        *pixs1,
1710
                     PIX        *pixs2,
1711
                     l_int32     sampling,
1712
                     l_int32     dilation,
1713
                     l_int32     mindiff,
1714
                     l_float32  *pfract,
1715
                     PIX       **ppixdiff1,
1716
                     PIX       **ppixdiff2)
1717
1.41k
{
1718
1.41k
l_int32  d1, d2, w, h, count;
1719
1.41k
PIX     *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
1720
1.41k
PIX     *pix10, *pix11;
1721
1722
1.41k
    if (ppixdiff1) *ppixdiff1 = NULL;
1723
1.41k
    if (ppixdiff2) *ppixdiff2 = NULL;
1724
1.41k
    if (!pfract)
1725
0
        return ERROR_INT("&fract not defined", __func__, 1);
1726
1.41k
    *pfract = 1.0;  /* init to completely different */
1727
1.41k
    if ((dilation & 1) == 0)
1728
0
        return ERROR_INT("dilation must be odd", __func__, 1);
1729
1.41k
    if (!pixs1)
1730
0
        return ERROR_INT("pixs1 not defined", __func__, 1);
1731
1.41k
    if (!pixs2)
1732
0
        return ERROR_INT("pixs2 not defined", __func__, 1);
1733
1.41k
    d1 = pixGetDepth(pixs1);
1734
1.41k
    d2 = pixGetDepth(pixs2);
1735
1.41k
    if (!pixGetColormap(pixs1) && d1 < 8)
1736
244
        return ERROR_INT("pixs1 not cmapped and < 8 bpp", __func__, 1);
1737
1.16k
    if (!pixGetColormap(pixs2) && d2 < 8)
1738
0
        return ERROR_INT("pixs2 not cmapped and < 8 bpp", __func__, 1);
1739
1740
        /* Integer downsample if requested */
1741
1.16k
    if (sampling > 1) {
1742
0
        pix1 = pixScaleByIntSampling(pixs1, sampling);
1743
0
        pix2 = pixScaleByIntSampling(pixs2, sampling);
1744
1.16k
    } else {
1745
1.16k
        pix1 = pixClone(pixs1);
1746
1.16k
        pix2 = pixClone(pixs2);
1747
1.16k
    }
1748
1749
        /* Remove colormaps */
1750
1.16k
    if (pixGetColormap(pix1)) {
1751
327
        pix3 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
1752
327
        d1 = pixGetDepth(pix3);
1753
839
    } else {
1754
839
        pix3 = pixClone(pix1);
1755
839
    }
1756
1.16k
    if (pixGetColormap(pix2)) {
1757
327
        pix4 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC);
1758
327
        d2 = pixGetDepth(pix4);
1759
839
    } else {
1760
839
        pix4 = pixClone(pix2);
1761
839
    }
1762
1.16k
    pixDestroy(&pix1);
1763
1.16k
    pixDestroy(&pix2);
1764
1.16k
    if (d1 != d2 || (d1 != 8 && d1 != 32)) {
1765
76
        pixDestroy(&pix3);
1766
76
        pixDestroy(&pix4);
1767
76
        L_INFO("depths unequal or not in {8,32}: d1 = %d, d2 = %d\n",
1768
76
               __func__, d1, d2);
1769
76
        return 1;
1770
76
    }
1771
1772
        /* In each direction, do a small dilation and subtract the dilated
1773
         * image from the other image to get a one-sided difference.
1774
         * Then take the max of the differences for each direction
1775
         * and clipping each component to 255 if necessary.  Note that
1776
         * for RGB images, the dilations and max selection are done
1777
         * component-wise, and the conversion to grayscale also uses the
1778
         * maximum component.  The resulting grayscale images are
1779
         * thresholded using %mindiff. */
1780
1.09k
    if (d1 == 8) {
1781
837
        pix5 = pixDilateGray(pix3, dilation, dilation);
1782
837
        pixCompareGray(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1783
837
                       &pix7);
1784
837
        pix6 = pixDilateGray(pix4, dilation, dilation);
1785
837
        pixCompareGray(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1786
837
                       &pix8);
1787
837
        pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX);
1788
837
        pix10 = pixThresholdToBinary(pix9, mindiff);
1789
837
        pixInvert(pix10, pix10);
1790
837
        pixCountPixels(pix10, &count, NULL);
1791
837
        pixGetDimensions(pix10, &w, &h, NULL);
1792
837
        *pfract = (w <= 0 || h <= 0) ? 0.0 :
1793
837
                                       (l_float32)count / (l_float32)(w * h);
1794
837
        pixDestroy(&pix5);
1795
837
        pixDestroy(&pix6);
1796
837
        pixDestroy(&pix7);
1797
837
        pixDestroy(&pix8);
1798
837
        if (ppixdiff1)
1799
837
            *ppixdiff1 = pix9;
1800
0
        else
1801
0
            pixDestroy(&pix9);
1802
837
        if (ppixdiff2)
1803
837
            *ppixdiff2 = pix10;
1804
0
        else
1805
0
            pixDestroy(&pix10);
1806
837
    } else {  /* d1 == 32 */
1807
253
        pix5 = pixColorMorph(pix3, L_MORPH_DILATE, dilation, dilation);
1808
253
        pixCompareRGB(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1809
253
                       &pix7);
1810
253
        pix6 = pixColorMorph(pix4, L_MORPH_DILATE, dilation, dilation);
1811
253
        pixCompareRGB(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1812
253
                      &pix8);
1813
253
        pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX);
1814
253
        pix10 = pixConvertRGBToGrayMinMax(pix9, L_CHOOSE_MAX);
1815
253
        pix11 = pixThresholdToBinary(pix10, mindiff);
1816
253
        pixInvert(pix11, pix11);
1817
253
        pixCountPixels(pix11, &count, NULL);
1818
253
        pixGetDimensions(pix11, &w, &h, NULL);
1819
253
        *pfract = (w <= 0 || h <= 0) ? 0.0 :
1820
253
                                       (l_float32)count / (l_float32)(w * h);
1821
253
        pixDestroy(&pix5);
1822
253
        pixDestroy(&pix6);
1823
253
        pixDestroy(&pix7);
1824
253
        pixDestroy(&pix8);
1825
253
        pixDestroy(&pix10);
1826
253
        if (ppixdiff1)
1827
253
            *ppixdiff1 = pix9;
1828
0
        else
1829
0
            pixDestroy(&pix9);
1830
253
        if (ppixdiff2)
1831
253
            *ppixdiff2 = pix11;
1832
0
        else
1833
0
            pixDestroy(&pix11);
1834
1835
253
    }
1836
1.09k
    pixDestroy(&pix3);
1837
1.09k
    pixDestroy(&pix4);
1838
1.09k
    return 0;
1839
1.16k
}
1840
1841
1842
/*!
1843
 * \brief   pixGetPSNR()
1844
 *
1845
 * \param[in]    pix1, pix2     8 or 32 bpp; no colormap
1846
 * \param[in]    factor         sampling factor; >= 1
1847
 * \param[out]   ppsnr          power signal/noise ratio difference
1848
 * \return  0 if OK, 1 on error
1849
 *
1850
 * <pre>
1851
 * Notes:
1852
 *      (1) This computes the power S/N ratio, in dB, for the difference
1853
 *          between two images.  By convention, the power S/N
1854
 *          for a grayscale image is ('log' == log base 10,
1855
 *          and 'ln == log base e):
1856
 *            PSNR = 10 * log((255/MSE)^2)
1857
 *                 = 4.3429 * ln((255/MSE)^2)
1858
 *                 = -4.3429 * ln((MSE/255)^2)
1859
 *          where MSE is the mean squared error.
1860
 *          Here are some examples:
1861
 *             MSE             PSNR
1862
 *             ---             ----
1863
 *             10              28.1
1864
 *             3               38.6
1865
 *             1               48.1
1866
 *             0.1             68.1
1867
 *      (2) If pix1 and pix2 have the same pixel values, the MSE = 0.0
1868
 *          and the PSNR is infinity.  For that case, this returns
1869
 *          PSNR = 1000, which corresponds to the very small MSE of
1870
 *          about 10^(-48).
1871
 * </pre>
1872
 */
1873
l_ok
1874
pixGetPSNR(PIX        *pix1,
1875
           PIX        *pix2,
1876
           l_int32     factor,
1877
           l_float32  *ppsnr)
1878
0
{
1879
0
l_int32    same, i, j, w, h, d, wpl1, wpl2, v1, v2, r1, g1, b1, r2, g2, b2;
1880
0
l_uint32  *data1, *data2, *line1, *line2;
1881
0
l_float32  mse;  /* mean squared error */
1882
1883
0
    if (!ppsnr)
1884
0
        return ERROR_INT("&psnr not defined", __func__, 1);
1885
0
    *ppsnr = 0.0;
1886
0
    if (!pix1 || !pix2)
1887
0
        return ERROR_INT("empty input pix", __func__, 1);
1888
0
    if (!pixSizesEqual(pix1, pix2))
1889
0
        return ERROR_INT("pix sizes unequal", __func__, 1);
1890
0
    if (pixGetColormap(pix1))
1891
0
        return ERROR_INT("pix1 has colormap", __func__, 1);
1892
0
    if (pixGetColormap(pix2))
1893
0
        return ERROR_INT("pix2 has colormap", __func__, 1);
1894
0
    pixGetDimensions(pix1, &w, &h, &d);
1895
0
    if (d != 8 && d != 32)
1896
0
        return ERROR_INT("pix not 8 or 32 bpp", __func__, 1);
1897
0
    if (factor < 1)
1898
0
        return ERROR_INT("invalid sampling factor", __func__, 1);
1899
1900
0
    pixEqual(pix1, pix2, &same);
1901
0
    if (same) {
1902
0
        *ppsnr = 1000.0;  /* crazy big exponent */
1903
0
        return 0;
1904
0
    }
1905
1906
0
    data1 = pixGetData(pix1);
1907
0
    data2 = pixGetData(pix2);
1908
0
    wpl1 = pixGetWpl(pix1);
1909
0
    wpl2 = pixGetWpl(pix2);
1910
0
    mse = 0.0;
1911
0
    if (d == 8) {
1912
0
        for (i = 0; i < h; i += factor) {
1913
0
            line1 = data1 + i * wpl1;
1914
0
            line2 = data2 + i * wpl2;
1915
0
            for (j = 0; j < w; j += factor) {
1916
0
                v1 = GET_DATA_BYTE(line1, j);
1917
0
                v2 = GET_DATA_BYTE(line2, j);
1918
0
                mse += (l_float32)(v1 - v2) * (v1 - v2);
1919
0
            }
1920
0
        }
1921
0
    } else {  /* d == 32 */
1922
0
        for (i = 0; i < h; i += factor) {
1923
0
            line1 = data1 + i * wpl1;
1924
0
            line2 = data2 + i * wpl2;
1925
0
            for (j = 0; j < w; j += factor) {
1926
0
                extractRGBValues(line1[j], &r1, &g1, &b1);
1927
0
                extractRGBValues(line2[j], &r2, &g2, &b2);
1928
0
                mse += ((l_float32)(r1 - r2) * (r1 - r2) +
1929
0
                        (g1 - g2) * (g1 - g2) +
1930
0
                        (b1 - b2) * (b1 - b2)) / 3.0;
1931
0
            }
1932
0
        }
1933
0
    }
1934
0
    mse = mse / ((l_float32)(w) * h);
1935
1936
0
    *ppsnr = -4.3429448 * log(mse / (255 * 255));
1937
0
    return 0;
1938
0
}
1939
1940
1941
/*------------------------------------------------------------------*
1942
 *             Comparison of photo regions by histogram             *
1943
 *------------------------------------------------------------------*/
1944
/*!
1945
 * \brief   pixaComparePhotoRegionsByHisto()
1946
 *
1947
 * \param[in]    pixa        any depth; colormap OK
1948
 * \param[in]    minratio    requiring sizes be compatible; < 1.0
1949
 * \param[in]    textthresh  threshold for text/photo; use 0 for default
1950
 * \param[in]    factor      subsampling; >= 1
1951
 * \param[in]    n           in range {1, ... 7}. n^2 is the maximum number
1952
 *                           of subregions for histograms; typ. n = 3.
1953
 * \param[in]    simthresh   threshold for similarity; use 0 for default
1954
 * \param[out]   pnai array  giving similarity class indices
1955
 * \param[out]   pscores     [optional] score matrix as 1-D array of size N^2
1956
 * \param[out]   ppixd       [optional] pix of similarity classes
1957
 * \param[in]    debug       1 to output histograms; 0 otherwise
1958
 * \return  0 if OK, 1 on error
1959
 *
1960
 * <pre>
1961
 * Notes:
1962
 *      (1) This function takes a pixa of cropped photo images and
1963
 *          compares each one to the others for similarity.
1964
 *          Each image is first tested to see if it is a photo that can
1965
 *          be compared by tiled histograms.  If so, it is padded to put
1966
 *          the centroid in the center of the image, and the histograms
1967
 *          are generated.  The final step of comparing each histogram
1968
 *          with all the others is very fast.
1969
 *      (2) To make the histograms, each image is subdivided in a maximum
1970
 *          of n^2 subimages.  The parameter %n specifies the "side" of
1971
 *          an n x n grid of such subimages.  If the subimages have an
1972
 *          aspect ratio larger than 2, the grid will change, again using n^2
1973
 *          as a maximum for the number of subimages.  For example,
1974
 *          if n == 3, but the image is 600 x 200 pixels, a 3x3 grid
1975
 *          would have subimages of 200 x 67 pixels, which is more
1976
 *          than 2:1, so we change to a 4x2 grid where each subimage
1977
 *          has 150 x 100 pixels.
1978
 *      (3) An initial filter gives %score = 0 if the ratio of widths
1979
 *          and heights (smallest / largest) does not exceed a
1980
 *          threshold %minratio.  If set at 1.0, both images must be
1981
 *          exactly the same size.  A typical value for %minratio is 0.9.
1982
 *      (4) The comparison score between two images is a value in [0.0 .. 1.0].
1983
 *          If the comparison score >= %simthresh, the images are placed in
1984
 *          the same similarity class.  Default value for %simthresh is 0.25.
1985
 *      (5) An array %nai of similarity class indices for pix in the
1986
 *          input pixa is returned.
1987
 *      (6) There are two debugging options:
1988
 *          * An optional 2D matrix of scores is returned as a 1D array.
1989
 *            A visualization of this is written to a temp file.
1990
 *          * An optional pix showing the similarity classes can be
1991
 *            returned.  Text in each input pix is reproduced.
1992
 *      (7) See the notes in pixComparePhotoRegionsByHisto() for details
1993
 *          on the implementation.
1994
 * </pre>
1995
 */
1996
l_ok
1997
pixaComparePhotoRegionsByHisto(PIXA        *pixa,
1998
                               l_float32    minratio,
1999
                               l_float32    textthresh,
2000
                               l_int32      factor,
2001
                               l_int32      n,
2002
                               l_float32    simthresh,
2003
                               NUMA       **pnai,
2004
                               l_float32  **pscores,
2005
                               PIX        **ppixd,
2006
                               l_int32      debug)
2007
0
{
2008
0
char       *text;
2009
0
l_int32     i, j, nim, w, h, w1, h1, w2, h2, ival, index, classid;
2010
0
l_float32   score;
2011
0
l_float32  *scores;
2012
0
NUMA       *nai, *naw, *nah;
2013
0
NUMAA      *naa;
2014
0
NUMAA     **n3a;  /* array of naa */
2015
0
PIX        *pix;
2016
2017
0
    if (pscores) *pscores = NULL;
2018
0
    if (ppixd) *ppixd = NULL;
2019
0
    if (!pnai)
2020
0
        return ERROR_INT("&na not defined", __func__, 1);
2021
0
    *pnai = NULL;
2022
0
    if (!pixa)
2023
0
        return ERROR_INT("pixa not defined", __func__, 1);
2024
0
    if (minratio < 0.0 || minratio > 1.0)
2025
0
        return ERROR_INT("minratio not in [0.0 ... 1.0]", __func__, 1);
2026
0
    if (textthresh <= 0.0) textthresh = 1.3f;
2027
0
    if (factor < 1)
2028
0
        return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2029
0
    if (n < 1 || n > 7) {
2030
0
        L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2031
0
        n = 4;
2032
0
    }
2033
0
    if (simthresh <= 0.0) simthresh = 0.25;
2034
0
    if (simthresh > 1.0)
2035
0
        return ERROR_INT("simthresh invalid; should be near 0.25", __func__, 1);
2036
2037
        /* Prepare the histograms */
2038
0
    nim = pixaGetCount(pixa);
2039
0
    if ((n3a = (NUMAA **)LEPT_CALLOC(nim, sizeof(NUMAA *))) == NULL)
2040
0
        return ERROR_INT("calloc fail for n3a", __func__, 1);
2041
0
    naw = numaCreate(0);
2042
0
    nah = numaCreate(0);
2043
0
    for (i = 0; i < nim; i++) {
2044
0
        pix = pixaGetPix(pixa, i, L_CLONE);
2045
0
        text = pixGetText(pix);
2046
0
        pixSetResolution(pix, 150, 150);
2047
0
        index = (debug) ? i : 0;
2048
0
        pixGenPhotoHistos(pix, NULL, factor, textthresh, n,
2049
0
                          &naa, &w, &h, index);
2050
0
        n3a[i] = naa;
2051
0
        numaAddNumber(naw, w);
2052
0
        numaAddNumber(nah, h);
2053
0
        if (naa)
2054
0
            lept_stderr("Image %s is photo\n", text);
2055
0
        else
2056
0
            lept_stderr("Image %s is NOT photo\n", text);
2057
0
        pixDestroy(&pix);
2058
0
    }
2059
2060
        /* Do the comparisons.  We are making a set of classes, where
2061
         * all similar images are placed in the same class.  There are
2062
         * 'nim' input images.  The classes are labeled by 'classid' (all
2063
         * similar images get the same 'classid' value), and 'nai' maps
2064
         * the classid of the image in the input array to the classid
2065
         * of the similarity class.  */
2066
0
    if ((scores =
2067
0
               (l_float32 *)LEPT_CALLOC((size_t)nim * nim, sizeof(l_float32)))
2068
0
                == NULL) {
2069
0
        L_ERROR("calloc fail for scores\n", __func__);
2070
0
        goto cleanup;
2071
0
    }
2072
0
    nai = numaMakeConstant(-1, nim);  /* classid array */
2073
0
    for (i = 0, classid = 0; i < nim; i++) {
2074
0
        scores[nim * i + i] = 1.0;
2075
0
        numaGetIValue(nai, i, &ival);
2076
0
        if (ival != -1)  /* already set */
2077
0
            continue;
2078
0
        numaSetValue(nai, i, classid);
2079
0
        if (n3a[i] == NULL) {  /* not a photo */
2080
0
            classid++;
2081
0
            continue;
2082
0
        }
2083
0
        numaGetIValue(naw, i, &w1);
2084
0
        numaGetIValue(nah, i, &h1);
2085
0
        for (j = i + 1; j < nim; j++) {
2086
0
            numaGetIValue(nai, j, &ival);
2087
0
            if (ival != -1)  /* already set */
2088
0
                continue;
2089
0
            if (n3a[j] == NULL)  /* not a photo */
2090
0
                continue;
2091
0
            numaGetIValue(naw, j, &w2);
2092
0
            numaGetIValue(nah, j, &h2);
2093
0
            compareTilesByHisto(n3a[i], n3a[j], minratio, w1, h1, w2, h2,
2094
0
                                &score, NULL);
2095
0
            scores[nim * i + j] = score;
2096
0
            scores[nim * j + i] = score;  /* the score array is symmetric */
2097
/*            lept_stderr("score = %5.3f\n", score); */
2098
0
            if (score > simthresh) {
2099
0
                numaSetValue(nai, j, classid);
2100
0
                lept_stderr(
2101
0
                        "Setting %d similar to %d, in class %d; score %5.3f\n",
2102
0
                        j, i, classid, score);
2103
0
            }
2104
0
        }
2105
0
        classid++;
2106
0
    }
2107
0
    *pnai = nai;
2108
2109
        /* Debug: optionally save and display the score array.
2110
         * All images that are photos are represented by a point on
2111
         * the diagonal. Other images in the same similarity class
2112
         * are on the same horizontal raster line to the right.
2113
         * The array has been symmetrized, so images in the same
2114
         * same similarity class also appear on the same column below. */
2115
0
    if (pscores) {
2116
0
        l_int32    wpl, fact;
2117
0
        l_uint32  *line, *data;
2118
0
        PIX       *pix2, *pix3;
2119
0
        pix2 = pixCreate(nim, nim, 8);
2120
0
        data = pixGetData(pix2);
2121
0
        wpl = pixGetWpl(pix2);
2122
0
        for (i = 0; i < nim; i++) {
2123
0
            line = data + i * wpl;
2124
0
            for (j = 0; j < nim; j++) {
2125
0
                SET_DATA_BYTE(line, j,
2126
0
                              L_MIN(255, 4.0 * 255 * scores[nim * i + j]));
2127
0
            }
2128
0
        }
2129
0
        fact = L_MAX(2, 1000 / nim);
2130
0
        pix3 = pixExpandReplicate(pix2, fact);
2131
0
        lept_stderr("Writing to /tmp/lept/comp/scorearray.png\n");
2132
0
        lept_mkdir("lept/comp");
2133
0
        pixWrite("/tmp/lept/comp/scorearray.png", pix3, IFF_PNG);
2134
0
        pixDestroy(&pix2);
2135
0
        pixDestroy(&pix3);
2136
0
        *pscores = scores;
2137
0
    } else {
2138
0
        LEPT_FREE(scores);
2139
0
    }
2140
2141
        /* Debug: optionally display and save the image comparisons.
2142
         * Image similarity classes are displayed by column; similar
2143
         * images are displayed in the same column. */
2144
0
    if (ppixd)
2145
0
        *ppixd = pixaDisplayTiledByIndex(pixa, nai, 200, 20, 2, 6, 0x0000ff00);
2146
2147
0
cleanup:
2148
0
    numaDestroy(&naw);
2149
0
    numaDestroy(&nah);
2150
0
    for (i = 0; i < nim; i++)
2151
0
        numaaDestroy(&n3a[i]);
2152
0
    LEPT_FREE(n3a);
2153
0
    return 0;
2154
0
}
2155
2156
2157
/*!
2158
 * \brief   pixComparePhotoRegionsByHisto()
2159
 *
2160
 * \param[in]    pix1, pix2    any depth; colormap OK
2161
 * \param[in]    box1, box2    [optional] photo regions from each; can be null
2162
 * \param[in]    minratio      requiring sizes be compatible; < 1.0
2163
 * \param[in]    factor        subsampling factor; >= 1
2164
 * \param[in]    n             in range {1, ... 7}. n^2 is the maximum number
2165
 *                             of subregions for histograms; typ. n = 3.
2166
 * \param[out]   pscore        similarity score of histograms
2167
 * \param[in]    debugflag     1 for debug output; 0 for no debugging
2168
 * \return  0 if OK, 1 on error
2169
 *
2170
 * <pre>
2171
 * Notes:
2172
 *      (1) This function compares two grayscale photo regions.  If a
2173
 *          box is given, the region is clipped; otherwise assume
2174
 *          the entire images are photo regions.  This is done with a
2175
 *          set of not more than n^2 spatially aligned histograms, which are
2176
 *          aligned using the centroid of the inverse image.
2177
 *      (2) The parameter %n specifies the "side" of an n x n grid
2178
 *          of subimages.  If the subimages have an aspect ratio larger
2179
 *          than 2, the grid will change, using n^2 as a maximum for
2180
 *          the number of subimages.  For example, if n == 3, but the
2181
 *          image is 600 x 200 pixels, a 3x3 grid would have subimages
2182
 *          of 200 x 67 pixels, which is more than 2:1, so we change
2183
 *          to a 4x2 grid where each subimage has 150 x 100 pixels.
2184
 *      (3) An initial filter gives %score = 0 if the ratio of widths
2185
 *          and heights (smallest / largest) does not exceed a
2186
 *          threshold %minratio.  This must be between 0.5 and 1.0.
2187
 *          If set at 1.0, both images must be exactly the same size.
2188
 *          A typical value for %minratio is 0.9.
2189
 *      (4) Because this function should not be used on text or
2190
 *          line graphics, which can give false positive results
2191
 *          (i.e., high scores for different images), filter the images
2192
 *          using pixGenPhotoHistos(), which returns tiled histograms
2193
 *          only if an image is not text and comparison is expected
2194
 *          to work with histograms.  If either image fails the test,
2195
 *          the comparison returns a score of 0.0.
2196
 *      (5) The white value counts in the histograms are removed; they
2197
 *          are typically pixels that were padded to achieve alignment.
2198
 *      (6) For an efficient representation of the histogram, normalize
2199
 *          using a multiplicative factor so that the number in the
2200
 *          maximum bucket is 255.  It then takes 256 bytes to store.
2201
 *      (7) When comparing the histograms of two regions, use the
2202
 *          Earth Mover distance (EMD), with the histograms normalized
2203
 *          so that the sum over bins is the same.  Further normalize
2204
 *          by dividing by 255, so that the result is in [0.0 ... 1.0].
2205
 *      (8) Get a similarity score S = 1.0 - k * D, where
2206
 *            k is a constant, say in the range 5-10
2207
 *            D = normalized EMD
2208
 *          and for multiple tiles, take the Min(S) to be the final score.
2209
 *          Using aligned tiles gives protection against accidental
2210
 *          similarity of the overall grayscale histograms.
2211
 *          A small number of aligned tiles works well.
2212
 *      (9) With debug on, you get a pdf that shows, for each tile,
2213
 *          the images, histograms and score.
2214
 * </pre>
2215
 */
2216
l_ok
2217
pixComparePhotoRegionsByHisto(PIX        *pix1,
2218
                              PIX        *pix2,
2219
                              BOX        *box1,
2220
                              BOX        *box2,
2221
                              l_float32   minratio,
2222
                              l_int32     factor,
2223
                              l_int32     n,
2224
                              l_float32  *pscore,
2225
                              l_int32     debugflag)
2226
0
{
2227
0
l_int32    w1, h1, w2, h2, w1c, h1c, w2c, h2c, debugindex;
2228
0
l_float32  wratio, hratio;
2229
0
NUMAA     *naa1, *naa2;
2230
0
PIX       *pix3, *pix4;
2231
0
PIXA      *pixa;
2232
2233
0
    if (!pscore)
2234
0
        return ERROR_INT("&score not defined", __func__, 1);
2235
0
    *pscore = 0.0;
2236
0
    if (!pix1 || !pix2)
2237
0
        return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
2238
0
    if (minratio < 0.5 || minratio > 1.0)
2239
0
        return ERROR_INT("minratio not in [0.5 ... 1.0]", __func__, 1);
2240
0
    if (factor < 1)
2241
0
        return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2242
0
    if (n < 1 || n > 7) {
2243
0
        L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2244
0
        n = 4;
2245
0
    }
2246
2247
0
    debugindex = 0;
2248
0
    if (debugflag) {
2249
0
        lept_mkdir("lept/comp");
2250
0
        debugindex = 666;  /* arbitrary number used for naming output */
2251
0
    }
2252
2253
        /* Initial filter by size */
2254
0
    if (box1)
2255
0
        boxGetGeometry(box1, NULL, NULL, &w1, &h1);
2256
0
    else
2257
0
        pixGetDimensions(pix1, &w1, &h1, NULL);
2258
0
    if (box2)
2259
0
        boxGetGeometry(box2, NULL, NULL, &w2, &h2);
2260
0
    else
2261
0
        pixGetDimensions(pix1, &w2, &h2, NULL);
2262
0
    wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 :
2263
0
             (l_float32)w2 / (l_float32)w1;
2264
0
    hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 :
2265
0
             (l_float32)h2 / (l_float32)h1;
2266
0
    if (wratio < minratio || hratio < minratio)
2267
0
        return 0;
2268
2269
        /* Initial crop, if necessary, and make histos */
2270
0
    if (box1)
2271
0
        pix3 = pixClipRectangle(pix1, box1, NULL);
2272
0
    else
2273
0
        pix3 = pixClone(pix1);
2274
0
    pixGenPhotoHistos(pix3, NULL, factor, 0, n, &naa1, &w1c, &h1c, debugindex);
2275
0
    pixDestroy(&pix3);
2276
0
    if (!naa1) return 0;
2277
0
    if (box2)
2278
0
        pix4 = pixClipRectangle(pix2, box2, NULL);
2279
0
    else
2280
0
        pix4 = pixClone(pix2);
2281
0
    pixGenPhotoHistos(pix4, NULL, factor, 0, n, &naa2, &w2c, &h2c, debugindex);
2282
0
    pixDestroy(&pix4);
2283
0
    if (!naa2) return 0;
2284
2285
        /* Compare histograms */
2286
0
    pixa = (debugflag) ? pixaCreate(0) : NULL;
2287
0
    compareTilesByHisto(naa1, naa2, minratio, w1c, h1c, w2c, h2c, pscore, pixa);
2288
0
    pixaDestroy(&pixa);
2289
0
    return 0;
2290
0
}
2291
2292
2293
/*!
2294
 * \brief   pixGenPhotoHistos()
2295
 *
2296
 * \param[in]    pixs      depth > 1 bpp; colormap OK
2297
 * \param[in]    box       [optional] region to be selected; can be null
2298
 * \param[in]    factor    subsampling; >= 1
2299
 * \param[in]    thresh    threshold for photo/text; use 0 for default
2300
 * \param[in]    n         in range {1, ... 7}. n^2 is the maximum number
2301
 *                         of subregions for histograms; typ. n = 3.
2302
 * \param[out]   pnaa      nx * ny 256-entry gray histograms
2303
 * \param[out]   pw        width of image used to make histograms
2304
 * \param[out]   ph        height of image used to make histograms
2305
 * \param[in]    debugindex  0 for no debugging; positive integer otherwise
2306
 * \return  0 if OK, 1 on error
2307
 *
2308
 * <pre>
2309
 * Notes:
2310
 *      (1) This crops and converts to 8 bpp if necessary.  It adds a
2311
 *          minimal white boundary such that the centroid of the
2312
 *          photo-inverted image is in the center. This allows
2313
 *          automatic alignment with histograms of other image regions.
2314
 *      (2) The parameter %n specifies the "side" of the n x n grid
2315
 *          of subimages.  If the subimages have an aspect ratio larger
2316
 *          than 2, the grid will change, using n^2 as a maximum for
2317
 *          the number of subimages.  For example, if n == 3, but the
2318
 *          image is 600 x 200 pixels, a 3x3 grid would have subimages
2319
 *          of 200 x 67 pixels, which is more than 2:1, so we change
2320
 *          to a 4x2 grid where each subimage has 150 x 100 pixels.
2321
 *      (3) The white value in the histogram is removed, because of
2322
 *          the padding.
2323
 *      (4) Use 0 for conservative default (1.3) for thresh.
2324
 *      (5) For an efficient representation of the histogram, normalize
2325
 *          using a multiplicative factor so that the number in the
2326
 *          maximum bucket is 255.  It then takes 256 bytes to store.
2327
 *      (6) With %debugindex > 0, this makes a pdf that shows, for each tile,
2328
 *          the images and histograms.
2329
 * </pre>
2330
 */
2331
l_ok
2332
pixGenPhotoHistos(PIX        *pixs,
2333
                  BOX        *box,
2334
                  l_int32     factor,
2335
                  l_float32   thresh,
2336
                  l_int32     n,
2337
                  NUMAA     **pnaa,
2338
                  l_int32    *pw,
2339
                  l_int32    *ph,
2340
                  l_int32     debugindex)
2341
0
{
2342
0
char    buf[64];
2343
0
NUMAA  *naa;
2344
0
PIX    *pix1, *pix2, *pix3, *pixm;
2345
0
PIXA   *pixa;
2346
2347
0
    if (pnaa) *pnaa = NULL;
2348
0
    if (pw) *pw = 0;
2349
0
    if (ph) *ph = 0;
2350
0
    if (!pnaa)
2351
0
        return ERROR_INT("&naa not defined", __func__, 1);
2352
0
    if (!pw || !ph)
2353
0
        return ERROR_INT("&w and &h not both defined", __func__, 1);
2354
0
    if (!pixs || pixGetDepth(pixs) == 1)
2355
0
        return ERROR_INT("pixs not defined or 1 bpp", __func__, 1);
2356
0
    if (factor < 1)
2357
0
        return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2358
0
    if (thresh <= 0.0) thresh = 1.3f;  /* default */
2359
0
    if (n < 1 || n > 7) {
2360
0
        L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2361
0
        n = 4;
2362
0
    }
2363
2364
0
    pixa = NULL;
2365
0
    if (debugindex > 0) {
2366
0
        pixa = pixaCreate(0);
2367
0
        lept_mkdir("lept/comp");
2368
0
    }
2369
2370
        /* Initial crop, if necessary */
2371
0
    if (box)
2372
0
        pix1 = pixClipRectangle(pixs, box, NULL);
2373
0
    else
2374
0
        pix1 = pixClone(pixs);
2375
2376
        /* Convert to 8 bpp and pad to center the centroid */
2377
0
    pix2 = pixConvertTo8(pix1, FALSE);
2378
0
    pix3 = pixPadToCenterCentroid(pix2, factor);
2379
2380
        /* Set to 255 all pixels above 230.  Do this so that light gray
2381
         * pixels do not enter into the comparison. */
2382
0
    pixm = pixThresholdToBinary(pix3, 230);
2383
0
    pixInvert(pixm, pixm);
2384
0
    pixSetMaskedGeneral(pix3, pixm, 255, 0, 0);
2385
0
    pixDestroy(&pixm);
2386
2387
0
    if (debugindex > 0) {
2388
0
        PIX   *pix4, *pix5, *pix6, *pix7, *pix8;
2389
0
        PIXA  *pixa2;
2390
0
        pix4 = pixConvertTo32(pix2);
2391
0
        pix5 = pixConvertTo32(pix3);
2392
0
        pix6 = pixScaleToSize(pix4, 400, 0);
2393
0
        pix7 = pixScaleToSize(pix5, 400, 0);
2394
0
        pixa2 = pixaCreate(2);
2395
0
        pixaAddPix(pixa2, pix6, L_INSERT);
2396
0
        pixaAddPix(pixa2, pix7, L_INSERT);
2397
0
        pix8 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 3);
2398
0
        pixaAddPix(pixa, pix8, L_INSERT);
2399
0
        pixDestroy(&pix4);
2400
0
        pixDestroy(&pix5);
2401
0
        pixaDestroy(&pixa2);
2402
0
    }
2403
0
    pixDestroy(&pix1);
2404
0
    pixDestroy(&pix2);
2405
2406
        /* Test if this is a photoimage */
2407
0
    pixDecideIfPhotoImage(pix3, factor, thresh, n, &naa, pixa);
2408
0
    if (naa) {
2409
0
        *pnaa = naa;
2410
0
        *pw = pixGetWidth(pix3);
2411
0
        *ph = pixGetHeight(pix3);
2412
0
    }
2413
2414
0
    if (pixa) {
2415
0
        snprintf(buf, sizeof(buf), "/tmp/lept/comp/tiledhistos.%d.pdf",
2416
0
                 debugindex);
2417
0
        lept_stderr("Writing to %s\n", buf);
2418
0
        pixaConvertToPdf(pixa, 300, 1.0, L_FLATE_ENCODE, 0, NULL, buf);
2419
0
        pixaDestroy(&pixa);
2420
0
    }
2421
2422
0
    pixDestroy(&pix3);
2423
0
    return 0;
2424
0
}
2425
2426
2427
/*!
2428
 * \brief   pixPadToCenterCentroid()
2429
 *
2430
 * \param[in]    pixs     any depth, colormap OK
2431
 * \param[in]    factor   subsampling for centroid; >= 1
2432
 * \return  pixd padded with white pixels, or NULL on error.
2433
 *
2434
 * <pre>
2435
 * Notes:
2436
 *      (1) This add minimum white padding to an 8 bpp pix, such that
2437
 *          the centroid of the photometric inverse is in the center of
2438
 *          the resulting image.  Thus in computing the centroid,
2439
 *          black pixels have weight 255, and white pixels have weight 0.
2440
 * </pre>
2441
 */
2442
PIX *
2443
pixPadToCenterCentroid(PIX     *pixs,
2444
                       l_int32  factor)
2445
2446
0
{
2447
0
l_float32  cx, cy;
2448
0
l_int32    xs, ys, delx, dely, icx, icy, ws, hs, wd, hd;
2449
0
PIX       *pix1, *pixd;
2450
2451
0
    if (!pixs)
2452
0
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2453
0
    if (factor < 1)
2454
0
        return (PIX *)ERROR_PTR("invalid sampling factor", __func__, NULL);
2455
2456
0
    pix1 = pixConvertTo8(pixs, FALSE);
2457
0
    pixCentroid8(pix1, factor, &cx, &cy);
2458
0
    icx = (l_int32)(cx + 0.5);
2459
0
    icy = (l_int32)(cy + 0.5);
2460
0
    pixGetDimensions(pix1, &ws, &hs, NULL);
2461
0
    delx = ws - 2 * icx;
2462
0
    dely = hs - 2 * icy;
2463
0
    xs = L_MAX(0, delx);
2464
0
    ys = L_MAX(0, dely);
2465
0
    wd = 2 * L_MAX(icx, ws - icx);
2466
0
    hd = 2 * L_MAX(icy, hs - icy);
2467
0
    pixd = pixCreate(wd, hd, 8);
2468
0
    pixSetAll(pixd);  /* to white */
2469
0
    pixCopyResolution(pixd, pixs);
2470
0
    pixRasterop(pixd, xs, ys, ws, hs, PIX_SRC, pix1, 0, 0);
2471
0
    pixDestroy(&pix1);
2472
0
    return pixd;
2473
0
}
2474
2475
2476
/*!
2477
 * \brief   pixCentroid8()
2478
 *
2479
 * \param[in]    pixs    8 bpp
2480
 * \param[in]    factor  subsampling factor; >= 1
2481
 * \param[out]   pcx     x value of centroid
2482
 * \param[out]   pcy     y value of centroid
2483
 * \return  0 if OK, 1 on error
2484
 *
2485
 * <pre>
2486
 * Notes:
2487
 *      (1) This first does a photometric inversion (black = 255, white = 0).
2488
 *          It then finds the centroid of the result.  The inversion is
2489
 *          done because white is usually background, so the centroid
2490
 *          is computed based on the "foreground" gray pixels, and the
2491
 *          darker the pixel, the more weight it is given.
2492
 * </pre>
2493
 */
2494
l_ok
2495
pixCentroid8(PIX        *pixs,
2496
             l_int32     factor,
2497
             l_float32  *pcx,
2498
             l_float32  *pcy)
2499
0
{
2500
0
l_int32    i, j, w, h, wpl, val;
2501
0
l_float32  sumx, sumy, sumv;
2502
0
l_uint32  *data, *line;
2503
0
PIX       *pix1;
2504
2505
0
    if (pcx) *pcx = 0.0;
2506
0
    if (pcy) *pcy = 0.0;
2507
0
    if (!pixs || pixGetDepth(pixs) != 8)
2508
0
        return ERROR_INT("pixs undefined or not 8 bpp", __func__, 1);
2509
0
    if (factor < 1)
2510
0
        return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2511
0
    if (!pcx || !pcy)
2512
0
        return ERROR_INT("&cx and &cy not both defined", __func__, 1);
2513
2514
0
    pix1 = pixInvert(NULL, pixs);
2515
0
    pixGetDimensions(pix1, &w, &h, NULL);
2516
0
    data = pixGetData(pix1);
2517
0
    wpl = pixGetWpl(pix1);
2518
0
    sumx = sumy = sumv = 0.0;
2519
0
    for (i = 0; i < h; i++) {
2520
0
        line = data + i * wpl;
2521
0
        for (j = 0; j < w; j++) {
2522
0
            val = GET_DATA_BYTE(line, j);
2523
0
            sumx += val * j;
2524
0
            sumy += val * i;
2525
0
            sumv += val;
2526
0
        }
2527
0
    }
2528
0
    pixDestroy(&pix1);
2529
2530
0
    if (sumv == 0) {
2531
0
        L_INFO("input image is white\n", __func__);
2532
0
        *pcx = (l_float32)(w) / 2;
2533
0
        *pcy = (l_float32)(h) / 2;
2534
0
    } else {
2535
0
        *pcx = sumx / sumv;
2536
0
        *pcy = sumy / sumv;
2537
0
    }
2538
2539
0
    return 0;
2540
0
}
2541
2542
2543
/*!
2544
 * \brief   pixDecideIfPhotoImage()
2545
 *
2546
 * \param[in]    pix         8 bpp, centroid in center
2547
 * \param[in]    factor      subsampling for histograms; >= 1
2548
 * \param[in]    thresh      threshold for photo/text; use 0 for default
2549
 * \param[in]    n           in range {1, ... 7}. n^2 is the maximum number
2550
 *                           of subregions for histograms; typ. n = 3.
2551
 * \param[out]   pnaa        array of normalized histograms
2552
 * \param[in]    pixadebug   [optional] use only for debug output
2553
 * \return  0 if OK, 1 on error
2554
 *
2555
 * <pre>
2556
 * Notes:
2557
 *      (1) The input image must be 8 bpp (no colormap), and padded with
2558
 *          white pixels so the centroid of photo-inverted pixels is at
2559
 *          the center of the image.
2560
 *      (2) The parameter %n specifies the "side" of the n x n grid
2561
 *          of subimages.  If the subimages have an aspect ratio larger
2562
 *          than 2, the grid will change, using n^2 as a maximum for
2563
 *          the number of subimages.  For example, if n == 3, but the
2564
 *          image is 600 x 200 pixels, a 3x3 grid would have subimages
2565
 *          of 200 x 67 pixels, which is more than 2:1, so we change
2566
 *          to a 4x2 grid where each subimage has 150 x 100 pixels.
2567
 *      (3) If the pix is not almost certainly a photoimage, the returned
2568
 *          histograms (%naa) are null.
2569
 *      (4) If histograms are generated, the white (255) count is set
2570
 *          to 0.  This removes all pixels values above 230, including
2571
 *          white padding from the centroid matching operation, from
2572
 *          consideration.  The resulting histograms are then normalized
2573
 *          so the maximum count is 255.
2574
 *      (5) Default for %thresh is 1.3; this seems sufficiently conservative.
2575
 *      (6) Use %pixadebug == NULL unless debug output is requested.
2576
 * </pre>
2577
 */
2578
l_ok
2579
pixDecideIfPhotoImage(PIX       *pix,
2580
                      l_int32    factor,
2581
                      l_float32  thresh,
2582
                      l_int32    n,
2583
                      NUMAA    **pnaa,
2584
                      PIXA      *pixadebug)
2585
0
{
2586
0
char       buf[64];
2587
0
l_int32    i, w, h, nx, ny, ngrids, istext, isphoto;
2588
0
l_float32  maxval, sum1, sum2, ratio;
2589
0
L_BMF     *bmf;
2590
0
NUMA      *na1, *na2, *na3, *narv;
2591
0
NUMAA     *naa;
2592
0
PIX       *pix1;
2593
0
PIXA      *pixa1, *pixa2, *pixa3;
2594
2595
0
    if (!pnaa)
2596
0
        return ERROR_INT("&naa not defined", __func__, 1);
2597
0
    *pnaa = NULL;
2598
0
    if (!pix || pixGetDepth(pix) != 8 || pixGetColormap(pix))
2599
0
        return ERROR_INT("pix undefined or invalid", __func__, 1);
2600
0
    if (n < 1 || n > 7) {
2601
0
        L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2602
0
        n = 4;
2603
0
    }
2604
0
    if (thresh <= 0.0) thresh = 1.3f;  /* default */
2605
2606
        /* Look for text lines */
2607
0
    pixDecideIfText(pix, NULL, &istext, pixadebug);
2608
0
    if (istext) {
2609
0
        L_INFO("Image is text\n", __func__);
2610
0
        return 0;
2611
0
    }
2612
2613
        /* Determine grid from n */
2614
0
    pixGetDimensions(pix, &w, &h, NULL);
2615
0
    if (w == 0 || h == 0)
2616
0
        return ERROR_INT("invalid pix dimension", __func__, 1);
2617
0
    findHistoGridDimensions(n, w, h, &nx, &ny, 1);
2618
2619
        /* Evaluate histograms in each tile */
2620
0
    pixa1 = pixaSplitPix(pix, nx, ny, 0, 0);
2621
0
    ngrids = nx * ny;
2622
0
    bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL;
2623
0
    naa = numaaCreate(ngrids);
2624
0
    if (pixadebug) {
2625
0
        lept_rmdir("lept/compplot");
2626
0
        lept_mkdir("lept/compplot");
2627
0
    }
2628
0
    for (i = 0; i < ngrids; i++) {
2629
0
        pix1 = pixaGetPix(pixa1, i, L_CLONE);
2630
2631
            /* Get histograms, set white count to 0, normalize max to 255 */
2632
0
        na1 = pixGetGrayHistogram(pix1, factor);
2633
0
        numaSetValue(na1, 255, 0);
2634
0
        na2 = numaWindowedMean(na1, 5);  /* do some smoothing */
2635
0
        numaGetMax(na2, &maxval, NULL);
2636
0
        na3 = numaTransform(na2, 0, 255.0 / maxval);
2637
0
        if (pixadebug) {
2638
0
            snprintf(buf, sizeof(buf), "/tmp/lept/compplot/plot.%d", i);
2639
0
            gplotSimple1(na3, GPLOT_PNG, buf, "Histos");
2640
0
        }
2641
2642
0
        numaaAddNuma(naa, na3, L_INSERT);
2643
0
        numaDestroy(&na1);
2644
0
        numaDestroy(&na2);
2645
0
        pixDestroy(&pix1);
2646
0
    }
2647
0
    if (pixadebug) {
2648
0
        pix1 = pixaDisplayTiledInColumns(pixa1, nx, 1.0, 30, 2);
2649
0
        pixaAddPix(pixadebug, pix1, L_INSERT);
2650
0
        pixa2 = pixaReadFiles("/tmp/lept/compplot", ".png");
2651
0
        pixa3 = pixaScale(pixa2, 0.4f, 0.4f);
2652
0
        pix1 = pixaDisplayTiledInColumns(pixa3, nx, 1.0, 30, 2);
2653
0
        pixaAddPix(pixadebug, pix1, L_INSERT);
2654
0
        pixaDestroy(&pixa2);
2655
0
        pixaDestroy(&pixa3);
2656
0
    }
2657
2658
        /* Compute the standard deviation between these histos to decide
2659
         * if the image is photo or something more like line art,
2660
         * which does not support good comparison by tiled histograms.  */
2661
0
    grayInterHistogramStats(naa, 5, NULL, NULL, NULL, &narv);
2662
2663
        /* For photos, the root variance has a larger weight of
2664
         * values in the range [50 ... 150] compared to [200 ... 230],
2665
         * than text or line art.  For the latter, most of the variance
2666
         * between tiles is in the lightest parts of the image, well
2667
         * above 150.  */
2668
0
    numaGetSumOnInterval(narv, 50, 150, &sum1);
2669
0
    numaGetSumOnInterval(narv, 200, 230, &sum2);
2670
0
    if (sum2 == 0.0) {  /* shouldn't happen */
2671
0
        ratio = 0.001f;  /* anything very small for debug output */
2672
0
        isphoto = 0;  /* be conservative */
2673
0
    } else {
2674
0
        ratio = sum1 / sum2;
2675
0
        isphoto = (ratio > thresh) ? 1 : 0;
2676
0
    }
2677
0
    if (pixadebug) {
2678
0
        if (isphoto)
2679
0
            L_INFO("ratio %f > %f; isphoto is true\n",
2680
0
                   __func__, ratio, thresh);
2681
0
        else
2682
0
            L_INFO("ratio %f < %f; isphoto is false\n",
2683
0
                   __func__, ratio, thresh);
2684
0
    }
2685
0
    if (isphoto)
2686
0
        *pnaa = naa;
2687
0
    else
2688
0
        numaaDestroy(&naa);
2689
0
    bmfDestroy(&bmf);
2690
0
    numaDestroy(&narv);
2691
0
    pixaDestroy(&pixa1);
2692
0
    return 0;
2693
0
}
2694
2695
2696
/*!
2697
 * \brief   findHistoGridDimensions()
2698
 *
2699
 * \param[in]    n         max number of grid elements is n^2; typ. n = 3
2700
 * \param[in]    w         width of image to be subdivided
2701
 * \param[in]    h         height of image to be subdivided
2702
 * \param[out]   pnx       number of grid elements in x direction
2703
 * \param[out]   pny       number of grid elements in y direction
2704
 * \param[in]    debug     1 for debug output to stderr
2705
 * \return  0 if OK, 1 on error
2706
 *
2707
 * <pre>
2708
 * Notes:
2709
 *      (1) This determines the number of subdivisions to be used on
2710
 *          the image in each direction.  A histogram will be built
2711
 *          for each subimage.
2712
 *      (2) The parameter %n specifies the "side" of the n x n grid
2713
 *          of subimages.  If the subimages have an aspect ratio larger
2714
 *          than 2, the grid will change, using n^2 as a maximum for
2715
 *          the number of subimages.  For example, if n == 3, but the
2716
 *          image is 600 x 200 pixels, a 3x3 grid would have subimages
2717
 *          of 200 x 67 pixels, which is more than 2:1, so we change
2718
 *          to a 4x2 grid where each subimage has 150 x 100 pixels.
2719
 * </pre>
2720
 */
2721
static l_ok
2722
findHistoGridDimensions(l_int32   n,
2723
                        l_int32   w,
2724
                        l_int32   h,
2725
                        l_int32  *pnx,
2726
                        l_int32  *pny,
2727
                        l_int32   debug)
2728
0
{
2729
0
l_int32    nx, ny, max;
2730
0
l_float32  ratio;
2731
2732
0
    ratio = (l_float32)w / (l_float32)h;
2733
0
    max = n * n;
2734
0
    nx = ny = n;
2735
0
    while (nx > 1 && ny > 1) {
2736
0
        if (ratio > 2.0) {  /* reduce ny */
2737
0
            ny--;
2738
0
            nx = max / ny;
2739
0
            if (debug)
2740
0
                lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n",
2741
0
                            nx, ny, ratio);
2742
0
        } else if (ratio < 0.5) {  /* reduce nx */
2743
0
            nx--;
2744
0
            ny = max / nx;
2745
0
            if (debug)
2746
0
                lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n",
2747
0
                            nx, ny, ratio);
2748
0
        } else {  /* we're ok */
2749
0
            if (debug)
2750
0
                lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n",
2751
0
                            nx, ny, ratio);
2752
0
            break;
2753
0
        }
2754
0
        ratio = (l_float32)(ny * w) / (l_float32)(nx * h);
2755
0
    }
2756
0
    *pnx = nx;
2757
0
    *pny = ny;
2758
0
    return 0;
2759
0
}
2760
2761
2762
/*!
2763
 * \brief   compareTilesByHisto()
2764
 *
2765
 * \param[in]    naa1, naa2      each is a set of 256 entry histograms
2766
 * \param[in]    minratio        requiring image sizes be compatible; < 1.0
2767
 * \param[in]    w1, h1, w2, h2  image sizes from which histograms were made
2768
 * \param[out]   pscore          similarity score of histograms
2769
 * \param[in]    pixadebug       [optional] use only for debug output
2770
 * \return  0 if OK, 1 on error
2771
 *
2772
 * <pre>
2773
 * Notes:
2774
 *      (1) naa1 and naa2 must be generated using pixGenPhotoHistos(),
2775
 *          using the same tile sizes.
2776
 *      (2) The image dimensions must be similar.  The score is 0.0
2777
 *          if the ratio of widths and heights (smallest / largest)
2778
 *          exceeds a threshold %minratio, which must be between
2779
 *          0.5 and 1.0.  If set at 1.0, both images must be exactly
2780
 *          the same size.  A typical value for %minratio is 0.9.
2781
 *      (3) The input pixadebug is null unless debug output is requested.
2782
 * </pre>
2783
 */
2784
l_ok
2785
compareTilesByHisto(NUMAA      *naa1,
2786
                    NUMAA      *naa2,
2787
                    l_float32   minratio,
2788
                    l_int32     w1,
2789
                    l_int32     h1,
2790
                    l_int32     w2,
2791
                    l_int32     h2,
2792
                    l_float32  *pscore,
2793
                    PIXA       *pixadebug)
2794
0
{
2795
0
char       buf1[128], buf2[128];
2796
0
l_int32    i, n;
2797
0
l_float32  wratio, hratio, score, minscore, dist;
2798
0
L_BMF     *bmf;
2799
0
NUMA      *na1, *na2, *nadist, *nascore;
2800
2801
0
    if (!pscore)
2802
0
        return ERROR_INT("&score not defined", __func__, 1);
2803
0
    *pscore = 0.0;
2804
0
    if (!naa1 || !naa2)
2805
0
        return ERROR_INT("naa1 and naa2 not both defined", __func__, 1);
2806
2807
        /* Filter for different sizes */
2808
0
    wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 :
2809
0
             (l_float32)w2 / (l_float32)w1;
2810
0
    hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 :
2811
0
             (l_float32)h2 / (l_float32)h1;
2812
0
    if (wratio < minratio || hratio < minratio) {
2813
0
        if (pixadebug)
2814
0
            L_INFO("Sizes differ: wratio = %f, hratio = %f\n",
2815
0
                   __func__, wratio, hratio);
2816
0
        return 0;
2817
0
    }
2818
0
    n = numaaGetCount(naa1);
2819
0
    if (n != numaaGetCount(naa2)) {  /* due to differing w/h ratio */
2820
0
        L_INFO("naa1 and naa2 sizes are different\n", __func__);
2821
0
        return 0;
2822
0
    }
2823
2824
0
    if (pixadebug) {
2825
0
        lept_rmdir("lept/comptile");
2826
0
        lept_mkdir("lept/comptile");
2827
0
    }
2828
2829
2830
        /* Evaluate histograms in each tile.  Remove white before
2831
         * computing EMD, because there are may be a lot of white
2832
         * pixels due to padding, and we don't want to include them.
2833
         * This also makes the debug histo plots more informative. */
2834
0
    minscore = 1.0;
2835
0
    nadist = numaCreate(n);
2836
0
    nascore = numaCreate(n);
2837
0
    bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL;
2838
0
    for (i = 0; i < n; i++) {
2839
0
        na1 = numaaGetNuma(naa1, i, L_CLONE);
2840
0
        na2 = numaaGetNuma(naa2, i, L_CLONE);
2841
0
        numaSetValue(na1, 255, 0.0);
2842
0
        numaSetValue(na2, 255, 0.0);
2843
2844
            /* To compare histograms, use the normalized earthmover distance.
2845
             * Further normalize to get the EM distance as a fraction of the
2846
             * maximum distance in the histogram (255).  Finally, scale this
2847
             * up by 10.0, and subtract from 1.0 to get a similarity score. */
2848
0
        numaEarthMoverDistance(na1, na2, &dist);
2849
0
        score = L_MAX(0.0, 1.0 - 10.0 * (dist / 255.));
2850
0
        numaAddNumber(nadist, dist);
2851
0
        numaAddNumber(nascore, score);
2852
0
        minscore = L_MIN(minscore, score);
2853
0
        if (pixadebug) {
2854
0
            snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d", i);
2855
0
            gplotSimple2(na1, na2, GPLOT_PNG, buf1, "Histos");
2856
0
        }
2857
0
        numaDestroy(&na1);
2858
0
        numaDestroy(&na2);
2859
0
    }
2860
0
    *pscore = minscore;
2861
2862
0
    if (pixadebug) {
2863
0
        for (i = 0; i < n; i++) {
2864
0
            PIX  *pix1, *pix2;
2865
0
            snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d.png", i);
2866
0
            pix1 = pixRead(buf1);
2867
0
            numaGetFValue(nadist, i, &dist);
2868
0
            numaGetFValue(nascore, i, &score);
2869
0
            snprintf(buf2, sizeof(buf2),
2870
0
                 "Image %d\ndist = %5.3f, score = %5.3f", i, dist, score);
2871
0
            pix2 = pixAddTextlines(pix1, bmf, buf2, 0x0000ff00, L_ADD_BELOW);
2872
0
            pixaAddPix(pixadebug, pix2, L_INSERT);
2873
0
            pixDestroy(&pix1);
2874
0
        }
2875
0
        lept_stderr("Writing to /tmp/lept/comptile/comparegray.pdf\n");
2876
0
        pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL,
2877
0
                         "/tmp/lept/comptile/comparegray.pdf");
2878
0
        numaWriteDebug("/tmp/lept/comptile/scores.na", nascore);
2879
0
        numaWriteDebug("/tmp/lept/comptile/dists.na", nadist);
2880
0
    }
2881
2882
0
    bmfDestroy(&bmf);
2883
0
    numaDestroy(&nadist);
2884
0
    numaDestroy(&nascore);
2885
0
    return 0;
2886
0
}
2887
2888
2889
/*!
2890
 * \brief   pixCompareGrayByHisto()
2891
 *
2892
 * \param[in]    pix1, pix2  any depth; colormap OK
2893
 * \param[in]    box1, box2  [optional] region selected from each; can be null
2894
 * \param[in]    minratio    requiring sizes be compatible; < 1.0
2895
 * \param[in]    maxgray     max value to keep in histo; >= 200, 255 to keep all
2896
 * \param[in]    factor      subsampling factor; >= 1
2897
 * \param[in]    n           in range {1, ... 7}. n^2 is the maximum number
2898
 *                           of subregions for histograms; typ. n = 3.
2899
 * \param[out]   pscore      similarity score of histograms
2900
 * \param[in]    debugflag   1 for debug output; 0 for no debugging
2901
 * \return  0 if OK, 1 on error
2902
 *
2903
 * <pre>
2904
 * Notes:
2905
 *      (1) This function compares two grayscale photo regions.  It can
2906
 *          do it with a single histogram from each region, or with a
2907
 *          set of spatially aligned histograms.  For both cases,
2908
 *          align the regions using the centroid of the inverse image,
2909
 *          and crop to the smallest of the two.
2910
 *      (2) The parameter %n specifies the "side" of an n x n grid
2911
 *          of subimages.  If the subimages have an aspect ratio larger
2912
 *          than 2, the grid will change, using n^2 as a maximum for
2913
 *          the number of subimages.  For example, if n == 3, but the
2914
 *          image is 600 x 200 pixels, a 3x3 grid would have subimages
2915
 *          of 200 x 67 pixels, which is more than 2:1, so we change
2916
 *          to a 4x2 grid where each subimage has 150 x 100 pixels.
2917
 *      (3) An initial filter gives %score = 0 if the ratio of widths
2918
 *          and heights (smallest / largest) does not exceed a
2919
 *          threshold %minratio.  This must be between 0.5 and 1.0.
2920
 *          If set at 1.0, both images must be exactly the same size.
2921
 *          A typical value for %minratio is 0.9.
2922
 *      (4) The lightest values in the histogram can be disregarded.
2923
 *          Set %maxgray to the lightest value to be kept.  For example,
2924
 *          to eliminate white (255), set %maxgray = 254.  %maxgray must
2925
 *          be >= 200.
2926
 *      (5) For an efficient representation of the histogram, normalize
2927
 *          using a multiplicative factor so that the number in the
2928
 *          maximum bucket is 255.  It then takes 256 bytes to store.
2929
 *      (6) When comparing the histograms of two regions:
2930
 *          ~ Use %maxgray = 254 to ignore the white pixels, the number
2931
 *            of which may be sensitive to the crop region if the pixels
2932
 *            outside that region are white.
2933
 *          ~ Use the Earth Mover distance (EMD), with the histograms
2934
 *            normalized so that the sum over bins is the same.
2935
 *            Further normalize by dividing by 255, so that the result
2936
 *            is in [0.0 ... 1.0].
2937
 *      (7) Get a similarity score S = 1.0 - k * D, where
2938
 *            k is a constant, say in the range 5-10
2939
 *            D = normalized EMD
2940
 *          and for multiple tiles, take the Min(S) to be the final score.
2941
 *          Using aligned tiles gives protection against accidental
2942
 *          similarity of the overall grayscale histograms.
2943
 *          A small number of aligned tiles works well.
2944
 *      (8) With debug on, you get a pdf that shows, for each tile,
2945
 *          the images, histograms and score.
2946
 *      (9) When to use:
2947
 *          (a) Because this function should not be used on text or
2948
 *              line graphics, which can give false positive results
2949
 *              (i.e., high scores for different images), the input
2950
 *              images should be filtered.
2951
 *          (b) To filter, first use pixDecideIfText().  If that function
2952
 *              says the image is text, do not use it.  If the function
2953
 *              says it is not text, it still may be line graphics, and
2954
 *              in that case, use:
2955
 *                 pixGetGrayHistogramTiled()
2956
 *                 grayInterHistogramStats()
2957
 *              to determine whether it is photo or line graphics.
2958
 * </pre>
2959
 */
2960
l_ok
2961
pixCompareGrayByHisto(PIX        *pix1,
2962
                      PIX        *pix2,
2963
                      BOX        *box1,
2964
                      BOX        *box2,
2965
                      l_float32   minratio,
2966
                      l_int32     maxgray,
2967
                      l_int32     factor,
2968
                      l_int32     n,
2969
                      l_float32  *pscore,
2970
                      l_int32     debugflag)
2971
0
{
2972
0
l_int32    w1, h1, w2, h2;
2973
0
l_float32  wratio, hratio;
2974
0
BOX       *box3, *box4;
2975
0
PIX       *pix3, *pix4, *pix5, *pix6, *pix7, *pix8;
2976
0
PIXA      *pixa;
2977
2978
0
    if (!pscore)
2979
0
        return ERROR_INT("&score not defined", __func__, 1);
2980
0
    *pscore = 0.0;
2981
0
    if (!pix1 || !pix2)
2982
0
        return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
2983
0
    if (minratio < 0.5 || minratio > 1.0)
2984
0
        return ERROR_INT("minratio not in [0.5 ... 1.0]", __func__, 1);
2985
0
    if (maxgray < 200)
2986
0
        return ERROR_INT("invalid maxgray; should be >= 200", __func__, 1);
2987
0
    maxgray = L_MIN(255, maxgray);
2988
0
    if (factor < 1)
2989
0
        return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2990
0
    if (n < 1 || n > 7) {
2991
0
        L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2992
0
        n = 4;
2993
0
    }
2994
2995
0
    if (debugflag)
2996
0
        lept_mkdir("lept/comp");
2997
2998
        /* Initial filter by size */
2999
0
    if (box1)
3000
0
        boxGetGeometry(box1, NULL, NULL, &w1, &h1);
3001
0
    else
3002
0
        pixGetDimensions(pix1, &w1, &h1, NULL);
3003
0
    if (box2)
3004
0
        boxGetGeometry(box2, NULL, NULL, &w2, &h2);
3005
0
    else
3006
0
        pixGetDimensions(pix1, &w2, &h2, NULL);
3007
0
    wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 :
3008
0
             (l_float32)w2 / (l_float32)w1;
3009
0
    hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 :
3010
0
             (l_float32)h2 / (l_float32)h1;
3011
0
    if (wratio < minratio || hratio < minratio)
3012
0
        return 0;
3013
3014
        /* Initial crop, if necessary */
3015
0
    if (box1)
3016
0
        pix3 = pixClipRectangle(pix1, box1, NULL);
3017
0
    else
3018
0
        pix3 = pixClone(pix1);
3019
0
    if (box2)
3020
0
        pix4 = pixClipRectangle(pix2, box2, NULL);
3021
0
    else
3022
0
        pix4 = pixClone(pix2);
3023
3024
        /* Convert to 8 bpp, align centroids and do maximal crop */
3025
0
    pix5 = pixConvertTo8(pix3, FALSE);
3026
0
    pix6 = pixConvertTo8(pix4, FALSE);
3027
0
    pixCropAlignedToCentroid(pix5, pix6, factor, &box3, &box4);
3028
0
    pix7 = pixClipRectangle(pix5, box3, NULL);
3029
0
    pix8 = pixClipRectangle(pix6, box4, NULL);
3030
0
    pixa = (debugflag) ? pixaCreate(0) : NULL;
3031
0
    if (debugflag) {
3032
0
        PIX     *pix9, *pix10, *pix11, *pix12, *pix13;
3033
0
        PIXA    *pixa2;
3034
0
        pix9 = pixConvertTo32(pix5);
3035
0
        pix10 = pixConvertTo32(pix6);
3036
0
        pixRenderBoxArb(pix9, box3, 2, 255, 0, 0);
3037
0
        pixRenderBoxArb(pix10, box4, 2, 255, 0, 0);
3038
0
        pix11 = pixScaleToSize(pix9, 400, 0);
3039
0
        pix12 = pixScaleToSize(pix10, 400, 0);
3040
0
        pixa2 = pixaCreate(2);
3041
0
        pixaAddPix(pixa2, pix11, L_INSERT);
3042
0
        pixaAddPix(pixa2, pix12, L_INSERT);
3043
0
        pix13 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 0);
3044
0
        pixaAddPix(pixa, pix13, L_INSERT);
3045
0
        pixDestroy(&pix9);
3046
0
        pixDestroy(&pix10);
3047
0
        pixaDestroy(&pixa2);
3048
0
    }
3049
0
    pixDestroy(&pix3);
3050
0
    pixDestroy(&pix4);
3051
0
    pixDestroy(&pix5);
3052
0
    pixDestroy(&pix6);
3053
0
    boxDestroy(&box3);
3054
0
    boxDestroy(&box4);
3055
3056
        /* Tile and compare histograms */
3057
0
    pixCompareTilesByHisto(pix7, pix8, maxgray, factor, n, pscore, pixa);
3058
0
    pixaDestroy(&pixa);
3059
0
    pixDestroy(&pix7);
3060
0
    pixDestroy(&pix8);
3061
0
    return 0;
3062
0
}
3063
3064
3065
/*!
3066
 * \brief   pixCompareTilesByHisto()
3067
 *
3068
 * \param[in]    pix1, pix2     8 bpp
3069
 * \param[in]    maxgray        max value to keep in histo; 255 to keep all
3070
 * \param[in]    factor         subsampling factor; >= 1
3071
 * \param[in]    n              see pixCompareGrayByHisto()
3072
 * \param[out]   pscore         similarity score of histograms
3073
 * \param[in]    pixadebug      [optional] use only for debug output
3074
 * \return  0 if OK, 1 on error
3075
 *
3076
 * <pre>
3077
 * Notes:
3078
 *      (1) This static function is only called from pixCompareGrayByHisto().
3079
 *          The input images have been converted to 8 bpp if necessary,
3080
 *          aligned and cropped.
3081
 *      (2) The input pixadebug is null unless debug output is requested.
3082
 *      (3) See pixCompareGrayByHisto() for details.
3083
 * </pre>
3084
 */
3085
static l_ok
3086
pixCompareTilesByHisto(PIX        *pix1,
3087
                       PIX        *pix2,
3088
                       l_int32     maxgray,
3089
                       l_int32     factor,
3090
                       l_int32     n,
3091
                       l_float32  *pscore,
3092
                       PIXA       *pixadebug)
3093
0
{
3094
0
char       buf[64];
3095
0
l_int32    w, h, i, j, nx, ny, ngr;
3096
0
l_float32  score, minscore, maxval1, maxval2, dist;
3097
0
L_BMF     *bmf;
3098
0
NUMA      *na1, *na2, *na3, *na4, *na5, *na6, *na7;
3099
0
PIX       *pix3, *pix4;
3100
0
PIXA      *pixa1, *pixa2;
3101
3102
0
    if (!pscore)
3103
0
        return ERROR_INT("&score not defined", __func__, 1);
3104
0
    *pscore = 0.0;
3105
0
    if (!pix1 || !pix2)
3106
0
        return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
3107
3108
        /* Determine grid from n */
3109
0
    pixGetDimensions(pix1, &w, &h, NULL);
3110
0
    findHistoGridDimensions(n, w, h, &nx, &ny, 1);
3111
0
    ngr = nx * ny;
3112
3113
        /* Evaluate histograms in each tile */
3114
0
    pixa1 = pixaSplitPix(pix1, nx, ny, 0, 0);
3115
0
    pixa2 = pixaSplitPix(pix2, nx, ny, 0, 0);
3116
0
    na7 = (pixadebug) ? numaCreate(ngr) : NULL;
3117
0
    bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL;
3118
0
    minscore = 1.0;
3119
0
    for (i = 0; i < ngr; i++) {
3120
0
        pix3 = pixaGetPix(pixa1, i, L_CLONE);
3121
0
        pix4 = pixaGetPix(pixa2, i, L_CLONE);
3122
3123
            /* Get histograms, set white count to 0, normalize max to 255 */
3124
0
        na1 = pixGetGrayHistogram(pix3, factor);
3125
0
        na2 = pixGetGrayHistogram(pix4, factor);
3126
0
        if (maxgray < 255) {
3127
0
            for (j = maxgray + 1; j <= 255; j++) {
3128
0
                numaSetValue(na1, j, 0);
3129
0
                numaSetValue(na2, j, 0);
3130
0
            }
3131
0
        }
3132
0
        na3 = numaWindowedMean(na1, 5);
3133
0
        na4 = numaWindowedMean(na2, 5);
3134
0
        numaGetMax(na3, &maxval1, NULL);
3135
0
        numaGetMax(na4, &maxval2, NULL);
3136
0
        na5 = numaTransform(na3, 0, 255.0 / maxval1);
3137
0
        na6 = numaTransform(na4, 0, 255.0 / maxval2);
3138
0
        if (pixadebug) {
3139
0
            gplotSimple2(na5, na6, GPLOT_PNG, "/tmp/lept/comp/plot1", "Histos");
3140
0
        }
3141
3142
            /* To compare histograms, use the normalized earthmover distance.
3143
             * Further normalize to get the EM distance as a fraction of the
3144
             * maximum distance in the histogram (255).  Finally, scale this
3145
             * up by 10.0, and subtract from 1.0 to get a similarity score. */
3146
0
        numaEarthMoverDistance(na5, na6, &dist);
3147
0
        score = L_MAX(0.0, 1.0 - 8.0 * (dist / 255.));
3148
0
        if (pixadebug) numaAddNumber(na7, score);
3149
0
        minscore = L_MIN(minscore, score);
3150
0
        if (pixadebug) {
3151
0
            PIX     *pix5, *pix6, *pix7, *pix8, *pix9, *pix10;
3152
0
            PIXA    *pixa3;
3153
0
            l_int32  w, h, wscale;
3154
0
            pixa3 = pixaCreate(3);
3155
0
            pixGetDimensions(pix3, &w, &h, NULL);
3156
0
            wscale = (w > h) ? 700 : 400;
3157
0
            pix5 = pixScaleToSize(pix3, wscale, 0);
3158
0
            pix6 = pixScaleToSize(pix4, wscale, 0);
3159
0
            pixaAddPix(pixa3, pix5, L_INSERT);
3160
0
            pixaAddPix(pixa3, pix6, L_INSERT);
3161
0
            pix7 = pixRead("/tmp/lept/comp/plot1.png");
3162
0
            pix8 = pixScaleToSize(pix7, 700, 0);
3163
0
            snprintf(buf, sizeof(buf), "%5.3f", score);
3164
0
            pix9 = pixAddTextlines(pix8, bmf, buf, 0x0000ff00, L_ADD_RIGHT);
3165
0
            pixaAddPix(pixa3, pix9, L_INSERT);
3166
0
            pix10 = pixaDisplayTiledInRows(pixa3, 32, 1000, 1.0, 0, 50, 0);
3167
0
            pixaAddPix(pixadebug, pix10, L_INSERT);
3168
0
            pixDestroy(&pix7);
3169
0
            pixDestroy(&pix8);
3170
0
            pixaDestroy(&pixa3);
3171
0
        }
3172
0
        numaDestroy(&na1);
3173
0
        numaDestroy(&na2);
3174
0
        numaDestroy(&na3);
3175
0
        numaDestroy(&na4);
3176
0
        numaDestroy(&na5);
3177
0
        numaDestroy(&na6);
3178
0
        pixDestroy(&pix3);
3179
0
        pixDestroy(&pix4);
3180
0
    }
3181
0
    *pscore = minscore;
3182
3183
0
    if (pixadebug) {
3184
0
        pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL,
3185
0
                         "/tmp/lept/comp/comparegray.pdf");
3186
0
        numaWriteDebug("/tmp/lept/comp/tilescores.na", na7);
3187
0
    }
3188
3189
0
    bmfDestroy(&bmf);
3190
0
    numaDestroy(&na7);
3191
0
    pixaDestroy(&pixa1);
3192
0
    pixaDestroy(&pixa2);
3193
0
    return 0;
3194
0
}
3195
3196
3197
/*!
3198
 * \brief   pixCropAlignedToCentroid()
3199
 *
3200
 * \param[in]    pix1, pix2   any depth; colormap OK
3201
 * \param[in]    factor       subsampling; >= 1
3202
 * \param[out]   pbox1        crop box for pix1
3203
 * \param[out]   pbox2        crop box for pix2
3204
 * \return  0 if OK, 1 on error
3205
 *
3206
 * <pre>
3207
 * Notes:
3208
 *      (1) This finds the maximum crop boxes for two 8 bpp images when
3209
 *          their centroids of their photometric inverses are aligned.
3210
 *          Black pixels have weight 255; white pixels have weight 0.
3211
 * </pre>
3212
 */
3213
l_ok
3214
pixCropAlignedToCentroid(PIX     *pix1,
3215
                         PIX     *pix2,
3216
                         l_int32  factor,
3217
                         BOX    **pbox1,
3218
                         BOX    **pbox2)
3219
0
{
3220
0
l_float32  cx1, cy1, cx2, cy2;
3221
0
l_int32    w1, h1, w2, h2, icx1, icy1, icx2, icy2;
3222
0
l_int32    xm, xm1, xm2, xp, xp1, xp2, ym, ym1, ym2, yp, yp1, yp2;
3223
0
PIX       *pix3, *pix4;
3224
3225
0
    if (pbox1) *pbox1 = NULL;
3226
0
    if (pbox2) *pbox2 = NULL;
3227
0
    if (!pix1 || !pix2)
3228
0
        return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
3229
0
    if (factor < 1)
3230
0
        return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
3231
0
    if (!pbox1 || !pbox2)
3232
0
        return ERROR_INT("&box1 and &box2 not both defined", __func__, 1);
3233
3234
0
    pix3 = pixConvertTo8(pix1, FALSE);
3235
0
    pix4 = pixConvertTo8(pix2, FALSE);
3236
0
    pixCentroid8(pix3, factor, &cx1, &cy1);
3237
0
    pixCentroid8(pix4, factor, &cx2, &cy2);
3238
0
    pixGetDimensions(pix3, &w1, &h1, NULL);
3239
0
    pixGetDimensions(pix4, &w2, &h2, NULL);
3240
0
    pixDestroy(&pix3);
3241
0
    pixDestroy(&pix4);
3242
3243
0
    icx1 = (l_int32)(cx1 + 0.5);
3244
0
    icy1 = (l_int32)(cy1 + 0.5);
3245
0
    icx2 = (l_int32)(cx2 + 0.5);
3246
0
    icy2 = (l_int32)(cy2 + 0.5);
3247
0
    xm = L_MIN(icx1, icx2);
3248
0
    xm1 = icx1 - xm;
3249
0
    xm2 = icx2 - xm;
3250
0
    xp = L_MIN(w1 - icx1, w2 - icx2);  /* one pixel beyond to the right */
3251
0
    xp1 = icx1 + xp;
3252
0
    xp2 = icx2 + xp;
3253
0
    ym = L_MIN(icy1, icy2);
3254
0
    ym1 = icy1 - ym;
3255
0
    ym2 = icy2 - ym;
3256
0
    yp = L_MIN(h1 - icy1, h2 - icy2);  /* one pixel below the bottom */
3257
0
    yp1 = icy1 + yp;
3258
0
    yp2 = icy2 + yp;
3259
0
    *pbox1 = boxCreate(xm1, ym1, xp1 - xm1, yp1 - ym1);
3260
0
    *pbox2 = boxCreate(xm2, ym2, xp2 - xm2, yp2 - ym2);
3261
0
    return 0;
3262
0
}
3263
3264
3265
/*!
3266
 * \brief   l_compressGrayHistograms()
3267
 *
3268
 * \param[in]    naa     set of 256-entry histograms
3269
 * \param[in]    w, h    size of image
3270
 * \param[out]   psize   size of byte array
3271
 * \return  0 if OK, 1 on error
3272
 *
3273
 * <pre>
3274
 * Notes:
3275
 *      (1) This first writes w and h to the byte array as 4 byte ints.
3276
 *      (2) Then it normalizes each histogram to a max value of 255,
3277
 *          and saves each value as a byte.  If there are
3278
 *          N histograms, the output bytearray has 8 + 256 * N bytes.
3279
 *      (3) Further compression of the array with zlib yields only about
3280
 *          a 25% decrease in size, so we don't bother.  If size reduction
3281
 *          were important, a lossy transform using a 1-dimensional DCT
3282
 *          would be effective, because we don't care about the fine
3283
 *          details of these histograms.
3284
 * </pre>
3285
 */
3286
l_uint8 *
3287
l_compressGrayHistograms(NUMAA   *naa,
3288
                         l_int32  w,
3289
                         l_int32  h,
3290
                         size_t  *psize)
3291
0
{
3292
0
l_uint8   *bytea;
3293
0
l_int32    i, j, n, nn, ival;
3294
0
l_float32  maxval;
3295
0
NUMA      *na1, *na2;
3296
3297
0
    if (!psize)
3298
0
        return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL);
3299
0
    *psize = 0;
3300
0
    if (!naa)
3301
0
        return (l_uint8 *)ERROR_PTR("naa not defined", __func__, NULL);
3302
0
    n = numaaGetCount(naa);
3303
0
    for (i = 0; i < n; i++) {
3304
0
        nn = numaaGetNumaCount(naa, i);
3305
0
        if (nn != 256) {
3306
0
            L_ERROR("%d numbers in numa[%d]\n", __func__, nn, i);
3307
0
            return NULL;
3308
0
        }
3309
0
    }
3310
3311
0
    if ((bytea = (l_uint8 *)LEPT_CALLOC(8 + 256 * n, sizeof(l_uint8))) == NULL)
3312
0
        return (l_uint8 *)ERROR_PTR("bytea not made", __func__, NULL);
3313
0
    *psize = 8 + 256 * n;
3314
0
    l_setDataFourBytes(bytea, 0, w);
3315
0
    l_setDataFourBytes(bytea, 1, h);
3316
0
    for (i = 0; i < n; i++) {
3317
0
        na1 = numaaGetNuma(naa, i, L_COPY);
3318
0
        numaGetMax(na1, &maxval, NULL);
3319
0
        na2 = numaTransform(na1, 0, 255.0 / maxval);
3320
0
        for (j = 0; j < 256; j++) {
3321
0
            numaGetIValue(na2, j, &ival);
3322
0
            bytea[8 + 256 * i + j] = ival;
3323
0
        }
3324
0
        numaDestroy(&na1);
3325
0
        numaDestroy(&na2);
3326
0
    }
3327
3328
0
    return bytea;
3329
0
}
3330
3331
3332
/*!
3333
 * \brief   l_uncompressGrayHistograms()
3334
 *
3335
 * \param[in]    bytea    byte array of size 8 + 256 * N, N an integer
3336
 * \param[in]    size     size of byte array
3337
 * \param[out]   pw       width of the image that generated the histograms
3338
 * \param[out]   ph       height of the image
3339
 * \return  numaa     representing N histograms, each with 256 bins,
3340
 *                    or NULL on error.
3341
 *
3342
 * <pre>
3343
 * Notes:
3344
 *      (1) The first 8 bytes are read as two 32-bit ints.
3345
 *      (2) Then this constructs a numaa representing some number of
3346
 *          gray histograms that are normalized such that the max value
3347
 *          in each histogram is 255.  The data is stored as a byte
3348
 *          array, with 256 bytes holding the data for each histogram.
3349
 *          Each gray histogram was computed from a tile of a grayscale image.
3350
 * </pre>
3351
 */
3352
NUMAA *
3353
l_uncompressGrayHistograms(l_uint8  *bytea,
3354
                           size_t    size,
3355
                           l_int32  *pw,
3356
                           l_int32  *ph)
3357
0
{
3358
0
l_int32  i, j, n;
3359
0
NUMA    *na;
3360
0
NUMAA   *naa;
3361
3362
0
    if (pw) *pw = 0;
3363
0
    if (ph) *ph = 0;
3364
0
    if (!pw || !ph)
3365
0
        return (NUMAA *)ERROR_PTR("&w and &h not both defined", __func__, NULL);
3366
0
    if (!bytea)
3367
0
        return (NUMAA *)ERROR_PTR("bytea not defined", __func__, NULL);
3368
0
    n = (size - 8) / 256;
3369
0
    if ((size - 8) % 256 != 0)
3370
0
        return (NUMAA *)ERROR_PTR("bytea size is invalid", __func__, NULL);
3371
3372
0
    *pw = l_getDataFourBytes(bytea, 0);
3373
0
    *ph = l_getDataFourBytes(bytea, 1);
3374
0
    naa = numaaCreate(n);
3375
0
    for (i = 0; i < n; i++) {
3376
0
        na = numaCreate(256);
3377
0
        for (j = 0; j < 256; j++)
3378
0
            numaAddNumber(na, bytea[8 + 256 * i + j]);
3379
0
        numaaAddNuma(naa, na, L_INSERT);
3380
0
    }
3381
3382
0
    return naa;
3383
0
}
3384
3385
3386
/*------------------------------------------------------------------*
3387
 *             Translated images at the same resolution             *
3388
 *------------------------------------------------------------------*/
3389
/*!
3390
 * \brief   pixCompareWithTranslation()
3391
 *
3392
 * \param[in]    pix1, pix2    any depth; colormap OK
3393
 * \param[in]    thresh        threshold for converting to 1 bpp
3394
 * \param[out]   pdelx         x translation on pix2 to align with pix1
3395
 * \param[out]   pdely         y translation on pix2 to align with pix1
3396
 * \param[out]   pscore        correlation score at best alignment
3397
 * \param[in]    debugflag     1 for debug output; 0 for no debugging
3398
 * \return  0 if OK, 1 on error
3399
 *
3400
 * <pre>
3401
 * Notes:
3402
 *      (1) This does a coarse-to-fine search for best translational
3403
 *          alignment of two images, measured by a scoring function
3404
 *          that is the correlation between the fg pixels.
3405
 *      (2) The threshold is used if the images aren't 1 bpp.
3406
 *      (3) With debug on, you get a pdf that shows, as a grayscale
3407
 *          image, the score as a function of shift from the initial
3408
 *          estimate, for each of the four levels.  The shift is 0 at
3409
 *          the center of the image.
3410
 *      (4) With debug on, you also get a pdf that shows the
3411
 *          difference at the best alignment between the two images,
3412
 *          at each of the four levels.  The red and green pixels
3413
 *          show locations where one image has a fg pixel and the
3414
 *          other doesn't.  The black pixels are where both images
3415
 *          have fg pixels, and white pixels are where neither image
3416
 *          has fg pixels.
3417
 * </pre>
3418
 */
3419
l_ok
3420
pixCompareWithTranslation(PIX        *pix1,
3421
                          PIX        *pix2,
3422
                          l_int32     thresh,
3423
                          l_int32    *pdelx,
3424
                          l_int32    *pdely,
3425
                          l_float32  *pscore,
3426
                          l_int32     debugflag)
3427
0
{
3428
0
l_uint8   *subtab;
3429
0
l_int32    i, level, area1, area2, delx, dely;
3430
0
l_int32    etransx, etransy, maxshift, dbint;
3431
0
l_int32   *stab, *ctab;
3432
0
l_float32  cx1, cx2, cy1, cy2, score;
3433
0
PIX       *pixb1, *pixb2, *pixt1, *pixt2, *pixt3, *pixt4;
3434
0
PIXA      *pixa1, *pixa2, *pixadb = NULL;
3435
3436
0
    if (pdelx) *pdelx = 0;
3437
0
    if (pdely) *pdely = 0;
3438
0
    if (pscore) *pscore = 0.0;
3439
0
    if (!pdelx || !pdely)
3440
0
        return ERROR_INT("&delx and &dely not defined", __func__, 1);
3441
0
    if (!pscore)
3442
0
        return ERROR_INT("&score not defined", __func__, 1);
3443
0
    if (!pix1)
3444
0
        return ERROR_INT("pix1 not defined", __func__, 1);
3445
0
    if (!pix2)
3446
0
        return ERROR_INT("pix2 not defined", __func__, 1);
3447
3448
        /* Make tables */
3449
0
    subtab = makeSubsampleTab2x();
3450
0
    stab = makePixelSumTab8();
3451
0
    ctab = makePixelCentroidTab8();
3452
3453
        /* Binarize each image */
3454
0
    pixb1 = pixConvertTo1(pix1, thresh);
3455
0
    pixb2 = pixConvertTo1(pix2, thresh);
3456
3457
        /* Make a cascade of 2x reduced images for each, thresholding
3458
         * with level 2 (neutral), down to 8x reduction */
3459
0
    pixa1 = pixaCreate(4);
3460
0
    pixa2 = pixaCreate(4);
3461
0
    if (debugflag)
3462
0
        pixadb = pixaCreate(4);
3463
0
    pixaAddPix(pixa1, pixb1, L_INSERT);
3464
0
    pixaAddPix(pixa2, pixb2, L_INSERT);
3465
0
    for (i = 0; i < 3; i++) {
3466
0
        pixt1 = pixReduceRankBinary2(pixb1, 2, subtab);
3467
0
        pixt2 = pixReduceRankBinary2(pixb2, 2, subtab);
3468
0
        pixaAddPix(pixa1, pixt1, L_INSERT);
3469
0
        pixaAddPix(pixa2, pixt2, L_INSERT);
3470
0
        pixb1 = pixt1;
3471
0
        pixb2 = pixt2;
3472
0
    }
3473
3474
        /* At the lowest level, use the centroids with a maxshift of 6
3475
         * to search for the best alignment.  Then at higher levels,
3476
         * use the result from the level below as the initial approximation
3477
         * for the alignment, and search with a maxshift of 2. */
3478
0
    for (level = 3; level >= 0; level--) {
3479
0
        pixt1 = pixaGetPix(pixa1, level, L_CLONE);
3480
0
        pixt2 = pixaGetPix(pixa2, level, L_CLONE);
3481
0
        pixCountPixels(pixt1, &area1, stab);
3482
0
        pixCountPixels(pixt2, &area2, stab);
3483
0
        if (level == 3) {
3484
0
            pixCentroid(pixt1, ctab, stab, &cx1, &cy1);
3485
0
            pixCentroid(pixt2, ctab, stab, &cx2, &cy2);
3486
0
            etransx = lept_roundftoi(cx1 - cx2);
3487
0
            etransy = lept_roundftoi(cy1 - cy2);
3488
0
            maxshift = 6;
3489
0
        } else {
3490
0
            etransx = 2 * delx;
3491
0
            etransy = 2 * dely;
3492
0
            maxshift = 2;
3493
0
        }
3494
0
        dbint = (debugflag) ? level + 1 : 0;
3495
0
        pixBestCorrelation(pixt1, pixt2, area1, area2, etransx, etransy,
3496
0
                           maxshift, stab, &delx, &dely, &score, dbint);
3497
0
        if (debugflag) {
3498
0
            lept_stderr("Level %d: delx = %d, dely = %d, score = %7.4f\n",
3499
0
                        level, delx, dely, score);
3500
0
            pixRasteropIP(pixt2, delx, dely, L_BRING_IN_WHITE);
3501
0
            pixt3 = pixDisplayDiffBinary(pixt1, pixt2);
3502
0
            pixt4 = pixExpandReplicate(pixt3, 8 / (1 << (3 - level)));
3503
0
            pixaAddPix(pixadb, pixt4, L_INSERT);
3504
0
            pixDestroy(&pixt3);
3505
0
        }
3506
0
        pixDestroy(&pixt1);
3507
0
        pixDestroy(&pixt2);
3508
0
    }
3509
3510
0
    if (debugflag) {
3511
0
        pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, NULL,
3512
0
                         "/tmp/lept/comp/compare.pdf");
3513
0
        convertFilesToPdf("/tmp/lept/comp", "correl_", 30, 1.0, L_FLATE_ENCODE,
3514
0
                          0, "Correlation scores at levels 1 through 5",
3515
0
                          "/tmp/lept/comp/correl.pdf");
3516
0
        pixaDestroy(&pixadb);
3517
0
    }
3518
3519
0
    *pdelx = delx;
3520
0
    *pdely = dely;
3521
0
    *pscore = score;
3522
0
    pixaDestroy(&pixa1);
3523
0
    pixaDestroy(&pixa2);
3524
0
    LEPT_FREE(subtab);
3525
0
    LEPT_FREE(stab);
3526
0
    LEPT_FREE(ctab);
3527
0
    return 0;
3528
0
}
3529
3530
3531
/*!
3532
 * \brief   pixBestCorrelation()
3533
 *
3534
 * \param[in]    pix1      1 bpp
3535
 * \param[in]    pix2      1 bpp
3536
 * \param[in]    area1     number of on pixels in pix1
3537
 * \param[in]    area2     number of on pixels in pix2
3538
 * \param[in]    etransx   estimated x translation of pix2 to align with pix1
3539
 * \param[in]    etransy   estimated y translation of pix2 to align with pix1
3540
 * \param[in]    maxshift  max x and y shift of pix2, around the estimated
3541
 *                         alignment location, relative to pix1
3542
 * \param[in]    tab8      [optional] sum tab for ON pixels in byte; can be NULL
3543
 * \param[out]   pdelx     [optional] best x shift of pix2 relative to pix1
3544
 * \param[out]   pdely     [optional] best y shift of pix2 relative to pix1
3545
 * \param[out]   pscore    [optional] maximum score found; can be NULL
3546
 * \param[in]    debugflag   <= 0 to skip; positive to generate output.
3547
 *                           The integer is used to label the debug image.
3548
 * \return  0 if OK, 1 on error
3549
 *
3550
 * <pre>
3551
 * Notes:
3552
 *      (1) This maximizes the correlation score between two 1 bpp images,
3553
 *          by starting with an estimate of the alignment
3554
 *          (%etransx, %etransy) and computing the correlation around this.
3555
 *          It optionally returns the shift (%delx, %dely) that maximizes
3556
 *          the correlation score when pix2 is shifted by this amount
3557
 *          relative to pix1.
3558
 *      (2) Get the centroids of pix1 and pix2, using pixCentroid(),
3559
 *          to compute (%etransx, %etransy).  Get the areas using
3560
 *          pixCountPixels().
3561
 *      (3) The centroid of pix2 is shifted with respect to the centroid
3562
 *          of pix1 by all values between -maxshiftx and maxshiftx,
3563
 *          and likewise for the y shifts.  Therefore, the number of
3564
 *          correlations computed is:
3565
 *               (2 * maxshiftx + 1) * (2 * maxshifty + 1)
3566
 *          Consequently, if pix1 and pix2 are large, you should do this
3567
 *          in a coarse-to-fine sequence.  See the use of this function
3568
 *          in pixCompareWithTranslation().
3569
 * </pre>
3570
 */
3571
l_ok
3572
pixBestCorrelation(PIX        *pix1,
3573
                   PIX        *pix2,
3574
                   l_int32     area1,
3575
                   l_int32     area2,
3576
                   l_int32     etransx,
3577
                   l_int32     etransy,
3578
                   l_int32     maxshift,
3579
                   l_int32    *tab8,
3580
                   l_int32    *pdelx,
3581
                   l_int32    *pdely,
3582
                   l_float32  *pscore,
3583
                   l_int32     debugflag)
3584
0
{
3585
0
l_int32    shiftx, shifty, delx, dely;
3586
0
l_int32   *tab;
3587
0
l_float32  maxscore, score;
3588
0
FPIX      *fpix = NULL;
3589
0
PIX       *pix3, *pix4;
3590
3591
0
    if (pdelx) *pdelx = 0;
3592
0
    if (pdely) *pdely = 0;
3593
0
    if (pscore) *pscore = 0.0;
3594
0
    if (!pix1 || pixGetDepth(pix1) != 1)
3595
0
        return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1);
3596
0
    if (!pix2 || pixGetDepth(pix2) != 1)
3597
0
        return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1);
3598
0
    if (!area1 || !area2)
3599
0
        return ERROR_INT("areas must be > 0", __func__, 1);
3600
3601
0
    if (debugflag > 0)
3602
0
        fpix = fpixCreate(2 * maxshift + 1, 2 * maxshift + 1);
3603
3604
0
    if (!tab8)
3605
0
        tab = makePixelSumTab8();
3606
0
    else
3607
0
        tab = tab8;
3608
3609
        /* Search over a set of {shiftx, shifty} for the max */
3610
0
    maxscore = 0;
3611
0
    delx = etransx;
3612
0
    dely = etransy;
3613
0
    for (shifty = -maxshift; shifty <= maxshift; shifty++) {
3614
0
        for (shiftx = -maxshift; shiftx <= maxshift; shiftx++) {
3615
0
            pixCorrelationScoreShifted(pix1, pix2, area1, area2,
3616
0
                                       etransx + shiftx,
3617
0
                                       etransy + shifty, tab, &score);
3618
0
            if (debugflag > 0) {
3619
0
                fpixSetPixel(fpix, maxshift + shiftx, maxshift + shifty,
3620
0
                             1000.0 * score);
3621
/*                lept_stderr("(sx, sy) = (%d, %d): score = %6.4f\n",
3622
                              shiftx, shifty, score); */
3623
0
            }
3624
0
            if (score > maxscore) {
3625
0
                maxscore = score;
3626
0
                delx = etransx + shiftx;
3627
0
                dely = etransy + shifty;
3628
0
            }
3629
0
        }
3630
0
    }
3631
3632
0
    if (debugflag > 0) {
3633
0
        char  buf[128];
3634
0
        lept_mkdir("lept/comp");
3635
0
        pix3 = fpixDisplayMaxDynamicRange(fpix);
3636
0
        pix4 = pixExpandReplicate(pix3, 20);
3637
0
        snprintf(buf, sizeof(buf), "/tmp/lept/comp/correl_%d.png",
3638
0
                 debugflag);
3639
0
        pixWrite(buf, pix4, IFF_PNG);
3640
0
        pixDestroy(&pix3);
3641
0
        pixDestroy(&pix4);
3642
0
        fpixDestroy(&fpix);
3643
0
    }
3644
3645
0
    if (pdelx) *pdelx = delx;
3646
0
    if (pdely) *pdely = dely;
3647
0
    if (pscore) *pscore = maxscore;
3648
0
    if (!tab8) LEPT_FREE(tab);
3649
0
    return 0;
3650
0
}