/src/leptonica/src/correlscore.c
Line | Count | Source |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /* |
28 | | * correlscore.c |
29 | | * |
30 | | * These are functions for computing correlation between |
31 | | * pairs of 1 bpp images. |
32 | | * |
33 | | * Optimized 2 pix correlators (for jbig2 clustering) |
34 | | * l_int32 pixCorrelationScore() |
35 | | * l_int32 pixCorrelationScoreThresholded() |
36 | | * |
37 | | * Simple 2 pix correlators |
38 | | * l_int32 pixCorrelationScoreSimple() |
39 | | * l_int32 pixCorrelationScoreShifted() |
40 | | * |
41 | | * There are other, more application-oriented functions, that |
42 | | * compute the correlation between two binary images, taking into |
43 | | * account small translational shifts, between two binary images. |
44 | | * These are: |
45 | | * compare.c: pixBestCorrelation() |
46 | | * Uses coarse-to-fine translations of full image |
47 | | * recogident.c: pixCorrelationBestShift() |
48 | | * Uses small shifts between c.c. centroids. |
49 | | */ |
50 | | |
51 | | #ifdef HAVE_CONFIG_H |
52 | | #include <config_auto.h> |
53 | | #endif /* HAVE_CONFIG_H */ |
54 | | |
55 | | #include <math.h> |
56 | | #include "allheaders.h" |
57 | | |
58 | | /* -------------------------------------------------------------------- * |
59 | | * Optimized 2 pix correlators (for jbig2 clustering) * |
60 | | * -------------------------------------------------------------------- */ |
61 | | /*! |
62 | | * \brief pixCorrelationScore() |
63 | | * |
64 | | * \param[in] pix1 test pix, 1 bpp |
65 | | * \param[in] pix2 exemplar pix, 1 bpp |
66 | | * \param[in] area1 number of on pixels in pix1 |
67 | | * \param[in] area2 number of on pixels in pix2 |
68 | | * \param[in] delx x comp of centroid difference |
69 | | * \param[in] dely y comp of centroid difference |
70 | | * \param[in] maxdiffw max width difference of pix1 and pix2 |
71 | | * \param[in] maxdiffh max height difference of pix1 and pix2 |
72 | | * \param[in] tab sum tab for byte |
73 | | * \param[out] pscore correlation score |
74 | | * \return 0 if OK, 1 on error |
75 | | * |
76 | | * <pre> |
77 | | * Notes: |
78 | | * We check first that the two pix are roughly the same size. |
79 | | * For jbclass (jbig2) applications at roughly 300 ppi, maxdiffw and |
80 | | * maxdiffh should be at least 2. |
81 | | * |
82 | | * Only if they meet that criterion do we compare the bitmaps. |
83 | | * The centroid difference is used to align the two images to the |
84 | | * nearest integer for the correlation. |
85 | | * |
86 | | * The correlation score is the ratio of the square of the number of |
87 | | * pixels in the AND of the two bitmaps to the product of the number |
88 | | * of ON pixels in each. Denote the number of ON pixels in pix1 |
89 | | * by |1|, the number in pix2 by |2|, and the number in the AND |
90 | | * of pix1 and pix2 by |1 & 2|. The correlation score is then |
91 | | * (|1 & 2|)**2 / (|1|*|2|). |
92 | | * |
93 | | * This score is compared with an input threshold, which can |
94 | | * be modified depending on the weight of the template. |
95 | | * The modified threshold is |
96 | | * thresh + (1.0 - thresh) * weight * R |
97 | | * where |
98 | | * weight is a fixed input factor between 0.0 and 1.0 |
99 | | * R = |2| / area(2) |
100 | | * and area(2) is the total number of pixels in 2 (i.e., width x height). |
101 | | * |
102 | | * To understand why a weight factor is useful, consider what happens |
103 | | * with thick, sans-serif characters that look similar and have a value |
104 | | * of R near 1. Different characters can have a high correlation value, |
105 | | * and the classifier will make incorrect substitutions. The weight |
106 | | * factor raises the threshold for these characters. |
107 | | * |
108 | | * Yet another approach to reduce such substitutions is to run the classifier |
109 | | * in a non-greedy way, matching to the template with the highest |
110 | | * score, not the first template with a score satisfying the matching |
111 | | * constraint. However, this is not particularly effective. |
112 | | * |
113 | | * The implementation here gives the same result as in |
114 | | * pixCorrelationScoreSimple(), where a temporary Pix is made to hold |
115 | | * the AND and implementation uses rasterop: |
116 | | * pixt = pixCreateTemplate(pix1); |
117 | | * pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0); |
118 | | * pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0); |
119 | | * pixCountPixels(pixt, &count, tab); |
120 | | * pixDestroy(&pixt); |
121 | | * However, here it is done in a streaming fashion, counting as it goes, |
122 | | * and touching memory exactly once, giving a 3-4x speedup over the |
123 | | * simple implementation. This very fast correlation matcher was |
124 | | * contributed by William Rucklidge. |
125 | | * </pre> |
126 | | */ |
127 | | l_ok |
128 | | pixCorrelationScore(PIX *pix1, |
129 | | PIX *pix2, |
130 | | l_int32 area1, |
131 | | l_int32 area2, |
132 | | l_float32 delx, /* x(1) - x(3) */ |
133 | | l_float32 dely, /* y(1) - y(3) */ |
134 | | l_int32 maxdiffw, |
135 | | l_int32 maxdiffh, |
136 | | l_int32 *tab, |
137 | | l_float32 *pscore) |
138 | 0 | { |
139 | 0 | l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; |
140 | 0 | l_int32 wpl1, wpl2, lorow, hirow, locol, hicol; |
141 | 0 | l_int32 x, y, pix1lskip, pix2lskip, rowwords1, rowwords2; |
142 | 0 | l_uint32 word1, word2, andw; |
143 | 0 | l_uint32 *row1, *row2; |
144 | |
|
145 | 0 | if (!pscore) |
146 | 0 | return ERROR_INT("&score not defined", __func__, 1); |
147 | 0 | *pscore = 0.0; |
148 | 0 | if (!pix1 || pixGetDepth(pix1) != 1) |
149 | 0 | return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 1); |
150 | 0 | if (!pix2 || pixGetDepth(pix2) != 1) |
151 | 0 | return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 1); |
152 | 0 | if (!tab) |
153 | 0 | return ERROR_INT("tab not defined", __func__, 1); |
154 | 0 | if (area1 <= 0 || area2 <= 0) |
155 | 0 | return ERROR_INT("areas must be > 0", __func__, 1); |
156 | | |
157 | | /* Eliminate based on size difference */ |
158 | 0 | pixGetDimensions(pix1, &wi, &hi, NULL); |
159 | 0 | pixGetDimensions(pix2, &wt, &ht, NULL); |
160 | 0 | delw = L_ABS(wi - wt); |
161 | 0 | if (delw > maxdiffw) |
162 | 0 | return 0; |
163 | 0 | delh = L_ABS(hi - ht); |
164 | 0 | if (delh > maxdiffh) |
165 | 0 | return 0; |
166 | | |
167 | | /* Round difference to nearest integer */ |
168 | 0 | if (delx >= 0) |
169 | 0 | idelx = (l_int32)(delx + 0.5); |
170 | 0 | else |
171 | 0 | idelx = (l_int32)(delx - 0.5); |
172 | 0 | if (dely >= 0) |
173 | 0 | idely = (l_int32)(dely + 0.5); |
174 | 0 | else |
175 | 0 | idely = (l_int32)(dely - 0.5); |
176 | |
|
177 | 0 | count = 0; |
178 | 0 | wpl1 = pixGetWpl(pix1); |
179 | 0 | wpl2 = pixGetWpl(pix2); |
180 | 0 | rowwords2 = wpl2; |
181 | | |
182 | | /* What rows of pix1 need to be considered? Only those underlying the |
183 | | * shifted pix2. */ |
184 | 0 | lorow = L_MAX(idely, 0); |
185 | 0 | hirow = L_MIN(ht + idely, hi); |
186 | | |
187 | | /* Get the pointer to the first row of each image that will be |
188 | | * considered. */ |
189 | 0 | row1 = pixGetData(pix1) + wpl1 * lorow; |
190 | 0 | row2 = pixGetData(pix2) + wpl2 * (lorow - idely); |
191 | | |
192 | | /* Similarly, figure out which columns of pix1 will be considered. */ |
193 | 0 | locol = L_MAX(idelx, 0); |
194 | 0 | hicol = L_MIN(wt + idelx, wi); |
195 | |
|
196 | 0 | if (idelx >= 32) { |
197 | | /* pix2 is shifted far enough to the right that pix1's first |
198 | | * word(s) won't contribute to the count. Increment its |
199 | | * pointer to point to the first word that will contribute, |
200 | | * and adjust other values accordingly. */ |
201 | 0 | pix1lskip = idelx >> 5; /* # of words to skip on left */ |
202 | 0 | row1 += pix1lskip; |
203 | 0 | locol -= pix1lskip << 5; |
204 | 0 | hicol -= pix1lskip << 5; |
205 | 0 | idelx &= 31; |
206 | 0 | } else if (idelx <= -32) { |
207 | | /* pix2 is shifted far enough to the left that its first word(s) |
208 | | * won't contribute to the count. Increment its pointer |
209 | | * to point to the first word that will contribute, |
210 | | * and adjust other values accordingly. */ |
211 | 0 | pix2lskip = -((idelx + 31) >> 5); /* # of words to skip on left */ |
212 | 0 | row2 += pix2lskip; |
213 | 0 | rowwords2 -= pix2lskip; |
214 | 0 | idelx += pix2lskip << 5; |
215 | 0 | } |
216 | |
|
217 | 0 | if ((locol >= hicol) || (lorow >= hirow)) { /* there is no overlap */ |
218 | 0 | count = 0; |
219 | 0 | } else { |
220 | | /* How many words of each row of pix1 need to be considered? */ |
221 | 0 | rowwords1 = (hicol + 31) >> 5; |
222 | |
|
223 | 0 | if (idelx == 0) { |
224 | | /* There's no lateral offset; simple case. */ |
225 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
226 | 0 | for (x = 0; x < rowwords1; x++) { |
227 | 0 | andw = row1[x] & row2[x]; |
228 | 0 | count += tab[andw & 0xff] + |
229 | 0 | tab[(andw >> 8) & 0xff] + |
230 | 0 | tab[(andw >> 16) & 0xff] + |
231 | 0 | tab[andw >> 24]; |
232 | 0 | } |
233 | 0 | } |
234 | 0 | } else if (idelx > 0) { |
235 | | /* pix2 is shifted to the right. word 0 of pix1 is touched by |
236 | | * word 0 of pix2; word 1 of pix1 is touched by word 0 and word |
237 | | * 1 of pix2, and so on up to the last word of pix1 (word N), |
238 | | * which is touched by words N-1 and N of pix1... if there is a |
239 | | * word N. Handle the two cases (pix2 has N-1 words and pix2 |
240 | | * has at least N words) separately. |
241 | | * |
242 | | * Note: we know that pix2 has at least N-1 words (i.e., |
243 | | * rowwords2 >= rowwords1 - 1) by the following logic. |
244 | | * We can pretend that idelx <= 31 because the >= 32 logic |
245 | | * above adjusted everything appropriately. Then |
246 | | * hicol <= wt + idelx <= wt + 31, so |
247 | | * hicol + 31 <= wt + 62 |
248 | | * rowwords1 = (hicol + 31) >> 5 <= (wt + 62) >> 5 |
249 | | * rowwords2 == (wt + 31) >> 5, so |
250 | | * rowwords1 <= rowwords2 + 1 */ |
251 | 0 | if (rowwords2 < rowwords1) { |
252 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
253 | | /* Do the first iteration so the loop can be |
254 | | * branch-free. */ |
255 | 0 | word1 = row1[0]; |
256 | 0 | word2 = row2[0] >> idelx; |
257 | 0 | andw = word1 & word2; |
258 | 0 | count += tab[andw & 0xff] + |
259 | 0 | tab[(andw >> 8) & 0xff] + |
260 | 0 | tab[(andw >> 16) & 0xff] + |
261 | 0 | tab[andw >> 24]; |
262 | |
|
263 | 0 | for (x = 1; x < rowwords2; x++) { |
264 | 0 | word1 = row1[x]; |
265 | 0 | word2 = (row2[x] >> idelx) | |
266 | 0 | (row2[x - 1] << (32 - idelx)); |
267 | 0 | andw = word1 & word2; |
268 | 0 | count += tab[andw & 0xff] + |
269 | 0 | tab[(andw >> 8) & 0xff] + |
270 | 0 | tab[(andw >> 16) & 0xff] + |
271 | 0 | tab[andw >> 24]; |
272 | 0 | } |
273 | | |
274 | | /* Now the last iteration - we know that this is safe |
275 | | * (i.e. rowwords1 >= 2) because rowwords1 > rowwords2 |
276 | | * > 0 (if it was 0, we'd have taken the "count = 0" |
277 | | * fast-path out of here). */ |
278 | 0 | word1 = row1[x]; |
279 | 0 | word2 = row2[x - 1] << (32 - idelx); |
280 | 0 | andw = word1 & word2; |
281 | 0 | count += tab[andw & 0xff] + |
282 | 0 | tab[(andw >> 8) & 0xff] + |
283 | 0 | tab[(andw >> 16) & 0xff] + |
284 | 0 | tab[andw >> 24]; |
285 | 0 | } |
286 | 0 | } else { |
287 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
288 | | /* Do the first iteration so the loop can be |
289 | | * branch-free. This section is the same as above |
290 | | * except for the different limit on the loop, since |
291 | | * the last iteration is the same as all the other |
292 | | * iterations (beyond the first). */ |
293 | 0 | word1 = row1[0]; |
294 | 0 | word2 = row2[0] >> idelx; |
295 | 0 | andw = word1 & word2; |
296 | 0 | count += tab[andw & 0xff] + |
297 | 0 | tab[(andw >> 8) & 0xff] + |
298 | 0 | tab[(andw >> 16) & 0xff] + |
299 | 0 | tab[andw >> 24]; |
300 | |
|
301 | 0 | for (x = 1; x < rowwords1; x++) { |
302 | 0 | word1 = row1[x]; |
303 | 0 | word2 = (row2[x] >> idelx) | |
304 | 0 | (row2[x - 1] << (32 - idelx)); |
305 | 0 | andw = word1 & word2; |
306 | 0 | count += tab[andw & 0xff] + |
307 | 0 | tab[(andw >> 8) & 0xff] + |
308 | 0 | tab[(andw >> 16) & 0xff] + |
309 | 0 | tab[andw >> 24]; |
310 | 0 | } |
311 | 0 | } |
312 | 0 | } |
313 | 0 | } else { |
314 | | /* pix2 is shifted to the left. word 0 of pix1 is touched by |
315 | | * word 0 and word 1 of pix2, and so on up to the last word of |
316 | | * pix1 (word N), which is touched by words N and N+1 of |
317 | | * pix2... if there is a word N+1. Handle the two cases (pix2 |
318 | | * has N words and pix2 has at least N+1 words) separately. */ |
319 | 0 | if (rowwords1 < rowwords2) { |
320 | | /* pix2 has at least N+1 words, so every iteration through |
321 | | * the loop can be the same. */ |
322 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
323 | 0 | for (x = 0; x < rowwords1; x++) { |
324 | 0 | word1 = row1[x]; |
325 | 0 | word2 = row2[x] << -idelx; |
326 | 0 | word2 |= row2[x + 1] >> (32 + idelx); |
327 | 0 | andw = word1 & word2; |
328 | 0 | count += tab[andw & 0xff] + |
329 | 0 | tab[(andw >> 8) & 0xff] + |
330 | 0 | tab[(andw >> 16) & 0xff] + |
331 | 0 | tab[andw >> 24]; |
332 | 0 | } |
333 | 0 | } |
334 | 0 | } else { |
335 | | /* pix2 has only N words, so the last iteration is broken |
336 | | * out. */ |
337 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
338 | 0 | for (x = 0; x < rowwords1 - 1; x++) { |
339 | 0 | word1 = row1[x]; |
340 | 0 | word2 = row2[x] << -idelx; |
341 | 0 | word2 |= row2[x + 1] >> (32 + idelx); |
342 | 0 | andw = word1 & word2; |
343 | 0 | count += tab[andw & 0xff] + |
344 | 0 | tab[(andw >> 8) & 0xff] + |
345 | 0 | tab[(andw >> 16) & 0xff] + |
346 | 0 | tab[andw >> 24]; |
347 | 0 | } |
348 | |
|
349 | 0 | word1 = row1[x]; |
350 | 0 | word2 = row2[x] << -idelx; |
351 | 0 | andw = word1 & word2; |
352 | 0 | count += tab[andw & 0xff] + |
353 | 0 | tab[(andw >> 8) & 0xff] + |
354 | 0 | tab[(andw >> 16) & 0xff] + |
355 | 0 | tab[andw >> 24]; |
356 | 0 | } |
357 | 0 | } |
358 | 0 | } |
359 | 0 | } |
360 | |
|
361 | 0 | *pscore = (l_float32)count * (l_float32)count / |
362 | 0 | ((l_float32)area1 * (l_float32)area2); |
363 | | /* lept_stderr("score = %5.3f, count = %d, area1 = %d, area2 = %d\n", |
364 | | *pscore, count, area1, area2); */ |
365 | 0 | return 0; |
366 | 0 | } |
367 | | |
368 | | |
369 | | /*! |
370 | | * \brief pixCorrelationScoreThresholded() |
371 | | * |
372 | | * \param[in] pix1 test pix, 1 bpp |
373 | | * \param[in] pix2 exemplar pix, 1 bpp |
374 | | * \param[in] area1 number of on pixels in pix1 |
375 | | * \param[in] area2 number of on pixels in pix2 |
376 | | * \param[in] delx x comp of centroid difference |
377 | | * \param[in] dely y comp of centroid difference |
378 | | * \param[in] maxdiffw max width difference of pix1 and pix2 |
379 | | * \param[in] maxdiffh max height difference of pix1 and pix2 |
380 | | * \param[in] tab sum tab for byte |
381 | | * \param[in] downcount count of 1 pixels below each row of pix1 |
382 | | * \param[in] score_threshold |
383 | | * \return whether the correlation score is >= score_threshold |
384 | | * |
385 | | * |
386 | | * <pre> |
387 | | * Notes: |
388 | | * We check first that the two pix are roughly the same size. |
389 | | * Only if they meet that criterion do we compare the bitmaps. |
390 | | * The centroid difference is used to align the two images to the |
391 | | * nearest integer for the correlation. |
392 | | * |
393 | | * The correlation score is the ratio of the square of the number of |
394 | | * pixels in the AND of the two bitmaps to the product of the number |
395 | | * of ON pixels in each. Denote the number of ON pixels in pix1 |
396 | | * by |1|, the number in pix2 by |2|, and the number in the AND |
397 | | * of pix1 and pix2 by |1 & 2|. The correlation score is then |
398 | | * (|1 & 2|)**2 / (|1|*|2|). |
399 | | * |
400 | | * This score is compared with an input threshold, which can |
401 | | * be modified depending on the weight of the template. |
402 | | * The modified threshold is |
403 | | * thresh + (1.0 - thresh) * weight * R |
404 | | * where |
405 | | * weight is a fixed input factor between 0.0 and 1.0 |
406 | | * R = |2| / area(2) |
407 | | * and area(2) is the total number of pixels in 2 (i.e., width x height). |
408 | | * |
409 | | * To understand why a weight factor is useful, consider what happens |
410 | | * with thick, sans-serif characters that look similar and have a value |
411 | | * of R near 1. Different characters can have a high correlation value, |
412 | | * and the classifier will make incorrect substitutions. The weight |
413 | | * factor raises the threshold for these characters. |
414 | | * |
415 | | * Yet another approach to reduce such substitutions is to run the classifier |
416 | | * in a non-greedy way, matching to the template with the highest |
417 | | * score, not the first template with a score satisfying the matching |
418 | | * constraint. However, this is not particularly effective. |
419 | | * |
420 | | * This very fast correlation matcher was contributed by William Rucklidge. |
421 | | * </pre> |
422 | | */ |
423 | | l_int32 |
424 | | pixCorrelationScoreThresholded(PIX *pix1, |
425 | | PIX *pix2, |
426 | | l_int32 area1, |
427 | | l_int32 area2, |
428 | | l_float32 delx, /* x(1) - x(3) */ |
429 | | l_float32 dely, /* y(1) - y(3) */ |
430 | | l_int32 maxdiffw, |
431 | | l_int32 maxdiffh, |
432 | | l_int32 *tab, |
433 | | l_int32 *downcount, |
434 | | l_float32 score_threshold) |
435 | 0 | { |
436 | 0 | l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; |
437 | 0 | l_int32 wpl1, wpl2, lorow, hirow, locol, hicol, untouchable; |
438 | 0 | l_int32 x, y, pix1lskip, pix2lskip, rowwords1, rowwords2; |
439 | 0 | l_uint32 word1, word2, andw; |
440 | 0 | l_uint32 *row1, *row2; |
441 | 0 | l_float32 score; |
442 | 0 | l_int32 threshold; |
443 | |
|
444 | 0 | if (!pix1 || pixGetDepth(pix1) != 1) |
445 | 0 | return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 0); |
446 | 0 | if (!pix2 || pixGetDepth(pix2) != 1) |
447 | 0 | return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 0); |
448 | 0 | if (!tab) |
449 | 0 | return ERROR_INT("tab not defined", __func__, 0); |
450 | 0 | if (area1 <= 0 || area2 <= 0) |
451 | 0 | return ERROR_INT("areas must be > 0", __func__, 0); |
452 | | |
453 | | /* Eliminate based on size difference */ |
454 | 0 | pixGetDimensions(pix1, &wi, &hi, NULL); |
455 | 0 | pixGetDimensions(pix2, &wt, &ht, NULL); |
456 | 0 | delw = L_ABS(wi - wt); |
457 | 0 | if (delw > maxdiffw) |
458 | 0 | return FALSE; |
459 | 0 | delh = L_ABS(hi - ht); |
460 | 0 | if (delh > maxdiffh) |
461 | 0 | return FALSE; |
462 | | |
463 | | /* Round difference to nearest integer */ |
464 | 0 | if (delx >= 0) |
465 | 0 | idelx = (l_int32)(delx + 0.5); |
466 | 0 | else |
467 | 0 | idelx = (l_int32)(delx - 0.5); |
468 | 0 | if (dely >= 0) |
469 | 0 | idely = (l_int32)(dely + 0.5); |
470 | 0 | else |
471 | 0 | idely = (l_int32)(dely - 0.5); |
472 | | |
473 | | /* Compute the correlation count that is needed so that |
474 | | * count * count / (area1 * area2) >= score_threshold */ |
475 | 0 | threshold = (l_int32)ceil(sqrt((l_float64)score_threshold * area1 * area2)); |
476 | |
|
477 | 0 | count = 0; |
478 | 0 | wpl1 = pixGetWpl(pix1); |
479 | 0 | wpl2 = pixGetWpl(pix2); |
480 | 0 | rowwords2 = wpl2; |
481 | | |
482 | | /* What rows of pix1 need to be considered? Only those underlying the |
483 | | * shifted pix2. */ |
484 | 0 | lorow = L_MAX(idely, 0); |
485 | 0 | hirow = L_MIN(ht + idely, hi); |
486 | | |
487 | | /* Get the pointer to the first row of each image that will be |
488 | | * considered. */ |
489 | 0 | row1 = pixGetData(pix1) + wpl1 * lorow; |
490 | 0 | row2 = pixGetData(pix2) + wpl2 * (lorow - idely); |
491 | 0 | if (hirow <= hi) { |
492 | | /* Some rows of pix1 will never contribute to count */ |
493 | 0 | untouchable = downcount[hirow - 1]; |
494 | 0 | } |
495 | | |
496 | | /* Similarly, figure out which columns of pix1 will be considered. */ |
497 | 0 | locol = L_MAX(idelx, 0); |
498 | 0 | hicol = L_MIN(wt + idelx, wi); |
499 | |
|
500 | 0 | if (idelx >= 32) { |
501 | | /* pix2 is shifted far enough to the right that pix1's first |
502 | | * word(s) won't contribute to the count. Increment its |
503 | | * pointer to point to the first word that will contribute, |
504 | | * and adjust other values accordingly. */ |
505 | 0 | pix1lskip = idelx >> 5; /* # of words to skip on left */ |
506 | 0 | row1 += pix1lskip; |
507 | 0 | locol -= pix1lskip << 5; |
508 | 0 | hicol -= pix1lskip << 5; |
509 | 0 | idelx &= 31; |
510 | 0 | } else if (idelx <= -32) { |
511 | | /* pix2 is shifted far enough to the left that its first word(s) |
512 | | * won't contribute to the count. Increment its pointer |
513 | | * to point to the first word that will contribute, |
514 | | * and adjust other values accordingly. */ |
515 | 0 | pix2lskip = -((idelx + 31) >> 5); /* # of words to skip on left */ |
516 | 0 | row2 += pix2lskip; |
517 | 0 | rowwords2 -= pix2lskip; |
518 | 0 | idelx += pix2lskip << 5; |
519 | 0 | } |
520 | |
|
521 | 0 | if ((locol >= hicol) || (lorow >= hirow)) { /* there is no overlap */ |
522 | 0 | count = 0; |
523 | 0 | } else { |
524 | | /* How many words of each row of pix1 need to be considered? */ |
525 | 0 | rowwords1 = (hicol + 31) >> 5; |
526 | |
|
527 | 0 | if (idelx == 0) { |
528 | | /* There's no lateral offset; simple case. */ |
529 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
530 | 0 | for (x = 0; x < rowwords1; x++) { |
531 | 0 | andw = row1[x] & row2[x]; |
532 | 0 | count += tab[andw & 0xff] + |
533 | 0 | tab[(andw >> 8) & 0xff] + |
534 | 0 | tab[(andw >> 16) & 0xff] + |
535 | 0 | tab[andw >> 24]; |
536 | 0 | } |
537 | | |
538 | | /* If the count is over the threshold, no need to |
539 | | * calculate any further. Likewise, return early if the |
540 | | * count plus the maximum count attainable from further |
541 | | * rows is below the threshold. */ |
542 | 0 | if (count >= threshold) return TRUE; |
543 | 0 | if (count + downcount[y] - untouchable < threshold) { |
544 | 0 | return FALSE; |
545 | 0 | } |
546 | 0 | } |
547 | 0 | } else if (idelx > 0) { |
548 | | /* pix2 is shifted to the right. word 0 of pix1 is touched by |
549 | | * word 0 of pix2; word 1 of pix1 is touched by word 0 and word |
550 | | * 1 of pix2, and so on up to the last word of pix1 (word N), |
551 | | * which is touched by words N-1 and N of pix1... if there is a |
552 | | * word N. Handle the two cases (pix2 has N-1 words and pix2 |
553 | | * has at least N words) separately. |
554 | | * |
555 | | * Note: we know that pix2 has at least N-1 words (i.e., |
556 | | * rowwords2 >= rowwords1 - 1) by the following logic. |
557 | | * We can pretend that idelx <= 31 because the >= 32 logic |
558 | | * above adjusted everything appropriately. Then |
559 | | * hicol <= wt + idelx <= wt + 31, so |
560 | | * hicol + 31 <= wt + 62 |
561 | | * rowwords1 = (hicol + 31) >> 5 <= (wt + 62) >> 5 |
562 | | * rowwords2 == (wt + 31) >> 5, so |
563 | | * rowwords1 <= rowwords2 + 1 */ |
564 | 0 | if (rowwords2 < rowwords1) { |
565 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
566 | | /* Do the first iteration so the loop can be |
567 | | * branch-free. */ |
568 | 0 | word1 = row1[0]; |
569 | 0 | word2 = row2[0] >> idelx; |
570 | 0 | andw = word1 & word2; |
571 | 0 | count += tab[andw & 0xff] + |
572 | 0 | tab[(andw >> 8) & 0xff] + |
573 | 0 | tab[(andw >> 16) & 0xff] + |
574 | 0 | tab[andw >> 24]; |
575 | |
|
576 | 0 | for (x = 1; x < rowwords2; x++) { |
577 | 0 | word1 = row1[x]; |
578 | 0 | word2 = (row2[x] >> idelx) | |
579 | 0 | (row2[x - 1] << (32 - idelx)); |
580 | 0 | andw = word1 & word2; |
581 | 0 | count += tab[andw & 0xff] + |
582 | 0 | tab[(andw >> 8) & 0xff] + |
583 | 0 | tab[(andw >> 16) & 0xff] + |
584 | 0 | tab[andw >> 24]; |
585 | 0 | } |
586 | | |
587 | | /* Now the last iteration - we know that this is safe |
588 | | * (i.e. rowwords1 >= 2) because rowwords1 > rowwords2 |
589 | | * > 0 (if it was 0, we'd have taken the "count = 0" |
590 | | * fast-path out of here). */ |
591 | 0 | word1 = row1[x]; |
592 | 0 | word2 = row2[x - 1] << (32 - idelx); |
593 | 0 | andw = word1 & word2; |
594 | 0 | count += tab[andw & 0xff] + |
595 | 0 | tab[(andw >> 8) & 0xff] + |
596 | 0 | tab[(andw >> 16) & 0xff] + |
597 | 0 | tab[andw >> 24]; |
598 | |
|
599 | 0 | if (count >= threshold) return TRUE; |
600 | 0 | if (count + downcount[y] - untouchable < threshold) { |
601 | 0 | return FALSE; |
602 | 0 | } |
603 | 0 | } |
604 | 0 | } else { |
605 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
606 | | /* Do the first iteration so the loop can be |
607 | | * branch-free. This section is the same as above |
608 | | * except for the different limit on the loop, since |
609 | | * the last iteration is the same as all the other |
610 | | * iterations (beyond the first). */ |
611 | 0 | word1 = row1[0]; |
612 | 0 | word2 = row2[0] >> idelx; |
613 | 0 | andw = word1 & word2; |
614 | 0 | count += tab[andw & 0xff] + |
615 | 0 | tab[(andw >> 8) & 0xff] + |
616 | 0 | tab[(andw >> 16) & 0xff] + |
617 | 0 | tab[andw >> 24]; |
618 | |
|
619 | 0 | for (x = 1; x < rowwords1; x++) { |
620 | 0 | word1 = row1[x]; |
621 | 0 | word2 = (row2[x] >> idelx) | |
622 | 0 | (row2[x - 1] << (32 - idelx)); |
623 | 0 | andw = word1 & word2; |
624 | 0 | count += tab[andw & 0xff] + |
625 | 0 | tab[(andw >> 8) & 0xff] + |
626 | 0 | tab[(andw >> 16) & 0xff] + |
627 | 0 | tab[andw >> 24]; |
628 | 0 | } |
629 | |
|
630 | 0 | if (count >= threshold) return TRUE; |
631 | 0 | if (count + downcount[y] - untouchable < threshold) { |
632 | 0 | return FALSE; |
633 | 0 | } |
634 | 0 | } |
635 | 0 | } |
636 | 0 | } else { |
637 | | /* pix2 is shifted to the left. word 0 of pix1 is touched by |
638 | | * word 0 and word 1 of pix2, and so on up to the last word of |
639 | | * pix1 (word N), which is touched by words N and N+1 of |
640 | | * pix2... if there is a word N+1. Handle the two cases (pix2 |
641 | | * has N words and pix2 has at least N+1 words) separately. */ |
642 | 0 | if (rowwords1 < rowwords2) { |
643 | | /* pix2 has at least N+1 words, so every iteration through |
644 | | * the loop can be the same. */ |
645 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
646 | 0 | for (x = 0; x < rowwords1; x++) { |
647 | 0 | word1 = row1[x]; |
648 | 0 | word2 = row2[x] << -idelx; |
649 | 0 | word2 |= row2[x + 1] >> (32 + idelx); |
650 | 0 | andw = word1 & word2; |
651 | 0 | count += tab[andw & 0xff] + |
652 | 0 | tab[(andw >> 8) & 0xff] + |
653 | 0 | tab[(andw >> 16) & 0xff] + |
654 | 0 | tab[andw >> 24]; |
655 | 0 | } |
656 | |
|
657 | 0 | if (count >= threshold) return TRUE; |
658 | 0 | if (count + downcount[y] - untouchable < threshold) { |
659 | 0 | return FALSE; |
660 | 0 | } |
661 | 0 | } |
662 | 0 | } else { |
663 | | /* pix2 has only N words, so the last iteration is broken |
664 | | * out. */ |
665 | 0 | for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { |
666 | 0 | for (x = 0; x < rowwords1 - 1; x++) { |
667 | 0 | word1 = row1[x]; |
668 | 0 | word2 = row2[x] << -idelx; |
669 | 0 | word2 |= row2[x + 1] >> (32 + idelx); |
670 | 0 | andw = word1 & word2; |
671 | 0 | count += tab[andw & 0xff] + |
672 | 0 | tab[(andw >> 8) & 0xff] + |
673 | 0 | tab[(andw >> 16) & 0xff] + |
674 | 0 | tab[andw >> 24]; |
675 | 0 | } |
676 | |
|
677 | 0 | word1 = row1[x]; |
678 | 0 | word2 = row2[x] << -idelx; |
679 | 0 | andw = word1 & word2; |
680 | 0 | count += tab[andw & 0xff] + |
681 | 0 | tab[(andw >> 8) & 0xff] + |
682 | 0 | tab[(andw >> 16) & 0xff] + |
683 | 0 | tab[andw >> 24]; |
684 | |
|
685 | 0 | if (count >= threshold) return TRUE; |
686 | 0 | if (count + downcount[y] - untouchable < threshold) { |
687 | 0 | return FALSE; |
688 | 0 | } |
689 | 0 | } |
690 | 0 | } |
691 | 0 | } |
692 | 0 | } |
693 | | |
694 | 0 | score = (l_float32)count * (l_float32)count / |
695 | 0 | ((l_float32)area1 * (l_float32)area2); |
696 | 0 | if (score >= score_threshold) { |
697 | 0 | lept_stderr( |
698 | 0 | "count %d < threshold %d but score %g >= score_threshold %g\n", |
699 | 0 | count, threshold, score, score_threshold); |
700 | 0 | } |
701 | 0 | return FALSE; |
702 | 0 | } |
703 | | |
704 | | |
705 | | /* -------------------------------------------------------------------- * |
706 | | * Simple 2 pix correlators (for jbig2 clustering) * |
707 | | * -------------------------------------------------------------------- */ |
708 | | /*! |
709 | | * \brief pixCorrelationScoreSimple() |
710 | | * |
711 | | * \param[in] pix1 test pix, 1 bpp |
712 | | * \param[in] pix2 exemplar pix, 1 bpp |
713 | | * \param[in] area1 number of on pixels in pix1 |
714 | | * \param[in] area2 number of on pixels in pix2 |
715 | | * \param[in] delx x comp of centroid difference |
716 | | * \param[in] dely y comp of centroid difference |
717 | | * \param[in] maxdiffw max width difference of pix1 and pix2 |
718 | | * \param[in] maxdiffh max height difference of pix1 and pix2 |
719 | | * \param[in] tab sum tab for byte |
720 | | * \param[out] pscore correlation score, in range [0.0 ... 1.0] |
721 | | * \return 0 if OK, 1 on error |
722 | | * |
723 | | * <pre> |
724 | | * Notes: |
725 | | * (1) This calculates exactly the same value as pixCorrelationScore(). |
726 | | * It is 2-3x slower, but much simpler to understand. |
727 | | * (2) The returned correlation score is 0.0 if the width or height |
728 | | * exceed %maxdiffw or %maxdiffh. |
729 | | * </pre> |
730 | | */ |
731 | | l_ok |
732 | | pixCorrelationScoreSimple(PIX *pix1, |
733 | | PIX *pix2, |
734 | | l_int32 area1, |
735 | | l_int32 area2, |
736 | | l_float32 delx, /* x(1) - x(3) */ |
737 | | l_float32 dely, /* y(1) - y(3) */ |
738 | | l_int32 maxdiffw, |
739 | | l_int32 maxdiffh, |
740 | | l_int32 *tab, |
741 | | l_float32 *pscore) |
742 | 0 | { |
743 | 0 | l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; |
744 | 0 | PIX *pixt; |
745 | |
|
746 | 0 | if (!pscore) |
747 | 0 | return ERROR_INT("&score not defined", __func__, 1); |
748 | 0 | *pscore = 0.0; |
749 | 0 | if (!pix1 || pixGetDepth(pix1) != 1) |
750 | 0 | return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 1); |
751 | 0 | if (!pix2 || pixGetDepth(pix2) != 1) |
752 | 0 | return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 1); |
753 | 0 | if (!tab) |
754 | 0 | return ERROR_INT("tab not defined", __func__, 1); |
755 | 0 | if (!area1 || !area2) |
756 | 0 | return ERROR_INT("areas must be > 0", __func__, 1); |
757 | | |
758 | | /* Eliminate based on size difference */ |
759 | 0 | pixGetDimensions(pix1, &wi, &hi, NULL); |
760 | 0 | pixGetDimensions(pix2, &wt, &ht, NULL); |
761 | 0 | delw = L_ABS(wi - wt); |
762 | 0 | if (delw > maxdiffw) |
763 | 0 | return 0; |
764 | 0 | delh = L_ABS(hi - ht); |
765 | 0 | if (delh > maxdiffh) |
766 | 0 | return 0; |
767 | | |
768 | | /* Round difference to nearest integer */ |
769 | 0 | if (delx >= 0) |
770 | 0 | idelx = (l_int32)(delx + 0.5); |
771 | 0 | else |
772 | 0 | idelx = (l_int32)(delx - 0.5); |
773 | 0 | if (dely >= 0) |
774 | 0 | idely = (l_int32)(dely + 0.5); |
775 | 0 | else |
776 | 0 | idely = (l_int32)(dely - 0.5); |
777 | | |
778 | | /* pixt = pixAnd(NULL, pix1, pix2), including shift. |
779 | | * To insure that pixels are ON only within the |
780 | | * intersection of pix1 and the shifted pix2: |
781 | | * (1) Start with pixt cleared and equal in size to pix1. |
782 | | * (2) Blit the shifted pix2 onto pixt. Then all ON pixels |
783 | | * are within the intersection of pix1 and the shifted pix2. |
784 | | * (3) AND pix1 with pixt. */ |
785 | 0 | pixt = pixCreateTemplate(pix1); |
786 | 0 | pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0); |
787 | 0 | pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0); |
788 | 0 | pixCountPixels(pixt, &count, tab); |
789 | 0 | pixDestroy(&pixt); |
790 | |
|
791 | 0 | *pscore = (l_float32)count * (l_float32)count / |
792 | 0 | ((l_float32)area1 * (l_float32)area2); |
793 | | /* lept_stderr("score = %5.3f, count = %d, area1 = %d, area2 = %d\n", |
794 | | *pscore, count, area1, area2); */ |
795 | 0 | return 0; |
796 | 0 | } |
797 | | |
798 | | |
799 | | /*! |
800 | | * \brief pixCorrelationScoreShifted() |
801 | | * |
802 | | * \param[in] pix1 1 bpp |
803 | | * \param[in] pix2 1 bpp |
804 | | * \param[in] area1 number of on pixels in pix1 |
805 | | * \param[in] area2 number of on pixels in pix2 |
806 | | * \param[in] delx x translation of pix2 relative to pix1 |
807 | | * \param[in] dely y translation of pix2 relative to pix1 |
808 | | * \param[in] tab sum tab for byte |
809 | | * \param[out] pscore correlation score |
810 | | * \return 0 if OK, 1 on error |
811 | | * |
812 | | * <pre> |
813 | | * Notes: |
814 | | * (1) This finds the correlation between two 1 bpp images, |
815 | | * when pix2 is shifted by (delx, dely) with respect |
816 | | * to each other. |
817 | | * (2) This is implemented by starting with a copy of pix1 and |
818 | | * ANDing its pixels with those of a shifted pix2. |
819 | | * (3) Get the pixel counts for area1 and area2 using piCountPixels(). |
820 | | * (4) A good estimate for a shift that would maximize the correlation |
821 | | * is to align the centroids (cx1, cy1; cx2, cy2), giving the |
822 | | * relative translations etransx and etransy: |
823 | | * etransx = cx1 - cx2 |
824 | | * etransy = cy1 - cy2 |
825 | | * Typically delx is chosen to be near etransx; ditto for dely. |
826 | | * This function is used in pixBestCorrelation(), where the |
827 | | * translations delx and dely are varied to find the best alignment. |
828 | | * (5) We do not check the sizes of pix1 and pix2, because they should |
829 | | * be comparable. |
830 | | * </pre> |
831 | | */ |
832 | | l_ok |
833 | | pixCorrelationScoreShifted(PIX *pix1, |
834 | | PIX *pix2, |
835 | | l_int32 area1, |
836 | | l_int32 area2, |
837 | | l_int32 delx, |
838 | | l_int32 dely, |
839 | | l_int32 *tab, |
840 | | l_float32 *pscore) |
841 | 0 | { |
842 | 0 | l_int32 w1, h1, w2, h2, count; |
843 | 0 | PIX *pixt; |
844 | |
|
845 | 0 | if (!pscore) |
846 | 0 | return ERROR_INT("&score not defined", __func__, 1); |
847 | 0 | *pscore = 0.0; |
848 | 0 | if (!pix1 || pixGetDepth(pix1) != 1) |
849 | 0 | return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 1); |
850 | 0 | if (!pix2 || pixGetDepth(pix2) != 1) |
851 | 0 | return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 1); |
852 | 0 | if (!tab) |
853 | 0 | return ERROR_INT("tab not defined", __func__, 1); |
854 | 0 | if (!area1 || !area2) |
855 | 0 | return ERROR_INT("areas must be > 0", __func__, 1); |
856 | | |
857 | 0 | pixGetDimensions(pix1, &w1, &h1, NULL); |
858 | 0 | pixGetDimensions(pix2, &w2, &h2, NULL); |
859 | | |
860 | | /* To insure that pixels are ON only within the |
861 | | * intersection of pix1 and the shifted pix2: |
862 | | * (1) Start with pixt cleared and equal in size to pix1. |
863 | | * (2) Blit the shifted pix2 onto pixt. Then all ON pixels |
864 | | * are within the intersection of pix1 and the shifted pix2. |
865 | | * (3) AND pix1 with pixt. */ |
866 | 0 | pixt = pixCreateTemplate(pix1); |
867 | 0 | pixRasterop(pixt, delx, dely, w2, h2, PIX_SRC, pix2, 0, 0); |
868 | 0 | pixRasterop(pixt, 0, 0, w1, h1, PIX_SRC & PIX_DST, pix1, 0, 0); |
869 | 0 | pixCountPixels(pixt, &count, tab); |
870 | 0 | pixDestroy(&pixt); |
871 | |
|
872 | 0 | *pscore = (l_float32)count * (l_float32)count / |
873 | 0 | ((l_float32)area1 * (l_float32)area2); |
874 | 0 | return 0; |
875 | 0 | } |