/src/leptonica/src/skew.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file skew.c |
29 | | * <pre> |
30 | | * |
31 | | * Top-level deskew interfaces |
32 | | * PIX *pixDeskewBoth() |
33 | | * PIX *pixDeskew() |
34 | | * PIX *pixFindSkewAndDeskew() |
35 | | * PIX *pixDeskewGeneral() |
36 | | * |
37 | | * Top-level angle-finding interface |
38 | | * l_int32 pixFindSkew() |
39 | | * |
40 | | * Basic angle-finding functions |
41 | | * l_int32 pixFindSkewSweep() |
42 | | * l_int32 pixFindSkewSweepAndSearch() |
43 | | * l_int32 pixFindSkewSweepAndSearchScore() |
44 | | * l_int32 pixFindSkewSweepAndSearchScorePivot() |
45 | | * |
46 | | * Search over arbitrary range of angles in orthogonal directions |
47 | | * l_int32 pixFindSkewOrthogonalRange() |
48 | | * |
49 | | * Differential square sum function for scoring |
50 | | * l_int32 pixFindDifferentialSquareSum() |
51 | | * |
52 | | * Measures of variance of row sums |
53 | | * l_int32 pixFindNormalizedSquareSum() |
54 | | * |
55 | | * |
56 | | * ============================================================== |
57 | | * Page skew detection |
58 | | * |
59 | | * Skew is determined by pixel profiles, which are computed |
60 | | * as pixel sums along the raster line for each line in the |
61 | | * image. By vertically shearing the image by a given angle, |
62 | | * the sums can be computed quickly along the raster lines |
63 | | * rather than along lines at that angle. The score is |
64 | | * computed from these line sums by taking the square of |
65 | | * the DIFFERENCE between adjacent line sums, summed over |
66 | | * all lines. The skew angle is then found as the angle |
67 | | * that maximizes the score. The actual computation for |
68 | | * any sheared image is done in the function |
69 | | * pixFindDifferentialSquareSum(). |
70 | | * |
71 | | * The search for the angle that maximizes this score is |
72 | | * most efficiently performed by first sweeping coarsely |
73 | | * over angles, using a significantly reduced image (say, 4x |
74 | | * reduction), to find the approximate maximum within a half |
75 | | * degree or so, and then doing an interval-halving binary |
76 | | * search at higher resolution to get the skew angle to |
77 | | * within 1/20 degree or better. |
78 | | * |
79 | | * The differential signal is used (rather than just using |
80 | | * that variance of line sums) because it rejects the |
81 | | * background noise due to total number of black pixels, |
82 | | * and has maximum contributions from the baselines and |
83 | | * x-height lines of text when the textlines are aligned |
84 | | * with the raster lines. It also works well in multicolumn |
85 | | * pages where the textlines do not line up across columns. |
86 | | * |
87 | | * The method is fast, accurate to within an angle (in radians) |
88 | | * of approximately the inverse width in pixels of the image, |
89 | | * and will work on a surprisingly small amount of text data |
90 | | * (just a couple of text lines). Consequently, it can |
91 | | * also be used to find local skew if the skew were to vary |
92 | | * significantly over the page. Local skew determination |
93 | | * is not very important except for locating lines of |
94 | | * handwritten text that may be mixed with printed text. |
95 | | * </pre> |
96 | | */ |
97 | | |
98 | | #ifdef HAVE_CONFIG_H |
99 | | #include <config_auto.h> |
100 | | #endif /* HAVE_CONFIG_H */ |
101 | | |
102 | | #include <math.h> |
103 | | #include "allheaders.h" |
104 | | |
105 | | /* Default sweep angle parameters for pixFindSkew() */ |
106 | | static const l_float32 DefaultSweepRange = 7.0; /* degrees */ |
107 | | static const l_float32 DefaultSweepDelta = 1.0; /* degrees */ |
108 | | |
109 | | /* Default final angle difference parameter for binary |
110 | | * search in pixFindSkew(). The expected accuracy is |
111 | | * not better than the inverse image width in pixels, |
112 | | * say, 1/2000 radians, or about 0.03 degrees. */ |
113 | | static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ |
114 | | |
115 | | /* Default scale factors for pixFindSkew() */ |
116 | | static const l_int32 DefaultSweepReduction = 4; /* sweep part; 4 is good */ |
117 | | static const l_int32 DefaultBsReduction = 2; /* binary search part */ |
118 | | |
119 | | /* Minimum angle for deskewing in pixDeskew() */ |
120 | | static const l_float32 MinDeskewAngle = 0.1f; /* degree */ |
121 | | |
122 | | /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */ |
123 | | static const l_float32 MinAllowedConfidence = 3.0; |
124 | | |
125 | | /* Minimum allowed maxscore to give nonzero confidence */ |
126 | | static const l_int32 MinValidMaxscore = 10000; |
127 | | |
128 | | /* Constant setting threshold for minimum allowed minscore |
129 | | * to give nonzero confidence; multiply this constant by |
130 | | * (height * width^2) */ |
131 | | static const l_float32 MinscoreThreshFactor = 0.000002f; |
132 | | |
133 | | /* Default binarization threshold value */ |
134 | | static const l_int32 DefaultBinaryThreshold = 130; |
135 | | |
136 | | #ifndef NO_CONSOLE_IO |
137 | | #define DEBUG_PRINT_SCORES 0 |
138 | | #define DEBUG_PRINT_SWEEP 0 |
139 | | #define DEBUG_PRINT_BINARY 0 |
140 | | #define DEBUG_PRINT_ORTH 0 |
141 | | #define DEBUG_THRESHOLD 0 |
142 | | #define DEBUG_PLOT_SCORES 0 /* requires the gnuplot executable */ |
143 | | #endif /* ~NO_CONSOLE_IO */ |
144 | | |
145 | | |
146 | | |
147 | | /*-----------------------------------------------------------------------* |
148 | | * Top-level deskew interfaces * |
149 | | *-----------------------------------------------------------------------*/ |
150 | | /*! |
151 | | * \brief pixDeskewBoth() |
152 | | * |
153 | | * \param[in] pixs any depth |
154 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
155 | | * use 0 for default |
156 | | * \return pixd deskewed pix, or NULL on error |
157 | | * |
158 | | * <pre> |
159 | | * Notes: |
160 | | * (1) This binarizes if necessary and does both horizontal |
161 | | * and vertical deskewing, using the default parameters in |
162 | | * the underlying pixDeskew(). See usage there. |
163 | | * </pre> |
164 | | */ |
165 | | PIX * |
166 | | pixDeskewBoth(PIX *pixs, |
167 | | l_int32 redsearch) |
168 | 0 | { |
169 | 0 | PIX *pix1, *pix2, *pix3, *pix4; |
170 | |
|
171 | 0 | if (!pixs) |
172 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
173 | 0 | if (redsearch == 0) |
174 | 0 | redsearch = DefaultBsReduction; |
175 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
176 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
177 | | |
178 | 0 | pix1 = pixDeskew(pixs, redsearch); |
179 | 0 | pix2 = pixRotate90(pix1, 1); |
180 | 0 | pix3 = pixDeskew(pix2, redsearch); |
181 | 0 | pix4 = pixRotate90(pix3, -1); |
182 | 0 | pixDestroy(&pix1); |
183 | 0 | pixDestroy(&pix2); |
184 | 0 | pixDestroy(&pix3); |
185 | 0 | return pix4; |
186 | 0 | } |
187 | | |
188 | | |
189 | | /*! |
190 | | * \brief pixDeskew() |
191 | | * |
192 | | * \param[in] pixs any depth |
193 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
194 | | * use 0 for default |
195 | | * \return pixd deskewed pix, or NULL on error |
196 | | * |
197 | | * <pre> |
198 | | * Notes: |
199 | | * (1) This binarizes if necessary and finds the skew angle. If the |
200 | | * angle is large enough and there is sufficient confidence, |
201 | | * it returns a deskewed image; otherwise, it returns a clone. |
202 | | * (2) Typical values at 300 ppi for %redsearch are 2 and 4. |
203 | | * At 75 ppi, one should use %redsearch = 1. |
204 | | * </pre> |
205 | | */ |
206 | | PIX * |
207 | | pixDeskew(PIX *pixs, |
208 | | l_int32 redsearch) |
209 | 0 | { |
210 | 0 | if (!pixs) |
211 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
212 | 0 | if (redsearch == 0) |
213 | 0 | redsearch = DefaultBsReduction; |
214 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
215 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
216 | | |
217 | 0 | return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, NULL, NULL); |
218 | 0 | } |
219 | | |
220 | | |
221 | | /*! |
222 | | * \brief pixFindSkewAndDeskew() |
223 | | * |
224 | | * \param[in] pixs any depth |
225 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
226 | | * use 0 for default |
227 | | * \param[out] pangle [optional] angle required to deskew, |
228 | | * in degrees; use NULL to skip |
229 | | * \param[out] pconf [optional] conf value is ratio |
230 | | * of max/min scores; use NULL to skip |
231 | | * \return pixd deskewed pix, or NULL on error |
232 | | * |
233 | | * <pre> |
234 | | * Notes: |
235 | | * (1) This binarizes if necessary and finds the skew angle. If the |
236 | | * angle is large enough and there is sufficient confidence, |
237 | | * it returns a deskewed image; otherwise, it returns a clone. |
238 | | * </pre> |
239 | | */ |
240 | | PIX * |
241 | | pixFindSkewAndDeskew(PIX *pixs, |
242 | | l_int32 redsearch, |
243 | | l_float32 *pangle, |
244 | | l_float32 *pconf) |
245 | 0 | { |
246 | 0 | if (!pixs) |
247 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
248 | 0 | if (redsearch == 0) |
249 | 0 | redsearch = DefaultBsReduction; |
250 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
251 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
252 | | |
253 | 0 | return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, pangle, pconf); |
254 | 0 | } |
255 | | |
256 | | |
257 | | /*! |
258 | | * \brief pixDeskewGeneral() |
259 | | * |
260 | | * \param[in] pixs any depth |
261 | | * \param[in] redsweep for linear search: reduction factor = 1, 2 or 4; |
262 | | * use 0 for default |
263 | | * \param[in] sweeprange in degrees in each direction from 0; |
264 | | * use 0.0 for default |
265 | | * \param[in] sweepdelta in degrees; use 0.0 for default |
266 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
267 | | * use 0 for default; |
268 | | * \param[in] thresh for binarizing the image; use 0 for default |
269 | | * \param[out] pangle [optional] angle required to deskew, |
270 | | * in degrees; use NULL to skip |
271 | | * \param[out] pconf [optional] conf value is ratio |
272 | | * of max/min scores; use NULL to skip |
273 | | * \return pixd deskewed pix, or NULL on error |
274 | | * |
275 | | * <pre> |
276 | | * Notes: |
277 | | * (1) This binarizes if necessary and finds the skew angle. If the |
278 | | * angle is large enough and there is sufficient confidence, |
279 | | * it returns a deskewed image; otherwise, it returns a clone. |
280 | | * </pre> |
281 | | */ |
282 | | PIX * |
283 | | pixDeskewGeneral(PIX *pixs, |
284 | | l_int32 redsweep, |
285 | | l_float32 sweeprange, |
286 | | l_float32 sweepdelta, |
287 | | l_int32 redsearch, |
288 | | l_int32 thresh, |
289 | | l_float32 *pangle, |
290 | | l_float32 *pconf) |
291 | 0 | { |
292 | 0 | l_int32 ret, depth; |
293 | 0 | l_float32 angle, conf, deg2rad; |
294 | 0 | PIX *pixb, *pixd; |
295 | |
|
296 | 0 | if (pangle) *pangle = 0.0; |
297 | 0 | if (pconf) *pconf = 0.0; |
298 | 0 | if (!pixs) |
299 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
300 | 0 | if (redsweep == 0) |
301 | 0 | redsweep = DefaultSweepReduction; |
302 | 0 | else if (redsweep != 1 && redsweep != 2 && redsweep != 4) |
303 | 0 | return (PIX *)ERROR_PTR("redsweep not in {1,2,4}", __func__, NULL); |
304 | 0 | if (sweeprange == 0.0) |
305 | 0 | sweeprange = DefaultSweepRange; |
306 | 0 | if (sweepdelta == 0.0) |
307 | 0 | sweepdelta = DefaultSweepDelta; |
308 | 0 | if (redsearch == 0) |
309 | 0 | redsearch = DefaultBsReduction; |
310 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
311 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
312 | 0 | if (thresh == 0) |
313 | 0 | thresh = DefaultBinaryThreshold; |
314 | |
|
315 | 0 | deg2rad = 3.1415926535f / 180.f; |
316 | | |
317 | | /* Binarize if necessary */ |
318 | 0 | depth = pixGetDepth(pixs); |
319 | 0 | if (depth == 1) |
320 | 0 | pixb = pixClone(pixs); |
321 | 0 | else |
322 | 0 | pixb = pixConvertTo1(pixs, thresh); |
323 | | |
324 | | /* Use the 1 bpp image to find the skew */ |
325 | 0 | ret = pixFindSkewSweepAndSearch(pixb, &angle, &conf, redsweep, redsearch, |
326 | 0 | sweeprange, sweepdelta, |
327 | 0 | DefaultMinbsDelta); |
328 | 0 | pixDestroy(&pixb); |
329 | 0 | if (pangle) *pangle = angle; |
330 | 0 | if (pconf) *pconf = conf; |
331 | 0 | if (ret) |
332 | 0 | return pixClone(pixs); |
333 | | |
334 | 0 | if (L_ABS(angle) < MinDeskewAngle || conf < MinAllowedConfidence) |
335 | 0 | return pixClone(pixs); |
336 | | |
337 | 0 | if ((pixd = pixRotate(pixs, deg2rad * angle, L_ROTATE_AREA_MAP, |
338 | 0 | L_BRING_IN_WHITE, 0, 0)) == NULL) |
339 | 0 | return pixClone(pixs); |
340 | 0 | else |
341 | 0 | return pixd; |
342 | 0 | } |
343 | | |
344 | | |
345 | | /*-----------------------------------------------------------------------* |
346 | | * Simple top-level angle-finding interface * |
347 | | *-----------------------------------------------------------------------*/ |
348 | | /*! |
349 | | * \brief pixFindSkew() |
350 | | * |
351 | | * \param[in] pixs 1 bpp |
352 | | * \param[out] pangle angle required to deskew, in degrees |
353 | | * \param[out] pconf confidence value is ratio max/min scores |
354 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
355 | | * |
356 | | * <pre> |
357 | | * Notes: |
358 | | * (1) This is a simple high-level interface, that uses default |
359 | | * values of the parameters for reasonable speed and accuracy. |
360 | | * (2) The angle returned is the negative of the skew angle of |
361 | | * the image. It is the angle required for deskew. |
362 | | * Clockwise rotations are positive angles. |
363 | | * </pre> |
364 | | */ |
365 | | l_ok |
366 | | pixFindSkew(PIX *pixs, |
367 | | l_float32 *pangle, |
368 | | l_float32 *pconf) |
369 | 0 | { |
370 | 0 | if (pangle) *pangle = 0.0; |
371 | 0 | if (pconf) *pconf = 0.0; |
372 | 0 | if (!pangle || !pconf) |
373 | 0 | return ERROR_INT("&angle and/or &conf not defined", __func__, 1); |
374 | 0 | if (!pixs) |
375 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
376 | 0 | if (pixGetDepth(pixs) != 1) |
377 | 0 | return ERROR_INT("pixs not 1 bpp", __func__, 1); |
378 | | |
379 | 0 | return pixFindSkewSweepAndSearch(pixs, pangle, pconf, |
380 | 0 | DefaultSweepReduction, |
381 | 0 | DefaultBsReduction, |
382 | 0 | DefaultSweepRange, |
383 | 0 | DefaultSweepDelta, |
384 | 0 | DefaultMinbsDelta); |
385 | 0 | } |
386 | | |
387 | | |
388 | | /*-----------------------------------------------------------------------* |
389 | | * Basic angle-finding functions * |
390 | | *-----------------------------------------------------------------------*/ |
391 | | /*! |
392 | | * \brief pixFindSkewSweep() |
393 | | * |
394 | | * \param[in] pixs 1 bpp |
395 | | * \param[out] pangle angle required to deskew, in degrees |
396 | | * \param[in] reduction factor = 1, 2, 4 or 8 |
397 | | * \param[in] sweeprange half the full range; assumed about 0; in degrees |
398 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
399 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
400 | | * |
401 | | * <pre> |
402 | | * Notes: |
403 | | * (1) This examines the 'score' for skew angles with equal intervals. |
404 | | * (2) Caller must check the return value for validity of the result. |
405 | | * </pre> |
406 | | */ |
407 | | l_ok |
408 | | pixFindSkewSweep(PIX *pixs, |
409 | | l_float32 *pangle, |
410 | | l_int32 reduction, |
411 | | l_float32 sweeprange, |
412 | | l_float32 sweepdelta) |
413 | 0 | { |
414 | 0 | l_int32 ret, bzero, i, nangles; |
415 | 0 | l_float32 deg2rad, theta; |
416 | 0 | l_float32 sum, maxscore, maxangle; |
417 | 0 | NUMA *natheta, *nascore; |
418 | 0 | PIX *pix, *pixt; |
419 | |
|
420 | 0 | if (!pangle) |
421 | 0 | return ERROR_INT("&angle not defined", __func__, 1); |
422 | 0 | *pangle = 0.0; |
423 | 0 | if (!pixs) |
424 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
425 | 0 | if (pixGetDepth(pixs) != 1) |
426 | 0 | return ERROR_INT("pixs not 1 bpp", __func__, 1); |
427 | 0 | if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) |
428 | 0 | return ERROR_INT("reduction must be in {1,2,4,8}", __func__, 1); |
429 | | |
430 | 0 | deg2rad = 3.1415926535f / 180.f; |
431 | 0 | ret = 0; |
432 | | |
433 | | /* Generate reduced image, if requested */ |
434 | 0 | if (reduction == 1) |
435 | 0 | pix = pixClone(pixs); |
436 | 0 | else if (reduction == 2) |
437 | 0 | pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
438 | 0 | else if (reduction == 4) |
439 | 0 | pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
440 | 0 | else /* reduction == 8 */ |
441 | 0 | pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); |
442 | |
|
443 | 0 | pixZero(pix, &bzero); |
444 | 0 | if (bzero) { |
445 | 0 | pixDestroy(&pix); |
446 | 0 | return 1; |
447 | 0 | } |
448 | | |
449 | 0 | nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); |
450 | 0 | natheta = numaCreate(nangles); |
451 | 0 | nascore = numaCreate(nangles); |
452 | 0 | pixt = pixCreateTemplate(pix); |
453 | |
|
454 | 0 | if (!pix || !pixt) { |
455 | 0 | ret = ERROR_INT("pix and pixt not both made", __func__, 1); |
456 | 0 | goto cleanup; |
457 | 0 | } |
458 | 0 | if (!natheta || !nascore) { |
459 | 0 | ret = ERROR_INT("natheta and nascore not both made", __func__, 1); |
460 | 0 | goto cleanup; |
461 | 0 | } |
462 | | |
463 | 0 | for (i = 0; i < nangles; i++) { |
464 | 0 | theta = -sweeprange + i * sweepdelta; /* degrees */ |
465 | | |
466 | | /* Shear pix about the UL corner and put the result in pixt */ |
467 | 0 | pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); |
468 | | |
469 | | /* Get score */ |
470 | 0 | pixFindDifferentialSquareSum(pixt, &sum); |
471 | |
|
472 | | #if DEBUG_PRINT_SCORES |
473 | | L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); |
474 | | #endif /* DEBUG_PRINT_SCORES */ |
475 | | |
476 | | /* Save the result in the output arrays */ |
477 | 0 | numaAddNumber(nascore, sum); |
478 | 0 | numaAddNumber(natheta, theta); |
479 | 0 | } |
480 | | |
481 | | /* Find the location of the maximum (i.e., the skew angle) |
482 | | * by fitting the largest data point and its two neighbors |
483 | | * to a quadratic, using lagrangian interpolation. */ |
484 | 0 | numaFitMax(nascore, &maxscore, natheta, &maxangle); |
485 | 0 | *pangle = maxangle; |
486 | |
|
487 | | #if DEBUG_PRINT_SWEEP |
488 | | L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, |
489 | | maxangle, maxscore); |
490 | | #endif /* DEBUG_PRINT_SWEEP */ |
491 | |
|
492 | | #if DEBUG_PLOT_SCORES |
493 | | /* Plot the result -- the scores versus rotation angle -- |
494 | | * using gnuplot with GPLOT_LINES (lines connecting data points). |
495 | | * The GPLOT data structure is first created, with the |
496 | | * appropriate data incorporated from the two input NUMAs, |
497 | | * and then the function gplotMakeOutput() uses gnuplot to |
498 | | * generate the output plot. This can be either a .png file |
499 | | * or a .ps file, depending on whether you use GPLOT_PNG |
500 | | * or GPLOT_PS. */ |
501 | | {GPLOT *gplot; |
502 | | gplot = gplotCreate("sweep_output", GPLOT_PNG, |
503 | | "Sweep. Variance of difference of ON pixels vs. angle", |
504 | | "angle (deg)", "score"); |
505 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); |
506 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); |
507 | | gplotMakeOutput(gplot); |
508 | | gplotDestroy(&gplot); |
509 | | } |
510 | | #endif /* DEBUG_PLOT_SCORES */ |
511 | |
|
512 | 0 | cleanup: |
513 | 0 | pixDestroy(&pix); |
514 | 0 | pixDestroy(&pixt); |
515 | 0 | numaDestroy(&nascore); |
516 | 0 | numaDestroy(&natheta); |
517 | 0 | return ret; |
518 | 0 | } |
519 | | |
520 | | |
521 | | /*! |
522 | | * \brief pixFindSkewSweepAndSearch() |
523 | | * |
524 | | * \param[in] pixs 1 bpp |
525 | | * \param[out] pangle angle required to deskew; in degrees |
526 | | * \param[out] pconf confidence given by ratio of max/min score |
527 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
528 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
529 | | * and must not exceed redsweep |
530 | | * \param[in] sweeprange half the full range, assumed about 0; in degrees |
531 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
532 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
533 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
534 | | * |
535 | | * <pre> |
536 | | * Notes: |
537 | | * (1) This finds the skew angle, doing first a sweep through a set |
538 | | * of equal angles, and then doing a binary search until |
539 | | * convergence. |
540 | | * (2) Caller must check the return value for validity of the result. |
541 | | * (3) In computing the differential line sum variance score, we sum |
542 | | * the result over scanlines, but we always skip: |
543 | | * ~ at least one scanline |
544 | | * ~ not more than 10% of the image height |
545 | | * ~ not more than 5% of the image width |
546 | | * (4) See also notes in pixFindSkewSweepAndSearchScore() |
547 | | * </pre> |
548 | | */ |
549 | | l_ok |
550 | | pixFindSkewSweepAndSearch(PIX *pixs, |
551 | | l_float32 *pangle, |
552 | | l_float32 *pconf, |
553 | | l_int32 redsweep, |
554 | | l_int32 redsearch, |
555 | | l_float32 sweeprange, |
556 | | l_float32 sweepdelta, |
557 | | l_float32 minbsdelta) |
558 | 0 | { |
559 | 0 | return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, |
560 | 0 | redsweep, redsearch, 0.0, sweeprange, |
561 | 0 | sweepdelta, minbsdelta); |
562 | 0 | } |
563 | | |
564 | | |
565 | | /*! |
566 | | * \brief pixFindSkewSweepAndSearchScore() |
567 | | * |
568 | | * \param[in] pixs 1 bpp |
569 | | * \param[out] pangle angle required to deskew; in degrees |
570 | | * \param[out] pconf confidence given by ratio of max/min score |
571 | | * \param[out] pendscore [optional] max score; use NULL to ignore |
572 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
573 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
574 | | * and must not exceed redsweep |
575 | | * \param[in] sweepcenter angle about which sweep is performed; in degrees |
576 | | * \param[in] sweeprange half the full range, taken about sweepcenter; |
577 | | * in degrees |
578 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
579 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
580 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
581 | | * |
582 | | * <pre> |
583 | | * Notes: |
584 | | * (1) This finds the skew angle, doing first a sweep through a set |
585 | | * of equal angles, and then doing a binary search until convergence. |
586 | | * (2) There are two built-in constants that determine if the |
587 | | * returned confidence is nonzero: |
588 | | * ~ MinValidMaxscore (minimum allowed maxscore) |
589 | | * ~ MinscoreThreshFactor (determines minimum allowed |
590 | | * minscore, by multiplying by (height * width^2) |
591 | | * If either of these conditions is not satisfied, the returned |
592 | | * confidence value will be zero. The maxscore is optionally |
593 | | * returned in this function to allow evaluation of the |
594 | | * resulting angle by a method that is independent of the |
595 | | * returned confidence value. |
596 | | * (3) The larger the confidence value, the greater the probability |
597 | | * that the proper alignment is given by the angle that maximizes |
598 | | * variance. It should be compared to a threshold, which depends |
599 | | * on the application. Values between 3.0 and 6.0 are common. |
600 | | * (4) By default, the shear is about the UL corner. |
601 | | * </pre> |
602 | | */ |
603 | | l_ok |
604 | | pixFindSkewSweepAndSearchScore(PIX *pixs, |
605 | | l_float32 *pangle, |
606 | | l_float32 *pconf, |
607 | | l_float32 *pendscore, |
608 | | l_int32 redsweep, |
609 | | l_int32 redsearch, |
610 | | l_float32 sweepcenter, |
611 | | l_float32 sweeprange, |
612 | | l_float32 sweepdelta, |
613 | | l_float32 minbsdelta) |
614 | 0 | { |
615 | 0 | return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, |
616 | 0 | redsweep, redsearch, 0.0, |
617 | 0 | sweeprange, sweepdelta, |
618 | 0 | minbsdelta, |
619 | 0 | L_SHEAR_ABOUT_CORNER); |
620 | 0 | } |
621 | | |
622 | | |
623 | | /*! |
624 | | * \brief pixFindSkewSweepAndSearchScorePivot() |
625 | | * |
626 | | * \param[in] pixs 1 bpp |
627 | | * \param[out] pangle angle required to deskew; in degrees |
628 | | * \param[out] pconf confidence given by ratio of max/min score |
629 | | * \param[out] pendscore [optional] max score; use NULL to ignore |
630 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
631 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
632 | | * and must not exceed redsweep |
633 | | * \param[in] sweepcenter angle about which sweep is performed; in degrees |
634 | | * \param[in] sweeprange half the full range, taken about sweepcenter; |
635 | | * in degrees |
636 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
637 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
638 | | * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER |
639 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
640 | | * |
641 | | * <pre> |
642 | | * Notes: |
643 | | * (1) See notes in pixFindSkewSweepAndSearchScore(). |
644 | | * (2) This allows choice of shear pivoting from either the UL corner |
645 | | * or the center. For small angles, the ability to discriminate |
646 | | * angles is better with shearing from the UL corner. However, |
647 | | * for large angles (say, greater than 20 degrees), it is better |
648 | | * to shear about the center because a shear from the UL corner |
649 | | * loses too much of the image. |
650 | | * </pre> |
651 | | */ |
652 | | l_ok |
653 | | pixFindSkewSweepAndSearchScorePivot(PIX *pixs, |
654 | | l_float32 *pangle, |
655 | | l_float32 *pconf, |
656 | | l_float32 *pendscore, |
657 | | l_int32 redsweep, |
658 | | l_int32 redsearch, |
659 | | l_float32 sweepcenter, |
660 | | l_float32 sweeprange, |
661 | | l_float32 sweepdelta, |
662 | | l_float32 minbsdelta, |
663 | | l_int32 pivot) |
664 | 0 | { |
665 | 0 | l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; |
666 | 0 | l_int32 width, height; |
667 | 0 | l_float32 deg2rad, theta, delta; |
668 | 0 | l_float32 sum, maxscore, maxangle; |
669 | 0 | l_float32 centerangle, leftcenterangle, rightcenterangle; |
670 | 0 | l_float32 lefttemp, righttemp; |
671 | 0 | l_float32 bsearchscore[5]; |
672 | 0 | l_float32 minscore, minthresh; |
673 | 0 | l_float32 rangeleft; |
674 | 0 | NUMA *natheta, *nascore; |
675 | 0 | PIX *pixsw, *pixsch, *pixt1, *pixt2; |
676 | |
|
677 | 0 | if (pendscore) *pendscore = 0.0; |
678 | 0 | if (pangle) *pangle = 0.0; |
679 | 0 | if (pconf) *pconf = 0.0; |
680 | 0 | if (!pangle || !pconf) |
681 | 0 | return ERROR_INT("&angle and/or &conf not defined", __func__, 1); |
682 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
683 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
684 | 0 | if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) |
685 | 0 | return ERROR_INT("redsweep must be in {1,2,4,8}", __func__, 1); |
686 | 0 | if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) |
687 | 0 | return ERROR_INT("redsearch must be in {1,2,4,8}", __func__, 1); |
688 | 0 | if (redsearch > redsweep) |
689 | 0 | return ERROR_INT("redsearch must not exceed redsweep", __func__, 1); |
690 | 0 | if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) |
691 | 0 | return ERROR_INT("invalid pivot", __func__, 1); |
692 | | |
693 | 0 | deg2rad = 3.1415926535f / 180.f; |
694 | 0 | ret = 0; |
695 | | |
696 | | /* Generate reduced image for binary search, if requested */ |
697 | 0 | if (redsearch == 1) |
698 | 0 | pixsch = pixClone(pixs); |
699 | 0 | else if (redsearch == 2) |
700 | 0 | pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
701 | 0 | else if (redsearch == 4) |
702 | 0 | pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
703 | 0 | else /* redsearch == 8 */ |
704 | 0 | pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); |
705 | |
|
706 | 0 | pixZero(pixsch, &bzero); |
707 | 0 | if (bzero) { |
708 | 0 | pixDestroy(&pixsch); |
709 | 0 | return 1; |
710 | 0 | } |
711 | | |
712 | | /* Generate reduced image for sweep, if requested */ |
713 | 0 | ratio = redsweep / redsearch; |
714 | 0 | if (ratio == 1) { |
715 | 0 | pixsw = pixClone(pixsch); |
716 | 0 | } else { /* ratio > 1 */ |
717 | 0 | if (ratio == 2) |
718 | 0 | pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); |
719 | 0 | else if (ratio == 4) |
720 | 0 | pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); |
721 | 0 | else /* ratio == 8 */ |
722 | 0 | pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); |
723 | 0 | } |
724 | |
|
725 | 0 | pixt1 = pixCreateTemplate(pixsw); |
726 | 0 | if (ratio == 1) |
727 | 0 | pixt2 = pixClone(pixt1); |
728 | 0 | else |
729 | 0 | pixt2 = pixCreateTemplate(pixsch); |
730 | |
|
731 | 0 | nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); |
732 | 0 | natheta = numaCreate(nangles); |
733 | 0 | nascore = numaCreate(nangles); |
734 | |
|
735 | 0 | if (!pixsch || !pixsw) { |
736 | 0 | ret = ERROR_INT("pixsch and pixsw not both made", __func__, 1); |
737 | 0 | goto cleanup; |
738 | 0 | } |
739 | 0 | if (!pixt1 || !pixt2) { |
740 | 0 | ret = ERROR_INT("pixt1 and pixt2 not both made", __func__, 1); |
741 | 0 | goto cleanup; |
742 | 0 | } |
743 | 0 | if (!natheta || !nascore) { |
744 | 0 | ret = ERROR_INT("natheta and nascore not both made", __func__, 1); |
745 | 0 | goto cleanup; |
746 | 0 | } |
747 | | |
748 | | /* Do sweep */ |
749 | 0 | rangeleft = sweepcenter - sweeprange; |
750 | 0 | for (i = 0; i < nangles; i++) { |
751 | 0 | theta = rangeleft + i * sweepdelta; /* degrees */ |
752 | | |
753 | | /* Shear pix and put the result in pixt1 */ |
754 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) |
755 | 0 | pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); |
756 | 0 | else |
757 | 0 | pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); |
758 | | |
759 | | /* Get score */ |
760 | 0 | pixFindDifferentialSquareSum(pixt1, &sum); |
761 | |
|
762 | | #if DEBUG_PRINT_SCORES |
763 | | L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); |
764 | | #endif /* DEBUG_PRINT_SCORES */ |
765 | | |
766 | | /* Save the result in the output arrays */ |
767 | 0 | numaAddNumber(nascore, sum); |
768 | 0 | numaAddNumber(natheta, theta); |
769 | 0 | } |
770 | | |
771 | | /* Find the largest of the set (maxscore at maxangle) */ |
772 | 0 | numaGetMax(nascore, &maxscore, &maxindex); |
773 | 0 | numaGetFValue(natheta, maxindex, &maxangle); |
774 | |
|
775 | | #if DEBUG_PRINT_SWEEP |
776 | | L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, |
777 | | maxangle, maxscore); |
778 | | #endif /* DEBUG_PRINT_SWEEP */ |
779 | |
|
780 | | #if DEBUG_PLOT_SCORES |
781 | | /* Plot the sweep result -- the scores versus rotation angle -- |
782 | | * using gnuplot with GPLOT_LINES (lines connecting data points). */ |
783 | | {GPLOT *gplot; |
784 | | gplot = gplotCreate("sweep_output", GPLOT_PNG, |
785 | | "Sweep. Variance of difference of ON pixels vs. angle", |
786 | | "angle (deg)", "score"); |
787 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); |
788 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); |
789 | | gplotMakeOutput(gplot); |
790 | | gplotDestroy(&gplot); |
791 | | } |
792 | | #endif /* DEBUG_PLOT_SCORES */ |
793 | | |
794 | | /* Check if the max is at the end of the sweep. */ |
795 | 0 | n = numaGetCount(natheta); |
796 | 0 | if (maxindex == 0 || maxindex == n - 1) { |
797 | 0 | L_WARNING("max found at sweep edge\n", __func__); |
798 | 0 | goto cleanup; |
799 | 0 | } |
800 | | |
801 | | /* Empty the numas for re-use */ |
802 | 0 | numaEmpty(nascore); |
803 | 0 | numaEmpty(natheta); |
804 | | |
805 | | /* Do binary search to find skew angle. |
806 | | * First, set up initial three points. */ |
807 | 0 | centerangle = maxangle; |
808 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) { |
809 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); |
810 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); |
811 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), |
812 | 0 | L_BRING_IN_WHITE); |
813 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); |
814 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), |
815 | 0 | L_BRING_IN_WHITE); |
816 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); |
817 | 0 | } else { |
818 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); |
819 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); |
820 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), |
821 | 0 | L_BRING_IN_WHITE); |
822 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); |
823 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), |
824 | 0 | L_BRING_IN_WHITE); |
825 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); |
826 | 0 | } |
827 | |
|
828 | 0 | numaAddNumber(nascore, bsearchscore[2]); |
829 | 0 | numaAddNumber(natheta, centerangle); |
830 | 0 | numaAddNumber(nascore, bsearchscore[0]); |
831 | 0 | numaAddNumber(natheta, centerangle - sweepdelta); |
832 | 0 | numaAddNumber(nascore, bsearchscore[4]); |
833 | 0 | numaAddNumber(natheta, centerangle + sweepdelta); |
834 | | |
835 | | /* Start the search */ |
836 | 0 | delta = 0.5f * sweepdelta; |
837 | 0 | while (delta >= minbsdelta) |
838 | 0 | { |
839 | | /* Get the left intermediate score */ |
840 | 0 | leftcenterangle = centerangle - delta; |
841 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) |
842 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, |
843 | 0 | L_BRING_IN_WHITE); |
844 | 0 | else |
845 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, |
846 | 0 | L_BRING_IN_WHITE); |
847 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); |
848 | 0 | numaAddNumber(nascore, bsearchscore[1]); |
849 | 0 | numaAddNumber(natheta, leftcenterangle); |
850 | | |
851 | | /* Get the right intermediate score */ |
852 | 0 | rightcenterangle = centerangle + delta; |
853 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) |
854 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, |
855 | 0 | L_BRING_IN_WHITE); |
856 | 0 | else |
857 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, |
858 | 0 | L_BRING_IN_WHITE); |
859 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); |
860 | 0 | numaAddNumber(nascore, bsearchscore[3]); |
861 | 0 | numaAddNumber(natheta, rightcenterangle); |
862 | | |
863 | | /* Find the maximum of the five scores and its location. |
864 | | * Note that the maximum must be in the center |
865 | | * three values, not in the end two. */ |
866 | 0 | maxscore = bsearchscore[1]; |
867 | 0 | maxindex = 1; |
868 | 0 | for (i = 2; i < 4; i++) { |
869 | 0 | if (bsearchscore[i] > maxscore) { |
870 | 0 | maxscore = bsearchscore[i]; |
871 | 0 | maxindex = i; |
872 | 0 | } |
873 | 0 | } |
874 | | |
875 | | /* Set up score array to interpolate for the next iteration */ |
876 | 0 | lefttemp = bsearchscore[maxindex - 1]; |
877 | 0 | righttemp = bsearchscore[maxindex + 1]; |
878 | 0 | bsearchscore[2] = maxscore; |
879 | 0 | bsearchscore[0] = lefttemp; |
880 | 0 | bsearchscore[4] = righttemp; |
881 | | |
882 | | /* Get new center angle and delta for next iteration */ |
883 | 0 | centerangle = centerangle + delta * (maxindex - 2); |
884 | 0 | delta = 0.5f * delta; |
885 | 0 | } |
886 | 0 | *pangle = centerangle; |
887 | |
|
888 | | #if DEBUG_PRINT_SCORES |
889 | | L_INFO(" Binary search score = %7.3f\n", __func__, bsearchscore[2]); |
890 | | #endif /* DEBUG_PRINT_SCORES */ |
891 | |
|
892 | 0 | if (pendscore) /* save if requested */ |
893 | 0 | *pendscore = bsearchscore[2]; |
894 | | |
895 | | /* Return the ratio of Max score over Min score |
896 | | * as a confidence value. Don't trust if the Min score |
897 | | * is too small, which can happen if the image is all black |
898 | | * with only a few white pixels interspersed. In that case, |
899 | | * we get a contribution from the top and bottom edges when |
900 | | * vertically sheared, but this contribution becomes zero when |
901 | | * the shear angle is zero. For zero shear angle, the only |
902 | | * contribution will be from the white pixels. We expect that |
903 | | * the signal goes as the product of the (height * width^2), |
904 | | * so we compute a (hopefully) normalized minimum threshold as |
905 | | * a function of these dimensions. */ |
906 | 0 | numaGetMin(nascore, &minscore, &minloc); |
907 | 0 | width = pixGetWidth(pixsch); |
908 | 0 | height = pixGetHeight(pixsch); |
909 | 0 | minthresh = MinscoreThreshFactor * width * width * height; |
910 | |
|
911 | | #if DEBUG_THRESHOLD |
912 | | L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", __func__, |
913 | | minthresh, minscore); |
914 | | L_INFO(" maxscore = %10.2f\n", __func__, maxscore); |
915 | | #endif /* DEBUG_THRESHOLD */ |
916 | |
|
917 | 0 | if (minscore > minthresh) |
918 | 0 | *pconf = maxscore / minscore; |
919 | 0 | else |
920 | 0 | *pconf = 0.0; |
921 | | |
922 | | /* Don't trust it if too close to the edge of the sweep |
923 | | * range or if maxscore is small */ |
924 | 0 | if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || |
925 | 0 | (centerangle < rangeleft + sweepdelta) || |
926 | 0 | (maxscore < MinValidMaxscore)) |
927 | 0 | *pconf = 0.0; |
928 | |
|
929 | | #if DEBUG_PRINT_BINARY |
930 | | lept_stderr("Binary search: angle = %7.3f, score ratio = %6.2f\n", |
931 | | *pangle, *pconf); |
932 | | lept_stderr(" max score = %8.0f\n", maxscore); |
933 | | #endif /* DEBUG_PRINT_BINARY */ |
934 | |
|
935 | | #if DEBUG_PLOT_SCORES |
936 | | /* Plot the result -- the scores versus rotation angle -- |
937 | | * using gnuplot with GPLOT_POINTS. Because the data |
938 | | * points are not ordered by theta (increasing or decreasing), |
939 | | * using GPLOT_LINES would be confusing! */ |
940 | | {GPLOT *gplot; |
941 | | gplot = gplotCreate("search_output", GPLOT_PNG, |
942 | | "Binary search. Variance of difference of ON pixels vs. angle", |
943 | | "angle (deg)", "score"); |
944 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); |
945 | | gplotMakeOutput(gplot); |
946 | | gplotDestroy(&gplot); |
947 | | } |
948 | | #endif /* DEBUG_PLOT_SCORES */ |
949 | |
|
950 | 0 | cleanup: |
951 | 0 | pixDestroy(&pixsw); |
952 | 0 | pixDestroy(&pixsch); |
953 | 0 | pixDestroy(&pixt1); |
954 | 0 | pixDestroy(&pixt2); |
955 | 0 | numaDestroy(&nascore); |
956 | 0 | numaDestroy(&natheta); |
957 | 0 | return ret; |
958 | 0 | } |
959 | | |
960 | | |
961 | | /*---------------------------------------------------------------------* |
962 | | * Search over arbitrary range of angles in orthogonal directions * |
963 | | *---------------------------------------------------------------------*/ |
964 | | /* |
965 | | * \brief pixFindSkewOrthogonalRange() |
966 | | * |
967 | | * \param[in] pixs 1 bpp |
968 | | * \param[out] pangle angle required to deskew; in degrees cw |
969 | | * \param[out] pconf confidence given by ratio of max/min score |
970 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
971 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
972 | | * and must not exceed redsweep |
973 | | * \param[in] sweeprange half the full range in each orthogonal |
974 | | * direction, taken about 0, in degrees |
975 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
976 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
977 | | * \param[in] confprior amount by which confidence of 90 degree rotated |
978 | | * result is reduced when comparing with unrotated |
979 | | * confidence value |
980 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
981 | | * |
982 | | * <pre> |
983 | | * Notes: |
984 | | * (1) This searches for the skew angle, first in the range |
985 | | * [-sweeprange, sweeprange], and then in |
986 | | * [90 - sweeprange, 90 + sweeprange], with angles measured |
987 | | * clockwise. For exploring the full range of possibilities, |
988 | | * suggest using sweeprange = 47.0 degrees, giving some overlap |
989 | | * at 45 and 135 degrees. From these results, and discounting |
990 | | * the the second confidence by %confprior, it selects the |
991 | | * angle for maximal differential variance. If the angle |
992 | | * is larger than pi/4, the angle found after 90 degree rotation |
993 | | * is selected. |
994 | | * (2) The larger the confidence value, the greater the probability |
995 | | * that the proper alignment is given by the angle that maximizes |
996 | | * variance. It should be compared to a threshold, which depends |
997 | | * on the application. Values between 3.0 and 6.0 are common. |
998 | | * (3) Allowing for both portrait and landscape searches is more |
999 | | * difficult, because if the signal from the text lines is weak, |
1000 | | * a signal from vertical rules can be larger! |
1001 | | * The most difficult documents to deskew have some or all of: |
1002 | | * (a) Multiple columns, not aligned |
1003 | | * (b) Black lines along the vertical edges |
1004 | | * (c) Text from two pages, and at different angles |
1005 | | * Rule of thumb for resolution: |
1006 | | * (a) If the margins are clean, you can work at 75 ppi, |
1007 | | * although 100 ppi is safer. |
1008 | | * (b) If there are vertical lines in the margins, do not |
1009 | | * work below 150 ppi. The signal from the text lines must |
1010 | | * exceed that from the margin lines. |
1011 | | * (4) Choosing the %confprior parameter depends on knowing something |
1012 | | * about the source of image. However, we're not using |
1013 | | * real probabilities here, so its use is qualitative. |
1014 | | * If landscape and portrait are equally likely, use |
1015 | | * %confprior = 0.0. If the likelihood of portrait (non-rotated) |
1016 | | * is 100 times higher than that of landscape, we want to reduce |
1017 | | * the chance that we rotate to landscape in a situation where |
1018 | | * the landscape signal is accidentally larger than the |
1019 | | * portrait signal. To do this use a positive value of |
1020 | | * %confprior; say 1.5. |
1021 | | * </pre> |
1022 | | */ |
1023 | | l_int32 |
1024 | | pixFindSkewOrthogonalRange(PIX *pixs, |
1025 | | l_float32 *pangle, |
1026 | | l_float32 *pconf, |
1027 | | l_int32 redsweep, |
1028 | | l_int32 redsearch, |
1029 | | l_float32 sweeprange, |
1030 | | l_float32 sweepdelta, |
1031 | | l_float32 minbsdelta, |
1032 | | l_float32 confprior) |
1033 | 0 | { |
1034 | 0 | l_float32 angle1, conf1, score1, angle2, conf2, score2; |
1035 | 0 | PIX *pixr; |
1036 | |
|
1037 | 0 | if (pangle) *pangle = 0.0; |
1038 | 0 | if (pconf) *pconf = 0.0; |
1039 | 0 | if (!pangle || !pconf) |
1040 | 0 | return ERROR_INT("&angle and/or &conf not defined", __func__, 1); |
1041 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1042 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1043 | | |
1044 | 0 | pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, |
1045 | 0 | redsweep, redsearch, 0.0, |
1046 | 0 | sweeprange, sweepdelta, minbsdelta, |
1047 | 0 | L_SHEAR_ABOUT_CORNER); |
1048 | 0 | pixr = pixRotateOrth(pixs, 1); |
1049 | 0 | pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, |
1050 | 0 | redsweep, redsearch, 0.0, |
1051 | 0 | sweeprange, sweepdelta, minbsdelta, |
1052 | 0 | L_SHEAR_ABOUT_CORNER); |
1053 | 0 | pixDestroy(&pixr); |
1054 | |
|
1055 | 0 | if (conf1 > conf2 - confprior) { |
1056 | 0 | *pangle = angle1; |
1057 | 0 | *pconf = conf1; |
1058 | 0 | } else { |
1059 | 0 | *pangle = -90.0f + angle2; |
1060 | 0 | *pconf = conf2; |
1061 | 0 | } |
1062 | |
|
1063 | | #if DEBUG_PRINT_ORTH |
1064 | | lept_stderr(" About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", |
1065 | | angle1, conf1, score1); |
1066 | | lept_stderr(" About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", |
1067 | | angle2, conf2, score2); |
1068 | | lept_stderr(" Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); |
1069 | | #endif /* DEBUG_PRINT_ORTH */ |
1070 | |
|
1071 | 0 | return 0; |
1072 | 0 | } |
1073 | | |
1074 | | |
1075 | | |
1076 | | /*----------------------------------------------------------------* |
1077 | | * Differential square sum function * |
1078 | | *----------------------------------------------------------------*/ |
1079 | | /*! |
1080 | | * \brief pixFindDifferentialSquareSum() |
1081 | | * |
1082 | | * \param[in] pixs |
1083 | | * \param[out] psum result |
1084 | | * \return 0 if OK, 1 on error |
1085 | | * |
1086 | | * <pre> |
1087 | | * Notes: |
1088 | | * (1) At the top and bottom, we skip: |
1089 | | * ~ at least one scanline |
1090 | | * ~ not more than 10% of the image height |
1091 | | * ~ not more than 5% of the image width |
1092 | | * </pre> |
1093 | | */ |
1094 | | l_ok |
1095 | | pixFindDifferentialSquareSum(PIX *pixs, |
1096 | | l_float32 *psum) |
1097 | 0 | { |
1098 | 0 | l_int32 i, n; |
1099 | 0 | l_int32 w, h, skiph, skip, nskip; |
1100 | 0 | l_float32 val1, val2, diff, sum; |
1101 | 0 | NUMA *na; |
1102 | |
|
1103 | 0 | if (!psum) |
1104 | 0 | return ERROR_INT("&sum not defined", __func__, 1); |
1105 | 0 | *psum = 0.0; |
1106 | 0 | if (!pixs) |
1107 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
1108 | | |
1109 | | /* Generate a number array consisting of the sum |
1110 | | * of pixels in each row of pixs */ |
1111 | 0 | if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) |
1112 | 0 | return ERROR_INT("na not made", __func__, 1); |
1113 | | |
1114 | | /* Compute the number of rows at top and bottom to omit. |
1115 | | * We omit these to avoid getting a spurious signal from |
1116 | | * the top and bottom of a (nearly) all black image. */ |
1117 | 0 | w = pixGetWidth(pixs); |
1118 | 0 | h = pixGetHeight(pixs); |
1119 | 0 | skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ |
1120 | 0 | skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ |
1121 | 0 | nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ |
1122 | | |
1123 | | /* Sum the squares of differential row sums, on the |
1124 | | * allowed rows. Note that nskip must be >= 1. */ |
1125 | 0 | n = numaGetCount(na); |
1126 | 0 | sum = 0.0; |
1127 | 0 | for (i = nskip; i < n - nskip; i++) { |
1128 | 0 | numaGetFValue(na, i - 1, &val1); |
1129 | 0 | numaGetFValue(na, i, &val2); |
1130 | 0 | diff = val2 - val1; |
1131 | 0 | sum += diff * diff; |
1132 | 0 | } |
1133 | 0 | numaDestroy(&na); |
1134 | 0 | *psum = sum; |
1135 | 0 | return 0; |
1136 | 0 | } |
1137 | | |
1138 | | |
1139 | | /*----------------------------------------------------------------* |
1140 | | * Normalized square sum * |
1141 | | *----------------------------------------------------------------*/ |
1142 | | /*! |
1143 | | * \brief pixFindNormalizedSquareSum() |
1144 | | * |
1145 | | * \param[in] pixs |
1146 | | * \param[out] phratio [optional] ratio of normalized horiz square sum |
1147 | | * to result if the pixel distribution were uniform |
1148 | | * \param[out] pvratio [optional] ratio of normalized vert square sum |
1149 | | * to result if the pixel distribution were uniform |
1150 | | * \param[out] pfract [optional] ratio of fg pixels to total pixels |
1151 | | * \return 0 if OK, 1 on error or if there are no fg pixels |
1152 | | * |
1153 | | * <pre> |
1154 | | * Notes: |
1155 | | * (1) Let the image have h scanlines and N fg pixels. |
1156 | | * If the pixels were uniformly distributed on scanlines, |
1157 | | * the sum of squares of fg pixels on each scanline would be |
1158 | | * h * (N / h)^2. However, if the pixels are not uniformly |
1159 | | * distributed (e.g., for text), the sum of squares of fg |
1160 | | * pixels will be larger. We return in hratio and vratio the |
1161 | | * ratio of these two values. |
1162 | | * (2) If there are no fg pixels, hratio and vratio are returned as 0.0. |
1163 | | * </pre> |
1164 | | */ |
1165 | | l_ok |
1166 | | pixFindNormalizedSquareSum(PIX *pixs, |
1167 | | l_float32 *phratio, |
1168 | | l_float32 *pvratio, |
1169 | | l_float32 *pfract) |
1170 | 0 | { |
1171 | 0 | l_int32 i, w, h, empty; |
1172 | 0 | l_float32 sum, sumsq, uniform, val; |
1173 | 0 | NUMA *na; |
1174 | 0 | PIX *pixt; |
1175 | |
|
1176 | 0 | if (phratio) *phratio = 0.0; |
1177 | 0 | if (pvratio) *pvratio = 0.0; |
1178 | 0 | if (pfract) *pfract = 0.0; |
1179 | 0 | if (!phratio && !pvratio) |
1180 | 0 | return ERROR_INT("nothing to do", __func__, 1); |
1181 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1182 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1183 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
1184 | |
|
1185 | 0 | empty = 0; |
1186 | 0 | if (phratio) { |
1187 | 0 | na = pixCountPixelsByRow(pixs, NULL); |
1188 | 0 | numaGetSum(na, &sum); /* fg pixels */ |
1189 | 0 | if (pfract) *pfract = sum / (l_float32)(w * h); |
1190 | 0 | if (sum != 0.0) { |
1191 | 0 | uniform = sum * sum / h; /* h*(sum / h)^2 */ |
1192 | 0 | sumsq = 0.0; |
1193 | 0 | for (i = 0; i < h; i++) { |
1194 | 0 | numaGetFValue(na, i, &val); |
1195 | 0 | sumsq += val * val; |
1196 | 0 | } |
1197 | 0 | *phratio = sumsq / uniform; |
1198 | 0 | } else { |
1199 | 0 | empty = 1; |
1200 | 0 | } |
1201 | 0 | numaDestroy(&na); |
1202 | 0 | } |
1203 | |
|
1204 | 0 | if (pvratio) { |
1205 | 0 | if (empty == 1) return 1; |
1206 | 0 | pixt = pixRotateOrth(pixs, 1); |
1207 | 0 | na = pixCountPixelsByRow(pixt, NULL); |
1208 | 0 | numaGetSum(na, &sum); |
1209 | 0 | if (pfract) *pfract = sum / (l_float32)(w * h); |
1210 | 0 | if (sum != 0.0) { |
1211 | 0 | uniform = sum * sum / w; |
1212 | 0 | sumsq = 0.0; |
1213 | 0 | for (i = 0; i < w; i++) { |
1214 | 0 | numaGetFValue(na, i, &val); |
1215 | 0 | sumsq += val * val; |
1216 | 0 | } |
1217 | 0 | *pvratio = sumsq / uniform; |
1218 | 0 | } else { |
1219 | 0 | empty = 1; |
1220 | 0 | } |
1221 | 0 | pixDestroy(&pixt); |
1222 | 0 | numaDestroy(&na); |
1223 | 0 | } |
1224 | | |
1225 | 0 | return empty; |
1226 | 0 | } |