/src/leptonica/src/skew.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file skew.c |
29 | | * <pre> |
30 | | * |
31 | | * Top-level deskew interfaces |
32 | | * PIX *pixDeskewBoth() |
33 | | * PIX *pixDeskew() |
34 | | * PIX *pixFindSkewAndDeskew() |
35 | | * PIX *pixDeskewGeneral() |
36 | | * |
37 | | * Top-level angle-finding interface |
38 | | * l_int32 pixFindSkew() |
39 | | * |
40 | | * Basic angle-finding functions |
41 | | * l_int32 pixFindSkewSweep() |
42 | | * l_int32 pixFindSkewSweepAndSearch() |
43 | | * l_int32 pixFindSkewSweepAndSearchScore() |
44 | | * l_int32 pixFindSkewSweepAndSearchScorePivot() |
45 | | * |
46 | | * Search over arbitrary range of angles in orthogonal directions |
47 | | * l_int32 pixFindSkewOrthogonalRange() |
48 | | * |
49 | | * Differential square sum function for scoring |
50 | | * l_int32 pixFindDifferentialSquareSum() |
51 | | * |
52 | | * Measures of variance of row sums |
53 | | * l_int32 pixFindNormalizedSquareSum() |
54 | | * |
55 | | * |
56 | | * ============================================================== |
57 | | * Page skew detection |
58 | | * |
59 | | * Skew is determined by pixel profiles, which are computed |
60 | | * as pixel sums along the raster line for each line in the |
61 | | * image. By vertically shearing the image by a given angle, |
62 | | * the sums can be computed quickly along the raster lines |
63 | | * rather than along lines at that angle. The score is |
64 | | * computed from these line sums by taking the square of |
65 | | * the DIFFERENCE between adjacent line sums, summed over |
66 | | * all lines. The skew angle is then found as the angle |
67 | | * that maximizes the score. The actual computation for |
68 | | * any sheared image is done in the function |
69 | | * pixFindDifferentialSquareSum(). |
70 | | * |
71 | | * The search for the angle that maximizes this score is |
72 | | * most efficiently performed by first sweeping coarsely |
73 | | * over angles, using a significantly reduced image (say, 4x |
74 | | * reduction), to find the approximate maximum within a half |
75 | | * degree or so, and then doing an interval-halving binary |
76 | | * search at higher resolution to get the skew angle to |
77 | | * within 1/20 degree or better. |
78 | | * |
79 | | * The differential signal is used (rather than just using |
80 | | * that variance of line sums) because it rejects the |
81 | | * background noise due to total number of black pixels, |
82 | | * and has maximum contributions from the baselines and |
83 | | * x-height lines of text when the textlines are aligned |
84 | | * with the raster lines. It also works well in multicolumn |
85 | | * pages where the textlines do not line up across columns. |
86 | | * |
87 | | * The method is fast, accurate to within an angle (in radians) |
88 | | * of approximately the inverse width in pixels of the image, |
89 | | * and will work on a surprisingly small amount of text data |
90 | | * (just a couple of text lines). Consequently, it can |
91 | | * also be used to find local skew if the skew were to vary |
92 | | * significantly over the page. Local skew determination |
93 | | * is not very important except for locating lines of |
94 | | * handwritten text that may be mixed with printed text. |
95 | | * </pre> |
96 | | */ |
97 | | |
98 | | #ifdef HAVE_CONFIG_H |
99 | | #include <config_auto.h> |
100 | | #endif /* HAVE_CONFIG_H */ |
101 | | |
102 | | #include <math.h> |
103 | | #include "allheaders.h" |
104 | | |
105 | | /* Default sweep angle parameters for pixFindSkew() */ |
106 | | static const l_float32 DefaultSweepRange = 7.0; /* degrees */ |
107 | | static const l_float32 DefaultSweepDelta = 1.0; /* degrees */ |
108 | | |
109 | | /* Default final angle difference parameter for binary |
110 | | * search in pixFindSkew(). The expected accuracy is |
111 | | * not better than the inverse image width in pixels, |
112 | | * say, 1/2000 radians, or about 0.03 degrees. */ |
113 | | static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ |
114 | | |
115 | | /* Default scale factors for pixFindSkew() */ |
116 | | static const l_int32 DefaultSweepReduction = 4; /* sweep part; 4 is good */ |
117 | | static const l_int32 DefaultBsReduction = 2; /* binary search part */ |
118 | | |
119 | | /* Minimum angle for deskewing in pixDeskew() */ |
120 | | static const l_float32 MinDeskewAngle = 0.1f; /* degree */ |
121 | | |
122 | | /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */ |
123 | | static const l_float32 MinAllowedConfidence = 3.0; |
124 | | |
125 | | /* Minimum allowed maxscore to give nonzero confidence */ |
126 | | static const l_int32 MinValidMaxscore = 10000; |
127 | | |
128 | | /* Constant setting threshold for minimum allowed minscore |
129 | | * to give nonzero confidence; multiply this constant by |
130 | | * (height * width^2) */ |
131 | | static const l_float32 MinscoreThreshFactor = 0.000002f; |
132 | | |
133 | | /* Default binarization threshold value. |
134 | | * This is set deliberately above 130 to capture light foreground |
135 | | * with poor printing or images that are out of focus. */ |
136 | | static const l_int32 DefaultBinaryThreshold = 160; |
137 | | |
138 | | #ifndef NO_CONSOLE_IO |
139 | | #define DEBUG_PRINT_SCORES 0 |
140 | | #define DEBUG_PRINT_SWEEP 0 |
141 | | #define DEBUG_PRINT_BINARY 0 |
142 | | #define DEBUG_PRINT_ORTH 0 |
143 | | #define DEBUG_THRESHOLD 0 |
144 | | #define DEBUG_PLOT_SCORES 0 /* requires the gnuplot executable */ |
145 | | #endif /* ~NO_CONSOLE_IO */ |
146 | | |
147 | | |
148 | | |
149 | | /*-----------------------------------------------------------------------* |
150 | | * Top-level deskew interfaces * |
151 | | *-----------------------------------------------------------------------*/ |
152 | | /*! |
153 | | * \brief pixDeskewBoth() |
154 | | * |
155 | | * \param[in] pixs any depth |
156 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
157 | | * use 0 for default |
158 | | * \return pixd deskewed pix, or NULL on error |
159 | | * |
160 | | * <pre> |
161 | | * Notes: |
162 | | * (1) This binarizes if necessary and does both horizontal |
163 | | * and vertical deskewing, using the default parameters in |
164 | | * the underlying pixDeskew(). See usage there. |
165 | | * </pre> |
166 | | */ |
167 | | PIX * |
168 | | pixDeskewBoth(PIX *pixs, |
169 | | l_int32 redsearch) |
170 | 0 | { |
171 | 0 | PIX *pix1, *pix2, *pix3, *pix4; |
172 | |
|
173 | 0 | if (!pixs) |
174 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
175 | 0 | if (redsearch == 0) |
176 | 0 | redsearch = DefaultBsReduction; |
177 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
178 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
179 | | |
180 | 0 | pix1 = pixDeskew(pixs, redsearch); |
181 | 0 | pix2 = pixRotate90(pix1, 1); |
182 | 0 | pix3 = pixDeskew(pix2, redsearch); |
183 | 0 | pix4 = pixRotate90(pix3, -1); |
184 | 0 | pixDestroy(&pix1); |
185 | 0 | pixDestroy(&pix2); |
186 | 0 | pixDestroy(&pix3); |
187 | 0 | return pix4; |
188 | 0 | } |
189 | | |
190 | | |
191 | | /*! |
192 | | * \brief pixDeskew() |
193 | | * |
194 | | * \param[in] pixs any depth |
195 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
196 | | * use 0 for default |
197 | | * \return pixd deskewed pix, or NULL on error |
198 | | * |
199 | | * <pre> |
200 | | * Notes: |
201 | | * (1) This binarizes if necessary and finds the skew angle. If the |
202 | | * angle is large enough and there is sufficient confidence, |
203 | | * it returns a deskewed image; otherwise, it returns a clone. |
204 | | * (2) Typical values at 300 ppi for %redsearch are 2 and 4. |
205 | | * At 75 ppi, one should use %redsearch = 1. |
206 | | * </pre> |
207 | | */ |
208 | | PIX * |
209 | | pixDeskew(PIX *pixs, |
210 | | l_int32 redsearch) |
211 | 0 | { |
212 | 0 | if (!pixs) |
213 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
214 | 0 | if (redsearch == 0) |
215 | 0 | redsearch = DefaultBsReduction; |
216 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
217 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
218 | | |
219 | 0 | return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, NULL, NULL); |
220 | 0 | } |
221 | | |
222 | | |
223 | | /*! |
224 | | * \brief pixFindSkewAndDeskew() |
225 | | * |
226 | | * \param[in] pixs any depth |
227 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
228 | | * use 0 for default |
229 | | * \param[out] pangle [optional] angle required to deskew, |
230 | | * in degrees; use NULL to skip |
231 | | * \param[out] pconf [optional] conf value is ratio |
232 | | * of max/min scores; use NULL to skip |
233 | | * \return pixd deskewed pix, or NULL on error |
234 | | * |
235 | | * <pre> |
236 | | * Notes: |
237 | | * (1) This binarizes if necessary and finds the skew angle. If the |
238 | | * angle is large enough and there is sufficient confidence, |
239 | | * it returns a deskewed image; otherwise, it returns a clone. |
240 | | * </pre> |
241 | | */ |
242 | | PIX * |
243 | | pixFindSkewAndDeskew(PIX *pixs, |
244 | | l_int32 redsearch, |
245 | | l_float32 *pangle, |
246 | | l_float32 *pconf) |
247 | 0 | { |
248 | 0 | if (!pixs) |
249 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
250 | 0 | if (redsearch == 0) |
251 | 0 | redsearch = DefaultBsReduction; |
252 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
253 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
254 | | |
255 | 0 | return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, pangle, pconf); |
256 | 0 | } |
257 | | |
258 | | |
259 | | /*! |
260 | | * \brief pixDeskewGeneral() |
261 | | * |
262 | | * \param[in] pixs any depth |
263 | | * \param[in] redsweep for linear search: reduction factor = 1, 2 or 4; |
264 | | * use 0 for default |
265 | | * \param[in] sweeprange in degrees in each direction from 0; |
266 | | * use 0.0 for default |
267 | | * \param[in] sweepdelta in degrees; use 0.0 for default |
268 | | * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; |
269 | | * use 0 for default; |
270 | | * \param[in] thresh for binarizing the image; use 0 for default |
271 | | * \param[out] pangle [optional] angle required to deskew, |
272 | | * in degrees; use NULL to skip |
273 | | * \param[out] pconf [optional] conf value is ratio |
274 | | * of max/min scores; use NULL to skip |
275 | | * \return pixd deskewed pix, or NULL on error |
276 | | * |
277 | | * <pre> |
278 | | * Notes: |
279 | | * (1) This binarizes if necessary and finds the skew angle. If the |
280 | | * angle is large enough and there is sufficient confidence, |
281 | | * it returns a deskewed image; otherwise, it returns a clone. |
282 | | * </pre> |
283 | | */ |
284 | | PIX * |
285 | | pixDeskewGeneral(PIX *pixs, |
286 | | l_int32 redsweep, |
287 | | l_float32 sweeprange, |
288 | | l_float32 sweepdelta, |
289 | | l_int32 redsearch, |
290 | | l_int32 thresh, |
291 | | l_float32 *pangle, |
292 | | l_float32 *pconf) |
293 | 0 | { |
294 | 0 | l_int32 ret, depth; |
295 | 0 | l_float32 angle, conf, deg2rad; |
296 | 0 | PIX *pixb, *pixd; |
297 | |
|
298 | 0 | if (pangle) *pangle = 0.0; |
299 | 0 | if (pconf) *pconf = 0.0; |
300 | 0 | if (!pixs) |
301 | 0 | return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); |
302 | 0 | if (redsweep == 0) |
303 | 0 | redsweep = DefaultSweepReduction; |
304 | 0 | else if (redsweep != 1 && redsweep != 2 && redsweep != 4) |
305 | 0 | return (PIX *)ERROR_PTR("redsweep not in {1,2,4}", __func__, NULL); |
306 | 0 | if (sweeprange == 0.0) |
307 | 0 | sweeprange = DefaultSweepRange; |
308 | 0 | if (sweepdelta == 0.0) |
309 | 0 | sweepdelta = DefaultSweepDelta; |
310 | 0 | if (redsearch == 0) |
311 | 0 | redsearch = DefaultBsReduction; |
312 | 0 | else if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
313 | 0 | return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); |
314 | 0 | if (thresh == 0) |
315 | 0 | thresh = DefaultBinaryThreshold; |
316 | |
|
317 | 0 | deg2rad = 3.1415926535f / 180.f; |
318 | | |
319 | | /* Binarize if necessary */ |
320 | 0 | depth = pixGetDepth(pixs); |
321 | 0 | if (depth == 1) |
322 | 0 | pixb = pixClone(pixs); |
323 | 0 | else |
324 | 0 | pixb = pixConvertTo1(pixs, thresh); |
325 | | |
326 | | /* Use the 1 bpp image to find the skew */ |
327 | 0 | ret = pixFindSkewSweepAndSearch(pixb, &angle, &conf, redsweep, redsearch, |
328 | 0 | sweeprange, sweepdelta, |
329 | 0 | DefaultMinbsDelta); |
330 | 0 | pixDestroy(&pixb); |
331 | 0 | if (pangle) *pangle = angle; |
332 | 0 | if (pconf) *pconf = conf; |
333 | 0 | if (ret) |
334 | 0 | return pixClone(pixs); |
335 | | |
336 | 0 | if (L_ABS(angle) < MinDeskewAngle || conf < MinAllowedConfidence) |
337 | 0 | return pixClone(pixs); |
338 | | |
339 | 0 | if ((pixd = pixRotate(pixs, deg2rad * angle, L_ROTATE_AREA_MAP, |
340 | 0 | L_BRING_IN_WHITE, 0, 0)) == NULL) |
341 | 0 | return pixClone(pixs); |
342 | 0 | else |
343 | 0 | return pixd; |
344 | 0 | } |
345 | | |
346 | | |
347 | | /*-----------------------------------------------------------------------* |
348 | | * Simple top-level angle-finding interface * |
349 | | *-----------------------------------------------------------------------*/ |
350 | | /*! |
351 | | * \brief pixFindSkew() |
352 | | * |
353 | | * \param[in] pixs 1 bpp |
354 | | * \param[out] pangle angle required to deskew, in degrees |
355 | | * \param[out] pconf confidence value is ratio max/min scores |
356 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
357 | | * |
358 | | * <pre> |
359 | | * Notes: |
360 | | * (1) This is a simple high-level interface, that uses default |
361 | | * values of the parameters for reasonable speed and accuracy. |
362 | | * (2) The angle returned is the negative of the skew angle of |
363 | | * the image. It is the angle required for deskew. |
364 | | * Clockwise rotations are positive angles. |
365 | | * </pre> |
366 | | */ |
367 | | l_ok |
368 | | pixFindSkew(PIX *pixs, |
369 | | l_float32 *pangle, |
370 | | l_float32 *pconf) |
371 | 0 | { |
372 | 0 | if (pangle) *pangle = 0.0; |
373 | 0 | if (pconf) *pconf = 0.0; |
374 | 0 | if (!pangle || !pconf) |
375 | 0 | return ERROR_INT("&angle and/or &conf not defined", __func__, 1); |
376 | 0 | if (!pixs) |
377 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
378 | 0 | if (pixGetDepth(pixs) != 1) |
379 | 0 | return ERROR_INT("pixs not 1 bpp", __func__, 1); |
380 | | |
381 | 0 | return pixFindSkewSweepAndSearch(pixs, pangle, pconf, |
382 | 0 | DefaultSweepReduction, |
383 | 0 | DefaultBsReduction, |
384 | 0 | DefaultSweepRange, |
385 | 0 | DefaultSweepDelta, |
386 | 0 | DefaultMinbsDelta); |
387 | 0 | } |
388 | | |
389 | | |
390 | | /*-----------------------------------------------------------------------* |
391 | | * Basic angle-finding functions * |
392 | | *-----------------------------------------------------------------------*/ |
393 | | /*! |
394 | | * \brief pixFindSkewSweep() |
395 | | * |
396 | | * \param[in] pixs 1 bpp |
397 | | * \param[out] pangle angle required to deskew, in degrees |
398 | | * \param[in] reduction factor = 1, 2, 4 or 8 |
399 | | * \param[in] sweeprange half the full range; assumed about 0; in degrees |
400 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
401 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
402 | | * |
403 | | * <pre> |
404 | | * Notes: |
405 | | * (1) This examines the 'score' for skew angles with equal intervals. |
406 | | * (2) Caller must check the return value for validity of the result. |
407 | | * </pre> |
408 | | */ |
409 | | l_ok |
410 | | pixFindSkewSweep(PIX *pixs, |
411 | | l_float32 *pangle, |
412 | | l_int32 reduction, |
413 | | l_float32 sweeprange, |
414 | | l_float32 sweepdelta) |
415 | 0 | { |
416 | 0 | l_int32 ret, bzero, i, nangles; |
417 | 0 | l_float32 deg2rad, theta; |
418 | 0 | l_float32 sum, maxscore, maxangle; |
419 | 0 | NUMA *natheta, *nascore; |
420 | 0 | PIX *pix, *pixt; |
421 | |
|
422 | 0 | if (!pangle) |
423 | 0 | return ERROR_INT("&angle not defined", __func__, 1); |
424 | 0 | *pangle = 0.0; |
425 | 0 | if (!pixs) |
426 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
427 | 0 | if (pixGetDepth(pixs) != 1) |
428 | 0 | return ERROR_INT("pixs not 1 bpp", __func__, 1); |
429 | 0 | if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) |
430 | 0 | return ERROR_INT("reduction must be in {1,2,4,8}", __func__, 1); |
431 | | |
432 | 0 | deg2rad = 3.1415926535f / 180.f; |
433 | 0 | ret = 0; |
434 | | |
435 | | /* Generate reduced image, if requested */ |
436 | 0 | if (reduction == 1) |
437 | 0 | pix = pixClone(pixs); |
438 | 0 | else if (reduction == 2) |
439 | 0 | pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
440 | 0 | else if (reduction == 4) |
441 | 0 | pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
442 | 0 | else /* reduction == 8 */ |
443 | 0 | pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); |
444 | |
|
445 | 0 | pixZero(pix, &bzero); |
446 | 0 | if (bzero) { |
447 | 0 | pixDestroy(&pix); |
448 | 0 | return 1; |
449 | 0 | } |
450 | | |
451 | 0 | nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); |
452 | 0 | natheta = numaCreate(nangles); |
453 | 0 | nascore = numaCreate(nangles); |
454 | 0 | pixt = pixCreateTemplate(pix); |
455 | |
|
456 | 0 | if (!pix || !pixt) { |
457 | 0 | ret = ERROR_INT("pix and pixt not both made", __func__, 1); |
458 | 0 | goto cleanup; |
459 | 0 | } |
460 | 0 | if (!natheta || !nascore) { |
461 | 0 | ret = ERROR_INT("natheta and nascore not both made", __func__, 1); |
462 | 0 | goto cleanup; |
463 | 0 | } |
464 | | |
465 | 0 | for (i = 0; i < nangles; i++) { |
466 | 0 | theta = -sweeprange + i * sweepdelta; /* degrees */ |
467 | | |
468 | | /* Shear pix about the UL corner and put the result in pixt */ |
469 | 0 | pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); |
470 | | |
471 | | /* Get score */ |
472 | 0 | pixFindDifferentialSquareSum(pixt, &sum); |
473 | |
|
474 | | #if DEBUG_PRINT_SCORES |
475 | | L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); |
476 | | #endif /* DEBUG_PRINT_SCORES */ |
477 | | |
478 | | /* Save the result in the output arrays */ |
479 | 0 | numaAddNumber(nascore, sum); |
480 | 0 | numaAddNumber(natheta, theta); |
481 | 0 | } |
482 | | |
483 | | /* Find the location of the maximum (i.e., the skew angle) |
484 | | * by fitting the largest data point and its two neighbors |
485 | | * to a quadratic, using lagrangian interpolation. */ |
486 | 0 | numaFitMax(nascore, &maxscore, natheta, &maxangle); |
487 | 0 | *pangle = maxangle; |
488 | |
|
489 | | #if DEBUG_PRINT_SWEEP |
490 | | L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, |
491 | | maxangle, maxscore); |
492 | | #endif /* DEBUG_PRINT_SWEEP */ |
493 | |
|
494 | | #if DEBUG_PLOT_SCORES |
495 | | /* Plot the result -- the scores versus rotation angle -- |
496 | | * using gnuplot with GPLOT_LINES (lines connecting data points). |
497 | | * The GPLOT data structure is first created, with the |
498 | | * appropriate data incorporated from the two input NUMAs, |
499 | | * and then the function gplotMakeOutput() uses gnuplot to |
500 | | * generate the output plot. This can be either a .png file |
501 | | * or a .ps file, depending on whether you use GPLOT_PNG |
502 | | * or GPLOT_PS. */ |
503 | | {GPLOT *gplot; |
504 | | gplot = gplotCreate("sweep_output", GPLOT_PNG, |
505 | | "Sweep. Variance of difference of ON pixels vs. angle", |
506 | | "angle (deg)", "score"); |
507 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); |
508 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); |
509 | | gplotMakeOutput(gplot); |
510 | | gplotDestroy(&gplot); |
511 | | } |
512 | | #endif /* DEBUG_PLOT_SCORES */ |
513 | |
|
514 | 0 | cleanup: |
515 | 0 | pixDestroy(&pix); |
516 | 0 | pixDestroy(&pixt); |
517 | 0 | numaDestroy(&nascore); |
518 | 0 | numaDestroy(&natheta); |
519 | 0 | return ret; |
520 | 0 | } |
521 | | |
522 | | |
523 | | /*! |
524 | | * \brief pixFindSkewSweepAndSearch() |
525 | | * |
526 | | * \param[in] pixs 1 bpp |
527 | | * \param[out] pangle angle required to deskew; in degrees |
528 | | * \param[out] pconf confidence given by ratio of max/min score |
529 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
530 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
531 | | * and must not exceed redsweep |
532 | | * \param[in] sweeprange half the full range, assumed about 0; in degrees |
533 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
534 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
535 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
536 | | * |
537 | | * <pre> |
538 | | * Notes: |
539 | | * (1) This finds the skew angle, doing first a sweep through a set |
540 | | * of equal angles, and then doing a binary search until |
541 | | * convergence. |
542 | | * (2) Caller must check the return value for validity of the result. |
543 | | * (3) In computing the differential line sum variance score, we sum |
544 | | * the result over scanlines, but we always skip: |
545 | | * ~ at least one scanline |
546 | | * ~ not more than 10% of the image height |
547 | | * ~ not more than 5% of the image width |
548 | | * (4) See also notes in pixFindSkewSweepAndSearchScore() |
549 | | * </pre> |
550 | | */ |
551 | | l_ok |
552 | | pixFindSkewSweepAndSearch(PIX *pixs, |
553 | | l_float32 *pangle, |
554 | | l_float32 *pconf, |
555 | | l_int32 redsweep, |
556 | | l_int32 redsearch, |
557 | | l_float32 sweeprange, |
558 | | l_float32 sweepdelta, |
559 | | l_float32 minbsdelta) |
560 | 0 | { |
561 | 0 | return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, |
562 | 0 | redsweep, redsearch, 0.0, sweeprange, |
563 | 0 | sweepdelta, minbsdelta); |
564 | 0 | } |
565 | | |
566 | | |
567 | | /*! |
568 | | * \brief pixFindSkewSweepAndSearchScore() |
569 | | * |
570 | | * \param[in] pixs 1 bpp |
571 | | * \param[out] pangle angle required to deskew; in degrees |
572 | | * \param[out] pconf confidence given by ratio of max/min score |
573 | | * \param[out] pendscore [optional] max score; use NULL to ignore |
574 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
575 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
576 | | * and must not exceed redsweep |
577 | | * \param[in] sweepcenter angle about which sweep is performed; in degrees |
578 | | * \param[in] sweeprange half the full range, taken about sweepcenter; |
579 | | * in degrees |
580 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
581 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
582 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
583 | | * |
584 | | * <pre> |
585 | | * Notes: |
586 | | * (1) This finds the skew angle, doing first a sweep through a set |
587 | | * of equal angles, and then doing a binary search until convergence. |
588 | | * (2) There are two built-in constants that determine if the |
589 | | * returned confidence is nonzero: |
590 | | * ~ MinValidMaxscore (minimum allowed maxscore) |
591 | | * ~ MinscoreThreshFactor (determines minimum allowed |
592 | | * minscore, by multiplying by (height * width^2) |
593 | | * If either of these conditions is not satisfied, the returned |
594 | | * confidence value will be zero. The maxscore is optionally |
595 | | * returned in this function to allow evaluation of the |
596 | | * resulting angle by a method that is independent of the |
597 | | * returned confidence value. |
598 | | * (3) The larger the confidence value, the greater the probability |
599 | | * that the proper alignment is given by the angle that maximizes |
600 | | * variance. It should be compared to a threshold, which depends |
601 | | * on the application. Values between 3.0 and 6.0 are common. |
602 | | * (4) By default, the shear is about the UL corner. |
603 | | * </pre> |
604 | | */ |
605 | | l_ok |
606 | | pixFindSkewSweepAndSearchScore(PIX *pixs, |
607 | | l_float32 *pangle, |
608 | | l_float32 *pconf, |
609 | | l_float32 *pendscore, |
610 | | l_int32 redsweep, |
611 | | l_int32 redsearch, |
612 | | l_float32 sweepcenter, |
613 | | l_float32 sweeprange, |
614 | | l_float32 sweepdelta, |
615 | | l_float32 minbsdelta) |
616 | 0 | { |
617 | 0 | return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, |
618 | 0 | redsweep, redsearch, 0.0, |
619 | 0 | sweeprange, sweepdelta, |
620 | 0 | minbsdelta, |
621 | 0 | L_SHEAR_ABOUT_CORNER); |
622 | 0 | } |
623 | | |
624 | | |
625 | | /*! |
626 | | * \brief pixFindSkewSweepAndSearchScorePivot() |
627 | | * |
628 | | * \param[in] pixs 1 bpp |
629 | | * \param[out] pangle angle required to deskew; in degrees |
630 | | * \param[out] pconf confidence given by ratio of max/min score |
631 | | * \param[out] pendscore [optional] max score; use NULL to ignore |
632 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
633 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
634 | | * and must not exceed redsweep |
635 | | * \param[in] sweepcenter angle about which sweep is performed; in degrees |
636 | | * \param[in] sweeprange half the full range, taken about sweepcenter; |
637 | | * in degrees |
638 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
639 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
640 | | * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER |
641 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
642 | | * |
643 | | * <pre> |
644 | | * Notes: |
645 | | * (1) See notes in pixFindSkewSweepAndSearchScore(). |
646 | | * (2) This allows choice of shear pivoting from either the UL corner |
647 | | * or the center. For small angles, the ability to discriminate |
648 | | * angles is better with shearing from the UL corner. However, |
649 | | * for large angles (say, greater than 20 degrees), it is better |
650 | | * to shear about the center because a shear from the UL corner |
651 | | * loses too much of the image. |
652 | | * </pre> |
653 | | */ |
654 | | l_ok |
655 | | pixFindSkewSweepAndSearchScorePivot(PIX *pixs, |
656 | | l_float32 *pangle, |
657 | | l_float32 *pconf, |
658 | | l_float32 *pendscore, |
659 | | l_int32 redsweep, |
660 | | l_int32 redsearch, |
661 | | l_float32 sweepcenter, |
662 | | l_float32 sweeprange, |
663 | | l_float32 sweepdelta, |
664 | | l_float32 minbsdelta, |
665 | | l_int32 pivot) |
666 | 0 | { |
667 | 0 | l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; |
668 | 0 | l_int32 width, height; |
669 | 0 | l_float32 deg2rad, theta, delta; |
670 | 0 | l_float32 sum, maxscore, maxangle; |
671 | 0 | l_float32 centerangle, leftcenterangle, rightcenterangle; |
672 | 0 | l_float32 lefttemp, righttemp; |
673 | 0 | l_float32 bsearchscore[5]; |
674 | 0 | l_float32 minscore, minthresh; |
675 | 0 | l_float32 rangeleft; |
676 | 0 | NUMA *natheta, *nascore; |
677 | 0 | PIX *pixsw, *pixsch, *pixt1, *pixt2; |
678 | |
|
679 | 0 | if (pendscore) *pendscore = 0.0; |
680 | 0 | if (pangle) *pangle = 0.0; |
681 | 0 | if (pconf) *pconf = 0.0; |
682 | 0 | if (!pangle || !pconf) |
683 | 0 | return ERROR_INT("&angle and/or &conf not defined", __func__, 1); |
684 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
685 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
686 | 0 | if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) |
687 | 0 | return ERROR_INT("redsweep must be in {1,2,4,8}", __func__, 1); |
688 | 0 | if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) |
689 | 0 | return ERROR_INT("redsearch must be in {1,2,4,8}", __func__, 1); |
690 | 0 | if (redsearch > redsweep) |
691 | 0 | return ERROR_INT("redsearch must not exceed redsweep", __func__, 1); |
692 | 0 | if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) |
693 | 0 | return ERROR_INT("invalid pivot", __func__, 1); |
694 | | |
695 | 0 | deg2rad = 3.1415926535f / 180.f; |
696 | 0 | ret = 0; |
697 | | |
698 | | /* Generate reduced image for binary search, if requested */ |
699 | 0 | if (redsearch == 1) |
700 | 0 | pixsch = pixClone(pixs); |
701 | 0 | else if (redsearch == 2) |
702 | 0 | pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
703 | 0 | else if (redsearch == 4) |
704 | 0 | pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
705 | 0 | else /* redsearch == 8 */ |
706 | 0 | pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); |
707 | |
|
708 | 0 | pixZero(pixsch, &bzero); |
709 | 0 | if (bzero) { |
710 | 0 | pixDestroy(&pixsch); |
711 | 0 | return 1; |
712 | 0 | } |
713 | | |
714 | | /* Generate reduced image for sweep, if requested */ |
715 | 0 | ratio = redsweep / redsearch; |
716 | 0 | if (ratio == 1) { |
717 | 0 | pixsw = pixClone(pixsch); |
718 | 0 | } else { /* ratio > 1 */ |
719 | 0 | if (ratio == 2) |
720 | 0 | pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); |
721 | 0 | else if (ratio == 4) |
722 | 0 | pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); |
723 | 0 | else /* ratio == 8 */ |
724 | 0 | pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); |
725 | 0 | } |
726 | |
|
727 | 0 | pixt1 = pixCreateTemplate(pixsw); |
728 | 0 | if (ratio == 1) |
729 | 0 | pixt2 = pixClone(pixt1); |
730 | 0 | else |
731 | 0 | pixt2 = pixCreateTemplate(pixsch); |
732 | |
|
733 | 0 | nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); |
734 | 0 | natheta = numaCreate(nangles); |
735 | 0 | nascore = numaCreate(nangles); |
736 | |
|
737 | 0 | if (!pixsch || !pixsw) { |
738 | 0 | ret = ERROR_INT("pixsch and pixsw not both made", __func__, 1); |
739 | 0 | goto cleanup; |
740 | 0 | } |
741 | 0 | if (!pixt1 || !pixt2) { |
742 | 0 | ret = ERROR_INT("pixt1 and pixt2 not both made", __func__, 1); |
743 | 0 | goto cleanup; |
744 | 0 | } |
745 | 0 | if (!natheta || !nascore) { |
746 | 0 | ret = ERROR_INT("natheta and nascore not both made", __func__, 1); |
747 | 0 | goto cleanup; |
748 | 0 | } |
749 | | |
750 | | /* Do sweep */ |
751 | 0 | rangeleft = sweepcenter - sweeprange; |
752 | 0 | for (i = 0; i < nangles; i++) { |
753 | 0 | theta = rangeleft + i * sweepdelta; /* degrees */ |
754 | | |
755 | | /* Shear pix and put the result in pixt1 */ |
756 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) |
757 | 0 | pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); |
758 | 0 | else |
759 | 0 | pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); |
760 | | |
761 | | /* Get score */ |
762 | 0 | pixFindDifferentialSquareSum(pixt1, &sum); |
763 | |
|
764 | | #if DEBUG_PRINT_SCORES |
765 | | L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); |
766 | | #endif /* DEBUG_PRINT_SCORES */ |
767 | | |
768 | | /* Save the result in the output arrays */ |
769 | 0 | numaAddNumber(nascore, sum); |
770 | 0 | numaAddNumber(natheta, theta); |
771 | 0 | } |
772 | | |
773 | | /* Find the largest of the set (maxscore at maxangle) */ |
774 | 0 | numaGetMax(nascore, &maxscore, &maxindex); |
775 | 0 | numaGetFValue(natheta, maxindex, &maxangle); |
776 | |
|
777 | | #if DEBUG_PRINT_SWEEP |
778 | | L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, |
779 | | maxangle, maxscore); |
780 | | #endif /* DEBUG_PRINT_SWEEP */ |
781 | |
|
782 | | #if DEBUG_PLOT_SCORES |
783 | | /* Plot the sweep result -- the scores versus rotation angle -- |
784 | | * using gnuplot with GPLOT_LINES (lines connecting data points). */ |
785 | | {GPLOT *gplot; |
786 | | gplot = gplotCreate("sweep_output", GPLOT_PNG, |
787 | | "Sweep. Variance of difference of ON pixels vs. angle", |
788 | | "angle (deg)", "score"); |
789 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); |
790 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); |
791 | | gplotMakeOutput(gplot); |
792 | | gplotDestroy(&gplot); |
793 | | } |
794 | | #endif /* DEBUG_PLOT_SCORES */ |
795 | | |
796 | | /* Check if the max is at the end of the sweep. */ |
797 | 0 | n = numaGetCount(natheta); |
798 | 0 | if (maxindex == 0 || maxindex == n - 1) { |
799 | 0 | L_WARNING("max found at sweep edge\n", __func__); |
800 | 0 | goto cleanup; |
801 | 0 | } |
802 | | |
803 | | /* Empty the numas for re-use */ |
804 | 0 | numaEmpty(nascore); |
805 | 0 | numaEmpty(natheta); |
806 | | |
807 | | /* Do binary search to find skew angle. |
808 | | * First, set up initial three points. */ |
809 | 0 | centerangle = maxangle; |
810 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) { |
811 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); |
812 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); |
813 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), |
814 | 0 | L_BRING_IN_WHITE); |
815 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); |
816 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), |
817 | 0 | L_BRING_IN_WHITE); |
818 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); |
819 | 0 | } else { |
820 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); |
821 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); |
822 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), |
823 | 0 | L_BRING_IN_WHITE); |
824 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); |
825 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), |
826 | 0 | L_BRING_IN_WHITE); |
827 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); |
828 | 0 | } |
829 | |
|
830 | 0 | numaAddNumber(nascore, bsearchscore[2]); |
831 | 0 | numaAddNumber(natheta, centerangle); |
832 | 0 | numaAddNumber(nascore, bsearchscore[0]); |
833 | 0 | numaAddNumber(natheta, centerangle - sweepdelta); |
834 | 0 | numaAddNumber(nascore, bsearchscore[4]); |
835 | 0 | numaAddNumber(natheta, centerangle + sweepdelta); |
836 | | |
837 | | /* Start the search */ |
838 | 0 | delta = 0.5f * sweepdelta; |
839 | 0 | while (delta >= minbsdelta) |
840 | 0 | { |
841 | | /* Get the left intermediate score */ |
842 | 0 | leftcenterangle = centerangle - delta; |
843 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) |
844 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, |
845 | 0 | L_BRING_IN_WHITE); |
846 | 0 | else |
847 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, |
848 | 0 | L_BRING_IN_WHITE); |
849 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); |
850 | 0 | numaAddNumber(nascore, bsearchscore[1]); |
851 | 0 | numaAddNumber(natheta, leftcenterangle); |
852 | | |
853 | | /* Get the right intermediate score */ |
854 | 0 | rightcenterangle = centerangle + delta; |
855 | 0 | if (pivot == L_SHEAR_ABOUT_CORNER) |
856 | 0 | pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, |
857 | 0 | L_BRING_IN_WHITE); |
858 | 0 | else |
859 | 0 | pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, |
860 | 0 | L_BRING_IN_WHITE); |
861 | 0 | pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); |
862 | 0 | numaAddNumber(nascore, bsearchscore[3]); |
863 | 0 | numaAddNumber(natheta, rightcenterangle); |
864 | | |
865 | | /* Find the maximum of the five scores and its location. |
866 | | * Note that the maximum must be in the center |
867 | | * three values, not in the end two. */ |
868 | 0 | maxscore = bsearchscore[1]; |
869 | 0 | maxindex = 1; |
870 | 0 | for (i = 2; i < 4; i++) { |
871 | 0 | if (bsearchscore[i] > maxscore) { |
872 | 0 | maxscore = bsearchscore[i]; |
873 | 0 | maxindex = i; |
874 | 0 | } |
875 | 0 | } |
876 | | |
877 | | /* Set up score array to interpolate for the next iteration */ |
878 | 0 | lefttemp = bsearchscore[maxindex - 1]; |
879 | 0 | righttemp = bsearchscore[maxindex + 1]; |
880 | 0 | bsearchscore[2] = maxscore; |
881 | 0 | bsearchscore[0] = lefttemp; |
882 | 0 | bsearchscore[4] = righttemp; |
883 | | |
884 | | /* Get new center angle and delta for next iteration */ |
885 | 0 | centerangle = centerangle + delta * (maxindex - 2); |
886 | 0 | delta = 0.5f * delta; |
887 | 0 | } |
888 | 0 | *pangle = centerangle; |
889 | |
|
890 | | #if DEBUG_PRINT_SCORES |
891 | | L_INFO(" Binary search score = %7.3f\n", __func__, bsearchscore[2]); |
892 | | #endif /* DEBUG_PRINT_SCORES */ |
893 | |
|
894 | 0 | if (pendscore) /* save if requested */ |
895 | 0 | *pendscore = bsearchscore[2]; |
896 | | |
897 | | /* Return the ratio of Max score over Min score |
898 | | * as a confidence value. Don't trust if the Min score |
899 | | * is too small, which can happen if the image is all black |
900 | | * with only a few white pixels interspersed. In that case, |
901 | | * we get a contribution from the top and bottom edges when |
902 | | * vertically sheared, but this contribution becomes zero when |
903 | | * the shear angle is zero. For zero shear angle, the only |
904 | | * contribution will be from the white pixels. We expect that |
905 | | * the signal goes as the product of the (height * width^2), |
906 | | * so we compute a (hopefully) normalized minimum threshold as |
907 | | * a function of these dimensions. */ |
908 | 0 | numaGetMin(nascore, &minscore, &minloc); |
909 | 0 | width = pixGetWidth(pixsch); |
910 | 0 | height = pixGetHeight(pixsch); |
911 | 0 | minthresh = MinscoreThreshFactor * width * width * height; |
912 | |
|
913 | | #if DEBUG_THRESHOLD |
914 | | L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", __func__, |
915 | | minthresh, minscore); |
916 | | L_INFO(" maxscore = %10.2f\n", __func__, maxscore); |
917 | | #endif /* DEBUG_THRESHOLD */ |
918 | |
|
919 | 0 | if (minscore > minthresh) |
920 | 0 | *pconf = maxscore / minscore; |
921 | 0 | else |
922 | 0 | *pconf = 0.0; |
923 | | |
924 | | /* Don't trust it if too close to the edge of the sweep |
925 | | * range or if maxscore is small */ |
926 | 0 | if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || |
927 | 0 | (centerangle < rangeleft + sweepdelta) || |
928 | 0 | (maxscore < MinValidMaxscore)) |
929 | 0 | *pconf = 0.0; |
930 | |
|
931 | | #if DEBUG_PRINT_BINARY |
932 | | lept_stderr("Binary search: angle = %7.3f, score ratio = %6.2f\n", |
933 | | *pangle, *pconf); |
934 | | lept_stderr(" max score = %8.0f\n", maxscore); |
935 | | #endif /* DEBUG_PRINT_BINARY */ |
936 | |
|
937 | | #if DEBUG_PLOT_SCORES |
938 | | /* Plot the result -- the scores versus rotation angle -- |
939 | | * using gnuplot with GPLOT_POINTS. Because the data |
940 | | * points are not ordered by theta (increasing or decreasing), |
941 | | * using GPLOT_LINES would be confusing! */ |
942 | | {GPLOT *gplot; |
943 | | gplot = gplotCreate("search_output", GPLOT_PNG, |
944 | | "Binary search. Variance of difference of ON pixels vs. angle", |
945 | | "angle (deg)", "score"); |
946 | | gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); |
947 | | gplotMakeOutput(gplot); |
948 | | gplotDestroy(&gplot); |
949 | | } |
950 | | #endif /* DEBUG_PLOT_SCORES */ |
951 | |
|
952 | 0 | cleanup: |
953 | 0 | pixDestroy(&pixsw); |
954 | 0 | pixDestroy(&pixsch); |
955 | 0 | pixDestroy(&pixt1); |
956 | 0 | pixDestroy(&pixt2); |
957 | 0 | numaDestroy(&nascore); |
958 | 0 | numaDestroy(&natheta); |
959 | 0 | return ret; |
960 | 0 | } |
961 | | |
962 | | |
963 | | /*---------------------------------------------------------------------* |
964 | | * Search over arbitrary range of angles in orthogonal directions * |
965 | | *---------------------------------------------------------------------*/ |
966 | | /* |
967 | | * \brief pixFindSkewOrthogonalRange() |
968 | | * |
969 | | * \param[in] pixs 1 bpp |
970 | | * \param[out] pangle angle required to deskew; in degrees cw |
971 | | * \param[out] pconf confidence given by ratio of max/min score |
972 | | * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 |
973 | | * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; |
974 | | * and must not exceed redsweep |
975 | | * \param[in] sweeprange half the full range in each orthogonal |
976 | | * direction, taken about 0, in degrees |
977 | | * \param[in] sweepdelta angle increment of sweep; in degrees |
978 | | * \param[in] minbsdelta min binary search increment angle; in degrees |
979 | | * \param[in] confprior amount by which confidence of 90 degree rotated |
980 | | * result is reduced when comparing with unrotated |
981 | | * confidence value |
982 | | * \return 0 if OK, 1 on error or if angle measurement not valid |
983 | | * |
984 | | * <pre> |
985 | | * Notes: |
986 | | * (1) This searches for the skew angle, first in the range |
987 | | * [-sweeprange, sweeprange], and then in |
988 | | * [90 - sweeprange, 90 + sweeprange], with angles measured |
989 | | * clockwise. For exploring the full range of possibilities, |
990 | | * suggest using sweeprange = 47.0 degrees, giving some overlap |
991 | | * at 45 and 135 degrees. From these results, and discounting |
992 | | * the the second confidence by %confprior, it selects the |
993 | | * angle for maximal differential variance. If the angle |
994 | | * is larger than pi/4, the angle found after 90 degree rotation |
995 | | * is selected. |
996 | | * (2) The larger the confidence value, the greater the probability |
997 | | * that the proper alignment is given by the angle that maximizes |
998 | | * variance. It should be compared to a threshold, which depends |
999 | | * on the application. Values between 3.0 and 6.0 are common. |
1000 | | * (3) Allowing for both portrait and landscape searches is more |
1001 | | * difficult, because if the signal from the text lines is weak, |
1002 | | * a signal from vertical rules can be larger! |
1003 | | * The most difficult documents to deskew have some or all of: |
1004 | | * (a) Multiple columns, not aligned |
1005 | | * (b) Black lines along the vertical edges |
1006 | | * (c) Text from two pages, and at different angles |
1007 | | * Rule of thumb for resolution: |
1008 | | * (a) If the margins are clean, you can work at 75 ppi, |
1009 | | * although 100 ppi is safer. |
1010 | | * (b) If there are vertical lines in the margins, do not |
1011 | | * work below 150 ppi. The signal from the text lines must |
1012 | | * exceed that from the margin lines. |
1013 | | * (4) Choosing the %confprior parameter depends on knowing something |
1014 | | * about the source of image. However, we're not using |
1015 | | * real probabilities here, so its use is qualitative. |
1016 | | * If landscape and portrait are equally likely, use |
1017 | | * %confprior = 0.0. If the likelihood of portrait (non-rotated) |
1018 | | * is 100 times higher than that of landscape, we want to reduce |
1019 | | * the chance that we rotate to landscape in a situation where |
1020 | | * the landscape signal is accidentally larger than the |
1021 | | * portrait signal. To do this use a positive value of |
1022 | | * %confprior; say 1.5. |
1023 | | * </pre> |
1024 | | */ |
1025 | | l_int32 |
1026 | | pixFindSkewOrthogonalRange(PIX *pixs, |
1027 | | l_float32 *pangle, |
1028 | | l_float32 *pconf, |
1029 | | l_int32 redsweep, |
1030 | | l_int32 redsearch, |
1031 | | l_float32 sweeprange, |
1032 | | l_float32 sweepdelta, |
1033 | | l_float32 minbsdelta, |
1034 | | l_float32 confprior) |
1035 | 0 | { |
1036 | 0 | l_float32 angle1, conf1, score1, angle2, conf2, score2; |
1037 | 0 | PIX *pixr; |
1038 | |
|
1039 | 0 | if (pangle) *pangle = 0.0; |
1040 | 0 | if (pconf) *pconf = 0.0; |
1041 | 0 | if (!pangle || !pconf) |
1042 | 0 | return ERROR_INT("&angle and/or &conf not defined", __func__, 1); |
1043 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1044 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1045 | | |
1046 | 0 | pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, |
1047 | 0 | redsweep, redsearch, 0.0, |
1048 | 0 | sweeprange, sweepdelta, minbsdelta, |
1049 | 0 | L_SHEAR_ABOUT_CORNER); |
1050 | 0 | pixr = pixRotateOrth(pixs, 1); |
1051 | 0 | pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, |
1052 | 0 | redsweep, redsearch, 0.0, |
1053 | 0 | sweeprange, sweepdelta, minbsdelta, |
1054 | 0 | L_SHEAR_ABOUT_CORNER); |
1055 | 0 | pixDestroy(&pixr); |
1056 | |
|
1057 | 0 | if (conf1 > conf2 - confprior) { |
1058 | 0 | *pangle = angle1; |
1059 | 0 | *pconf = conf1; |
1060 | 0 | } else { |
1061 | 0 | *pangle = -90.0f + angle2; |
1062 | 0 | *pconf = conf2; |
1063 | 0 | } |
1064 | |
|
1065 | | #if DEBUG_PRINT_ORTH |
1066 | | lept_stderr(" About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", |
1067 | | angle1, conf1, score1); |
1068 | | lept_stderr(" About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", |
1069 | | angle2, conf2, score2); |
1070 | | lept_stderr(" Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); |
1071 | | #endif /* DEBUG_PRINT_ORTH */ |
1072 | |
|
1073 | 0 | return 0; |
1074 | 0 | } |
1075 | | |
1076 | | |
1077 | | |
1078 | | /*----------------------------------------------------------------* |
1079 | | * Differential square sum function * |
1080 | | *----------------------------------------------------------------*/ |
1081 | | /*! |
1082 | | * \brief pixFindDifferentialSquareSum() |
1083 | | * |
1084 | | * \param[in] pixs |
1085 | | * \param[out] psum result |
1086 | | * \return 0 if OK, 1 on error |
1087 | | * |
1088 | | * <pre> |
1089 | | * Notes: |
1090 | | * (1) At the top and bottom, we skip: |
1091 | | * ~ at least one scanline |
1092 | | * ~ not more than 10% of the image height |
1093 | | * ~ not more than 5% of the image width |
1094 | | * </pre> |
1095 | | */ |
1096 | | l_ok |
1097 | | pixFindDifferentialSquareSum(PIX *pixs, |
1098 | | l_float32 *psum) |
1099 | 0 | { |
1100 | 0 | l_int32 i, n; |
1101 | 0 | l_int32 w, h, skiph, skip, nskip; |
1102 | 0 | l_float32 val1, val2, diff, sum; |
1103 | 0 | NUMA *na; |
1104 | |
|
1105 | 0 | if (!psum) |
1106 | 0 | return ERROR_INT("&sum not defined", __func__, 1); |
1107 | 0 | *psum = 0.0; |
1108 | 0 | if (!pixs) |
1109 | 0 | return ERROR_INT("pixs not defined", __func__, 1); |
1110 | | |
1111 | | /* Generate a number array consisting of the sum |
1112 | | * of pixels in each row of pixs */ |
1113 | 0 | if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) |
1114 | 0 | return ERROR_INT("na not made", __func__, 1); |
1115 | | |
1116 | | /* Compute the number of rows at top and bottom to omit. |
1117 | | * We omit these to avoid getting a spurious signal from |
1118 | | * the top and bottom of a (nearly) all black image. */ |
1119 | 0 | w = pixGetWidth(pixs); |
1120 | 0 | h = pixGetHeight(pixs); |
1121 | 0 | skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ |
1122 | 0 | skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ |
1123 | 0 | nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ |
1124 | | |
1125 | | /* Sum the squares of differential row sums, on the |
1126 | | * allowed rows. Note that nskip must be >= 1. */ |
1127 | 0 | n = numaGetCount(na); |
1128 | 0 | sum = 0.0; |
1129 | 0 | for (i = nskip; i < n - nskip; i++) { |
1130 | 0 | numaGetFValue(na, i - 1, &val1); |
1131 | 0 | numaGetFValue(na, i, &val2); |
1132 | 0 | diff = val2 - val1; |
1133 | 0 | sum += diff * diff; |
1134 | 0 | } |
1135 | 0 | numaDestroy(&na); |
1136 | 0 | *psum = sum; |
1137 | 0 | return 0; |
1138 | 0 | } |
1139 | | |
1140 | | |
1141 | | /*----------------------------------------------------------------* |
1142 | | * Normalized square sum * |
1143 | | *----------------------------------------------------------------*/ |
1144 | | /*! |
1145 | | * \brief pixFindNormalizedSquareSum() |
1146 | | * |
1147 | | * \param[in] pixs |
1148 | | * \param[out] phratio [optional] ratio of normalized horiz square sum |
1149 | | * to result if the pixel distribution were uniform |
1150 | | * \param[out] pvratio [optional] ratio of normalized vert square sum |
1151 | | * to result if the pixel distribution were uniform |
1152 | | * \param[out] pfract [optional] ratio of fg pixels to total pixels |
1153 | | * \return 0 if OK, 1 on error or if there are no fg pixels |
1154 | | * |
1155 | | * <pre> |
1156 | | * Notes: |
1157 | | * (1) Let the image have h scanlines and N fg pixels. |
1158 | | * If the pixels were uniformly distributed on scanlines, |
1159 | | * the sum of squares of fg pixels on each scanline would be |
1160 | | * h * (N / h)^2. However, if the pixels are not uniformly |
1161 | | * distributed (e.g., for text), the sum of squares of fg |
1162 | | * pixels will be larger. We return in hratio and vratio the |
1163 | | * ratio of these two values. |
1164 | | * (2) If there are no fg pixels, hratio and vratio are returned as 0.0. |
1165 | | * </pre> |
1166 | | */ |
1167 | | l_ok |
1168 | | pixFindNormalizedSquareSum(PIX *pixs, |
1169 | | l_float32 *phratio, |
1170 | | l_float32 *pvratio, |
1171 | | l_float32 *pfract) |
1172 | 0 | { |
1173 | 0 | l_int32 i, w, h, empty; |
1174 | 0 | l_float32 sum, sumsq, uniform, val; |
1175 | 0 | NUMA *na; |
1176 | 0 | PIX *pixt; |
1177 | |
|
1178 | 0 | if (phratio) *phratio = 0.0; |
1179 | 0 | if (pvratio) *pvratio = 0.0; |
1180 | 0 | if (pfract) *pfract = 0.0; |
1181 | 0 | if (!phratio && !pvratio) |
1182 | 0 | return ERROR_INT("nothing to do", __func__, 1); |
1183 | 0 | if (!pixs || pixGetDepth(pixs) != 1) |
1184 | 0 | return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); |
1185 | 0 | pixGetDimensions(pixs, &w, &h, NULL); |
1186 | |
|
1187 | 0 | empty = 0; |
1188 | 0 | if (phratio) { |
1189 | 0 | na = pixCountPixelsByRow(pixs, NULL); |
1190 | 0 | numaGetSum(na, &sum); /* fg pixels */ |
1191 | 0 | if (pfract) *pfract = sum / (l_float32)(w * h); |
1192 | 0 | if (sum != 0.0) { |
1193 | 0 | uniform = sum * sum / h; /* h*(sum / h)^2 */ |
1194 | 0 | sumsq = 0.0; |
1195 | 0 | for (i = 0; i < h; i++) { |
1196 | 0 | numaGetFValue(na, i, &val); |
1197 | 0 | sumsq += val * val; |
1198 | 0 | } |
1199 | 0 | *phratio = sumsq / uniform; |
1200 | 0 | } else { |
1201 | 0 | empty = 1; |
1202 | 0 | } |
1203 | 0 | numaDestroy(&na); |
1204 | 0 | } |
1205 | |
|
1206 | 0 | if (pvratio) { |
1207 | 0 | if (empty == 1) return 1; |
1208 | 0 | pixt = pixRotateOrth(pixs, 1); |
1209 | 0 | na = pixCountPixelsByRow(pixt, NULL); |
1210 | 0 | numaGetSum(na, &sum); |
1211 | 0 | if (pfract) *pfract = sum / (l_float32)(w * h); |
1212 | 0 | if (sum != 0.0) { |
1213 | 0 | uniform = sum * sum / w; |
1214 | 0 | sumsq = 0.0; |
1215 | 0 | for (i = 0; i < w; i++) { |
1216 | 0 | numaGetFValue(na, i, &val); |
1217 | 0 | sumsq += val * val; |
1218 | 0 | } |
1219 | 0 | *pvratio = sumsq / uniform; |
1220 | 0 | } else { |
1221 | 0 | empty = 1; |
1222 | 0 | } |
1223 | 0 | pixDestroy(&pixt); |
1224 | 0 | numaDestroy(&na); |
1225 | 0 | } |
1226 | | |
1227 | 0 | return empty; |
1228 | 0 | } |