/src/tesseract/src/textord/devanagari_processing.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | * File: devanagari_processing.cpp |
3 | | * Description: Methods to process images containing devanagari symbols, |
4 | | * prior to classification. |
5 | | * Author: Shobhit Saxena |
6 | | * |
7 | | * (C) Copyright 2008, Google Inc. |
8 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | ** you may not use this file except in compliance with the License. |
10 | | ** You may obtain a copy of the License at |
11 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
12 | | ** Unless required by applicable law or agreed to in writing, software |
13 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
14 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | ** See the License for the specific language governing permissions and |
16 | | ** limitations under the License. |
17 | | * |
18 | | **********************************************************************/ |
19 | | |
20 | | #ifdef HAVE_CONFIG_H |
21 | | # include "config_auto.h" |
22 | | #endif |
23 | | |
24 | | #include "devanagari_processing.h" |
25 | | |
26 | | #include "debugpixa.h" |
27 | | #include "statistc.h" |
28 | | #include "tordmain.h" |
29 | | |
30 | | #include <allheaders.h> |
31 | | |
32 | | namespace tesseract { |
33 | | |
34 | | // Flags controlling the debugging information for shiro-rekha splitting |
35 | | // strategies. |
36 | | INT_VAR(devanagari_split_debuglevel, 0, "Debug level for split shiro-rekha process."); |
37 | | |
38 | | BOOL_VAR(devanagari_split_debugimage, 0, |
39 | | "Whether to create a debug image for split shiro-rekha process."); |
40 | | |
41 | | ShiroRekhaSplitter::ShiroRekhaSplitter() : |
42 | 4 | orig_pix_(nullptr), |
43 | 4 | splitted_image_(nullptr), |
44 | 4 | pageseg_split_strategy_(NO_SPLIT), |
45 | 4 | ocr_split_strategy_(NO_SPLIT), |
46 | 4 | debug_image_(nullptr), |
47 | 4 | segmentation_block_list_(nullptr), |
48 | 4 | global_xheight_(kUnspecifiedXheight), |
49 | 4 | perform_close_(false) |
50 | 4 | { |
51 | 4 | } |
52 | | |
53 | 0 | ShiroRekhaSplitter::~ShiroRekhaSplitter() { |
54 | 0 | Clear(); |
55 | 0 | } |
56 | | |
57 | 32.5k | void ShiroRekhaSplitter::Clear() { |
58 | 32.5k | orig_pix_.destroy(); |
59 | 32.5k | splitted_image_.destroy(); |
60 | 32.5k | pageseg_split_strategy_ = NO_SPLIT; |
61 | 32.5k | ocr_split_strategy_ = NO_SPLIT; |
62 | 32.5k | debug_image_.destroy(); |
63 | 32.5k | segmentation_block_list_ = nullptr; |
64 | 32.5k | global_xheight_ = kUnspecifiedXheight; |
65 | 32.5k | perform_close_ = false; |
66 | 32.5k | } |
67 | | |
68 | | // On setting the input image, a clone of it is owned by this class. |
69 | 16.2k | void ShiroRekhaSplitter::set_orig_pix(Image pix) { |
70 | 16.2k | if (orig_pix_) { |
71 | 0 | orig_pix_.destroy(); |
72 | 0 | } |
73 | 16.2k | orig_pix_ = pix.clone(); |
74 | 16.2k | } |
75 | | |
76 | | // Top-level method to perform splitting based on current settings. |
77 | | // Returns true if a split was actually performed. |
78 | | // split_for_pageseg should be true if the splitting is being done prior to |
79 | | // page segmentation. This mode uses the flag |
80 | | // pageseg_devanagari_split_strategy to determine the splitting strategy. |
81 | 32.5k | bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) { |
82 | 32.5k | SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_; |
83 | 32.5k | if (split_strategy == NO_SPLIT) { |
84 | 32.5k | return false; // Nothing to do. |
85 | 32.5k | } |
86 | 0 | ASSERT_HOST(split_strategy == MINIMAL_SPLIT || split_strategy == MAXIMAL_SPLIT); |
87 | 0 | ASSERT_HOST(orig_pix_); |
88 | 0 | if (devanagari_split_debuglevel > 0) { |
89 | 0 | tprintf("Splitting shiro-rekha ...\n"); |
90 | 0 | tprintf("Split strategy = %s\n", split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal"); |
91 | 0 | tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no"); |
92 | 0 | } |
93 | | // Create a copy of original image to store the splitting output. |
94 | 0 | splitted_image_.destroy(); |
95 | 0 | splitted_image_ = orig_pix_.copy(); |
96 | | |
97 | | // Initialize debug image if required. |
98 | 0 | if (devanagari_split_debugimage) { |
99 | 0 | debug_image_.destroy(); |
100 | 0 | debug_image_ = pixConvertTo32(orig_pix_); |
101 | 0 | } |
102 | | |
103 | | // Determine all connected components in the input image. A close operation |
104 | | // may be required prior to this, depending on the current settings. |
105 | 0 | Image pix_for_ccs = orig_pix_.clone(); |
106 | 0 | if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) { |
107 | 0 | if (devanagari_split_debuglevel > 0) { |
108 | 0 | tprintf("Performing a global close operation..\n"); |
109 | 0 | } |
110 | | // A global measure is available for xheight, but no local information |
111 | | // exists. |
112 | 0 | pix_for_ccs.destroy(); |
113 | 0 | pix_for_ccs = orig_pix_.copy(); |
114 | 0 | PerformClose(pix_for_ccs, global_xheight_); |
115 | 0 | } |
116 | 0 | Pixa *ccs; |
117 | 0 | Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8); |
118 | 0 | boxaDestroy(&tmp_boxa); |
119 | 0 | pix_for_ccs.destroy(); |
120 | | |
121 | | // Iterate over all connected components. Get their bounding boxes and clip |
122 | | // out the image regions corresponding to these boxes from the original image. |
123 | | // Conditionally run splitting on each of them. |
124 | 0 | Boxa *regions_to_clear = boxaCreate(0); |
125 | 0 | int num_ccs = 0; |
126 | 0 | if (ccs != nullptr) { |
127 | 0 | num_ccs = pixaGetCount(ccs); |
128 | 0 | } |
129 | 0 | for (int i = 0; i < num_ccs; ++i) { |
130 | 0 | Box *box = pixaGetBox(ccs, i, L_CLONE); |
131 | 0 | Image word_pix = pixClipRectangle(orig_pix_, box, nullptr); |
132 | 0 | ASSERT_HOST(word_pix); |
133 | 0 | int xheight = GetXheightForCC(box); |
134 | 0 | if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) { |
135 | 0 | pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0); |
136 | 0 | } |
137 | | // If some xheight measure is available, attempt to pre-eliminate small |
138 | | // blobs from the shiro-rekha process. This is primarily to save the CCs |
139 | | // corresponding to punctuation marks/small dots etc which are part of |
140 | | // larger graphemes. |
141 | 0 | l_int32 x, y, w, h; |
142 | 0 | boxGetGeometry(box, &x, &y, &w, &h); |
143 | 0 | if (xheight == kUnspecifiedXheight || (w > xheight / 3 && h > xheight / 2)) { |
144 | 0 | SplitWordShiroRekha(split_strategy, word_pix, xheight, x, y, regions_to_clear); |
145 | 0 | } else if (devanagari_split_debuglevel > 0) { |
146 | 0 | tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", x, y, w, h); |
147 | 0 | } |
148 | 0 | word_pix.destroy(); |
149 | 0 | boxDestroy(&box); |
150 | 0 | } |
151 | | // Actually clear the boxes now. |
152 | 0 | for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) { |
153 | 0 | Box *box = boxaGetBox(regions_to_clear, i, L_CLONE); |
154 | 0 | pixClearInRect(splitted_image_, box); |
155 | 0 | boxDestroy(&box); |
156 | 0 | } |
157 | 0 | boxaDestroy(®ions_to_clear); |
158 | 0 | pixaDestroy(&ccs); |
159 | 0 | if (devanagari_split_debugimage && pixa_debug != nullptr) { |
160 | 0 | pixa_debug->AddPix(debug_image_, split_for_pageseg ? "pageseg_split" : "ocr_split"); |
161 | 0 | } |
162 | 0 | return true; |
163 | 32.5k | } |
164 | | |
165 | | // Method to perform a close operation on the input image. The xheight |
166 | | // estimate decides the size of sel used. |
167 | 0 | void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) { |
168 | 0 | pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3); |
169 | 0 | } |
170 | | |
171 | | // This method resolves the cc bbox to a particular row and returns the row's |
172 | | // xheight. |
173 | 0 | int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) { |
174 | 0 | if (!segmentation_block_list_) { |
175 | 0 | return global_xheight_; |
176 | 0 | } |
177 | | // Compute the box coordinates in Tesseract's coordinate system. |
178 | 0 | l_int32 x, y, w, h; |
179 | 0 | boxGetGeometry(cc_bbox, &x, &y, &w, &h); |
180 | 0 | TBOX bbox(x, pixGetHeight(orig_pix_) - y - h - 1, |
181 | 0 | x + w, pixGetHeight(orig_pix_) - y - 1); |
182 | | // Iterate over all blocks. |
183 | 0 | BLOCK_IT block_it(segmentation_block_list_); |
184 | 0 | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
185 | 0 | BLOCK *block = block_it.data(); |
186 | | // Iterate over all rows in the block. |
187 | 0 | ROW_IT row_it(block->row_list()); |
188 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
189 | 0 | ROW *row = row_it.data(); |
190 | 0 | if (!row->bounding_box().major_overlap(bbox)) { |
191 | 0 | continue; |
192 | 0 | } |
193 | | // Row could be skewed, warped, etc. Use the position of the box to |
194 | | // determine the baseline position of the row for that x-coordinate. |
195 | | // Create a square TBOX whose baseline's mid-point lies at this point |
196 | | // and side is row's xheight. Take the overlap of this box with the input |
197 | | // box and check if it is a 'major overlap'. If so, this box lies in this |
198 | | // row. In that case, return the xheight for this row. |
199 | 0 | float box_middle = 0.5 * (bbox.left() + bbox.right()); |
200 | 0 | int baseline = static_cast<int>(row->base_line(box_middle) + 0.5); |
201 | 0 | TBOX test_box(box_middle - row->x_height() / 2, baseline, box_middle + row->x_height() / 2, |
202 | 0 | static_cast<int>(baseline + row->x_height())); |
203 | | // Compute overlap. If it is a major overlap, this is the right row. |
204 | 0 | if (bbox.major_overlap(test_box)) { |
205 | 0 | return row->x_height(); |
206 | 0 | } |
207 | 0 | } |
208 | 0 | } |
209 | | // No row found for this bbox. |
210 | 0 | return kUnspecifiedXheight; |
211 | 0 | } |
212 | | |
213 | | // Returns a list of regions (boxes) which should be cleared in the original |
214 | | // image so as to perform shiro-rekha splitting. Pix is assumed to carry one |
215 | | // (or less) word only. Xheight measure could be the global estimate, the row |
216 | | // estimate, or unspecified. If unspecified, over splitting may occur, since a |
217 | | // conservative estimate of stroke width along with an associated multiplier |
218 | | // is used in its place. It is advisable to have a specified xheight when |
219 | | // splitting for classification/training. |
220 | | // A vertical projection histogram of all the on-pixels in the input pix is |
221 | | // computed. The maxima of this histogram is regarded as an approximate location |
222 | | // of the shiro-rekha. By descending on the maxima's peak on both sides, |
223 | | // stroke width of shiro-rekha is estimated. |
224 | | // A horizontal projection histogram is computed for a sub-image of the input |
225 | | // image, which extends from just below the shiro-rekha down to a certain |
226 | | // leeway. The leeway depends on the input xheight, if provided, else a |
227 | | // conservative multiplier on approximate stroke width is used (which may lead |
228 | | // to over-splitting). |
229 | | void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, |
230 | 0 | int word_left, int word_top, Boxa *regions_to_clear) { |
231 | 0 | if (split_strategy == NO_SPLIT) { |
232 | 0 | return; |
233 | 0 | } |
234 | 0 | int width = pixGetWidth(pix); |
235 | 0 | int height = pixGetHeight(pix); |
236 | | // Statistically determine the yextents of the shiro-rekha. |
237 | 0 | int shirorekha_top, shirorekha_bottom, shirorekha_ylevel; |
238 | 0 | GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, &shirorekha_ylevel); |
239 | | // Since the shiro rekha is also a stroke, its width is equal to the stroke |
240 | | // width. |
241 | 0 | int stroke_width = shirorekha_bottom - shirorekha_top + 1; |
242 | | |
243 | | // Some safeguards to protect CCs we do not want to be split. |
244 | | // These are particularly useful when the word wasn't eliminated earlier |
245 | | // because xheight information was unavailable. |
246 | 0 | if (shirorekha_ylevel > height / 2) { |
247 | | // Shirorekha shouldn't be in the bottom half of the word. |
248 | 0 | if (devanagari_split_debuglevel > 0) { |
249 | 0 | tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", word_left, |
250 | 0 | word_top); |
251 | 0 | } |
252 | 0 | return; |
253 | 0 | } |
254 | 0 | if (stroke_width > height / 3) { |
255 | | // Even the boldest of fonts shouldn't do this. |
256 | 0 | if (devanagari_split_debuglevel > 0) { |
257 | 0 | tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", word_left, word_top); |
258 | 0 | } |
259 | 0 | return; |
260 | 0 | } |
261 | | |
262 | | // Clear the ascender and descender regions of the word. |
263 | | // Obtain a vertical projection histogram for the resulting image. |
264 | 0 | Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3); |
265 | 0 | Image word_in_xheight = pix.copy(); |
266 | 0 | pixClearInRect(word_in_xheight, box_to_clear); |
267 | | // Also clear any pixels which are below shirorekha_bottom + some leeway. |
268 | | // The leeway is set to xheight if the information is available, else it is a |
269 | | // multiplier applied to the stroke width. |
270 | 0 | int leeway_to_keep = stroke_width * 3; |
271 | 0 | if (xheight != kUnspecifiedXheight) { |
272 | | // This is because the xheight-region typically includes the shiro-rekha |
273 | | // inside it, i.e., the top of the xheight range corresponds to the top of |
274 | | // shiro-rekha. |
275 | 0 | leeway_to_keep = xheight - stroke_width; |
276 | 0 | } |
277 | 0 | auto y = shirorekha_bottom + leeway_to_keep; |
278 | 0 | boxSetGeometry(box_to_clear, -1, y, -1, height - y); |
279 | 0 | pixClearInRect(word_in_xheight, box_to_clear); |
280 | 0 | boxDestroy(&box_to_clear); |
281 | |
|
282 | 0 | PixelHistogram vert_hist; |
283 | 0 | vert_hist.ConstructVerticalCountHist(word_in_xheight); |
284 | 0 | word_in_xheight.destroy(); |
285 | | |
286 | | // If the number of black pixel in any column of the image is less than a |
287 | | // fraction of the stroke width, treat it as noise / a stray mark. Perform |
288 | | // these changes inside the vert_hist data itself, as that is used later on as |
289 | | // a bit vector for the final split decision at every column. |
290 | 0 | for (int i = 0; i < width; ++i) { |
291 | 0 | if (vert_hist.hist()[i] <= stroke_width / 4) { |
292 | 0 | vert_hist.hist()[i] = 0; |
293 | 0 | } else { |
294 | 0 | vert_hist.hist()[i] = 1; |
295 | 0 | } |
296 | 0 | } |
297 | | // In order to split the line at any point, we make sure that the width of the |
298 | | // gap is at least half the stroke width. |
299 | 0 | int i = 0; |
300 | 0 | int cur_component_width = 0; |
301 | 0 | while (i < width) { |
302 | 0 | if (!vert_hist.hist()[i]) { |
303 | 0 | int j = 0; |
304 | 0 | while (i + j < width && !vert_hist.hist()[i + j]) { |
305 | 0 | ++j; |
306 | 0 | } |
307 | 0 | if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) { |
308 | | // Perform a shiro-rekha split. The intervening region lies from i to |
309 | | // i+j-1. |
310 | | // A minimal single-pixel split makes the estimation of intra- and |
311 | | // inter-word spacing easier during page layout analysis, |
312 | | // whereas a maximal split may be needed for OCR, depending on |
313 | | // how the engine was trained. |
314 | 0 | bool minimal_split = (split_strategy == MINIMAL_SPLIT); |
315 | 0 | int split_width = minimal_split ? 1 : j; |
316 | 0 | int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i; |
317 | 0 | if (!minimal_split || (i != 0 && i + j != width)) { |
318 | 0 | Box *box_to_clear = |
319 | 0 | boxCreate(word_left + split_left, word_top + shirorekha_top - stroke_width / 3, |
320 | 0 | split_width, 5 * stroke_width / 3); |
321 | 0 | if (box_to_clear) { |
322 | 0 | boxaAddBox(regions_to_clear, box_to_clear, L_CLONE); |
323 | | // Mark this in the debug image if needed. |
324 | 0 | if (devanagari_split_debugimage) { |
325 | 0 | pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128); |
326 | 0 | } |
327 | 0 | boxDestroy(&box_to_clear); |
328 | 0 | cur_component_width = 0; |
329 | 0 | } |
330 | 0 | } |
331 | 0 | } |
332 | 0 | i += j; |
333 | 0 | } else { |
334 | 0 | ++i; |
335 | 0 | ++cur_component_width; |
336 | 0 | } |
337 | 0 | } |
338 | 0 | } |
339 | | |
340 | | // Refreshes the words in the segmentation block list by using blobs in the |
341 | | // input block list. |
342 | | // The segmentation block list must be set. |
343 | 0 | void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs) { |
344 | | // The segmentation block list must have been specified. |
345 | 0 | ASSERT_HOST(segmentation_block_list_); |
346 | 0 | if (devanagari_split_debuglevel > 0) { |
347 | 0 | tprintf("Before refreshing blobs:\n"); |
348 | 0 | PrintSegmentationStats(segmentation_block_list_); |
349 | 0 | tprintf("New Blobs found: %d\n", new_blobs->length()); |
350 | 0 | } |
351 | |
|
352 | 0 | C_BLOB_LIST not_found_blobs; |
353 | 0 | RefreshWordBlobsFromNewBlobs( |
354 | 0 | segmentation_block_list_, new_blobs, |
355 | 0 | ((devanagari_split_debugimage && debug_image_) ? ¬_found_blobs : nullptr)); |
356 | |
|
357 | 0 | if (devanagari_split_debuglevel > 0) { |
358 | 0 | tprintf("After refreshing blobs:\n"); |
359 | 0 | PrintSegmentationStats(segmentation_block_list_); |
360 | 0 | } |
361 | 0 | if (devanagari_split_debugimage && debug_image_) { |
362 | | // Plot out the original blobs for which no match was found in the new |
363 | | // all_blobs list. |
364 | 0 | C_BLOB_IT not_found_it(¬_found_blobs); |
365 | 0 | for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) { |
366 | 0 | C_BLOB *not_found = not_found_it.data(); |
367 | 0 | TBOX not_found_box = not_found->bounding_box(); |
368 | 0 | Box *box_to_plot = GetBoxForTBOX(not_found_box); |
369 | 0 | pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255); |
370 | 0 | boxDestroy(&box_to_plot); |
371 | 0 | } |
372 | | |
373 | | // Plot out the blobs unused from all blobs. |
374 | 0 | C_BLOB_IT all_blobs_it(new_blobs); |
375 | 0 | for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) { |
376 | 0 | C_BLOB *a_blob = all_blobs_it.data(); |
377 | 0 | Box *box_to_plot = GetBoxForTBOX(a_blob->bounding_box()); |
378 | 0 | pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0); |
379 | 0 | boxDestroy(&box_to_plot); |
380 | 0 | } |
381 | 0 | } |
382 | 0 | } |
383 | | |
384 | | // Returns a new box object for the corresponding TBOX, based on the original |
385 | | // image's coordinate system. |
386 | 0 | Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const { |
387 | 0 | return boxCreate(tbox.left(), pixGetHeight(orig_pix_) - tbox.top() - 1, tbox.width(), |
388 | 0 | tbox.height()); |
389 | 0 | } |
390 | | |
391 | | // This method returns the computed mode-height of blobs in the pix. |
392 | | // It also prunes very small blobs from calculation. |
393 | 0 | int ShiroRekhaSplitter::GetModeHeight(Image pix) { |
394 | 0 | Boxa *boxa = pixConnComp(pix, nullptr, 8); |
395 | 0 | STATS heights(0, pixGetHeight(pix) - 1); |
396 | 0 | heights.clear(); |
397 | 0 | for (int i = 0; i < boxaGetCount(boxa); ++i) { |
398 | 0 | Box *box = boxaGetBox(boxa, i, L_CLONE); |
399 | 0 | l_int32 x, y, w, h; |
400 | 0 | boxGetGeometry(box, &x, &y, &w, &h); |
401 | 0 | if (h >= 3 || w >= 3) { |
402 | 0 | heights.add(h, 1); |
403 | 0 | } |
404 | 0 | boxDestroy(&box); |
405 | 0 | } |
406 | 0 | boxaDestroy(&boxa); |
407 | 0 | return heights.mode(); |
408 | 0 | } |
409 | | |
410 | | // This method returns y-extents of the shiro-rekha computed from the input |
411 | | // word image. |
412 | | void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, |
413 | 0 | int *shirorekha_bottom, int *shirorekha_ylevel) { |
414 | | // Compute a histogram from projecting the word on a vertical line. |
415 | 0 | PixelHistogram hist_horiz; |
416 | 0 | hist_horiz.ConstructHorizontalCountHist(word_pix); |
417 | | // Get the ylevel where the top-line exists. This is basically the global |
418 | | // maxima in the horizontal histogram. |
419 | 0 | int topline_onpixel_count = 0; |
420 | 0 | int topline_ylevel = hist_horiz.GetHistogramMaximum(&topline_onpixel_count); |
421 | | |
422 | | // Get the upper and lower extents of the shiro rekha. |
423 | 0 | int thresh = (topline_onpixel_count * 70) / 100; |
424 | 0 | int ulimit = topline_ylevel; |
425 | 0 | int llimit = topline_ylevel; |
426 | 0 | while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) { |
427 | 0 | --ulimit; |
428 | 0 | } |
429 | 0 | while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) { |
430 | 0 | ++llimit; |
431 | 0 | } |
432 | |
|
433 | 0 | if (shirorekha_top) { |
434 | 0 | *shirorekha_top = ulimit; |
435 | 0 | } |
436 | 0 | if (shirorekha_bottom) { |
437 | 0 | *shirorekha_bottom = llimit; |
438 | 0 | } |
439 | 0 | if (shirorekha_ylevel) { |
440 | 0 | *shirorekha_ylevel = topline_ylevel; |
441 | 0 | } |
442 | 0 | } |
443 | | |
444 | | // This method returns the global-maxima for the histogram. The frequency of |
445 | | // the global maxima is returned in count, if specified. |
446 | 0 | int PixelHistogram::GetHistogramMaximum(int *count) const { |
447 | 0 | int best_value = 0; |
448 | 0 | for (int i = 0; i < length_; ++i) { |
449 | 0 | if (hist_[i] > hist_[best_value]) { |
450 | 0 | best_value = i; |
451 | 0 | } |
452 | 0 | } |
453 | 0 | if (count) { |
454 | 0 | *count = hist_[best_value]; |
455 | 0 | } |
456 | 0 | return best_value; |
457 | 0 | } |
458 | | |
459 | | // Methods to construct histograms from images. |
460 | 0 | void PixelHistogram::ConstructVerticalCountHist(Image pix) { |
461 | 0 | Clear(); |
462 | 0 | int width = pixGetWidth(pix); |
463 | 0 | int height = pixGetHeight(pix); |
464 | 0 | hist_ = new int[width]; |
465 | 0 | length_ = width; |
466 | 0 | int wpl = pixGetWpl(pix); |
467 | 0 | l_uint32 *data = pixGetData(pix); |
468 | 0 | for (int i = 0; i < width; ++i) { |
469 | 0 | hist_[i] = 0; |
470 | 0 | } |
471 | 0 | for (int i = 0; i < height; ++i) { |
472 | 0 | l_uint32 *line = data + i * wpl; |
473 | 0 | for (int j = 0; j < width; ++j) { |
474 | 0 | if (GET_DATA_BIT(line, j)) { |
475 | 0 | ++(hist_[j]); |
476 | 0 | } |
477 | 0 | } |
478 | 0 | } |
479 | 0 | } |
480 | | |
481 | 0 | void PixelHistogram::ConstructHorizontalCountHist(Image pix) { |
482 | 0 | Clear(); |
483 | 0 | Numa *counts = pixCountPixelsByRow(pix, nullptr); |
484 | 0 | length_ = numaGetCount(counts); |
485 | 0 | hist_ = new int[length_]; |
486 | 0 | for (int i = 0; i < length_; ++i) { |
487 | 0 | l_int32 val = 0; |
488 | 0 | numaGetIValue(counts, i, &val); |
489 | 0 | hist_[i] = val; |
490 | 0 | } |
491 | 0 | numaDestroy(&counts); |
492 | 0 | } |
493 | | |
494 | | } // namespace tesseract. |