/src/tesseract/src/ccstruct/ocrblock.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: ocrblock.cpp (Formerly block.c) |
3 | | * Description: BLOCK member functions and iterator functions. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1991, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #include "ocrblock.h" |
20 | | |
21 | | #include "stepblob.h" |
22 | | #include "tprintf.h" |
23 | | |
24 | | #include <cstdlib> |
25 | | #include <memory> // std::unique_ptr |
26 | | |
27 | | namespace tesseract { |
28 | | |
29 | | /** |
30 | | * BLOCK::BLOCK |
31 | | * |
32 | | * Constructor for a simple rectangular block. |
33 | | */ |
34 | | BLOCK::BLOCK(const char *name, ///< filename |
35 | | bool prop, ///< proportional |
36 | | int16_t kern, ///< kerning |
37 | | int16_t space, ///< spacing |
38 | | TDimension xmin, ///< bottom left |
39 | | TDimension ymin, |
40 | | TDimension xmax, ///< top right |
41 | | TDimension ymax) |
42 | 17.2k | : pdblk(xmin, ymin, xmax, ymax) |
43 | 17.2k | , filename(name) |
44 | 17.2k | , re_rotation_(1.0f, 0.0f) |
45 | 17.2k | , classify_rotation_(1.0f, 0.0f) |
46 | 17.2k | , skew_(1.0f, 0.0f) { |
47 | 17.2k | ICOORDELT_IT left_it = &pdblk.leftside; |
48 | 17.2k | ICOORDELT_IT right_it = &pdblk.rightside; |
49 | | |
50 | 17.2k | proportional = prop; |
51 | 17.2k | kerning = kern; |
52 | 17.2k | spacing = space; |
53 | 17.2k | font_class = -1; // not assigned |
54 | 17.2k | cell_over_xheight_ = 2.0f; |
55 | 17.2k | pdblk.hand_poly = nullptr; |
56 | 17.2k | left_it.set_to_list(&pdblk.leftside); |
57 | 17.2k | right_it.set_to_list(&pdblk.rightside); |
58 | | // make default box |
59 | 17.2k | left_it.add_to_end(new ICOORDELT(xmin, ymin)); |
60 | 17.2k | left_it.add_to_end(new ICOORDELT(xmin, ymax)); |
61 | 17.2k | right_it.add_to_end(new ICOORDELT(xmax, ymin)); |
62 | 17.2k | right_it.add_to_end(new ICOORDELT(xmax, ymax)); |
63 | 17.2k | } |
64 | | |
65 | | /** |
66 | | * decreasing_top_order |
67 | | * |
68 | | * Sort Comparator: Return <0 if row1 top < row2 top |
69 | | */ |
70 | | |
71 | 0 | static int decreasing_top_order(const ROW *row1, const ROW *row2) { |
72 | 0 | return row2->bounding_box().top() - |
73 | 0 | row1->bounding_box().top(); |
74 | 0 | } |
75 | | |
76 | | /** |
77 | | * BLOCK::rotate |
78 | | * |
79 | | * Rotate the polygon by the given rotation and recompute the bounding_box. |
80 | | */ |
81 | 0 | void BLOCK::rotate(const FCOORD &rotation) { |
82 | 0 | pdblk.poly_block()->rotate(rotation); |
83 | 0 | pdblk.box = *pdblk.poly_block()->bounding_box(); |
84 | 0 | } |
85 | | |
86 | | // Returns the bounding box including the desired combination of upper and |
87 | | // lower noise/diacritic elements. |
88 | 0 | TBOX BLOCK::restricted_bounding_box(bool upper_dots, bool lower_dots) const { |
89 | 0 | TBOX box; |
90 | | // This is a read-only iteration of the rows in the block. |
91 | 0 | ROW_IT it(const_cast<ROW_LIST *>(&rows)); |
92 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
93 | 0 | box += it.data()->restricted_bounding_box(upper_dots, lower_dots); |
94 | 0 | } |
95 | 0 | return box; |
96 | 0 | } |
97 | | |
98 | | /** |
99 | | * BLOCK::reflect_polygon_in_y_axis |
100 | | * |
101 | | * Reflects the polygon in the y-axis and recompute the bounding_box. |
102 | | * Does nothing to any contained rows/words/blobs etc. |
103 | | */ |
104 | 0 | void BLOCK::reflect_polygon_in_y_axis() { |
105 | 0 | pdblk.poly_block()->reflect_in_y_axis(); |
106 | 0 | pdblk.box = *pdblk.poly_block()->bounding_box(); |
107 | 0 | } |
108 | | |
109 | | /** |
110 | | * BLOCK::sort_rows |
111 | | * |
112 | | * Order rows so that they are in order of decreasing Y coordinate |
113 | | */ |
114 | | |
115 | 0 | void BLOCK::sort_rows() { // order on "top" |
116 | 0 | ROW_IT row_it(&rows); |
117 | |
|
118 | 0 | row_it.sort(decreasing_top_order); |
119 | 0 | } |
120 | | |
121 | | /** |
122 | | * BLOCK::compress |
123 | | * |
124 | | * Delete space between the rows. (And maybe one day, compress the rows) |
125 | | * Fill space of block from top down, left aligning rows. |
126 | | */ |
127 | | |
128 | 0 | void BLOCK::compress() { // squash it up |
129 | 0 | #define ROW_SPACING 5 |
130 | |
|
131 | 0 | ROW_IT row_it(&rows); |
132 | 0 | ROW *row; |
133 | 0 | ICOORD row_spacing(0, ROW_SPACING); |
134 | |
|
135 | 0 | ICOORDELT_IT icoordelt_it; |
136 | |
|
137 | 0 | sort_rows(); |
138 | |
|
139 | 0 | pdblk.box = TBOX(pdblk.box.topleft(), pdblk.box.topleft()); |
140 | 0 | pdblk.box.move_bottom_edge(ROW_SPACING); |
141 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
142 | 0 | row = row_it.data(); |
143 | 0 | row->move(pdblk.box.botleft() - row_spacing - row->bounding_box().topleft()); |
144 | 0 | pdblk.box += row->bounding_box(); |
145 | 0 | } |
146 | |
|
147 | 0 | pdblk.leftside.clear(); |
148 | 0 | icoordelt_it.set_to_list(&pdblk.leftside); |
149 | 0 | icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.bottom())); |
150 | 0 | icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.top())); |
151 | 0 | pdblk.rightside.clear(); |
152 | 0 | icoordelt_it.set_to_list(&pdblk.rightside); |
153 | 0 | icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.bottom())); |
154 | 0 | icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.top())); |
155 | 0 | } |
156 | | |
157 | | /** |
158 | | * BLOCK::check_pitch |
159 | | * |
160 | | * Check whether the block is fixed or prop, set the flag, and set |
161 | | * the pitch if it is fixed. |
162 | | */ |
163 | | |
164 | 16.7k | void BLOCK::check_pitch() { // check prop |
165 | | // tprintf("Missing FFT fixed pitch stuff!\n"); |
166 | 16.7k | pitch = -1; |
167 | 16.7k | } |
168 | | |
169 | | /** |
170 | | * BLOCK::compress |
171 | | * |
172 | | * Compress and move in a single operation. |
173 | | */ |
174 | | |
175 | | void BLOCK::compress( // squash it up |
176 | | const ICOORD vec // and move |
177 | 0 | ) { |
178 | 0 | pdblk.box.move(vec); |
179 | 0 | compress(); |
180 | 0 | } |
181 | | |
182 | | /** |
183 | | * BLOCK::print |
184 | | * |
185 | | * Print the info on a block |
186 | | */ |
187 | | |
188 | | void BLOCK::print( // print list of sides |
189 | | FILE *, ///< file to print on |
190 | | bool dump ///< print full detail |
191 | 0 | ) { |
192 | 0 | ICOORDELT_IT it = &pdblk.leftside; // iterator |
193 | |
|
194 | 0 | pdblk.box.print(); |
195 | 0 | tprintf("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); |
196 | 0 | tprintf("Kerning= %d\n", kerning); |
197 | 0 | tprintf("Spacing= %d\n", spacing); |
198 | 0 | tprintf("Fixed_pitch=%d\n", pitch); |
199 | 0 | tprintf("Filename= %s\n", filename.c_str()); |
200 | |
|
201 | 0 | if (dump) { |
202 | 0 | tprintf("Left side coords are:\n"); |
203 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
204 | 0 | tprintf("(%d,%d) ", it.data()->x(), it.data()->y()); |
205 | 0 | } |
206 | 0 | tprintf("\n"); |
207 | 0 | tprintf("Right side coords are:\n"); |
208 | 0 | it.set_to_list(&pdblk.rightside); |
209 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
210 | 0 | tprintf("(%d,%d) ", it.data()->x(), it.data()->y()); |
211 | 0 | } |
212 | 0 | tprintf("\n"); |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | | /** |
217 | | * BLOCK::operator= |
218 | | * |
219 | | * Assignment - duplicate the block structure, but with an EMPTY row list. |
220 | | */ |
221 | | |
222 | | BLOCK &BLOCK::operator=( // assignment |
223 | | const BLOCK &source // from this |
224 | 0 | ) { |
225 | 0 | this->ELIST<BLOCK>::LINK::operator=(source); |
226 | 0 | pdblk = source.pdblk; |
227 | 0 | proportional = source.proportional; |
228 | 0 | kerning = source.kerning; |
229 | 0 | spacing = source.spacing; |
230 | 0 | filename = source.filename; // STRINGs assign ok |
231 | 0 | if (!rows.empty()) { |
232 | 0 | rows.clear(); |
233 | 0 | } |
234 | 0 | re_rotation_ = source.re_rotation_; |
235 | 0 | classify_rotation_ = source.classify_rotation_; |
236 | 0 | skew_ = source.skew_; |
237 | 0 | return *this; |
238 | 0 | } |
239 | | |
240 | | // This function is for finding the approximate (horizontal) distance from |
241 | | // the x-coordinate of the left edge of a symbol to the left edge of the |
242 | | // text block which contains it. We are passed: |
243 | | // segments - output of PB_LINE_IT::get_line() which contains x-coordinate |
244 | | // intervals for the scan line going through the symbol's y-coordinate. |
245 | | // Each element of segments is of the form (x()=start_x, y()=length). |
246 | | // x - the x coordinate of the symbol we're interested in. |
247 | | // margin - return value, the distance from x,y to the left margin of the |
248 | | // block containing it. |
249 | | // If all segments were to the right of x, we return false and 0. |
250 | 145k | static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) { |
251 | 145k | bool found = false; |
252 | 145k | *margin = 0; |
253 | 145k | if (segments->empty()) { |
254 | 7.90k | return found; |
255 | 7.90k | } |
256 | 137k | ICOORDELT_IT seg_it(segments); |
257 | 275k | for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { |
258 | 137k | int cur_margin = x - seg_it.data()->x(); |
259 | 137k | if (cur_margin >= 0) { |
260 | 137k | if (!found) { |
261 | 137k | *margin = cur_margin; |
262 | 137k | } else if (cur_margin < *margin) { |
263 | 0 | *margin = cur_margin; |
264 | 0 | } |
265 | 137k | found = true; |
266 | 137k | } |
267 | 137k | } |
268 | 137k | return found; |
269 | 145k | } |
270 | | |
271 | | // This function is for finding the approximate (horizontal) distance from |
272 | | // the x-coordinate of the right edge of a symbol to the right edge of the |
273 | | // text block which contains it. We are passed: |
274 | | // segments - output of PB_LINE_IT::get_line() which contains x-coordinate |
275 | | // intervals for the scan line going through the symbol's y-coordinate. |
276 | | // Each element of segments is of the form (x()=start_x, y()=length). |
277 | | // x - the x coordinate of the symbol we're interested in. |
278 | | // margin - return value, the distance from x,y to the right margin of the |
279 | | // block containing it. |
280 | | // If all segments were to the left of x, we return false and 0. |
281 | 145k | static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) { |
282 | 145k | bool found = false; |
283 | 145k | *margin = 0; |
284 | 145k | if (segments->empty()) { |
285 | 9.17k | return found; |
286 | 9.17k | } |
287 | 136k | ICOORDELT_IT seg_it(segments); |
288 | 272k | for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { |
289 | 136k | int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x; |
290 | 136k | if (cur_margin >= 0) { |
291 | 136k | if (!found) { |
292 | 136k | *margin = cur_margin; |
293 | 136k | } else if (cur_margin < *margin) { |
294 | 0 | *margin = cur_margin; |
295 | 0 | } |
296 | 136k | found = true; |
297 | 136k | } |
298 | 136k | } |
299 | 136k | return found; |
300 | 145k | } |
301 | | |
302 | | // Compute the distance from the left and right ends of each row to the |
303 | | // left and right edges of the block's polyblock. Illustration: |
304 | | // ____________________________ _______________________ |
305 | | // | Howdy neighbor! | |rectangular blocks look| |
306 | | // | This text is written to| |more like stacked pizza| |
307 | | // |illustrate how useful poly- |boxes. | |
308 | | // |blobs are in ----------- ------ The polyblob| |
309 | | // |dealing with| _________ |for a BLOCK rec-| |
310 | | // |harder layout| /===========\ |ords the possibly| |
311 | | // |issues. | | _ _ | |skewed pseudo-| |
312 | | // | You see this| | |_| \|_| | |rectangular | |
313 | | // |text is flowed| | } | |boundary that| |
314 | | // |around a mid-| \ ____ | |forms the ideal-| |
315 | | // |column portrait._____ \ / __|ized text margin| |
316 | | // | Polyblobs exist| \ / |from which we should| |
317 | | // |to account for insets| | | |measure paragraph| |
318 | | // |which make otherwise| ----- |indentation. | |
319 | | // ----------------------- ---------------------- |
320 | | // |
321 | | // If we identify a drop-cap, we measure the left margin for the lines |
322 | | // below the first line relative to one space past the drop cap. The |
323 | | // first line's margin and those past the drop cap area are measured |
324 | | // relative to the enclosing polyblock. |
325 | | // |
326 | | // TODO(rays): Before this will work well, we'll need to adjust the |
327 | | // polyblob tighter around the text near images, as in: |
328 | | // UNLV_AUTO:mag.3G0 page 2 |
329 | | // UNLV_AUTO:mag.3G4 page 16 |
330 | 15.4k | void BLOCK::compute_row_margins() { |
331 | 15.4k | if (row_list()->empty() || row_list()->singleton()) { |
332 | 2.20k | return; |
333 | 2.20k | } |
334 | | |
335 | | // If Layout analysis was not called, default to this. |
336 | 13.2k | POLY_BLOCK rect_block(pdblk.bounding_box(), PT_FLOWING_TEXT); |
337 | 13.2k | POLY_BLOCK *pblock = &rect_block; |
338 | 13.2k | if (pdblk.poly_block() != nullptr) { |
339 | 0 | pblock = pdblk.poly_block(); |
340 | 0 | } |
341 | | |
342 | | // Step One: Determine if there is a drop-cap. |
343 | | // TODO(eger): Fix up drop cap code for RTL languages. |
344 | 13.2k | ROW_IT r_it(row_list()); |
345 | 13.2k | ROW *first_row = r_it.data(); |
346 | 13.2k | ROW *second_row = r_it.data_relative(1); |
347 | | |
348 | | // initialize the bottom of a fictitious drop cap far above the first line. |
349 | 13.2k | int drop_cap_bottom = first_row->bounding_box().top() + first_row->bounding_box().height(); |
350 | 13.2k | int drop_cap_right = first_row->bounding_box().left(); |
351 | 13.2k | int mid_second_line = second_row->bounding_box().top() - second_row->bounding_box().height() / 2; |
352 | 13.2k | WERD_IT werd_it(r_it.data()->word_list()); // words of line one |
353 | 13.2k | if (!werd_it.empty()) { |
354 | 13.2k | C_BLOB_IT cblob_it(werd_it.data()->cblob_list()); |
355 | 52.7k | for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) { |
356 | 39.5k | TBOX bbox = cblob_it.data()->bounding_box(); |
357 | 39.5k | if (bbox.bottom() <= mid_second_line) { |
358 | | // we found a real drop cap |
359 | 4.31k | first_row->set_has_drop_cap(true); |
360 | 4.31k | if (drop_cap_bottom > bbox.bottom()) { |
361 | 2.46k | drop_cap_bottom = bbox.bottom(); |
362 | 2.46k | } |
363 | 4.31k | if (drop_cap_right < bbox.right()) { |
364 | 4.29k | drop_cap_right = bbox.right(); |
365 | 4.29k | } |
366 | 4.31k | } |
367 | 39.5k | } |
368 | 13.2k | } |
369 | | |
370 | | // Step Two: Calculate the margin from the text of each row to the block |
371 | | // (or drop-cap) boundaries. |
372 | 13.2k | PB_LINE_IT lines(pblock); |
373 | 13.2k | r_it.set_to_list(row_list()); |
374 | 158k | for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { |
375 | 145k | ROW *row = r_it.data(); |
376 | 145k | TBOX row_box = row->bounding_box(); |
377 | 145k | int left_y = row->base_line(row_box.left()) + row->x_height(); |
378 | 145k | int left_margin; |
379 | 145k | const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(lines.get_line(left_y)); |
380 | 145k | LeftMargin(segments_left.get(), row_box.left(), &left_margin); |
381 | | |
382 | 145k | if (row_box.top() >= drop_cap_bottom) { |
383 | 10.6k | int drop_cap_distance = row_box.left() - row->space() - drop_cap_right; |
384 | 10.6k | if (drop_cap_distance < 0) { |
385 | 8.53k | drop_cap_distance = 0; |
386 | 8.53k | } |
387 | 10.6k | if (drop_cap_distance < left_margin) { |
388 | 4.43k | left_margin = drop_cap_distance; |
389 | 4.43k | } |
390 | 10.6k | } |
391 | | |
392 | 145k | int right_y = row->base_line(row_box.right()) + row->x_height(); |
393 | 145k | int right_margin; |
394 | 145k | const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(lines.get_line(right_y)); |
395 | 145k | RightMargin(segments_right.get(), row_box.right(), &right_margin); |
396 | 145k | row->set_lmargin(left_margin); |
397 | 145k | row->set_rmargin(right_margin); |
398 | 145k | } |
399 | 13.2k | } |
400 | | |
401 | | /********************************************************************** |
402 | | * PrintSegmentationStats |
403 | | * |
404 | | * Prints segmentation stats for the given block list. |
405 | | **********************************************************************/ |
406 | | |
407 | 0 | void PrintSegmentationStats(BLOCK_LIST *block_list) { |
408 | 0 | int num_blocks = 0; |
409 | 0 | int num_rows = 0; |
410 | 0 | int num_words = 0; |
411 | 0 | int num_blobs = 0; |
412 | 0 | BLOCK_IT block_it(block_list); |
413 | 0 | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
414 | 0 | BLOCK *block = block_it.data(); |
415 | 0 | ++num_blocks; |
416 | 0 | ROW_IT row_it(block->row_list()); |
417 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
418 | 0 | ++num_rows; |
419 | 0 | ROW *row = row_it.data(); |
420 | | // Iterate over all werds in the row. |
421 | 0 | WERD_IT werd_it(row->word_list()); |
422 | 0 | for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { |
423 | 0 | WERD *werd = werd_it.data(); |
424 | 0 | ++num_words; |
425 | 0 | num_blobs += werd->cblob_list()->length(); |
426 | 0 | } |
427 | 0 | } |
428 | 0 | } |
429 | 0 | tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", num_blocks, |
430 | 0 | num_rows, num_words, num_blobs); |
431 | 0 | } |
432 | | |
433 | | /********************************************************************** |
434 | | * ExtractBlobsFromSegmentation |
435 | | * |
436 | | * Extracts blobs from the given block list and adds them to the output list. |
437 | | * The block list must have been created by performing a page segmentation. |
438 | | **********************************************************************/ |
439 | | |
440 | 0 | void ExtractBlobsFromSegmentation(BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list) { |
441 | 0 | C_BLOB_IT return_list_it(output_blob_list); |
442 | 0 | BLOCK_IT block_it(blocks); |
443 | 0 | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
444 | 0 | BLOCK *block = block_it.data(); |
445 | 0 | ROW_IT row_it(block->row_list()); |
446 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
447 | 0 | ROW *row = row_it.data(); |
448 | | // Iterate over all werds in the row. |
449 | 0 | WERD_IT werd_it(row->word_list()); |
450 | 0 | for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { |
451 | 0 | WERD *werd = werd_it.data(); |
452 | 0 | return_list_it.move_to_last(); |
453 | 0 | return_list_it.add_list_after(werd->cblob_list()); |
454 | 0 | return_list_it.move_to_last(); |
455 | 0 | return_list_it.add_list_after(werd->rej_cblob_list()); |
456 | 0 | } |
457 | 0 | } |
458 | 0 | } |
459 | 0 | } |
460 | | |
461 | | /********************************************************************** |
462 | | * RefreshWordBlobsFromNewBlobs() |
463 | | * |
464 | | * Refreshes the words in the block_list by using blobs in the |
465 | | * new_blobs list. |
466 | | * Block list must have word segmentation in it. |
467 | | * It consumes the blobs provided in the new_blobs list. The blobs leftover in |
468 | | * the new_blobs list after the call weren't matched to any blobs of the words |
469 | | * in block list. |
470 | | * The output not_found_blobs is a list of blobs from the original segmentation |
471 | | * in the block_list for which no corresponding new blobs were found. |
472 | | **********************************************************************/ |
473 | | |
474 | | void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, |
475 | 0 | C_BLOB_LIST *not_found_blobs) { |
476 | | // Now iterate over all the blobs in the segmentation_block_list_, and just |
477 | | // replace the corresponding c-blobs inside the werds. |
478 | 0 | BLOCK_IT block_it(block_list); |
479 | 0 | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
480 | 0 | BLOCK *block = block_it.data(); |
481 | 0 | if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { |
482 | 0 | continue; // Don't touch non-text blocks. |
483 | 0 | } |
484 | | // Iterate over all rows in the block. |
485 | 0 | ROW_IT row_it(block->row_list()); |
486 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
487 | 0 | ROW *row = row_it.data(); |
488 | | // Iterate over all werds in the row. |
489 | 0 | WERD_IT werd_it(row->word_list()); |
490 | 0 | WERD_LIST new_words; |
491 | 0 | WERD_IT new_words_it(&new_words); |
492 | 0 | for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { |
493 | 0 | WERD *werd = werd_it.extract(); |
494 | 0 | WERD *new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, not_found_blobs); |
495 | 0 | if (new_werd) { |
496 | | // Insert this new werd into the actual row's werd-list. Remove the |
497 | | // existing one. |
498 | 0 | new_words_it.add_after_then_move(new_werd); |
499 | 0 | delete werd; |
500 | 0 | } else { |
501 | | // Reinsert the older word back, for lack of better options. |
502 | | // This is critical since dropping the words messes up segmentation: |
503 | | // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on. |
504 | 0 | new_words_it.add_after_then_move(werd); |
505 | 0 | } |
506 | 0 | } |
507 | | // Get rid of the old word list & replace it with the new one. |
508 | 0 | row->word_list()->clear(); |
509 | 0 | werd_it.move_to_first(); |
510 | 0 | werd_it.add_list_after(&new_words); |
511 | 0 | } |
512 | 0 | } |
513 | 0 | } |
514 | | |
515 | | } // namespace tesseract |