/src/tesseract/src/ccstruct/blobbox.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: blobbox.cpp (Formerly blobnbox.c) |
3 | | * Description: Code for the textord blob class. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1992, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | // Include automatically generated configuration file if running autoconf. |
20 | | #ifdef HAVE_CONFIG_H |
21 | | # include "config_auto.h" |
22 | | #endif |
23 | | |
24 | | #include "blobbox.h" |
25 | | #include "blobs.h" // for TPOINT |
26 | | #include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE, C_OUTLINE_LIST |
27 | | #include "environ.h" // for l_uint32 |
28 | | #include "host.h" // for NearlyEqual |
29 | | #include "points.h" // for operator+=, ICOORD::rotate |
30 | | |
31 | | #include "helpers.h" // for UpdateRange, IntCastRounded |
32 | | |
33 | | #include <allheaders.h> // for pixGetHeight, pixGetPixel |
34 | | |
35 | | #include <algorithm> // for max, min |
36 | | #include <cmath> |
37 | | #include <cstdint> // for INT32_MAX, INT16_MAX |
38 | | |
// PROJECTION_MARGIN: macro constant with value 10 (marked arbitrary by the
// author). No use is visible in this excerpt; the non-zero hit count suggests
// it is expanded further down the file - verify before changing or removing.
39 | 799k | #define PROJECTION_MARGIN 10 // arbitrary |
40 | | |
41 | | namespace tesseract { |
42 | | |
43 | | // Up to 30 degrees is allowed for rotations of diacritic blobs. |
// 0.866 is approximately cos(30 degrees). rotate_box() asserts that
// rotation.x() is at least this value before rotating a diacritic.
44 | | const double kCosSmallAngle = 0.866; |
45 | | // Min aspect ratio for a joined word to indicate an obvious flow direction. |
// DefiniteIndividualFlow() applies it to both width/height and height/width.
46 | | const double kDefiniteAspectRatio = 2.0; |
47 | | // Multiple of short length in perimeter to make a joined word. |
// NOTE(review): DefiniteIndividualFlow() multiplies this by the full box
// perimeter, 2 * (height + width), rather than by a "short length" - confirm
// which of the two descriptions is the intended one.
48 | | const double kComplexShapePerimeterRatio = 1.5; |
49 | | // Min multiple of linesize for medium-sized blobs in ReFilterBlobs. |
50 | | const double kMinMediumSizeRatio = 0.25; |
51 | | // Max multiple of linesize for medium-sized blobs in ReFilterBlobs. |
52 | | const double kMaxMediumSizeRatio = 4.0; |
53 | | |
54 | | // Rotates the box and the underlying blob. |
// Order of operations: the outlines held by cblob_ptr are rotated first, then
// rotate_box() updates the box and the diacritic base-character limits, and
// finally compute_bounding_box() (defined elsewhere) is called.
// NOTE(review): cblob_ptr is dereferenced without a null check, unlike chop()
// and BoundsWithinLimits() - presumably callers only rotate blobs that own
// outlines; confirm.
55 | 0 | void BLOBNBOX::rotate(FCOORD rotation) { |
56 | 0 | cblob_ptr->rotate(rotation); |
57 | 0 | rotate_box(rotation); |
58 | 0 | compute_bounding_box(); |
59 | 0 | } |
60 | | |
61 | | // Reflect the box in the y-axis, leaving the underlying blob untouched. |
// The new left edge is the negated old right edge and the new right edge is
// the negated old left edge. Only the left/right setters are called; top and
// bottom are not touched.
62 | 0 | void BLOBNBOX::reflect_box_in_y_axis() { |
// Capture the new left before set_right() overwrites the old right edge.
63 | 0 | int left = -box.right(); |
64 | 0 | box.set_right(-box.left()); |
65 | 0 | box.set_left(left); |
66 | 0 | } |
67 | | |
68 | | // Rotates the box by the angle given by rotation. |
69 | | // If the blob is a diacritic, then only small rotations for skew |
70 | | // correction can be applied. |
// Diacritic case: base_char_top_ and base_char_bottom_ are updated by rotating
// a point placed at the horizontal centre of the box at each of those heights
// and keeping only the rotated y value; the box itself is rotated afterwards.
// Non-diacritic case: the box is rotated and set_diacritic_box(box) is called
// with the rotated box.
71 | 0 | void BLOBNBOX::rotate_box(FCOORD rotation) { |
72 | 0 | if (IsDiacritic()) { |
// Hard failure if the x component of rotation is below kCosSmallAngle
// (presumably rotation is a unit (cos, sin) vector - confirm).
73 | 0 | ASSERT_HOST(rotation.x() >= kCosSmallAngle); |
74 | 0 | ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_); |
// bottom_pt takes its x from top_pt before top_pt is rotated, so both
// points start on the same vertical line.
75 | 0 | ICOORD bottom_pt(top_pt.x(), base_char_bottom_); |
76 | 0 | top_pt.rotate(rotation); |
77 | 0 | base_char_top_ = top_pt.y(); |
78 | 0 | bottom_pt.rotate(rotation); |
79 | 0 | base_char_bottom_ = bottom_pt.y(); |
80 | 0 | box.rotate(rotation); |
81 | 0 | } else { |
82 | 0 | box.rotate(rotation); |
83 | 0 | set_diacritic_box(box); |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | | /********************************************************************** |
88 | | * BLOBNBOX::merge |
89 | | * |
90 | | * Merge this blob with the given blob, which should be after this. |
91 | | **********************************************************************/ |
// Only bounding boxes are combined: this box grows to include nextblob's box
// and set_diacritic_box() is called with the result. No outlines are moved and
// nextblob is neither removed nor deleted; it is only flagged as joined.
// nextblob is dereferenced unconditionally, so it must not be null.
92 | | void BLOBNBOX::merge( // merge blobs |
93 | | BLOBNBOX *nextblob // blob to join with |
94 | 1.20M | ) { |
95 | 1.20M | box += nextblob->box; // merge boxes |
96 | 1.20M | set_diacritic_box(box); |
97 | 1.20M | nextblob->joined = true; |
98 | 1.20M | } |
99 | | |
100 | | // Merge this with other, taking the outlines from other. |
101 | | // Other is not deleted, but left for the caller to handle. |
// If other has a cblob, its outline list is passed to add_list_after() on an
// iterator over this blob's outline list. compute_bounding_box() is called
// whether or not any outlines were transferred.
// NOTE(review): this->cblob_ptr is dereferenced without a null check whenever
// other->cblob_ptr is non-null - presumably callers guarantee it; confirm.
102 | 0 | void BLOBNBOX::really_merge(BLOBNBOX *other) { |
103 | 0 | if (other->cblob_ptr != nullptr) { |
104 | 0 | C_OUTLINE_IT ol_it(cblob_ptr->out_list()); |
105 | 0 | ol_it.add_list_after(other->cblob_ptr->out_list()); |
106 | 0 | } |
107 | 0 | compute_bounding_box(); |
108 | 0 | } |
109 | | |
110 | | /********************************************************************** |
111 | | * BLOBNBOX::chop |
112 | | * |
113 | | * Chop this blob into equal sized pieces using the x height as a guide. |
114 | | * The blob is not actually chopped. Instead, fake blobs are inserted |
115 | | * with the relevant bounding boxes. |
116 | | **********************************************************************/ |
117 | | |
// Parameters:
//   start_it - iterator positioned on the first blob whose outlines are
//              scanned when measuring each vertical strip.
//   end_it   - iterator positioned on the last blob scanned (inclusive); the
//              fake blobs are inserted directly after it.
//   rotation - not used by the body; the only mention is commented out in the
//              find_cblob_vlimits() call.
//   xheight  - divisor that decides how many pieces the box is cut into.
// Nothing happens unless floor(width / xheight) is greater than 1 and this
// blob owns outlines (cblob_ptr != nullptr).
118 | | void BLOBNBOX::chop( // chop blobs |
119 | | BLOBNBOX_IT *start_it, // location of this |
120 | | BLOBNBOX_IT *end_it, // iterator |
121 | | FCOORD rotation, // for landscape |
122 | | float xheight // of line |
123 | 1.67M | ) { |
124 | 1.67M | int16_t blobcount; // no of blobs |
125 | 1.67M | BLOBNBOX *newblob; // fake blob |
126 | 1.67M | BLOBNBOX *blob; // current blob |
127 | 1.67M | int16_t blobindex; // number of chop |
128 | 1.67M | int16_t leftx; // left edge of blob |
129 | 1.67M | float blobwidth; // width of each |
130 | 1.67M | float rightx; // right edge to scan |
131 | 1.67M | float ymin, ymax; // limits of new blob |
132 | 1.67M | float test_ymin, test_ymax; // limits of part blob |
133 | 1.67M | ICOORD bl, tr; // corners of box |
134 | 1.67M | BLOBNBOX_IT blob_it; // blob iterator |
135 | | |
136 | | // get no of chops |
// NOTE(review): xheight is used as a divisor with no guard, and the floored
// quotient is narrowed to int16_t. An xheight of zero, or one tiny relative to
// the box width, yields a value that int16_t cannot represent - confirm that
// callers always pass a sane positive xheight.
137 | 1.67M | blobcount = static_cast<int16_t>(std::floor(box.width() / xheight)); |
138 | 1.67M | if (blobcount > 1 && cblob_ptr != nullptr) { |
139 | | // width of each |
140 | 56.7k | blobwidth = static_cast<float>(box.width() + 1) / blobcount; |
// Strips are processed right to left: rightx starts at the right edge of the
// box and moves left by blobwidth on each pass.
141 | 388k | for (blobindex = blobcount - 1, rightx = box.right(); blobindex >= 0; |
142 | 331k | blobindex--, rightx -= blobwidth) { |
143 | 331k | ymin = static_cast<float>(INT32_MAX); |
144 | 331k | ymax = static_cast<float>(-INT32_MAX); |
145 | 331k | blob_it = *start_it; |
// Accumulate the y range of every blob from *start_it up to and including
// end_it->data() inside the strip [rightx - blobwidth, rightx].
// find_cblob_vlimits() dereferences the pointer it is given, so each blob in
// that range is assumed to have a non-null cblob_ptr - TODO confirm.
146 | 3.40M | do { |
147 | 3.40M | blob = blob_it.data(); |
148 | 3.40M | find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, rightx, |
149 | 3.40M | /*rotation, */ test_ymin, test_ymax); |
150 | 3.40M | blob_it.forward(); |
151 | 3.40M | UpdateRange(test_ymin, test_ymax, &ymin, &ymax); |
152 | 3.40M | } while (blob != end_it->data()); |
// Strips where no usable y range was found (ymin not below ymax) produce no
// piece at all; this also skips a strip whose points all share one y value.
153 | 331k | if (ymin < ymax) { |
154 | 330k | leftx = static_cast<int16_t>(std::floor(rightx - blobwidth)); |
155 | 330k | if (leftx < box.left()) { |
156 | 56.7k | leftx = box.left(); // clip to real box |
157 | 56.7k | } |
158 | 330k | bl = ICOORD(leftx, static_cast<int16_t>(std::floor(ymin))); |
159 | 330k | tr = ICOORD(static_cast<int16_t>(std::ceil(rightx)), static_cast<int16_t>(std::ceil(ymax))); |
// Index 0 is the leftmost strip and is handled last, so box keeps its
// original extent for the clipping above until this final assignment.
160 | 330k | if (blobindex == 0) { |
161 | 56.1k | box = TBOX(bl, tr); // change box |
162 | 274k | } else { |
163 | 274k | newblob = new BLOBNBOX; |
164 | | // box is all it has |
165 | 274k | newblob->box = TBOX(bl, tr); |
166 | | // stay on current |
167 | 274k | newblob->base_char_top_ = tr.y(); |
168 | 274k | newblob->base_char_bottom_ = bl.y(); |
// Ownership of newblob passes to the list. Each piece is added after end_it
// without moving it, so (assuming add_after_stay_put inserts directly after
// the current element) the pieces end up in left-to-right order.
169 | 274k | end_it->add_after_stay_put(newblob); |
170 | 274k | } |
171 | 330k | } |
172 | 331k | } |
173 | 56.7k | } |
174 | 1.67M | } |
175 | | |
176 | | // Returns the box gaps between this and its neighbours_ in an array |
177 | | // indexed by BlobNeighbourDir. |
// gaps must have room for BND_COUNT entries. A direction with no neighbour is
// reported as INT16_MAX. BND_LEFT and BND_RIGHT use box.x_gap(); every other
// direction uses box.y_gap().
178 | 0 | void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { |
179 | 0 | for (int dir = 0; dir < BND_COUNT; ++dir) { |
180 | 0 | gaps[dir] = INT16_MAX; |
181 | 0 | BLOBNBOX *neighbour = neighbours_[dir]; |
182 | 0 | if (neighbour != nullptr) { |
183 | 0 | const TBOX &n_box = neighbour->bounding_box(); |
184 | 0 | if (dir == BND_LEFT || dir == BND_RIGHT) { |
185 | 0 | gaps[dir] = box.x_gap(n_box); |
186 | 0 | } else { |
187 | 0 | gaps[dir] = box.y_gap(n_box); |
188 | 0 | } |
189 | 0 | } |
190 | 0 | } |
191 | 0 | } |
192 | | // Returns the min and max horizontal and vertical gaps (from NeighbourGaps) |
193 | | // modified so that if the max exceeds the max dimension of the blob, and |
194 | | // the min is less, the max is replaced with the min. |
195 | | // The objective is to catch cases where there is only a single neighbour |
196 | | // and avoid reporting the other gap as a ridiculously large number |
// All four pointers are written unconditionally and must be non-null.
// The horizontal pair comes from the BND_LEFT/BND_RIGHT gaps and the vertical
// pair from BND_ABOVE/BND_BELOW. The same max_dimension, the larger of box
// width and height, is the clipping threshold for both pairs.
197 | 0 | void BLOBNBOX::MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const { |
198 | 0 | int max_dimension = std::max(box.width(), box.height()); |
199 | 0 | int gaps[BND_COUNT]; |
200 | 0 | NeighbourGaps(gaps); |
201 | 0 | *h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]); |
202 | 0 | *h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]); |
203 | 0 | if (*h_max > max_dimension && *h_min < max_dimension) { |
204 | 0 | *h_max = *h_min; |
205 | 0 | } |
206 | 0 | *v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]); |
207 | 0 | *v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]); |
208 | 0 | if (*v_max > max_dimension && *v_min < max_dimension) { |
209 | 0 | *v_max = *v_min; |
210 | 0 | } |
211 | 0 | } |
212 | | |
213 | | // Nulls out any neighbours that are DeletableNoise to remove references. |
// For each such direction the matching good_stroke_neighbours_ flag is also
// reset to false. The neighbour objects themselves are not deleted here.
214 | 0 | void BLOBNBOX::CleanNeighbours() { |
215 | 0 | for (int dir = 0; dir < BND_COUNT; ++dir) { |
216 | 0 | BLOBNBOX *neighbour = neighbours_[dir]; |
217 | 0 | if (neighbour != nullptr && neighbour->DeletableNoise()) { |
218 | 0 | neighbours_[dir] = nullptr; |
219 | 0 | good_stroke_neighbours_[dir] = false; |
220 | 0 | } |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | // Returns positive if there is at least one side neighbour that has a similar |
225 | | // stroke width and is not on the other side of a rule line. |
// More precisely, the return value is the number of directions, out of
// BND_COUNT, for which good_stroke_neighbour() is true; zero means none.
226 | 0 | int BLOBNBOX::GoodTextBlob() const { |
227 | 0 | int score = 0; |
228 | 0 | for (int dir = 0; dir < BND_COUNT; ++dir) { |
229 | 0 | auto bnd = static_cast<BlobNeighbourDir>(dir); |
230 | 0 | if (good_stroke_neighbour(bnd)) { |
231 | 0 | ++score; |
232 | 0 | } |
233 | 0 | } |
234 | 0 | return score; |
235 | 0 | } |
236 | | |
237 | | // Returns the number of side neighbours that are of type BRT_NOISE. |
// Every direction from 0 to BND_COUNT - 1 is examined; directions with no
// neighbour are skipped.
238 | 0 | int BLOBNBOX::NoisyNeighbours() const { |
239 | 0 | int count = 0; |
240 | 0 | for (int dir = 0; dir < BND_COUNT; ++dir) { |
241 | 0 | auto bnd = static_cast<BlobNeighbourDir>(dir); |
242 | 0 | BLOBNBOX *blob = neighbour(bnd); |
243 | 0 | if (blob != nullptr && blob->region_type() == BRT_NOISE) { |
244 | 0 | ++count; |
245 | 0 | } |
246 | 0 | } |
247 | 0 | return count; |
248 | 0 | } |
249 | | |
250 | | // Returns true, and sets vert_possible/horz_possible if the blob has some |
251 | | // feature that makes it individually appear to flow one way. |
252 | | // eg if it has a high aspect ratio, yet has a complex shape, such as a |
253 | | // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc. |
// Returns false, leaving the flags untouched, when the blob has no cblob or
// when neither aspect-ratio test finds a sufficiently complex outline.
254 | 0 | bool BLOBNBOX::DefiniteIndividualFlow() { |
255 | 0 | if (cblob() == nullptr) { |
256 | 0 | return false; |
257 | 0 | } |
258 | 0 | int box_perimeter = 2 * (box.height() + box.width()); |
259 | 0 | if (box.width() > box.height() * kDefiniteAspectRatio) { |
260 | | // Attempt to distinguish a wide joined word from a dash. |
261 | | // If it is a dash, then its perimeter is approximately |
262 | | // 2 * (box width + stroke width), but more if the outline is noisy, |
263 | | // so perimeter - 2*(box width + stroke width) should be close to zero. |
264 | | // A complex shape such as a joined word should have a much larger value. |
265 | 0 | int perimeter = cblob()->perimeter(); |
// Use the measured stroke width when there is one; otherwise estimate it from
// area and perimeter. The perimeter <= 0 test routes a non-positive perimeter
// to the first branch so it is never used as a divisor.
266 | 0 | if (vert_stroke_width() > 0 || perimeter <= 0) { |
267 | 0 | perimeter -= 2 * vert_stroke_width(); |
268 | 0 | } else { |
269 | 0 | perimeter -= 4 * cblob()->area() / perimeter; |
270 | 0 | } |
271 | 0 | perimeter -= 2 * box.width(); |
272 | | // Use a multiple of the box perimeter as a threshold. |
273 | 0 | if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { |
274 | 0 | set_vert_possible(false); |
275 | 0 | set_horz_possible(true); |
276 | 0 | return true; |
277 | 0 | } |
278 | 0 | } |
279 | 0 | if (box.height() > box.width() * kDefiniteAspectRatio) { |
280 | | // As above, but for a putative vertical word vs a I/1/l. |
281 | 0 | int perimeter = cblob()->perimeter(); |
282 | 0 | if (horz_stroke_width() > 0 || perimeter <= 0) { |
283 | 0 | perimeter -= 2 * horz_stroke_width(); |
284 | 0 | } else { |
285 | 0 | perimeter -= 4 * cblob()->area() / perimeter; |
286 | 0 | } |
287 | 0 | perimeter -= 2 * box.height(); |
288 | 0 | if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { |
289 | 0 | set_vert_possible(true); |
290 | 0 | set_horz_possible(false); |
291 | 0 | return true; |
292 | 0 | } |
293 | 0 | } |
294 | 0 | return false; |
295 | 0 | } |
296 | | |
297 | | // Returns true if there is no tabstop violation in merging this and other. |
// Four symmetric checks, each of which returns false:
//   - whichever box extends further left also lies left of the OTHER blob's
//     left_rule_;
//   - whichever box extends further right also lies right of the OTHER blob's
//     right_rule_.
// When the two edges on a side are equal, neither check for that side fires.
298 | 0 | bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX &other) const { |
299 | 0 | if (box.left() < other.box.left() && box.left() < other.left_rule_) { |
300 | 0 | return false; |
301 | 0 | } |
302 | 0 | if (other.box.left() < box.left() && other.box.left() < left_rule_) { |
303 | 0 | return false; |
304 | 0 | } |
305 | 0 | if (box.right() > other.box.right() && box.right() > other.right_rule_) { |
306 | 0 | return false; |
307 | 0 | } |
308 | 0 | if (other.box.right() > box.right() && other.box.right() > right_rule_) { |
309 | 0 | return false; |
310 | 0 | } |
311 | 0 | return true; |
312 | 0 | } |
313 | | |
314 | | // Returns true if other has a similar stroke width to this. |
// Each tolerance is (this blob's width * fractional_tolerance) +
// constant_tolerance. Tolerances are derived from this blob only, never from
// other, so a.MatchingStrokeWidth(b) and b.MatchingStrokeWidth(a) may differ.
315 | | bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance, |
316 | 0 | double constant_tolerance) const { |
317 | | // The backup width comes from area_stroke_width() and is used in case |
318 | | // there is no stroke width information in the blob. |
319 | 0 | double p_width = area_stroke_width(); |
320 | 0 | double n_p_width = other.area_stroke_width(); |
321 | 0 | float h_tolerance = horz_stroke_width_ * fractional_tolerance + constant_tolerance; |
322 | 0 | float v_tolerance = vert_stroke_width_ * fractional_tolerance + constant_tolerance; |
323 | 0 | double p_tolerance = p_width * fractional_tolerance + constant_tolerance; |
// A direction counts as "zero" when either blob lacks a width for it.
324 | 0 | bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; |
325 | 0 | bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; |
326 | 0 | bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, other.horz_stroke_width_, h_tolerance); |
327 | 0 | bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, other.vert_stroke_width_, v_tolerance); |
328 | 0 | bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); |
329 | | // For a match, at least one of the horizontal and vertical widths |
330 | | // must match, and the other one must either match or be zero. |
331 | | // Only if both are zero will we look at the backup (area-based) metric. |
332 | 0 | return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); |
333 | 0 | } |
334 | | |
335 | | // Returns a bounding box of the outline contained within the |
336 | | // given horizontal range. |
// The result spans left..right horizontally. Vertically it spans the outline
// points found in that x range, or the full top/bottom of box when the blob
// has no outlines or no outline point falls inside the range.
337 | 0 | TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { |
338 | 0 | FCOORD no_rotation(1.0f, 0.0f); |
339 | 0 | float top = box.top(); |
340 | 0 | float bottom = box.bottom(); |
341 | 0 | if (cblob_ptr != nullptr) { |
342 | 0 | find_cblob_limits(cblob_ptr, static_cast<float>(left), static_cast<float>(right), no_rotation, |
343 | 0 | bottom, top); |
344 | 0 | } |
345 | 

// find_cblob_limits() resets its outputs to +/-INT32_MAX sentinels before
// scanning, so top < bottom here means nothing was found; restore the box.
346 | 0 | if (top < bottom) { |
347 | 0 | top = box.top(); |
348 | 0 | bottom = box.bottom(); |
349 | 0 | } |
// Build one TBOX from each corner point and merge them with operator+=.
350 | 0 | FCOORD bot_left(left, bottom); |
351 | 0 | FCOORD top_right(right, top); |
352 | 0 | TBOX shrunken_box(bot_left); |
353 | 0 | TBOX shrunken_box2(top_right); |
354 | 0 | shrunken_box += shrunken_box2; |
355 | 0 | return shrunken_box; |
356 | 0 | } |
357 | | |
358 | | // Estimates and stores the baseline position based on the shape of the |
359 | | // outline. |
// baseline_y_ is first set to the bottom of the box; it keeps that value when
// the blob has no outlines, otherwise it is replaced by the result of
// C_BLOB::EstimateBaselinePosition().
360 | 2.63M | void BLOBNBOX::EstimateBaselinePosition() { |
361 | 2.63M | baseline_y_ = box.bottom(); // The default. |
362 | 2.63M | if (cblob_ptr == nullptr) { |
363 | 0 | return; |
364 | 0 | } |
365 | 2.63M | baseline_y_ = cblob_ptr->EstimateBaselinePosition(); |
366 | 2.63M | } |
367 | | |
368 | | // Helper to call CleanNeighbours on all blobs on the list. |
// Makes one full pass over blobs; no element is added or removed.
369 | 0 | void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST *blobs) { |
370 | 0 | BLOBNBOX_IT blob_it(blobs); |
371 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
372 | 0 | blob_it.data()->CleanNeighbours(); |
373 | 0 | } |
374 | 0 | } |
375 | | |
376 | | // Helper to delete all the deletable blobs on the list. |
// A blob is deletable when DeletableNoise() returns true. For each one, the
// pointer returned by remove_cblob() is deleted first, then the BLOBNBOX is
// extracted from the list and deleted.
// NOTE(review): nothing here clears pointers that other blobs may still hold
// to a deleted blob in neighbours_ - presumably CleanNeighbours() is expected
// to run beforehand; confirm at the call sites.
377 | 0 | void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) { |
378 | 0 | BLOBNBOX_IT blob_it(blobs); |
379 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
380 | 0 | BLOBNBOX *blob = blob_it.data(); |
381 | 0 | if (blob->DeletableNoise()) { |
382 | 0 | delete blob->remove_cblob(); |
383 | 0 | delete blob_it.extract(); |
384 | 0 | } |
385 | 0 | } |
386 | 0 | } |
387 | | |
388 | | // Helper to compute edge offsets for all the blobs on the list. |
389 | | // See coutln.h for an explanation of edge offsets. |
// When both thresholds and grey are non-null, each blob's threshold is read
// from the thresholds image at the centre of the blob's bounding box;
// otherwise the fixed value 128 is used. Blobs without a cblob are skipped.
390 | 50.0k | void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) { |
391 | 50.0k | int grey_height = 0; |
392 | 50.0k | int thr_height = 0; |
393 | 50.0k | int scale_factor = 1; |
394 | 50.0k | if (thresholds != nullptr && grey != nullptr) { |
395 | 0 | grey_height = pixGetHeight(grey); |
396 | 0 | thr_height = pixGetHeight(thresholds); |
// NOTE(review): thr_height is a divisor here and scale_factor is a divisor
// below. A zero-height thresholds image, or a height ratio that rounds to 0,
// is not guarded against - confirm callers cannot supply such images.
397 | 0 | scale_factor = IntCastRounded(static_cast<double>(grey_height) / thr_height); |
398 | 0 | } |
399 | 50.0k | BLOBNBOX_IT blob_it(blobs); |
400 | 2.98M | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
401 | 2.93M | BLOBNBOX *blob = blob_it.data(); |
402 | 2.93M | if (blob->cblob() != nullptr) { |
403 | | // Get the threshold that applies to this blob. |
404 | 2.93M | l_uint32 threshold = 128; |
405 | 2.93M | if (thresholds != nullptr && grey != nullptr) { |
406 | 0 | const TBOX &box = blob->cblob()->bounding_box(); |
407 | | // Transform the coordinates if required. |
408 | 0 | TPOINT pt((box.left() + box.right()) / 2, (box.top() + box.bottom()) / 2); |
// The point is scaled down by scale_factor and its y is flipped against
// thr_height. The return value of pixGetPixel is ignored, so threshold
// presumably stays at 128 if the read fails - confirm against Leptonica.
409 | 0 | pixGetPixel(thresholds, pt.x / scale_factor, thr_height - 1 - pt.y / scale_factor, |
410 | 0 | &threshold); |
411 | 0 | } |
412 | 2.93M | blob->cblob()->ComputeEdgeOffsets(threshold, grey); |
413 | 2.93M | } |
414 | 2.93M | } |
415 | 50.0k | } |
416 | | |
417 | | #ifndef GRAPHICS_DISABLED |
418 | | // Helper to draw all the blobs on the list in the given body_colour, |
419 | | // with child outlines in the child_colour. |
// Compiled only when GRAPHICS_DISABLED is not defined. Each element's plot()
// is called with win; plot() itself skips blobs that have no outlines.
420 | | void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour, |
421 | | ScrollView::Color child_colour, ScrollView *win) { |
422 | | BLOBNBOX_IT it(list); |
423 | | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
424 | | it.data()->plot(win, body_colour, child_colour); |
425 | | } |
426 | | } |
427 | | |
428 | | // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the |
429 | | // given list in the given body_colour, with child outlines in the |
430 | | // child_colour. |
// Compiled only when GRAPHICS_DISABLED is not defined. The list itself is not
// modified.
431 | | void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour, |
432 | | ScrollView::Color child_colour, ScrollView *win) { |
433 | | BLOBNBOX_IT it(list); |
434 | | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
435 | | BLOBNBOX *blob = it.data(); |
436 | | if (blob->DeletableNoise()) { |
437 | | blob->plot(win, body_colour, child_colour); |
438 | | } |
439 | | } |
440 | | } |
441 | | |
// Maps a (region_type, flow_type) pair to the debug display colour.
// flow_type is consulted only for BRT_UNKNOWN, BRT_VERT_TEXT and BRT_TEXT;
// for those, the last return in each case is the fallback for any flow type
// not listed explicitly. Region types without a case return GREY.
442 | | ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type) { |
443 | | switch (region_type) { |
444 | | case BRT_HLINE: |
445 | | return ScrollView::BROWN; |
446 | | case BRT_VLINE: |
447 | | return ScrollView::DARK_GREEN; |
448 | | case BRT_RECTIMAGE: |
449 | | return ScrollView::RED; |
450 | | case BRT_POLYIMAGE: |
451 | | return ScrollView::ORANGE; |
452 | | case BRT_UNKNOWN: |
453 | | return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE; |
454 | | case BRT_VERT_TEXT: |
455 | | if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) { |
456 | | return ScrollView::GREEN; |
457 | | } |
458 | | if (flow_type == BTFT_CHAIN) { |
459 | | return ScrollView::LIME_GREEN; |
460 | | } |
461 | | return ScrollView::YELLOW; |
462 | | case BRT_TEXT: |
463 | | if (flow_type == BTFT_STRONG_CHAIN) { |
464 | | return ScrollView::BLUE; |
465 | | } |
466 | | if (flow_type == BTFT_TEXT_ON_IMAGE) { |
467 | | return ScrollView::LIGHT_BLUE; |
468 | | } |
469 | | if (flow_type == BTFT_CHAIN) { |
470 | | return ScrollView::MEDIUM_BLUE; |
471 | | } |
472 | | if (flow_type == BTFT_LEADER) { |
473 | | return ScrollView::WHEAT; |
474 | | } |
475 | | if (flow_type == BTFT_NONTEXT) { |
476 | | return ScrollView::PINK; |
477 | | } |
478 | | return ScrollView::MAGENTA; |
479 | | default: |
480 | | return ScrollView::GREY; |
481 | | } |
482 | | } |
483 | | |
484 | | // Keep in sync with BlobRegionType. |
// Returns the display colour for this blob by passing its own region_type_
// and flow_ to TextlineColor(), which holds the actual mapping.
485 | | ScrollView::Color BLOBNBOX::BoxColor() const { |
486 | | return TextlineColor(region_type_, flow_); |
487 | | } |
488 | | |
// Draws this blob by forwarding to the plot() of its C_BLOB. Does nothing when
// the blob has no outlines (cblob_ptr is null), e.g. the box-only blobs
// created by chop().
489 | | void BLOBNBOX::plot(ScrollView *window, // window to draw in |
490 | | ScrollView::Color blob_colour, // for outer bits |
491 | | ScrollView::Color child_colour) { // for holes |
492 | | if (cblob_ptr != nullptr) { |
493 | | cblob_ptr->plot(window, blob_colour, child_colour); |
494 | | } |
495 | | } |
496 | | #endif |
497 | | /********************************************************************** |
498 | | * find_cblob_limits |
499 | | * |
500 | | * Scan the outlines of the cblob to locate the y min and max |
501 | | * between the given x limits. |
502 | | **********************************************************************/ |
503 | | |
// The start position and every step of each outline are rotated by rotation
// before being tested, so leftx/rightx and the reported y values are in the
// rotated frame. blob must not be null. If no point lies in [leftx, rightx]
// the outputs keep their sentinels: ymin = INT32_MAX and ymax = -INT32_MAX,
// both as float, so ymin > ymax signals "nothing found".
504 | | void find_cblob_limits( // get y limits |
505 | | C_BLOB *blob, // blob to search |
506 | | float leftx, // x limits |
507 | | float rightx, |
508 | | FCOORD rotation, // for landscape |
509 | | float &ymin, // output y limits |
510 | 0 | float &ymax) { |
// NOTE(review): stepindex is int16_t but is compared against
// outline->pathlength(); if that can exceed INT16_MAX the counter can never
// reach it - confirm the range of pathlength().
511 | 0 | int16_t stepindex; // current point |
512 | 0 | ICOORD pos; // current coords |
513 | 0 | ICOORD vec; // rotated step |
514 | 0 | C_OUTLINE *outline; // current outline |
515 | | // outlines |
516 | 0 | C_OUTLINE_IT out_it = blob->out_list(); |
517 | 

518 | 0 | ymin = static_cast<float>(INT32_MAX); |
519 | 0 | ymax = static_cast<float>(-INT32_MAX); |
520 | 0 | for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { |
521 | 0 | outline = out_it.data(); |
522 | 0 | pos = outline->start_pos(); // get coords |
523 | 0 | pos.rotate(rotation); |
// The point is tested before the step is applied, so the start position is
// the first point examined and pathlength() points are examined in total.
524 | 0 | for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { |
525 | | // inside |
526 | 0 | if (pos.x() >= leftx && pos.x() <= rightx) { |
527 | 0 | UpdateRange(pos.y(), &ymin, &ymax); |
528 | 0 | } |
529 | 0 | vec = outline->step(stepindex); |
530 | 0 | vec.rotate(rotation); |
531 | 0 | pos += vec; // move to next |
532 | 0 | } |
533 | 0 | } |
534 | 0 | } |
535 | | |
536 | | /********************************************************************** |
537 | | * find_cblob_vlimits |
538 | | * |
539 | | * Scan the outlines of the cblob to locate the y min and max |
540 | | * between the given x limits. |
541 | | **********************************************************************/ |
542 | | |
// Same scan as find_cblob_limits() but without any rotation. blob must not be
// null. If no outline point has x in [leftx, rightx] the outputs keep their
// sentinels: ymin = INT32_MAX and ymax = -INT32_MAX, both as float, so
// ymin > ymax signals "nothing found".
543 | | void find_cblob_vlimits( // get y limits |
544 | | C_BLOB *blob, // blob to search |
545 | | float leftx, // x limits |
546 | | float rightx, |
547 | | float &ymin, // output y limits |
548 | 3.40M | float &ymax) { |
// NOTE(review): stepindex is int16_t but is compared against
// outline->pathlength(); if that can exceed INT16_MAX the counter can never
// reach it - confirm the range of pathlength().
549 | 3.40M | int16_t stepindex; // current point |
550 | 3.40M | ICOORD pos; // current coords |
551 | 3.40M | ICOORD vec; // current step (not rotated here) |
552 | 3.40M | C_OUTLINE *outline; // current outline |
553 | | // outlines |
554 | 3.40M | C_OUTLINE_IT out_it = blob->out_list(); |
555 | | |
556 | 3.40M | ymin = static_cast<float>(INT32_MAX); |
557 | 3.40M | ymax = static_cast<float>(-INT32_MAX); |
558 | 6.81M | for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { |
559 | 3.40M | outline = out_it.data(); |
560 | 3.40M | pos = outline->start_pos(); // get coords |
561 | 303M | for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { |
562 | | // inside |
563 | 299M | if (pos.x() >= leftx && pos.x() <= rightx) { |
564 | 18.8M | UpdateRange(pos.y(), &ymin, &ymax); |
565 | 18.8M | } |
566 | 299M | vec = outline->step(stepindex); |
567 | 299M | pos += vec; // move to next |
568 | 299M | } |
569 | 3.40M | } |
570 | 3.40M | } |
571 | | |
572 | | /********************************************************************** |
573 | | * find_cblob_hlimits |
574 | | * |
575 | | * Scan the outlines of the cblob to locate the x min and max |
576 | | * between the given y limits. |
577 | | **********************************************************************/ |
578 | | |
// Horizontal counterpart of find_cblob_vlimits(). blob must not be null. If no
// outline point has y in [bottomy, topy] the outputs keep their sentinels:
// xmin = INT32_MAX and xmax = -INT32_MAX, both as float, so xmin > xmax
// signals "nothing found".
579 | | void find_cblob_hlimits( // get x limits |
580 | | C_BLOB *blob, // blob to search |
581 | | float bottomy, // y limits |
582 | | float topy, |
583 | | float &xmin, // output x limits |
584 | 7.29M | float &xmax) { |
// NOTE(review): stepindex is int16_t but is compared against
// outline->pathlength(); if that can exceed INT16_MAX the counter can never
// reach it - confirm the range of pathlength().
585 | 7.29M | int16_t stepindex; // current point |
586 | 7.29M | ICOORD pos; // current coords |
587 | 7.29M | ICOORD vec; // current step (not rotated here) |
588 | 7.29M | C_OUTLINE *outline; // current outline |
589 | | // outlines |
590 | 7.29M | C_OUTLINE_IT out_it = blob->out_list(); |
591 | | |
592 | 7.29M | xmin = static_cast<float>(INT32_MAX); |
593 | 7.29M | xmax = static_cast<float>(-INT32_MAX); |
594 | 14.6M | for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { |
595 | 7.30M | outline = out_it.data(); |
596 | 7.30M | pos = outline->start_pos(); // get coords |
597 | 200M | for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { |
598 | | // inside |
599 | 193M | if (pos.y() >= bottomy && pos.y() <= topy) { |
600 | 115M | UpdateRange(pos.x(), &xmin, &xmax); |
601 | 115M | } |
602 | 193M | vec = outline->step(stepindex); |
603 | 193M | pos += vec; // move to next |
604 | 193M | } |
605 | 7.30M | } |
606 | 7.29M | } |
607 | | |
608 | | /********************************************************************** |
609 | | * crotate_cblob |
610 | | * |
611 | | * Rotate the copy by the given vector and return a C_BLOB. |
612 | | **********************************************************************/ |
613 | | |
// For every outline of blob a new C_OUTLINE is constructed from that outline
// and rotation, and the new outlines are gathered into a freshly allocated
// C_BLOB. The result comes from `new`, so the caller is responsible for
// deleting it. blob must not be null.
614 | | C_BLOB *crotate_cblob( // rotate it |
615 | | C_BLOB *blob, // blob to rotate |
616 | | FCOORD rotation // for landscape |
617 | 67.2k | ) { |
618 | 67.2k | C_OUTLINE_LIST out_list; // output outlines |
619 | | // input outlines |
620 | 67.2k | C_OUTLINE_IT in_it = blob->out_list(); |
621 | | // output outlines |
622 | 67.2k | C_OUTLINE_IT out_it = &out_list; |
623 | | |
624 | 134k | for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) { |
625 | 67.2k | out_it.add_after_then_move(new C_OUTLINE(in_it.data(), rotation)); |
626 | 67.2k | } |
627 | 67.2k | return new C_BLOB(&out_list); |
628 | 67.2k | } |
629 | | |
630 | | /********************************************************************** |
631 | | * box_next |
632 | | * |
633 | | * Compute the bounding box of this blob with merging of x overlaps |
634 | | * but no pre-chopping. |
635 | | * Then move the iterator on to the start of the next blob. |
636 | | **********************************************************************/ |
637 | | |
// The result starts as the current blob's bounding box. The iterator always
// advances at least once. Blobs without outlines (cblob() == nullptr, i.e. the
// fake pieces made by chop()) have their boxes added to the result. Blobs
// reporting joined_to_prev() are stepped over without adding their box,
// presumably because merge() already folded it into an earlier blob - confirm.
// The loop stops on the first blob that has outlines and is not joined.
638 | | TBOX box_next( // get bounding box |
639 | | BLOBNBOX_IT *it // iterator to blobs |
640 | 4.05M | ) { |
641 | 4.05M | BLOBNBOX *blob; // current blob |
642 | 4.05M | TBOX result; // total box |
643 | | |
644 | 4.05M | blob = it->data(); |
645 | 4.05M | result = blob->bounding_box(); |
646 | 8.11M | do { |
647 | 8.11M | it->forward(); |
648 | 8.11M | blob = it->data(); |
649 | 8.11M | if (blob->cblob() == nullptr) { |
650 | | // was pre-chopped |
651 | 1.00M | result += blob->bounding_box(); |
652 | 1.00M | } |
653 | 8.11M | } |
654 | | // until next real blob |
655 | 8.11M | while ((blob->cblob() == nullptr) || blob->joined_to_prev()); |
656 | 4.05M | return result; |
657 | 4.05M | } |
658 | | |
659 | | /********************************************************************** |
660 | | * box_next_pre_chopped |
661 | | * |
662 | | * Compute the bounding box of this blob with merging of x overlaps |
663 | | * but WITH pre-chopping. |
664 | | * Then move the iterator on to the start of the next pre-chopped blob. |
665 | | **********************************************************************/ |
666 | | |
// Returns the current blob's bounding box unchanged; nothing is added to it.
// The iterator always advances at least once and keeps advancing while the
// blob it lands on reports joined_to_prev(). Unlike box_next(), blobs without
// outlines are treated as ordinary stopping points.
667 | | TBOX box_next_pre_chopped( // get bounding box |
668 | | BLOBNBOX_IT *it // iterator to blobs |
669 | 4.55M | ) { |
670 | 4.55M | BLOBNBOX *blob; // current blob |
671 | 4.55M | TBOX result; // total box |
672 | | |
673 | 4.55M | blob = it->data(); |
674 | 4.55M | result = blob->bounding_box(); |
675 | 7.94M | do { |
676 | 7.94M | it->forward(); |
677 | 7.94M | blob = it->data(); |
678 | 7.94M | } |
679 | | // until next real blob |
680 | 7.94M | while (blob->joined_to_prev()); |
681 | 4.55M | return result; |
682 | 4.55M | } |
683 | | |
684 | | /********************************************************************** |
685 | | * TO_ROW::TO_ROW |
686 | | * |
687 | | * Constructor to make a row from a blob. |
688 | | **********************************************************************/ |
689 | | |
// After clear(), the row limits start at [bottom, top] and the blob is
// appended to the row's blob list. The limits are then adjusted symmetrically
// about their centre:
//   - taller than row_size: shrunk so the height equals row_size;
//   - shorter than row_size / 3: grown so the height equals row_size / 3;
//   - otherwise left unchanged.
// initial_y_min keeps the unadjusted bottom.
690 | | TO_ROW::TO_ROW( // constructor |
691 | | BLOBNBOX *blob, // first blob |
692 | | float top, // corrected top |
693 | | float bottom, // of row |
694 | | float row_size // ideal |
695 | 308k | ) { |
696 | 308k | clear(); |
697 | 308k | y_min = bottom; |
698 | 308k | y_max = top; |
699 | 308k | initial_y_min = bottom; |
700 | | |
701 | 308k | float diff; // in size |
702 | 308k | BLOBNBOX_IT it = &blobs; // list of blobs |
703 | | |
704 | 308k | it.add_to_end(blob); |
705 | 308k | diff = top - bottom - row_size; |
706 | 308k | if (diff > 0) { |
707 | 10.2k | y_max -= diff / 2; |
708 | 10.2k | y_min += diff / 2; |
709 | 10.2k | } |
710 | | // very small object |
711 | 298k | else if ((top - bottom) * 3 < row_size) { |
712 | 1.58k | diff = row_size / 3 + bottom - top; |
713 | 1.58k | y_max += diff / 2; |
714 | 1.58k | y_min -= diff / 2; |
715 | 1.58k | } |
716 | 308k | } |
717 | | |
// Debug dump: writes the row's pitch, spacing and size members on a single
// line via tprintf. The argument list must stay in the same order as the
// format specifiers (%d for the integer fields, %g for the floating ones).
718 | 0 | void TO_ROW::print() const { |
719 | 0 | tprintf( |
720 | 0 | "pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g," |
721 | 0 | " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g," |
722 | 0 | " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n", |
723 | 0 | pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, spacing, xheight, |
724 | 0 | y_origin, xheight_evidence, ascrise, descdrop, body_size, min_space, max_nonspace, |
725 | 0 | space_threshold, kern_size, space_size); |
726 | 0 | } |
727 | | |
728 | | /********************************************************************** |
729 | | * TO_ROW::add_blob |
730 | | * |
731 | | * Add the blob to the end of the row. |
732 | | **********************************************************************/ |
733 | | |
// The blob is always appended. The row limits y_min/y_max grow towards the
// blob's top/bottom only while the row is still shorter than row_size
// (allowed > 0) and the blob sticks out beyond the current limits.
734 | | void TO_ROW::add_blob( // append a blob |
735 | | BLOBNBOX *blob, // blob to append |
736 | | float top, // corrected top |
737 | | float bottom, // of row |
738 | | float row_size // ideal |
739 | 4.72M | ) { |
740 | 4.72M | float allowed; // allowed expansion |
741 | 4.72M | float available; // expansion |
742 | 4.72M | BLOBNBOX_IT it = &blobs; // list of blobs |
743 | | |
744 | 4.72M | it.add_to_end(blob); |
// Remaining headroom before the row height reaches row_size.
745 | 4.72M | allowed = row_size + y_min - y_max; |
746 | 4.72M | if (allowed > 0) { |
// Total overhang of the blob above y_max plus below y_min.
747 | 2.71M | available = top > y_max ? top - y_max : 0; |
748 | 2.71M | if (bottom < y_min) { |
749 | | // total available |
750 | 317k | available += y_min - bottom; |
751 | 317k | } |
752 | 2.71M | if (available > 0) { |
// Doubling the overhang and then raising it to at least `allowed` keeps
// allowed / available at or below 1, and at or below 1/2 when the doubled
// overhang is the larger of the two. Each limit therefore moves by only that
// fraction of its own overhang.
753 | 1.16M | available += available; // do it gradually |
754 | 1.16M | if (available < allowed) { |
755 | 484k | available = allowed; |
756 | 484k | } |
757 | 1.16M | if (bottom < y_min) { |
758 | 317k | y_min -= (y_min - bottom) * allowed / available; |
759 | 317k | } |
760 | 1.16M | if (top > y_max) { |
761 | 885k | y_max += (top - y_max) * allowed / available; |
762 | 885k | } |
763 | 1.16M | } |
764 | 2.71M | } |
765 | 4.72M | } |
766 | | |
767 | | /********************************************************************** |
768 | | * TO_ROW:insert_blob |
769 | | * |
770 | | * Add the blob to the row in the correct position. |
771 | | **********************************************************************/ |
772 | | |
773 | | void TO_ROW::insert_blob( // constructor |
774 | | BLOBNBOX *blob // first blob |
775 | 23.3k | ) { |
776 | 23.3k | BLOBNBOX_IT it = &blobs; // list of blobs |
777 | | |
778 | 23.3k | if (it.empty()) { |
779 | 0 | it.add_before_then_move(blob); |
780 | 23.3k | } else { |
781 | 23.3k | it.mark_cycle_pt(); |
782 | 538k | while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left()) { |
783 | 515k | it.forward(); |
784 | 515k | } |
785 | 23.3k | if (it.cycled_list()) { |
786 | 2.48k | it.add_to_end(blob); |
787 | 20.8k | } else { |
788 | 20.8k | it.add_before_stay_put(blob); |
789 | 20.8k | } |
790 | 23.3k | } |
791 | 23.3k | } |
792 | | |
793 | | /********************************************************************** |
794 | | * TO_ROW::compute_vertical_projection |
795 | | * |
796 | | * Compute the vertical projection of a TO_ROW from its blobs. |
797 | | **********************************************************************/ |
798 | | |
799 | 199k | void TO_ROW::compute_vertical_projection() { // project whole row |
800 | 199k | TBOX row_box; // bound of row |
801 | 199k | BLOBNBOX *blob; // current blob |
802 | 199k | TBOX blob_box; // bounding box |
803 | 199k | BLOBNBOX_IT blob_it = blob_list(); |
804 | | |
805 | 199k | if (blob_it.empty()) { |
806 | 0 | return; |
807 | 0 | } |
808 | 199k | row_box = blob_it.data()->bounding_box(); |
809 | 3.10M | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
810 | 2.90M | row_box += blob_it.data()->bounding_box(); |
811 | 2.90M | } |
812 | | |
813 | 199k | projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1); |
814 | 199k | projection_left = row_box.left() - PROJECTION_MARGIN; |
815 | 199k | projection_right = row_box.right() + PROJECTION_MARGIN; |
816 | 3.10M | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
817 | 2.90M | blob = blob_it.data(); |
818 | 2.90M | if (blob->cblob() != nullptr) { |
819 | 2.62M | vertical_cblob_projection(blob->cblob(), &projection); |
820 | 2.62M | } |
821 | 2.90M | } |
822 | 199k | } |
823 | | |
824 | | /********************************************************************** |
825 | | * TO_ROW::clear |
826 | | * |
827 | | * Zero out all scalar members. |
828 | | **********************************************************************/ |
829 | 308k | void TO_ROW::clear() { |
830 | 308k | all_caps = false; |
831 | 308k | used_dm_model = false; |
832 | 308k | projection_left = 0; |
833 | 308k | projection_right = 0; |
834 | 308k | pitch_decision = PITCH_DUNNO; |
835 | 308k | fixed_pitch = 0.0; |
836 | 308k | fp_space = 0.0; |
837 | 308k | fp_nonsp = 0.0; |
838 | 308k | pr_space = 0.0; |
839 | 308k | pr_nonsp = 0.0; |
840 | 308k | spacing = 0.0; |
841 | 308k | xheight = 0.0; |
842 | 308k | xheight_evidence = 0; |
843 | 308k | body_size = 0.0; |
844 | 308k | ascrise = 0.0; |
845 | 308k | descdrop = 0.0; |
846 | 308k | min_space = 0; |
847 | 308k | max_nonspace = 0; |
848 | 308k | space_threshold = 0; |
849 | 308k | kern_size = 0.0; |
850 | 308k | space_size = 0.0; |
851 | 308k | y_min = 0.0; |
852 | 308k | y_max = 0.0; |
853 | 308k | initial_y_min = 0.0; |
854 | 308k | m = 0.0; |
855 | 308k | c = 0.0; |
856 | 308k | error = 0.0; |
857 | 308k | para_c = 0.0; |
858 | 308k | para_error = 0.0; |
859 | 308k | y_origin = 0.0; |
860 | 308k | credibility = 0.0; |
861 | 308k | num_repeated_sets_ = -1; |
862 | 308k | } |
863 | | |
864 | | /********************************************************************** |
865 | | * vertical_cblob_projection |
866 | | * |
867 | | * Compute the vertical projection of a cblob from its outlines |
868 | | * and add to the given STATS. |
869 | | **********************************************************************/ |
870 | | |
871 | | void vertical_cblob_projection( // project outlines |
872 | | C_BLOB *blob, // blob to project |
873 | | STATS *stats // output |
874 | 2.62M | ) { |
875 | | // outlines of blob |
876 | 2.62M | C_OUTLINE_IT out_it = blob->out_list(); |
877 | | |
878 | 5.25M | for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { |
879 | 2.63M | vertical_coutline_projection(out_it.data(), stats); |
880 | 2.63M | } |
881 | 2.62M | } |
882 | | |
883 | | /********************************************************************** |
884 | | * vertical_coutline_projection |
885 | | * |
886 | | * Compute the vertical projection of a outline from its outlines |
887 | | * and add to the given STATS. |
888 | | **********************************************************************/ |
889 | | |
890 | | void vertical_coutline_projection( // project outlines |
891 | | C_OUTLINE *outline, // outline to project |
892 | | STATS *stats // output |
893 | 2.80M | ) { |
894 | 2.80M | ICOORD pos; // current point |
895 | 2.80M | ICOORD step; // edge step |
896 | 2.80M | int32_t length; // of outline |
897 | 2.80M | int16_t stepindex; // current step |
898 | 2.80M | C_OUTLINE_IT out_it = outline->child(); |
899 | | |
900 | 2.80M | pos = outline->start_pos(); |
901 | 2.80M | length = outline->pathlength(); |
902 | 74.9M | for (stepindex = 0; stepindex < length; stepindex++) { |
903 | 72.1M | step = outline->step(stepindex); |
904 | 72.1M | if (step.x() > 0) { |
905 | 15.3M | stats->add(pos.x(), -pos.y()); |
906 | 56.7M | } else if (step.x() < 0) { |
907 | 15.3M | stats->add(pos.x() - 1, pos.y()); |
908 | 15.3M | } |
909 | 72.1M | pos += step; |
910 | 72.1M | } |
911 | | |
912 | 2.97M | for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { |
913 | 173k | vertical_coutline_projection(out_it.data(), stats); |
914 | 173k | } |
915 | 2.80M | } |
916 | | |
917 | | /********************************************************************** |
918 | | * TO_BLOCK::TO_BLOCK |
919 | | * |
920 | | * Constructor to make a TO_BLOCK from a real block. |
921 | | **********************************************************************/ |
922 | | |
923 | | TO_BLOCK::TO_BLOCK( // make a block |
924 | | BLOCK *src_block // real block |
925 | 16.6k | ) { |
926 | 16.6k | clear(); |
927 | 16.6k | block = src_block; |
928 | 16.6k | } |
929 | | |
930 | | /********************************************************************** |
931 | | * TO_BLOCK::clear |
932 | | * |
933 | | * Zero out all scalar members. |
934 | | **********************************************************************/ |
935 | 16.6k | void TO_BLOCK::clear() { |
936 | 16.6k | block = nullptr; |
937 | 16.6k | pitch_decision = PITCH_DUNNO; |
938 | 16.6k | line_spacing = 0.0; |
939 | 16.6k | line_size = 0.0; |
940 | 16.6k | max_blob_size = 0.0; |
941 | 16.6k | baseline_offset = 0.0; |
942 | 16.6k | xheight = 0.0; |
943 | 16.6k | fixed_pitch = 0.0; |
944 | 16.6k | kern_size = 0.0; |
945 | 16.6k | space_size = 0.0; |
946 | 16.6k | min_space = 0; |
947 | 16.6k | max_nonspace = 0; |
948 | 16.6k | fp_space = 0.0; |
949 | 16.6k | fp_nonsp = 0.0; |
950 | 16.6k | pr_space = 0.0; |
951 | 16.6k | pr_nonsp = 0.0; |
952 | 16.6k | key_row = nullptr; |
953 | 16.6k | } |
954 | | |
955 | 16.6k | TO_BLOCK::~TO_BLOCK() { |
956 | | // Any residual BLOBNBOXes at this stage own their blobs, so delete them. |
957 | 16.6k | BLOBNBOX::clear_blobnboxes(&blobs); |
958 | 16.6k | BLOBNBOX::clear_blobnboxes(&underlines); |
959 | 16.6k | BLOBNBOX::clear_blobnboxes(&noise_blobs); |
960 | 16.6k | BLOBNBOX::clear_blobnboxes(&small_blobs); |
961 | 16.6k | BLOBNBOX::clear_blobnboxes(&large_blobs); |
962 | 16.6k | } |
963 | | |
964 | | // Helper function to divide the input blobs over noise, small, medium |
965 | | // and large lists. Blobs small in height and (small in width or large in width) |
966 | | // go in the noise list. Dash (-) candidates go in the small list, and |
967 | | // medium and large are by height. |
968 | | // SIDE-EFFECT: reset all blobs to initial state by calling Init(). |
969 | | static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST *src_list, |
970 | | BLOBNBOX_LIST *noise_list, BLOBNBOX_LIST *small_list, |
971 | 0 | BLOBNBOX_LIST *medium_list, BLOBNBOX_LIST *large_list) { |
972 | 0 | BLOBNBOX_IT noise_it(noise_list); |
973 | 0 | BLOBNBOX_IT small_it(small_list); |
974 | 0 | BLOBNBOX_IT medium_it(medium_list); |
975 | 0 | BLOBNBOX_IT large_it(large_list); |
976 | 0 | for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { |
977 | 0 | BLOBNBOX *blob = src_it.extract(); |
978 | 0 | blob->ReInit(); |
979 | 0 | int width = blob->bounding_box().width(); |
980 | 0 | int height = blob->bounding_box().height(); |
981 | 0 | if (height < min_height && (width < min_height || width > max_height)) { |
982 | 0 | noise_it.add_after_then_move(blob); |
983 | 0 | } else if (height > max_height) { |
984 | 0 | large_it.add_after_then_move(blob); |
985 | 0 | } else if (height < min_height) { |
986 | 0 | small_it.add_after_then_move(blob); |
987 | 0 | } else { |
988 | 0 | medium_it.add_after_then_move(blob); |
989 | 0 | } |
990 | 0 | } |
991 | 0 | } |
992 | | |
993 | | // Reorganize the blob lists with a different definition of small, medium |
994 | | // and large, compared to the original definition. |
995 | | // Height is still the primary filter key, but medium width blobs of small |
996 | | // height become small, and very wide blobs of small height stay noise, along |
997 | | // with small dot-shaped blobs. |
998 | 0 | void TO_BLOCK::ReSetAndReFilterBlobs() { |
999 | 0 | int min_height = IntCastRounded(kMinMediumSizeRatio * line_size); |
1000 | 0 | int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size); |
1001 | 0 | BLOBNBOX_LIST noise_list; |
1002 | 0 | BLOBNBOX_LIST small_list; |
1003 | 0 | BLOBNBOX_LIST medium_list; |
1004 | 0 | BLOBNBOX_LIST large_list; |
1005 | 0 | SizeFilterBlobs(min_height, max_height, &blobs, &noise_list, &small_list, &medium_list, |
1006 | 0 | &large_list); |
1007 | 0 | SizeFilterBlobs(min_height, max_height, &large_blobs, &noise_list, &small_list, &medium_list, |
1008 | 0 | &large_list); |
1009 | 0 | SizeFilterBlobs(min_height, max_height, &small_blobs, &noise_list, &small_list, &medium_list, |
1010 | 0 | &large_list); |
1011 | 0 | SizeFilterBlobs(min_height, max_height, &noise_blobs, &noise_list, &small_list, &medium_list, |
1012 | 0 | &large_list); |
1013 | 0 | BLOBNBOX_IT blob_it(&blobs); |
1014 | 0 | blob_it.add_list_after(&medium_list); |
1015 | 0 | blob_it.set_to_list(&large_blobs); |
1016 | 0 | blob_it.add_list_after(&large_list); |
1017 | 0 | blob_it.set_to_list(&small_blobs); |
1018 | 0 | blob_it.add_list_after(&small_list); |
1019 | 0 | blob_it.set_to_list(&noise_blobs); |
1020 | 0 | blob_it.add_list_after(&noise_list); |
1021 | 0 | } |
1022 | | |
1023 | | // Deletes noise blobs from all lists where not owned by a ColPartition. |
1024 | 0 | void TO_BLOCK::DeleteUnownedNoise() { |
1025 | 0 | BLOBNBOX::CleanNeighbours(&blobs); |
1026 | 0 | BLOBNBOX::CleanNeighbours(&small_blobs); |
1027 | 0 | BLOBNBOX::CleanNeighbours(&noise_blobs); |
1028 | 0 | BLOBNBOX::CleanNeighbours(&large_blobs); |
1029 | 0 | BLOBNBOX::DeleteNoiseBlobs(&blobs); |
1030 | 0 | BLOBNBOX::DeleteNoiseBlobs(&small_blobs); |
1031 | 0 | BLOBNBOX::DeleteNoiseBlobs(&noise_blobs); |
1032 | 0 | BLOBNBOX::DeleteNoiseBlobs(&large_blobs); |
1033 | 0 | } |
1034 | | |
1035 | | // Computes and stores the edge offsets on each blob for use in feature |
1036 | | // extraction, using greyscale if the supplied grey and thresholds pixes |
1037 | | // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary |
1038 | | // edge step outlines. |
1039 | | // Thresholds must either be the same size as grey or an integer down-scale |
1040 | | // of grey. |
1041 | | // See coutln.h for an explanation of edge offsets. |
1042 | 16.6k | void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) { |
1043 | 16.6k | BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs); |
1044 | 16.6k | BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs); |
1045 | 16.6k | BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs); |
1046 | 16.6k | } |
1047 | | |
1048 | | #ifndef GRAPHICS_DISABLED |
1049 | | // Draw the noise blobs from all lists in red. |
1050 | | void TO_BLOCK::plot_noise_blobs(ScrollView *win) { |
1051 | | BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win); |
1052 | | BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win); |
1053 | | BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win); |
1054 | | BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win); |
1055 | | } |
1056 | | |
1057 | | // Draw the blobs on the various lists in the block in different colors. |
1058 | | void TO_BLOCK::plot_graded_blobs(ScrollView *win) { |
1059 | | BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win); |
1060 | | BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, win); |
1061 | | BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, win); |
1062 | | BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win); |
1063 | | } |
1064 | | |
1065 | | /********************************************************************** |
1066 | | * plot_blob_list |
1067 | | * |
1068 | | * Draw a list of blobs. |
1069 | | **********************************************************************/ |
1070 | | |
1071 | | void plot_blob_list(ScrollView *win, // window to draw in |
1072 | | BLOBNBOX_LIST *list, // blob list |
1073 | | ScrollView::Color body_colour, // colour to draw |
1074 | | ScrollView::Color child_colour) { // colour of child |
1075 | | BLOBNBOX_IT it = list; |
1076 | | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
1077 | | it.data()->plot(win, body_colour, child_colour); |
1078 | | } |
1079 | | } |
1080 | | #endif // !GRAPHICS_DISABLED |
1081 | | |
1082 | | } // namespace tesseract |