/src/tesseract/src/ccstruct/split.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * File: split.cpp (Formerly split.c) |
4 | | * Author: Mark Seaman, OCR Technology |
5 | | * |
6 | | * (c) Copyright 1987, Hewlett-Packard Company. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | *************************************************************************/ |
18 | | |
19 | | // Include automatically generated configuration file if running autoconf. |
20 | | #ifdef HAVE_CONFIG_H |
21 | | # include "config_auto.h" |
22 | | #endif |
23 | | |
24 | | #include "split.h" |
25 | | |
26 | | #include "coutln.h" |
27 | | #include "tprintf.h" |
28 | | |
29 | | #include <algorithm> |
30 | | |
31 | | namespace tesseract { |
32 | | |
33 | | /*---------------------------------------------------------------------- |
34 | | V a r i a b l e s |
35 | | ----------------------------------------------------------------------*/ |
36 | | // Limit on the amount of penalty for the chop being off-center. |
37 | | const int kCenterGradeCap = 25; |
38 | | // Ridiculously large priority for splits that are no use. |
39 | | const double kBadPriority = 999.0; |
40 | | |
41 | | BOOL_VAR(wordrec_display_splits, 0, "Display splits"); |
42 | | |
43 | | // Hides the SPLIT so the outlines appear not to be cut by it. |
44 | 351k | void SPLIT::Hide() const { |
45 | 351k | EDGEPT *edgept = point1; |
46 | 351k | do { |
47 | 351k | edgept->Hide(); |
48 | 351k | edgept = edgept->next; |
49 | 351k | } while (!edgept->EqualPos(*point2) && edgept != point1); |
50 | 351k | edgept = point2; |
51 | 351k | do { |
52 | 351k | edgept->Hide(); |
53 | 351k | edgept = edgept->next; |
54 | 351k | } while (!edgept->EqualPos(*point1) && edgept != point2); |
55 | 351k | } |
56 | | |
57 | | // Undoes hide, so the outlines are cut by the SPLIT. |
58 | 374k | void SPLIT::Reveal() const { |
59 | 374k | EDGEPT *edgept = point1; |
60 | 374k | do { |
61 | 374k | edgept->Reveal(); |
62 | 374k | edgept = edgept->next; |
63 | 374k | } while (!edgept->EqualPos(*point2) && edgept != point1); |
64 | 374k | edgept = point2; |
65 | 374k | do { |
66 | 374k | edgept->Reveal(); |
67 | 374k | edgept = edgept->next; |
68 | 374k | } while (!edgept->EqualPos(*point1) && edgept != point2); |
69 | 374k | } |
70 | | |
71 | | // Compute a split priority based on the bounding boxes of the parts. |
72 | | // The arguments here are config parameters defined in Wordrec. Add chop_ |
73 | | // to the beginning of the name. |
74 | | float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, |
75 | 82.4M | double center_knob, double width_change_knob) const { |
76 | 82.4M | TBOX box1 = Box12(); |
77 | 82.4M | TBOX box2 = Box21(); |
78 | 82.4M | int min_left = std::min(box1.left(), box2.left()); |
79 | 82.4M | int max_right = std::max(box1.right(), box2.right()); |
80 | 82.4M | if (xmin < min_left && xmax > max_right) { |
81 | 22.8M | return kBadPriority; |
82 | 22.8M | } |
83 | | |
84 | 59.6M | float grade = 0.0f; |
85 | | // grade_overlap. |
86 | 59.6M | int width1 = box1.width(); |
87 | 59.6M | int width2 = box2.width(); |
88 | 59.6M | int min_width = std::min(width1, width2); |
89 | 59.6M | int overlap = -box1.x_gap(box2); |
90 | 59.6M | if (overlap == min_width) { |
91 | 51.5M | grade += 100.0f; // Total overlap. |
92 | 51.5M | } else { |
93 | 8.07M | if (2 * overlap > min_width) { |
94 | 5.08M | overlap += 2 * overlap - min_width; |
95 | 5.08M | } |
96 | 8.07M | if (overlap > 0) { |
97 | 7.34M | grade += overlap_knob * overlap; |
98 | 7.34M | } |
99 | 8.07M | } |
100 | | // grade_center_of_blob. |
101 | 59.6M | if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) { |
102 | 56.3M | grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2)); |
103 | 56.3M | } |
104 | | // grade_width_change. |
105 | 59.6M | float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2)); |
106 | 59.6M | if (width_change_grade > 0.0f) { |
107 | 56.7M | grade += width_change_grade * width_change_knob; |
108 | 56.7M | } |
109 | 59.6M | return grade; |
110 | 82.4M | } |
111 | | |
112 | | // Returns true if *this SPLIT appears OK in the sense that it does not cross |
113 | | // any outlines and does not chop off any ridiculously small pieces. |
114 | 493k | bool SPLIT::IsHealthy(const TBLOB &blob, int min_points, int min_area) const { |
115 | 493k | return !IsLittleChunk(min_points, min_area) && |
116 | 220k | !blob.SegmentCrossesOutline(point1->pos, point2->pos); |
117 | 493k | } |
118 | | |
119 | | // Returns true if the split generates a small chunk in terms of either area |
120 | | // or number of points. |
121 | 493k | bool SPLIT::IsLittleChunk(int min_points, int min_area) const { |
122 | 493k | if (point1->ShortNonCircularSegment(min_points, point2) && |
123 | 217k | point1->SegmentArea(point2) < min_area) { |
124 | 146k | return true; |
125 | 146k | } |
126 | 346k | if (point2->ShortNonCircularSegment(min_points, point1) && |
127 | 175k | point2->SegmentArea(point1) < min_area) { |
128 | 126k | return true; |
129 | 126k | } |
130 | 220k | return false; |
131 | 346k | } |
132 | | |
133 | | /********************************************************************** |
134 | | * make_edgept |
135 | | * |
136 | | * Create an EDGEPT and hook it into an existing list of edge points. |
137 | | **********************************************************************/ |
138 | 231M | EDGEPT *make_edgept(TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev) { |
139 | 231M | EDGEPT *this_edgept; |
140 | | /* Create point */ |
141 | 231M | this_edgept = new EDGEPT; |
142 | 231M | this_edgept->pos.x = x; |
143 | 231M | this_edgept->pos.y = y; |
144 | | // Now deal with the src_outline steps. |
145 | 231M | C_OUTLINE *prev_ol = prev->src_outline; |
146 | 231M | if (prev_ol != nullptr && prev->next == next) { |
147 | | // Compute the fraction of the segment that is being cut. |
148 | 0 | FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y); |
149 | 0 | FCOORD target_vec(x - prev->pos.x, y - prev->pos.y); |
150 | 0 | double cut_fraction = target_vec.length() / segment_vec.length(); |
151 | | // Get the start and end at the step level. |
152 | 0 | ICOORD step_start = prev_ol->position_at_index(prev->start_step); |
153 | 0 | int end_step = prev->start_step + prev->step_count; |
154 | 0 | int step_length = prev_ol->pathlength(); |
155 | 0 | ICOORD step_end = prev_ol->position_at_index(end_step % step_length); |
156 | 0 | ICOORD step_vec = step_end - step_start; |
157 | 0 | double target_length = step_vec.length() * cut_fraction; |
158 | | // Find the point on the segment that gives the length nearest to target. |
159 | 0 | int best_step = prev->start_step; |
160 | 0 | ICOORD total_step(0, 0); |
161 | 0 | double best_dist = target_length; |
162 | 0 | for (int s = prev->start_step; s < end_step; ++s) { |
163 | 0 | total_step += prev_ol->step(s % step_length); |
164 | 0 | double dist = fabs(target_length - total_step.length()); |
165 | 0 | if (dist < best_dist) { |
166 | 0 | best_dist = dist; |
167 | 0 | best_step = s + 1; |
168 | 0 | } |
169 | 0 | } |
170 | | // The new point is an intermediate point. |
171 | 0 | this_edgept->src_outline = prev_ol; |
172 | 0 | this_edgept->step_count = end_step - best_step; |
173 | 0 | this_edgept->start_step = best_step % step_length; |
174 | 0 | prev->step_count = best_step - prev->start_step; |
175 | 231M | } else { |
176 | | // The new point is poly only. |
177 | 231M | this_edgept->src_outline = nullptr; |
178 | 231M | this_edgept->step_count = 0; |
179 | 231M | this_edgept->start_step = 0; |
180 | 231M | } |
181 | | /* Hook it up */ |
182 | 231M | this_edgept->next = next; |
183 | 231M | this_edgept->prev = prev; |
184 | 231M | prev->next = this_edgept; |
185 | 231M | next->prev = this_edgept; |
186 | | /* Set up vec entries */ |
187 | 231M | this_edgept->vec.x = this_edgept->next->pos.x - x; |
188 | 231M | this_edgept->vec.y = this_edgept->next->pos.y - y; |
189 | 231M | this_edgept->prev->vec.x = x - this_edgept->prev->pos.x; |
190 | 231M | this_edgept->prev->vec.y = y - this_edgept->prev->pos.y; |
191 | 231M | return this_edgept; |
192 | 231M | } |
193 | | |
194 | | /********************************************************************** |
195 | | * remove_edgept |
196 | | * |
197 | | * Remove a given EDGEPT from its list and delete it. |
198 | | **********************************************************************/ |
199 | 4.17M | void remove_edgept(EDGEPT *point) { |
200 | 4.17M | EDGEPT *prev = point->prev; |
201 | 4.17M | EDGEPT *next = point->next; |
202 | | // Add point's steps onto prev's steps if they are from the same outline. |
203 | 4.17M | if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) { |
204 | 0 | prev->step_count += point->step_count; |
205 | 0 | } |
206 | 4.17M | prev->next = next; |
207 | 4.17M | next->prev = prev; |
208 | 4.17M | prev->vec.x = next->pos.x - prev->pos.x; |
209 | 4.17M | prev->vec.y = next->pos.y - prev->pos.y; |
210 | 4.17M | delete point; |
211 | 4.17M | } |
212 | | |
213 | | /********************************************************************** |
214 | | * Print |
215 | | * |
216 | | * Shows the coordinates of both points in a split. |
217 | | **********************************************************************/ |
218 | 0 | void SPLIT::Print() const { |
219 | 0 | tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); |
220 | 0 | } |
221 | | |
222 | | #ifndef GRAPHICS_DISABLED |
223 | | // Draws the split in the given window. |
224 | | void SPLIT::Mark(ScrollView *window) const { |
225 | | window->Pen(ScrollView::GREEN); |
226 | | window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); |
227 | | window->UpdateWindow(); |
228 | | } |
229 | | #endif |
230 | | |
231 | | // Creates two outlines out of one by splitting the original one in half. |
232 | | // Inserts the resulting outlines into the given list. |
233 | 137k | void SPLIT::SplitOutlineList(TESSLINE *outlines) const { |
234 | 137k | SplitOutline(); |
235 | 582k | while (outlines->next != nullptr) { |
236 | 444k | outlines = outlines->next; |
237 | 444k | } |
238 | | |
239 | 137k | outlines->next = new TESSLINE; |
240 | 137k | outlines->next->loop = point1; |
241 | 137k | outlines->next->ComputeBoundingBox(); |
242 | | |
243 | 137k | outlines = outlines->next; |
244 | | |
245 | 137k | outlines->next = new TESSLINE; |
246 | 137k | outlines->next->loop = point2; |
247 | 137k | outlines->next->ComputeBoundingBox(); |
248 | | |
249 | 137k | outlines->next->next = nullptr; |
250 | 137k | } |
251 | | |
252 | | // Makes a split between these two edge points, but does not affect the |
253 | | // outlines to which they belong. |
254 | 113M | void SPLIT::SplitOutline() const { |
255 | 113M | EDGEPT *temp2 = point2->next; |
256 | 113M | EDGEPT *temp1 = point1->next; |
257 | | /* Create two new points */ |
258 | 113M | EDGEPT *new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2); |
259 | 113M | EDGEPT *new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1); |
260 | | // point1 and 2 are now cross-over points, so they must have nullptr |
261 | | // src_outlines and give their src_outline information their new |
262 | | // replacements. |
263 | 113M | new_point1->src_outline = point1->src_outline; |
264 | 113M | new_point1->start_step = point1->start_step; |
265 | 113M | new_point1->step_count = point1->step_count; |
266 | 113M | new_point2->src_outline = point2->src_outline; |
267 | 113M | new_point2->start_step = point2->start_step; |
268 | 113M | new_point2->step_count = point2->step_count; |
269 | 113M | point1->src_outline = nullptr; |
270 | 113M | point1->start_step = 0; |
271 | 113M | point1->step_count = 0; |
272 | 113M | point2->src_outline = nullptr; |
273 | 113M | point2->start_step = 0; |
274 | 113M | point2->step_count = 0; |
275 | 113M | } |
276 | | |
277 | | // Undoes the effect of SplitOutlineList, correcting the outlines for undoing |
278 | | // the split, but possibly leaving some duplicate outlines. |
279 | 65.1k | void SPLIT::UnsplitOutlineList(TBLOB *blob) const { |
280 | | /* Modify edge points */ |
281 | 65.1k | UnsplitOutlines(); |
282 | | |
283 | 65.1k | auto *outline1 = new TESSLINE; |
284 | 65.1k | outline1->next = blob->outlines; |
285 | 65.1k | blob->outlines = outline1; |
286 | 65.1k | outline1->loop = point1; |
287 | | |
288 | 65.1k | auto *outline2 = new TESSLINE; |
289 | 65.1k | outline2->next = blob->outlines; |
290 | 65.1k | blob->outlines = outline2; |
291 | 65.1k | outline2->loop = point2; |
292 | 65.1k | } |
293 | | |
294 | | // Removes the split that was put between these two points. |
295 | 113M | void SPLIT::UnsplitOutlines() const { |
296 | 113M | EDGEPT *tmp1 = point1->next; |
297 | 113M | EDGEPT *tmp2 = point2->next; |
298 | | |
299 | 113M | tmp1->next->prev = point2; |
300 | 113M | tmp2->next->prev = point1; |
301 | | |
302 | | // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is |
303 | | // deleted. |
304 | 113M | point1->next = tmp2->next; |
305 | 113M | point1->src_outline = tmp2->src_outline; |
306 | 113M | point1->start_step = tmp2->start_step; |
307 | 113M | point1->step_count = tmp2->step_count; |
308 | | // Likewise point2 takes tmp1's place. |
309 | 113M | point2->next = tmp1->next; |
310 | 113M | point2->src_outline = tmp1->src_outline; |
311 | 113M | point2->start_step = tmp1->start_step; |
312 | 113M | point2->step_count = tmp1->step_count; |
313 | | |
314 | 113M | delete tmp1; |
315 | 113M | delete tmp2; |
316 | | |
317 | 113M | point1->vec.x = point1->next->pos.x - point1->pos.x; |
318 | 113M | point1->vec.y = point1->next->pos.y - point1->pos.y; |
319 | | |
320 | 113M | point2->vec.x = point2->next->pos.x - point2->pos.x; |
321 | 113M | point2->vec.y = point2->next->pos.y - point2->pos.y; |
322 | 113M | } |
323 | | |
324 | | } // namespace tesseract |