/src/tesseract/src/wordrec/chop.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * File: chop.cpp (Formerly chop.c) |
4 | | * Author: Mark Seaman, OCR Technology |
5 | | * |
6 | | * (c) Copyright 1987, Hewlett-Packard Company. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | *****************************************************************************/ |
18 | | |
19 | | /*---------------------------------------------------------------------- |
20 | | I n c l u d e s |
21 | | ----------------------------------------------------------------------*/ |
22 | | |
23 | | #define _USE_MATH_DEFINES // for M_PI |
24 | | #include "chop.h" |
25 | | #include <cmath> // for M_PI |
26 | | #include "outlines.h" |
27 | | #include "plotedges.h" |
28 | | #include "wordrec.h" |
29 | | |
30 | | // Include automatically generated configuration file if running autoconf. |
31 | | #ifdef HAVE_CONFIG_H |
32 | | # include "config_auto.h" |
33 | | #endif |
34 | | |
35 | | namespace tesseract { |
36 | | |
37 | | // Show if the line is going in the positive or negative X direction. |
38 | 13.2M | static int direction(const EDGEPT *point) { |
39 | | //* direction to return |
40 | 13.2M | int dir = 0; |
41 | | //* prev point |
42 | 13.2M | const EDGEPT *prev = point->prev; |
43 | | //* next point |
44 | 13.2M | const EDGEPT *next = point->next; |
45 | | |
46 | 13.2M | if (((prev->pos.x <= point->pos.x) && (point->pos.x < next->pos.x)) || |
47 | 13.2M | ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) { |
48 | 4.44M | dir = 1; |
49 | 4.44M | } |
50 | 13.2M | if (((prev->pos.x >= point->pos.x) && (point->pos.x > next->pos.x)) || |
51 | 13.2M | ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) { |
52 | 4.46M | dir = -1; |
53 | 4.46M | } |
54 | | |
55 | 13.2M | return dir; |
56 | 13.2M | } |
57 | | |
58 | | /** |
59 | | * @name point_priority |
60 | | * |
61 | | * Assign a priority to and edge point that might be used as part of a |
62 | | * split. The argument should be of type EDGEPT. |
63 | | */ |
64 | 31.2M | PRIORITY Wordrec::point_priority(EDGEPT *point) { |
65 | 31.2M | return static_cast<PRIORITY>(angle_change(point->prev, point, point->next)); |
66 | 31.2M | } |
67 | | |
68 | | /** |
69 | | * @name add_point_to_list |
70 | | * |
71 | | * Add an edge point to a POINT_GROUP containing a list of other points. |
72 | | */ |
73 | 4.38M | void Wordrec::add_point_to_list(PointHeap *point_heap, EDGEPT *point) { |
74 | 4.38M | if (point_heap->size() < MAX_NUM_POINTS - 2) { |
75 | 3.78M | PointPair pair(point_priority(point), point); |
76 | 3.78M | point_heap->Push(&pair); |
77 | 3.78M | } |
78 | | |
79 | | #ifndef GRAPHICS_DISABLED |
80 | | if (chop_debug > 2) { |
81 | | mark_outline(point); |
82 | | } |
83 | | #endif |
84 | 4.38M | } |
85 | | |
86 | | // Returns true if the edgept supplied as input is an inside angle. This |
87 | | // is determined by the angular change of the vectors from point to point. |
88 | 5.42M | bool Wordrec::is_inside_angle(EDGEPT *pt) { |
89 | 5.42M | return angle_change(pt->prev, pt, pt->next) < chop_inside_angle; |
90 | 5.42M | } |
91 | | |
92 | | /** |
93 | | * @name angle_change |
94 | | * |
95 | | * Return the change in angle (degrees) of the line segments between |
96 | | * points one and two, and two and three. |
97 | | */ |
98 | 98.8M | int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { |
99 | 98.8M | VECTOR vector1; |
100 | 98.8M | VECTOR vector2; |
101 | | |
102 | 98.8M | int angle; |
103 | | |
104 | | /* Compute angle */ |
105 | 98.8M | vector1.x = point2->pos.x - point1->pos.x; |
106 | 98.8M | vector1.y = point2->pos.y - point1->pos.y; |
107 | 98.8M | vector2.x = point3->pos.x - point2->pos.x; |
108 | 98.8M | vector2.y = point3->pos.y - point2->pos.y; |
109 | | /* Use cross product */ |
110 | 98.8M | float length = std::sqrt(static_cast<float>(vector1.length()) * vector2.length()); |
111 | 98.8M | if (static_cast<int>(length) == 0) { |
112 | 375k | return (0); |
113 | 375k | } |
114 | 98.4M | angle = static_cast<int>(floor(std::asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5)); |
115 | | |
116 | | /* Use dot product */ |
117 | 98.4M | if (vector1.dot(vector2) < 0) { |
118 | 54.8M | angle = 180 - angle; |
119 | 54.8M | } |
120 | | /* Adjust angle */ |
121 | 98.4M | if (angle > 180) { |
122 | 43.2M | angle -= 360; |
123 | 43.2M | } |
124 | 98.4M | if (angle <= -180) { |
125 | 2.15k | angle += 360; |
126 | 2.15k | } |
127 | 98.4M | return (angle); |
128 | 98.8M | } |
129 | | |
130 | | /** |
131 | | * @name pick_close_point |
132 | | * |
133 | | * Choose the edge point that is closest to the critical point. This |
134 | | * point may not be exactly vertical from the critical point. |
135 | | */ |
136 | 37.7M | EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) { |
137 | 37.7M | EDGEPT *best_point = nullptr; |
138 | 37.7M | int this_distance; |
139 | 37.7M | bool found_better; |
140 | | |
141 | 37.7M | do { |
142 | 37.7M | found_better = false; |
143 | | |
144 | 37.7M | this_distance = edgept_dist(critical_point, vertical_point); |
145 | 37.7M | if (this_distance <= *best_dist) { |
146 | 12.3M | if (!(same_point(critical_point->pos, vertical_point->pos) || |
147 | 12.3M | same_point(critical_point->pos, vertical_point->next->pos) || |
148 | 12.3M | (best_point && same_point(best_point->pos, vertical_point->pos)) || |
149 | 12.3M | is_exterior_point(critical_point, vertical_point))) { |
150 | 9.03M | *best_dist = this_distance; |
151 | 9.03M | best_point = vertical_point; |
152 | 9.03M | if (chop_vertical_creep) { |
153 | 0 | found_better = true; |
154 | 0 | } |
155 | 9.03M | } |
156 | 12.3M | } |
157 | 37.7M | vertical_point = vertical_point->next; |
158 | 37.7M | } while (found_better == true); |
159 | | |
160 | 37.7M | return (best_point); |
161 | 37.7M | } |
162 | | |
163 | | /** |
164 | | * @name prioritize_points |
165 | | * |
166 | | * Find a list of edge points from the outer outline of this blob. For |
167 | | * each of these points assign a priority. Sort these points using a |
168 | | * heap structure so that they can be visited in order. |
169 | | */ |
170 | 3.07M | void Wordrec::prioritize_points(TESSLINE *outline, PointHeap *points) { |
171 | 3.07M | EDGEPT *this_point; |
172 | 3.07M | EDGEPT *local_min = nullptr; |
173 | 3.07M | EDGEPT *local_max = nullptr; |
174 | | |
175 | 3.07M | this_point = outline->loop; |
176 | 3.07M | local_min = this_point; |
177 | 3.07M | local_max = this_point; |
178 | 19.0M | do { |
179 | 19.0M | if (this_point->vec.y < 0) { |
180 | | /* Look for minima */ |
181 | 7.97M | if (local_max != nullptr) { |
182 | 5.48M | new_max_point(local_max, points); |
183 | 5.48M | } else if (is_inside_angle(this_point)) { |
184 | 481k | add_point_to_list(points, this_point); |
185 | 481k | } |
186 | 7.97M | local_max = nullptr; |
187 | 7.97M | local_min = this_point->next; |
188 | 11.1M | } else if (this_point->vec.y > 0) { |
189 | | /* Look for maxima */ |
190 | 7.65M | if (local_min != nullptr) { |
191 | 4.71M | new_min_point(local_min, points); |
192 | 4.71M | } else if (is_inside_angle(this_point)) { |
193 | 431k | add_point_to_list(points, this_point); |
194 | 431k | } |
195 | 7.65M | local_min = nullptr; |
196 | 7.65M | local_max = this_point->next; |
197 | 7.65M | } else { |
198 | | /* Flat area */ |
199 | 3.45M | if (local_max != nullptr) { |
200 | 2.30M | if (local_max->prev->vec.y != 0) { |
201 | 2.03M | new_max_point(local_max, points); |
202 | 2.03M | } |
203 | 2.30M | local_max = this_point->next; |
204 | 2.30M | local_min = nullptr; |
205 | 2.30M | } else { |
206 | 1.14M | if (local_min->prev->vec.y != 0) { |
207 | 996k | new_min_point(local_min, points); |
208 | 996k | } |
209 | 1.14M | local_min = this_point->next; |
210 | 1.14M | local_max = nullptr; |
211 | 1.14M | } |
212 | 3.45M | } |
213 | | |
214 | | /* Next point */ |
215 | 19.0M | this_point = this_point->next; |
216 | 19.0M | } while (this_point != outline->loop); |
217 | 3.07M | } |
218 | | |
219 | | /** |
220 | | * @name new_min_point |
221 | | * |
222 | | * Found a new minimum point try to decide whether to save it or not. |
223 | | * Return the new value for the local minimum. If a point is saved then |
224 | | * the local minimum is reset to nullptr. |
225 | | */ |
226 | 5.71M | void Wordrec::new_min_point(EDGEPT *local_min, PointHeap *points) { |
227 | 5.71M | int16_t dir; |
228 | | |
229 | 5.71M | dir = direction(local_min); |
230 | | |
231 | 5.71M | if (dir < 0) { |
232 | 1.10M | add_point_to_list(points, local_min); |
233 | 1.10M | return; |
234 | 1.10M | } |
235 | | |
236 | 4.60M | if (dir == 0 && point_priority(local_min) < 0) { |
237 | 368k | add_point_to_list(points, local_min); |
238 | 368k | return; |
239 | 368k | } |
240 | 4.60M | } |
241 | | |
242 | | /** |
243 | | * @name new_max_point |
244 | | * |
245 | | * Found a new minimum point try to decide whether to save it or not. |
246 | | * Return the new value for the local minimum. If a point is saved then |
247 | | * the local minimum is reset to nullptr. |
248 | | */ |
249 | 7.52M | void Wordrec::new_max_point(EDGEPT *local_max, PointHeap *points) { |
250 | 7.52M | int16_t dir; |
251 | | |
252 | 7.52M | dir = direction(local_max); |
253 | | |
254 | 7.52M | if (dir > 0) { |
255 | 1.33M | add_point_to_list(points, local_max); |
256 | 1.33M | return; |
257 | 1.33M | } |
258 | | |
259 | 6.18M | if (dir == 0 && point_priority(local_max) < 0) { |
260 | 656k | add_point_to_list(points, local_max); |
261 | 656k | return; |
262 | 656k | } |
263 | 6.18M | } |
264 | | |
265 | | /** |
266 | | * @name vertical_projection_point |
267 | | * |
268 | | * For one point on the outline, find the corresponding point on the |
269 | | * other side of the outline that is a likely projection for a split |
270 | | * point. This is done by iterating through the edge points until the |
271 | | * X value of the point being looked at is greater than the X value of |
272 | | * the split point. Ensure that the point being returned is not right |
273 | | * next to the split point. Return the edge point in *best_point as |
274 | | * a result, and any points that were newly created are also saved on |
275 | | * the new_points list. |
276 | | */ |
277 | | void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, |
278 | 19.5M | EDGEPT **best_point, EDGEPT_CLIST *new_points) { |
279 | 19.5M | EDGEPT *p; /* Iterator */ |
280 | 19.5M | EDGEPT *this_edgept; /* Iterator */ |
281 | 19.5M | EDGEPT_C_IT new_point_it(new_points); |
282 | 19.5M | int x = split_point->pos.x; /* X value of vertical */ |
283 | 19.5M | int best_dist = LARGE_DISTANCE; /* Best point found */ |
284 | | |
285 | 19.5M | if (*best_point != nullptr) { |
286 | 13.5M | best_dist = edgept_dist(split_point, *best_point); |
287 | 13.5M | } |
288 | | |
289 | 19.5M | p = target_point; |
290 | | /* Look at each edge point */ |
291 | 323M | do { |
292 | 323M | if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) && |
293 | 323M | !same_point(split_point->pos, p->pos) && !same_point(split_point->pos, p->next->pos) && |
294 | 323M | !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) { |
295 | 37.7M | if (near_point(split_point, p, p->next, &this_edgept)) { |
296 | 6.42M | new_point_it.add_before_then_move(this_edgept); |
297 | 6.42M | } |
298 | | |
299 | 37.7M | if (*best_point == nullptr) { |
300 | 6.08M | best_dist = edgept_dist(split_point, this_edgept); |
301 | 6.08M | } |
302 | | |
303 | 37.7M | this_edgept = pick_close_point(split_point, this_edgept, &best_dist); |
304 | 37.7M | if (this_edgept) { |
305 | 9.03M | *best_point = this_edgept; |
306 | 9.03M | } |
307 | 37.7M | } |
308 | | |
309 | 323M | p = p->next; |
310 | 323M | } while (p != target_point); |
311 | 19.5M | } |
312 | | |
313 | | } // namespace tesseract |