/src/tesseract/src/wordrec/findseam.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * File: findseam.cpp (Formerly findseam.c) |
4 | | * Author: Mark Seaman, OCR Technology |
5 | | * |
6 | | * (c) Copyright 1987, Hewlett-Packard Company. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | *****************************************************************************/ |
18 | | /*---------------------------------------------------------------------- |
19 | | I n c l u d e s |
20 | | ----------------------------------------------------------------------*/ |
21 | | #include "findseam.h" |
22 | | #include "outlines.h" |
23 | | #include "plotedges.h" |
24 | | #include "seam.h" |
25 | | #include "wordrec.h" |
26 | | |
27 | | // Include automatically generated configuration file if running autoconf. |
28 | | #ifdef HAVE_CONFIG_H |
29 | | # include "config_auto.h" |
30 | | #endif |
31 | | |
32 | | /********************************************************************** |
33 | | * partial_split_priority |
34 | | * |
35 | | * Assign a priority to this split based on the features that it has. |
36 | | * Grade it according to the different rating schemes and return the |
37 | | * value of its goodness. |
38 | | **********************************************************************/ |
39 | | |
40 | 3.26M | #define partial_split_priority(split) (grade_split_length(split) + grade_sharpness(split)) |
41 | | |
42 | | /*---------------------------------------------------------------------- |
43 | | T y p e s |
44 | | ----------------------------------------------------------------------*/ |
45 | 557M | #define SPLIT_CLOSENESS 20 /* Difference in x value */ |
46 | | /* How many to keep */ |
47 | 58.8M | #define MAX_NUM_SEAMS 150 |
48 | | /* How many to keep */ |
49 | 830k | #define NO_FULL_PRIORITY (-1) // Special marker for pri. |
50 | | /* Evaluate right away */ |
51 | 543k | #define BAD_PRIORITY 9999.0 |
52 | | |
53 | | /*---------------------------------------------------------------------- |
54 | | F u n c t i o n s |
55 | | ----------------------------------------------------------------------*/ |
56 | | namespace tesseract { |
57 | | |
58 | | /********************************************************************** |
59 | | * add_seam_to_queue |
60 | | * |
61 | | * Adds the given new_seam to the seams priority queue, unless it is full |
62 | | * and the new seam is worse than the worst. |
63 | | **********************************************************************/ |
64 | 58.4M | void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams) { |
65 | 58.4M | if (new_seam == nullptr) { |
66 | 0 | return; |
67 | 0 | } |
68 | 58.4M | if (chop_debug) { |
69 | 0 | tprintf("Pushing new seam with priority %g :", new_priority); |
70 | 0 | new_seam->Print("seam: "); |
71 | 0 | } |
72 | 58.4M | if (seams->size() >= MAX_NUM_SEAMS) { |
73 | 30.6M | SeamPair old_pair(0, nullptr); |
74 | 30.6M | if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) { |
75 | 13.8M | if (chop_debug) { |
76 | 0 | tprintf("Old seam staying with priority %g\n", old_pair.key()); |
77 | 0 | } |
78 | 13.8M | delete new_seam; |
79 | 13.8M | seams->Push(&old_pair); |
80 | 13.8M | return; |
81 | 16.7M | } else if (chop_debug) { |
82 | 0 | tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority, |
83 | 0 | old_pair.key()); |
84 | 0 | } |
85 | 30.6M | } |
86 | 44.5M | SeamPair new_pair(new_priority, new_seam); |
87 | 44.5M | seams->Push(&new_pair); |
88 | 44.5M | } |
89 | | |
90 | | /********************************************************************** |
91 | | * choose_best_seam |
92 | | * |
93 | | * Choose the best seam that can be created by assembling this a |
94 | | * collection of splits. A queue of all the possible seams is |
95 | | * maintained. Each new split received is placed in that queue with |
96 | | * its partial priority value. These values in the seam queue are |
97 | | * evaluated and combined until a good enough seam is found. If no |
98 | | * further good seams are being found then this function returns to the |
99 | | * caller, who will send more splits. If this function is called with |
100 | | * a split of nullptr, then no further splits can be supplied by the |
101 | | * caller. |
102 | | **********************************************************************/ |
103 | | void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, |
104 | 3.71M | SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) { |
105 | 3.71M | SEAM *seam; |
106 | 3.71M | float my_priority; |
107 | | /* Add seam of split */ |
108 | 3.71M | my_priority = priority; |
109 | 3.71M | if (split != nullptr) { |
110 | 3.26M | TPOINT split_point = split->point1->pos; |
111 | 3.26M | split_point += split->point2->pos; |
112 | 3.26M | split_point /= 2; |
113 | 3.26M | seam = new SEAM(my_priority, split_point, *split); |
114 | 3.26M | if (chop_debug > 1) { |
115 | 0 | seam->Print("Partial priority "); |
116 | 0 | } |
117 | 3.26M | add_seam_to_queue(my_priority, seam, seam_queue); |
118 | | |
119 | 3.26M | if (my_priority > chop_good_split) { |
120 | 387k | return; |
121 | 387k | } |
122 | 3.26M | } |
123 | | |
124 | 3.32M | TBOX bbox = blob->bounding_box(); |
125 | | /* Queue loop */ |
126 | 28.0M | while (!seam_queue->empty()) { |
127 | 26.8M | SeamPair seam_pair; |
128 | 26.8M | seam_queue->Pop(&seam_pair); |
129 | 26.8M | seam = seam_pair.extract_data(); |
130 | | /* Set full priority */ |
131 | 26.8M | my_priority = |
132 | 26.8M | seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth, |
133 | 26.8M | chop_center_knob, chop_width_change_knob); |
134 | 26.8M | if (chop_debug) { |
135 | 0 | char str[80]; |
136 | 0 | snprintf(str, sizeof(str), "Full my_priority %0.0f, ", my_priority); |
137 | 0 | seam->Print(str); |
138 | 0 | } |
139 | | |
140 | 26.8M | if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) && |
141 | 26.8M | my_priority < chop_ok_split) { |
142 | | /* No crossing */ |
143 | 183k | if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) { |
144 | 73.8k | delete *seam_result; |
145 | 73.8k | *seam_result = new SEAM(*seam); |
146 | 73.8k | (*seam_result)->set_priority(my_priority); |
147 | 109k | } else { |
148 | 109k | delete seam; |
149 | 109k | seam = nullptr; |
150 | 109k | my_priority = BAD_PRIORITY; |
151 | 109k | } |
152 | 183k | } |
153 | | |
154 | 26.8M | if (my_priority < chop_good_split) { |
155 | 141k | delete seam; |
156 | 141k | return; /* Made good answer */ |
157 | 141k | } |
158 | | |
159 | 26.6M | if (seam) { |
160 | | /* Combine with others */ |
161 | 26.5M | if (seam_pile->size() < chop_seam_pile_size) { |
162 | 4.08M | combine_seam(*seam_pile, seam, seam_queue); |
163 | 4.08M | SeamDecPair pair(seam_pair.key(), seam); |
164 | 4.08M | seam_pile->Push(&pair); |
165 | 22.4M | } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size && |
166 | 22.4M | seam_pile->PeekTop().key() > seam_pair.key()) { |
167 | 2.27M | combine_seam(*seam_pile, seam, seam_queue); |
168 | 2.27M | SeamDecPair pair; |
169 | 2.27M | seam_pile->Pop(&pair); // pop the worst. |
170 | | // Replace the seam in pair (deleting the old one) with |
171 | | // the new seam and score, then push back into the heap. |
172 | 2.27M | pair.set_key(seam_pair.key()); |
173 | 2.27M | pair.set_data(seam); |
174 | 2.27M | seam_pile->Push(&pair); |
175 | 20.2M | } else { |
176 | 20.2M | delete seam; |
177 | 20.2M | } |
178 | 26.5M | } |
179 | | |
180 | 26.6M | my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key(); |
181 | 26.6M | if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) { |
182 | 1.98M | return; |
183 | 1.98M | } |
184 | 26.6M | } |
185 | 3.32M | } |
186 | | |
187 | | /********************************************************************** |
188 | | * combine_seam |
189 | | * |
190 | | * Find other seams to combine with this one. The new seams that result |
191 | | * from this union should be added to the seam queue. The return value |
192 | | * tells whether or not any additional seams were added to the queue. |
193 | | **********************************************************************/ |
194 | 6.35M | void Wordrec::combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) { |
195 | 563M | for (int x = 0; x < seam_pile.size(); ++x) { |
196 | 557M | const SEAM *this_one = seam_pile.get(x).data(); |
197 | 557M | if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { |
198 | 55.1M | SEAM *new_one = new SEAM(*seam); |
199 | 55.1M | new_one->CombineWith(*this_one); |
200 | 55.1M | if (chop_debug > 1) { |
201 | 0 | new_one->Print("Combo priority "); |
202 | 0 | } |
203 | 55.1M | add_seam_to_queue(new_one->priority(), new_one, seam_queue); |
204 | 55.1M | } |
205 | 557M | } |
206 | 6.35M | } |
207 | | |
208 | | /********************************************************************** |
209 | | * pick_good_seam |
210 | | * |
211 | | * Find and return a good seam that will split this blob into two pieces. |
212 | | * Work from the outlines provided. |
213 | | **********************************************************************/ |
214 | 474k | SEAM *Wordrec::pick_good_seam(TBLOB *blob) { |
215 | 474k | SeamPile seam_pile(chop_seam_pile_size); |
216 | 474k | EDGEPT *points[MAX_NUM_POINTS]; |
217 | 474k | EDGEPT_CLIST new_points; |
218 | 474k | SEAM *seam = nullptr; |
219 | 474k | TESSLINE *outline; |
220 | 474k | int16_t num_points = 0; |
221 | | |
222 | | #ifndef GRAPHICS_DISABLED |
223 | | if (chop_debug > 2) { |
224 | | wordrec_display_splits.set_value(true); |
225 | | } |
226 | | |
227 | | draw_blob_edges(blob); |
228 | | #endif |
229 | | |
230 | 474k | PointHeap point_heap(MAX_NUM_POINTS); |
231 | 1.35M | for (outline = blob->outlines; outline; outline = outline->next) { |
232 | 876k | prioritize_points(outline, &point_heap); |
233 | 876k | } |
234 | | |
235 | 1.60M | while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { |
236 | 1.12M | points[num_points++] = point_heap.PeekTop().data(); |
237 | 1.12M | point_heap.Pop(nullptr); |
238 | 1.12M | } |
239 | | |
240 | | /* Initialize queue */ |
241 | 474k | SeamQueue seam_queue(MAX_NUM_SEAMS); |
242 | | |
243 | 474k | try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); |
244 | 474k | try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob); |
245 | | |
246 | 474k | if (seam == nullptr) { |
247 | 433k | choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile); |
248 | 433k | } else if (seam->priority() > chop_good_split) { |
249 | 15.6k | choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile); |
250 | 15.6k | } |
251 | | |
252 | 474k | EDGEPT_C_IT it(&new_points); |
253 | 2.16M | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
254 | 1.68M | EDGEPT *inserted_point = it.data(); |
255 | 1.68M | if (seam == nullptr || !seam->UsesPoint(inserted_point)) { |
256 | 12.9M | for (outline = blob->outlines; outline; outline = outline->next) { |
257 | 11.2M | if (outline->loop == inserted_point) { |
258 | 0 | outline->loop = outline->loop->next; |
259 | 0 | } |
260 | 11.2M | } |
261 | 1.65M | remove_edgept(inserted_point); |
262 | 1.65M | } |
263 | 1.68M | } |
264 | | |
265 | 474k | if (seam) { |
266 | 43.9k | if (seam->priority() > chop_ok_split) { |
267 | 0 | delete seam; |
268 | 0 | seam = nullptr; |
269 | 0 | } |
270 | | #ifndef GRAPHICS_DISABLED |
271 | | else if (wordrec_display_splits) { |
272 | | seam->Mark(edge_window); |
273 | | if (chop_debug > 2) { |
274 | | edge_window->Update(); |
275 | | edge_window->Wait(); |
276 | | } |
277 | | } |
278 | | #endif |
279 | 43.9k | } |
280 | | |
281 | 474k | if (chop_debug) { |
282 | 0 | wordrec_display_splits.set_value(false); |
283 | 0 | } |
284 | | |
285 | 474k | return (seam); |
286 | 474k | } |
287 | | |
288 | | /********************************************************************** |
289 | | * try_point_pairs |
290 | | * |
291 | | * Try all the splits that are produced by pairing critical points |
292 | | * together. See if any of them are suitable for use. Use a seam |
293 | | * queue and seam pile that have already been initialized and used. |
294 | | **********************************************************************/ |
295 | | void Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, |
296 | | SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, |
297 | 474k | TBLOB *blob) { |
298 | 474k | int16_t x; |
299 | 474k | int16_t y; |
300 | 474k | PRIORITY priority; |
301 | | |
302 | 1.60M | for (x = 0; x < num_points; x++) { |
303 | 11.1M | for (y = x + 1; y < num_points; y++) { |
304 | 10.0M | if (points[y] && |
305 | 10.0M | points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length && |
306 | 10.0M | points[x] != points[y]->next && points[y] != points[x]->next && |
307 | 10.0M | !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) { |
308 | 2.19M | SPLIT split(points[x], points[y]); |
309 | 2.19M | priority = partial_split_priority(&split); |
310 | | |
311 | 2.19M | choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); |
312 | 2.19M | } |
313 | 10.0M | } |
314 | 1.12M | } |
315 | 474k | } |
316 | | |
317 | | /********************************************************************** |
318 | | * try_vertical_splits |
319 | | * |
320 | | * Try all the splits that are produced by vertical projection to see |
321 | | * if any of them are suitable for use. Use a seam queue and seam pile |
322 | | * that have already been initialized and used. |
323 | | * Return in new_points a collection of points that were inserted into |
324 | | * the blob while examining vertical splits and which may safely be |
325 | | * removed once a seam is chosen if they are not part of the seam. |
326 | | **********************************************************************/ |
327 | | void Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, |
328 | | EDGEPT_CLIST *new_points, SeamQueue *seam_queue, |
329 | 474k | SeamPile *seam_pile, SEAM **seam, TBLOB *blob) { |
330 | 474k | EDGEPT *vertical_point = nullptr; |
331 | 474k | int16_t x; |
332 | 474k | PRIORITY priority; |
333 | 474k | TESSLINE *outline; |
334 | | |
335 | 1.60M | for (x = 0; x < num_points; x++) { |
336 | 1.12M | vertical_point = nullptr; |
337 | 7.12M | for (outline = blob->outlines; outline; outline = outline->next) { |
338 | 5.99M | vertical_projection_point(points[x], outline->loop, &vertical_point, new_points); |
339 | 5.99M | } |
340 | | |
341 | 1.12M | if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next && |
342 | 1.12M | points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) { |
343 | 1.07M | SPLIT split(points[x], vertical_point); |
344 | 1.07M | priority = partial_split_priority(&split); |
345 | 1.07M | choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); |
346 | 1.07M | } |
347 | 1.12M | } |
348 | 474k | } |
349 | | |
350 | | } // namespace tesseract |