/src/tesseract/src/ccstruct/seam.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * File: seam.cpp (Formerly seam.c) |
4 | | * Author: Mark Seaman, OCR Technology |
5 | | * |
6 | | * (c) Copyright 1987, Hewlett-Packard Company. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | *****************************************************************************/ |
18 | | /*---------------------------------------------------------------------- |
19 | | I n c l u d e s |
20 | | ----------------------------------------------------------------------*/ |
21 | | #include "seam.h" |
22 | | |
23 | | #include "blobs.h" |
24 | | #include "tprintf.h" |
25 | | |
26 | | namespace tesseract { |
27 | | |
28 | | /*---------------------------------------------------------------------- |
29 | | Public Function Code |
30 | | ----------------------------------------------------------------------*/ |
31 | | |
32 | | // Returns the bounding box of all the points in the seam. |
33 | 0 | TBOX SEAM::bounding_box() const { |
34 | 0 | TBOX box(location_.x, location_.y, location_.x, location_.y); |
35 | 0 | for (int s = 0; s < num_splits_; ++s) { |
36 | 0 | box += splits_[s].bounding_box(); |
37 | 0 | } |
38 | 0 | return box; |
39 | 0 | } |
40 | | |
41 | | // Returns true if the splits in *this SEAM appear OK in the sense that they |
42 | | // do not cross any outlines and do not chop off any ridiculously small |
43 | | // pieces. |
44 | 713k | bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const { |
45 | | // TODO(rays) Try testing all the splits. Duplicating original code for now, |
46 | | // which tested only the first. |
47 | 713k | return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area); |
48 | 713k | } |
49 | | |
50 | | // Computes the widthp_/widthn_ range for all existing SEAMs and for *this |
51 | | // seam, which is about to be inserted at insert_index. Returns false if |
52 | | // any of the computations fails, as this indicates an invalid chop. |
53 | | // widthn_/widthp_ are only changed if modify is true. |
54 | | bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams, |
55 | 581k | const std::vector<TBLOB *> &blobs, int insert_index, bool modify) { |
56 | 4.76M | for (int s = 0; s < insert_index; ++s) { |
57 | 4.18M | if (!seams[s]->FindBlobWidth(blobs, s, modify)) { |
58 | 0 | return false; |
59 | 0 | } |
60 | 4.18M | } |
61 | 581k | if (!FindBlobWidth(blobs, insert_index, modify)) { |
62 | 0 | return false; |
63 | 0 | } |
64 | 4.64M | for (unsigned s = insert_index; s < seams.size(); ++s) { |
65 | 4.06M | if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) { |
66 | 0 | return false; |
67 | 0 | } |
68 | 4.06M | } |
69 | 581k | return true; |
70 | 581k | } |
71 | | |
72 | | // Computes the widthp_/widthn_ range. Returns false if not all the splits |
73 | | // are accounted for. widthn_/widthp_ are only changed if modify is true. |
74 | 8.83M | bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) { |
75 | 8.83M | int num_found = 0; |
76 | 8.83M | if (modify) { |
77 | 4.41M | widthp_ = 0; |
78 | 4.41M | widthn_ = 0; |
79 | 4.41M | } |
80 | 10.9M | for (int s = 0; s < num_splits_; ++s) { |
81 | 2.07M | const SPLIT &split = splits_[s]; |
82 | 2.07M | bool found_split = split.ContainedByBlob(*blobs[index]); |
83 | | // Look right. |
84 | 2.36M | for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) { |
85 | 284k | found_split = split.ContainedByBlob(*blobs[b]); |
86 | 284k | if (found_split && b - index > widthp_ && modify) { |
87 | 86.3k | widthp_ = b - index; |
88 | 86.3k | } |
89 | 284k | } |
90 | | // Look left. |
91 | 2.08M | for (int b = index - 1; !found_split && b >= 0; --b) { |
92 | 2.53k | found_split = split.ContainedByBlob(*blobs[b]); |
93 | 2.53k | if (found_split && index - b > widthn_ && modify) { |
94 | 808 | widthn_ = index - b; |
95 | 808 | } |
96 | 2.53k | } |
97 | 2.07M | if (found_split) { |
98 | 2.07M | ++num_found; |
99 | 2.07M | } |
100 | 2.07M | } |
101 | 8.83M | return num_found == num_splits_; |
102 | 8.83M | } |
103 | | |
104 | | // Splits this blob into two blobs by applying the splits included in |
105 | | // *this SEAM |
106 | 411k | void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const { |
107 | 607k | for (int s = 0; s < num_splits_; ++s) { |
108 | 196k | splits_[s].SplitOutlineList(blob->outlines); |
109 | 196k | } |
110 | 411k | blob->ComputeBoundingBoxes(); |
111 | | |
112 | 411k | divide_blobs(blob, other_blob, italic_blob, location_); |
113 | | |
114 | 411k | blob->EliminateDuplicateOutlines(); |
115 | 411k | other_blob->EliminateDuplicateOutlines(); |
116 | | |
117 | 411k | blob->CorrectBlobOrder(other_blob); |
118 | 411k | } |
119 | | |
120 | | // Undoes ApplySeam by removing the seam between these two blobs. |
121 | | // Produces one blob as a result, and deletes other_blob. |
122 | 120k | void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const { |
123 | 120k | if (blob->outlines == nullptr) { |
124 | 3.95k | blob->outlines = other_blob->outlines; |
125 | 3.95k | other_blob->outlines = nullptr; |
126 | 3.95k | } |
127 | | |
128 | 120k | TESSLINE *outline = blob->outlines; |
129 | 286k | while (outline->next) { |
130 | 165k | outline = outline->next; |
131 | 165k | } |
132 | 120k | outline->next = other_blob->outlines; |
133 | 120k | other_blob->outlines = nullptr; |
134 | 120k | delete other_blob; |
135 | | |
136 | 214k | for (int s = 0; s < num_splits_; ++s) { |
137 | 93.6k | splits_[s].UnsplitOutlineList(blob); |
138 | 93.6k | } |
139 | 120k | blob->ComputeBoundingBoxes(); |
140 | 120k | blob->EliminateDuplicateOutlines(); |
141 | 120k | } |
142 | | |
143 | | // Prints everything in *this SEAM. |
144 | 0 | void SEAM::Print(const char *label) const { |
145 | 0 | tprintf("%s", label); |
146 | 0 | tprintf(" %6.2f @ (%d,%d), p=%u, n=%u ", priority_, location_.x, location_.y, widthp_, widthn_); |
147 | 0 | for (int s = 0; s < num_splits_; ++s) { |
148 | 0 | splits_[s].Print(); |
149 | 0 | if (s + 1 < num_splits_) { |
150 | 0 | tprintf(", "); |
151 | 0 | } |
152 | 0 | } |
153 | 0 | tprintf("\n"); |
154 | 0 | } |
155 | | |
156 | | // Prints a collection of SEAMs. |
157 | | /* static */ |
158 | 0 | void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) { |
159 | 0 | if (!seams.empty()) { |
160 | 0 | tprintf("%s\n", label); |
161 | 0 | for (unsigned x = 0; x < seams.size(); ++x) { |
162 | 0 | tprintf("%2u: ", x); |
163 | 0 | seams[x]->Print(""); |
164 | 0 | } |
165 | 0 | tprintf("\n"); |
166 | 0 | } |
167 | 0 | } |
168 | | |
169 | | #ifndef GRAPHICS_DISABLED |
170 | | // Draws the seam in the given window. |
171 | | void SEAM::Mark(ScrollView *window) const { |
172 | | for (int s = 0; s < num_splits_; ++s) { |
173 | | splits_[s].Mark(window); |
174 | | } |
175 | | } |
176 | | #endif |
177 | | |
178 | | // Break up the blobs in this chain so that they are all independent. |
179 | | // This operation should undo the affect of join_pieces. |
180 | | /* static */ |
181 | | void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs, |
182 | 1.73M | int first, int last) { |
183 | 5.84M | for (int x = first; x < last; ++x) { |
184 | 4.11M | seams[x]->Reveal(); |
185 | 4.11M | } |
186 | | |
187 | 1.73M | TESSLINE *outline = blobs[first]->outlines; |
188 | 1.73M | int next_blob = first + 1; |
189 | | |
190 | 6.67M | while (outline != nullptr && next_blob <= last) { |
191 | 4.93M | if (outline->next == blobs[next_blob]->outlines) { |
192 | 4.11M | outline->next = nullptr; |
193 | 4.11M | outline = blobs[next_blob]->outlines; |
194 | 4.11M | ++next_blob; |
195 | 4.11M | } else { |
196 | 823k | outline = outline->next; |
197 | 823k | } |
198 | 4.93M | } |
199 | 1.73M | } |
200 | | |
201 | | // Join a group of base level pieces into a single blob that can then |
202 | | // be classified. |
203 | | /* static */ |
204 | | void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs, |
205 | 1.73M | int first, int last) { |
206 | 1.73M | TESSLINE *outline = blobs[first]->outlines; |
207 | 1.73M | if (!outline) { |
208 | 0 | return; |
209 | 0 | } |
210 | | |
211 | 5.84M | for (int x = first; x < last; ++x) { |
212 | 4.11M | SEAM *seam = seams[x]; |
213 | 4.11M | if (x - seam->widthn_ >= first && x + seam->widthp_ < last) { |
214 | 4.09M | seam->Hide(); |
215 | 4.09M | } |
216 | 7.31M | while (outline->next) { |
217 | 3.20M | outline = outline->next; |
218 | 3.20M | } |
219 | 4.11M | outline->next = blobs[x + 1]->outlines; |
220 | 4.11M | } |
221 | 1.73M | } |
222 | | |
223 | | // Hides the seam so the outlines appear not to be cut by it. |
224 | 4.09M | void SEAM::Hide() const { |
225 | 4.56M | for (int s = 0; s < num_splits_; ++s) { |
226 | 466k | splits_[s].Hide(); |
227 | 466k | } |
228 | 4.09M | } |
229 | | |
230 | | // Undoes hide, so the outlines are cut by the seam. |
231 | 4.11M | void SEAM::Reveal() const { |
232 | 4.60M | for (int s = 0; s < num_splits_; ++s) { |
233 | 493k | splits_[s].Reveal(); |
234 | 493k | } |
235 | 4.11M | } |
236 | | |
237 | | // Computes and returns, but does not set, the full priority of *this SEAM. |
238 | | float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, |
239 | 99.8M | double center_knob, double width_change_knob) const { |
240 | 99.8M | if (num_splits_ == 0) { |
241 | 0 | return 0.0f; |
242 | 0 | } |
243 | 236M | for (int s = 1; s < num_splits_; ++s) { |
244 | 136M | splits_[s].SplitOutline(); |
245 | 136M | } |
246 | 99.8M | float full_priority = |
247 | 99.8M | priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, center_knob, |
248 | 99.8M | width_change_knob); |
249 | 236M | for (int s = num_splits_ - 1; s >= 1; --s) { |
250 | 136M | splits_[s].UnsplitOutlines(); |
251 | 136M | } |
252 | 99.8M | return full_priority; |
253 | 99.8M | } |
254 | | |
255 | | /** |
256 | | * @name start_seam_list |
257 | | * |
258 | | * Initialize a list of seams that match the original number of blobs |
259 | | * present in the starting segmentation. Each of the seams created |
260 | | * by this routine have location information only. |
261 | | */ |
262 | 734k | void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) { |
263 | 734k | seam_array->clear(); |
264 | 734k | TPOINT location; |
265 | | |
266 | 3.84M | for (unsigned b = 1; b < word->NumBlobs(); ++b) { |
267 | 3.11M | TBOX bbox = word->blobs[b - 1]->bounding_box(); |
268 | 3.11M | TBOX nbox = word->blobs[b]->bounding_box(); |
269 | 3.11M | location.x = (bbox.right() + nbox.left()) / 2; |
270 | 3.11M | location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4; |
271 | 3.11M | seam_array->push_back(new SEAM(0.0f, location)); |
272 | 3.11M | } |
273 | 734k | } |
274 | | |
275 | | } // namespace tesseract |