/src/tesseract/src/textord/scanedg.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | * File: scanedg.cpp (Formerly scanedge.c) |
3 | | * Description: Raster scanning crack based edge extractor. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1991, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #include "scanedg.h" |
20 | | |
21 | | #include "crakedge.h" |
22 | | #include "edgloop.h" |
23 | | #include "pdblock.h" |
24 | | |
25 | | #include <allheaders.h> |
26 | | |
27 | | #include <memory> // std::unique_ptr |
28 | | |
29 | | namespace tesseract { |
30 | | |
31 | 6.29M | #define WHITE_PIX 1 /*thresholded colours */ |
32 | | #define BLACK_PIX 0 |
33 | | // Flips between WHITE_PIX and BLACK_PIX. |
34 | 57.6M | #define FLIP_COLOUR(pix) (1 - (pix)) |
35 | | |
36 | | struct CrackPos { |
37 | | CRACKEDGE **free_cracks; // Freelist for fast allocation. |
38 | | int x; // Position of new edge. |
39 | | int y; |
40 | | }; |
41 | | |
42 | | static void free_crackedges(CRACKEDGE *start); |
43 | | |
44 | | static void join_edges(CRACKEDGE *edge1, CRACKEDGE *edge2, CRACKEDGE **free_cracks, |
45 | | C_OUTLINE_IT *outline_it); |
46 | | |
47 | | static void line_edges(TDimension x, TDimension y, TDimension xext, uint8_t uppercolour, uint8_t *bwpos, |
48 | | CRACKEDGE **prevline, CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it); |
49 | | |
50 | | static void make_margins(PDBLK *block, BLOCK_LINE_IT *line_it, uint8_t *pixels, uint8_t margin, |
51 | | TDimension left, TDimension right, TDimension y); |
52 | | |
53 | | static CRACKEDGE *h_edge(int sign, CRACKEDGE *join, CrackPos *pos); |
54 | | static CRACKEDGE *v_edge(int sign, CRACKEDGE *join, CrackPos *pos); |
55 | | |
56 | | /********************************************************************** |
57 | | * block_edges |
58 | | * |
59 | | * Extract edges from a PDBLK. |
60 | | **********************************************************************/ |
61 | | |
62 | | void block_edges(Image t_pix, // thresholded image |
63 | | PDBLK *block, // block in image |
64 | 16.2k | C_OUTLINE_IT *outline_it) { |
65 | 16.2k | ICOORD bleft; // bounding box |
66 | 16.2k | ICOORD tright; |
67 | 16.2k | BLOCK_LINE_IT line_it = block; // line iterator |
68 | | |
69 | 16.2k | int width = pixGetWidth(t_pix); |
70 | 16.2k | int height = pixGetHeight(t_pix); |
71 | 16.2k | int wpl = pixGetWpl(t_pix); |
72 | | // lines in progress |
73 | 16.2k | std::unique_ptr<CRACKEDGE *[]> ptrline(new CRACKEDGE *[width + 1]); |
74 | 16.2k | CRACKEDGE *free_cracks = nullptr; |
75 | | |
76 | 16.2k | block->bounding_box(bleft, tright); // block box |
77 | 16.2k | ASSERT_HOST(tright.x() <= width); |
78 | 16.2k | ASSERT_HOST(tright.y() <= height); |
79 | 16.2k | int block_width = tright.x() - bleft.x(); |
80 | 5.16M | for (int x = block_width; x >= 0; x--) { |
81 | 5.14M | ptrline[x] = nullptr; // no lines in progress |
82 | 5.14M | } |
83 | | |
84 | 16.2k | std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]); |
85 | | |
86 | 16.2k | const uint8_t margin = WHITE_PIX; |
87 | | |
88 | 2.98M | for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) { |
89 | 2.97M | if (y >= bleft.y() && y < tright.y()) { |
90 | | // Get the binary pixels from the image. |
91 | 2.95M | l_uint32 *line = pixGetData(t_pix) + wpl * (height - 1 - y); |
92 | 1.19G | for (int x = 0; x < block_width; ++x) { |
93 | 1.19G | bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1; |
94 | 1.19G | } |
95 | 2.95M | make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y); |
96 | 2.95M | } else { |
97 | 16.2k | memset(bwline.get(), margin, block_width * sizeof(bwline[0])); |
98 | 16.2k | } |
99 | 2.97M | line_edges(bleft.x(), y, block_width, margin, bwline.get(), ptrline.get(), &free_cracks, |
100 | 2.97M | outline_it); |
101 | 2.97M | } |
102 | | |
103 | 16.2k | free_crackedges(free_cracks); // really free them |
104 | 16.2k | } |
105 | | |
106 | | /********************************************************************** |
107 | | * make_margins |
108 | | * |
109 | | * Get an image line and set to margin non-text pixels. |
110 | | **********************************************************************/ |
111 | | |
112 | | static void make_margins( // get a line |
113 | | PDBLK *block, // block in image |
114 | | BLOCK_LINE_IT *line_it, // for old style |
115 | | uint8_t *pixels, // pixels to strip |
116 | | uint8_t margin, // white-out pixel |
117 | | TDimension left, // block edges |
118 | | TDimension right, |
119 | | TDimension y // line coord |
120 | 2.95M | ) { |
121 | 2.95M | ICOORDELT_IT seg_it; |
122 | | |
123 | 2.95M | if (block->poly_block() != nullptr) { |
124 | 0 | std::unique_ptr<PB_LINE_IT> lines(new PB_LINE_IT(block->poly_block())); |
125 | 0 | const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line(y)); |
126 | 0 | if (!segments->empty()) { |
127 | 0 | seg_it.set_to_list(segments.get()); |
128 | 0 | seg_it.mark_cycle_pt(); |
129 | 0 | auto start = seg_it.data()->x(); |
130 | 0 | auto xext = seg_it.data()->y(); |
131 | 0 | for (auto xindex = left; xindex < right; xindex++) { |
132 | 0 | if (xindex >= start && !seg_it.cycled_list()) { |
133 | 0 | xindex = start + xext - 1; |
134 | 0 | seg_it.forward(); |
135 | 0 | start = seg_it.data()->x(); |
136 | 0 | xext = seg_it.data()->y(); |
137 | 0 | } else { |
138 | 0 | pixels[xindex - left] = margin; |
139 | 0 | } |
140 | 0 | } |
141 | 0 | } else { |
142 | 0 | for (auto xindex = left; xindex < right; xindex++) { |
143 | 0 | pixels[xindex - left] = margin; |
144 | 0 | } |
145 | 0 | } |
146 | 2.95M | } else { |
147 | 2.95M | TDimension xext; // of segment |
148 | 2.95M | auto start = line_it->get_line(y, xext); |
149 | 2.95M | for (auto xindex = left; xindex < start; xindex++) { |
150 | 0 | pixels[xindex - left] = margin; |
151 | 0 | } |
152 | 2.95M | for (auto xindex = start + xext; xindex < right; xindex++) { |
153 | 0 | pixels[xindex - left] = margin; |
154 | 0 | } |
155 | 2.95M | } |
156 | 2.95M | } |
157 | | |
158 | | /********************************************************************** |
159 | | * line_edges |
160 | | * |
161 | | * Scan a line for edges and update the edges in progress. |
162 | | * When edges close into loops, send them for approximation. |
163 | | **********************************************************************/ |
164 | | |
165 | | static void line_edges(TDimension x, // coord of line start |
166 | | TDimension y, // coord of line |
167 | | TDimension xext, // width of line |
168 | | uint8_t uppercolour, // start of prev line |
169 | | uint8_t *bwpos, // thresholded line |
170 | | CRACKEDGE **prevline, // edges in progress |
171 | 2.97M | CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it) { |
172 | 2.97M | CrackPos pos = {free_cracks, x, y}; |
173 | 2.97M | int xmax; // max x coord |
174 | 2.97M | int prevcolour; // of previous pixel |
175 | 2.97M | CRACKEDGE *current; // current h edge |
176 | 2.97M | CRACKEDGE *newcurrent; // new h edge |
177 | | |
178 | 2.97M | xmax = x + xext; // max allowable coord |
179 | 2.97M | prevcolour = uppercolour; // forced plain margin |
180 | 2.97M | current = nullptr; // nothing yet |
181 | | |
182 | | // do each pixel |
183 | 1.20G | for (; pos.x < xmax; pos.x++, prevline++) { |
184 | 1.19G | const int colour = *bwpos++; // current pixel |
185 | 1.19G | if (*prevline != nullptr) { |
186 | | // changed above |
187 | | // change colour |
188 | 57.6M | uppercolour = FLIP_COLOUR(uppercolour); |
189 | 57.6M | if (colour == prevcolour) { |
190 | 23.3M | if (colour == uppercolour) { |
191 | | // finish a line |
192 | 11.5M | join_edges(current, *prevline, free_cracks, outline_it); |
193 | 11.5M | current = nullptr; // no edge now |
194 | 11.7M | } else { |
195 | | // new horiz edge |
196 | 11.7M | current = h_edge(uppercolour - colour, *prevline, &pos); |
197 | 11.7M | } |
198 | 23.3M | *prevline = nullptr; // no change this time |
199 | 34.3M | } else { |
200 | 34.3M | if (colour == uppercolour) { |
201 | 28.0M | *prevline = v_edge(colour - prevcolour, *prevline, &pos); |
202 | | // 8 vs 4 connection |
203 | 28.0M | } else if (colour == WHITE_PIX) { |
204 | 3.20M | join_edges(current, *prevline, free_cracks, outline_it); |
205 | 3.20M | current = h_edge(uppercolour - colour, nullptr, &pos); |
206 | 3.20M | *prevline = v_edge(colour - prevcolour, current, &pos); |
207 | 3.20M | } else { |
208 | 3.06M | newcurrent = h_edge(uppercolour - colour, *prevline, &pos); |
209 | 3.06M | *prevline = v_edge(colour - prevcolour, current, &pos); |
210 | 3.06M | current = newcurrent; // right going h edge |
211 | 3.06M | } |
212 | 34.3M | prevcolour = colour; // remember new colour |
213 | 34.3M | } |
214 | 1.14G | } else { |
215 | 1.14G | if (colour != prevcolour) { |
216 | 23.3M | *prevline = current = v_edge(colour - prevcolour, current, &pos); |
217 | 23.3M | prevcolour = colour; |
218 | 23.3M | } |
219 | 1.14G | if (colour != uppercolour) { |
220 | 33.1M | current = h_edge(uppercolour - colour, current, &pos); |
221 | 1.10G | } else { |
222 | 1.10G | current = nullptr; // no edge now |
223 | 1.10G | } |
224 | 1.14G | } |
225 | 1.19G | } |
226 | 2.97M | if (current != nullptr) { |
227 | | // out of block |
228 | 44.6k | if (*prevline != nullptr) { // got one to join to? |
229 | 22.3k | join_edges(current, *prevline, free_cracks, outline_it); |
230 | 22.3k | *prevline = nullptr; // tidy now |
231 | 22.3k | } else { |
232 | | // fake vertical |
233 | 22.3k | *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, current, &pos); |
234 | 22.3k | } |
235 | 2.92M | } else if (*prevline != nullptr) { |
236 | | // continue fake |
237 | 26.1k | *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, *prevline, &pos); |
238 | 26.1k | } |
239 | 2.97M | } |
240 | | |
241 | | /********************************************************************** |
242 | | * h_edge |
243 | | * |
244 | | * Create a new horizontal CRACKEDGE and join it to the given edge. |
245 | | **********************************************************************/ |
246 | | |
247 | | static CRACKEDGE *h_edge(int sign, // sign of edge |
248 | | CRACKEDGE *join, // edge to join to |
249 | 51.2M | CrackPos *pos) { |
250 | 51.2M | CRACKEDGE *newpt; // return value |
251 | | |
252 | 51.2M | if (*pos->free_cracks != nullptr) { |
253 | 44.2M | newpt = *pos->free_cracks; |
254 | 44.2M | *pos->free_cracks = newpt->next; // get one fast |
255 | 44.2M | } else { |
256 | 6.95M | newpt = new CRACKEDGE; |
257 | 6.95M | } |
258 | 51.2M | newpt->pos.set_y(pos->y + 1); // coords of pt |
259 | 51.2M | newpt->stepy = 0; // edge is horizontal |
260 | | |
261 | 51.2M | if (sign > 0) { |
262 | 25.6M | newpt->pos.set_x(pos->x + 1); // start location |
263 | 25.6M | newpt->stepx = -1; |
264 | 25.6M | newpt->stepdir = 0; |
265 | 25.6M | } else { |
266 | 25.6M | newpt->pos.set_x(pos->x); // start location |
267 | 25.6M | newpt->stepx = 1; |
268 | 25.6M | newpt->stepdir = 2; |
269 | 25.6M | } |
270 | | |
271 | 51.2M | if (join == nullptr) { |
272 | 3.20M | newpt->next = newpt; // ptrs to other ends |
273 | 3.20M | newpt->prev = newpt; |
274 | 48.0M | } else { |
275 | 48.0M | if (newpt->pos.x() + newpt->stepx == join->pos.x() && newpt->pos.y() == join->pos.y()) { |
276 | 25.6M | newpt->prev = join->prev; // update other ends |
277 | 25.6M | newpt->prev->next = newpt; |
278 | 25.6M | newpt->next = join; // join up |
279 | 25.6M | join->prev = newpt; |
280 | 25.6M | } else { |
281 | 22.4M | newpt->next = join->next; // update other ends |
282 | 22.4M | newpt->next->prev = newpt; |
283 | 22.4M | newpt->prev = join; // join up |
284 | 22.4M | join->next = newpt; |
285 | 22.4M | } |
286 | 48.0M | } |
287 | 51.2M | return newpt; |
288 | 51.2M | } |
289 | | |
290 | | /********************************************************************** |
291 | | * v_edge |
292 | | * |
293 | | * Create a new vertical CRACKEDGE and join it to the given edge. |
294 | | **********************************************************************/ |
295 | | |
296 | | static CRACKEDGE *v_edge(int sign, // sign of edge |
297 | 57.6M | CRACKEDGE *join, CrackPos *pos) { |
298 | 57.6M | CRACKEDGE *newpt; // return value |
299 | | |
300 | 57.6M | if (*pos->free_cracks != nullptr) { |
301 | 49.6M | newpt = *pos->free_cracks; |
302 | 49.6M | *pos->free_cracks = newpt->next; // get one fast |
303 | 49.6M | } else { |
304 | 7.98M | newpt = new CRACKEDGE; |
305 | 7.98M | } |
306 | 57.6M | newpt->pos.set_x(pos->x); // coords of pt |
307 | 57.6M | newpt->stepx = 0; // edge is vertical |
308 | | |
309 | 57.6M | if (sign > 0) { |
310 | 28.8M | newpt->pos.set_y(pos->y); // start location |
311 | 28.8M | newpt->stepy = 1; |
312 | 28.8M | newpt->stepdir = 3; |
313 | 28.8M | } else { |
314 | 28.8M | newpt->pos.set_y(pos->y + 1); // start location |
315 | 28.8M | newpt->stepy = -1; |
316 | 28.8M | newpt->stepdir = 1; |
317 | 28.8M | } |
318 | | |
319 | 57.6M | if (join == nullptr) { |
320 | 11.5M | newpt->next = newpt; // ptrs to other ends |
321 | 11.5M | newpt->prev = newpt; |
322 | 46.1M | } else { |
323 | 46.1M | if (newpt->pos.x() == join->pos.x() && newpt->pos.y() + newpt->stepy == join->pos.y()) { |
324 | 25.1M | newpt->prev = join->prev; // update other ends |
325 | 25.1M | newpt->prev->next = newpt; |
326 | 25.1M | newpt->next = join; // join up |
327 | 25.1M | join->prev = newpt; |
328 | 25.1M | } else { |
329 | 20.9M | newpt->next = join->next; // update other ends |
330 | 20.9M | newpt->next->prev = newpt; |
331 | 20.9M | newpt->prev = join; // join up |
332 | 20.9M | join->next = newpt; |
333 | 20.9M | } |
334 | 46.1M | } |
335 | 57.6M | return newpt; |
336 | 57.6M | } |
337 | | |
338 | | /********************************************************************** |
339 | | * join_edges |
340 | | * |
341 | | * Join 2 edges together. Send the outline for approximation when a |
342 | | * closed loop is formed. |
343 | | **********************************************************************/ |
344 | | |
345 | | static void join_edges(CRACKEDGE *edge1, // edges to join |
346 | | CRACKEDGE *edge2, // no specific order |
347 | 14.7M | CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it) { |
348 | 14.7M | if (edge1->pos.x() + edge1->stepx != edge2->pos.x() || |
349 | 7.88M | edge1->pos.y() + edge1->stepy != edge2->pos.y()) { |
350 | 6.85M | CRACKEDGE *tempedge = edge1; |
351 | 6.85M | edge1 = edge2; // swap around |
352 | 6.85M | edge2 = tempedge; |
353 | 6.85M | } |
354 | | |
355 | 14.7M | if (edge1->next == edge2) { |
356 | | // already closed |
357 | 6.43M | complete_edge(edge1, outline_it); |
358 | | // attach freelist to end |
359 | 6.43M | edge1->prev->next = *free_cracks; |
360 | 6.43M | *free_cracks = edge1; // and free list |
361 | 8.31M | } else { |
362 | | // update opposite ends |
363 | 8.31M | edge2->prev->next = edge1->next; |
364 | 8.31M | edge1->next->prev = edge2->prev; |
365 | 8.31M | edge1->next = edge2; // make joins |
366 | 8.31M | edge2->prev = edge1; |
367 | 8.31M | } |
368 | 14.7M | } |
369 | | |
370 | | /********************************************************************** |
371 | | * free_crackedges |
372 | | * |
373 | | * Really free the CRACKEDGEs by giving them back to delete. |
374 | | **********************************************************************/ |
375 | | |
376 | 16.2k | static void free_crackedges(CRACKEDGE *start) { |
377 | 16.2k | CRACKEDGE *current; // current edge to free |
378 | 16.2k | CRACKEDGE *next; // next one to free |
379 | | |
380 | 14.9M | for (current = start; current != nullptr; current = next) { |
381 | 14.9M | next = current->next; |
382 | 14.9M | delete current; // delete them all |
383 | 14.9M | } |
384 | 16.2k | } |
385 | | |
386 | | } // namespace tesseract |