/src/tesseract/src/classify/mfoutline.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | ** Filename: mfoutline.c |
3 | | ** Purpose: Interface to outline struct used for extracting features |
4 | | ** Author: Dan Johnson |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | |
18 | | #include "mfoutline.h" |
19 | | |
20 | | #include "blobs.h" |
21 | | #include "classify.h" |
22 | | #include "clusttool.h" //If remove you get caught in a loop somewhere |
23 | | #include "mfx.h" |
24 | | #include "params.h" |
25 | | |
26 | | #include <cmath> |
27 | | #include <cstdio> |
28 | | |
29 | | namespace tesseract { |
30 | | |
31 | | /*---------------------------------------------------------------------------*/ |
32 | | /** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). |
33 | | */ |
34 | 2.28k | LIST ConvertBlob(TBLOB *blob) { |
35 | 2.28k | LIST outlines = NIL_LIST; |
36 | 2.28k | return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer); |
37 | 2.28k | } |
38 | | |
39 | | /*---------------------------------------------------------------------------*/ |
40 | | /** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */ |
41 | 4.53k | MFOUTLINE ConvertOutline(TESSLINE *outline) { |
42 | 4.53k | auto MFOutline = NIL_LIST; |
43 | | |
44 | 4.53k | if (outline == nullptr || outline->loop == nullptr) { |
45 | 0 | return MFOutline; |
46 | 0 | } |
47 | | |
48 | 4.53k | auto StartPoint = outline->loop; |
49 | 4.53k | auto EdgePoint = StartPoint; |
50 | 26.8k | do { |
51 | 26.8k | auto NextPoint = EdgePoint->next; |
52 | | |
53 | | /* filter out duplicate points */ |
54 | 26.8k | if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) { |
55 | 26.8k | auto NewPoint = new MFEDGEPT; |
56 | 26.8k | NewPoint->ClearMark(); |
57 | 26.8k | NewPoint->Hidden = EdgePoint->IsHidden(); |
58 | 26.8k | NewPoint->Point.x = EdgePoint->pos.x; |
59 | 26.8k | NewPoint->Point.y = EdgePoint->pos.y; |
60 | 26.8k | MFOutline = push(MFOutline, NewPoint); |
61 | 26.8k | } |
62 | 26.8k | EdgePoint = NextPoint; |
63 | 26.8k | } while (EdgePoint != StartPoint); |
64 | | |
65 | 4.53k | if (MFOutline != nullptr) { |
66 | 4.53k | MakeOutlineCircular(MFOutline); |
67 | 4.53k | } |
68 | 4.53k | return MFOutline; |
69 | 4.53k | } |
70 | | |
71 | | /*---------------------------------------------------------------------------*/ |
72 | | /** |
73 | | * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). |
74 | | * |
75 | | * @param outline first outline to be converted |
76 | | * @param mf_outlines list to add converted outlines to |
77 | | * @param outline_type are the outlines outer or holes? |
78 | | */ |
79 | 2.28k | LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type) { |
80 | 2.28k | MFOUTLINE mf_outline; |
81 | | |
82 | 6.82k | while (outline != nullptr) { |
83 | 4.53k | mf_outline = ConvertOutline(outline); |
84 | 4.53k | if (mf_outline != nullptr) { |
85 | 4.53k | mf_outlines = push(mf_outlines, mf_outline); |
86 | 4.53k | } |
87 | 4.53k | outline = outline->next; |
88 | 4.53k | } |
89 | 2.28k | return mf_outlines; |
90 | 2.28k | } |
91 | | |
92 | | /*---------------------------------------------------------------------------*/ |
93 | | /** |
94 | | * This routine searches through the specified outline, computes |
95 | | * a slope for each vector in the outline, and marks each |
96 | | * vector as having one of the following directions: |
97 | | * N, S, E, W, NE, NW, SE, SW |
98 | | * This information is then stored in the outline and the |
99 | | * outline is returned. |
100 | | * @param Outline micro-feature outline to analyze |
101 | | * @param MinSlope controls "snapping" of segments to horizontal |
102 | | * @param MaxSlope controls "snapping" of segments to vertical |
103 | | */ |
104 | 0 | void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope) { |
105 | 0 | MFEDGEPT *Current; |
106 | 0 | MFEDGEPT *Last; |
107 | 0 | MFOUTLINE EdgePoint; |
108 | |
|
109 | 0 | if (DegenerateOutline(Outline)) { |
110 | 0 | return; |
111 | 0 | } |
112 | | |
113 | 0 | Last = PointAt(Outline); |
114 | 0 | Outline = NextPointAfter(Outline); |
115 | 0 | EdgePoint = Outline; |
116 | 0 | do { |
117 | 0 | Current = PointAt(EdgePoint); |
118 | 0 | ComputeDirection(Last, Current, MinSlope, MaxSlope); |
119 | |
|
120 | 0 | Last = Current; |
121 | 0 | EdgePoint = NextPointAfter(EdgePoint); |
122 | 0 | } while (EdgePoint != Outline); |
123 | |
|
124 | 0 | } /* FindDirectionChanges */ |
125 | | |
126 | | /*---------------------------------------------------------------------------*/ |
127 | | /** |
128 | | * This routine deallocates all of the memory consumed by |
129 | | * a micro-feature outline. |
130 | | * @param arg micro-feature outline to be freed |
131 | | */ |
132 | 4.53k | void FreeMFOutline(void *arg) { // MFOUTLINE Outline) |
133 | 4.53k | auto Outline = static_cast<MFOUTLINE>(arg); |
134 | | |
135 | | /* break the circular outline so we can use std. techniques to deallocate */ |
136 | 4.53k | MFOUTLINE Start = Outline->list_rest(); |
137 | 4.53k | set_rest(Outline, NIL_LIST); |
138 | 31.3k | while (Start != nullptr) { |
139 | 26.8k | delete reinterpret_cast<MFEDGEPT *>(Start->first_node()); |
140 | 26.8k | Start = pop(Start); |
141 | 26.8k | } |
142 | | |
143 | 4.53k | } /* FreeMFOutline */ |
144 | | |
145 | | /*---------------------------------------------------------------------------*/ |
146 | | /** |
147 | | * Release all memory consumed by the specified list |
148 | | * of outlines. |
149 | | * @param Outlines list of mf-outlines to be freed |
150 | | */ |
151 | 2.28k | void FreeOutlines(LIST Outlines) { |
152 | 2.28k | destroy_nodes(Outlines, FreeMFOutline); |
153 | 2.28k | } /* FreeOutlines */ |
154 | | |
155 | | /*---------------------------------------------------------------------------*/ |
156 | | /** |
157 | | * This routine searches through the specified outline and finds |
158 | | * the points at which the outline changes direction. These |
159 | | * points are then marked as "extremities". This routine is |
160 | | * used as an alternative to FindExtremities(). It forces the |
161 | | * endpoints of the microfeatures to be at the direction |
162 | | * changes rather than at the midpoint between direction |
163 | | * changes. |
164 | | * @param Outline micro-feature outline to analyze |
165 | | */ |
166 | 0 | void MarkDirectionChanges(MFOUTLINE Outline) { |
167 | 0 | MFOUTLINE Current; |
168 | 0 | MFOUTLINE Last; |
169 | 0 | MFOUTLINE First; |
170 | |
|
171 | 0 | if (DegenerateOutline(Outline)) { |
172 | 0 | return; |
173 | 0 | } |
174 | | |
175 | 0 | First = NextDirectionChange(Outline); |
176 | 0 | Last = First; |
177 | 0 | do { |
178 | 0 | Current = NextDirectionChange(Last); |
179 | 0 | PointAt(Current)->MarkPoint(); |
180 | 0 | Last = Current; |
181 | 0 | } while (Last != First); |
182 | |
|
183 | 0 | } /* MarkDirectionChanges */ |
184 | | |
185 | | /*---------------------------------------------------------------------------*/ |
186 | | /** |
187 | | * This routine returns the next point in the micro-feature |
188 | | * outline that is an extremity. The search starts after |
189 | | * EdgePoint. The routine assumes that the outline being |
190 | | * searched is not a degenerate outline (i.e. it must have |
191 | | * 2 or more edge points). |
192 | | * @param EdgePoint start search from this point |
193 | | * @return Next extremity in the outline after EdgePoint. |
194 | | * @note Globals: none |
195 | | */ |
196 | 0 | MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { |
197 | 0 | EdgePoint = NextPointAfter(EdgePoint); |
198 | 0 | while (!PointAt(EdgePoint)->ExtremityMark) { |
199 | 0 | EdgePoint = NextPointAfter(EdgePoint); |
200 | 0 | } |
201 | |
|
202 | 0 | return (EdgePoint); |
203 | |
|
204 | 0 | } /* NextExtremity */ |
205 | | |
206 | | /*---------------------------------------------------------------------------*/ |
207 | | /** |
208 | | * This routine normalizes the coordinates of the specified |
209 | | * outline so that the outline is deskewed down to the |
210 | | * baseline, translated so that x=0 is at XOrigin, and scaled |
211 | | * so that the height of a character cell from descender to |
212 | | * ascender is 1. Of this height, 0.25 is for the descender, |
213 | | * 0.25 for the ascender, and 0.5 for the x-height. The |
214 | | * y coordinate of the baseline is 0. |
215 | | * @param Outline outline to be normalized |
216 | | * @param XOrigin x-origin of text |
217 | | */ |
218 | 4.53k | void NormalizeOutline(MFOUTLINE Outline, float XOrigin) { |
219 | 4.53k | if (Outline == NIL_LIST) { |
220 | 0 | return; |
221 | 0 | } |
222 | | |
223 | 4.53k | MFOUTLINE EdgePoint = Outline; |
224 | 26.8k | do { |
225 | 26.8k | MFEDGEPT *Current = PointAt(EdgePoint); |
226 | 26.8k | Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset); |
227 | 26.8k | Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); |
228 | 26.8k | EdgePoint = NextPointAfter(EdgePoint); |
229 | 26.8k | } while (EdgePoint != Outline); |
230 | 4.53k | } /* NormalizeOutline */ |
231 | | |
232 | | /*---------------------------------------------------------------------------*/ |
233 | | /** |
234 | | * This routine normalizes every outline in Outlines |
235 | | * according to the currently selected normalization method. |
236 | | * It also returns the scale factors that it used to do this |
237 | | * scaling. The scale factors returned represent the x and |
238 | | * y sizes in the normalized coordinate system that correspond |
239 | | * to 1 pixel in the original coordinate system. |
240 | | * Outlines are changed and XScale and YScale are updated. |
241 | | * |
242 | | * Globals: |
243 | | * - classify_norm_method method being used for normalization |
244 | | * - classify_char_norm_range map radius of gyration to this value |
245 | | * @param Outlines list of outlines to be normalized |
246 | | * @param XScale x-direction scale factor used by routine |
247 | | * @param YScale y-direction scale factor used by routine |
248 | | */ |
249 | 2.28k | void Classify::NormalizeOutlines(LIST Outlines, float *XScale, float *YScale) { |
250 | 2.28k | MFOUTLINE Outline; |
251 | | |
252 | 2.28k | switch (classify_norm_method) { |
253 | 0 | case character: |
254 | 0 | ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); |
255 | 0 | break; |
256 | | |
257 | 2.28k | case baseline: |
258 | 4.53k | iterate(Outlines) { |
259 | 4.53k | Outline = static_cast<MFOUTLINE>(Outlines->first_node()); |
260 | 4.53k | NormalizeOutline(Outline, 0.0); |
261 | 4.53k | } |
262 | 2.28k | *XScale = *YScale = MF_SCALE_FACTOR; |
263 | 2.28k | break; |
264 | 2.28k | } |
265 | 2.28k | } /* NormalizeOutlines */ |
266 | | |
267 | | /*---------------------------------------------------------------------------- |
268 | | Private Code |
269 | | ----------------------------------------------------------------------------*/ |
270 | | /** |
271 | | * Change the direction of every vector in the specified |
272 | | * outline segment to Direction. The segment to be changed |
273 | | * starts at Start and ends at End. Note that the previous |
274 | | * direction of End must also be changed to reflect the |
275 | | * change in direction of the point before it. |
276 | | * @param Start defines start of segment of outline to be modified |
277 | | * @param End defines end of segment of outline to be modified |
278 | | * @param Direction new direction to assign to segment |
279 | | */ |
280 | 0 | void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { |
281 | 0 | MFOUTLINE Current; |
282 | |
|
283 | 0 | for (Current = Start; Current != End; Current = NextPointAfter(Current)) { |
284 | 0 | PointAt(Current)->Direction = Direction; |
285 | 0 | } |
286 | |
|
287 | 0 | PointAt(End)->PreviousDirection = Direction; |
288 | |
|
289 | 0 | } /* ChangeDirection */ |
290 | | |
291 | | /** |
292 | | * This routine normalizes each point in Outline by |
293 | | * translating it to the specified center and scaling it |
294 | | * anisotropically according to the given scale factors. |
295 | | * @param Outline outline to be character normalized |
296 | | * @param cn_denorm |
297 | | */ |
298 | 0 | void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm) { |
299 | 0 | MFOUTLINE First, Current; |
300 | 0 | MFEDGEPT *CurrentPoint; |
301 | |
|
302 | 0 | if (Outline == NIL_LIST) { |
303 | 0 | return; |
304 | 0 | } |
305 | | |
306 | 0 | First = Outline; |
307 | 0 | Current = First; |
308 | 0 | do { |
309 | 0 | CurrentPoint = PointAt(Current); |
310 | 0 | FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); |
311 | 0 | cn_denorm.LocalNormTransform(pos, &pos); |
312 | 0 | CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR; |
313 | 0 | CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; |
314 | |
|
315 | 0 | Current = NextPointAfter(Current); |
316 | 0 | } while (Current != First); |
317 | |
|
318 | 0 | } /* CharNormalizeOutline */ |
319 | | |
320 | | /** |
321 | | * This routine computes the slope from Start to Finish and |
322 | | * and then computes the approximate direction of the line |
323 | | * segment from Start to Finish. The direction is quantized |
324 | | * into 8 buckets: |
325 | | * N, S, E, W, NE, NW, SE, SW |
326 | | * Both the slope and the direction are then stored into |
327 | | * the appropriate fields of the Start edge point. The |
328 | | * direction is also stored into the PreviousDirection field |
329 | | * of the Finish edge point. |
330 | | * @param Start starting point to compute direction from |
331 | | * @param Finish finishing point to compute direction to |
332 | | * @param MinSlope slope below which lines are horizontal |
333 | | * @param MaxSlope slope above which lines are vertical |
334 | | */ |
335 | 0 | void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope) { |
336 | 0 | FVECTOR Delta; |
337 | |
|
338 | 0 | Delta.x = Finish->Point.x - Start->Point.x; |
339 | 0 | Delta.y = Finish->Point.y - Start->Point.y; |
340 | 0 | if (Delta.x == 0) { |
341 | 0 | if (Delta.y < 0) { |
342 | 0 | Start->Slope = -FLT_MAX; |
343 | 0 | Start->Direction = south; |
344 | 0 | } else { |
345 | 0 | Start->Slope = FLT_MAX; |
346 | 0 | Start->Direction = north; |
347 | 0 | } |
348 | 0 | } else { |
349 | 0 | Start->Slope = Delta.y / Delta.x; |
350 | 0 | if (Delta.x > 0) { |
351 | 0 | if (Delta.y > 0) { |
352 | 0 | if (Start->Slope > MinSlope) { |
353 | 0 | if (Start->Slope < MaxSlope) { |
354 | 0 | Start->Direction = northeast; |
355 | 0 | } else { |
356 | 0 | Start->Direction = north; |
357 | 0 | } |
358 | 0 | } else { |
359 | 0 | Start->Direction = east; |
360 | 0 | } |
361 | 0 | } else if (Start->Slope < -MinSlope) { |
362 | 0 | if (Start->Slope > -MaxSlope) { |
363 | 0 | Start->Direction = southeast; |
364 | 0 | } else { |
365 | 0 | Start->Direction = south; |
366 | 0 | } |
367 | 0 | } else { |
368 | 0 | Start->Direction = east; |
369 | 0 | } |
370 | 0 | } else if (Delta.y > 0) { |
371 | 0 | if (Start->Slope < -MinSlope) { |
372 | 0 | if (Start->Slope > -MaxSlope) { |
373 | 0 | Start->Direction = northwest; |
374 | 0 | } else { |
375 | 0 | Start->Direction = north; |
376 | 0 | } |
377 | 0 | } else { |
378 | 0 | Start->Direction = west; |
379 | 0 | } |
380 | 0 | } else if (Start->Slope > MinSlope) { |
381 | 0 | if (Start->Slope < MaxSlope) { |
382 | 0 | Start->Direction = southwest; |
383 | 0 | } else { |
384 | 0 | Start->Direction = south; |
385 | 0 | } |
386 | 0 | } else { |
387 | 0 | Start->Direction = west; |
388 | 0 | } |
389 | 0 | } |
390 | 0 | Finish->PreviousDirection = Start->Direction; |
391 | 0 | } |
392 | | |
393 | | /** |
394 | | * This routine returns the next point in the micro-feature |
395 | | * outline that has a direction different than EdgePoint. The |
396 | | * routine assumes that the outline being searched is not a |
397 | | * degenerate outline (i.e. it must have 2 or more edge points). |
398 | | * @param EdgePoint start search from this point |
399 | | * @return Point of next direction change in micro-feature outline. |
400 | | * @note Globals: none |
401 | | */ |
402 | 0 | MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { |
403 | 0 | DIRECTION InitialDirection; |
404 | |
|
405 | 0 | InitialDirection = PointAt(EdgePoint)->Direction; |
406 | |
|
407 | 0 | MFOUTLINE next_pt = nullptr; |
408 | 0 | do { |
409 | 0 | EdgePoint = NextPointAfter(EdgePoint); |
410 | 0 | next_pt = NextPointAfter(EdgePoint); |
411 | 0 | } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden && |
412 | 0 | next_pt != nullptr && !PointAt(next_pt)->Hidden); |
413 | |
|
414 | 0 | return (EdgePoint); |
415 | 0 | } |
416 | | |
417 | | } // namespace tesseract |