Coverage Report

Created: 2025-09-27 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/classify/mfoutline.cpp
Line
Count
Source
1
/******************************************************************************
2
 ** Filename:    mfoutline.c
3
 ** Purpose:     Interface to outline struct used for extracting features
4
 ** Author:      Dan Johnson
5
 **
6
 ** (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
18
#include "mfoutline.h"
19
20
#include "blobs.h"
21
#include "classify.h"
22
#include "clusttool.h" //If remove you get caught in a loop somewhere
23
#include "mfx.h"
24
#include "params.h"
25
26
#include <cmath>
27
#include <cstdio>
28
29
namespace tesseract {
30
31
/*---------------------------------------------------------------------------*/
32
/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
33
 */
34
2.28k
LIST ConvertBlob(TBLOB *blob) {
35
2.28k
  LIST outlines = NIL_LIST;
36
2.28k
  return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer);
37
2.28k
}
38
39
/*---------------------------------------------------------------------------*/
40
/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */
41
4.53k
MFOUTLINE ConvertOutline(TESSLINE *outline) {
42
4.53k
  auto MFOutline = NIL_LIST;
43
44
4.53k
  if (outline == nullptr || outline->loop == nullptr) {
45
0
    return MFOutline;
46
0
  }
47
48
4.53k
  auto StartPoint = outline->loop;
49
4.53k
  auto EdgePoint = StartPoint;
50
26.8k
  do {
51
26.8k
    auto NextPoint = EdgePoint->next;
52
53
    /* filter out duplicate points */
54
26.8k
    if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) {
55
26.8k
      auto NewPoint = new MFEDGEPT;
56
26.8k
      NewPoint->ClearMark();
57
26.8k
      NewPoint->Hidden = EdgePoint->IsHidden();
58
26.8k
      NewPoint->Point.x = EdgePoint->pos.x;
59
26.8k
      NewPoint->Point.y = EdgePoint->pos.y;
60
26.8k
      MFOutline = push(MFOutline, NewPoint);
61
26.8k
    }
62
26.8k
    EdgePoint = NextPoint;
63
26.8k
  } while (EdgePoint != StartPoint);
64
65
4.53k
  if (MFOutline != nullptr) {
66
4.53k
    MakeOutlineCircular(MFOutline);
67
4.53k
  }
68
4.53k
  return MFOutline;
69
4.53k
}
70
71
/*---------------------------------------------------------------------------*/
72
/**
73
 * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
74
 *
75
 * @param outline      first outline to be converted
76
 * @param mf_outlines  list to add converted outlines to
77
 * @param outline_type  are the outlines outer or holes?
78
 */
79
2.28k
LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type) {
80
2.28k
  MFOUTLINE mf_outline;
81
82
6.82k
  while (outline != nullptr) {
83
4.53k
    mf_outline = ConvertOutline(outline);
84
4.53k
    if (mf_outline != nullptr) {
85
4.53k
      mf_outlines = push(mf_outlines, mf_outline);
86
4.53k
    }
87
4.53k
    outline = outline->next;
88
4.53k
  }
89
2.28k
  return mf_outlines;
90
2.28k
}
91
92
/*---------------------------------------------------------------------------*/
93
/**
94
 * This routine searches through the specified outline, computes
95
 * a slope for each vector in the outline, and marks each
96
 * vector as having one of the following directions:
97
 *   N, S, E, W, NE, NW, SE, SW
98
 * This information is then stored in the outline and the
99
 * outline is returned.
100
 * @param Outline   micro-feature outline to analyze
101
 * @param MinSlope  controls "snapping" of segments to horizontal
102
 * @param MaxSlope  controls "snapping" of segments to vertical
103
 */
104
0
void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope) {
105
0
  MFEDGEPT *Current;
106
0
  MFEDGEPT *Last;
107
0
  MFOUTLINE EdgePoint;
108
109
0
  if (DegenerateOutline(Outline)) {
110
0
    return;
111
0
  }
112
113
0
  Last = PointAt(Outline);
114
0
  Outline = NextPointAfter(Outline);
115
0
  EdgePoint = Outline;
116
0
  do {
117
0
    Current = PointAt(EdgePoint);
118
0
    ComputeDirection(Last, Current, MinSlope, MaxSlope);
119
120
0
    Last = Current;
121
0
    EdgePoint = NextPointAfter(EdgePoint);
122
0
  } while (EdgePoint != Outline);
123
124
0
} /* FindDirectionChanges */
125
126
/*---------------------------------------------------------------------------*/
127
/**
128
 * This routine deallocates all of the memory consumed by
129
 * a micro-feature outline.
130
 * @param arg   micro-feature outline to be freed
131
 */
132
4.53k
void FreeMFOutline(void *arg) { // MFOUTLINE Outline)
133
4.53k
  auto Outline = static_cast<MFOUTLINE>(arg);
134
135
  /* break the circular outline so we can use std. techniques to deallocate */
136
4.53k
  MFOUTLINE Start = Outline->list_rest();
137
4.53k
  set_rest(Outline, NIL_LIST);
138
31.3k
  while (Start != nullptr) {
139
26.8k
    delete reinterpret_cast<MFEDGEPT *>(Start->first_node());
140
26.8k
    Start = pop(Start);
141
26.8k
  }
142
143
4.53k
} /* FreeMFOutline */
144
145
/*---------------------------------------------------------------------------*/
146
/**
147
 * Release all memory consumed by the specified list
148
 * of outlines.
149
 * @param Outlines  list of mf-outlines to be freed
150
 */
151
2.28k
void FreeOutlines(LIST Outlines) {
152
2.28k
  destroy_nodes(Outlines, FreeMFOutline);
153
2.28k
} /* FreeOutlines */
154
155
/*---------------------------------------------------------------------------*/
156
/**
157
 * This routine searches through the specified outline and finds
158
 * the points at which the outline changes direction.  These
159
 * points are then marked as "extremities".  This routine is
160
 * used as an alternative to FindExtremities().  It forces the
161
 * endpoints of the microfeatures to be at the direction
162
 * changes rather than at the midpoint between direction
163
 * changes.
164
 * @param Outline   micro-feature outline to analyze
165
 */
166
0
void MarkDirectionChanges(MFOUTLINE Outline) {
167
0
  MFOUTLINE Current;
168
0
  MFOUTLINE Last;
169
0
  MFOUTLINE First;
170
171
0
  if (DegenerateOutline(Outline)) {
172
0
    return;
173
0
  }
174
175
0
  First = NextDirectionChange(Outline);
176
0
  Last = First;
177
0
  do {
178
0
    Current = NextDirectionChange(Last);
179
0
    PointAt(Current)->MarkPoint();
180
0
    Last = Current;
181
0
  } while (Last != First);
182
183
0
} /* MarkDirectionChanges */
184
185
/*---------------------------------------------------------------------------*/
186
/**
187
 * This routine returns the next point in the micro-feature
188
 * outline that is an extremity.  The search starts after
189
 * EdgePoint.  The routine assumes that the outline being
190
 * searched is not a degenerate outline (i.e. it must have
191
 * 2 or more edge points).
192
 * @param EdgePoint start search from this point
193
 * @return Next extremity in the outline after EdgePoint.
194
 * @note Globals: none
195
 */
196
0
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
197
0
  EdgePoint = NextPointAfter(EdgePoint);
198
0
  while (!PointAt(EdgePoint)->ExtremityMark) {
199
0
    EdgePoint = NextPointAfter(EdgePoint);
200
0
  }
201
202
0
  return (EdgePoint);
203
204
0
} /* NextExtremity */
205
206
/*---------------------------------------------------------------------------*/
207
/**
208
 * This routine normalizes the coordinates of the specified
209
 * outline so that the outline is deskewed down to the
210
 * baseline, translated so that x=0 is at XOrigin, and scaled
211
 * so that the height of a character cell from descender to
212
 * ascender is 1.  Of this height, 0.25 is for the descender,
213
 * 0.25 for the ascender, and 0.5 for the x-height.  The
214
 * y coordinate of the baseline is 0.
215
 * @param Outline   outline to be normalized
216
 * @param XOrigin   x-origin of text
217
 */
218
4.53k
void NormalizeOutline(MFOUTLINE Outline, float XOrigin) {
219
4.53k
  if (Outline == NIL_LIST) {
220
0
    return;
221
0
  }
222
223
4.53k
  MFOUTLINE EdgePoint = Outline;
224
26.8k
  do {
225
26.8k
    MFEDGEPT *Current = PointAt(EdgePoint);
226
26.8k
    Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset);
227
26.8k
    Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
228
26.8k
    EdgePoint = NextPointAfter(EdgePoint);
229
26.8k
  } while (EdgePoint != Outline);
230
4.53k
} /* NormalizeOutline */
231
232
/*---------------------------------------------------------------------------*/
233
/**
234
 * This routine normalizes every outline in Outlines
235
 * according to the currently selected normalization method.
236
 * It also returns the scale factors that it used to do this
237
 * scaling.  The scale factors returned represent the x and
238
 * y sizes in the normalized coordinate system that correspond
239
 * to 1 pixel in the original coordinate system.
240
 * Outlines are changed and XScale and YScale are updated.
241
 *
242
 * Globals:
243
 * - classify_norm_method  method being used for normalization
244
 * - classify_char_norm_range map radius of gyration to this value
245
 * @param Outlines  list of outlines to be normalized
246
 * @param XScale    x-direction scale factor used by routine
247
 * @param YScale    y-direction scale factor used by routine
248
 */
249
2.28k
void Classify::NormalizeOutlines(LIST Outlines, float *XScale, float *YScale) {
250
2.28k
  MFOUTLINE Outline;
251
252
2.28k
  switch (classify_norm_method) {
253
0
    case character:
254
0
      ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
255
0
      break;
256
257
2.28k
    case baseline:
258
4.53k
      iterate(Outlines) {
259
4.53k
        Outline = static_cast<MFOUTLINE>(Outlines->first_node());
260
4.53k
        NormalizeOutline(Outline, 0.0);
261
4.53k
      }
262
2.28k
      *XScale = *YScale = MF_SCALE_FACTOR;
263
2.28k
      break;
264
2.28k
  }
265
2.28k
} /* NormalizeOutlines */
266
267
/*----------------------------------------------------------------------------
268
              Private Code
269
----------------------------------------------------------------------------*/
270
/**
271
 * Change the direction of every vector in the specified
272
 * outline segment to Direction.  The segment to be changed
273
 * starts at Start and ends at End.  Note that the previous
274
 * direction of End must also be changed to reflect the
275
 * change in direction of the point before it.
276
 * @param Start defines start of segment of outline to be modified
277
 * @param End defines end of segment of outline to be modified
278
 * @param Direction new direction to assign to segment
279
 */
280
0
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
281
0
  MFOUTLINE Current;
282
283
0
  for (Current = Start; Current != End; Current = NextPointAfter(Current)) {
284
0
    PointAt(Current)->Direction = Direction;
285
0
  }
286
287
0
  PointAt(End)->PreviousDirection = Direction;
288
289
0
} /* ChangeDirection */
290
291
/**
292
 * This routine normalizes each point in Outline by
293
 * translating it to the specified center and scaling it
294
 * anisotropically according to the given scale factors.
295
 * @param Outline     outline to be character normalized
296
 * @param cn_denorm
297
 */
298
0
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm) {
299
0
  MFOUTLINE First, Current;
300
0
  MFEDGEPT *CurrentPoint;
301
302
0
  if (Outline == NIL_LIST) {
303
0
    return;
304
0
  }
305
306
0
  First = Outline;
307
0
  Current = First;
308
0
  do {
309
0
    CurrentPoint = PointAt(Current);
310
0
    FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y);
311
0
    cn_denorm.LocalNormTransform(pos, &pos);
312
0
    CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
313
0
    CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
314
315
0
    Current = NextPointAfter(Current);
316
0
  } while (Current != First);
317
318
0
} /* CharNormalizeOutline */
319
320
/**
321
 * This routine computes the slope from Start to Finish and
322
 * and then computes the approximate direction of the line
323
 * segment from Start to Finish.  The direction is quantized
324
 * into 8 buckets:
325
 *  N, S, E, W, NE, NW, SE, SW
326
 * Both the slope and the direction are then stored into
327
 * the appropriate fields of the Start edge point.  The
328
 * direction is also stored into the PreviousDirection field
329
 * of the Finish edge point.
330
 * @param Start   starting point to compute direction from
331
 * @param Finish    finishing point to compute direction to
332
 * @param MinSlope  slope below which lines are horizontal
333
 * @param MaxSlope  slope above which lines are vertical
334
 */
335
0
void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope) {
336
0
  FVECTOR Delta;
337
338
0
  Delta.x = Finish->Point.x - Start->Point.x;
339
0
  Delta.y = Finish->Point.y - Start->Point.y;
340
0
  if (Delta.x == 0) {
341
0
    if (Delta.y < 0) {
342
0
      Start->Slope = -FLT_MAX;
343
0
      Start->Direction = south;
344
0
    } else {
345
0
      Start->Slope = FLT_MAX;
346
0
      Start->Direction = north;
347
0
    }
348
0
  } else {
349
0
    Start->Slope = Delta.y / Delta.x;
350
0
    if (Delta.x > 0) {
351
0
      if (Delta.y > 0) {
352
0
        if (Start->Slope > MinSlope) {
353
0
          if (Start->Slope < MaxSlope) {
354
0
            Start->Direction = northeast;
355
0
          } else {
356
0
            Start->Direction = north;
357
0
          }
358
0
        } else {
359
0
          Start->Direction = east;
360
0
        }
361
0
      } else if (Start->Slope < -MinSlope) {
362
0
        if (Start->Slope > -MaxSlope) {
363
0
          Start->Direction = southeast;
364
0
        } else {
365
0
          Start->Direction = south;
366
0
        }
367
0
      } else {
368
0
        Start->Direction = east;
369
0
      }
370
0
    } else if (Delta.y > 0) {
371
0
      if (Start->Slope < -MinSlope) {
372
0
        if (Start->Slope > -MaxSlope) {
373
0
          Start->Direction = northwest;
374
0
        } else {
375
0
          Start->Direction = north;
376
0
        }
377
0
      } else {
378
0
        Start->Direction = west;
379
0
      }
380
0
    } else if (Start->Slope > MinSlope) {
381
0
      if (Start->Slope < MaxSlope) {
382
0
        Start->Direction = southwest;
383
0
      } else {
384
0
        Start->Direction = south;
385
0
      }
386
0
    } else {
387
0
      Start->Direction = west;
388
0
    }
389
0
  }
390
0
  Finish->PreviousDirection = Start->Direction;
391
0
}
392
393
/**
394
 * This routine returns the next point in the micro-feature
395
 * outline that has a direction different than EdgePoint.  The
396
 * routine assumes that the outline being searched is not a
397
 * degenerate outline (i.e. it must have 2 or more edge points).
398
 * @param EdgePoint start search from this point
399
 * @return Point of next direction change in micro-feature outline.
400
 * @note Globals: none
401
 */
402
0
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
403
0
  DIRECTION InitialDirection;
404
405
0
  InitialDirection = PointAt(EdgePoint)->Direction;
406
407
0
  MFOUTLINE next_pt = nullptr;
408
0
  do {
409
0
    EdgePoint = NextPointAfter(EdgePoint);
410
0
    next_pt = NextPointAfter(EdgePoint);
411
0
  } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden &&
412
0
           next_pt != nullptr && !PointAt(next_pt)->Hidden);
413
414
0
  return (EdgePoint);
415
0
}
416
417
} // namespace tesseract