Coverage Report

Created: 2026-06-13 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccstruct/split.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * File:         split.cpp  (Formerly split.c)
4
 * Author:       Mark Seaman, OCR Technology
5
 *
6
 * (c) Copyright 1987, Hewlett-Packard Company.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 *************************************************************************/
18
19
// Include automatically generated configuration file if running autoconf.
20
#ifdef HAVE_CONFIG_H
21
#  include "config_auto.h"
22
#endif
23
24
#include "split.h"
25
26
#include "coutln.h"
27
#include "tprintf.h"
28
29
#include <algorithm>
30
31
namespace tesseract {
32
33
/*----------------------------------------------------------------------
34
              V a r i a b l e s
35
----------------------------------------------------------------------*/
36
// Limit on the amount of penalty for the chop being off-center.
37
const int kCenterGradeCap = 25;
38
// Ridiculously large priority for splits that are no use.
39
const double kBadPriority = 999.0;
40
41
BOOL_VAR(wordrec_display_splits, 0, "Display splits");
42
43
// Hides the SPLIT so the outlines appear not to be cut by it.
44
351k
void SPLIT::Hide() const {
45
351k
  EDGEPT *edgept = point1;
46
351k
  do {
47
351k
    edgept->Hide();
48
351k
    edgept = edgept->next;
49
351k
  } while (!edgept->EqualPos(*point2) && edgept != point1);
50
351k
  edgept = point2;
51
351k
  do {
52
351k
    edgept->Hide();
53
351k
    edgept = edgept->next;
54
351k
  } while (!edgept->EqualPos(*point1) && edgept != point2);
55
351k
}
56
57
// Undoes hide, so the outlines are cut by the SPLIT.
58
374k
void SPLIT::Reveal() const {
59
374k
  EDGEPT *edgept = point1;
60
374k
  do {
61
374k
    edgept->Reveal();
62
374k
    edgept = edgept->next;
63
374k
  } while (!edgept->EqualPos(*point2) && edgept != point1);
64
374k
  edgept = point2;
65
374k
  do {
66
374k
    edgept->Reveal();
67
374k
    edgept = edgept->next;
68
374k
  } while (!edgept->EqualPos(*point1) && edgept != point2);
69
374k
}
70
71
// Compute a split priority based on the bounding boxes of the parts.
72
// The arguments here are config parameters defined in Wordrec. Add chop_
73
// to the beginning of the name.
74
float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
75
82.4M
                          double center_knob, double width_change_knob) const {
76
82.4M
  TBOX box1 = Box12();
77
82.4M
  TBOX box2 = Box21();
78
82.4M
  int min_left = std::min(box1.left(), box2.left());
79
82.4M
  int max_right = std::max(box1.right(), box2.right());
80
82.4M
  if (xmin < min_left && xmax > max_right) {
81
22.8M
    return kBadPriority;
82
22.8M
  }
83
84
59.6M
  float grade = 0.0f;
85
  // grade_overlap.
86
59.6M
  int width1 = box1.width();
87
59.6M
  int width2 = box2.width();
88
59.6M
  int min_width = std::min(width1, width2);
89
59.6M
  int overlap = -box1.x_gap(box2);
90
59.6M
  if (overlap == min_width) {
91
51.5M
    grade += 100.0f; // Total overlap.
92
51.5M
  } else {
93
8.07M
    if (2 * overlap > min_width) {
94
5.08M
      overlap += 2 * overlap - min_width;
95
5.08M
    }
96
8.07M
    if (overlap > 0) {
97
7.34M
      grade += overlap_knob * overlap;
98
7.34M
    }
99
8.07M
  }
100
  // grade_center_of_blob.
101
59.6M
  if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
102
56.3M
    grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2));
103
56.3M
  }
104
  // grade_width_change.
105
59.6M
  float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
106
59.6M
  if (width_change_grade > 0.0f) {
107
56.7M
    grade += width_change_grade * width_change_knob;
108
56.7M
  }
109
59.6M
  return grade;
110
82.4M
}
111
112
// Returns true if *this SPLIT appears OK in the sense that it does not cross
113
// any outlines and does not chop off any ridiculously small pieces.
114
493k
bool SPLIT::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
115
493k
  return !IsLittleChunk(min_points, min_area) &&
116
220k
         !blob.SegmentCrossesOutline(point1->pos, point2->pos);
117
493k
}
118
119
// Returns true if the split generates a small chunk in terms of either area
120
// or number of points.
121
493k
bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
122
493k
  if (point1->ShortNonCircularSegment(min_points, point2) &&
123
217k
      point1->SegmentArea(point2) < min_area) {
124
146k
    return true;
125
146k
  }
126
346k
  if (point2->ShortNonCircularSegment(min_points, point1) &&
127
175k
      point2->SegmentArea(point1) < min_area) {
128
126k
    return true;
129
126k
  }
130
220k
  return false;
131
346k
}
132
133
/**********************************************************************
134
 * make_edgept
135
 *
136
 * Create an EDGEPT and hook it into an existing list of edge points.
137
 **********************************************************************/
138
231M
EDGEPT *make_edgept(TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev) {
139
231M
  EDGEPT *this_edgept;
140
  /* Create point */
141
231M
  this_edgept = new EDGEPT;
142
231M
  this_edgept->pos.x = x;
143
231M
  this_edgept->pos.y = y;
144
  // Now deal with the src_outline steps.
145
231M
  C_OUTLINE *prev_ol = prev->src_outline;
146
231M
  if (prev_ol != nullptr && prev->next == next) {
147
    // Compute the fraction of the segment that is being cut.
148
0
    FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
149
0
    FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
150
0
    double cut_fraction = target_vec.length() / segment_vec.length();
151
    // Get the start and end at the step level.
152
0
    ICOORD step_start = prev_ol->position_at_index(prev->start_step);
153
0
    int end_step = prev->start_step + prev->step_count;
154
0
    int step_length = prev_ol->pathlength();
155
0
    ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
156
0
    ICOORD step_vec = step_end - step_start;
157
0
    double target_length = step_vec.length() * cut_fraction;
158
    // Find the point on the segment that gives the length nearest to target.
159
0
    int best_step = prev->start_step;
160
0
    ICOORD total_step(0, 0);
161
0
    double best_dist = target_length;
162
0
    for (int s = prev->start_step; s < end_step; ++s) {
163
0
      total_step += prev_ol->step(s % step_length);
164
0
      double dist = fabs(target_length - total_step.length());
165
0
      if (dist < best_dist) {
166
0
        best_dist = dist;
167
0
        best_step = s + 1;
168
0
      }
169
0
    }
170
    // The new point is an intermediate point.
171
0
    this_edgept->src_outline = prev_ol;
172
0
    this_edgept->step_count = end_step - best_step;
173
0
    this_edgept->start_step = best_step % step_length;
174
0
    prev->step_count = best_step - prev->start_step;
175
231M
  } else {
176
    // The new point is poly only.
177
231M
    this_edgept->src_outline = nullptr;
178
231M
    this_edgept->step_count = 0;
179
231M
    this_edgept->start_step = 0;
180
231M
  }
181
  /* Hook it up */
182
231M
  this_edgept->next = next;
183
231M
  this_edgept->prev = prev;
184
231M
  prev->next = this_edgept;
185
231M
  next->prev = this_edgept;
186
  /* Set up vec entries */
187
231M
  this_edgept->vec.x = this_edgept->next->pos.x - x;
188
231M
  this_edgept->vec.y = this_edgept->next->pos.y - y;
189
231M
  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
190
231M
  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
191
231M
  return this_edgept;
192
231M
}
193
194
/**********************************************************************
195
 * remove_edgept
196
 *
197
 * Remove a given EDGEPT from its list and delete it.
198
 **********************************************************************/
199
4.17M
void remove_edgept(EDGEPT *point) {
200
4.17M
  EDGEPT *prev = point->prev;
201
4.17M
  EDGEPT *next = point->next;
202
  // Add point's steps onto prev's steps if they are from the same outline.
203
4.17M
  if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) {
204
0
    prev->step_count += point->step_count;
205
0
  }
206
4.17M
  prev->next = next;
207
4.17M
  next->prev = prev;
208
4.17M
  prev->vec.x = next->pos.x - prev->pos.x;
209
4.17M
  prev->vec.y = next->pos.y - prev->pos.y;
210
4.17M
  delete point;
211
4.17M
}
212
213
/**********************************************************************
214
 * Print
215
 *
216
 * Shows the coordinates of both points in a split.
217
 **********************************************************************/
218
0
void SPLIT::Print() const {
219
0
  tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
220
0
}
221
222
#ifndef GRAPHICS_DISABLED
223
// Draws the split in the given window.
224
void SPLIT::Mark(ScrollView *window) const {
225
  window->Pen(ScrollView::GREEN);
226
  window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
227
  window->UpdateWindow();
228
}
229
#endif
230
231
// Creates two outlines out of one by splitting the original one in half.
232
// Inserts the resulting outlines into the given list.
233
137k
void SPLIT::SplitOutlineList(TESSLINE *outlines) const {
234
137k
  SplitOutline();
235
582k
  while (outlines->next != nullptr) {
236
444k
    outlines = outlines->next;
237
444k
  }
238
239
137k
  outlines->next = new TESSLINE;
240
137k
  outlines->next->loop = point1;
241
137k
  outlines->next->ComputeBoundingBox();
242
243
137k
  outlines = outlines->next;
244
245
137k
  outlines->next = new TESSLINE;
246
137k
  outlines->next->loop = point2;
247
137k
  outlines->next->ComputeBoundingBox();
248
249
137k
  outlines->next->next = nullptr;
250
137k
}
251
252
// Makes a split between these two edge points, but does not affect the
253
// outlines to which they belong.
254
113M
void SPLIT::SplitOutline() const {
255
113M
  EDGEPT *temp2 = point2->next;
256
113M
  EDGEPT *temp1 = point1->next;
257
  /* Create two new points */
258
113M
  EDGEPT *new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
259
113M
  EDGEPT *new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
260
  // point1 and 2 are now cross-over points, so they must have nullptr
261
  // src_outlines and give their src_outline information their new
262
  // replacements.
263
113M
  new_point1->src_outline = point1->src_outline;
264
113M
  new_point1->start_step = point1->start_step;
265
113M
  new_point1->step_count = point1->step_count;
266
113M
  new_point2->src_outline = point2->src_outline;
267
113M
  new_point2->start_step = point2->start_step;
268
113M
  new_point2->step_count = point2->step_count;
269
113M
  point1->src_outline = nullptr;
270
113M
  point1->start_step = 0;
271
113M
  point1->step_count = 0;
272
113M
  point2->src_outline = nullptr;
273
113M
  point2->start_step = 0;
274
113M
  point2->step_count = 0;
275
113M
}
276
277
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
278
// the split, but possibly leaving some duplicate outlines.
279
65.1k
void SPLIT::UnsplitOutlineList(TBLOB *blob) const {
280
  /* Modify edge points */
281
65.1k
  UnsplitOutlines();
282
283
65.1k
  auto *outline1 = new TESSLINE;
284
65.1k
  outline1->next = blob->outlines;
285
65.1k
  blob->outlines = outline1;
286
65.1k
  outline1->loop = point1;
287
288
65.1k
  auto *outline2 = new TESSLINE;
289
65.1k
  outline2->next = blob->outlines;
290
65.1k
  blob->outlines = outline2;
291
65.1k
  outline2->loop = point2;
292
65.1k
}
293
294
// Removes the split that was put between these two points.
295
113M
void SPLIT::UnsplitOutlines() const {
296
113M
  EDGEPT *tmp1 = point1->next;
297
113M
  EDGEPT *tmp2 = point2->next;
298
299
113M
  tmp1->next->prev = point2;
300
113M
  tmp2->next->prev = point1;
301
302
  // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
303
  // deleted.
304
113M
  point1->next = tmp2->next;
305
113M
  point1->src_outline = tmp2->src_outline;
306
113M
  point1->start_step = tmp2->start_step;
307
113M
  point1->step_count = tmp2->step_count;
308
  // Likewise point2 takes tmp1's place.
309
113M
  point2->next = tmp1->next;
310
113M
  point2->src_outline = tmp1->src_outline;
311
113M
  point2->start_step = tmp1->start_step;
312
113M
  point2->step_count = tmp1->step_count;
313
314
113M
  delete tmp1;
315
113M
  delete tmp2;
316
317
113M
  point1->vec.x = point1->next->pos.x - point1->pos.x;
318
113M
  point1->vec.y = point1->next->pos.y - point1->pos.y;
319
320
113M
  point2->vec.x = point2->next->pos.x - point2->pos.x;
321
113M
  point2->vec.y = point2->next->pos.y - point2->pos.y;
322
113M
}
323
324
} // namespace tesseract