Coverage Report

Created: 2025-06-13 07:02

/src/tesseract/src/wordrec/findseam.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * File:         findseam.cpp  (Formerly findseam.c)
4
 * Author:       Mark Seaman, OCR Technology
5
 *
6
 * (c) Copyright 1987, Hewlett-Packard Company.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 *****************************************************************************/
18
/*----------------------------------------------------------------------
19
              I n c l u d e s
20
----------------------------------------------------------------------*/
21
#include "findseam.h"
22
#include "outlines.h"
23
#include "plotedges.h"
24
#include "seam.h"
25
#include "wordrec.h"
26
27
// Include automatically generated configuration file if running autoconf.
28
#ifdef HAVE_CONFIG_H
29
#  include "config_auto.h"
30
#endif
31
32
/**********************************************************************
33
 * partial_split_priority
34
 *
35
 * Assign a priority to this split based on the features that it has.
36
 * Grade it according to the different rating schemes and return the
37
 * value of its goodness.
38
 **********************************************************************/
39
40
3.26M
#define partial_split_priority(split) (grade_split_length(split) + grade_sharpness(split))
41
42
/*----------------------------------------------------------------------
43
              T y p e s
44
----------------------------------------------------------------------*/
45
557M
#define SPLIT_CLOSENESS 20 /* Difference in x value */
46
                           /* How many to keep */
47
58.8M
#define MAX_NUM_SEAMS 150
48
/* How many to keep */
49
830k
#define NO_FULL_PRIORITY (-1) // Special marker for pri.
50
                            /* Evaluate right away */
51
543k
#define BAD_PRIORITY 9999.0
52
53
/*----------------------------------------------------------------------
54
              F u n c t i o n s
55
----------------------------------------------------------------------*/
56
namespace tesseract {
57
58
/**********************************************************************
59
 * add_seam_to_queue
60
 *
61
 * Adds the given new_seam to the seams priority queue, unless it is full
62
 * and the new seam is worse than the worst.
63
 **********************************************************************/
64
58.4M
void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams) {
65
58.4M
  if (new_seam == nullptr) {
66
0
    return;
67
0
  }
68
58.4M
  if (chop_debug) {
69
0
    tprintf("Pushing new seam with priority %g :", new_priority);
70
0
    new_seam->Print("seam: ");
71
0
  }
72
58.4M
  if (seams->size() >= MAX_NUM_SEAMS) {
73
30.6M
    SeamPair old_pair(0, nullptr);
74
30.6M
    if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75
13.8M
      if (chop_debug) {
76
0
        tprintf("Old seam staying with priority %g\n", old_pair.key());
77
0
      }
78
13.8M
      delete new_seam;
79
13.8M
      seams->Push(&old_pair);
80
13.8M
      return;
81
16.7M
    } else if (chop_debug) {
82
0
      tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority,
83
0
              old_pair.key());
84
0
    }
85
30.6M
  }
86
44.5M
  SeamPair new_pair(new_priority, new_seam);
87
44.5M
  seams->Push(&new_pair);
88
44.5M
}
89
90
/**********************************************************************
91
 * choose_best_seam
92
 *
93
 * Choose the best seam that can be created by assembling this a
94
 * collection of splits.  A queue of all the possible seams is
95
 * maintained.  Each new split received is placed in that queue with
96
 * its partial priority value.  These values in the seam queue are
97
 * evaluated and combined until a good enough seam is found.  If no
98
 * further good seams are being found then this function returns to the
99
 * caller, who will send more splits.  If this function is called with
100
 * a split of nullptr, then no further splits can be supplied by the
101
 * caller.
102
 **********************************************************************/
103
void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority,
104
3.71M
                               SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) {
105
3.71M
  SEAM *seam;
106
3.71M
  float my_priority;
107
  /* Add seam of split */
108
3.71M
  my_priority = priority;
109
3.71M
  if (split != nullptr) {
110
3.26M
    TPOINT split_point = split->point1->pos;
111
3.26M
    split_point += split->point2->pos;
112
3.26M
    split_point /= 2;
113
3.26M
    seam = new SEAM(my_priority, split_point, *split);
114
3.26M
    if (chop_debug > 1) {
115
0
      seam->Print("Partial priority    ");
116
0
    }
117
3.26M
    add_seam_to_queue(my_priority, seam, seam_queue);
118
119
3.26M
    if (my_priority > chop_good_split) {
120
387k
      return;
121
387k
    }
122
3.26M
  }
123
124
3.32M
  TBOX bbox = blob->bounding_box();
125
  /* Queue loop */
126
28.0M
  while (!seam_queue->empty()) {
127
26.8M
    SeamPair seam_pair;
128
26.8M
    seam_queue->Pop(&seam_pair);
129
26.8M
    seam = seam_pair.extract_data();
130
    /* Set full priority */
131
26.8M
    my_priority =
132
26.8M
        seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
133
26.8M
                           chop_center_knob, chop_width_change_knob);
134
26.8M
    if (chop_debug) {
135
0
      char str[80];
136
0
      snprintf(str, sizeof(str), "Full my_priority %0.0f,  ", my_priority);
137
0
      seam->Print(str);
138
0
    }
139
140
26.8M
    if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
141
26.8M
        my_priority < chop_ok_split) {
142
      /* No crossing */
143
183k
      if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) {
144
73.8k
        delete *seam_result;
145
73.8k
        *seam_result = new SEAM(*seam);
146
73.8k
        (*seam_result)->set_priority(my_priority);
147
109k
      } else {
148
109k
        delete seam;
149
109k
        seam = nullptr;
150
109k
        my_priority = BAD_PRIORITY;
151
109k
      }
152
183k
    }
153
154
26.8M
    if (my_priority < chop_good_split) {
155
141k
      delete seam;
156
141k
      return; /* Made good answer */
157
141k
    }
158
159
26.6M
    if (seam) {
160
      /* Combine with others */
161
26.5M
      if (seam_pile->size() < chop_seam_pile_size) {
162
4.08M
        combine_seam(*seam_pile, seam, seam_queue);
163
4.08M
        SeamDecPair pair(seam_pair.key(), seam);
164
4.08M
        seam_pile->Push(&pair);
165
22.4M
      } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size &&
166
22.4M
                 seam_pile->PeekTop().key() > seam_pair.key()) {
167
2.27M
        combine_seam(*seam_pile, seam, seam_queue);
168
2.27M
        SeamDecPair pair;
169
2.27M
        seam_pile->Pop(&pair); // pop the worst.
170
        // Replace the seam in pair (deleting the old one) with
171
        // the new seam and score, then push back into the heap.
172
2.27M
        pair.set_key(seam_pair.key());
173
2.27M
        pair.set_data(seam);
174
2.27M
        seam_pile->Push(&pair);
175
20.2M
      } else {
176
20.2M
        delete seam;
177
20.2M
      }
178
26.5M
    }
179
180
26.6M
    my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key();
181
26.6M
    if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) {
182
1.98M
      return;
183
1.98M
    }
184
26.6M
  }
185
3.32M
}
186
187
/**********************************************************************
188
 * combine_seam
189
 *
190
 * Find other seams to combine with this one.  The new seams that result
191
 * from this union should be added to the seam queue.  The return value
192
 * tells whether or not any additional seams were added to the queue.
193
 **********************************************************************/
194
6.35M
void Wordrec::combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) {
195
563M
  for (int x = 0; x < seam_pile.size(); ++x) {
196
557M
    const SEAM *this_one = seam_pile.get(x).data();
197
557M
    if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
198
55.1M
      SEAM *new_one = new SEAM(*seam);
199
55.1M
      new_one->CombineWith(*this_one);
200
55.1M
      if (chop_debug > 1) {
201
0
        new_one->Print("Combo priority       ");
202
0
      }
203
55.1M
      add_seam_to_queue(new_one->priority(), new_one, seam_queue);
204
55.1M
    }
205
557M
  }
206
6.35M
}
207
208
/**********************************************************************
209
 * pick_good_seam
210
 *
211
 * Find and return a good seam that will split this blob into two pieces.
212
 * Work from the outlines provided.
213
 **********************************************************************/
214
474k
SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
215
474k
  SeamPile seam_pile(chop_seam_pile_size);
216
474k
  EDGEPT *points[MAX_NUM_POINTS];
217
474k
  EDGEPT_CLIST new_points;
218
474k
  SEAM *seam = nullptr;
219
474k
  TESSLINE *outline;
220
474k
  int16_t num_points = 0;
221
222
#ifndef GRAPHICS_DISABLED
223
  if (chop_debug > 2) {
224
    wordrec_display_splits.set_value(true);
225
  }
226
227
  draw_blob_edges(blob);
228
#endif
229
230
474k
  PointHeap point_heap(MAX_NUM_POINTS);
231
1.35M
  for (outline = blob->outlines; outline; outline = outline->next) {
232
876k
    prioritize_points(outline, &point_heap);
233
876k
  }
234
235
1.60M
  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236
1.12M
    points[num_points++] = point_heap.PeekTop().data();
237
1.12M
    point_heap.Pop(nullptr);
238
1.12M
  }
239
240
  /* Initialize queue */
241
474k
  SeamQueue seam_queue(MAX_NUM_SEAMS);
242
243
474k
  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244
474k
  try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob);
245
246
474k
  if (seam == nullptr) {
247
433k
    choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
248
433k
  } else if (seam->priority() > chop_good_split) {
249
15.6k
    choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile);
250
15.6k
  }
251
252
474k
  EDGEPT_C_IT it(&new_points);
253
2.16M
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
254
1.68M
    EDGEPT *inserted_point = it.data();
255
1.68M
    if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
256
12.9M
      for (outline = blob->outlines; outline; outline = outline->next) {
257
11.2M
        if (outline->loop == inserted_point) {
258
0
          outline->loop = outline->loop->next;
259
0
        }
260
11.2M
      }
261
1.65M
      remove_edgept(inserted_point);
262
1.65M
    }
263
1.68M
  }
264
265
474k
  if (seam) {
266
43.9k
    if (seam->priority() > chop_ok_split) {
267
0
      delete seam;
268
0
      seam = nullptr;
269
0
    }
270
#ifndef GRAPHICS_DISABLED
271
    else if (wordrec_display_splits) {
272
      seam->Mark(edge_window);
273
      if (chop_debug > 2) {
274
        edge_window->Update();
275
        edge_window->Wait();
276
      }
277
    }
278
#endif
279
43.9k
  }
280
281
474k
  if (chop_debug) {
282
0
    wordrec_display_splits.set_value(false);
283
0
  }
284
285
474k
  return (seam);
286
474k
}
287
288
/**********************************************************************
289
 * try_point_pairs
290
 *
291
 * Try all the splits that are produced by pairing critical points
292
 * together.  See if any of them are suitable for use.  Use a seam
293
 * queue and seam pile that have already been initialized and used.
294
 **********************************************************************/
295
void Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points,
296
                              SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam,
297
474k
                              TBLOB *blob) {
298
474k
  int16_t x;
299
474k
  int16_t y;
300
474k
  PRIORITY priority;
301
302
1.60M
  for (x = 0; x < num_points; x++) {
303
11.1M
    for (y = x + 1; y < num_points; y++) {
304
10.0M
      if (points[y] &&
305
10.0M
          points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length &&
306
10.0M
          points[x] != points[y]->next && points[y] != points[x]->next &&
307
10.0M
          !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) {
308
2.19M
        SPLIT split(points[x], points[y]);
309
2.19M
        priority = partial_split_priority(&split);
310
311
2.19M
        choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
312
2.19M
      }
313
10.0M
    }
314
1.12M
  }
315
474k
}
316
317
/**********************************************************************
318
 * try_vertical_splits
319
 *
320
 * Try all the splits that are produced by vertical projection to see
321
 * if any of them are suitable for use.  Use a seam queue and seam pile
322
 * that have already been initialized and used.
323
 * Return in new_points a collection of points that were inserted into
324
 * the blob while examining vertical splits and which may safely be
325
 * removed once a seam is chosen if they are not part of the seam.
326
 **********************************************************************/
327
void Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points,
328
                                  EDGEPT_CLIST *new_points, SeamQueue *seam_queue,
329
474k
                                  SeamPile *seam_pile, SEAM **seam, TBLOB *blob) {
330
474k
  EDGEPT *vertical_point = nullptr;
331
474k
  int16_t x;
332
474k
  PRIORITY priority;
333
474k
  TESSLINE *outline;
334
335
1.60M
  for (x = 0; x < num_points; x++) {
336
1.12M
    vertical_point = nullptr;
337
7.12M
    for (outline = blob->outlines; outline; outline = outline->next) {
338
5.99M
      vertical_projection_point(points[x], outline->loop, &vertical_point, new_points);
339
5.99M
    }
340
341
1.12M
    if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next &&
342
1.12M
        points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) {
343
1.07M
      SPLIT split(points[x], vertical_point);
344
1.07M
      priority = partial_split_priority(&split);
345
1.07M
      choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
346
1.07M
    }
347
1.12M
  }
348
474k
}
349
350
} // namespace tesseract