Coverage Report

Created: 2024-02-28 06:46

/src/tesseract/src/ccstruct/seam.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * File:         seam.cpp  (Formerly seam.c)
4
 * Author:       Mark Seaman, OCR Technology
5
 *
6
 * (c) Copyright 1987, Hewlett-Packard Company.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 *****************************************************************************/
18
/*----------------------------------------------------------------------
19
              I n c l u d e s
20
----------------------------------------------------------------------*/
21
#include "seam.h"
22
23
#include "blobs.h"
24
#include "tprintf.h"
25
26
namespace tesseract {
27
28
/*----------------------------------------------------------------------
29
        Public Function Code
30
----------------------------------------------------------------------*/
31
32
// Returns the bounding box of all the points in the seam.
33
0
TBOX SEAM::bounding_box() const {
34
0
  TBOX box(location_.x, location_.y, location_.x, location_.y);
35
0
  for (int s = 0; s < num_splits_; ++s) {
36
0
    box += splits_[s].bounding_box();
37
0
  }
38
0
  return box;
39
0
}
40
41
// Returns true if the splits in *this SEAM appear OK in the sense that they
42
// do not cross any outlines and do not chop off any ridiculously small
43
// pieces.
44
713k
bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
45
  // TODO(rays) Try testing all the splits. Duplicating original code for now,
46
  // which tested only the first.
47
713k
  return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
48
713k
}
49
50
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
51
// seam, which is about to be inserted at insert_index. Returns false if
52
// any of the computations fails, as this indicates an invalid chop.
53
// widthn_/widthp_ are only changed if modify is true.
54
bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
55
581k
                               const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
56
4.76M
  for (int s = 0; s < insert_index; ++s) {
57
4.18M
    if (!seams[s]->FindBlobWidth(blobs, s, modify)) {
58
0
      return false;
59
0
    }
60
4.18M
  }
61
581k
  if (!FindBlobWidth(blobs, insert_index, modify)) {
62
0
    return false;
63
0
  }
64
4.64M
  for (unsigned s = insert_index; s < seams.size(); ++s) {
65
4.06M
    if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) {
66
0
      return false;
67
0
    }
68
4.06M
  }
69
581k
  return true;
70
581k
}
71
72
// Computes the widthp_/widthn_ range. Returns false if not all the splits
73
// are accounted for. widthn_/widthp_ are only changed if modify is true.
74
8.83M
bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) {
75
8.83M
  int num_found = 0;
76
8.83M
  if (modify) {
77
4.41M
    widthp_ = 0;
78
4.41M
    widthn_ = 0;
79
4.41M
  }
80
10.9M
  for (int s = 0; s < num_splits_; ++s) {
81
2.07M
    const SPLIT &split = splits_[s];
82
2.07M
    bool found_split = split.ContainedByBlob(*blobs[index]);
83
    // Look right.
84
2.36M
    for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) {
85
284k
      found_split = split.ContainedByBlob(*blobs[b]);
86
284k
      if (found_split && b - index > widthp_ && modify) {
87
86.3k
        widthp_ = b - index;
88
86.3k
      }
89
284k
    }
90
    // Look left.
91
2.08M
    for (int b = index - 1; !found_split && b >= 0; --b) {
92
2.53k
      found_split = split.ContainedByBlob(*blobs[b]);
93
2.53k
      if (found_split && index - b > widthn_ && modify) {
94
808
        widthn_ = index - b;
95
808
      }
96
2.53k
    }
97
2.07M
    if (found_split) {
98
2.07M
      ++num_found;
99
2.07M
    }
100
2.07M
  }
101
8.83M
  return num_found == num_splits_;
102
8.83M
}
103
104
// Splits this blob into two blobs by applying the splits included in
105
// *this SEAM
106
411k
void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const {
107
607k
  for (int s = 0; s < num_splits_; ++s) {
108
196k
    splits_[s].SplitOutlineList(blob->outlines);
109
196k
  }
110
411k
  blob->ComputeBoundingBoxes();
111
112
411k
  divide_blobs(blob, other_blob, italic_blob, location_);
113
114
411k
  blob->EliminateDuplicateOutlines();
115
411k
  other_blob->EliminateDuplicateOutlines();
116
117
411k
  blob->CorrectBlobOrder(other_blob);
118
411k
}
119
120
// Undoes ApplySeam by removing the seam between these two blobs.
121
// Produces one blob as a result, and deletes other_blob.
122
120k
void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
123
120k
  if (blob->outlines == nullptr) {
124
3.95k
    blob->outlines = other_blob->outlines;
125
3.95k
    other_blob->outlines = nullptr;
126
3.95k
  }
127
128
120k
  TESSLINE *outline = blob->outlines;
129
286k
  while (outline->next) {
130
165k
    outline = outline->next;
131
165k
  }
132
120k
  outline->next = other_blob->outlines;
133
120k
  other_blob->outlines = nullptr;
134
120k
  delete other_blob;
135
136
214k
  for (int s = 0; s < num_splits_; ++s) {
137
93.6k
    splits_[s].UnsplitOutlineList(blob);
138
93.6k
  }
139
120k
  blob->ComputeBoundingBoxes();
140
120k
  blob->EliminateDuplicateOutlines();
141
120k
}
142
143
// Prints everything in *this SEAM.
144
0
void SEAM::Print(const char *label) const {
145
0
  tprintf("%s", label);
146
0
  tprintf(" %6.2f @ (%d,%d), p=%u, n=%u ", priority_, location_.x, location_.y, widthp_, widthn_);
147
0
  for (int s = 0; s < num_splits_; ++s) {
148
0
    splits_[s].Print();
149
0
    if (s + 1 < num_splits_) {
150
0
      tprintf(",   ");
151
0
    }
152
0
  }
153
0
  tprintf("\n");
154
0
}
155
156
// Prints a collection of SEAMs.
157
/* static */
158
0
void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
159
0
  if (!seams.empty()) {
160
0
    tprintf("%s\n", label);
161
0
    for (unsigned x = 0; x < seams.size(); ++x) {
162
0
      tprintf("%2u:   ", x);
163
0
      seams[x]->Print("");
164
0
    }
165
0
    tprintf("\n");
166
0
  }
167
0
}
168
169
#ifndef GRAPHICS_DISABLED
170
// Draws the seam in the given window.
171
void SEAM::Mark(ScrollView *window) const {
172
  for (int s = 0; s < num_splits_; ++s) {
173
    splits_[s].Mark(window);
174
  }
175
}
176
#endif
177
178
// Break up the blobs in this chain so that they are all independent.
179
// This operation should undo the affect of join_pieces.
180
/* static */
181
void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
182
1.73M
                       int first, int last) {
183
5.84M
  for (int x = first; x < last; ++x) {
184
4.11M
    seams[x]->Reveal();
185
4.11M
  }
186
187
1.73M
  TESSLINE *outline = blobs[first]->outlines;
188
1.73M
  int next_blob = first + 1;
189
190
6.67M
  while (outline != nullptr && next_blob <= last) {
191
4.93M
    if (outline->next == blobs[next_blob]->outlines) {
192
4.11M
      outline->next = nullptr;
193
4.11M
      outline = blobs[next_blob]->outlines;
194
4.11M
      ++next_blob;
195
4.11M
    } else {
196
823k
      outline = outline->next;
197
823k
    }
198
4.93M
  }
199
1.73M
}
200
201
// Join a group of base level pieces into a single blob that can then
202
// be classified.
203
/* static */
204
void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
205
1.73M
                      int first, int last) {
206
1.73M
  TESSLINE *outline = blobs[first]->outlines;
207
1.73M
  if (!outline) {
208
0
    return;
209
0
  }
210
211
5.84M
  for (int x = first; x < last; ++x) {
212
4.11M
    SEAM *seam = seams[x];
213
4.11M
    if (x - seam->widthn_ >= first && x + seam->widthp_ < last) {
214
4.09M
      seam->Hide();
215
4.09M
    }
216
7.31M
    while (outline->next) {
217
3.20M
      outline = outline->next;
218
3.20M
    }
219
4.11M
    outline->next = blobs[x + 1]->outlines;
220
4.11M
  }
221
1.73M
}
222
223
// Hides the seam so the outlines appear not to be cut by it.
224
4.09M
void SEAM::Hide() const {
225
4.56M
  for (int s = 0; s < num_splits_; ++s) {
226
466k
    splits_[s].Hide();
227
466k
  }
228
4.09M
}
229
230
// Undoes hide, so the outlines are cut by the seam.
231
4.11M
void SEAM::Reveal() const {
232
4.60M
  for (int s = 0; s < num_splits_; ++s) {
233
493k
    splits_[s].Reveal();
234
493k
  }
235
4.11M
}
236
237
// Computes and returns, but does not set, the full priority of *this SEAM.
238
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
239
99.8M
                         double center_knob, double width_change_knob) const {
240
99.8M
  if (num_splits_ == 0) {
241
0
    return 0.0f;
242
0
  }
243
236M
  for (int s = 1; s < num_splits_; ++s) {
244
136M
    splits_[s].SplitOutline();
245
136M
  }
246
99.8M
  float full_priority =
247
99.8M
      priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, center_knob,
248
99.8M
                                          width_change_knob);
249
236M
  for (int s = num_splits_ - 1; s >= 1; --s) {
250
136M
    splits_[s].UnsplitOutlines();
251
136M
  }
252
99.8M
  return full_priority;
253
99.8M
}
254
255
/**
256
 * @name start_seam_list
257
 *
258
 * Initialize a list of seams that match the original number of blobs
259
 * present in the starting segmentation.  Each of the seams created
260
 * by this routine have location information only.
261
 */
262
734k
void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) {
263
734k
  seam_array->clear();
264
734k
  TPOINT location;
265
266
3.84M
  for (unsigned b = 1; b < word->NumBlobs(); ++b) {
267
3.11M
    TBOX bbox = word->blobs[b - 1]->bounding_box();
268
3.11M
    TBOX nbox = word->blobs[b]->bounding_box();
269
3.11M
    location.x = (bbox.right() + nbox.left()) / 2;
270
3.11M
    location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
271
3.11M
    seam_array->push_back(new SEAM(0.0f, location));
272
3.11M
  }
273
734k
}
274
275
} // namespace tesseract