Coverage Report

Created: 2025-09-27 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccstruct/seam.h
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * File:        seam.h
4
 * Author:      Mark Seaman, SW Productivity
5
 *
6
 * (c) Copyright 1987, Hewlett-Packard Company.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 *****************************************************************************/
18
#ifndef SEAM_H
19
#define SEAM_H
20
21
// Include automatically generated configuration file if running autoconf.
22
#ifdef HAVE_CONFIG_H
23
#  include "config_auto.h"
24
#endif
25
26
#include "blobs.h"
27
#include "split.h"
28
29
namespace tesseract {
30
31
using PRIORITY = float; // Floating-point type alias for priority values.
32
33
class SEAM {
34
public:
35
  // A seam with no splits
36
  SEAM(float priority, const TPOINT &location)
37
2.62M
      : priority_(priority), location_(location), num_splits_(0) {}
38
  // A seam with a single split point.
39
  SEAM(float priority, const TPOINT &location, const SPLIT &split)
40
10.0M
      : priority_(priority), location_(location), num_splits_(1) {
41
10.0M
    splits_[0] = split;
42
10.0M
  }
43
  // Default copy constructor, operator= and destructor are OK!
44
45
  // Accessors.
46
231M
  float priority() const {
47
231M
    return priority_;
48
231M
  }
49
212k
  void set_priority(float priority) {
50
212k
    priority_ = priority;
51
212k
  }
52
0
  bool HasAnySplits() const {
53
0
    return num_splits_ > 0;
54
0
  }
55
56
  // Returns the bounding box of all the points in the seam.
57
  TBOX bounding_box() const;
58
59
  // Returns true if other can be combined into *this.
60
1.97G
  bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const {
61
1.97G
    int dist = location_.x - other.location_.x;
62
1.97G
    return -max_x_dist < dist && dist < max_x_dist &&
63
1.51G
           num_splits_ + other.num_splits_ <= kMaxNumSplits &&
64
582M
           priority_ + other.priority_ < max_total_priority && !OverlappingSplits(other) &&
65
187M
           !SharesPosition(other);
66
1.97G
  }
67
68
  // Combines other into *this. Only works if CombinableWith returned true.
69
187M
  void CombineWith(const SEAM &other) {
70
187M
    priority_ += other.priority_;
71
187M
    location_ += other.location_;
72
187M
    location_ /= 2;
73
74
420M
    for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) {
75
232M
      splits_[num_splits_++] = other.splits_[s];
76
232M
    }
77
187M
  }
78
79
  // Returns true if the given blob contains all splits of *this SEAM.
80
406k
  bool ContainedByBlob(const TBLOB &blob) const {
81
590k
    for (int s = 0; s < num_splits_; ++s) {
82
192k
      if (!splits_[s].ContainedByBlob(blob)) {
83
7.71k
        return false;
84
7.71k
      }
85
192k
    }
86
398k
    return true;
87
406k
  }
88
89
  // Returns true if the given EDGEPT is used by this SEAM, checking only
90
  // the EDGEPT pointer, not the coordinates.
91
2.60M
  bool UsesPoint(const EDGEPT *point) const {
92
6.32M
    for (int s = 0; s < num_splits_; ++s) {
93
3.79M
      if (splits_[s].UsesPoint(point)) {
94
80.4k
        return true;
95
80.4k
      }
96
3.79M
    }
97
2.52M
    return false;
98
2.60M
  }
99
  // Returns true if *this and other share any common point, by coordinates.
100
190M
  bool SharesPosition(const SEAM &other) const {
101
460M
    for (int s = 0; s < num_splits_; ++s) {
102
585M
      for (int t = 0; t < other.num_splits_; ++t) {
103
315M
        if (splits_[s].SharesPosition(other.splits_[t])) {
104
4.77k
          return true;
105
4.77k
        }
106
315M
      }
107
270M
    }
108
190M
    return false;
109
190M
  }
110
  // Returns true if *this and other have any vertically overlapping splits.
111
521M
  bool OverlappingSplits(const SEAM &other) const {
112
889M
    for (int s = 0; s < num_splits_; ++s) {
113
701M
      TBOX split1_box = splits_[s].bounding_box();
114
1.13G
      for (int t = 0; t < other.num_splits_; ++t) {
115
770M
        TBOX split2_box = other.splits_[t].bounding_box();
116
770M
        if (split1_box.y_overlap(split2_box)) {
117
334M
          return true;
118
334M
        }
119
770M
      }
120
701M
    }
121
187M
    return false;
122
521M
  }
123
124
  // Marks the edgepts used by the seam so the segments made by the cut
125
  // never get split further by another seam in the future.
126
192k
  void Finalize() {
127
270k
    for (int s = 0; s < num_splits_; ++s) {
128
78.3k
      splits_[s].point1->MarkChop();
129
78.3k
      splits_[s].point2->MarkChop();
130
78.3k
    }
131
192k
  }
132
133
  // Returns true if the splits in *this SEAM appear OK in the sense that they
134
  // do not cross any outlines and do not chop off any ridiculously small
135
  // pieces.
136
  bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
137
138
  // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
139
  // seam, which is about to be inserted at insert_index. Returns false if
140
  // any of the computations fails, as this indicates an invalid chop.
141
  // widthn_/widthp_ are only changed if modify is true.
142
  bool PrepareToInsertSeam(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
143
                           int insert_index, bool modify);
144
  // Computes the widthp_/widthn_ range. Returns false if not all the splits
145
  // are accounted for. widthn_/widthp_ are only changed if modify is true.
146
  bool FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify);
147
148
  // Splits this blob into two blobs by applying the splits included in
149
  // *this SEAM
150
  void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const;
151
  // Undoes ApplySeam by removing the seam between these two blobs.
152
  // Produces one blob as a result, and deletes other_blob.
153
  void UndoSeam(TBLOB *blob, TBLOB *other_blob) const;
154
155
  // Prints everything in *this SEAM.
156
  void Print(const char *label) const;
157
  // Prints a collection of SEAMs.
158
  static void PrintSeams(const char *label, const std::vector<SEAM *> &seams);
159
#ifndef GRAPHICS_DISABLED
160
  // Draws the seam in the given window.
161
  void Mark(ScrollView *window) const;
162
#endif
163
164
  // Break up the blobs in this chain so that they are all independent.
165
  // This operation should undo the affect of join_pieces.
166
  static void BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
167
                          int first, int last);
168
  // Join a group of base level pieces into a single blob that can then
169
  // be classified.
170
  static void JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
171
                         int first, int last);
172
173
  // Hides the seam so the outlines appear not to be cut by it.
174
  void Hide() const;
175
  // Undoes hide, so the outlines are cut by the seam.
176
  void Reveal() const;
177
178
  // Computes and returns, but does not set, the full priority of *this SEAM.
179
  // The arguments here are config parameters defined in Wordrec. Add chop_
180
  // to the beginning of the name.
181
  float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
182
                     double center_knob, double width_change_knob) const;
183
184
private:
185
  // Maximum number of splits that a SEAM can hold.
186
  static const uint8_t kMaxNumSplits = 3;
187
  // Priority of this split. Lower is better.
188
  float priority_;
189
  // Position of the middle of the seam.
190
  TPOINT location_;
191
  // A range such that all splits in *this SEAM are contained within blobs in
192
  // the range [index - widthn_,index + widthp_] where index is the index of
193
  // this SEAM in the seams vector.
194
  uint8_t widthp_ = 0;
195
  uint8_t widthn_ = 0;
196
  // Number of splits_ that are used.
197
  uint8_t num_splits_;
198
  // Set of pairs of points that are the ends of each split in the SEAM.
199
  SPLIT splits_[kMaxNumSplits];
200
};
201
202
// Declared here; the definition is not part of this header.
// NOTE(review): presumably fills *seam_array with the initial seams for the
// given word -- confirm against the implementation.
void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array);
203
204
} // namespace tesseract
205
206
#endif