/src/tesseract/src/textord/tabvector.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: tabvector.h |
3 | | // Description: Class to hold a near-vertical vector representing a tab-stop. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2008, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifndef TESSERACT_TEXTORD_TABVECTOR_H_ |
20 | | #define TESSERACT_TEXTORD_TABVECTOR_H_ |
21 | | |
22 | | #include "bbgrid.h" |
23 | | #include "blobgrid.h" |
24 | | #include "clst.h" |
25 | | #include "elst.h" |
26 | | #include "elst2.h" |
27 | | #include "rect.h" |
28 | | |
29 | | #include <algorithm> |
30 | | |
31 | | class BLOBNBOX; |
32 | | class ScrollView; |
33 | | |
34 | | namespace tesseract { |
35 | | |
36 | | extern double_VAR_H(textord_tabvector_vertical_gap_fraction); |
37 | | extern double_VAR_H(textord_tabvector_vertical_box_ratio); |
38 | | |
39 | | // The alignment type that a tab vector represents. |
40 | | // Keep this enum synced with kAlignmentNames in tabvector.cpp. |
41 | | enum TabAlignment { |
42 | | TA_LEFT_ALIGNED, |
43 | | TA_LEFT_RAGGED, |
44 | | TA_CENTER_JUSTIFIED, |
45 | | TA_RIGHT_ALIGNED, |
46 | | TA_RIGHT_RAGGED, |
47 | | TA_SEPARATOR, |
48 | | TA_COUNT |
49 | | }; |
50 | | |
51 | | // Forward declarations. The classes use their own list types, so we |
52 | | // need to make the list types first. |
53 | | class TabFind; |
54 | | class TabVector; |
55 | | class TabConstraint; |
56 | | |
57 | | ELIST2IZEH(TabVector) |
58 | | CLISTIZEH(TabVector) |
59 | | ELISTIZEH(TabConstraint) |
60 | | |
61 | | // TabConstraint is a totally self-contained class to maintain |
62 | | // a list of [min,max] constraints, each referring to a TabVector. |
63 | | // The constraints are manipulated through static methods that act |
64 | | // on a list of constraints. The list itself is cooperatively owned |
65 | | // by the TabVectors of the constraints on the list and managed |
66 | | // by implicit reference counting via the elements of the list. |
67 | | class TabConstraint : public ELIST<TabConstraint>::LINK { |
68 | | public: |
69 | | // This empty constructor is here only so that the class can be ELISTIZED. |
70 | | // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier |
71 | | // and eliminate CLASSNAME##_copier. |
72 | | TabConstraint() = default; |
73 | | |
74 | | // Create a constraint for the top or bottom of this TabVector. |
75 | | static void CreateConstraint(TabVector *vector, bool is_top); |
76 | | |
77 | | // Test to see if the constraints are compatible enough to merge. |
78 | | static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2); |
79 | | |
80 | | // Merge the lists of constraints and update the TabVector pointers. |
81 | | // The second list is deleted. |
82 | | static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2); |
83 | | |
84 | | // Set all the tops and bottoms as appropriate to a mean of the |
85 | | // constrained range. Delete all the constraints and list. |
86 | | static void ApplyConstraints(TabConstraint_LIST *constraints); |
87 | | |
88 | | private: |
89 | | TabConstraint(TabVector *vector, bool is_top); |
90 | | |
91 | | // Get the max of the mins and the min of the maxes. |
92 | | static void GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max); |
93 | | |
94 | | // The TabVector this constraint applies to. |
95 | | TabVector *vector_; |
96 | | // If true then we refer to the top of the vector_. |
97 | | bool is_top_; |
98 | | // The allowed range of this vector_. |
99 | | int y_min_; |
100 | | int y_max_; |
101 | | }; |
102 | | |
103 | | // Class to hold information about a single vector |
104 | | // that represents a tab stop or a rule line. |
105 | | class TabVector : public ELIST2<TabVector>::LINK { |
106 | | public: |
107 | | // TODO(rays) fix this in elst.h line 1076, where it should use the |
108 | | // copy constructor instead of operator=. |
109 | 0 | TabVector() = default; |
110 | 0 | ~TabVector() = default; |
111 | | |
112 | | // Public factory to build a TabVector from a list of boxes. |
113 | | // The TabVector will be of the given alignment type. |
114 | | // The input vertical vector is used in fitting, and the output |
115 | | // vertical_x, vertical_y have the resulting line vector added to them |
116 | | // if the alignment is not ragged. |
117 | | // The extended_start_y and extended_end_y are the maximum possible |
118 | | // extension to the line segment that can be used to align with others. |
119 | | // The input CLIST of BLOBNBOX good_points is consumed and taken over. |
120 | | static TabVector *FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, |
121 | | int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, |
122 | | int *vertical_y); |
123 | | |
124 | | // Build a ragged TabVector by copying another's direction, shifting it |
125 | | // to match the given blob, and making its initial extent the height |
126 | | // of the blob, but its extended bounds from the bounds of the original. |
127 | | TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew, |
128 | | BLOBNBOX *blob); |
129 | | |
130 | | // Copies basic attributes of a tab vector for simple operations. |
131 | | // Copies things such startpt, endpt, range, width. |
132 | | // Does not copy things such as partners, boxes, or constraints. |
133 | | // This is useful if you only need vector information for processing, such |
134 | | // as in the table detection code. |
135 | | TabVector *ShallowCopy() const; |
136 | | |
137 | | // Simple accessors. |
138 | 0 | const ICOORD &startpt() const { |
139 | 0 | return startpt_; |
140 | 0 | } |
141 | 0 | const ICOORD &endpt() const { |
142 | 0 | return endpt_; |
143 | 0 | } |
144 | 0 | int extended_ymax() const { |
145 | 0 | return extended_ymax_; |
146 | 0 | } |
147 | 0 | int extended_ymin() const { |
148 | 0 | return extended_ymin_; |
149 | 0 | } |
150 | 0 | int sort_key() const { |
151 | 0 | return sort_key_; |
152 | 0 | } |
153 | 0 | int mean_width() const { |
154 | 0 | return mean_width_; |
155 | 0 | } |
156 | 0 | void set_top_constraints(TabConstraint_LIST *constraints) { |
157 | 0 | top_constraints_ = constraints; |
158 | 0 | } |
159 | 0 | void set_bottom_constraints(TabConstraint_LIST *constraints) { |
160 | 0 | bottom_constraints_ = constraints; |
161 | 0 | } |
162 | 0 | TabVector_CLIST *partners() { |
163 | 0 | return &partners_; |
164 | 0 | } |
165 | 0 | void set_startpt(const ICOORD &start) { |
166 | 0 | startpt_ = start; |
167 | 0 | } |
168 | 0 | void set_endpt(const ICOORD &end) { |
169 | 0 | endpt_ = end; |
170 | 0 | } |
171 | 0 | bool intersects_other_lines() const { |
172 | 0 | return intersects_other_lines_; |
173 | 0 | } |
174 | 0 | void set_intersects_other_lines(bool value) { |
175 | 0 | intersects_other_lines_ = value; |
176 | 0 | } |
177 | | |
178 | | // Inline quasi-accessors that require some computation. |
179 | | |
180 | | // Compute the x coordinate at the given y coordinate. |
181 | 0 | int XAtY(int y) const { |
182 | 0 | int height = endpt_.y() - startpt_.y(); |
183 | 0 | if (height != 0) { |
184 | 0 | return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + startpt_.x(); |
185 | 0 | } else { |
186 | 0 | return startpt_.x(); |
187 | 0 | } |
188 | 0 | } |
189 | | |
190 | | // Compute the vertical overlap with the other TabVector. |
191 | 0 | int VOverlap(const TabVector &other) const { |
192 | 0 | return std::min(other.endpt_.y(), endpt_.y()) - std::max(other.startpt_.y(), startpt_.y()); |
193 | 0 | } |
194 | | // Compute the vertical overlap with the given y bounds. |
195 | 0 | int VOverlap(int top_y, int bottom_y) const { |
196 | 0 | return std::min(top_y, static_cast<int>(endpt_.y())) - |
197 | 0 | std::max(bottom_y, static_cast<int>(startpt_.y())); |
198 | 0 | } |
199 | | // Compute the extended vertical overlap with the given y bounds. |
200 | 0 | int ExtendedOverlap(int top_y, int bottom_y) const { |
201 | 0 | return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_); |
202 | 0 | } |
203 | | |
204 | | // Return true if this is a left tab stop, either aligned, or ragged. |
205 | 0 | bool IsLeftTab() const { |
206 | 0 | return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED; |
207 | 0 | } |
208 | | // Return true if this is a right tab stop, either aligned, or ragged. |
209 | 0 | bool IsRightTab() const { |
210 | 0 | return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED; |
211 | 0 | } |
212 | | // Return true if this is a separator. |
213 | 0 | bool IsSeparator() const { |
214 | 0 | return alignment_ == TA_SEPARATOR; |
215 | 0 | } |
216 | | // Return true if this is a center aligned tab stop. |
217 | 0 | bool IsCenterTab() const { |
218 | 0 | return alignment_ == TA_CENTER_JUSTIFIED; |
219 | 0 | } |
220 | | // Return true if this is a ragged tab top, either left or right. |
221 | 0 | bool IsRagged() const { |
222 | 0 | return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED; |
223 | 0 | } |
224 | | |
225 | | // Return true if this vector is to the left of the other in terms |
226 | | // of sort_key_. |
227 | 0 | bool IsLeftOf(const TabVector &other) const { |
228 | 0 | return sort_key_ < other.sort_key_; |
229 | 0 | } |
230 | | |
231 | | // Return true if the vector has no partners. |
232 | 0 | bool Partnerless() { |
233 | 0 | return partners_.empty(); |
234 | 0 | } |
235 | | |
236 | | // Return the number of tab boxes in this vector. |
237 | 0 | int BoxCount() { |
238 | 0 | return boxes_.length(); |
239 | 0 | } |
240 | | |
241 | | // Lock the vector from refits by clearing the boxes_ list. |
242 | 0 | void Freeze() { |
243 | 0 | boxes_.shallow_clear(); |
244 | 0 | } |
245 | | |
246 | | // Flip x and y on the ends so a vector can be created from flipped input. |
247 | 0 | void XYFlip() { |
248 | 0 | int x = startpt_.y(); |
249 | 0 | startpt_.set_y(startpt_.x()); |
250 | 0 | startpt_.set_x(x); |
251 | 0 | x = endpt_.y(); |
252 | 0 | endpt_.set_y(endpt_.x()); |
253 | 0 | endpt_.set_x(x); |
254 | 0 | } |
255 | | |
256 | | // Reflect the tab vector in the y-axis. |
257 | 0 | void ReflectInYAxis() { |
258 | 0 | startpt_.set_x(-startpt_.x()); |
259 | 0 | endpt_.set_x(-endpt_.x()); |
260 | 0 | sort_key_ = -sort_key_; |
261 | 0 | if (alignment_ == TA_LEFT_ALIGNED) { |
262 | 0 | alignment_ = TA_RIGHT_ALIGNED; |
263 | 0 | } else if (alignment_ == TA_RIGHT_ALIGNED) { |
264 | 0 | alignment_ = TA_LEFT_ALIGNED; |
265 | 0 | } |
266 | 0 | if (alignment_ == TA_LEFT_RAGGED) { |
267 | 0 | alignment_ = TA_RIGHT_RAGGED; |
268 | 0 | } else if (alignment_ == TA_RIGHT_RAGGED) { |
269 | 0 | alignment_ = TA_LEFT_RAGGED; |
270 | 0 | } |
271 | 0 | } |
272 | | |
273 | | // Separate function to compute the sort key for a given coordinate pair. |
274 | 0 | static int SortKey(const ICOORD &vertical, int x, int y) { |
275 | 0 | ICOORD pt(x, y); |
276 | 0 | return pt * vertical; |
277 | 0 | } |
278 | | |
279 | | // Return the x at the given y for the given sort key. |
280 | 0 | static int XAtY(const ICOORD &vertical, int sort_key, int y) { |
281 | 0 | if (vertical.y() != 0) { |
282 | 0 | return (vertical.x() * y + sort_key) / vertical.y(); |
283 | 0 | } else { |
284 | 0 | return sort_key; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | // Sort function for E2LIST::sort to sort by sort_key_. |
289 | 0 | static int SortVectorsByKey(const TabVector *tv1, const TabVector *tv2) { |
290 | 0 | return tv1->sort_key_ - tv2->sort_key_; |
291 | 0 | } |
292 | | |
293 | | // More complex members. |
294 | | |
295 | | // Extend this vector to include the supplied blob if it doesn't |
296 | | // already have it. |
297 | | void ExtendToBox(BLOBNBOX *blob); |
298 | | |
299 | | // Set the ycoord of the start and move the xcoord to match. |
300 | | void SetYStart(int start_y); |
301 | | // Set the ycoord of the end and move the xcoord to match. |
302 | | void SetYEnd(int end_y); |
303 | | |
304 | | // Rotate the ends by the given vector. |
305 | | void Rotate(const FCOORD &rotation); |
306 | | |
307 | | // Setup the initial constraints, being the limits of |
308 | | // the vector and the extended ends. |
309 | | void SetupConstraints(); |
310 | | |
311 | | // Setup the constraints between the partners of this TabVector. |
312 | | void SetupPartnerConstraints(); |
313 | | |
314 | | // Setup the constraints between this and its partner. |
315 | | void SetupPartnerConstraints(TabVector *partner); |
316 | | |
317 | | // Use the constraints to modify the top and bottom. |
318 | | void ApplyConstraints(); |
319 | | |
320 | | // Merge close tab vectors of the same side that overlap. |
321 | | static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, |
322 | | BlobGrid *grid); |
323 | | |
324 | | // Return true if this vector is the same side, overlaps, and close |
325 | | // enough to the other to be merged. |
326 | | bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const; |
327 | | |
328 | | // Eat the other TabVector into this and delete it. |
329 | | void MergeWith(const ICOORD &vertical, TabVector *other); |
330 | | |
331 | | // Add a new element to the list of partner TabVectors. |
332 | | // Partners must be added in order of increasing y coordinate of the text line |
333 | | // that makes them partners. |
334 | | // Groups of identical partners are merged into one. |
335 | | void AddPartner(TabVector *partner); |
336 | | |
337 | | // Return true if other is a partner of this. |
338 | | bool IsAPartner(const TabVector *other); |
339 | | |
340 | | // Print basic information about this tab vector. |
341 | | void Print(const char *prefix); |
342 | | |
343 | | // Print basic information about this tab vector and every box in it. |
344 | | void Debug(const char *prefix); |
345 | | |
346 | | // Draw this tabvector in place in the given window. |
347 | | void Display(ScrollView *tab_win); |
348 | | |
349 | | // Refit the line and/or re-evaluate the vector if the dirty flags are set. |
350 | | void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder); |
351 | | |
352 | | // Evaluate the vector in terms of coverage of its length by good-looking |
353 | | // box edges. A good looking box is one where its nearest neighbour on the |
354 | | // inside is nearer than half the distance its nearest neighbour on the |
355 | | // outside of the putative column. Bad boxes are removed from the line. |
356 | | // A second pass then further filters boxes by requiring that the gutter |
357 | | // width be a minimum fraction of the mean gutter along the line. |
358 | | void Evaluate(const ICOORD &vertical, TabFind *finder); |
359 | | |
360 | | // (Re)Fit a line to the stored points. Returns false if the line |
361 | | // is degenerate. Although the TabVector code mostly doesn't care about the |
362 | | // direction of lines, XAtY would give silly results for a horizontal line. |
363 | | // The class is mostly aimed at use for vertical lines representing |
364 | | // horizontal tab stops. |
365 | | bool Fit(ICOORD vertical, bool force_parallel); |
366 | | |
367 | | // Return the partner of this TabVector if the vector qualifies as |
368 | | // being a vertical text line, otherwise nullptr. |
369 | | TabVector *VerticalTextlinePartner(); |
370 | | |
371 | | // Return the matching tabvector if there is exactly one partner, or |
372 | | // nullptr otherwise. This can be used after matching is done, eg. by |
373 | | // VerticalTextlinePartner(), without checking if the line is vertical. |
374 | | TabVector *GetSinglePartner(); |
375 | | |
376 | | private: |
377 | | // Constructor is private as the static factory is the external way |
378 | | // to build a TabVector. |
379 | | TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST *boxes); |
380 | | |
381 | | // Delete this, but first, repoint all the partners to point to |
382 | | // replacement. If replacement is nullptr, then partner relationships |
383 | | // are removed. |
384 | | void Delete(TabVector *replacement); |
385 | | |
386 | | private: |
387 | | // The bottom of the tab line. |
388 | | ICOORD startpt_; |
389 | | // The top of the tab line. |
390 | | ICOORD endpt_; |
391 | | // The lowest y that the vector might extend to. |
392 | | int extended_ymin_ = 0; |
393 | | // The highest y that the vector might extend to. |
394 | | int extended_ymax_ = 0; |
395 | | // Perpendicular distance of vector from a given vertical for sorting. |
396 | | int sort_key_ = 0; |
397 | | // Result of Evaluate 0-100. Coverage of line with good boxes. |
398 | | int percent_score_ = 0; |
399 | | // The mean width of the blobs. Meaningful only for separator lines. |
400 | | int mean_width_ = 0; |
401 | | // True if the boxes_ list has been modified, so a refit is needed. |
402 | | bool needs_refit_ = false; |
403 | | // True if a fit has been done, so re-evaluation is needed. |
404 | | bool needs_evaluation_ = false; |
405 | | // True if a separator line intersects at least 2 other lines. |
406 | | bool intersects_other_lines_ = false; |
407 | | // The type of this TabVector. |
408 | | TabAlignment alignment_ = TA_LEFT_ALIGNED; |
409 | | // The list of boxes whose edges are aligned at this TabVector. |
410 | | BLOBNBOX_CLIST boxes_; |
411 | | // List of TabVectors that have a connection with this via a text line. |
412 | | TabVector_CLIST partners_; |
413 | | // Constraints used to resolve the exact location of the top and bottom |
414 | | // of the tab line. |
415 | | TabConstraint_LIST *top_constraints_ = nullptr; |
416 | | TabConstraint_LIST *bottom_constraints_ = nullptr; |
417 | | }; |
418 | | |
419 | | } // namespace tesseract. |
420 | | |
421 | | #endif // TESSERACT_TEXTORD_TABVECTOR_H_ |