/src/tesseract/src/textord/workingpartset.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: workingpartset.cpp |
3 | | // Description: Class to hold a working set of partitions of the page |
4 | | // during construction of text/image regions. |
5 | | // Author: Ray Smith |
6 | | // Created: Tue Oct 28 17:21:01 PDT 2008 |
7 | | // |
8 | | // (C) Copyright 2008, Google Inc. |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
10 | | // you may not use this file except in compliance with the License. |
11 | | // You may obtain a copy of the License at |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // Unless required by applicable law or agreed to in writing, software |
14 | | // distributed under the License is distributed on an "AS IS" BASIS, |
15 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | // See the License for the specific language governing permissions and |
17 | | // limitations under the License. |
18 | | // |
19 | | /////////////////////////////////////////////////////////////////////// |
20 | | |
21 | | #include "workingpartset.h" |
22 | | #include "colpartition.h" |
23 | | |
24 | | namespace tesseract { |
25 | | |
26 | | // Add the partition to this WorkingPartSet. Unrelated partitions are |
27 | | // stored in the order in which they are received, but if the partition |
28 | | // has a SingletonPartner, make sure that it stays with its partner. |
29 | 0 | void WorkingPartSet::AddPartition(ColPartition *part) { |
30 | 0 | ColPartition *partner = part->SingletonPartner(true); |
31 | 0 | if (partner != nullptr) { |
32 | 0 | ASSERT_HOST(partner->SingletonPartner(false) == part); |
33 | 0 | } |
34 | 0 | if (latest_part_ == nullptr || partner == nullptr) { |
35 | | // This partition goes at the end of the list |
36 | 0 | part_it_.move_to_last(); |
37 | 0 | } else if (latest_part_->SingletonPartner(false) != part) { |
38 | | // Reposition the iterator to the correct partner, or at the end. |
39 | 0 | for (part_it_.move_to_first(); !part_it_.at_last() && part_it_.data() != partner; |
40 | 0 | part_it_.forward()) { |
41 | 0 | ; |
42 | 0 | } |
43 | 0 | } |
44 | 0 | part_it_.add_after_then_move(part); |
45 | 0 | latest_part_ = part; |
46 | 0 | } |
47 | | |
48 | | // Make blocks out of any partitions in this WorkingPartSet, and append |
49 | | // them to the end of the blocks list. bleft, tright and resolution give |
50 | | // the bounds and resolution of the source image, so that blocks can be |
51 | | // made to fit in the bounds. |
52 | | // All ColPartitions go in the used_parts list, as they need to be kept |
53 | | // around, but are no longer needed. |
54 | | void WorkingPartSet::ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, |
55 | | int resolution, ColPartition_LIST *used_parts, |
56 | 0 | BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { |
57 | 0 | MakeBlocks(bleft, tright, resolution, used_parts); |
58 | 0 | BLOCK_IT block_it(blocks); |
59 | 0 | block_it.move_to_last(); |
60 | 0 | block_it.add_list_after(&completed_blocks_); |
61 | 0 | TO_BLOCK_IT to_block_it(to_blocks); |
62 | 0 | to_block_it.move_to_last(); |
63 | 0 | to_block_it.add_list_after(&to_blocks_); |
64 | 0 | } |
65 | | |
66 | | // Insert the given blocks at the front of the completed_blocks_ list so |
67 | | // they can be kept in the correct reading order. |
68 | 0 | void WorkingPartSet::InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { |
69 | 0 | BLOCK_IT block_it(&completed_blocks_); |
70 | 0 | block_it.add_list_before(blocks); |
71 | 0 | TO_BLOCK_IT to_block_it(&to_blocks_); |
72 | 0 | to_block_it.add_list_before(to_blocks); |
73 | 0 | } |
74 | | |
75 | | // Make a block using lines parallel to the given vector that fit between |
76 | | // the min and max coordinates specified by the ColPartitions. |
77 | | // Construct a block from the given list of partitions. |
78 | | void WorkingPartSet::MakeBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, |
79 | 0 | ColPartition_LIST *used_parts) { |
80 | 0 | part_it_.move_to_first(); |
81 | 0 | while (!part_it_.empty()) { |
82 | | // Gather a list of ColPartitions in block_parts that will be split |
83 | | // by linespacing into smaller blocks. |
84 | 0 | ColPartition_LIST block_parts; |
85 | 0 | ColPartition_IT block_it(&block_parts); |
86 | 0 | ColPartition *next_part = nullptr; |
87 | 0 | bool text_block = false; |
88 | 0 | do { |
89 | 0 | ColPartition *part = part_it_.extract(); |
90 | 0 | if (part->blob_type() == BRT_UNKNOWN || (part->IsTextType() && part->type() != PT_TABLE)) { |
91 | 0 | text_block = true; |
92 | 0 | } |
93 | 0 | part->set_working_set(nullptr); |
94 | 0 | part_it_.forward(); |
95 | 0 | block_it.add_after_then_move(part); |
96 | 0 | next_part = part->SingletonPartner(false); |
97 | 0 | if (part_it_.empty() || next_part != part_it_.data()) { |
98 | | // Sequences of partitions can get split by titles. |
99 | 0 | next_part = nullptr; |
100 | 0 | } |
101 | | // Merge adjacent blocks that are of the same type and let the |
102 | | // linespacing determine the real boundaries. |
103 | 0 | if (next_part == nullptr && !part_it_.empty()) { |
104 | 0 | ColPartition *next_block_part = part_it_.data(); |
105 | 0 | const TBOX &part_box = part->bounding_box(); |
106 | 0 | const TBOX &next_box = next_block_part->bounding_box(); |
107 | | |
108 | | // In addition to the same type, the next box must not be above the |
109 | | // current box, nor (if image) too far below. |
110 | 0 | PolyBlockType type = part->type(), next_type = next_block_part->type(); |
111 | 0 | if (ColPartition::TypesSimilar(type, next_type) && !part->IsLineType() && |
112 | 0 | !next_block_part->IsLineType() && next_box.bottom() <= part_box.top() && |
113 | 0 | (text_block || part_box.bottom() <= next_box.top())) { |
114 | 0 | next_part = next_block_part; |
115 | 0 | } |
116 | 0 | } |
117 | 0 | } while (!part_it_.empty() && next_part != nullptr); |
118 | 0 | if (!text_block) { |
119 | 0 | TO_BLOCK *to_block = ColPartition::MakeBlock(bleft, tright, &block_parts, used_parts); |
120 | 0 | if (to_block != nullptr) { |
121 | 0 | TO_BLOCK_IT to_block_it(&to_blocks_); |
122 | 0 | to_block_it.add_to_end(to_block); |
123 | 0 | BLOCK_IT block_it(&completed_blocks_); |
124 | 0 | block_it.add_to_end(to_block->block); |
125 | 0 | } |
126 | 0 | } else { |
127 | | // Further sub-divide text blocks where linespacing changes. |
128 | 0 | ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts, used_parts, |
129 | 0 | &completed_blocks_, &to_blocks_); |
130 | 0 | } |
131 | 0 | } |
132 | 0 | part_it_.set_to_list(&part_set_); |
133 | 0 | latest_part_ = nullptr; |
134 | 0 | ASSERT_HOST(completed_blocks_.length() == to_blocks_.length()); |
135 | 0 | } |
136 | | |
137 | | } // namespace tesseract. |