/src/tesseract/src/ccmain/pageiterator.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: pageiterator.cpp |
3 | | // Description: Iterator for tesseract page structure that avoids using |
4 | | // tesseract internal data structures. |
5 | | // Author: Ray Smith |
6 | | // |
7 | | // (C) Copyright 2010, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #include <allheaders.h> |
21 | | #include <tesseract/pageiterator.h> |
22 | | #include "helpers.h" |
23 | | #include "pageres.h" |
24 | | #include "tesseractclass.h" |
25 | | |
26 | | #include <algorithm> |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | | PageIterator::PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, |
31 | | int scaled_yres, int rect_left, int rect_top, |
32 | | int rect_width, int rect_height) |
33 | 14.7k | : page_res_(page_res), |
34 | 14.7k | tesseract_(tesseract), |
35 | 14.7k | word_(nullptr), |
36 | 14.7k | word_length_(0), |
37 | 14.7k | blob_index_(0), |
38 | 14.7k | cblob_it_(nullptr), |
39 | 14.7k | include_upper_dots_(false), |
40 | 14.7k | include_lower_dots_(false), |
41 | 14.7k | scale_(scale), |
42 | 14.7k | scaled_yres_(scaled_yres), |
43 | 14.7k | rect_left_(rect_left), |
44 | 14.7k | rect_top_(rect_top), |
45 | 14.7k | rect_width_(rect_width), |
46 | 14.7k | rect_height_(rect_height) { |
47 | 14.7k | it_ = new PAGE_RES_IT(page_res); |
48 | 14.7k | PageIterator::Begin(); |
49 | 14.7k | } |
50 | | |
51 | 3.51M | PageIterator::~PageIterator() { |
52 | 3.51M | delete it_; |
53 | 3.51M | delete cblob_it_; |
54 | 3.51M | } |
55 | | |
56 | | /** |
57 | | * PageIterators may be copied! This makes it possible to iterate over |
58 | | * all the objects at a lower level, while maintaining an iterator to |
59 | | * objects at a higher level. |
60 | | */ |
61 | | PageIterator::PageIterator(const PageIterator &src) |
62 | 3.49M | : page_res_(src.page_res_), |
63 | 3.49M | tesseract_(src.tesseract_), |
64 | 3.49M | word_(nullptr), |
65 | 3.49M | word_length_(src.word_length_), |
66 | 3.49M | blob_index_(src.blob_index_), |
67 | 3.49M | cblob_it_(nullptr), |
68 | 3.49M | include_upper_dots_(src.include_upper_dots_), |
69 | 3.49M | include_lower_dots_(src.include_lower_dots_), |
70 | 3.49M | scale_(src.scale_), |
71 | 3.49M | scaled_yres_(src.scaled_yres_), |
72 | 3.49M | rect_left_(src.rect_left_), |
73 | 3.49M | rect_top_(src.rect_top_), |
74 | 3.49M | rect_width_(src.rect_width_), |
75 | 3.49M | rect_height_(src.rect_height_) { |
76 | 3.49M | it_ = new PAGE_RES_IT(*src.it_); |
77 | 3.49M | BeginWord(src.blob_index_); |
78 | 3.49M | } |
79 | | |
80 | 7.05k | const PageIterator &PageIterator::operator=(const PageIterator &src) { |
81 | 7.05k | page_res_ = src.page_res_; |
82 | 7.05k | tesseract_ = src.tesseract_; |
83 | 7.05k | include_upper_dots_ = src.include_upper_dots_; |
84 | 7.05k | include_lower_dots_ = src.include_lower_dots_; |
85 | 7.05k | scale_ = src.scale_; |
86 | 7.05k | scaled_yres_ = src.scaled_yres_; |
87 | 7.05k | rect_left_ = src.rect_left_; |
88 | 7.05k | rect_top_ = src.rect_top_; |
89 | 7.05k | rect_width_ = src.rect_width_; |
90 | 7.05k | rect_height_ = src.rect_height_; |
91 | 7.05k | delete it_; |
92 | 7.05k | it_ = new PAGE_RES_IT(*src.it_); |
93 | 7.05k | BeginWord(src.blob_index_); |
94 | 7.05k | return *this; |
95 | 7.05k | } |
96 | | |
97 | 789k | bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT *other) const { |
98 | 789k | return (it_ == nullptr && it_ == other) || |
99 | 789k | ((other != nullptr) && (it_ != nullptr) && (*it_ == *other)); |
100 | 789k | } |
101 | | |
102 | | // ============= Moving around within the page ============. |
103 | | |
104 | | /** Resets the iterator to point to the start of the page. */ |
105 | 14.7k | void PageIterator::Begin() { |
106 | 14.7k | it_->restart_page_with_empties(); |
107 | 14.7k | BeginWord(0); |
108 | 14.7k | } |
109 | | |
110 | 567k | void PageIterator::RestartParagraph() { |
111 | 567k | if (it_->block() == nullptr) { |
112 | 0 | return; // At end of the document. |
113 | 0 | } |
114 | 567k | PAGE_RES_IT para(page_res_); |
115 | 567k | PAGE_RES_IT next_para(para); |
116 | 567k | next_para.forward_paragraph(); |
117 | 930k | while (next_para.cmp(*it_) <= 0) { |
118 | 363k | para = next_para; |
119 | 363k | next_para.forward_paragraph(); |
120 | 363k | } |
121 | 567k | *it_ = para; |
122 | 567k | BeginWord(0); |
123 | 567k | } |
124 | | |
125 | 446k | bool PageIterator::IsWithinFirstTextlineOfParagraph() const { |
126 | 446k | PageIterator p_start(*this); |
127 | 446k | p_start.RestartParagraph(); |
128 | 446k | return p_start.it_->row() == it_->row(); |
129 | 446k | } |
130 | | |
131 | 3.69M | void PageIterator::RestartRow() { |
132 | 3.69M | it_->restart_row(); |
133 | 3.69M | BeginWord(0); |
134 | 3.69M | } |
135 | | |
136 | | /** |
137 | | * Moves to the start of the next object at the given level in the |
138 | | * page hierarchy, and returns false if the end of the page was reached. |
139 | | * NOTE (CHANGED!) that ALL PageIteratorLevel level values will visit each |
140 | | * non-text block at least once. |
141 | | * Think of non text blocks as containing a single para, with at least one |
142 | | * line, with a single imaginary word, containing a single symbol. |
143 | | * The bounding boxes mark out any polygonal nature of the block, and |
144 | | * PTIsTextType(BLockType()) is false for non-text blocks. |
145 | | * Calls to Next with different levels may be freely intermixed. |
146 | | * This function iterates words in right-to-left scripts correctly, if |
147 | | * the appropriate language has been loaded into Tesseract. |
148 | | */ |
149 | 4.39M | bool PageIterator::Next(PageIteratorLevel level) { |
150 | 4.39M | if (it_->block() == nullptr) { |
151 | 0 | return false; // Already at the end! |
152 | 0 | } |
153 | 4.39M | if (it_->word() == nullptr) { |
154 | 0 | level = RIL_BLOCK; |
155 | 0 | } |
156 | | |
157 | 4.39M | switch (level) { |
158 | 7.05k | case RIL_BLOCK: |
159 | 7.05k | it_->forward_block(); |
160 | 7.05k | break; |
161 | 10.7k | case RIL_PARA: |
162 | 10.7k | it_->forward_paragraph(); |
163 | 10.7k | break; |
164 | 470k | case RIL_TEXTLINE: |
165 | 549k | for (it_->forward_with_empties(); it_->row() == it_->prev_row(); |
166 | 470k | it_->forward_with_empties()) { |
167 | 78.7k | ; |
168 | 78.7k | } |
169 | 470k | break; |
170 | 3.90M | case RIL_WORD: |
171 | 3.90M | it_->forward_with_empties(); |
172 | 3.90M | break; |
173 | 0 | case RIL_SYMBOL: |
174 | 0 | if (cblob_it_ != nullptr) { |
175 | 0 | cblob_it_->forward(); |
176 | 0 | } |
177 | 0 | ++blob_index_; |
178 | 0 | if (blob_index_ >= word_length_) { |
179 | 0 | it_->forward_with_empties(); |
180 | 0 | } else { |
181 | 0 | return true; |
182 | 0 | } |
183 | 0 | break; |
184 | 4.39M | } |
185 | 4.39M | BeginWord(0); |
186 | 4.39M | return it_->block() != nullptr; |
187 | 4.39M | } |
188 | | |
189 | | /** |
190 | | * Returns true if the iterator is at the start of an object at the given |
191 | | * level. Possible uses include determining if a call to Next(RIL_WORD) |
192 | | * moved to the start of a RIL_PARA. |
193 | | */ |
194 | 3.33M | bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { |
195 | 3.33M | if (it_->block() == nullptr) { |
196 | 0 | return false; // Already at the end! |
197 | 0 | } |
198 | 3.33M | if (it_->word() == nullptr) { |
199 | 0 | return true; // In an image block. |
200 | 0 | } |
201 | 3.33M | switch (level) { |
202 | 0 | case RIL_BLOCK: |
203 | 0 | return blob_index_ == 0 && it_->block() != it_->prev_block(); |
204 | 522k | case RIL_PARA: |
205 | 522k | return blob_index_ == 0 && |
206 | 522k | (it_->block() != it_->prev_block() || |
207 | 522k | it_->row()->row->para() != it_->prev_row()->row->para()); |
208 | 2.80M | case RIL_TEXTLINE: |
209 | 2.80M | return blob_index_ == 0 && it_->row() != it_->prev_row(); |
210 | 0 | case RIL_WORD: |
211 | 0 | return blob_index_ == 0; |
212 | 0 | case RIL_SYMBOL: |
213 | 0 | return true; |
214 | 3.33M | } |
215 | 0 | return false; |
216 | 3.33M | } |
217 | | |
218 | | /** |
219 | | * Returns whether the iterator is positioned at the last element in a |
220 | | * given level. (e.g. the last word in a line, the last line in a block) |
221 | | */ |
222 | | bool PageIterator::IsAtFinalElement(PageIteratorLevel level, |
223 | 0 | PageIteratorLevel element) const { |
224 | 0 | if (Empty(element)) { |
225 | 0 | return true; // Already at the end! |
226 | 0 | } |
227 | | // The result is true if we step forward by element and find we are |
228 | | // at the end of the page or at beginning of *all* levels in: |
229 | | // [level, element). |
230 | | // When there is more than one level difference between element and level, |
231 | | // we could for instance move forward one symbol and still be at the first |
232 | | // word on a line, so we also have to be at the first symbol in a word. |
233 | 0 | PageIterator next(*this); |
234 | 0 | next.Next(element); |
235 | 0 | if (next.Empty(element)) { |
236 | 0 | return true; // Reached the end of the page. |
237 | 0 | } |
238 | 0 | while (element > level) { |
239 | 0 | element = static_cast<PageIteratorLevel>(element - 1); |
240 | 0 | if (!next.IsAtBeginningOf(element)) { |
241 | 0 | return false; |
242 | 0 | } |
243 | 0 | } |
244 | 0 | return true; |
245 | 0 | } |
246 | | |
247 | | /** |
248 | | * Returns whether this iterator is positioned |
249 | | * before other: -1 |
250 | | * equal to other: 0 |
251 | | * after other: 1 |
252 | | */ |
253 | 0 | int PageIterator::Cmp(const PageIterator &other) const { |
254 | 0 | int word_cmp = it_->cmp(*other.it_); |
255 | 0 | if (word_cmp != 0) { |
256 | 0 | return word_cmp; |
257 | 0 | } |
258 | 0 | if (blob_index_ < other.blob_index_) { |
259 | 0 | return -1; |
260 | 0 | } |
261 | 0 | if (blob_index_ == other.blob_index_) { |
262 | 0 | return 0; |
263 | 0 | } |
264 | 0 | return 1; |
265 | 0 | } |
266 | | |
267 | | // ============= Accessing data ==============. |
268 | | // Coordinate system: |
269 | | // Integer coordinates are at the cracks between the pixels. |
270 | | // The top-left corner of the top-left pixel in the image is at (0,0). |
271 | | // The bottom-right corner of the bottom-right pixel in the image is at |
272 | | // (width, height). |
273 | | // Every bounding box goes from the top-left of the top-left contained |
274 | | // pixel to the bottom-right of the bottom-right contained pixel, so |
275 | | // the bounding box of the single top-left pixel in the image is: |
276 | | // (0,0)->(1,1). |
277 | | // If an image rectangle has been set in the API, then returned coordinates |
278 | | // relate to the original (full) image, rather than the rectangle. |
279 | | |
280 | | /** |
281 | | * Returns the bounding rectangle of the current object at the given level in |
282 | | * the coordinates of the working image that is pix_binary(). |
283 | | * See comment on coordinate system above. |
284 | | * Returns false if there is no such object at the current position. |
285 | | */ |
286 | | bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, |
287 | | int *top, int *right, |
288 | 0 | int *bottom) const { |
289 | 0 | if (Empty(level)) { |
290 | 0 | return false; |
291 | 0 | } |
292 | 0 | TBOX box; |
293 | 0 | PARA *para = nullptr; |
294 | 0 | switch (level) { |
295 | 0 | case RIL_BLOCK: |
296 | 0 | box = it_->block()->block->restricted_bounding_box(include_upper_dots_, |
297 | 0 | include_lower_dots_); |
298 | 0 | break; |
299 | 0 | case RIL_PARA: |
300 | 0 | para = it_->row()->row->para(); |
301 | | // Fall through. |
302 | 0 | case RIL_TEXTLINE: |
303 | 0 | box = it_->row()->row->restricted_bounding_box(include_upper_dots_, |
304 | 0 | include_lower_dots_); |
305 | 0 | break; |
306 | 0 | case RIL_WORD: |
307 | 0 | box = it_->word()->word->restricted_bounding_box(include_upper_dots_, |
308 | 0 | include_lower_dots_); |
309 | 0 | break; |
310 | 0 | case RIL_SYMBOL: |
311 | 0 | if (cblob_it_ == nullptr) { |
312 | 0 | box = it_->word()->box_word->BlobBox(blob_index_); |
313 | 0 | } else { |
314 | 0 | box = cblob_it_->data()->bounding_box(); |
315 | 0 | } |
316 | 0 | } |
317 | 0 | if (level == RIL_PARA) { |
318 | 0 | PageIterator other = *this; |
319 | 0 | other.Begin(); |
320 | 0 | do { |
321 | 0 | if (other.it_->block() && |
322 | 0 | other.it_->block()->block == it_->block()->block && |
323 | 0 | other.it_->row() && other.it_->row()->row && |
324 | 0 | other.it_->row()->row->para() == para) { |
325 | 0 | box = box.bounding_union(other.it_->row()->row->bounding_box()); |
326 | 0 | } |
327 | 0 | } while (other.Next(RIL_TEXTLINE)); |
328 | 0 | } |
329 | 0 | if (level != RIL_SYMBOL || cblob_it_ != nullptr) { |
330 | 0 | box.rotate(it_->block()->block->re_rotation()); |
331 | 0 | } |
332 | | // Now we have a box in tesseract coordinates relative to the image rectangle, |
333 | | // we have to convert the coords to a top-down system. |
334 | 0 | const int pix_height = pixGetHeight(tesseract_->pix_binary()); |
335 | 0 | const int pix_width = pixGetWidth(tesseract_->pix_binary()); |
336 | 0 | *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); |
337 | 0 | *top = ClipToRange(pix_height - box.top(), 0, pix_height); |
338 | 0 | *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); |
339 | 0 | *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); |
340 | 0 | return true; |
341 | 0 | } |
342 | | |
343 | | /** |
344 | | * Returns the bounding rectangle of the current object at the given level in |
345 | | * coordinates of the original image. |
346 | | * See comment on coordinate system above. |
347 | | * Returns false if there is no such object at the current position. |
348 | | */ |
349 | | bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top, |
350 | 0 | int *right, int *bottom) const { |
351 | 0 | return BoundingBox(level, 0, left, top, right, bottom); |
352 | 0 | } |
353 | | |
354 | | bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, |
355 | | int *left, int *top, int *right, |
356 | 0 | int *bottom) const { |
357 | 0 | if (!BoundingBoxInternal(level, left, top, right, bottom)) { |
358 | 0 | return false; |
359 | 0 | } |
360 | | // Convert to the coordinate system of the original image. |
361 | 0 | *left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, |
362 | 0 | rect_left_ + rect_width_); |
363 | 0 | *top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, |
364 | 0 | rect_top_ + rect_height_); |
365 | 0 | *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, |
366 | 0 | *left, rect_left_ + rect_width_); |
367 | 0 | *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, |
368 | 0 | *top, rect_top_ + rect_height_); |
369 | 0 | return true; |
370 | 0 | } |
371 | | |
372 | | /** Return that there is no such object at a given level. */ |
373 | 4.28M | bool PageIterator::Empty(PageIteratorLevel level) const { |
374 | 4.28M | if (it_->block() == nullptr) { |
375 | 20.1k | return true; // Already at the end! |
376 | 20.1k | } |
377 | 4.26M | if (it_->word() == nullptr && level != RIL_BLOCK) { |
378 | 0 | return true; // image block |
379 | 0 | } |
380 | 4.26M | if (level == RIL_SYMBOL && blob_index_ >= word_length_) { |
381 | 0 | return true; // Zero length word, or already at the end of it. |
382 | 0 | } |
383 | 4.26M | return false; |
384 | 4.26M | } |
385 | | |
386 | | /** Returns the type of the current block. |
387 | | * See tesseract/publictypes.h for PolyBlockType. */ |
388 | 10.7k | PolyBlockType PageIterator::BlockType() const { |
389 | 10.7k | if (it_->block() == nullptr || it_->block()->block == nullptr) { |
390 | 0 | return PT_UNKNOWN; // Already at the end! |
391 | 0 | } |
392 | 10.7k | if (it_->block()->block->pdblk.poly_block() == nullptr) { |
393 | 10.7k | return PT_FLOWING_TEXT; // No layout analysis used - assume text. |
394 | 10.7k | } |
395 | 0 | return it_->block()->block->pdblk.poly_block()->isA(); |
396 | 10.7k | } |
397 | | |
398 | | /** Returns the polygon outline of the current block. The returned Pta must |
399 | | * be ptaDestroy-ed after use. */ |
400 | 0 | Pta *PageIterator::BlockPolygon() const { |
401 | 0 | if (it_->block() == nullptr || it_->block()->block == nullptr) { |
402 | 0 | return nullptr; // Already at the end! |
403 | 0 | } |
404 | 0 | if (it_->block()->block->pdblk.poly_block() == nullptr) { |
405 | 0 | return nullptr; // No layout analysis used - no polygon. |
406 | 0 | } |
407 | | // Copy polygon, so we can unrotate it to image coordinates. |
408 | 0 | POLY_BLOCK *internal_poly = it_->block()->block->pdblk.poly_block(); |
409 | 0 | ICOORDELT_LIST vertices; |
410 | 0 | vertices.deep_copy(internal_poly->points(), ICOORDELT::deep_copy); |
411 | 0 | POLY_BLOCK poly(&vertices, internal_poly->isA()); |
412 | 0 | poly.rotate(it_->block()->block->re_rotation()); |
413 | 0 | ICOORDELT_IT it(poly.points()); |
414 | 0 | Pta *pta = ptaCreate(it.length()); |
415 | 0 | int num_pts = 0; |
416 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) { |
417 | 0 | ICOORD *pt = it.data(); |
418 | | // Convert to top-down coords within the input image. |
419 | 0 | int x = static_cast<float>(pt->x()) / scale_ + rect_left_; |
420 | 0 | int y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_; |
421 | 0 | x = ClipToRange(x, rect_left_, rect_left_ + rect_width_); |
422 | 0 | y = ClipToRange(y, rect_top_, rect_top_ + rect_height_); |
423 | 0 | ptaAddPt(pta, x, y); |
424 | 0 | } |
425 | 0 | return pta; |
426 | 0 | } |
427 | | |
428 | | /** |
429 | | * Returns a binary image of the current object at the given level. |
430 | | * The position and size match the return from BoundingBoxInternal, and so this |
431 | | * could be upscaled with respect to the original input image. |
432 | | * Use pixDestroy to delete the image after use. |
433 | | * The following methods are used to generate the images: |
434 | | * RIL_BLOCK: mask the page image with the block polygon. |
435 | | * RIL_TEXTLINE: Clip the rectangle of the line box from the page image. |
436 | | * TODO(rays) fix this to generate and use a line polygon. |
437 | | * RIL_WORD: Clip the rectangle of the word box from the page image. |
438 | | * RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior |
439 | | * to recognition) or the bounding box otherwise. |
440 | | * A reconstruction of the original image (using xor to check for double |
441 | | * representation) should be reasonably accurate, |
442 | | * apart from removed noise, at the block level. Below the block level, the |
443 | | * reconstruction will be missing images and line separators. |
444 | | * At the symbol level, kerned characters will be invade the bounding box |
445 | | * if rendered after recognition, making an xor reconstruction inaccurate, but |
446 | | * an or construction better. Before recognition, symbol-level reconstruction |
447 | | * should be good, even with xor, since the images come from the connected |
448 | | * components. |
449 | | */ |
450 | 0 | Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const { |
451 | 0 | int left, top, right, bottom; |
452 | 0 | if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) { |
453 | 0 | return nullptr; |
454 | 0 | } |
455 | 0 | if (level == RIL_SYMBOL && cblob_it_ != nullptr && |
456 | 0 | cblob_it_->data()->area() != 0) { |
457 | 0 | return cblob_it_->data()->render(); |
458 | 0 | } |
459 | 0 | Box *box = boxCreate(left, top, right - left, bottom - top); |
460 | 0 | Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr); |
461 | 0 | boxDestroy(&box); |
462 | 0 | if (level == RIL_BLOCK || level == RIL_PARA) { |
463 | | // Clip to the block polygon as well. |
464 | 0 | TBOX mask_box; |
465 | 0 | Image mask = it_->block()->block->render_mask(&mask_box); |
466 | 0 | int mask_x = left - mask_box.left(); |
467 | 0 | int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); |
468 | | // AND the mask and pix, putting the result in pix. |
469 | 0 | pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), |
470 | 0 | pixGetWidth(pix), pixGetHeight(pix), PIX_SRC & PIX_DST, mask, |
471 | 0 | std::max(0, mask_x), std::max(0, mask_y)); |
472 | 0 | mask.destroy(); |
473 | 0 | } |
474 | 0 | return pix; |
475 | 0 | } |
476 | | |
477 | | /** |
478 | | * Returns an image of the current object at the given level in greyscale |
479 | | * if available in the input. To guarantee a binary image use BinaryImage. |
480 | | * NOTE that in order to give the best possible image, the bounds are |
481 | | * expanded slightly over the binary connected component, by the supplied |
482 | | * padding, so the top-left position of the returned image is returned |
483 | | * in (left,top). These will most likely not match the coordinates |
484 | | * returned by BoundingBox. |
485 | | * If you do not supply an original image, you will get a binary one. |
486 | | * Use pixDestroy to delete the image after use. |
487 | | */ |
488 | | Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, |
489 | 0 | Pix *original_img, int *left, int *top) const { |
490 | 0 | int right, bottom; |
491 | 0 | if (!BoundingBox(level, left, top, &right, &bottom)) { |
492 | 0 | return nullptr; |
493 | 0 | } |
494 | 0 | if (original_img == nullptr) { |
495 | 0 | return GetBinaryImage(level); |
496 | 0 | } |
497 | | |
498 | | // Expand the box. |
499 | 0 | *left = std::max(*left - padding, 0); |
500 | 0 | *top = std::max(*top - padding, 0); |
501 | 0 | right = std::min(right + padding, rect_width_); |
502 | 0 | bottom = std::min(bottom + padding, rect_height_); |
503 | 0 | Box *box = boxCreate(*left, *top, right - *left, bottom - *top); |
504 | 0 | Image grey_pix = pixClipRectangle(original_img, box, nullptr); |
505 | 0 | boxDestroy(&box); |
506 | 0 | if (level == RIL_BLOCK || level == RIL_PARA) { |
507 | | // Clip to the block polygon as well. |
508 | 0 | TBOX mask_box; |
509 | 0 | Image mask = it_->block()->block->render_mask(&mask_box); |
510 | | // Copy the mask registered correctly into an image the size of grey_pix. |
511 | 0 | int mask_x = *left - mask_box.left(); |
512 | 0 | int mask_y = *top - (pixGetHeight(original_img) - mask_box.top()); |
513 | 0 | int width = pixGetWidth(grey_pix); |
514 | 0 | int height = pixGetHeight(grey_pix); |
515 | 0 | Image resized_mask = pixCreate(width, height, 1); |
516 | 0 | pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, |
517 | 0 | height, PIX_SRC, mask, std::max(0, mask_x), |
518 | 0 | std::max(0, mask_y)); |
519 | 0 | mask.destroy(); |
520 | 0 | pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, |
521 | 0 | 2 * padding + 1); |
522 | 0 | pixInvert(resized_mask, resized_mask); |
523 | 0 | pixSetMasked(grey_pix, resized_mask, UINT32_MAX); |
524 | 0 | resized_mask.destroy(); |
525 | 0 | } |
526 | 0 | return grey_pix; |
527 | 0 | } |
528 | | |
529 | | /** |
530 | | * Returns the baseline of the current object at the given level. |
531 | | * The baseline is the line that passes through (x1, y1) and (x2, y2). |
532 | | * WARNING: with vertical text, baselines may be vertical! |
533 | | */ |
534 | | bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, |
535 | 0 | int *y2) const { |
536 | 0 | if (it_->word() == nullptr) { |
537 | 0 | return false; // Already at the end! |
538 | 0 | } |
539 | 0 | ROW *row = it_->row()->row; |
540 | 0 | WERD *word = it_->word()->word; |
541 | 0 | TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box() |
542 | 0 | : row->bounding_box(); |
543 | 0 | int left = box.left(); |
544 | 0 | ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5)); |
545 | 0 | int right = box.right(); |
546 | 0 | ICOORD endpt(right, static_cast<int16_t>(row->base_line(right) + 0.5)); |
547 | | // Rotate to image coordinates and convert to global image coords. |
548 | 0 | startpt.rotate(it_->block()->block->re_rotation()); |
549 | 0 | endpt.rotate(it_->block()->block->re_rotation()); |
550 | 0 | *x1 = startpt.x() / scale_ + rect_left_; |
551 | 0 | *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; |
552 | 0 | *x2 = endpt.x() / scale_ + rect_left_; |
553 | 0 | *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; |
554 | 0 | return true; |
555 | 0 | } |
556 | | |
557 | | void PageIterator::RowAttributes(float *row_height, float *descenders, |
558 | 0 | float *ascenders) const { |
559 | 0 | *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() - |
560 | 0 | it_->row()->row->descenders(); |
561 | 0 | *descenders = it_->row()->row->descenders(); |
562 | 0 | *ascenders = it_->row()->row->ascenders(); |
563 | 0 | } |
564 | | |
565 | | void PageIterator::Orientation(tesseract::Orientation *orientation, |
566 | | tesseract::WritingDirection *writing_direction, |
567 | | tesseract::TextlineOrder *textline_order, |
568 | 0 | float *deskew_angle) const { |
569 | 0 | auto *block_res = it_->block(); |
570 | 0 | if (block_res == nullptr) { |
571 | | // Nothing can be done, so return default values. |
572 | 0 | *orientation = ORIENTATION_PAGE_UP; |
573 | 0 | *writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT; |
574 | 0 | *textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM; |
575 | 0 | return; |
576 | 0 | } |
577 | 0 | auto *block = block_res->block; |
578 | | |
579 | | // Orientation |
580 | 0 | FCOORD up_in_image(0.0, 1.0); |
581 | 0 | up_in_image.unrotate(block->classify_rotation()); |
582 | 0 | up_in_image.rotate(block->re_rotation()); |
583 | |
|
584 | 0 | if (up_in_image.x() == 0.0F) { |
585 | 0 | if (up_in_image.y() > 0.0F) { |
586 | 0 | *orientation = ORIENTATION_PAGE_UP; |
587 | 0 | } else { |
588 | 0 | *orientation = ORIENTATION_PAGE_DOWN; |
589 | 0 | } |
590 | 0 | } else if (up_in_image.x() > 0.0F) { |
591 | 0 | *orientation = ORIENTATION_PAGE_RIGHT; |
592 | 0 | } else { |
593 | 0 | *orientation = ORIENTATION_PAGE_LEFT; |
594 | 0 | } |
595 | | |
596 | | // Writing direction |
597 | 0 | bool is_vertical_text = (block->classify_rotation().x() == 0.0); |
598 | 0 | bool right_to_left = block->right_to_left(); |
599 | 0 | *writing_direction = is_vertical_text |
600 | 0 | ? WRITING_DIRECTION_TOP_TO_BOTTOM |
601 | 0 | : (right_to_left ? WRITING_DIRECTION_RIGHT_TO_LEFT |
602 | 0 | : WRITING_DIRECTION_LEFT_TO_RIGHT); |
603 | | |
604 | | // Textline Order |
605 | 0 | const bool is_mongolian = false; // TODO(eger): fix me |
606 | 0 | *textline_order = is_vertical_text |
607 | 0 | ? (is_mongolian ? TEXTLINE_ORDER_LEFT_TO_RIGHT |
608 | 0 | : TEXTLINE_ORDER_RIGHT_TO_LEFT) |
609 | 0 | : TEXTLINE_ORDER_TOP_TO_BOTTOM; |
610 | | |
611 | | // Deskew angle |
612 | 0 | FCOORD skew = block->skew(); // true horizontal for textlines |
613 | 0 | *deskew_angle = -skew.angle(); |
614 | 0 | } |
615 | | |
616 | | void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, |
617 | | bool *is_list_item, bool *is_crown, |
618 | 0 | int *first_line_indent) const { |
619 | 0 | *just = tesseract::JUSTIFICATION_UNKNOWN; |
620 | 0 | if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || |
621 | 0 | !it_->row()->row->para()->model) { |
622 | 0 | return; |
623 | 0 | } |
624 | | |
625 | 0 | PARA *para = it_->row()->row->para(); |
626 | 0 | *is_list_item = para->is_list_item; |
627 | 0 | *is_crown = para->is_very_first_or_continuation; |
628 | 0 | *first_line_indent = para->model->first_indent() - para->model->body_indent(); |
629 | 0 | *just = para->model->justification(); |
630 | 0 | } |
631 | | |
632 | | /** |
633 | | * Sets up the internal data for iterating the blobs of a new word, then |
634 | | * moves the iterator to the given offset. |
635 | | */ |
636 | 12.1M | void PageIterator::BeginWord(int offset) { |
637 | 12.1M | WERD_RES *word_res = it_->word(); |
638 | 12.1M | if (word_res == nullptr) { |
639 | | // This is a non-text block, so there is no word. |
640 | 212k | word_length_ = 0; |
641 | 212k | blob_index_ = 0; |
642 | 212k | word_ = nullptr; |
643 | 212k | return; |
644 | 212k | } |
645 | 11.9M | if (word_res->best_choice != nullptr) { |
646 | | // Recognition has been done, so we are using the box_word, which |
647 | | // is already baseline denormalized. |
648 | 11.9M | word_length_ = word_res->best_choice->length(); |
649 | 11.9M | if (word_res->box_word != nullptr) { |
650 | 11.9M | if (word_res->box_word->length() != static_cast<unsigned>(word_length_)) { |
651 | 0 | tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", |
652 | 0 | word_length_, word_res->best_choice->unichar_string().c_str(), |
653 | 0 | word_res->box_word->length()); |
654 | 0 | word_res->box_word->bounding_box().print(); |
655 | 0 | } |
656 | 11.9M | ASSERT_HOST(word_res->box_word->length() == |
657 | 11.9M | static_cast<unsigned>(word_length_)); |
658 | 11.9M | } |
659 | 11.9M | word_ = nullptr; |
660 | | // We will be iterating the box_word. |
661 | 11.9M | delete cblob_it_; |
662 | 11.9M | cblob_it_ = nullptr; |
663 | 11.9M | } else { |
664 | | // No recognition yet, so a "symbol" is a cblob. |
665 | 0 | word_ = word_res->word; |
666 | 0 | ASSERT_HOST(word_->cblob_list() != nullptr); |
667 | 0 | word_length_ = word_->cblob_list()->length(); |
668 | 0 | if (cblob_it_ == nullptr) { |
669 | 0 | cblob_it_ = new C_BLOB_IT; |
670 | 0 | } |
671 | 0 | cblob_it_->set_to_list(word_->cblob_list()); |
672 | 0 | } |
673 | 11.9M | for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { |
674 | 0 | if (cblob_it_ != nullptr) { |
675 | 0 | cblob_it_->forward(); |
676 | 0 | } |
677 | 0 | } |
678 | 11.9M | } |
679 | | |
680 | 0 | bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) { |
681 | 0 | if (it_->word() != nullptr) { |
682 | 0 | it_->word()->blamer_bundle = blamer_bundle; |
683 | 0 | return true; |
684 | 0 | } else { |
685 | 0 | return false; |
686 | 0 | } |
687 | 0 | } |
688 | | |
689 | | } // namespace tesseract. |