/src/tesseract/src/textord/ccnontextdetect.cpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: ccnontextdetect.cpp |
3 | | // Description: Connected-Component-based photo (non-text) detection. |
4 | | // Author: rays@google.com (Ray Smith) |
5 | | // |
6 | | // Copyright 2011 Google Inc. All Rights Reserved. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifdef HAVE_CONFIG_H |
20 | | # include "config_auto.h" |
21 | | #endif |
22 | | |
23 | | #include "ccnontextdetect.h" |
24 | | #include "helpers.h" // for IntCastRounded |
25 | | #include "imagefind.h" |
26 | | #include "strokewidth.h" |
27 | | |
28 | | namespace tesseract { |
29 | | |
// Upper limit on the number of neighbouring small objects per squared
// gridsize; above it a grid cell is treated as image.
constexpr double kMaxSmallNeighboursPerPix = 1.0 / 32;
// Upper limit on how many small blobs a large blob may overlap before the
// large blob is rejected and determined to be image.
constexpr int kMaxLargeOverlapsWithSmall = 3;
// Upper limit on how many small blobs a medium blob may overlap before the
// medium blob is rejected and determined to be image. It is larger than the
// limit for large blobs because medium blobs may be complex Chinese
// characters. Very large Chinese characters are going to overlap more medium
// blobs than small.
constexpr int kMaxMediumOverlapsWithSmall = 12;
// Upper limit on how many normal blobs a large blob may overlap before the
// large blob is rejected and determined to be image. It is set higher to
// allow for drop caps, which may overlap a lot of good text blobs.
constexpr int kMaxLargeOverlapsWithMedium = 12;
// Multiplier applied to the original noise_count when testing whether noise
// has been spread beyond where it should really be.
constexpr int kOriginalNoiseMultiple = 8;
// Pixel padding given to noise blobs when they are rendered on the image
// mask, to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
constexpr int kNoisePadding = 4;
// Fraction of max_noise_count_ that is added to the noise count when there is
// photo mask in the background.
constexpr double kPhotoOffsetFraction = 0.375;
// Minimum perimeter^2/16area ratio for a "good" blob when estimating noise
// density. Good blobs are supposed to be highly likely real text.
// A square is taken to have unit ratio, where A=(p/4)^2, hence the factor
// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
// the 1/(4pi) that you would expect.
constexpr double kMinGoodTextPARatio = 1.5;
61 | | |
62 | | CCNonTextDetect::CCNonTextDetect(int gridsize, const ICOORD &bleft, const ICOORD &tright) |
63 | 0 | : BlobGrid(gridsize, bleft, tright) |
64 | 0 | , max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * gridsize * gridsize)) |
65 | 0 | , noise_density_(nullptr) { |
66 | | // TODO(rays) break max_noise_count_ out into an area-proportional |
67 | | // value, as now plus an additive constant for the number of text blobs |
68 | | // in the 3x3 neighbourhood - maybe 9. |
69 | 0 | } |
70 | | |
71 | 0 | CCNonTextDetect::~CCNonTextDetect() { |
72 | 0 | delete noise_density_; |
73 | 0 | } |
74 | | |
75 | | // Creates and returns a Pix with the same resolution as the original |
76 | | // in which 1 (black) pixels represent likely non text (photo, line drawing) |
77 | | // areas of the page, deleting from the blob_block the blobs that were |
78 | | // determined to be non-text. |
79 | | // The photo_map is used to bias the decision towards non-text, rather than |
80 | | // supplying definite decision. |
81 | | // The blob_block is the usual result of connected component analysis, |
82 | | // holding the detected blobs. |
83 | | // The returned Pix should be PixDestroyed after use. |
84 | 0 | Image CCNonTextDetect::ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block) { |
85 | | // Insert the smallest blobs into the grid. |
86 | 0 | InsertBlobList(&blob_block->small_blobs); |
87 | 0 | InsertBlobList(&blob_block->noise_blobs); |
88 | | // Add the medium blobs that don't have a good strokewidth neighbour. |
89 | | // Those that do go into good_grid as an antidote to spreading beyond the |
90 | | // real reaches of a noise region. |
91 | 0 | BlobGrid good_grid(gridsize(), bleft(), tright()); |
92 | 0 | BLOBNBOX_IT blob_it(&blob_block->blobs); |
93 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
94 | 0 | BLOBNBOX *blob = blob_it.data(); |
95 | 0 | double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; |
96 | 0 | perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); |
97 | 0 | if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) { |
98 | 0 | InsertBBox(true, true, blob); |
99 | 0 | } else { |
100 | 0 | good_grid.InsertBBox(true, true, blob); |
101 | 0 | } |
102 | 0 | } |
103 | 0 | noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); |
104 | 0 | good_grid.Clear(); // Not needed any more. |
105 | 0 | Image pix = noise_density_->ThresholdToPix(max_noise_count_); |
106 | 0 | if (debug) { |
107 | 0 | pixWrite("junknoisemask.png", pix, IFF_PNG); |
108 | 0 | } |
109 | 0 | ScrollView *win = nullptr; |
110 | | #ifndef GRAPHICS_DISABLED |
111 | | if (debug) { |
112 | | win = MakeWindow(0, 400, "Photo Mask Blobs"); |
113 | | } |
114 | | #endif // !GRAPHICS_DISABLED |
115 | | // Large and medium blobs are not text if they overlap with "a lot" of small |
116 | | // blobs. |
117 | 0 | MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithSmall, win, |
118 | 0 | ScrollView::DARK_GREEN, pix); |
119 | 0 | MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, win, ScrollView::WHITE, |
120 | 0 | pix); |
121 | | // Clear the grid of small blobs and insert the medium blobs. |
122 | 0 | Clear(); |
123 | 0 | InsertBlobList(&blob_block->blobs); |
124 | 0 | MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithMedium, win, |
125 | 0 | ScrollView::DARK_GREEN, pix); |
126 | | // Clear again before we start deleting the blobs in the grid. |
127 | 0 | Clear(); |
128 | 0 | MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, win, ScrollView::CORAL, pix); |
129 | 0 | MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, win, ScrollView::GOLDENROD, pix); |
130 | 0 | MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, win, ScrollView::WHITE, pix); |
131 | 0 | if (debug) { |
132 | | #ifndef GRAPHICS_DISABLED |
133 | | win->Update(); |
134 | | #endif // !GRAPHICS_DISABLED |
135 | 0 | pixWrite("junkccphotomask.png", pix, IFF_PNG); |
136 | | #ifndef GRAPHICS_DISABLED |
137 | | win->AwaitEvent(SVET_DESTROY); |
138 | | delete win; |
139 | | #endif // !GRAPHICS_DISABLED |
140 | 0 | } |
141 | 0 | return pix; |
142 | 0 | } |
143 | | |
144 | | // Computes and returns the noise_density IntGrid, at the same gridsize as |
145 | | // this by summing the number of small elements in a 3x3 neighbourhood of |
146 | | // each grid cell. good_grid is filled with blobs that are considered most |
147 | | // likely good text, and this is filled with small and medium blobs that are |
148 | | // more likely non-text. |
149 | | // The photo_map is used to bias the decision towards non-text, rather than |
150 | | // supplying definite decision. |
151 | 0 | IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid) { |
152 | 0 | IntGrid *noise_counts = CountCellElements(); |
153 | 0 | IntGrid *noise_density = noise_counts->NeighbourhoodSum(); |
154 | 0 | IntGrid *good_counts = good_grid->CountCellElements(); |
155 | | // Now increase noise density in photo areas, to bias the decision and |
156 | | // minimize hallucinated text on image, but trim the noise_density where |
157 | | // there are good blobs and the original count is low in non-photo areas, |
158 | | // indicating that most of the result came from neighbouring cells. |
159 | 0 | int height = pixGetHeight(photo_map); |
160 | 0 | int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); |
161 | 0 | for (int y = 0; y < gridheight(); ++y) { |
162 | 0 | for (int x = 0; x < gridwidth(); ++x) { |
163 | 0 | int noise = noise_density->GridCellValue(x, y); |
164 | 0 | if (max_noise_count_ < noise + photo_offset && noise <= max_noise_count_) { |
165 | | // Test for photo. |
166 | 0 | int left = x * gridsize(); |
167 | 0 | int right = left + gridsize(); |
168 | 0 | int bottom = height - y * gridsize(); |
169 | 0 | int top = bottom - gridsize(); |
170 | 0 | if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, &bottom)) { |
171 | 0 | noise_density->SetGridCell(x, y, noise + photo_offset); |
172 | 0 | } |
173 | 0 | } |
174 | 0 | if (debug && noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0) { |
175 | 0 | tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", x * gridsize(), y * gridsize(), |
176 | 0 | noise_density->GridCellValue(x, y), good_counts->GridCellValue(x, y), |
177 | 0 | noise_counts->GridCellValue(x, y), max_noise_count_); |
178 | 0 | } |
179 | 0 | if (noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0 && |
180 | 0 | noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= max_noise_count_) { |
181 | 0 | noise_density->SetGridCell(x, y, 0); |
182 | 0 | } |
183 | 0 | } |
184 | 0 | } |
185 | 0 | delete noise_counts; |
186 | 0 | delete good_counts; |
187 | 0 | return noise_density; |
188 | 0 | } |
189 | | |
190 | | // Helper to expand a box in one of the 4 directions by the given pad, |
191 | | // provided it does not expand into any cell with a zero noise density. |
192 | | // If that is not possible, try expanding all round by a small constant. |
193 | 0 | static TBOX AttemptBoxExpansion(const TBOX &box, const IntGrid &noise_density, int pad) { |
194 | 0 | TBOX expanded_box(box); |
195 | 0 | expanded_box.set_right(box.right() + pad); |
196 | 0 | if (!noise_density.AnyZeroInRect(expanded_box)) { |
197 | 0 | return expanded_box; |
198 | 0 | } |
199 | 0 | expanded_box = box; |
200 | 0 | expanded_box.set_left(box.left() - pad); |
201 | 0 | if (!noise_density.AnyZeroInRect(expanded_box)) { |
202 | 0 | return expanded_box; |
203 | 0 | } |
204 | 0 | expanded_box = box; |
205 | 0 | expanded_box.set_top(box.top() + pad); |
206 | 0 | if (!noise_density.AnyZeroInRect(expanded_box)) { |
207 | 0 | return expanded_box; |
208 | 0 | } |
209 | 0 | expanded_box = box; |
210 | 0 | expanded_box.set_bottom(box.bottom() + pad); |
211 | 0 | if (!noise_density.AnyZeroInRect(expanded_box)) { |
212 | 0 | return expanded_box; |
213 | 0 | } |
214 | 0 | expanded_box = box; |
215 | 0 | expanded_box.pad(kNoisePadding, kNoisePadding); |
216 | 0 | if (!noise_density.AnyZeroInRect(expanded_box)) { |
217 | 0 | return expanded_box; |
218 | 0 | } |
219 | 0 | return box; |
220 | 0 | } |
221 | | |
222 | | // Tests each blob in the list to see if it is certain non-text using 2 |
223 | | // conditions: |
224 | | // 1. blob overlaps a cell with high value in noise_density_ (previously set |
225 | | // by ComputeNoiseDensity). |
226 | | // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This |
227 | | // condition is disabled with max_blob_overlaps == -1. |
228 | | // If it does, the blob is declared non-text, and is used to mark up the |
229 | | // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their |
230 | | // neighbours reset, as they may now point to deleted data. |
231 | | // WARNING: The blobs list blobs may be in the *this grid, but they are |
232 | | // not removed. If any deleted blobs might be in *this, then this must be |
233 | | // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. |
234 | | // If the win is not nullptr, deleted blobs are drawn on it in red, and kept |
235 | | // blobs are drawn on it in ok_color. |
236 | | void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps, |
237 | | ScrollView *win, ScrollView::Color ok_color, |
238 | 0 | Image nontext_mask) { |
239 | 0 | int imageheight = tright().y() - bleft().x(); |
240 | 0 | BLOBNBOX_IT blob_it(blobs); |
241 | 0 | BLOBNBOX_LIST dead_blobs; |
242 | 0 | BLOBNBOX_IT dead_it(&dead_blobs); |
243 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
244 | 0 | BLOBNBOX *blob = blob_it.data(); |
245 | 0 | TBOX box = blob->bounding_box(); |
246 | 0 | if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && |
247 | 0 | (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { |
248 | 0 | blob->ClearNeighbours(); |
249 | | #ifndef GRAPHICS_DISABLED |
250 | | if (win != nullptr) { |
251 | | blob->plot(win, ok_color, ok_color); |
252 | | } |
253 | | #endif // !GRAPHICS_DISABLED |
254 | 0 | } else { |
255 | 0 | if (noise_density_->AnyZeroInRect(box)) { |
256 | | // There is a danger that the bounding box may overlap real text, so |
257 | | // we need to render the outline. |
258 | 0 | Image blob_pix = blob->cblob()->render_outline(); |
259 | 0 | pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), |
260 | 0 | PIX_SRC | PIX_DST, blob_pix, 0, 0); |
261 | 0 | blob_pix.destroy(); |
262 | 0 | } else { |
263 | 0 | if (box.area() < gridsize() * gridsize()) { |
264 | | // It is a really bad idea to make lots of small components in the |
265 | | // photo mask, so try to join it to a bigger area by expanding the |
266 | | // box in a way that does not touch any zero noise density cell. |
267 | 0 | box = AttemptBoxExpansion(box, *noise_density_, gridsize()); |
268 | 0 | } |
269 | | // All overlapped cells are non-zero, so just mark the rectangle. |
270 | 0 | pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), |
271 | 0 | PIX_SET, nullptr, 0, 0); |
272 | 0 | } |
273 | | #ifndef GRAPHICS_DISABLED |
274 | | if (win != nullptr) { |
275 | | blob->plot(win, ScrollView::RED, ScrollView::RED); |
276 | | } |
277 | | #endif // !GRAPHICS_DISABLED |
278 | | // It is safe to delete the cblob now, as it isn't used by the grid |
279 | | // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the |
280 | | // dead_blobs list. |
281 | | // TODO: remove next line, currently still needed for resultiterator_test. |
282 | 0 | delete blob->remove_cblob(); |
283 | 0 | dead_it.add_to_end(blob_it.extract()); |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | // Returns true if the given blob overlaps more than max_overlaps blobs |
289 | | // in the current grid. |
290 | 0 | bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps) { |
291 | | // Search the grid to see what intersects it. |
292 | | // Setup a Rectangle search for overlapping this blob. |
293 | 0 | BlobGridSearch rsearch(this); |
294 | 0 | const TBOX &box = blob->bounding_box(); |
295 | 0 | rsearch.StartRectSearch(box); |
296 | 0 | rsearch.SetUniqueMode(true); |
297 | 0 | BLOBNBOX *neighbour; |
298 | 0 | int overlap_count = 0; |
299 | 0 | while (overlap_count <= max_overlaps && (neighbour = rsearch.NextRectSearch()) != nullptr) { |
300 | 0 | if (box.major_overlap(neighbour->bounding_box())) { |
301 | 0 | ++overlap_count; |
302 | 0 | if (overlap_count > max_overlaps) { |
303 | 0 | return true; |
304 | 0 | } |
305 | 0 | } |
306 | 0 | } |
307 | 0 | return false; |
308 | 0 | } |
309 | | |
310 | | } // namespace tesseract. |