Coverage Report

Created: 2025-09-27 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/textord/ccnontextdetect.cpp
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        ccnontextdetect.cpp
3
// Description: Connected-Component-based photo (non-text) detection.
4
// Author:      rays@google.com (Ray Smith)
5
//
6
// Copyright 2011 Google Inc. All Rights Reserved.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifdef HAVE_CONFIG_H
20
#  include "config_auto.h"
21
#endif
22
23
#include "ccnontextdetect.h"
24
#include "helpers.h"         // for IntCastRounded
25
#include "imagefind.h"
26
#include "strokewidth.h"
27
28
namespace tesseract {
29
30
// Upper limit on the number of small neighbouring objects per squared
// gridsize; a grid cell above this limit becomes image.
constexpr double kMaxSmallNeighboursPerPix = 1.0 / 32;
// Upper limit on the number of small blobs a large blob may overlap. A large
// blob overlapping more than this is rejected and determined to be image.
constexpr int kMaxLargeOverlapsWithSmall = 3;
// Upper limit on the number of small blobs a medium blob may overlap before
// it is rejected and determined to be image. The limit is larger than the one
// for large blobs because medium blobs may be complex Chinese characters.
// Very large Chinese characters are going to overlap more medium blobs than
// small.
constexpr int kMaxMediumOverlapsWithSmall = 12;
// Upper limit on the number of normal blobs a large blob may overlap before
// it is rejected and determined to be image. Set higher to allow for drop
// caps, which may overlap a lot of good text blobs.
constexpr int kMaxLargeOverlapsWithMedium = 12;
// Multiplier applied to the original (un-summed) noise count when testing
// whether noise has been spread beyond where it should really be.
constexpr int kOriginalNoiseMultiple = 8;
// Padding, in pixels, added around noise blobs when they are rendered on the
// image mask, to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
constexpr int kNoisePadding = 4;
// Fraction of max_noise_count_ that is added to the noise count when there is
// photo mask in the background.
constexpr double kPhotoOffsetFraction = 0.375;
// Minimum value of perimeter^2/(16*area) for a blob to count as "good" when
// estimating noise density. Good blobs are supposed to be highly likely real
// text. A square is taken to have unit ratio, where A=(p/4)^2, hence the
// factor of 16. Digital circles are weird and have a minimum ratio of pi/64,
// not the 1/(4pi) that you would expect.
// NOTE(review): the pi/64 and 1/(4pi) figures are carried over from the
// original comment and have not been re-derived here.
constexpr double kMinGoodTextPARatio = 1.5;
61
62
// Sets up the underlying BlobGrid with the given gridsize and corners, and
// derives the per-cell noise threshold max_noise_count_ from the cell area
// (kMaxSmallNeighboursPerPix * gridsize * gridsize, truncated to int).
// noise_density_ starts out null; ComputeNonTextMask assigns it.
CCNonTextDetect::CCNonTextDetect(int gridsize, const ICOORD &bleft, const ICOORD &tright)
    : BlobGrid(gridsize, bleft, tright),
      max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * gridsize * gridsize)),
      noise_density_(nullptr) {
  // TODO(rays) break max_noise_count_ out into an area-proportional
  // value, as now plus an additive constant for the number of text blobs
  // in the 3x3 neighbourhood - maybe 9.
}
70
71
0
// Frees the noise density grid held in noise_density_. The pointer is null
// when no grid was ever computed, in which case delete does nothing.
CCNonTextDetect::~CCNonTextDetect() {
  delete noise_density_;
}
74
75
// Creates and returns a Pix with the same resolution as the original
76
// in which 1 (black) pixels represent likely non text (photo, line drawing)
77
// areas of the page, deleting from the blob_block the blobs that were
78
// determined to be non-text.
79
// The photo_map is used to bias the decision towards non-text, rather than
80
// supplying definite decision.
81
// The blob_block is the usual result of connected component analysis,
82
// holding the detected blobs.
83
// The returned Pix should be PixDestroyed after use.
84
0
Image CCNonTextDetect::ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block) {
85
  // Insert the smallest blobs into the grid.
86
0
  InsertBlobList(&blob_block->small_blobs);
87
0
  InsertBlobList(&blob_block->noise_blobs);
88
  // Add the medium blobs that don't have a good strokewidth neighbour.
89
  // Those that do go into good_grid as an antidote to spreading beyond the
90
  // real reaches of a noise region.
91
0
  BlobGrid good_grid(gridsize(), bleft(), tright());
92
0
  BLOBNBOX_IT blob_it(&blob_block->blobs);
93
0
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
94
0
    BLOBNBOX *blob = blob_it.data();
95
0
    double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
96
0
    perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
97
0
    if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) {
98
0
      InsertBBox(true, true, blob);
99
0
    } else {
100
0
      good_grid.InsertBBox(true, true, blob);
101
0
    }
102
0
  }
103
0
  noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
104
0
  good_grid.Clear(); // Not needed any more.
105
0
  Image pix = noise_density_->ThresholdToPix(max_noise_count_);
106
0
  if (debug) {
107
0
    pixWrite("junknoisemask.png", pix, IFF_PNG);
108
0
  }
109
0
  ScrollView *win = nullptr;
110
#ifndef GRAPHICS_DISABLED
111
  if (debug) {
112
    win = MakeWindow(0, 400, "Photo Mask Blobs");
113
  }
114
#endif // !GRAPHICS_DISABLED
115
  // Large and medium blobs are not text if they overlap with "a lot" of small
116
  // blobs.
117
0
  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithSmall, win,
118
0
                            ScrollView::DARK_GREEN, pix);
119
0
  MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, win, ScrollView::WHITE,
120
0
                            pix);
121
  // Clear the grid of small blobs and insert the medium blobs.
122
0
  Clear();
123
0
  InsertBlobList(&blob_block->blobs);
124
0
  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithMedium, win,
125
0
                            ScrollView::DARK_GREEN, pix);
126
  // Clear again before we start deleting the blobs in the grid.
127
0
  Clear();
128
0
  MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, win, ScrollView::CORAL, pix);
129
0
  MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, win, ScrollView::GOLDENROD, pix);
130
0
  MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, win, ScrollView::WHITE, pix);
131
0
  if (debug) {
132
#ifndef GRAPHICS_DISABLED
133
    win->Update();
134
#endif // !GRAPHICS_DISABLED
135
0
    pixWrite("junkccphotomask.png", pix, IFF_PNG);
136
#ifndef GRAPHICS_DISABLED
137
    win->AwaitEvent(SVET_DESTROY);
138
    delete win;
139
#endif // !GRAPHICS_DISABLED
140
0
  }
141
0
  return pix;
142
0
}
143
144
// Computes and returns the noise_density IntGrid, at the same gridsize as
145
// this by summing the number of small elements in a 3x3 neighbourhood of
146
// each grid cell. good_grid is filled with blobs that are considered most
147
// likely good text, and this is filled with small and medium blobs that are
148
// more likely non-text.
149
// The photo_map is used to bias the decision towards non-text, rather than
150
// supplying definite decision.
151
0
IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid) {
152
0
  IntGrid *noise_counts = CountCellElements();
153
0
  IntGrid *noise_density = noise_counts->NeighbourhoodSum();
154
0
  IntGrid *good_counts = good_grid->CountCellElements();
155
  // Now increase noise density in photo areas, to bias the decision and
156
  // minimize hallucinated text on image, but trim the noise_density where
157
  // there are good blobs and the original count is low in non-photo areas,
158
  // indicating that most of the result came from neighbouring cells.
159
0
  int height = pixGetHeight(photo_map);
160
0
  int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
161
0
  for (int y = 0; y < gridheight(); ++y) {
162
0
    for (int x = 0; x < gridwidth(); ++x) {
163
0
      int noise = noise_density->GridCellValue(x, y);
164
0
      if (max_noise_count_ < noise + photo_offset && noise <= max_noise_count_) {
165
        // Test for photo.
166
0
        int left = x * gridsize();
167
0
        int right = left + gridsize();
168
0
        int bottom = height - y * gridsize();
169
0
        int top = bottom - gridsize();
170
0
        if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, &bottom)) {
171
0
          noise_density->SetGridCell(x, y, noise + photo_offset);
172
0
        }
173
0
      }
174
0
      if (debug && noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0) {
175
0
        tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", x * gridsize(), y * gridsize(),
176
0
                noise_density->GridCellValue(x, y), good_counts->GridCellValue(x, y),
177
0
                noise_counts->GridCellValue(x, y), max_noise_count_);
178
0
      }
179
0
      if (noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0 &&
180
0
          noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= max_noise_count_) {
181
0
        noise_density->SetGridCell(x, y, 0);
182
0
      }
183
0
    }
184
0
  }
185
0
  delete noise_counts;
186
0
  delete good_counts;
187
0
  return noise_density;
188
0
}
189
190
// Helper to expand a box in one of the 4 directions by the given pad,
191
// provided it does not expand into any cell with a zero noise density.
192
// If that is not possible, try expanding all round by a small constant.
193
0
static TBOX AttemptBoxExpansion(const TBOX &box, const IntGrid &noise_density, int pad) {
194
0
  TBOX expanded_box(box);
195
0
  expanded_box.set_right(box.right() + pad);
196
0
  if (!noise_density.AnyZeroInRect(expanded_box)) {
197
0
    return expanded_box;
198
0
  }
199
0
  expanded_box = box;
200
0
  expanded_box.set_left(box.left() - pad);
201
0
  if (!noise_density.AnyZeroInRect(expanded_box)) {
202
0
    return expanded_box;
203
0
  }
204
0
  expanded_box = box;
205
0
  expanded_box.set_top(box.top() + pad);
206
0
  if (!noise_density.AnyZeroInRect(expanded_box)) {
207
0
    return expanded_box;
208
0
  }
209
0
  expanded_box = box;
210
0
  expanded_box.set_bottom(box.bottom() + pad);
211
0
  if (!noise_density.AnyZeroInRect(expanded_box)) {
212
0
    return expanded_box;
213
0
  }
214
0
  expanded_box = box;
215
0
  expanded_box.pad(kNoisePadding, kNoisePadding);
216
0
  if (!noise_density.AnyZeroInRect(expanded_box)) {
217
0
    return expanded_box;
218
0
  }
219
0
  return box;
220
0
}
221
222
// Tests each blob in the list to see if it is certain non-text using 2
223
// conditions:
224
// 1. blob overlaps a cell with high value in noise_density_ (previously set
225
// by ComputeNoiseDensity).
226
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
227
// condition is disabled with max_blob_overlaps == -1.
228
// If it does, the blob is declared non-text, and is used to mark up the
229
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
230
// neighbours reset, as they may now point to deleted data.
231
// WARNING: The blobs list blobs may be in the *this grid, but they are
232
// not removed. If any deleted blobs might be in *this, then this must be
233
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
234
// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
235
// blobs are drawn on it in ok_color.
236
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps,
237
                                                ScrollView *win, ScrollView::Color ok_color,
238
0
                                                Image nontext_mask) {
239
0
  int imageheight = tright().y() - bleft().x();
240
0
  BLOBNBOX_IT blob_it(blobs);
241
0
  BLOBNBOX_LIST dead_blobs;
242
0
  BLOBNBOX_IT dead_it(&dead_blobs);
243
0
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
244
0
    BLOBNBOX *blob = blob_it.data();
245
0
    TBOX box = blob->bounding_box();
246
0
    if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
247
0
        (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
248
0
      blob->ClearNeighbours();
249
#ifndef GRAPHICS_DISABLED
250
      if (win != nullptr) {
251
        blob->plot(win, ok_color, ok_color);
252
      }
253
#endif // !GRAPHICS_DISABLED
254
0
    } else {
255
0
      if (noise_density_->AnyZeroInRect(box)) {
256
        // There is a danger that the bounding box may overlap real text, so
257
        // we need to render the outline.
258
0
        Image blob_pix = blob->cblob()->render_outline();
259
0
        pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
260
0
                    PIX_SRC | PIX_DST, blob_pix, 0, 0);
261
0
        blob_pix.destroy();
262
0
      } else {
263
0
        if (box.area() < gridsize() * gridsize()) {
264
          // It is a really bad idea to make lots of small components in the
265
          // photo mask, so try to join it to a bigger area by expanding the
266
          // box in a way that does not touch any zero noise density cell.
267
0
          box = AttemptBoxExpansion(box, *noise_density_, gridsize());
268
0
        }
269
        // All overlapped cells are non-zero, so just mark the rectangle.
270
0
        pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
271
0
                    PIX_SET, nullptr, 0, 0);
272
0
      }
273
#ifndef GRAPHICS_DISABLED
274
      if (win != nullptr) {
275
        blob->plot(win, ScrollView::RED, ScrollView::RED);
276
      }
277
#endif // !GRAPHICS_DISABLED
278
      // It is safe to delete the cblob now, as it isn't used by the grid
279
      // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
280
      // dead_blobs list.
281
      // TODO: remove next line, currently still needed for resultiterator_test.
282
0
      delete blob->remove_cblob();
283
0
      dead_it.add_to_end(blob_it.extract());
284
0
    }
285
0
  }
286
0
}
287
288
// Returns true if the given blob overlaps more than max_overlaps blobs
289
// in the current grid.
290
0
bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps) {
291
  // Search the grid to see what intersects it.
292
  // Setup a Rectangle search for overlapping this blob.
293
0
  BlobGridSearch rsearch(this);
294
0
  const TBOX &box = blob->bounding_box();
295
0
  rsearch.StartRectSearch(box);
296
0
  rsearch.SetUniqueMode(true);
297
0
  BLOBNBOX *neighbour;
298
0
  int overlap_count = 0;
299
0
  while (overlap_count <= max_overlaps && (neighbour = rsearch.NextRectSearch()) != nullptr) {
300
0
    if (box.major_overlap(neighbour->bounding_box())) {
301
0
      ++overlap_count;
302
0
      if (overlap_count > max_overlaps) {
303
0
        return true;
304
0
      }
305
0
    }
306
0
  }
307
0
  return false;
308
0
}
309
310
} // namespace tesseract.