Coverage Report

Created: 2025-11-16 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/textord/devanagari_processing.cpp
Line
Count
Source
1
/**********************************************************************
2
 * File:        devanagari_processing.cpp
3
 * Description: Methods to process images containing devanagari symbols,
4
 *              prior to classification.
5
 * Author:      Shobhit Saxena
6
 *
7
 * (C) Copyright 2008, Google Inc.
8
 ** Licensed under the Apache License, Version 2.0 (the "License");
9
 ** you may not use this file except in compliance with the License.
10
 ** You may obtain a copy of the License at
11
 ** http://www.apache.org/licenses/LICENSE-2.0
12
 ** Unless required by applicable law or agreed to in writing, software
13
 ** distributed under the License is distributed on an "AS IS" BASIS,
14
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 ** See the License for the specific language governing permissions and
16
 ** limitations under the License.
17
 *
18
 **********************************************************************/
19
20
#ifdef HAVE_CONFIG_H
21
#  include "config_auto.h"
22
#endif
23
24
#include "devanagari_processing.h"
25
26
#include "debugpixa.h"
27
#include "statistc.h"
28
#include "tordmain.h"
29
30
#include <allheaders.h>
31
32
namespace tesseract {
33
34
// Flags controlling the debugging information for shiro-rekha splitting
35
// strategies.
// devanagari_split_debuglevel: the code in this file emits tprintf traces
// whenever this value is greater than zero.
36
INT_VAR(devanagari_split_debuglevel, 0, "Debug level for split shiro-rekha process.");
37
38
// devanagari_split_debugimage: when set, Split() creates a 32 bpp copy of the
// input image and the splitting code renders boxes onto it.
BOOL_VAR(devanagari_split_debugimage, 0,
39
         "Whether to create a debug image for split shiro-rekha process.");
40
41
// Builds a splitter that holds no images, has both split strategies set to
// NO_SPLIT, has no segmentation block list, an unspecified x-height and the
// close operation switched off.
ShiroRekhaSplitter::ShiroRekhaSplitter()
    : orig_pix_(nullptr)
    , splitted_image_(nullptr)
    , pageseg_split_strategy_(NO_SPLIT)
    , ocr_split_strategy_(NO_SPLIT)
    , debug_image_(nullptr)
    , segmentation_block_list_(nullptr)
    , global_xheight_(kUnspecifiedXheight)
    , perform_close_(false) {}
52
53
0
// Tears the splitter down by delegating to Clear(), which destroys the
// images held by this object.
ShiroRekhaSplitter::~ShiroRekhaSplitter() {
  this->Clear();
}
56
57
32.5k
void ShiroRekhaSplitter::Clear() {
58
32.5k
  orig_pix_.destroy();
59
32.5k
  splitted_image_.destroy();
60
32.5k
  pageseg_split_strategy_ = NO_SPLIT;
61
32.5k
  ocr_split_strategy_ = NO_SPLIT;
62
32.5k
  debug_image_.destroy();
63
32.5k
  segmentation_block_list_ = nullptr;
64
32.5k
  global_xheight_ = kUnspecifiedXheight;
65
32.5k
  perform_close_ = false;
66
32.5k
}
67
68
// On setting the input image, a clone of it is owned by this class.
69
16.2k
void ShiroRekhaSplitter::set_orig_pix(Image pix) {
70
16.2k
  if (orig_pix_) {
71
0
    orig_pix_.destroy();
72
0
  }
73
16.2k
  orig_pix_ = pix.clone();
74
16.2k
}
75
76
// Top-level method to perform splitting based on current settings.
77
// Returns true if a split was actually performed.
78
// split_for_pageseg should be true if the splitting is being done prior to
79
// page segmentation. This mode uses the flag
80
// pageseg_devanagari_split_strategy to determine the splitting strategy.
81
32.5k
bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
82
32.5k
  SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_;
83
32.5k
  if (split_strategy == NO_SPLIT) {
84
32.5k
    return false; // Nothing to do.
85
32.5k
  }
86
0
  ASSERT_HOST(split_strategy == MINIMAL_SPLIT || split_strategy == MAXIMAL_SPLIT);
87
0
  ASSERT_HOST(orig_pix_);
88
0
  if (devanagari_split_debuglevel > 0) {
89
0
    tprintf("Splitting shiro-rekha ...\n");
90
0
    tprintf("Split strategy = %s\n", split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
91
0
    tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no");
92
0
  }
93
  // Create a copy of original image to store the splitting output.
94
0
  splitted_image_.destroy();
95
0
  splitted_image_ = orig_pix_.copy();
96
97
  // Initialize debug image if required.
98
0
  if (devanagari_split_debugimage) {
99
0
    debug_image_.destroy();
100
0
    debug_image_ = pixConvertTo32(orig_pix_);
101
0
  }
102
103
  // Determine all connected components in the input image. A close operation
104
  // may be required prior to this, depending on the current settings.
105
0
  Image pix_for_ccs = orig_pix_.clone();
106
0
  if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) {
107
0
    if (devanagari_split_debuglevel > 0) {
108
0
      tprintf("Performing a global close operation..\n");
109
0
    }
110
    // A global measure is available for xheight, but no local information
111
    // exists.
112
0
    pix_for_ccs.destroy();
113
0
    pix_for_ccs = orig_pix_.copy();
114
0
    PerformClose(pix_for_ccs, global_xheight_);
115
0
  }
116
0
  Pixa *ccs;
117
0
  Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
118
0
  boxaDestroy(&tmp_boxa);
119
0
  pix_for_ccs.destroy();
120
121
  // Iterate over all connected components. Get their bounding boxes and clip
122
  // out the image regions corresponding to these boxes from the original image.
123
  // Conditionally run splitting on each of them.
124
0
  Boxa *regions_to_clear = boxaCreate(0);
125
0
  int num_ccs = 0;
126
0
  if (ccs != nullptr) {
127
0
    num_ccs = pixaGetCount(ccs);
128
0
  }
129
0
  for (int i = 0; i < num_ccs; ++i) {
130
0
    Box *box = pixaGetBox(ccs, i, L_CLONE);
131
0
    Image word_pix = pixClipRectangle(orig_pix_, box, nullptr);
132
0
    ASSERT_HOST(word_pix);
133
0
    int xheight = GetXheightForCC(box);
134
0
    if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) {
135
0
      pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
136
0
    }
137
    // If some xheight measure is available, attempt to pre-eliminate small
138
    // blobs from the shiro-rekha process. This is primarily to save the CCs
139
    // corresponding to punctuation marks/small dots etc which are part of
140
    // larger graphemes.
141
0
    l_int32 x, y, w, h;
142
0
    boxGetGeometry(box, &x, &y, &w, &h);
143
0
    if (xheight == kUnspecifiedXheight || (w > xheight / 3 && h > xheight / 2)) {
144
0
      SplitWordShiroRekha(split_strategy, word_pix, xheight, x, y, regions_to_clear);
145
0
    } else if (devanagari_split_debuglevel > 0) {
146
0
      tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", x, y, w, h);
147
0
    }
148
0
    word_pix.destroy();
149
0
    boxDestroy(&box);
150
0
  }
151
  // Actually clear the boxes now.
152
0
  for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
153
0
    Box *box = boxaGetBox(regions_to_clear, i, L_CLONE);
154
0
    pixClearInRect(splitted_image_, box);
155
0
    boxDestroy(&box);
156
0
  }
157
0
  boxaDestroy(&regions_to_clear);
158
0
  pixaDestroy(&ccs);
159
0
  if (devanagari_split_debugimage && pixa_debug != nullptr) {
160
0
    pixa_debug->AddPix(debug_image_, split_for_pageseg ? "pageseg_split" : "ocr_split");
161
0
  }
162
0
  return true;
163
32.5k
}
164
165
// Method to perform a close operation on the input image. The xheight
166
// estimate decides the size of sel used.
167
0
void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) {
168
0
  pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3);
169
0
}
170
171
// This method resolves the cc bbox to a particular row and returns the row's
172
// xheight.
173
0
int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) {
174
0
  if (!segmentation_block_list_) {
175
0
    return global_xheight_;
176
0
  }
177
  // Compute the box coordinates in Tesseract's coordinate system.
178
0
  l_int32 x, y, w, h;
179
0
  boxGetGeometry(cc_bbox, &x, &y, &w, &h);
180
0
  TBOX bbox(x, pixGetHeight(orig_pix_) - y - h - 1,
181
0
            x + w, pixGetHeight(orig_pix_) - y - 1);
182
  // Iterate over all blocks.
183
0
  BLOCK_IT block_it(segmentation_block_list_);
184
0
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
185
0
    BLOCK *block = block_it.data();
186
    // Iterate over all rows in the block.
187
0
    ROW_IT row_it(block->row_list());
188
0
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
189
0
      ROW *row = row_it.data();
190
0
      if (!row->bounding_box().major_overlap(bbox)) {
191
0
        continue;
192
0
      }
193
      // Row could be skewed, warped, etc. Use the position of the box to
194
      // determine the baseline position of the row for that x-coordinate.
195
      // Create a square TBOX whose baseline's mid-point lies at this point
196
      // and side is row's xheight. Take the overlap of this box with the input
197
      // box and check if it is a 'major overlap'. If so, this box lies in this
198
      // row. In that case, return the xheight for this row.
199
0
      float box_middle = 0.5 * (bbox.left() + bbox.right());
200
0
      int baseline = static_cast<int>(row->base_line(box_middle) + 0.5);
201
0
      TBOX test_box(box_middle - row->x_height() / 2, baseline, box_middle + row->x_height() / 2,
202
0
                    static_cast<int>(baseline + row->x_height()));
203
      // Compute overlap. If it is a major overlap, this is the right row.
204
0
      if (bbox.major_overlap(test_box)) {
205
0
        return row->x_height();
206
0
      }
207
0
    }
208
0
  }
209
  // No row found for this bbox.
210
0
  return kUnspecifiedXheight;
211
0
}
212
213
// Returns a list of regions (boxes) which should be cleared in the original
214
// image so as to perform shiro-rekha splitting. Pix is assumed to carry one
215
// (or less) word only. Xheight measure could be the global estimate, the row
216
// estimate, or unspecified. If unspecified, over splitting may occur, since a
217
// conservative estimate of stroke width along with an associated multiplier
218
// is used in its place. It is advisable to have a specified xheight when
219
// splitting for classification/training.
220
// A vertical projection histogram of all the on-pixels in the input pix is
221
// computed. The maxima of this histogram is regarded as an approximate location
222
// of the shiro-rekha. By descending on the maxima's peak on both sides,
223
// stroke width of shiro-rekha is estimated.
224
// A horizontal projection histogram is computed for a sub-image of the input
225
// image, which extends from just below the shiro-rekha down to a certain
226
// leeway. The leeway depends on the input xheight, if provided, else a
227
// conservative multiplier on approximate stroke width is used (which may lead
228
// to over-splitting).
229
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight,
230
0
                                             int word_left, int word_top, Boxa *regions_to_clear) {
231
0
  if (split_strategy == NO_SPLIT) {
232
0
    return;
233
0
  }
234
0
  int width = pixGetWidth(pix);
235
0
  int height = pixGetHeight(pix);
236
  // Statistically determine the yextents of the shiro-rekha.
237
0
  int shirorekha_top, shirorekha_bottom, shirorekha_ylevel;
238
0
  GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, &shirorekha_ylevel);
239
  // Since the shiro rekha is also a stroke, its width is equal to the stroke
240
  // width.
241
0
  int stroke_width = shirorekha_bottom - shirorekha_top + 1;
242
243
  // Some safeguards to protect CCs we do not want to be split.
244
  // These are particularly useful when the word wasn't eliminated earlier
245
  // because xheight information was unavailable.
246
0
  if (shirorekha_ylevel > height / 2) {
247
    // Shirorekha shouldn't be in the bottom half of the word.
248
0
    if (devanagari_split_debuglevel > 0) {
249
0
      tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", word_left,
250
0
              word_top);
251
0
    }
252
0
    return;
253
0
  }
254
0
  if (stroke_width > height / 3) {
255
    // Even the boldest of fonts shouldn't do this.
256
0
    if (devanagari_split_debuglevel > 0) {
257
0
      tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", word_left, word_top);
258
0
    }
259
0
    return;
260
0
  }
261
262
  // Clear the ascender and descender regions of the word.
263
  // Obtain a vertical projection histogram for the resulting image.
264
0
  Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3);
265
0
  Image word_in_xheight = pix.copy();
266
0
  pixClearInRect(word_in_xheight, box_to_clear);
267
  // Also clear any pixels which are below shirorekha_bottom + some leeway.
268
  // The leeway is set to xheight if the information is available, else it is a
269
  // multiplier applied to the stroke width.
270
0
  int leeway_to_keep = stroke_width * 3;
271
0
  if (xheight != kUnspecifiedXheight) {
272
    // This is because the xheight-region typically includes the shiro-rekha
273
    // inside it, i.e., the top of the xheight range corresponds to the top of
274
    // shiro-rekha.
275
0
    leeway_to_keep = xheight - stroke_width;
276
0
  }
277
0
  auto y = shirorekha_bottom + leeway_to_keep;
278
0
  boxSetGeometry(box_to_clear, -1, y, -1, height - y);
279
0
  pixClearInRect(word_in_xheight, box_to_clear);
280
0
  boxDestroy(&box_to_clear);
281
282
0
  PixelHistogram vert_hist;
283
0
  vert_hist.ConstructVerticalCountHist(word_in_xheight);
284
0
  word_in_xheight.destroy();
285
286
  // If the number of black pixel in any column of the image is less than a
287
  // fraction of the stroke width, treat it as noise / a stray mark. Perform
288
  // these changes inside the vert_hist data itself, as that is used later on as
289
  // a bit vector for the final split decision at every column.
290
0
  for (int i = 0; i < width; ++i) {
291
0
    if (vert_hist.hist()[i] <= stroke_width / 4) {
292
0
      vert_hist.hist()[i] = 0;
293
0
    } else {
294
0
      vert_hist.hist()[i] = 1;
295
0
    }
296
0
  }
297
  // In order to split the line at any point, we make sure that the width of the
298
  // gap is at least half the stroke width.
299
0
  int i = 0;
300
0
  int cur_component_width = 0;
301
0
  while (i < width) {
302
0
    if (!vert_hist.hist()[i]) {
303
0
      int j = 0;
304
0
      while (i + j < width && !vert_hist.hist()[i + j]) {
305
0
        ++j;
306
0
      }
307
0
      if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) {
308
        // Perform a shiro-rekha split. The intervening region lies from i to
309
        // i+j-1.
310
        // A minimal single-pixel split makes the estimation of intra- and
311
        // inter-word spacing easier during page layout analysis,
312
        // whereas a maximal split may be needed for OCR, depending on
313
        // how the engine was trained.
314
0
        bool minimal_split = (split_strategy == MINIMAL_SPLIT);
315
0
        int split_width = minimal_split ? 1 : j;
316
0
        int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i;
317
0
        if (!minimal_split || (i != 0 && i + j != width)) {
318
0
          Box *box_to_clear =
319
0
              boxCreate(word_left + split_left, word_top + shirorekha_top - stroke_width / 3,
320
0
                        split_width, 5 * stroke_width / 3);
321
0
          if (box_to_clear) {
322
0
            boxaAddBox(regions_to_clear, box_to_clear, L_CLONE);
323
            // Mark this in the debug image if needed.
324
0
            if (devanagari_split_debugimage) {
325
0
              pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128);
326
0
            }
327
0
            boxDestroy(&box_to_clear);
328
0
            cur_component_width = 0;
329
0
          }
330
0
        }
331
0
      }
332
0
      i += j;
333
0
    } else {
334
0
      ++i;
335
0
      ++cur_component_width;
336
0
    }
337
0
  }
338
0
}
339
340
// Refreshes the words in the segmentation block list by using blobs in the
341
// input block list.
342
// The segmentation block list must be set.
343
0
void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs) {
344
  // The segmentation block list must have been specified.
345
0
  ASSERT_HOST(segmentation_block_list_);
346
0
  if (devanagari_split_debuglevel > 0) {
347
0
    tprintf("Before refreshing blobs:\n");
348
0
    PrintSegmentationStats(segmentation_block_list_);
349
0
    tprintf("New Blobs found: %d\n", new_blobs->length());
350
0
  }
351
352
0
  C_BLOB_LIST not_found_blobs;
353
0
  RefreshWordBlobsFromNewBlobs(
354
0
      segmentation_block_list_, new_blobs,
355
0
      ((devanagari_split_debugimage && debug_image_) ? &not_found_blobs : nullptr));
356
357
0
  if (devanagari_split_debuglevel > 0) {
358
0
    tprintf("After refreshing blobs:\n");
359
0
    PrintSegmentationStats(segmentation_block_list_);
360
0
  }
361
0
  if (devanagari_split_debugimage && debug_image_) {
362
    // Plot out the original blobs for which no match was found in the new
363
    // all_blobs list.
364
0
    C_BLOB_IT not_found_it(&not_found_blobs);
365
0
    for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) {
366
0
      C_BLOB *not_found = not_found_it.data();
367
0
      TBOX not_found_box = not_found->bounding_box();
368
0
      Box *box_to_plot = GetBoxForTBOX(not_found_box);
369
0
      pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255);
370
0
      boxDestroy(&box_to_plot);
371
0
    }
372
373
    // Plot out the blobs unused from all blobs.
374
0
    C_BLOB_IT all_blobs_it(new_blobs);
375
0
    for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) {
376
0
      C_BLOB *a_blob = all_blobs_it.data();
377
0
      Box *box_to_plot = GetBoxForTBOX(a_blob->bounding_box());
378
0
      pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0);
379
0
      boxDestroy(&box_to_plot);
380
0
    }
381
0
  }
382
0
}
383
384
// Returns a new box object for the corresponding TBOX, based on the original
385
// image's coordinate system.
386
0
Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const {
387
0
  return boxCreate(tbox.left(), pixGetHeight(orig_pix_) - tbox.top() - 1, tbox.width(),
388
0
                   tbox.height());
389
0
}
390
391
// This method returns the computed mode-height of blobs in the pix.
392
// It also prunes very small blobs from calculation.
393
0
int ShiroRekhaSplitter::GetModeHeight(Image pix) {
394
0
  Boxa *boxa = pixConnComp(pix, nullptr, 8);
395
0
  STATS heights(0, pixGetHeight(pix) - 1);
396
0
  heights.clear();
397
0
  for (int i = 0; i < boxaGetCount(boxa); ++i) {
398
0
    Box *box = boxaGetBox(boxa, i, L_CLONE);
399
0
    l_int32 x, y, w, h;
400
0
    boxGetGeometry(box, &x, &y, &w, &h);
401
0
    if (h >= 3 || w >= 3) {
402
0
      heights.add(h, 1);
403
0
    }
404
0
    boxDestroy(&box);
405
0
  }
406
0
  boxaDestroy(&boxa);
407
0
  return heights.mode();
408
0
}
409
410
// This method returns y-extents of the shiro-rekha computed from the input
411
// word image.
412
void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top,
413
0
                                               int *shirorekha_bottom, int *shirorekha_ylevel) {
414
  // Compute a histogram from projecting the word on a vertical line.
415
0
  PixelHistogram hist_horiz;
416
0
  hist_horiz.ConstructHorizontalCountHist(word_pix);
417
  // Get the ylevel where the top-line exists. This is basically the global
418
  // maxima in the horizontal histogram.
419
0
  int topline_onpixel_count = 0;
420
0
  int topline_ylevel = hist_horiz.GetHistogramMaximum(&topline_onpixel_count);
421
422
  // Get the upper and lower extents of the shiro rekha.
423
0
  int thresh = (topline_onpixel_count * 70) / 100;
424
0
  int ulimit = topline_ylevel;
425
0
  int llimit = topline_ylevel;
426
0
  while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) {
427
0
    --ulimit;
428
0
  }
429
0
  while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) {
430
0
    ++llimit;
431
0
  }
432
433
0
  if (shirorekha_top) {
434
0
    *shirorekha_top = ulimit;
435
0
  }
436
0
  if (shirorekha_bottom) {
437
0
    *shirorekha_bottom = llimit;
438
0
  }
439
0
  if (shirorekha_ylevel) {
440
0
    *shirorekha_ylevel = topline_ylevel;
441
0
  }
442
0
}
443
444
// This method returns the global-maxima for the histogram. The frequency of
445
// the global maxima is returned in count, if specified.
446
0
int PixelHistogram::GetHistogramMaximum(int *count) const {
447
0
  int best_value = 0;
448
0
  for (int i = 0; i < length_; ++i) {
449
0
    if (hist_[i] > hist_[best_value]) {
450
0
      best_value = i;
451
0
    }
452
0
  }
453
0
  if (count) {
454
0
    *count = hist_[best_value];
455
0
  }
456
0
  return best_value;
457
0
}
458
459
// Methods to construct histograms from images.
460
0
void PixelHistogram::ConstructVerticalCountHist(Image pix) {
461
0
  Clear();
462
0
  int width = pixGetWidth(pix);
463
0
  int height = pixGetHeight(pix);
464
0
  hist_ = new int[width];
465
0
  length_ = width;
466
0
  int wpl = pixGetWpl(pix);
467
0
  l_uint32 *data = pixGetData(pix);
468
0
  for (int i = 0; i < width; ++i) {
469
0
    hist_[i] = 0;
470
0
  }
471
0
  for (int i = 0; i < height; ++i) {
472
0
    l_uint32 *line = data + i * wpl;
473
0
    for (int j = 0; j < width; ++j) {
474
0
      if (GET_DATA_BIT(line, j)) {
475
0
        ++(hist_[j]);
476
0
      }
477
0
    }
478
0
  }
479
0
}
480
481
0
// Rebuilds this histogram from the per-row pixel counts that
// pixCountPixelsByRow() reports for pix: one bin per entry of the returned
// Numa. Any previous contents are discarded through Clear() first.
void PixelHistogram::ConstructHorizontalCountHist(Image pix) {
  Clear();
  Numa *row_counts = pixCountPixelsByRow(pix, nullptr);
  const int num_rows = numaGetCount(row_counts);
  length_ = num_rows;
  hist_ = new int[num_rows];
  for (int row = 0; row < num_rows; ++row) {
    // Stays 0 if the value cannot be fetched.
    l_int32 on_pixels = 0;
    numaGetIValue(row_counts, row, &on_pixels);
    hist_[row] = on_pixels;
  }
  numaDestroy(&row_counts);
}
493
494
} // namespace tesseract.