Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/textord/devanagari_processing.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 Google Inc. All Rights Reserved.
2
// Author: shobhitsaxena@google.com (Shobhit Saxena)
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
// http://www.apache.org/licenses/LICENSE-2.0
7
// Unless required by applicable law or agreed to in writing, software
8
// distributed under the License is distributed on an "AS IS" BASIS,
9
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
// See the License for the specific language governing permissions and
11
// limitations under the License.
12
13
#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14
#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15
16
#include <allheaders.h>
17
#include "ocrblock.h"
18
#include "params.h"
19
20
struct Pix;
21
struct Box;
22
struct Boxa;
23
24
namespace tesseract {
25
26
extern INT_VAR_H(devanagari_split_debuglevel);
27
28
extern BOOL_VAR_H(devanagari_split_debugimage);
29
30
class TBOX;
31
class DebugPixa;
32
33
class PixelHistogram {
34
public:
35
0
  PixelHistogram() {
36
0
    hist_ = nullptr;
37
0
    length_ = 0;
38
0
  }
39
40
0
  ~PixelHistogram() {
41
0
    Clear();
42
0
  }
43
44
0
  void Clear() {
45
0
    delete[] hist_;
46
0
    length_ = 0;
47
0
  }
48
49
0
  int *hist() const {
50
0
    return hist_;
51
0
  }
52
53
0
  int length() const {
54
0
    return length_;
55
0
  }
56
57
  // Methods to construct histograms from images. These clear any existing data.
58
  void ConstructVerticalCountHist(Image pix);
59
  void ConstructHorizontalCountHist(Image pix);
60
61
  // This method returns the global-maxima for the histogram. The frequency of
62
  // the global maxima is returned in count, if specified.
63
  int GetHistogramMaximum(int *count) const;
64
65
private:
66
  int *hist_;
67
  int length_;
68
};
69
70
class ShiroRekhaSplitter {
71
public:
72
  enum SplitStrategy {
73
    NO_SPLIT = 0,  // No splitting is performed for the phase.
74
    MINIMAL_SPLIT, // Blobs are split minimally.
75
    MAXIMAL_SPLIT  // Blobs are split maximally.
76
  };
77
78
  ShiroRekhaSplitter();
79
  virtual ~ShiroRekhaSplitter();
80
81
  // Top-level method to perform splitting based on current settings.
82
  // Returns true if a split was actually performed.
83
  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
84
  // splitting. If false, the ocr_split_strategy_ is used.
85
  bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);
86
87
  // Clears the memory held by this object.
88
  void Clear();
89
90
  // Refreshes the words in the segmentation block list by using blobs in the
91
  // input blob list.
92
  // The segmentation block list must be set.
93
  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);
94
95
  // Returns true if the split strategies for pageseg and ocr are different.
96
15.4k
  bool HasDifferentSplitStrategies() const {
97
15.4k
    return pageseg_split_strategy_ != ocr_split_strategy_;
98
15.4k
  }
99
100
  // This only keeps a copy of the block list pointer. At split call, the list
101
  // object should still be alive. This block list is used as a golden
102
  // segmentation when performing splitting.
103
15.4k
  void set_segmentation_block_list(BLOCK_LIST *block_list) {
104
15.4k
    segmentation_block_list_ = block_list;
105
15.4k
  }
106
107
  static const int kUnspecifiedXheight = -1;
108
109
0
  void set_global_xheight(int xheight) {
110
0
    global_xheight_ = xheight;
111
0
  }
112
113
0
  void set_perform_close(bool perform) {
114
0
    perform_close_ = perform;
115
0
  }
116
117
  // Returns the image obtained from shiro-rekha splitting. The returned object
118
  // is owned by this class. Callers may want to clone the returned pix to keep
119
  // it alive beyond the life of ShiroRekhaSplitter object.
120
0
  Image splitted_image() {
121
0
    return splitted_image_;
122
0
  }
123
124
  // On setting the input image, a clone of it is owned by this class.
125
  void set_orig_pix(Image pix);
126
127
  // Returns the input image provided to the object. This object is owned by
128
  // this class. Callers may want to clone the returned pix to work with it.
129
30.9k
  Image orig_pix() {
130
30.9k
    return orig_pix_;
131
30.9k
  }
132
133
0
  SplitStrategy ocr_split_strategy() const {
134
0
    return ocr_split_strategy_;
135
0
  }
136
137
15.4k
  void set_ocr_split_strategy(SplitStrategy strategy) {
138
15.4k
    ocr_split_strategy_ = strategy;
139
15.4k
  }
140
141
0
  SplitStrategy pageseg_split_strategy() const {
142
0
    return pageseg_split_strategy_;
143
0
  }
144
145
15.4k
  void set_pageseg_split_strategy(SplitStrategy strategy) {
146
15.4k
    pageseg_split_strategy_ = strategy;
147
15.4k
  }
148
149
0
  BLOCK_LIST *segmentation_block_list() {
150
0
    return segmentation_block_list_;
151
0
  }
152
153
  // This method returns the computed mode-height of blobs in the pix.
154
  // It also prunes very small blobs from calculation. Could be used to provide
155
  // a global xheight estimate for images which have the same point-size text.
156
  static int GetModeHeight(Image pix);
157
158
private:
159
  // Method to perform a close operation on the input image. The xheight
160
  // estimate decides the size of sel used.
161
  static void PerformClose(Image pix, int xheight_estimate);
162
163
  // This method resolves the cc bbox to a particular row and returns the row's
164
  // xheight. This uses block_list_ if available, else just returns the
165
  // global_xheight_ estimate currently set in the object.
166
  int GetXheightForCC(Box *cc_bbox);
167
168
  // Returns a list of regions (boxes) which should be cleared in the original
169
  // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
170
  // (or less) word only. Xheight measure could be the global estimate, the row
171
  // estimate, or unspecified. If unspecified, over splitting may occur, since a
172
  // conservative estimate of stroke width along with an associated multiplier
173
  // is used in its place. It is advisable to have a specified xheight when
174
  // splitting for classification/training.
175
  void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
176
                           int word_top, Boxa *regions_to_clear);
177
178
  // Returns a new box object for the corresponding TBOX, based on the original
179
  // image's coordinate system.
180
  Box *GetBoxForTBOX(const TBOX &tbox) const;
181
182
  // This method returns y-extents of the shiro-rekha computed from the input
183
  // word image.
184
  static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
185
                                    int *shirorekha_ylevel);
186
187
  Image orig_pix_;       // Just a clone of the input image passed.
188
  Image splitted_image_; // Image produced after the last splitting round. The
189
                        // object is owned by this class.
190
  SplitStrategy pageseg_split_strategy_;
191
  SplitStrategy ocr_split_strategy_;
192
  Image debug_image_;
193
  // This block list is used as a golden segmentation when performing splitting.
194
  BLOCK_LIST *segmentation_block_list_;
195
  int global_xheight_;
196
  bool perform_close_; // Whether a morphological close operation should be
197
                       // performed before CCs are run through splitting.
198
};
199
200
} // namespace tesseract.
201
202
#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_