Coverage Report

Created: 2025-06-13 07:02

/src/tesseract/src/classify/shapeclassifier.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        shapeclassifier.h
3
// Description: Base interface class for classifiers that return a
4
//              shape index.
5
// Author:      Ray Smith
6
//
7
// (C) Copyright 2011, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
18
///////////////////////////////////////////////////////////////////////
19
20
#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
21
#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
22
23
#include "image.h"
24
25
#include <tesseract/unichar.h>
26
27
#include <vector>
28
29
struct Pix;
30
31
namespace tesseract {
32
33
// Forward declarations. The declarations in this header only name these types
// in function signatures (by pointer, by reference, or as the element type of
// a std::vector parameter), so the complete definitions are not required here.
// TrainingSampleSet is not referenced by any declaration visible in this
// header.
class ScrollView;
34
class UNICHARSET;
35
36
struct ShapeRating;
37
class ShapeTable;
38
class TrainingSample;
39
class TrainingSampleSet;
40
struct UnicharRating;
41
42
// Interface base class for classifiers that produce ShapeRating results.
// The class is abstract: GetShapeTable() is pure virtual, so only subclasses
// that implement it can be instantiated.
43
class TESS_API ShapeClassifier {
44
public:
45
0
  // Virtual so that a subclass object can be destroyed through a
  // ShapeClassifier pointer.
  virtual ~ShapeClassifier() = default;
46
47
  // Classifies the given [training] sample, writing to results.
48
  // If page_pix is not nullptr, the overriding function may call
49
  // sample.GetSamplePix(padding, page_pix) to get an image of the sample
50
  // padded (with real image data) by the given padding to extract features
51
  // from the image of the character. Other members of TrainingSample:
52
  // features(), micro_features(), cn_feature(), geo_feature() may be used
53
  // to get the appropriate tesseract features.
54
  // If debug is non-zero, then various degrees of classifier dependent debug
55
  // information is provided.
56
  // If keep_this (a UNICHAR_ID) is >= 0, then the results should always
57
  // contain keep_this, and (if possible) anything of intermediate confidence.
58
  // (Used for answering "Why didn't it get that right?" questions.) It must
59
  // be a UNICHAR_ID as the callers have no clue how to choose the best shape
60
  // that may contain a desired answer.
61
  // The return value is the number of classes saved in results.
62
  // NOTE that overriding functions MUST clear and sort the results by
63
  // descending rating unless the classifier is working with a team of such
64
  // classifiers.
65
  // NOTE: Neither overload of ClassifySample is pure, but at least one must
66
  // be overridden by a classifier in order for it to do anything.
  // The two functions meant here are the ones declared directly below:
  // the public UnicharClassifySample (results are UnicharRating) and the
  // protected ClassifySample (results are ShapeRating).
67
  virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
68
                                    UNICHAR_ID keep_this, std::vector<UnicharRating> *results);
69
70
protected:
71
  // Shape-level counterpart of UnicharClassifySample: same parameters, but
  // the results vector holds ShapeRating entries. Being protected, it can
  // only be called from this class and its subclasses.
  virtual int ClassifySample(const TrainingSample &sample, Image page_pix, int debug,
72
                             UNICHAR_ID keep_this, std::vector<ShapeRating> *results);
73
74
public:
75
  // Returns the shape that contains unichar_id that has the best result.
76
  // If result is not nullptr, it is set with the shape_id and rating.
77
  // Returns -1 if ClassifySample fails to provide any result containing
78
  // unichar_id. BestShapeForUnichar does not need to be overridden if
79
  // ClassifySample respects the keep_this rule.
80
  virtual int BestShapeForUnichar(const TrainingSample &sample, Image page_pix,
81
                                  UNICHAR_ID unichar_id, ShapeRating *result);
82
83
  // Provides access to the ShapeTable that this classifier works with.
  // Pure virtual: every concrete subclass must implement it. The comment on
  // GetUnicharset() below implies that returning nullptr is permitted.
84
  virtual const ShapeTable *GetShapeTable() const = 0;
85
  // Provides access to the UNICHARSET that this classifier works with.
86
  // Must be overridden IFF GetShapeTable() returns nullptr.
87
  virtual const UNICHARSET &GetUnicharset() const;
88
89
  // Visual debugger classifies the given sample, displays the results and
90
  // solicits user input to display other classifications. Returns when
91
  // the user has finished with debugging the sample.
92
  // Probably doesn't need to be overridden if the subclass provides
93
  // DisplayClassifyAs.
  // NOTE(review): DebugDisplay is declared non-virtual here, so a subclass
  // can only hide it, not override it; subclass-specific behaviour has to
  // come through the virtual functions of this class.
94
  void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id);
95
96
  // Displays classification as the given unichar_id. Creates as many windows
97
  // as it feels fit, using index as a guide for placement. Adds any created
98
  // windows to the windows output and returns a new index that may be used
99
  // by any subsequent classifiers. Caller waits for the user to view and
100
  // then destroys the windows by clearing the vector.
101
  virtual int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id,
102
                                int index, std::vector<ScrollView *> &windows);
103
104
  // Prints debug information on the results. context is some introductory/title
105
  // message.
106
  virtual void UnicharPrintResults(const char *context,
107
                                   const std::vector<UnicharRating> &results) const;
  // Same as UnicharPrintResults, but takes ShapeRating results.
108
  virtual void PrintResults(const char *context, const std::vector<ShapeRating> &results) const;
109
110
protected:
111
  // Removes any result that has all its unichars covered by a better choice,
112
  // regardless of font.
  // results is a pointer to a non-const vector, so the removal happens in
  // place. Non-virtual helper, callable only from this class and subclasses.
113
  void FilterDuplicateUnichars(std::vector<ShapeRating> *results) const;
114
};
115
116
} // namespace tesseract.
117
118
#endif // TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_