Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/wordrec/associate.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        associate.h
3
// Description: Structs, classes, typedefs useful for the segmentation
4
//              search. Functions for scoring segmentation paths according
5
//              to their character widths, gap widths and seam cuts.
6
// Author:      Daria Antonova
7
// Created:     Mon Mar 8 11:26:43 PDT 2010
8
//
9
// (C) Copyright 2010, Google Inc.
10
// Licensed under the Apache License, Version 2.0 (the "License");
11
// you may not use this file except in compliance with the License.
12
// You may obtain a copy of the License at
13
// http://www.apache.org/licenses/LICENSE-2.0
14
// Unless required by applicable law or agreed to in writing, software
15
// distributed under the License is distributed on an "AS IS" BASIS,
16
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
// See the License for the specific language governing permissions and
18
// limitations under the License.
19
//
20
///////////////////////////////////////////////////////////////////////
21
22
#ifndef ASSOCIATE_H
23
#define ASSOCIATE_H
24
25
#include "blobs.h"
26
#include "elst.h"
27
#include "ratngs.h"
28
#include "seam.h"
29
#include "split.h"
30
31
namespace tesseract {
32
33
class WERD_RES;
34
35
// Statistics about character widths, gaps and seams.
36
struct AssociateStats {
37
26.3M
  AssociateStats() {
38
26.3M
    Clear();
39
26.3M
  }
40
41
52.5M
  void Clear() {
42
52.5M
    shape_cost = 0.0f;
43
52.5M
    bad_shape = false;
44
52.5M
    full_wh_ratio = 0.0f;
45
52.5M
    full_wh_ratio_total = 0.0f;
46
52.5M
    full_wh_ratio_var = 0.0f;
47
52.5M
    bad_fixed_pitch_right_gap = false;
48
52.5M
    bad_fixed_pitch_wh_ratio = false;
49
52.5M
    gap_sum = 0;
50
52.5M
  }
51
52
0
  void Print() {
53
0
    tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape);
54
0
  }
55
56
  float shape_cost;               // cost of blob shape
57
  bool bad_shape;                 // true if the shape of the blob is unacceptable
58
  float full_wh_ratio;            // width-to-height ratio + gap on the right
59
  float full_wh_ratio_total;      // sum of width-to-height ratios
60
                                  // on the path terminating at this blob
61
  float full_wh_ratio_var;        // variance of full_wh_ratios on the path
62
  bool bad_fixed_pitch_right_gap; // true if there is no gap before
63
                                  // the blob on the right
64
  bool bad_fixed_pitch_wh_ratio;  // true if the blob has a width-to-height
65
                                  // ratio > kMaxFixedPitchCharAspectRatio
66
  int gap_sum;                    // sum of gaps within the blob
67
};
68
69
// Utility functions for scoring segmentation paths according to their
70
// character widths, gap widths, seam characteristics.
71
class AssociateUtils {
72
public:
73
  static const float kMaxFixedPitchCharAspectRatio;
74
  static const float kMinGap;
75
76
  // Returns the outline length of the given blob, which is computed as:
77
  // rating_cert_scale * rating / certainty
78
  // Since from Wordrec::SegSearch() in segsearch.cpp
79
  // rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale
80
  // And from Classify::ConvertMatchesToChoices() in adaptmatch.cpp
81
  // Rating = Certainty = next.rating
82
  // Rating *= rating_scale * Results->BlobLength
83
  // Certainty *= -(getDict().certainty_scale)
84
58.4M
  static inline float ComputeOutlineLength(float rating_cert_scale, const BLOB_CHOICE &b) {
85
58.4M
    return rating_cert_scale * b.rating() / b.certainty();
86
58.4M
  }
87
0
  static inline float ComputeRating(float rating_cert_scale, float cert, int width) {
88
0
    return static_cast<float>(width) * cert / rating_cert_scale;
89
0
  }
90
91
  // Computes character widths, gaps and seams stats given the
92
  // AssociateStats of the path so far, col, row of the blob that
93
  // is being added to the path, and WERD_RES containing information
94
  // about character widths, gaps and seams.
95
  // Fills stats with the shape, gap and seam statistics
96
  // of adding a unichar from (col, row) to the path (note that since
97
  // this function could be used to compute the prioritization for
98
  // pain points, (col, row) entry might not be classified yet; thus
99
  // information in the (col, row) entry of the ratings matrix is not used).
100
  //
101
  // Note: the function assumes that the word_res and stats
101
  // pointers are not nullptr.
103
  static void ComputeStats(int col, int row, const AssociateStats *parent_stats,
104
                           int parent_path_length, bool fixed_pitch, float max_char_wh_ratio,
105
                           WERD_RES *word_res, bool debug, AssociateStats *stats);
106
107
  // Returns the width cost for fixed-pitch text.
108
  static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos,
109
                                   float max_char_wh_ratio);
110
111
  // Returns the gap cost for fixed-pitch text (penalizes vertically
112
  // overlapping components).
113
0
  static inline float FixedPitchGapCost(float norm_gap, bool end_pos) {
114
0
    return (norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f;
115
0
  }
116
};
117
118
} // namespace tesseract
119
120
#endif