/src/tesseract/src/wordrec/associate.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: associate.h |
3 | | // Description: Structs, classes, typedefs useful for the segmentation |
4 | | // search. Functions for scoring segmentation paths according |
5 | | // to their character widths, gap widths and seam cuts. |
6 | | // Author: Daria Antonova |
7 | | // Created: Mon Mar 8 11:26:43 PDT 2010 |
8 | | // |
9 | | // (C) Copyright 2010, Google Inc. |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
11 | | // you may not use this file except in compliance with the License. |
12 | | // You may obtain a copy of the License at |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // Unless required by applicable law or agreed to in writing, software |
15 | | // distributed under the License is distributed on an "AS IS" BASIS, |
16 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
17 | | // See the License for the specific language governing permissions and |
18 | | // limitations under the License. |
19 | | // |
20 | | /////////////////////////////////////////////////////////////////////// |
21 | | |
22 | | #ifndef ASSOCIATE_H |
23 | | #define ASSOCIATE_H |
24 | | |
25 | | #include "blobs.h" |
26 | | #include "elst.h" |
27 | | #include "ratngs.h" |
28 | | #include "seam.h" |
29 | | #include "split.h" |
30 | | |
31 | | namespace tesseract { |
32 | | |
33 | | class WERD_RES; |
34 | | |
35 | | // Statistics about character widths, gaps and seams. Clear() (also run by the constructor) resets every field to zero/false; Print() reports only shape_cost and bad_shape. |
36 | | struct AssociateStats { |
37 | 26.3M | AssociateStats() { |
38 | 26.3M | Clear(); |
39 | 26.3M | } |
40 | | |
41 | 52.5M | void Clear() { |
42 | 52.5M | shape_cost = 0.0f; |
43 | 52.5M | bad_shape = false; |
44 | 52.5M | full_wh_ratio = 0.0f; |
45 | 52.5M | full_wh_ratio_total = 0.0f; |
46 | 52.5M | full_wh_ratio_var = 0.0f; |
47 | 52.5M | bad_fixed_pitch_right_gap = false; |
48 | 52.5M | bad_fixed_pitch_wh_ratio = false; |
49 | 52.5M | gap_sum = 0; |
50 | 52.5M | } |
51 | | |
52 | 0 | void Print() { |
53 | 0 | tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape); |
54 | 0 | } |
55 | | |
56 | | float shape_cost; // cost of the blob shape |
57 | | bool bad_shape; // true if the shape of the blob is unacceptable |
58 | | float full_wh_ratio; // width-to-height ratio + gap on the right |
59 | | float full_wh_ratio_total; // sum of width-to-height ratios |
60 | | // on the path terminating at this blob |
61 | | float full_wh_ratio_var; // variance of the full_wh_ratio values on the path |
62 | | bool bad_fixed_pitch_right_gap; // true if there is no gap before |
63 | | // the blob on the right |
64 | | bool bad_fixed_pitch_wh_ratio; // true if the blob has a width-to-height |
65 | | // ratio > kMaxFixedPitchCharAspectRatio |
66 | | int gap_sum; // sum of gaps within the blob |
67 | | }; |
68 | | |
69 | | // Utility functions for scoring segmentation paths according to their |
70 | | // character widths, gap widths and seam characteristics. All members are static. |
71 | | class AssociateUtils { |
72 | | public: |
73 | | static const float kMaxFixedPitchCharAspectRatio; |
74 | | static const float kMinGap; |
75 | | |
76 | | // Returns the outline length of the given blob, computed as |
77 | | // rating_cert_scale * rating / certainty (the certainty divisor is not checked for zero), |
78 | | // since, per Wordrec::SegSearch() in segsearch.cpp, |
79 | | // rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale |
80 | | // and, per Classify::ConvertMatchesToChoices() in adaptmatch.cpp, |
81 | | // Rating = Certainty = next.rating; Rating *= rating_scale * Results->BlobLength; |
82 | | // Certainty *= -(getDict().certainty_scale). |
83 | | // ComputeRating() inverts this formula: rating = width * cert / rating_cert_scale. |
84 | 58.4M | static inline float ComputeOutlineLength(float rating_cert_scale, const BLOB_CHOICE &b) { |
85 | 58.4M | return rating_cert_scale * b.rating() / b.certainty(); |
86 | 58.4M | } |
87 | 0 | static inline float ComputeRating(float rating_cert_scale, float cert, int width) { |
88 | 0 | return static_cast<float>(width) * cert / rating_cert_scale; |
89 | 0 | } |
90 | | |
91 | | // Computes character widths, gaps and seams stats given the |
92 | | // AssociateStats of the path so far, the col and row of the blob that |
93 | | // is being added to the path, and a WERD_RES containing information |
94 | | // about character widths, gaps and seams. |
95 | | // Presumably fills *stats with the combined shape, gap and seam cost |
96 | | // of adding a unichar from (col, row) to the path (note that since |
97 | | // this function could be used to compute the prioritization for |
98 | | // pain points, the (col, row) entry might not be classified yet; thus |
99 | | // information in the (col, row) entry of the ratings matrix is not used). |
100 | | // |
101 | | // Note: the function assumes that the word_res and stats pointers are |
102 | | // not nullptr. NOTE(review): earlier text named an associate_cost pointer, which this declaration does not have - confirm against the definition. |
103 | | static void ComputeStats(int col, int row, const AssociateStats *parent_stats, |
104 | | int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, |
105 | | WERD_RES *word_res, bool debug, AssociateStats *stats); |
106 | | |
107 | | // Returns the width cost for fixed-pitch text. |
108 | | static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, |
109 | | float max_char_wh_ratio); |
110 | | |
111 | | // Returns the gap cost for fixed-pitch text (penalizes vertically |
112 | | // overlapping components): 5.0 if norm_gap < 0.05 and end_pos is false, otherwise 0.0. |
113 | 0 | static inline float FixedPitchGapCost(float norm_gap, bool end_pos) { |
114 | 0 | return (norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f; |
115 | 0 | } |
116 | | }; |
117 | | |
118 | | } // namespace tesseract |
119 | | |
120 | | #endif |