Coverage Report

Created: 2026-04-01 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/classify/normfeat.cpp
Line
Count
Source
1
/******************************************************************************
2
 ** Filename:    normfeat.cpp
3
 ** Purpose:     Definition of char normalization features.
4
 ** Author:      Dan Johnson
5
 **
6
 ** (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
18
#include "normfeat.h"
19
20
#include "featdefs.h"
21
#include "intfx.h"
22
#include "mfoutline.h"
23
24
namespace tesseract {
25
26
/** Return the length of the outline in baseline normalized form. */
27
1.70M
float ActualOutlineLength(FEATURE Feature) {
28
1.70M
  return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
29
1.70M
}
30
31
/**
32
 * Return the character normalization feature for a blob.
33
 *
34
 * The features returned are in a scale where the x-height has been
35
 * normalized to live in the region y = [-0.25 .. 0.25].  Example ranges
36
 * for English below are based on the Linux font collection on 2009-12-04:
37
 *
38
 *  - Params[CharNormY]
39
 *     - The y coordinate of the grapheme's centroid.
40
 *     - English: [-0.27, 0.71]
41
 *
42
 *  - Params[CharNormLength]
43
 *     - The length of the grapheme's outline (tiny segments discarded),
44
 *     divided by 10.0=LENGTH_COMPRESSION.
45
 *     - English: [0.16, 0.85]
46
 *
47
 *  - Params[CharNormRx]
48
 *     - The radius of gyration about the x axis, as measured from CharNormY.
49
 *     - English: [0.011, 0.34]
50
 *
51
 *  - Params[CharNormRy]
52
 *     - The radius of gyration about the y axis, as measured from
53
 *     the x center of the grapheme's bounding box.
54
 *     - English: [0.011, 0.31]
55
 */
56
0
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info) {
  // The set is constructed with an argument of 1 (presumably its capacity --
  // confirm against FEATURE_SET_STRUCT); exactly one feature is added below.
  FEATURE_SET_STRUCT *char_norm_set = new FEATURE_SET_STRUCT(1);
  FEATURE_STRUCT *char_norm = new FEATURE_STRUCT(&CharNormDesc);

  // Every parameter is scaled by MF_SCALE_FACTOR. The y centroid is taken
  // relative to kBlnBaselineOffset, and the outline length is additionally
  // divided by LENGTH_COMPRESSION.
  auto &params = char_norm->Params;
  params[CharNormY] = MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
  params[CharNormLength] = MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
  params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
  params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;

  AddFeature(char_norm_set, char_norm);
  return char_norm_set;
} /* ExtractCharNormFeatures */
69
70
} // namespace tesseract