Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/ccstruct/linlsq.h
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 * File:        linlsq.h  (Formerly llsq.h)
3
 * Description: Linear Least squares fitting code.
4
 * Author:      Ray Smith
5
 *
6
 * (C) Copyright 1991, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
#ifndef TESSERACT_CCSTRUCT_LINLSQ_H_
20
#define TESSERACT_CCSTRUCT_LINLSQ_H_
21
22
#include "points.h" // for FCOORD
23
24
#include <algorithm> // for std::nth_element
25
#include <cstdint> // for int32_t
26
27
namespace tesseract {
28
29
class TESS_API LLSQ {
30
public:
31
1.39M
  LLSQ() {   // constructor
32
1.39M
    clear(); // set to zeros
33
1.39M
  }
34
  void clear(); // initialize
35
36
  // Adds an element with a weight of 1.
37
  void add(double x, double y);
38
  // Adds an element with a specified weight.
39
  void add(double x, double y, double weight);
40
  // Adds a whole LLSQ.
41
  void add(const LLSQ &other);
42
  // Deletes an element with a weight of 1.
43
  void remove(double x, double y);
44
1.20M
  int32_t count() const { // no of elements
45
1.20M
    return static_cast<int>(total_weight + 0.5);
46
1.20M
  }
47
48
  double m() const;                     // get gradient
49
  double c(double m) const;             // get constant
50
  double rms(double m, double c) const; // get error
51
  double pearson() const;               // get correlation coefficient.
52
53
  // Returns the x,y means as an FCOORD.
54
  FCOORD mean_point() const;
55
56
  // Returns the average sum of squared perpendicular error from a line
57
  // through mean_point() in the direction dir.
58
  double rms_orth(const FCOORD &dir) const;
59
60
  // Returns the direction of the fitted line as a unit vector, using the
61
  // least mean squared perpendicular distance. The line runs through the
62
  // mean_point, i.e. a point p on the line is given by:
63
  // p = mean_point() + lambda * vector_fit() for some real number lambda.
64
  // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous
65
  // and may be negated without changing its meaning, since a line is only
66
  // unique to a range of pi radians.
67
  // Modernists prefer to think of this as an Eigenvalue problem, but
68
  // Pearson had the simple solution in 1901.
69
  //
70
  // Note that this is equivalent to returning the Principal Component in PCA,
71
  // or the eigenvector corresponding to the largest eigenvalue in the
72
  // covariance matrix.
73
  FCOORD vector_fit() const;
74
75
  // Returns the covariance.
76
91.0k
  double covariance() const {
77
91.0k
    if (total_weight > 0.0) {
78
91.0k
      return (sigxy - sigx * sigy / total_weight) / total_weight;
79
91.0k
    } else {
80
0
      return 0.0;
81
0
    }
82
91.0k
  }
83
1.33M
  double x_variance() const {
84
1.33M
    if (total_weight > 0.0) {
85
1.33M
      return (sigxx - sigx * sigx / total_weight) / total_weight;
86
1.33M
    } else {
87
0
      return 0.0;
88
0
    }
89
1.33M
  }
90
1.24M
  double y_variance() const {
91
1.24M
    if (total_weight > 0.0) {
92
1.24M
      return (sigyy - sigy * sigy / total_weight) / total_weight;
93
1.24M
    } else {
94
0
      return 0.0;
95
0
    }
96
1.24M
  }
97
98
private:
99
  double total_weight; // no of elements or sum of weights.
100
  double sigx;         // sum of x
101
  double sigy;         // sum of y
102
  double sigxx;        // sum x squared
103
  double sigxy;        // sum of xy
104
  double sigyy;        // sum y squared
105
};
106
107
// Returns the median value of the vector, given that the values are
108
// circular, with the given modulus. Values may be signed or unsigned,
109
// eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi).
110
// NOTE that the array is shuffled, but the time taken is linear.
111
// An assumption is made that most of the values are spread over no more than
112
// half the range, but wrap-around is accounted for if the median is near
113
// the wrap-around point.
114
// Cannot be a member of vector, as it makes heavy use of LLSQ.
115
// T must be an integer or float/double type.
116
template <typename T>
117
40.1k
T MedianOfCircularValues(T modulus, std::vector<T> &v) {
118
40.1k
  LLSQ stats;
119
40.1k
  T halfrange = static_cast<T>(modulus / 2);
120
40.1k
  auto num_elements = v.size();
121
690k
  for (auto i : v) {
122
690k
    stats.add(i, i + halfrange);
123
690k
  }
124
40.1k
  bool offset_needed = stats.y_variance() < stats.x_variance();
125
40.1k
  if (offset_needed) {
126
258k
    for (auto i : v) {
127
258k
      i += halfrange;
128
258k
    }
129
15.0k
  }
130
40.1k
  auto median_index = num_elements / 2;
131
40.1k
  std::nth_element(v.begin(), v.begin() + median_index, v.end());
132
40.1k
  if (offset_needed) {
133
258k
    for (auto i : v) {
134
258k
      i -= halfrange;
135
258k
    }
136
15.0k
  }
137
40.1k
  return v[median_index];
138
40.1k
}
139
140
} // namespace tesseract
141
142
#endif // TESSERACT_CCSTRUCT_LINLSQ_H_