/src/tesseract/src/ccstruct/linlsq.h
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: linlsq.h (Formerly llsq.h) |
3 | | * Description: Linear Least squares fitting code. |
4 | | * Author: Ray Smith |
5 | | * |
6 | | * (C) Copyright 1991, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #ifndef TESSERACT_CCSTRUCT_LINLSQ_H_ |
20 | | #define TESSERACT_CCSTRUCT_LINLSQ_H_ |
21 | | |
22 | | #include "points.h" // for FCOORD |
23 | | |
24 | | #include <algorithm> // for std::nth_element |
25 | | #include <cstdint> // for int32_t |
26 | | |
27 | | namespace tesseract { |
28 | | |
29 | | class TESS_API LLSQ { |
30 | | public: |
31 | 1.39M | LLSQ() { // constructor |
32 | 1.39M | clear(); // set to zeros |
33 | 1.39M | } |
34 | | void clear(); // initialize |
35 | | |
36 | | // Adds an element with a weight of 1. |
37 | | void add(double x, double y); |
38 | | // Adds an element with a specified weight. |
39 | | void add(double x, double y, double weight); |
40 | | // Adds a whole LLSQ. |
41 | | void add(const LLSQ &other); |
42 | | // Deletes an element with a weight of 1. |
43 | | void remove(double x, double y); |
44 | 1.20M | int32_t count() const { // no of elements |
45 | 1.20M | return static_cast<int>(total_weight + 0.5); |
46 | 1.20M | } |
47 | | |
48 | | double m() const; // get gradient |
49 | | double c(double m) const; // get constant |
50 | | double rms(double m, double c) const; // get error |
51 | | double pearson() const; // get correlation coefficient. |
52 | | |
53 | | // Returns the x,y means as an FCOORD. |
54 | | FCOORD mean_point() const; |
55 | | |
56 | | // Returns the average sum of squared perpendicular error from a line |
57 | | // through mean_point() in the direction dir. |
58 | | double rms_orth(const FCOORD &dir) const; |
59 | | |
60 | | // Returns the direction of the fitted line as a unit vector, using the |
61 | | // least mean squared perpendicular distance. The line runs through the |
62 | | // mean_point, i.e. a point p on the line is given by: |
63 | | // p = mean_point() + lambda * vector_fit() for some real number lambda. |
64 | | // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous |
65 | | // and may be negated without changing its meaning, since a line is only |
66 | | // unique to a range of pi radians. |
67 | | // Modernists prefer to think of this as an Eigenvalue problem, but |
68 | | // Pearson had the simple solution in 1901. |
69 | | // |
70 | | // Note that this is equivalent to returning the Principal Component in PCA, |
71 | | // or the eigenvector corresponding to the largest eigenvalue in the |
72 | | // covariance matrix. |
73 | | FCOORD vector_fit() const; |
74 | | |
75 | | // Returns the covariance. |
76 | 91.0k | double covariance() const { |
77 | 91.0k | if (total_weight > 0.0) { |
78 | 91.0k | return (sigxy - sigx * sigy / total_weight) / total_weight; |
79 | 91.0k | } else { |
80 | 0 | return 0.0; |
81 | 0 | } |
82 | 91.0k | } |
83 | 1.33M | double x_variance() const { |
84 | 1.33M | if (total_weight > 0.0) { |
85 | 1.33M | return (sigxx - sigx * sigx / total_weight) / total_weight; |
86 | 1.33M | } else { |
87 | 0 | return 0.0; |
88 | 0 | } |
89 | 1.33M | } |
90 | 1.24M | double y_variance() const { |
91 | 1.24M | if (total_weight > 0.0) { |
92 | 1.24M | return (sigyy - sigy * sigy / total_weight) / total_weight; |
93 | 1.24M | } else { |
94 | 0 | return 0.0; |
95 | 0 | } |
96 | 1.24M | } |
97 | | |
98 | | private: |
99 | | double total_weight; // no of elements or sum of weights. |
100 | | double sigx; // sum of x |
101 | | double sigy; // sum of y |
102 | | double sigxx; // sum x squared |
103 | | double sigxy; // sum of xy |
104 | | double sigyy; // sum y squared |
105 | | }; |
106 | | |
107 | | // Returns the median value of the vector, given that the values are |
108 | | // circular, with the given modulus. Values may be signed or unsigned, |
109 | | // eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi). |
110 | | // NOTE that the array is shuffled, but the time taken is linear. |
111 | | // An assumption is made that most of the values are spread over no more than |
112 | | // half the range, but wrap-around is accounted for if the median is near |
113 | | // the wrap-around point. |
114 | | // Cannot be a member of vector, as it makes heavy use of LLSQ. |
115 | | // T must be an integer or float/double type. |
116 | | template <typename T> |
117 | 40.1k | T MedianOfCircularValues(T modulus, std::vector<T> &v) { |
118 | 40.1k | LLSQ stats; |
119 | 40.1k | T halfrange = static_cast<T>(modulus / 2); |
120 | 40.1k | auto num_elements = v.size(); |
121 | 690k | for (auto i : v) { |
122 | 690k | stats.add(i, i + halfrange); |
123 | 690k | } |
124 | 40.1k | bool offset_needed = stats.y_variance() < stats.x_variance(); |
125 | 40.1k | if (offset_needed) { |
126 | 258k | for (auto i : v) { |
127 | 258k | i += halfrange; |
128 | 258k | } |
129 | 15.0k | } |
130 | 40.1k | auto median_index = num_elements / 2; |
131 | 40.1k | std::nth_element(v.begin(), v.begin() + median_index, v.end()); |
132 | 40.1k | if (offset_needed) { |
133 | 258k | for (auto i : v) { |
134 | 258k | i -= halfrange; |
135 | 258k | } |
136 | 15.0k | } |
137 | 40.1k | return v[median_index]; |
138 | 40.1k | } |
139 | | |
140 | | } // namespace tesseract |
141 | | |
142 | | #endif // TESSERACT_CCSTRUCT_LINLSQ_H_ |