/src/libreoffice/sc/inc/kahan.hxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
10 | | #pragma once |
11 | | |
12 | | #include <o3tl/untaint.hxx> |
13 | | #include <rtl/math.hxx> |
14 | | #include <cmath> |
15 | | |
16 | | class KahanSum; |
17 | | namespace sc::op |
18 | | { |
19 | | // Checkout available optimization options. |
20 | | // Note that it turned out to be problematic to support CPU-specific code |
21 | | // that's not guaranteed to be available on that specific platform (see |
22 | | // git commit 2d36e7f5186ba5215f2b228b98c24520bd4f2882). SSE2 is guaranteed on |
23 | | // x86_64 and it is our baseline requirement for x86 on Windows, so SSE2 use is |
24 | | // hardcoded on those platforms. |
25 | | // Whenever we raise baseline to e.g. AVX, this may get |
26 | | // replaced with AVX code (get it from mentioned git commit). |
27 | | // Do it similarly with other platforms. |
28 | | #if defined(X86_64) || (defined(INTEL) && defined(_WIN32)) |
29 | | #define SC_USE_SSE2 1 |
30 | | KahanSum executeSSE2(size_t& i, size_t nSize, const double* pCurrent); |
31 | | #else |
32 | | #define SC_USE_SSE2 0 |
33 | | #endif |
34 | | } |
35 | | |
36 | | /** |
37 | | * This class provides LO with Kahan summation algorithm |
38 | | * About this algorithm: https://en.wikipedia.org/wiki/Kahan_summation_algorithm |
39 | | * For general purpose software we assume first order error is enough. |
40 | | * |
41 | | * Additionally queue and remember the last recent non-zero value and add it |
42 | | * similar to approxAdd() when obtaining the final result to further eliminate |
43 | | * accuracy errors. (e.g. for the dreaded 0.1 + 0.2 - 0.3 != 0.0) |
44 | | */ |
45 | | |
46 | | class KahanSum |
47 | | { |
48 | | public: |
49 | 1.83k | constexpr KahanSum() = default; |
50 | | |
51 | | constexpr KahanSum(double x_0) |
52 | 115k | : m_fSum(x_0) |
53 | 115k | { |
54 | 115k | } |
55 | | |
56 | | constexpr KahanSum(double x_0, double err_0) |
57 | 14.1k | : m_fSum(x_0) |
58 | 14.1k | , m_fError(err_0) |
59 | 14.1k | { |
60 | 14.1k | } |
61 | | |
62 | | constexpr KahanSum(const KahanSum& fSum) = default; |
63 | | |
64 | | public: |
65 | | /** |
66 | | * Performs one step of the Neumaier sum of doubles. |
67 | | * Overwrites the summand and error. |
68 | | * T could be double or long double. |
69 | | */ |
70 | | template <typename T> static inline void sumNeumaierNormal(T& sum, T& err, const double& value) |
71 | 106k | { |
72 | 106k | T t = sum + value; |
73 | 106k | if (std::abs(sum) >= std::abs(value)) |
74 | 67.3k | err += (sum - t) + value; |
75 | 39.1k | else |
76 | 39.1k | err += (value - t) + sum; |
77 | 106k | sum = t; |
78 | 106k | } |
79 | | |
80 | | /** |
81 | | * Adds a value to the sum using Kahan summation. |
82 | | * @param x_i |
83 | | */ |
84 | | void add(double x_i) |
85 | 326k | { |
86 | 326k | if (x_i == 0.0) |
87 | 177k | return; |
88 | | |
89 | 148k | if (!m_fMem) |
90 | 43.5k | { |
91 | 43.5k | m_fMem = x_i; |
92 | 43.5k | return; |
93 | 43.5k | } |
94 | | |
95 | 105k | sumNeumaierNormal(m_fSum, m_fError, m_fMem); |
96 | 105k | m_fMem = x_i; |
97 | 105k | } |
98 | | |
99 | | /** |
100 | | * Adds a value to the sum using Kahan summation. |
101 | | * @param fSum |
102 | | */ |
103 | | inline void add(const KahanSum& fSum) |
104 | 53.7k | { |
105 | 53.7k | #if SC_USE_SSE2 |
106 | 53.7k | add(fSum.m_fSum + fSum.m_fError); |
107 | 53.7k | add(fSum.m_fMem); |
108 | | #else |
109 | | // Without SSE2 the sum+compensation value fails badly. Continue |
110 | | // keeping the old though slightly off (see tdf#156985) explicit |
111 | | // addition of the compensation value. |
112 | | double sum = fSum.m_fSum; |
113 | | double err = fSum.m_fError; |
114 | | if (fSum.m_fMem != 0.0) |
115 | | sumNeumaierNormal(sum, err, fSum.m_fMem); |
116 | | add(sum); |
117 | | add(err); |
118 | | #endif |
119 | 53.7k | } |
120 | | |
121 | | /** |
122 | | * Substracts a value to the sum using Kahan summation. |
123 | | * @param fSum |
124 | | */ |
125 | 1.43k | inline void subtract(const KahanSum& fSum) { add(-fSum); } |
126 | | |
127 | | public: |
128 | | constexpr KahanSum operator-() const |
129 | 1.43k | { |
130 | 1.43k | KahanSum fKahanSum; |
131 | 1.43k | fKahanSum.m_fSum = -m_fSum; |
132 | 1.43k | fKahanSum.m_fError = -m_fError; |
133 | 1.43k | fKahanSum.m_fMem = -m_fMem; |
134 | 1.43k | return fKahanSum; |
135 | 1.43k | } |
136 | | |
137 | | constexpr KahanSum& operator=(double fSum) |
138 | 44.1k | { |
139 | 44.1k | m_fSum = fSum; |
140 | 44.1k | m_fError = 0; |
141 | 44.1k | m_fMem = 0; |
142 | 44.1k | return *this; |
143 | 44.1k | } |
144 | | |
145 | | constexpr KahanSum& operator=(const KahanSum& fSum) = default; |
146 | | |
147 | 52.3k | inline void operator+=(const KahanSum& fSum) { add(fSum); } |
148 | | |
149 | 191k | inline void operator+=(double fSum) { add(fSum); } |
150 | | |
151 | 1.43k | inline void operator-=(const KahanSum& fSum) { subtract(fSum); } |
152 | | |
153 | 17 | inline void operator-=(double fSum) { add(-fSum); } |
154 | | |
155 | | inline KahanSum operator+(double fSum) const |
156 | 527 | { |
157 | 527 | KahanSum fNSum(*this); |
158 | 527 | fNSum.add(fSum); |
159 | 527 | return fNSum; |
160 | 527 | } |
161 | | |
162 | | inline KahanSum operator+(const KahanSum& fSum) const |
163 | 0 | { |
164 | 0 | KahanSum fNSum(*this); |
165 | 0 | fNSum += fSum; |
166 | 0 | return fNSum; |
167 | 0 | } |
168 | | |
169 | | inline KahanSum operator-(double fSum) const |
170 | 0 | { |
171 | 0 | KahanSum fNSum(*this); |
172 | 0 | fNSum.add(-fSum); |
173 | 0 | return fNSum; |
174 | 0 | } |
175 | | |
176 | | inline KahanSum operator-(const KahanSum& fSum) const |
177 | 1.43k | { |
178 | 1.43k | KahanSum fNSum(*this); |
179 | 1.43k | fNSum -= fSum; |
180 | 1.43k | return fNSum; |
181 | 1.43k | } |
182 | | |
183 | | /** |
184 | | * In some parts of the code of interpr_.cxx this may be used for |
185 | | * product instead of sum. This operator shall be used for that task. |
186 | | */ |
187 | | constexpr void operator*=(double fTimes) |
188 | 4.24k | { |
189 | 4.24k | if (m_fMem) |
190 | 5 | { |
191 | 5 | m_fSum = get() * fTimes; |
192 | 5 | m_fMem = 0.0; |
193 | 5 | } |
194 | 4.23k | else |
195 | 4.23k | { |
196 | 4.23k | m_fSum = (m_fSum + m_fError) * fTimes; |
197 | 4.23k | } |
198 | 4.24k | m_fError = 0.0; |
199 | 4.24k | } |
200 | | |
201 | | constexpr void operator/=(double fDivides) |
202 | 0 | { |
203 | 0 | if (m_fMem) |
204 | 0 | { |
205 | 0 | m_fSum = get() / fDivides; |
206 | 0 | m_fMem = 0.0; |
207 | 0 | } |
208 | 0 | else |
209 | 0 | { |
210 | 0 | m_fSum = (m_fSum + m_fError) / fDivides; |
211 | 0 | } |
212 | 0 | m_fError = 0.0; |
213 | 0 | } |
214 | | |
215 | 131 | inline KahanSum operator*(const KahanSum& fTimes) const { return get() * fTimes.get(); } |
216 | | |
217 | 1.82k | inline KahanSum operator*(double fTimes) const { return get() * fTimes; } |
218 | | |
219 | | inline KahanSum operator/(const KahanSum& fDivides) const |
220 | 0 | { |
221 | 0 | return o3tl::div_allow_zero(get(), fDivides.get()); |
222 | 0 | } |
223 | | |
224 | 118 | inline KahanSum operator/(double fDivides) const { return get() / fDivides; } |
225 | | |
226 | 0 | inline bool operator<(const KahanSum& fSum) const { return get() < fSum.get(); } |
227 | | |
228 | 1.48k | inline bool operator<(double fSum) const { return get() < fSum; } |
229 | | |
230 | 0 | inline bool operator>(const KahanSum& fSum) const { return get() > fSum.get(); } |
231 | | |
232 | 0 | inline bool operator>(double fSum) const { return get() > fSum; } |
233 | | |
234 | 0 | inline bool operator<=(const KahanSum& fSum) const { return get() <= fSum.get(); } |
235 | | |
236 | 0 | inline bool operator<=(double fSum) const { return get() <= fSum; } |
237 | | |
238 | 0 | inline bool operator>=(const KahanSum& fSum) const { return get() >= fSum.get(); } |
239 | | |
240 | 0 | inline bool operator>=(double fSum) const { return get() >= fSum; } |
241 | | |
242 | 0 | inline bool operator==(const KahanSum& fSum) const { return get() == fSum.get(); } |
243 | | |
244 | 21.8k | inline bool operator!=(const KahanSum& fSum) const { return get() != fSum.get(); } |
245 | | |
246 | | public: |
247 | | /** |
248 | | * Returns the final sum. |
249 | | * @return final sum |
250 | | */ |
251 | | double get() const |
252 | 156k | { |
253 | 156k | const double fTotal = m_fSum + m_fError; |
254 | 156k | if (!m_fMem) |
255 | 129k | return fTotal; |
256 | | |
257 | | // Check the same condition as rtl::math::approxAdd() and if true |
258 | | // return 0.0, if false use another Kahan summation adding m_fMem. |
259 | 27.0k | if (((m_fMem < 0.0 && fTotal > 0.0) || (fTotal < 0.0 && m_fMem > 0.0)) |
260 | 1.68k | && rtl::math::approxEqual(m_fMem, -fTotal)) |
261 | 41 | { |
262 | | /* TODO: should we reset all values to zero here for further |
263 | | * summation, or is it better to keep them as they are? */ |
264 | 41 | return 0.0; |
265 | 41 | } |
266 | | |
267 | | // The actual argument passed to add() here does not matter as long as |
268 | | // it is not 0, m_fMem is not 0 and will be added anyway, see add(). |
269 | 26.9k | const_cast<KahanSum*>(this)->add(m_fMem); |
270 | 26.9k | const_cast<KahanSum*>(this)->m_fMem = 0.0; |
271 | 26.9k | return m_fSum + m_fError; |
272 | 27.0k | } |
273 | | |
274 | | private: |
275 | | double m_fSum = 0; |
276 | | double m_fError = 0; |
277 | | double m_fMem = 0; |
278 | | }; |
279 | | |
280 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ |