/src/quantlib/ql/math/statistics/generalstatistics.hpp
Line | Count | Source |
1 | | /* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | |
3 | | /* |
4 | | Copyright (C) 2003 Ferdinando Ametrano |
5 | | Copyright (C) 2003 RiskMap srl |
6 | | |
7 | | This file is part of QuantLib, a free-software/open-source library |
8 | | for financial quantitative analysts and developers - http://quantlib.org/ |
9 | | |
10 | | QuantLib is free software: you can redistribute it and/or modify it |
11 | | under the terms of the QuantLib license. You should have received a |
12 | | copy of the license along with this program; if not, please email |
13 | | <quantlib-dev@lists.sf.net>. The license is also available online at |
14 | | <https://www.quantlib.org/license.shtml>. |
15 | | |
16 | | This program is distributed in the hope that it will be useful, but WITHOUT |
17 | | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
18 | | FOR A PARTICULAR PURPOSE. See the license for more details. |
19 | | */ |
20 | | |
21 | | /*! \file generalstatistics.hpp |
22 | | \brief statistics tool |
23 | | */ |
24 | | |
25 | | #ifndef quantlib_general_statistics_hpp |
26 | | #define quantlib_general_statistics_hpp |
27 | | |
28 | | #include <ql/utilities/null.hpp> |
29 | | #include <ql/errors.hpp> |
30 | | #include <vector> |
31 | | #include <algorithm> |
32 | | #include <utility> |
33 | | |
34 | | namespace QuantLib { |
35 | | |
36 | | //! Statistics tool |
37 | | /*! This class accumulates a set of data and returns their |
38 | | statistics (e.g., mean, variance, skewness, kurtosis, |
39 | | error estimation, percentile, etc.) based on the empirical |
40 | | distribution (no Gaussian assumption). |
41 | | |
42 | | It doesn't suffer the numerical instability problem of |
43 | | IncrementalStatistics. The downside is that it stores all |
44 | | samples, thus increasing the memory requirements. |
45 | | */ |
46 | | class GeneralStatistics { |
47 | | public: |
48 | | typedef Real value_type; |
49 | | GeneralStatistics(); |
50 | | //! \name Inspectors |
51 | | //@{ |
52 | | //! number of samples collected |
53 | | Size samples() const; |
54 | | |
55 | | //! collected data |
56 | | const std::vector<std::pair<Real,Real> >& data() const; |
57 | | |
58 | | //! sum of data weights |
59 | | Real weightSum() const; |
60 | | |
61 | | /*! returns the mean, defined as |
62 | | \f[ \langle x \rangle = \frac{\sum w_i x_i}{\sum w_i}. \f] |
63 | | */ |
64 | | Real mean() const; |
65 | | |
66 | | /*! returns the variance, defined as |
67 | | \f[ \sigma^2 = \frac{N}{N-1} \left\langle \left( |
68 | | x-\langle x \rangle \right)^2 \right\rangle. \f] |
69 | | */ |
70 | | Real variance() const; |
71 | | |
72 | | /*! returns the standard deviation \f$ \sigma \f$, defined as the |
73 | | square root of the variance. |
74 | | */ |
75 | | Real standardDeviation() const; |
76 | | |
77 | | /*! returns the error estimate on the mean value, defined as |
78 | | \f$ \epsilon = \sigma/\sqrt{N}. \f$ |
79 | | */ |
80 | | Real errorEstimate() const; |
81 | | |
82 | | /*! returns the skewness, defined as |
83 | | \f[ \frac{N^2}{(N-1)(N-2)} \frac{\left\langle \left( |
84 | | x-\langle x \rangle \right)^3 \right\rangle}{\sigma^3}. \f] |
85 | | The above evaluates to 0 for a Gaussian distribution. |
86 | | */ |
87 | | Real skewness() const; |
88 | | |
89 | | /*! returns the excess kurtosis, defined as |
90 | | \f[ \frac{N^2(N+1)}{(N-1)(N-2)(N-3)} |
91 | | \frac{\left\langle \left(x-\langle x \rangle \right)^4 |
92 | | \right\rangle}{\sigma^4} - \frac{3(N-1)^2}{(N-2)(N-3)}. \f] |
93 | | The above evaluates to 0 for a Gaussian distribution. |
94 | | */ |
95 | | Real kurtosis() const; |
96 | | |
97 | | /*! returns the minimum sample value */ |
98 | | Real min() const; |
99 | | |
100 | | /*! returns the maximum sample value */ |
101 | | Real max() const; |
102 | | |
103 | | /*! Expectation value of a function \f$ f \f$ on a given |
104 | | range \f$ \mathcal{R} \f$, i.e., |
105 | | \f[ \mathrm{E}\left[f \;|\; \mathcal{R}\right] = |
106 | | \frac{\sum_{x_i \in \mathcal{R}} f(x_i) w_i}{ |
107 | | \sum_{x_i \in \mathcal{R}} w_i}. \f] |
108 | | The range is passed as a boolean function returning |
109 | | <tt>true</tt> if the argument belongs to the range |
110 | | or <tt>false</tt> otherwise. |
111 | | |
112 | | The function returns a pair made of the result and |
113 | | the number of observations in the given range. |
114 | | */ |
115 | | template <class Func, class Predicate> |
116 | | std::pair<Real,Size> expectationValue(const Func& f, |
117 | 2.70k | const Predicate& inRange) const { |
118 | 2.70k | Real num = 0.0, den = 0.0; |
119 | 2.70k | Size N = 0; |
120 | 2.70k | std::vector<std::pair<Real,Real> >::const_iterator i; |
121 | 272k | for (i=samples_.begin(); i!=samples_.end(); ++i) { |
122 | 270k | Real x = i->first, w = i->second; |
123 | 270k | if (inRange(x)) { |
124 | 270k | num += f(x)*w; |
125 | 270k | den += w; |
126 | 270k | N += 1; |
127 | 270k | } |
128 | 270k | } |
129 | 2.70k | if (N == 0) |
130 | 0 | return std::make_pair<Real,Size>(Null<Real>(),0); |
131 | 2.70k | else |
132 | 2.70k | return std::make_pair(num/den,N); |
133 | 2.70k | } generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::mean() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const::{lambda(double)#1} const&) constLine | Count | Source | 117 | 2.02k | const Predicate& inRange) const { | 118 | 2.02k | Real num = 0.0, den = 0.0; | 119 | 2.02k | Size N = 0; | 120 | 2.02k | std::vector<std::pair<Real,Real> >::const_iterator i; | 121 | 204k | for (i=samples_.begin(); i!=samples_.end(); ++i) { | 122 | 202k | Real x = i->first, w = i->second; | 123 | 202k | if (inRange(x)) { | 124 | 202k | num += f(x)*w; | 125 | 202k | den += w; | 126 | 202k | N += 1; | 127 | 202k | } | 128 | 202k | } | 129 | 2.02k | if (N == 0) | 130 | 0 | return std::make_pair<Real,Size>(Null<Real>(),0); | 131 | 2.02k | else | 132 | 2.02k | return std::make_pair(num/den,N); | 133 | 2.02k | } |
generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::variance() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const::{lambda(double)#1} const&) constLine | Count | Source | 117 | 675 | const Predicate& inRange) const { | 118 | 675 | Real num = 0.0, den = 0.0; | 119 | 675 | Size N = 0; | 120 | 675 | std::vector<std::pair<Real,Real> >::const_iterator i; | 121 | 68.1k | for (i=samples_.begin(); i!=samples_.end(); ++i) { | 122 | 67.5k | Real x = i->first, w = i->second; | 123 | 67.5k | if (inRange(x)) { | 124 | 67.5k | num += f(x)*w; | 125 | 67.5k | den += w; | 126 | 67.5k | N += 1; | 127 | 67.5k | } | 128 | 67.5k | } | 129 | 675 | if (N == 0) | 130 | 0 | return std::make_pair<Real,Size>(Null<Real>(),0); | 131 | 675 | else | 132 | 675 | return std::make_pair(num/den,N); | 133 | 675 | } |
Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::skewness() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const::{lambda(double)#1} const&) constUnexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const::{lambda(double)#1} const&) const |
134 | | |
135 | | /*! Expectation value of a function \f$ f \f$ over the whole |
136 | | set of samples; equivalent to passing the other overload |
137 | | a range function always returning <tt>true</tt>. |
138 | | */ |
139 | | template <class Func> |
140 | 2.70k | std::pair<Real,Size> expectationValue(const Func& f) const { |
141 | 270k | return expectationValue(f, [](Real x) { return true; });generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const::{lambda(double)#1}::operator()(double) constLine | Count | Source | 141 | 202k | return expectationValue(f, [](Real x) { return true; }); |
generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const::{lambda(double)#1}::operator()(double) constLine | Count | Source | 141 | 67.5k | return expectationValue(f, [](Real x) { return true; }); |
Unexecuted instantiation: generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const::{lambda(double)#1}::operator()(double) constUnexecuted instantiation: generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const::{lambda(double)#1}::operator()(double) const |
142 | 2.70k | } generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const Line | Count | Source | 140 | 2.02k | std::pair<Real,Size> expectationValue(const Func& f) const { | 141 | 2.02k | return expectationValue(f, [](Real x) { return true; }); | 142 | 2.02k | } |
generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const Line | Count | Source | 140 | 675 | std::pair<Real,Size> expectationValue(const Func& f) const { | 141 | 675 | return expectationValue(f, [](Real x) { return true; }); | 142 | 675 | } |
Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const |
143 | | |
144 | | /*! \f$ y \f$-th percentile, defined as the value \f$ \bar{x} \f$ |
145 | | such that |
146 | | \f[ y = \frac{\sum_{x_i < \bar{x}} w_i}{ |
147 | | \sum_i w_i} \f] |
148 | | |
149 | | \pre \f$ y \f$ must be in the range \f$ (0, 1]. \f$ |
150 | | */ |
151 | | Real percentile(Real y) const; |
152 | | |
153 | | /*! \f$ y \f$-th top percentile, defined as the value |
154 | | \f$ \bar{x} \f$ such that |
155 | | \f[ y = \frac{\sum_{x_i > \bar{x}} w_i}{ |
156 | | \sum_i w_i} \f] |
157 | | |
158 | | \pre \f$ y \f$ must be in the range \f$ (0, 1]. \f$ |
159 | | */ |
160 | | Real topPercentile(Real y) const; |
161 | | //@} |
162 | | |
163 | | //! \name Modifiers |
164 | | //@{ |
165 | | //! adds a datum to the set, possibly with a weight |
166 | | void add(Real value, Real weight = 1.0); |
167 | | //! adds a sequence of data to the set, with default weight |
168 | | template <class DataIterator> |
169 | | void addSequence(DataIterator begin, DataIterator end) { |
170 | | for (;begin!=end;++begin) |
171 | | add(*begin); |
172 | | } |
173 | | //! adds a sequence of data to the set, each with its weight |
174 | | template <class DataIterator, class WeightIterator> |
175 | | void addSequence(DataIterator begin, DataIterator end, |
176 | | WeightIterator wbegin) { |
177 | | for (;begin!=end;++begin,++wbegin) |
178 | | add(*begin, *wbegin); |
179 | | } |
180 | | |
181 | | //! resets the data to a null set |
182 | | void reset(); |
183 | | |
184 | | //! informs the internal storage of a planned increase in size |
185 | | void reserve(Size n) const; |
186 | | |
187 | | //! sorts the data set in increasing order |
188 | | void sort() const; |
189 | | //@} |
190 | | private: |
191 | | mutable std::vector<std::pair<Real,Real> > samples_; |
192 | | mutable bool sorted_; |
193 | | }; |
194 | | |
195 | | |
196 | | // inline definitions |
197 | | |
198 | 675 | inline GeneralStatistics::GeneralStatistics() { |
199 | 675 | reset(); |
200 | 675 | } |
201 | | |
202 | 4.05k | inline Size GeneralStatistics::samples() const { |
203 | 4.05k | return samples_.size(); |
204 | 4.05k | } |
205 | | |
206 | | inline const std::vector<std::pair<Real,Real> >& |
207 | 0 | GeneralStatistics::data() const { |
208 | 0 | return samples_; |
209 | 0 | } |
210 | | |
211 | 0 | inline Real GeneralStatistics::standardDeviation() const { |
212 | 0 | return std::sqrt(variance()); |
213 | 0 | } |
214 | | |
215 | 675 | inline Real GeneralStatistics::errorEstimate() const { |
216 | 675 | return std::sqrt(variance()/samples()); |
217 | 675 | } |
218 | | |
219 | 0 | inline Real GeneralStatistics::min() const { |
220 | 0 | QL_REQUIRE(samples() > 0, "empty sample set"); |
221 | 0 | return std::min_element(samples_.begin(), |
222 | 0 | samples_.end())->first; |
223 | 0 | } |
224 | | |
225 | 0 | inline Real GeneralStatistics::max() const { |
226 | 0 | QL_REQUIRE(samples() > 0, "empty sample set"); |
227 | 0 | return std::max_element(samples_.begin(), |
228 | 0 | samples_.end())->first; |
229 | 0 | } |
230 | | |
231 | | /*! \pre weights must be non-negative (zero is allowed) */ |
232 | 67.5k | inline void GeneralStatistics::add(Real value, Real weight) { |
233 | 67.5k | QL_REQUIRE(weight>=0.0, "negative weight not allowed"); |
234 | 67.5k | samples_.emplace_back(value, weight); |
235 | 67.5k | sorted_ = false; |
236 | 67.5k | } |
237 | | |
238 | 675 | inline void GeneralStatistics::reset() { |
239 | 675 | samples_ = std::vector<std::pair<Real,Real> >(); |
240 | 675 | sorted_ = true; |
241 | 675 | } |
242 | | |
243 | 0 | inline void GeneralStatistics::reserve(Size n) const { |
244 | 0 | samples_.reserve(n); |
245 | 0 | } |
246 | | |
247 | 0 | inline void GeneralStatistics::sort() const { |
248 | 0 | if (!sorted_) { |
249 | 0 | std::sort(samples_.begin(), samples_.end()); |
250 | 0 | sorted_ = true; |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | | } |
255 | | |
256 | | |
257 | | #endif |