Coverage Report

Created: 2026-06-08 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/quantlib/ql/math/statistics/generalstatistics.hpp
Line
Count
Source
1
/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3
/*
4
 Copyright (C) 2003 Ferdinando Ametrano
5
 Copyright (C) 2003 RiskMap srl
6
7
 This file is part of QuantLib, a free-software/open-source library
8
 for financial quantitative analysts and developers - http://quantlib.org/
9
10
 QuantLib is free software: you can redistribute it and/or modify it
11
 under the terms of the QuantLib license.  You should have received a
12
 copy of the license along with this program; if not, please email
13
 <quantlib-dev@lists.sf.net>. The license is also available online at
14
 <https://www.quantlib.org/license.shtml>.
15
16
 This program is distributed in the hope that it will be useful, but WITHOUT
17
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
 FOR A PARTICULAR PURPOSE.  See the license for more details.
19
*/
20
21
/*! \file generalstatistics.hpp
22
    \brief statistics tool
23
*/
24
25
#ifndef quantlib_general_statistics_hpp
26
#define quantlib_general_statistics_hpp
27
28
#include <ql/utilities/null.hpp>
29
#include <ql/errors.hpp>
30
#include <vector>
31
#include <algorithm>
32
#include <utility>
33
34
namespace QuantLib {
35
36
    //! Statistics tool
37
    /*! This class accumulates a set of data and returns their
38
        statistics (e.g: mean, variance, skewness, kurtosis,
39
        error estimation, percentile, etc.) based on the empirical
40
        distribution (no gaussian assumption)
41
42
        It doesn't suffer the numerical instability problem of
43
        IncrementalStatistics. The downside is that it stores all
44
        samples, thus increasing the memory requirements.
45
    */
46
    class GeneralStatistics {
47
      public:
48
        typedef Real value_type;
49
        GeneralStatistics();
50
        //! \name Inspectors
51
        //@{
52
        //! number of samples collected
53
        Size samples() const;
54
55
        //! collected data
56
        const std::vector<std::pair<Real,Real> >& data() const;
57
58
        //! sum of data weights
59
        Real weightSum() const;
60
61
        /*! returns the mean, defined as
62
            \f[ \langle x \rangle = \frac{\sum w_i x_i}{\sum w_i}. \f]
63
        */
64
        Real mean() const;
65
66
        /*! returns the variance, defined as
67
            \f[ \sigma^2 = \frac{N}{N-1} \left\langle \left(
68
                x-\langle x \rangle \right)^2 \right\rangle. \f]
69
        */
70
        Real variance() const;
71
72
        /*! returns the standard deviation \f$ \sigma \f$, defined as the
73
            square root of the variance.
74
        */
75
        Real standardDeviation() const;
76
77
        /*! returns the error estimate on the mean value, defined as
78
            \f$ \epsilon = \sigma/\sqrt{N}. \f$
79
        */
80
        Real errorEstimate() const;
81
82
        /*! returns the skewness, defined as
83
            \f[ \frac{N^2}{(N-1)(N-2)} \frac{\left\langle \left(
84
                x-\langle x \rangle \right)^3 \right\rangle}{\sigma^3}. \f]
85
            The above evaluates to 0 for a Gaussian distribution.
86
        */
87
        Real skewness() const;
88
89
        /*! returns the excess kurtosis, defined as
90
            \f[ \frac{N^2(N+1)}{(N-1)(N-2)(N-3)}
91
                \frac{\left\langle \left(x-\langle x \rangle \right)^4
92
                \right\rangle}{\sigma^4} - \frac{3(N-1)^2}{(N-2)(N-3)}. \f]
93
            The above evaluates to 0 for a Gaussian distribution.
94
        */
95
        Real kurtosis() const;
96
97
        /*! returns the minimum sample value */
98
        Real min() const;
99
100
        /*! returns the maximum sample value */
101
        Real max() const;
102
103
        /*! Expectation value of a function \f$ f \f$ on a given
104
            range \f$ \mathcal{R} \f$, i.e.,
105
            \f[ \mathrm{E}\left[f \;|\; \mathcal{R}\right] =
106
                \frac{\sum_{x_i \in \mathcal{R}} f(x_i) w_i}{
107
                      \sum_{x_i \in \mathcal{R}} w_i}. \f]
108
            The range is passed as a boolean function returning
109
            <tt>true</tt> if the argument belongs to the range
110
            or <tt>false</tt> otherwise.
111
112
            The function returns a pair made of the result and
113
            the number of observations in the given range.
114
        */
115
        template <class Func, class Predicate>
116
        std::pair<Real,Size> expectationValue(const Func& f,
117
2.70k
                                              const Predicate& inRange) const {
118
2.70k
            Real num = 0.0, den = 0.0;
119
2.70k
            Size N = 0;
120
2.70k
            std::vector<std::pair<Real,Real> >::const_iterator i;
121
272k
            for (i=samples_.begin(); i!=samples_.end(); ++i) {
122
270k
                Real x = i->first, w = i->second;
123
270k
                if (inRange(x)) {
124
270k
                    num += f(x)*w;
125
270k
                    den += w;
126
270k
                    N += 1;
127
270k
                }
128
270k
            }
129
2.70k
            if (N == 0)
130
0
                return std::make_pair<Real,Size>(Null<Real>(),0);
131
2.70k
            else
132
2.70k
                return std::make_pair(num/den,N);
133
2.70k
        }
generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::mean() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const::{lambda(double)#1} const&) const
Line
Count
Source
117
2.02k
                                              const Predicate& inRange) const {
118
2.02k
            Real num = 0.0, den = 0.0;
119
2.02k
            Size N = 0;
120
2.02k
            std::vector<std::pair<Real,Real> >::const_iterator i;
121
204k
            for (i=samples_.begin(); i!=samples_.end(); ++i) {
122
202k
                Real x = i->first, w = i->second;
123
202k
                if (inRange(x)) {
124
202k
                    num += f(x)*w;
125
202k
                    den += w;
126
202k
                    N += 1;
127
202k
                }
128
202k
            }
129
2.02k
            if (N == 0)
130
0
                return std::make_pair<Real,Size>(Null<Real>(),0);
131
2.02k
            else
132
2.02k
                return std::make_pair(num/den,N);
133
2.02k
        }
generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::variance() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const::{lambda(double)#1} const&) const
Line
Count
Source
117
675
                                              const Predicate& inRange) const {
118
675
            Real num = 0.0, den = 0.0;
119
675
            Size N = 0;
120
675
            std::vector<std::pair<Real,Real> >::const_iterator i;
121
68.1k
            for (i=samples_.begin(); i!=samples_.end(); ++i) {
122
67.5k
                Real x = i->first, w = i->second;
123
67.5k
                if (inRange(x)) {
124
67.5k
                    num += f(x)*w;
125
67.5k
                    den += w;
126
67.5k
                    N += 1;
127
67.5k
                }
128
67.5k
            }
129
675
            if (N == 0)
130
0
                return std::make_pair<Real,Size>(Null<Real>(),0);
131
675
            else
132
675
                return std::make_pair(num/den,N);
133
675
        }
Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::skewness() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const::{lambda(double)#1} const&) const
Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const::{lambda(double)#1}>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&, QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const::{lambda(double)#1} const&) const
134
135
        /*! Expectation value of a function \f$ f \f$ over the whole
136
            set of samples; equivalent to passing the other overload
137
            a range function always returning <tt>true</tt>.
138
        */
139
        template <class Func>
140
2.70k
        std::pair<Real,Size> expectationValue(const Func& f) const {
141
270k
            return expectationValue(f, [](Real x) { return true; });
generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const::{lambda(double)#1}::operator()(double) const
Line
Count
Source
141
202k
            return expectationValue(f, [](Real x) { return true; });
generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const::{lambda(double)#1}::operator()(double) const
Line
Count
Source
141
67.5k
            return expectationValue(f, [](Real x) { return true; });
Unexecuted instantiation: generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const::{lambda(double)#1}::operator()(double) const
Unexecuted instantiation: generalstatistics.cpp:QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const::{lambda(double)#1}::operator()(double) const
142
2.70k
        }
generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::mean() const::$_0>(QuantLib::GeneralStatistics::mean() const::$_0 const&) const
Line
Count
Source
140
2.02k
        std::pair<Real,Size> expectationValue(const Func& f) const {
141
2.02k
            return expectationValue(f, [](Real x) { return true; });
142
2.02k
        }
generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::variance() const::$_0>(QuantLib::GeneralStatistics::variance() const::$_0 const&) const
Line
Count
Source
140
675
        std::pair<Real,Size> expectationValue(const Func& f) const {
141
675
            return expectationValue(f, [](Real x) { return true; });
142
675
        }
Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::skewness() const::$_0>(QuantLib::GeneralStatistics::skewness() const::$_0 const&) const
Unexecuted instantiation: generalstatistics.cpp:std::__1::pair<double, unsigned long> QuantLib::GeneralStatistics::expectationValue<QuantLib::GeneralStatistics::kurtosis() const::$_0>(QuantLib::GeneralStatistics::kurtosis() const::$_0 const&) const
143
        
144
        /*! \f$ y \f$-th percentile, defined as the value \f$ \bar{x} \f$
145
            such that
146
            \f[ y = \frac{\sum_{x_i < \bar{x}} w_i}{
147
                          \sum_i w_i} \f]
148
149
            \pre \f$ y \f$ must be in the range \f$ (0-1]. \f$
150
        */
151
        Real percentile(Real y) const;
152
153
        /*! \f$ y \f$-th top percentile, defined as the value
154
            \f$ \bar{x} \f$ such that
155
            \f[ y = \frac{\sum_{x_i > \bar{x}} w_i}{
156
                          \sum_i w_i} \f]
157
158
            \pre \f$ y \f$ must be in the range \f$ (0-1]. \f$
159
        */
160
        Real topPercentile(Real y) const;
161
        //@}
162
163
        //! \name Modifiers
164
        //@{
165
        //! adds a datum to the set, possibly with a weight
166
        void add(Real value, Real weight = 1.0);
167
        //! adds a sequence of data to the set, with default weight
168
        template <class DataIterator>
169
        void addSequence(DataIterator begin, DataIterator end) {
170
            for (;begin!=end;++begin)
171
                add(*begin);
172
        }
173
        //! adds a sequence of data to the set, each with its weight
174
        template <class DataIterator, class WeightIterator>
175
        void addSequence(DataIterator begin, DataIterator end,
176
                         WeightIterator wbegin) {
177
            for (;begin!=end;++begin,++wbegin)
178
                add(*begin, *wbegin);
179
        }
180
181
        //! resets the data to a null set
182
        void reset();
183
184
        //! informs the internal storage of a planned increase in size
185
        void reserve(Size n) const;
186
187
        //! sort the data set in increasing order
188
        void sort() const;
189
        //@}
190
      private:
191
        mutable std::vector<std::pair<Real,Real> > samples_;
192
        mutable bool sorted_;
193
    };
194
195
196
    // inline definitions
197
198
675
    //! Builds an empty statistics set.
    /*! The resulting state is the one reset() produces: no samples
        stored, and the (empty) data set flagged as sorted.
    */
    inline GeneralStatistics::GeneralStatistics()
    : sorted_(true) {}
201
202
4.05k
    //! Returns how many (value, weight) pairs are currently stored.
    inline Size GeneralStatistics::samples() const {
        const Size collected = samples_.size();
        return collected;
    }
205
206
    inline const std::vector<std::pair<Real,Real> >&
207
0
    GeneralStatistics::data() const {
208
0
        return samples_;
209
0
    }
210
211
0
    //! Standard deviation, computed as the square root of variance().
    inline Real GeneralStatistics::standardDeviation() const {
        const Real sigma2 = variance();
        return std::sqrt(sigma2);
    }
214
215
675
    //! Error estimate on the mean.
    /*! Computed as the square root of variance() divided by the
        number of samples.
    */
    inline Real GeneralStatistics::errorEstimate() const {
        const Real sigma2 = variance();
        const Size n = samples();
        return std::sqrt(sigma2/n);
    }
218
219
0
    //! Smallest sample value.
    /*! Fails the QL_REQUIRE check with "empty sample set" when no
        sample has been added. Stored pairs are compared with their
        default ordering and the value of the smallest one is returned.
    */
    inline Real GeneralStatistics::min() const {
        QL_REQUIRE(samples() > 0, "empty sample set");
        const auto lowest = std::min_element(samples_.begin(),
                                             samples_.end());
        return lowest->first;
    }
224
225
0
    //! Largest sample value.
    /*! Fails the QL_REQUIRE check with "empty sample set" when no
        sample has been added. Stored pairs are compared with their
        default ordering and the value of the largest one is returned.
    */
    inline Real GeneralStatistics::max() const {
        QL_REQUIRE(samples() > 0, "empty sample set");
        const auto highest = std::max_element(samples_.begin(),
                                              samples_.end());
        return highest->first;
    }
230
231
    /*! \pre weights must be positive or null */
232
67.5k
    inline void GeneralStatistics::add(Real value, Real weight) {
233
67.5k
        QL_REQUIRE(weight>=0.0, "negative weight not allowed");
234
67.5k
        samples_.emplace_back(value, weight);
235
67.5k
        sorted_ = false;
236
67.5k
    }
237
238
675
    inline void GeneralStatistics::reset() {
239
675
        samples_ = std::vector<std::pair<Real,Real> >();
240
675
        sorted_ = true;
241
675
    }
242
243
0
    //! Forwards a capacity hint to the underlying sample vector.
    /*! Callable on const objects because the storage is declared
        mutable; the stored samples themselves are left untouched.
    */
    inline void GeneralStatistics::reserve(Size n) const {
        std::vector<std::pair<Real,Real> >& storage = samples_;
        storage.reserve(n);
    }
246
247
0
    inline void GeneralStatistics::sort() const {
248
0
        if (!sorted_) {
249
0
            std::sort(samples_.begin(), samples_.end());
250
0
            sorted_ = true;
251
0
        }
252
0
    }
253
254
}
255
256
257
#endif