Coverage Report

Created: 2026-02-14 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/alg/raster_stats.h
Line
Count
Source
1
/******************************************************************************
2
*
3
 * Project:  GDAL
4
 * Purpose:  GDALZonalStats implementation
5
 * Author:   Dan Baston
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2018-2025, ISciences LLC
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#pragma once
14
15
#include <algorithm>
16
#include <cmath>
17
#include <limits>
18
#include <optional>
19
#include <unordered_map>
20
21
//! @cond Doxygen_Suppress
22
23
namespace gdal
24
{
25
struct RasterStatsOptions
26
{
27
    static constexpr float min_coverage_fraction_default =
28
        std::numeric_limits<float>::min();  // ~1e-38
29
30
    float min_coverage_fraction = min_coverage_fraction_default;
31
    bool calc_variance = false;
32
    bool store_histogram = false;
33
    bool store_values = false;
34
    bool store_weights = false;
35
    bool store_coverage_fraction = false;
36
    bool store_xy = false;
37
    bool include_nodata = false;
38
    double default_weight = std::numeric_limits<double>::quiet_NaN();
39
40
    bool operator==(const RasterStatsOptions &other) const
41
0
    {
42
0
        return min_coverage_fraction == other.min_coverage_fraction &&
43
0
               calc_variance == other.calc_variance &&
44
0
               store_histogram == other.store_histogram &&
45
0
               store_values == other.store_values &&
46
0
               store_weights == other.store_weights &&
47
0
               store_coverage_fraction == other.store_coverage_fraction &&
48
0
               store_xy == other.store_xy &&
49
0
               include_nodata == other.include_nodata &&
50
0
               (default_weight == other.default_weight ||
51
0
                (std::isnan(default_weight) &&
52
0
                 std::isnan(other.default_weight)));
53
0
    }
54
55
    bool operator!=(const RasterStatsOptions &other) const
56
0
    {
57
0
        return !(*this == other);
58
0
    }
59
};
60
61
class WestVariance
62
{
63
    /** \brief Implements an incremental algorithm for weighted standard
64
     * deviation, variance, and coefficient of variation, as described in
65
     * formula WV2 of West, D.H.D. (1979) "Updating Mean and Variance
66
     * Estimates: An Improved Method". Communications of the ACM 22(9).
67
     */
68
69
  private:
70
    double sum_w = 0;
71
    double mean = 0;
72
    double t = 0;
73
74
  public:
75
    /** \brief Update variance estimate with another value
76
     *
77
     * @param x value to add
78
     * @param w weight of `x`
79
     */
80
    void process(double x, double w)
81
0
    {
82
0
        if (w == 0)
83
0
        {
84
0
            return;
85
0
        }
86
87
0
        double mean_old = mean;
88
89
0
        sum_w += w;
90
0
        mean += (w / sum_w) * (x - mean_old);
91
0
        t += w * (x - mean_old) * (x - mean);
92
0
    }
93
94
    /** \brief Return the population variance.
95
     */
96
    constexpr double variance() const
97
0
    {
98
0
        return t / sum_w;
99
0
    }
100
101
    /** \brief Return the population standard deviation
102
     */
103
    double stdev() const
104
0
    {
105
0
        return std::sqrt(variance());
106
0
    }
107
108
    /** \brief Return the population coefficient of variation
109
     */
110
    double coefficent_of_variation() const
111
0
    {
112
0
        return stdev() / mean;
113
0
    }
114
};
115
116
template <typename ValueType> class RasterStats
117
{
118
  public:
119
    /**
120
     * Compute raster statistics from a Raster representing intersection percentages,
121
     * a Raster representing data values, and (optionally) a Raster representing weights.
122
     * and a set of raster values.
123
     */
124
    explicit RasterStats(const RasterStatsOptions &options)
125
0
        : m_min{std::numeric_limits<ValueType>::max()},
126
0
          m_max{std::numeric_limits<ValueType>::lowest()}, m_sum_ciwi{0},
127
0
          m_sum_ci{0}, m_sum_xici{0}, m_sum_xiciwi{0}, m_options{options}
128
0
    {
129
0
    }
130
131
    // All pixels covered 100%
132
    void process(const ValueType *pValues, const GByte *pabyMask,
133
                 const double *padfWeights, const GByte *pabyWeightsMask,
134
                 const double *padfX, const double *padfY, size_t nX, size_t nY)
135
0
    {
136
0
        for (size_t i = 0; i < nX * nY; i++)
137
0
        {
138
0
            if (pabyMask[i] == 255)
139
0
            {
140
0
                if (padfX && padfY)
141
0
                {
142
0
                    process_location(padfX[i % nX], padfY[i / nX]);
143
0
                }
144
0
                const double dfWeight =
145
0
                    pabyWeightsMask
146
0
                        ? (pabyWeightsMask[i] == 255
147
0
                               ? padfWeights[i]
148
0
                               : std::numeric_limits<double>::quiet_NaN())
149
0
                        : 1.0;
150
0
                process_value(pValues[i], 1.0, dfWeight);
151
0
            }
152
0
        }
153
0
    }
154
155
    // Pixels covered 0% or 100%
156
    void process(const ValueType *pValues, const GByte *pabyMask,
157
                 const double *padfWeights, const GByte *pabyWeightsMask,
158
                 const GByte *pabyCov, const double *pdfX, const double *pdfY,
159
                 size_t nX, size_t nY)
160
0
    {
161
0
        for (size_t i = 0; i < nX * nY; i++)
162
0
        {
163
0
            if (pabyMask[i] == 255 && pabyCov[i])
164
0
            {
165
0
                if (pdfX && pdfY)
166
0
                {
167
0
                    process_location(pdfX[i % nX], pdfY[i / nX]);
168
0
                }
169
0
                const double dfWeight =
170
0
                    pabyWeightsMask
171
0
                        ? (pabyWeightsMask[i] == 255
172
0
                               ? padfWeights[i]
173
0
                               : std::numeric_limits<double>::quiet_NaN())
174
0
                        : 1.0;
175
0
                process_value(pValues[i], 1.0, dfWeight);
176
0
            }
177
0
        }
178
0
    }
179
180
    // Pixels fractionally covered
181
    void process(const ValueType *pValues, const GByte *pabyMask,
182
                 const double *padfWeights, const GByte *pabyWeightsMask,
183
                 const float *pfCov, const double *pdfX, const double *pdfY,
184
                 size_t nX, size_t nY)
185
0
    {
186
0
        for (size_t i = 0; i < nX * nY; i++)
187
0
        {
188
0
            if (pabyMask[i] == 255 &&
189
0
                pfCov[i] >= m_options.min_coverage_fraction)
190
0
            {
191
0
                if (pdfX && pdfY)
192
0
                {
193
0
                    process_location(pdfX[i % nX], pdfY[i / nX]);
194
0
                }
195
0
                const double dfWeight =
196
0
                    pabyWeightsMask
197
0
                        ? (pabyWeightsMask[i] == 255
198
0
                               ? padfWeights[i]
199
0
                               : std::numeric_limits<double>::quiet_NaN())
200
0
                        : 1.0;
201
0
                process_value(pValues[i], pfCov[i], dfWeight);
202
0
            }
203
0
        }
204
0
    }
205
206
    void process_location(double x, double y)
207
0
    {
208
0
        if (m_options.store_xy)
209
0
        {
210
0
            m_cell_x.push_back(x);
211
0
            m_cell_y.push_back(y);
212
0
        }
213
0
    }
214
215
    void process_value(const ValueType &val, float coverage, double weight)
216
0
    {
217
0
        if (m_options.store_coverage_fraction)
218
0
        {
219
0
            m_cell_cov.push_back(coverage);
220
0
        }
221
222
0
        m_sum_ci += static_cast<double>(coverage);
223
0
        m_sum_xici += static_cast<double>(val) * static_cast<double>(coverage);
224
225
0
        double ciwi = static_cast<double>(coverage) * weight;
226
0
        m_sum_ciwi += ciwi;
227
0
        m_sum_xiciwi += static_cast<double>(val) * ciwi;
228
229
0
        if (m_options.calc_variance)
230
0
        {
231
0
            m_variance.process(static_cast<double>(val),
232
0
                               static_cast<double>(coverage));
233
0
            m_weighted_variance.process(static_cast<double>(val), ciwi);
234
0
        }
235
236
0
        if (val < m_min)
237
0
        {
238
0
            m_min = val;
239
0
            if (m_options.store_xy)
240
0
            {
241
0
                m_min_xy = {m_cell_x.back(), m_cell_y.back()};
242
0
            }
243
0
        }
244
245
0
        if (val > m_max)
246
0
        {
247
0
            m_max = val;
248
0
            if (m_options.store_xy)
249
0
            {
250
0
                m_max_xy = {m_cell_x.back(), m_cell_y.back()};
251
0
            }
252
0
        }
253
254
0
        if (m_options.store_histogram)
255
0
        {
256
0
            auto &entry = m_freq[val];
257
0
            entry.m_sum_ci += static_cast<double>(coverage);
258
0
            entry.m_sum_ciwi += ciwi;
259
0
        }
260
261
0
        if (m_options.store_values)
262
0
        {
263
0
            m_cell_values.push_back(val);
264
0
        }
265
266
0
        if (m_options.store_weights)
267
0
        {
268
0
            m_cell_weights.push_back(weight);
269
0
        }
270
0
    }
271
272
    /**
273
     * The mean value of cells covered by this polygon, weighted
274
     * by the percent of the cell that is covered.
275
     */
276
    double mean() const
277
0
    {
278
0
        if (count() > 0)
279
0
        {
280
0
            return sum() / count();
281
0
        }
282
0
        else
283
0
        {
284
0
            return std::numeric_limits<double>::quiet_NaN();
285
0
        }
286
0
    }
287
288
    /**
289
     * The mean value of cells covered by this polygon, weighted
290
     * by the percent of the cell that is covered and a secondary
291
     * weighting raster.
292
     *
293
     * If any weights are undefined, will return NAN. If this is undesirable,
294
     * caller should replace undefined weights with a suitable default
295
     * before computing statistics.
296
     */
297
    double weighted_mean() const
298
0
    {
299
0
        if (weighted_count() > 0)
300
0
        {
301
0
            return weighted_sum() / weighted_count();
302
0
        }
303
0
        else
304
0
        {
305
0
            return std::numeric_limits<double>::quiet_NaN();
306
0
        }
307
0
    }
308
309
    /** The fraction of weighted cells to unweighted cells.
310
     *  Meaningful only when the values of the weighting
311
     *  raster are between 0 and 1.
312
     */
313
    double weighted_fraction() const
314
    {
315
        return weighted_sum() / sum();
316
    }
317
318
    /**
319
     * The raster value occupying the greatest number of cells
320
     * or partial cells within the polygon. When multiple values
321
     * cover the same number of cells, the greatest value will
322
     * be returned. Weights are not taken into account.
323
     */
324
    std::optional<ValueType> mode() const
325
0
    {
326
0
        auto it = std::max_element(
327
0
            m_freq.cbegin(), m_freq.cend(),
328
0
            [](const auto &a, const auto &b)
329
0
            {
330
0
                return a.second.m_sum_ci < b.second.m_sum_ci ||
331
0
                       (a.second.m_sum_ci == b.second.m_sum_ci &&
332
0
                        a.first < b.first);
333
0
            });
334
0
        if (it == m_freq.end())
335
0
        {
336
0
            return std::nullopt;
337
0
        }
338
0
        return it->first;
339
0
    }
340
341
    /**
342
     * The minimum value in any raster cell wholly or partially covered
343
     * by the polygon. Weights are not taken into account.
344
     */
345
    std::optional<ValueType> min() const
346
0
    {
347
0
        if (m_sum_ci == 0)
348
0
        {
349
0
            return std::nullopt;
350
0
        }
351
0
        return m_min;
352
0
    }
353
354
    /// XY values corresponding to the center of the cell whose value
355
    /// is returned by min()
356
    std::optional<std::pair<double, double>> min_xy() const
357
0
    {
358
0
        if (m_sum_ci == 0)
359
0
        {
360
0
            return std::nullopt;
361
0
        }
362
0
        return m_min_xy;
363
0
    }
364
365
    /**
366
     * The maximum value in any raster cell wholly or partially covered
367
     * by the polygon. Weights are not taken into account.
368
     */
369
    std::optional<ValueType> max() const
370
0
    {
371
0
        if (m_sum_ci == 0)
372
0
        {
373
0
            return std::nullopt;
374
0
        }
375
0
        return m_max;
376
0
    }
377
378
    /// XY values corresponding to the center of the cell whose value
379
    /// is returned by max()
380
    std::optional<std::pair<double, double>> max_xy() const
381
0
    {
382
0
        if (m_sum_ci == 0)
383
0
        {
384
0
            return std::nullopt;
385
0
        }
386
0
        return m_max_xy;
387
0
    }
388
389
    /**
390
     * The sum of raster cells covered by the polygon, with each raster
391
     * value weighted by its coverage fraction.
392
     */
393
    double sum() const
394
0
    {
395
0
        return m_sum_xici;
396
0
    }
397
398
    /**
399
     * The sum of raster cells covered by the polygon, with each raster
400
     * value weighted by its coverage fraction and weighting raster value.
401
     *
402
     * If any weights are undefined, will return NAN. If this is undesirable,
403
     * caller should replace undefined weights with a suitable default
404
     * before computing statistics.
405
     */
406
    double weighted_sum() const
407
0
    {
408
0
        return m_sum_xiciwi;
409
0
    }
410
411
    /**
412
     * The number of raster cells with any defined value
413
     * covered by the polygon. Weights are not taken
414
     * into account.
415
     */
416
    double count() const
417
0
    {
418
0
        return m_sum_ci;
419
0
    }
420
421
    /**
422
     * The number of raster cells with a specific value
423
     * covered by the polygon. Weights are not taken
424
     * into account.
425
     */
426
    std::optional<double> count(const ValueType &value) const
427
    {
428
        const auto &entry = m_freq.find(value);
429
430
        if (entry == m_freq.end())
431
        {
432
            return std::nullopt;
433
        }
434
435
        return entry->second.m_sum_ci;
436
    }
437
438
    /**
439
     * The fraction of defined raster cells covered by the polygon with
440
     * a value that equals the specified value.
441
     * Weights are not taken into account.
442
     */
443
    std::optional<double> frac(const ValueType &value) const
444
    {
445
        auto count_for_value = count(value);
446
447
        if (!count_for_value.has_value())
448
        {
449
            return count_for_value;
450
        }
451
452
        return count_for_value.value() / count();
453
    }
454
455
    /**
456
     * The weighted fraction of defined raster cells covered by the polygon with
457
     * a value that equals the specified value.
458
     */
459
    std::optional<double> weighted_frac(const ValueType &value) const
460
    {
461
        auto count_for_value = weighted_count(value);
462
463
        if (!count_for_value.has_value())
464
        {
465
            return count_for_value;
466
        }
467
468
        return count_for_value.value() / weighted_count();
469
    }
470
471
    /**
472
     * The population variance of raster cells touched
473
     * by the polygon. Cell coverage fractions are taken
474
     * into account; values of a weighting raster are not.
475
     */
476
    double variance() const
477
0
    {
478
0
        return m_variance.variance();
479
0
    }
480
481
    /**
482
     * The population variance of raster cells touched
483
     * by the polygon, taking into account cell coverage
484
     * fractions and values of a weighting raster.
485
     */
486
    double weighted_variance() const
487
0
    {
488
0
        return m_weighted_variance.variance();
489
0
    }
490
491
    /**
492
     * The population standard deviation of raster cells
493
     * touched by the polygon. Cell coverage fractions
494
     * are taken into account; values of a weighting
495
     * raster are not.
496
     */
497
    double stdev() const
498
0
    {
499
0
        return m_variance.stdev();
500
0
    }
501
502
    /**
503
     * The population standard deviation of raster cells
504
     * touched by the polygon, taking into account cell
505
     * coverage fractions and values of a weighting raster.
506
     */
507
    double weighted_stdev() const
508
0
    {
509
0
        return m_weighted_variance.stdev();
510
0
    }
511
512
    /**
513
     * The sum of weights for each cell covered by the
514
     * polygon, with each weight multiplied by the coverage
515
     * fraction of each cell.
516
     *
517
     * If any weights are undefined, will return NAN. If this is undesirable,
518
     * caller should replace undefined weights with a suitable default
519
     * before computing statistics.
520
     */
521
    double weighted_count() const
522
0
    {
523
0
        return m_sum_ciwi;
524
0
    }
525
526
    /**
527
     * The sum of weights for each cell of a specific value covered by the
528
     * polygon, with each weight multiplied by the coverage fraction
529
     * of each cell.
530
     *
531
     * If any weights are undefined, will return NAN. If this is undesirable,
532
     * caller should replace undefined weights with a suitable default
533
     * before computing statistics.
534
     */
535
    std::optional<double> weighted_count(const ValueType &value) const
536
    {
537
        const auto &entry = m_freq.find(value);
538
539
        if (entry == m_freq.end())
540
        {
541
            return std::nullopt;
542
        }
543
544
        return entry->second.m_sum_ciwi;
545
    }
546
547
    /**
548
     * The raster value occupying the least number of cells
549
     * or partial cells within the polygon. When multiple values
550
     * cover the same number of cells, the lowest value will
551
     * be returned.
552
     *
553
     * Cell weights are not taken into account.
554
     */
555
    std::optional<ValueType> minority() const
556
0
    {
557
0
        auto it = std::min_element(
558
0
            m_freq.cbegin(), m_freq.cend(),
559
0
            [](const auto &a, const auto &b)
560
0
            {
561
0
                return a.second.m_sum_ci < b.second.m_sum_ci ||
562
0
                       (a.second.m_sum_ci == b.second.m_sum_ci &&
563
0
                        a.first < b.first);
564
0
            });
565
0
        if (it == m_freq.end())
566
0
        {
567
0
            return std::nullopt;
568
0
        }
569
0
        return it->first;
570
0
    }
571
572
    /**
573
     * The number of distinct defined raster values in cells wholly
574
     * or partially covered by the polygon.
575
     */
576
    std::uint64_t variety() const
577
0
    {
578
0
        return m_freq.size();
579
0
    }
580
581
    const std::vector<ValueType> &values() const
582
0
    {
583
0
        return m_cell_values;
584
0
    }
585
586
    const std::vector<bool> &values_defined() const
587
    {
588
        return m_cell_values_defined;
589
    }
590
591
    const std::vector<float> &coverage_fractions() const
592
0
    {
593
0
        return m_cell_cov;
594
0
    }
595
596
    const std::vector<double> &weights() const
597
0
    {
598
0
        return m_cell_weights;
599
0
    }
600
601
    const std::vector<bool> &weights_defined() const
602
    {
603
        return m_cell_weights_defined;
604
    }
605
606
    const std::vector<double> &center_x() const
607
0
    {
608
0
        return m_cell_x;
609
0
    }
610
611
    const std::vector<double> &center_y() const
612
0
    {
613
0
        return m_cell_y;
614
0
    }
615
616
    const auto &freq() const
617
0
    {
618
0
        return m_freq;
619
0
    }
620
621
  private:
622
    ValueType m_min{};
623
    ValueType m_max{};
624
    std::pair<double, double> m_min_xy{
625
        std::numeric_limits<double>::quiet_NaN(),
626
        std::numeric_limits<double>::quiet_NaN()};
627
    std::pair<double, double> m_max_xy{
628
        std::numeric_limits<double>::quiet_NaN(),
629
        std::numeric_limits<double>::quiet_NaN()};
630
631
    // ci: coverage fraction of pixel i
632
    // wi: weight of pixel i
633
    // xi: value of pixel i
634
    double m_sum_ciwi{0};
635
    double m_sum_ci{0};
636
    double m_sum_xici{0};
637
    double m_sum_xiciwi{0};
638
639
    WestVariance m_variance{};
640
    WestVariance m_weighted_variance{};
641
642
    struct ValueFreqEntry
643
    {
644
        double m_sum_ci = 0;
645
        double m_sum_ciwi = 0;
646
    };
647
648
    std::unordered_map<ValueType, ValueFreqEntry> m_freq{};
649
650
    std::vector<float> m_cell_cov{};
651
    std::vector<ValueType> m_cell_values{};
652
    std::vector<double> m_cell_weights{};
653
    std::vector<double> m_cell_x{};
654
    std::vector<double> m_cell_y{};
655
    std::vector<bool> m_cell_values_defined{};
656
    std::vector<bool> m_cell_weights_defined{};
657
658
    RasterStatsOptions m_options;
659
};
660
661
template <typename T>
662
std::ostream &operator<<(std::ostream &os, const RasterStats<T> &stats)
663
{
664
    os << "{" << std::endl;
665
    os << "  \"count\" : " << stats.count() << "," << std::endl;
666
667
    os << "  \"min\" : ";
668
    if (stats.min().has_value())
669
    {
670
        os << stats.min().value();
671
    }
672
    else
673
    {
674
        os << "null";
675
    }
676
    os << "," << std::endl;
677
678
    os << "  \"max\" : ";
679
    if (stats.max().has_value())
680
    {
681
        os << stats.max().value();
682
    }
683
    else
684
    {
685
        os << "null";
686
    }
687
    os << "," << std::endl;
688
689
    os << "  \"mean\" : " << stats.mean() << "," << std::endl;
690
    os << "  \"sum\" : " << stats.sum() << "," << std::endl;
691
    os << "  \"weighted_mean\" : " << stats.weighted_mean() << "," << std::endl;
692
    os << "  \"weighted_sum\" : " << stats.weighted_sum();
693
    if (stats.stores_values())
694
    {
695
        os << "," << std::endl;
696
        os << "  \"mode\" : ";
697
        if (stats.mode().has_value())
698
        {
699
            os << stats.mode().value();
700
        }
701
        else
702
        {
703
            os << "null";
704
        }
705
        os << "," << std::endl;
706
707
        os << "  \"minority\" : ";
708
        if (stats.minority().has_value())
709
        {
710
            os << stats.minority().value();
711
        }
712
        else
713
        {
714
            os << "null";
715
        }
716
        os << "," << std::endl;
717
718
        os << "  \"variety\" : " << stats.variety() << std::endl;
719
    }
720
    else
721
    {
722
        os << std::endl;
723
    }
724
    os << "}" << std::endl;
725
    return os;
726
}
727
728
}  // namespace gdal
729
730
//! @endcond