1
#pragma once
2

            
3
#include <cstdint>
4
#include <string>
5

            
6
#include "source/extensions/tracers/opentelemetry/samplers/dynatrace/sampler_config_provider.h"
7
#include "source/extensions/tracers/opentelemetry/samplers/dynatrace/stream_summary.h"
8

            
9
#include "absl/synchronization/mutex.h"
10

            
11
namespace Envoy {
12
namespace Extensions {
13
namespace Tracers {
14
namespace OpenTelemetry {
15

            
16
/**
 * @brief Container for a sampling exponent and the multiplicity derived from it.
 *
 * The multiplicity is 2^exponent. A request is sampled when its random number is divisible by
 * the multiplicity, so an exponent of 0 samples every request.
 *
 * Based on the "Space Saving algorithm", AKA "HeavyHitter". See:
 * https://cse.hkust.edu.hk/~raywong/comp5331/References/EfficientComputationOfFrequentAndTop-kElementsInDataStreams.pdf
 */
class SamplingState {
public:
  /**
   * @brief Converts an exponent to a multiplicity (2^exponent).
   *
   * The shift is done on an unsigned 32 bit value and the shift width is saturated at 31.
   * Shifting a signed int into its sign bit, or shifting by 32 or more, would be undefined
   * behavior; saturating also guarantees the result is never 0, so it is always a valid divisor.
   *
   * @param exponent The sampling exponent.
   * @return 2^exponent for exponents up to 31, 2^31 for larger exponents.
   */
  [[nodiscard]] static uint32_t toMultiplicity(uint32_t exponent) {
    constexpr uint32_t max_shift = 31;
    return uint32_t{1} << (exponent < max_shift ? exponent : max_shift);
  }

  /** @return the current sampling exponent. */
  [[nodiscard]] uint32_t getExponent() const { return exponent_; }

  /** @return the multiplicity which belongs to the current exponent. */
  [[nodiscard]] uint32_t getMultiplicity() const { return toMultiplicity(exponent_); }

  /** @brief Increments the exponent by one. No upper bound is applied here. */
  void increaseExponent() { exponent_++; }

  /** @brief Decrements the exponent by one. An exponent of 0 is left unchanged. */
  void decreaseExponent() {
    if (exponent_ > 0) {
      exponent_--;
    }
  }

  /**
   * @brief Creates a state with the given exponent.
   *
   * @param exponent Initial sampling exponent.
   */
  explicit SamplingState(uint32_t exponent) : exponent_(exponent) {}

  /** @brief Creates a state with exponent 0 (multiplicity 1). */
  SamplingState() = default;

  /**
   * @brief Does a sampling decision based on random number attribute and multiplicity
   *
   * @param random_nr Random number used for sampling decision.
   * @return true if request should be sampled, false otherwise
   */
  bool shouldSample(const uint64_t random_nr) const { return (random_nr % getMultiplicity() == 0); }

private:
  uint32_t exponent_{0};
};
51

            
52
using StreamSummaryT = StreamSummary<std::string>;
53
using TopKListT = std::list<Counter<std::string>>;
54

            
55
/**
56
 * @brief Counts the requests per sampling key in the current period. Calculates the sampling
57
 * exponents based on the request count in the latest period.
58
 *
59
 */
60
class SamplingController : public Logger::Loggable<Logger::Id::tracing> {
61

            
62
public:
63
  explicit SamplingController(SamplerConfigProviderPtr sampler_config_provider)
64
32
      : stream_summary_(std::make_unique<StreamSummaryT>(STREAM_SUMMARY_SIZE)),
65
32
        sampler_config_provider_(std::move(sampler_config_provider)) {}
66

            
67
  /**
68
   * @brief Trigger calculating the sampling exponents based on the request count since last update
69
   *
70
   */
71
  void update();
72

            
73
  /**
74
   * @brief Get the Sampling State object for a sampling key
75
   *
76
   * @param sampling_key Sampling Key to search for
77
   * @return SamplingState Current Sampling State for key
78
   */
79
  SamplingState getSamplingState(const std::string& sampling_key) const;
80

            
81
  /**
82
   * @brief Returns the number of spans which would have been sampled in the last period using the
83
   * current sampling states
84
   *
85
   * @return effective count
86
   */
87
  uint64_t getEffectiveCount() const;
88

            
89
  /**
90
   * @brief Counts the occurrence of sampling_key
91
   *
92
   * @param sampling_key Sampling Key used to categorize the request
93
   */
94
  void offer(const std::string& sampling_key);
95

            
96
  /**
97
   * @brief Creates the Sampling Key which is used to categorize a request
98
   *
99
   * @param path_query The request path. May contain the query.
100
   * @param method The request method.
101
   * @return The sampling key.
102
   */
103
  static std::string getSamplingKey(const absl::string_view path_query,
104
                                    const absl::string_view method);
105

            
106
  static constexpr size_t STREAM_SUMMARY_SIZE{100};
107
  static constexpr uint32_t MAX_SAMPLING_EXPONENT = (1 << 4) - 1; // 15
108

            
109
private:
110
  using SamplingExponentsT = absl::flat_hash_map<std::string, SamplingState>;
111
  SamplingExponentsT sampling_exponents_;
112
  mutable absl::Mutex sampling_exponents_mutex_{};
113
  std::string rest_bucket_key_{};
114
  std::unique_ptr<StreamSummaryT> stream_summary_;
115
  uint64_t last_effective_count_{};
116
  mutable absl::Mutex stream_summary_mutex_{};
117
  SamplerConfigProviderPtr sampler_config_provider_;
118

            
119
  void logSamplingInfo(const TopKListT& top_k, const SamplingExponentsT& new_sampling_exponents,
120
                       uint64_t last_period_count, uint32_t total_wanted) const;
121

            
122
  static uint64_t calculateEffectiveCount(const TopKListT& top_k,
123
                                          const SamplingExponentsT& sampling_exponents);
124

            
125
  void calculateSamplingExponents(const TopKListT& top_k, uint32_t total_wanted,
126
                                  SamplingExponentsT& new_sampling_exponents) const;
127

            
128
  void update(const TopKListT& top_k, uint64_t last_period_count, uint32_t total_wanted);
129
};
130

            
131
} // namespace OpenTelemetry
132
} // namespace Tracers
133
} // namespace Extensions
134
} // namespace Envoy