1
#include "source/extensions/clusters/strict_dns/strict_dns_cluster.h"
2

            
3
#include <chrono>
4

            
5
#include "envoy/common/exception.h"
6
#include "envoy/config/cluster/v3/cluster.pb.h"
7
#include "envoy/config/endpoint/v3/endpoint.pb.h"
8
#include "envoy/config/endpoint/v3/endpoint_components.pb.h"
9
#include "envoy/extensions/clusters/dns/v3/dns_cluster.pb.h"
10

            
11
#include "source/common/common/dns_utils.h"
12
#include "source/common/network/dns_resolver/dns_factory_util.h"
13
#include "source/extensions/clusters/common/dns_cluster_backcompat.h"
14

            
15
namespace Envoy {
16
namespace Upstream {
17

            
18
// Factory for STRICT_DNS clusters.
//
// The constructor reports recoverable configuration problems through an `absl::Status`
// out-parameter instead of a return value, so the object is built first and only handed back to
// the caller when that status is still OK.
//
// @param cluster       the cluster proto, forwarded to the constructor.
// @param dns_cluster   the DNS cluster proto, forwarded to the constructor.
// @param context       the cluster factory context, forwarded to the constructor.
// @param dns_resolver  the resolver the new cluster will use; ownership is passed on.
// @return the new cluster, or the non-OK status recorded during construction.
absl::StatusOr<std::unique_ptr<StrictDnsClusterImpl>>
StrictDnsClusterImpl::create(const envoy::config::cluster::v3::Cluster& cluster,
                             const envoy::extensions::clusters::dns::v3::DnsCluster& dns_cluster,
                             ClusterFactoryContext& context,
                             Network::DnsResolverSharedPtr dns_resolver) {
  absl::Status creation_status = absl::OkStatus();
  std::unique_ptr<StrictDnsClusterImpl> new_cluster(new StrictDnsClusterImpl(
      cluster, dns_cluster, context, std::move(dns_resolver), creation_status));

  // Discard the partially initialized object if construction flagged an error.
  if (!creation_status.ok()) {
    return creation_status;
  }
  return new_cluster;
}
30

            
31
// Builds a STRICT_DNS cluster from its configuration.
//
// The member initializers capture the load assignment and the DNS settings from `dns_cluster`
// (refresh rate defaulting to 5000 ms, jitter defaulting to 0 ms, TTL handling and lookup family).
// The body then creates one ResolveTarget per configured endpoint address; resolution itself is
// not started here.
//
// Error reporting is mixed: a failure recorded in `creation_status` by the base class makes the
// constructor return early, while endpoint validation failures and a configured custom resolver
// name are reported by throwing.
//
// @param cluster          cluster proto; its load assignment is copied into this object.
// @param dns_cluster      DNS specific settings.
// @param context          factory context providing local info, dispatcher and random generator.
// @param dns_resolver     resolver used by every resolve target of this cluster.
// @param creation_status  in/out status shared with the base class constructor.
StrictDnsClusterImpl::StrictDnsClusterImpl(
    const envoy::config::cluster::v3::Cluster& cluster,
    const envoy::extensions::clusters::dns::v3::DnsCluster& dns_cluster,
    ClusterFactoryContext& context, Network::DnsResolverSharedPtr dns_resolver,
    absl::Status& creation_status)
    : BaseDynamicClusterImpl(cluster, context, creation_status),
      load_assignment_(cluster.load_assignment()),
      local_info_(context.serverFactoryContext().localInfo()),
      // `dns_resolver` is a by-value sink parameter that is not read again, so move it into the
      // member instead of paying for an extra shared_ptr reference count round trip.
      dns_resolver_(std::move(dns_resolver)),
      dns_refresh_rate_ms_(std::chrono::milliseconds(
          PROTOBUF_GET_MS_OR_DEFAULT(dns_cluster, dns_refresh_rate, 5000))),
      dns_jitter_ms_(PROTOBUF_GET_MS_OR_DEFAULT(dns_cluster, dns_jitter, 0)),
      respect_dns_ttl_(dns_cluster.respect_dns_ttl()),
      dns_lookup_family_(
          Envoy::DnsUtils::getDnsLookupFamilyFromEnum(dns_cluster.dns_lookup_family())) {
  // Nothing below is meaningful if the base class already failed to initialize.
  RETURN_ONLY_IF_NOT_OK_REF(creation_status);

  failure_backoff_strategy_ = Config::Utility::prepareDnsRefreshStrategy(
      dns_cluster, dns_refresh_rate_ms_.count(),
      context.serverFactoryContext().api().randomGenerator());

  // Collect the targets in a local list and publish them only once every endpoint was accepted.
  std::list<ResolveTargetPtr> resolve_targets;
  const auto& locality_lb_endpoints = load_assignment_.endpoints();
  THROW_IF_NOT_OK(validateEndpoints(locality_lb_endpoints, {}));
  for (const auto& locality_lb_endpoint : locality_lb_endpoints) {
    for (const auto& lb_endpoint : locality_lb_endpoint.lb_endpoints()) {
      const auto& socket_address = lb_endpoint.endpoint().address().socket_address();
      if (!socket_address.resolver_name().empty()) {
        throw EnvoyException("STRICT_DNS clusters must NOT have a custom resolver name set");
      }

      // make_unique keeps the target owned at all times; handing a raw `new` result to
      // emplace_back would leak it if the list node allocation threw.
      resolve_targets.emplace_back(std::make_unique<ResolveTarget>(
          *this, context.serverFactoryContext().mainThreadDispatcher(), socket_address.address(),
          socket_address.port_value(), locality_lb_endpoint, lb_endpoint));
    }
  }
  resolve_targets_ = std::move(resolve_targets);

  overprovisioning_factor_ = PROTOBUF_GET_WRAPPED_OR_DEFAULT(
      load_assignment_.policy(), overprovisioning_factor, kDefaultOverProvisioningFactor);
  weighted_priority_health_ = load_assignment_.policy().weighted_priority_health();
}
72

            
73
139
void StrictDnsClusterImpl::startPreInit() {
74
149
  for (const ResolveTargetPtr& target : resolve_targets_) {
75
127
    target->startResolve();
76
127
  }
77
  // If the config provides no endpoints, the cluster is initialized immediately as if all hosts are
78
  // resolved in failure.
79
139
  if (resolve_targets_.empty() || !wait_for_warm_on_init_) {
80
24
    onPreInitComplete();
81
24
  }
82
139
}
83

            
84
void StrictDnsClusterImpl::updateAllHosts(const HostVector& hosts_added,
85
                                          const HostVector& hosts_removed,
86
75
                                          uint32_t current_priority) {
87
75
  PriorityStateManager priority_state_manager(*this, local_info_, nullptr);
88
  // At this point we know that we are different so make a new host list and notify.
89
  //
90
  // TODO(dio): The uniqueness of a host address resolved in STRICT_DNS cluster per priority is not
91
  // guaranteed. Need a clear agreement on the behavior here, whether it is allowable to have
92
  // duplicated hosts inside a priority. And if we want to enforce this behavior, it should be done
93
  // inside the priority state manager.
94
99
  for (const ResolveTargetPtr& target : resolve_targets_) {
95
99
    priority_state_manager.initializePriorityFor(target->locality_lb_endpoints_);
96
105
    for (const HostSharedPtr& host : target->hosts_) {
97
105
      if (target->locality_lb_endpoints_.priority() == current_priority) {
98
103
        priority_state_manager.registerHostForPriority(host, target->locality_lb_endpoints_);
99
103
      }
100
105
    }
101
99
  }
102

            
103
  // TODO(dio): Add assertion in here.
104
75
  priority_state_manager.updateClusterPrioritySet(
105
75
      current_priority, std::move(priority_state_manager.priorityState()[current_priority].first),
106
75
      hosts_added, hosts_removed, absl::nullopt, weighted_priority_health_,
107
75
      overprovisioning_factor_);
108
75
}
109

            
110
// Describes one DNS name of the cluster that has to be resolved periodically.
//
// @param parent                the owning cluster.
// @param dispatcher            dispatcher on which the refresh timer is created.
// @param dns_address           the name to resolve.
// @param dns_port              port combined with every resolved address.
// @param locality_lb_endpoint  the locality group the endpoint was configured in.
// @param lb_endpoint           the endpoint configuration this target was created for.
StrictDnsClusterImpl::ResolveTarget::ResolveTarget(
    StrictDnsClusterImpl& parent, Event::Dispatcher& dispatcher, const std::string& dns_address,
    const uint32_t dns_port,
    const envoy::config::endpoint::v3::LocalityLbEndpoints& locality_lb_endpoint,
    const envoy::config::endpoint::v3::LbEndpoint& lb_endpoint)
    : parent_(parent),
      locality_lb_endpoints_(locality_lb_endpoint),
      lb_endpoint_(lb_endpoint),
      dns_address_(dns_address),
      // An explicitly configured endpoint hostname wins; otherwise the DNS name doubles as the
      // hostname.
      hostname_(lb_endpoint_.endpoint().hostname().empty() ? dns_address_
                                                           : lb_endpoint_.endpoint().hostname()),
      port_(dns_port),
      // Each timer expiry simply triggers another resolution.
      resolve_timer_(dispatcher.createTimer([this] { startResolve(); })) {}
121

            
122
172
// Cancels a DNS query that is still in flight; its completion callback captures `this`.
StrictDnsClusterImpl::ResolveTarget::~ResolveTarget() {
  if (active_query_ == nullptr) {
    return;
  }
  active_query_->cancel(Network::ActiveDnsQuery::CancelReason::QueryAbandoned);
}
127

            
128
161
void StrictDnsClusterImpl::ResolveTarget::startResolve() {
129
161
  ENVOY_LOG(trace, "starting async DNS resolution for {}", dns_address_);
130
161
  parent_.info_->configUpdateStats().update_attempt_.inc();
131

            
132
161
  active_query_ = parent_.dns_resolver_->resolve(
133
161
      dns_address_, parent_.dns_lookup_family_,
134
161
      [this](Network::DnsResolver::ResolutionStatus status, absl::string_view details,
135
163
             std::list<Network::DnsResponse>&& response) -> void {
136
92
        active_query_ = nullptr;
137
92
        ENVOY_LOG(trace, "async DNS resolution complete for {} details {}", dns_address_, details);
138

            
139
92
        std::chrono::milliseconds final_refresh_rate = parent_.dns_refresh_rate_ms_;
140

            
141
92
        if (status == Network::DnsResolver::ResolutionStatus::Completed) {
142
86
          parent_.info_->configUpdateStats().update_success_.inc();
143

            
144
86
          HostVector new_hosts;
145
86
          std::chrono::seconds ttl_refresh_rate = std::chrono::seconds::max();
146
86
          absl::flat_hash_set<std::string> all_new_hosts;
147
112
          for (const auto& resp : response) {
148
112
            const auto& addrinfo = resp.addrInfo();
149
            // TODO(mattklein123): Currently the DNS interface does not consider port. We need to
150
            // make a new address that has port in it. We need to both support IPv6 as well as
151
            // potentially move port handling into the DNS interface itself, which would work better
152
            // for SRV.
153
112
            ASSERT(addrinfo.address_ != nullptr);
154
112
            auto address = Network::Utility::getAddressWithPort(*(addrinfo.address_), port_);
155
112
            if (all_new_hosts.count(address->asString()) > 0) {
156
4
              continue;
157
4
            }
158

            
159
108
            new_hosts.emplace_back(THROW_OR_RETURN_VALUE(
160
108
                HostImpl::create(
161
108
                    parent_.info_, hostname_, address,
162
                    // TODO(zyfjeff): Created through metadata shared pool
163
108
                    std::make_shared<const envoy::config::core::v3::Metadata>(
164
108
                        lb_endpoint_.metadata()),
165
108
                    std::make_shared<const envoy::config::core::v3::Metadata>(
166
108
                        locality_lb_endpoints_.metadata()),
167
108
                    lb_endpoint_.load_balancing_weight().value(),
168
108
                    parent_.constLocalitySharedPool()->getObject(locality_lb_endpoints_.locality()),
169
108
                    lb_endpoint_.endpoint().health_check_config(),
170
108
                    locality_lb_endpoints_.priority(), lb_endpoint_.health_status()),
171
108
                std::unique_ptr<HostImpl>));
172
108
            all_new_hosts.emplace(address->asString());
173
108
            ttl_refresh_rate = min(ttl_refresh_rate, addrinfo.ttl_);
174
108
          }
175

            
176
86
          HostVector hosts_added;
177
86
          HostVector hosts_removed;
178
86
          if (parent_.updateDynamicHostList(new_hosts, hosts_, hosts_added, hosts_removed,
179
86
                                            all_hosts_, all_new_hosts)) {
180
75
            ENVOY_LOG(debug, "DNS hosts have changed for {}", dns_address_);
181
75
            ASSERT(std::all_of(hosts_.begin(), hosts_.end(), [&](const auto& host) {
182
75
              return host->priority() == locality_lb_endpoints_.priority();
183
75
            }));
184

            
185
            // Update host map for current resolve target.
186
75
            for (const auto& host : hosts_removed) {
187
21
              all_hosts_.erase(host->address()->asString());
188
21
            }
189
86
            for (const auto& host : hosts_added) {
190
82
              all_hosts_.insert({host->address()->asString(), host});
191
82
            }
192

            
193
75
            parent_.updateAllHosts(hosts_added, hosts_removed, locality_lb_endpoints_.priority());
194
75
          } else {
195
11
            parent_.info_->configUpdateStats().update_no_rebuild_.inc();
196
11
          }
197

            
198
          // reset failure backoff strategy because there was a success.
199
86
          parent_.failure_backoff_strategy_->reset();
200

            
201
86
          if (!response.empty() && parent_.respect_dns_ttl_ &&
202
86
              ttl_refresh_rate != std::chrono::seconds(0)) {
203
5
            final_refresh_rate = ttl_refresh_rate;
204
5
            ASSERT(ttl_refresh_rate != std::chrono::seconds::max() &&
205
5
                   final_refresh_rate.count() > 0);
206
5
          }
207
86
          if (parent_.dns_jitter_ms_.count() > 0) {
208
            // Note that `parent_.random_.random()` returns a uint64 while
209
            // `parent_.dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64.
210
            // Thus, the modulo of the two will be a positive as long as
211
            // `parent_dns_jitter_ms_.count()` is positive.
212
            // It is important that this be positive, otherwise `final_refresh_rate` could be
213
            // negative causing Envoy to crash.
214
2
            final_refresh_rate += std::chrono::milliseconds(parent_.random_.random() %
215
2
                                                            parent_.dns_jitter_ms_.count());
216
2
          }
217

            
218
86
          ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_,
219
86
                    final_refresh_rate.count());
220
86
        } else {
221
6
          parent_.info_->configUpdateStats().update_failure_.inc();
222

            
223
6
          final_refresh_rate =
224
6
              std::chrono::milliseconds(parent_.failure_backoff_strategy_->nextBackOffMs());
225
6
          ENVOY_LOG(debug, "DNS refresh rate reset for {}, (failure) refresh rate {} ms",
226
6
                    dns_address_, final_refresh_rate.count());
227
6
        }
228

            
229
        // If there is an initialize callback, fire it now. Note that if the cluster refers to
230
        // multiple DNS names, this will return initialized after a single DNS resolution
231
        // completes. This is not perfect but is easier to code and unclear if the extra
232
        // complexity is needed so will start with this.
233
92
        parent_.onPreInitComplete();
234
92
        resolve_timer_->enableTimer(final_refresh_rate);
235
92
      });
236
161
}
237

            
238
} // namespace Upstream
239
} // namespace Envoy