1
#include "source/extensions/common/dynamic_forward_proxy/dns_cache_impl.h"
2

            
3
#include "envoy/extensions/common/dynamic_forward_proxy/v3/dns_cache.pb.h"
4

            
5
#include "source/common/common/dns_utils.h"
6
#include "source/common/common/stl_helpers.h"
7
#include "source/common/config/utility.h"
8
#include "source/common/http/utility.h"
9
#include "source/common/network/dns_resolver/dns_factory_util.h"
10
#include "source/common/network/utility.h"
11
#include "source/common/runtime/runtime_features.h"
12

            
13
namespace Envoy {
14
namespace Extensions {
15
namespace Common {
16
namespace DynamicForwardProxy {
17

            
18
// Factory for DnsCacheImpl. Validates the configuration, selects a DNS resolver and, if both
// succeed, returns a newly constructed cache. Returns InvalidArgument when more preresolve
// hostnames are configured than the cache is allowed to hold, or the resolver's error status when
// resolver creation fails.
absl::StatusOr<std::shared_ptr<DnsCacheImpl>> DnsCacheImpl::createDnsCacheImpl(
    Server::Configuration::GenericFactoryContext& context,
    const envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig& config) {
  // max_hosts defaults to 1024 when it is not set in the config.
  const uint32_t host_limit = PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, max_hosts, 1024);
  const auto& preresolve_list = config.preresolve_hostnames();
  if (static_cast<size_t>(preresolve_list.size()) > host_limit) {
    return absl::InvalidArgumentError(fmt::format(
        "DNS Cache [{}] configured with preresolve_hostnames={} larger than max_hosts={}",
        config.name(), preresolve_list.size(), host_limit));
  }

  auto& server_context = context.serverFactoryContext();
  auto resolver = selectDnsResolver(config, server_context.mainThreadDispatcher(), server_context);
  RETURN_IF_NOT_OK_REF(resolver.status());

  // The constructor is invoked directly with `new` rather than through std::make_shared.
  std::shared_ptr<DnsCacheImpl> cache(new DnsCacheImpl(context, config, std::move(*resolver)));
  return cache;
}
35

            
36
// Constructs the cache from `config`, taking ownership of `resolver`.
//
// Timing defaults applied when the corresponding config field is unset: dns_refresh_rate 60000 ms,
// dns_min_refresh_rate 5000 ms, dns_query_timeout 5000 ms, host_ttl 300000 ms. max_hosts defaults
// to 1024. After member initialization the constructor installs the thread local host info,
// calls loadCacheEntries() and starts a cache load for every configured preresolve hostname.
DnsCacheImpl::DnsCacheImpl(
    Server::Configuration::GenericFactoryContext& context,
    const envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig& config,
    Network::DnsResolverSharedPtr&& resolver)
    : main_thread_dispatcher_(context.serverFactoryContext().mainThreadDispatcher()),
      config_(config),
      random_generator_(context.serverFactoryContext().api().randomGenerator()),
      dns_lookup_family_(DnsUtils::getDnsLookupFamilyFromEnum(config.dns_lookup_family())),
      resolver_(std::move(resolver)),
      tls_slot_(context.serverFactoryContext().threadLocal()),
      scope_(context.scope().createScope(fmt::format("dns_cache.{}.", config.name()))),
      stats_(generateDnsCacheStats(*scope_)),
      resource_manager_(*scope_, context.serverFactoryContext().runtime(), config.name(),
                        config.dns_cache_circuit_breaker()),
      refresh_interval_(PROTOBUF_GET_MS_OR_DEFAULT(config, dns_refresh_rate, 60000)),
      min_refresh_interval_(PROTOBUF_GET_MS_OR_DEFAULT(config, dns_min_refresh_rate, 5000)),
      timeout_interval_(PROTOBUF_GET_MS_OR_DEFAULT(config, dns_query_timeout, 5000)),
      file_system_(context.serverFactoryContext().api().fileSystem()),
      validation_visitor_(context.messageValidationVisitor()),
      host_ttl_(PROTOBUF_GET_MS_OR_DEFAULT(config, host_ttl, 300000)),
      max_hosts_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, max_hosts, 1024)) {
  // Each thread gets its own ThreadLocalHostInfo that refers back to this cache.
  tls_slot_.set([this](Event::Dispatcher&) {
    return std::make_shared<ThreadLocalHostInfo>(*this);
  });

  loadCacheEntries(config);

  // We are still constructing the object, so the preresolve hostnames are handled here without
  // first taking a read lock on the primary hosts.
  for (const auto& preresolve : config.preresolve_hostnames()) {
    // The resolution handle is intentionally not kept: all we want is for the cache to end up
    // with an entry. If this particular resolution fails, all that is lost is the potential
    // optimization of having the entry already resolved the first time a real consumer of this
    // DNS cache asks for it.
    const auto port = preresolve.port_value();
    const std::string normalized_host =
        DnsHostInfo::normalizeHostForDfp(preresolve.address(), port);
    ENVOY_LOG(debug, "DNS pre-resolve starting for host {}", normalized_host);
    startCacheLoad(normalized_host, port, /*is_proxy_lookup=*/false,
                   /*ignore_cached_entries=*/false);
  }

  // Read the runtime flag that controls whether DNS traces are appended to resolution details.
  enable_dfp_dns_trace_ = context.serverFactoryContext().runtime().snapshot().getBoolean(
      "envoy.enable_dfp_dns_trace", false);
}
74

            
75
168
// Tears the cache down: abandons every DNS query that is still in flight and cancels all
// registered update callback handles.
DnsCacheImpl::~DnsCacheImpl() {
  for (const auto& entry : primary_hosts_) {
    if (entry.second->active_query_ == nullptr) {
      continue;
    }
    entry.second->active_query_->cancel(Network::ActiveDnsQuery::CancelReason::QueryAbandoned);
  }

  for (auto* callbacks_handle : update_callbacks_) {
    callbacks_handle->cancel();
  }
}
87

            
88
// Picks the DNS resolver factory to use for this cache and asks it to create a resolver.
//
// @param config                  DNS cache configuration.
// @param main_thread_dispatcher  dispatcher handed to the created resolver.
// @param context                 provides the API object and, through it, the bootstrap config.
// @return the created resolver, or whatever error status the factory reports.
absl::StatusOr<Network::DnsResolverSharedPtr> DnsCacheImpl::selectDnsResolver(
    const envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig& config,
    Event::Dispatcher& main_thread_dispatcher,
    Server::Configuration::CommonFactoryContext& context) {
  envoy::config::core::v3::TypedExtensionConfig typed_dns_resolver_config;
  // Initialized explicitly so the pointer never holds an indeterminate value; both branches below
  // assign it before use.
  Network::DnsResolverFactory* dns_resolver_factory = nullptr;

  // If DnsCacheConfig doesn't carry any DNS related configuration, and the default DNS resolver
  // (i.e. the typed_dns_resolver_config in the bootstrap configuration) is not empty, then the
  // default DNS resolver is used. Otherwise the factory is derived from the DnsCacheConfig.
  if (!config.has_typed_dns_resolver_config() && !config.has_dns_resolution_config() &&
      context.api().bootstrap().has_typed_dns_resolver_config() &&
      !(context.api().bootstrap().typed_dns_resolver_config().typed_config().type_url().empty())) {
    typed_dns_resolver_config = context.api().bootstrap().typed_dns_resolver_config();
    dns_resolver_factory =
        &Network::createDnsResolverFactoryFromTypedConfig(typed_dns_resolver_config);
  } else {
    dns_resolver_factory =
        &Network::createDnsResolverFactoryFromProto(config, typed_dns_resolver_config);
  }
  return dns_resolver_factory->createDnsResolver(main_thread_dispatcher, context.api(),
                                                 typed_dns_resolver_config);
}
111

            
112
168
// Instantiates the DNS cache counters and gauges listed by ALL_DNS_CACHE_STATS in `scope`.
DnsCacheStats DnsCacheImpl::generateDnsCacheStats(Stats::Scope& scope) {
  return DnsCacheStats{ALL_DNS_CACHE_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope))};
}
115

            
116
// Looks up `raw_host` (normalized with `default_port`) in the cache.
//
// Outcomes:
//  - InCache: an entry whose first resolution has completed exists and is usable; the host info
//    is returned directly.
//  - Overflow: no usable entry and the cache already holds at least max_hosts_ entries.
//  - Loading: a cache load is posted to the main thread and a handle registering `callbacks` in
//    this thread's pending resolutions is returned.
//
// A cached entry is ignored (treated as a miss) when `force_refresh` is set, when it has a null
// address and the reresolve_null_addresses runtime feature is on, or when its last resolution
// failed and disable_dns_refresh_on_failure is configured. The latter two only apply to
// non-proxy lookups.
DnsCacheImpl::LoadDnsCacheEntryResult
DnsCacheImpl::loadDnsCacheEntryWithForceRefresh(absl::string_view raw_host, uint16_t default_port,
                                                bool is_proxy_lookup, bool force_refresh,
                                                LoadDnsCacheEntryCallbacks& callbacks) {
  std::string host = DnsHostInfo::normalizeHostForDfp(raw_host, default_port);
  ENVOY_LOG(debug, "thread local lookup for host '{}' {}", host,
            is_proxy_lookup ? "proxy mode " : "");
  ThreadLocalHostInfo& thread_local_info = *tls_slot_;

  // Take what is needed from the shared host map while holding the read lock, then release it.
  bool cache_full = false;
  absl::optional<DnsHostInfoSharedPtr> cached_info;
  {
    absl::ReaderMutexLock read_lock{primary_hosts_lock_};
    cache_full = primary_hosts_.size() >= max_hosts_;
    const auto entry = primary_hosts_.find(host);
    if (entry != primary_hosts_.end() && entry->second->host_info_->firstResolveComplete()) {
      cached_info = entry->second->host_info_;
    }
  }

  bool ignore_cached_entries = force_refresh;
  if (cached_info.has_value()) {
    ENVOY_LOG(debug, "cache hit for host '{}'", host);
    const bool resolver_lookup = !is_proxy_lookup;
    if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.reresolve_null_addresses") &&
        resolver_lookup && *cached_info && (*cached_info)->address() == nullptr) {
      ENVOY_LOG(debug, "ignoring null address cache hit for miss for host '{}'", host);
      ignore_cached_entries = true;
    }
    if (config_.disable_dns_refresh_on_failure() && resolver_lookup &&
        (*cached_info)->resolutionStatus() == Network::DnsResolver::ResolutionStatus::Failure) {
      ENVOY_LOG(debug, "ignoring failed address cache hit for miss for host '{}'", host);
      ignore_cached_entries = true;
    }
    if (!ignore_cached_entries) {
      return {LoadDnsCacheEntryStatus::InCache, nullptr, cached_info};
    }
  }

  if (cache_full) {
    ENVOY_LOG(debug, "DNS cache overflow for host '{}'", host);
    stats_.host_overflow_.inc();
    return {LoadDnsCacheEntryStatus::Overflow, nullptr, absl::nullopt};
  }

  // The host map is only modified on the main thread, so the load itself is posted there. The
  // closure owns its own copy of the host string.
  ENVOY_LOG(debug, "cache miss for host '{}', posting to main thread", host);
  main_thread_dispatcher_.post(
      [this, host, default_port, is_proxy_lookup, ignore_cached_entries]() {
        startCacheLoad(host, default_port, is_proxy_lookup, ignore_cached_entries);
      });
  return {LoadDnsCacheEntryStatus::Loading,
          std::make_unique<LoadDnsCacheEntryHandleImpl>(thread_local_info.pending_resolutions_,
                                                        host, callbacks),
          absl::nullopt};
}
170

            
171
97
// Tries to reserve a slot in the pending-requests resource. Returns an object tied to that
// resource on success, or nullptr (after bumping the dns_rq_pending_overflow stat) when the
// resource reports that no further request can be created.
Upstream::ResourceAutoIncDecPtr DnsCacheImpl::canCreateDnsRequest() {
  auto& pending = resource_manager_.pendingRequests();
  if (pending.canCreate()) {
    return std::make_unique<Upstream::ResourceAutoIncDec>(pending);
  }
  stats_.dns_rq_pending_overflow_.inc();
  return nullptr;
}
179

            
180
110
// Invokes `iterate_callback` with the name and host info of every primary host that has an
// address. The read lock on the host map is held for the whole iteration.
void DnsCacheImpl::iterateHostMap(IterateHostMapCb iterate_callback) {
  absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
  for (const auto& entry : primary_hosts_) {
    // Hosts that have never resolved to an address are not reported.
    if (entry.second->host_info_->address() == nullptr) {
      continue;
    }
    iterate_callback(entry.first, entry.second->host_info_);
  }
}
189

            
190
7
// Returns the host info stored under `host_name`, or an empty optional when there is no such
// host or the host has no address.
absl::optional<const DnsHostInfoSharedPtr> DnsCacheImpl::getHost(absl::string_view host_name) {
  // Copy the shared pointer out while holding the read lock; the checks below run unlocked.
  DnsHostInfoSharedPtr found;
  {
    absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
    const auto it = primary_hosts_.find(host_name);
    if (it != primary_hosts_.end()) {
      found = it->second->host_info_;
    }
  }

  // Only hosts that have ever resolved to an address are returned.
  if (found != nullptr && found->address() != nullptr) {
    return found;
  }
  return absl::nullopt;
}
205

            
206
// Creates a handle that is constructed from the update callback list and `callbacks`, and hands
// ownership of it to the caller.
DnsCacheImpl::AddUpdateCallbacksHandlePtr
DnsCacheImpl::addUpdateCallbacks(UpdateCallbacks& callbacks) {
  auto handle = std::make_unique<AddUpdateCallbacksHandleImpl>(update_callbacks_, callbacks);
  return handle;
}
210

            
211
void DnsCacheImpl::startCacheLoad(const std::string& host, uint16_t default_port,
212
125
                                  bool is_proxy_lookup, bool ignore_cached_entries) {
213
125
  ASSERT(main_thread_dispatcher_.isThreadSafe());
214

            
215
  // It's possible for multiple requests to race trying to start a resolution. If a host is
216
  // already in the map it's either in the process of being resolved or the resolution is already
217
  // heading out to the worker threads. Either way the pending resolution will be completed.
218

            
219
  // Functions like this one that modify primary_hosts_ are only called in the main thread so we
220
  // know it is safe to use the PrimaryHostInfo pointers outside of the lock.
221
125
  auto* primary_host = [&]() {
222
125
    absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
223
125
    auto host_it = primary_hosts_.find(host);
224
125
    return host_it != primary_hosts_.end() ? host_it->second.get() : nullptr;
225
125
  }();
226

            
227
125
  if (primary_host) {
228
7
    if (!ignore_cached_entries || !primary_host->host_info_->firstResolveComplete()) {
229
2
      ENVOY_LOG(debug, "main thread resolve for host '{}' skipped. Entry present", host);
230
2
      return;
231
2
    }
232
    // The host was in cache but we want to force a refresh. Remove the host
233
    // entirely to ensure initial resolve logic works as expected.
234
5
    removeHost(host, *primary_host, false);
235
5
  }
236

            
237
123
  primary_host = createHost(host, default_port);
238
  // If the DNS request was simply to create a host endpoint in a Dynamic Forward Proxy cluster,
239
  // fast fail the look-up as the address is not needed.
240
123
  if (is_proxy_lookup) {
241
1
    finishResolve(host, Network::DnsResolver::ResolutionStatus::Completed, "proxy_resolve", {}, {},
242
1
                  true);
243
122
  } else {
244
122
    startResolve(host, *primary_host);
245
122
  }
246
123
}
247

            
248
// Inserts a PrimaryHostInfo for `host` into primary_hosts_ under the write lock and returns a
// pointer to the entry stored under that key. The port parsed from `host` is used when present,
// otherwise `default_port`. The new host's two callbacks invoke onReResolveAlarm() and
// onResolveTimeout() for this host.
DnsCacheImpl::PrimaryHostInfo* DnsCacheImpl::createHost(const std::string& host,
                                                        uint16_t default_port) {
  const auto authority = Http::Utility::parseAuthority(host);
  // TODO(mattklein123): Right now, the same host with different ports will become two
  // independent primary hosts with independent DNS resolutions. I'm not sure how much this will
  // matter, but we could consider collapsing these down and sharing the underlying DNS resolution.
  absl::WriterMutexLock writer_lock{primary_hosts_lock_};
  auto new_host = std::make_unique<PrimaryHostInfo>(
      *this, std::string(authority.host_), authority.port_.value_or(default_port),
      authority.is_ip_address_, [this, host]() { onReResolveAlarm(host); },
      [this, host]() { onResolveTimeout(host); });
  const auto emplaced = primary_hosts_.try_emplace(host, std::move(new_host));
  return emplaced.first->second.get();
}
267

            
268
26
// Returns the primary host stored under `host`. The host is asserted to exist and the call is
// asserted to happen on the main thread.
DnsCacheImpl::PrimaryHostInfo& DnsCacheImpl::getPrimaryHost(const std::string& host) {
  // primary_hosts_ is only modified from functions that run on the main thread, so the returned
  // reference may safely be used after the read lock taken below is released.
  ASSERT(main_thread_dispatcher_.isThreadSafe());
  absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
  const auto it = primary_hosts_.find(host);
  ASSERT(it != primary_hosts_.end());
  return *it->second;
}
277

            
278
5
void DnsCacheImpl::onResolveTimeout(const std::string& host) {
279
5
  ASSERT(main_thread_dispatcher_.isThreadSafe());
280

            
281
5
  ENVOY_LOG_EVENT(debug, "dns_cache_resolve_timeout", "host='{}' resolution timeout", host);
282
5
  stats_.dns_query_timeout_.inc();
283
5
  finishResolve(host, Network::DnsResolver::ResolutionStatus::Failure, "resolve_timeout", {},
284
5
                absl::nullopt, /* is_proxy_lookup= */ false, /* is_timeout= */ true);
285
5
}
286

            
287
26
void DnsCacheImpl::onReResolveAlarm(const std::string& host) {
288
26
  ASSERT(main_thread_dispatcher_.isThreadSafe());
289

            
290
26
  auto& primary_host = getPrimaryHost(host);
291
26
  const std::chrono::steady_clock::duration now_duration =
292
26
      main_thread_dispatcher_.timeSource().monotonicTime().time_since_epoch();
293
26
  auto last_used_time = primary_host.host_info_->lastUsedTime();
294
26
  ENVOY_LOG(debug, "host='{}' TTL check: now={} last_used={} TTL {}", host, now_duration.count(),
295
26
            last_used_time.count(), host_ttl_.count());
296
26
  if ((now_duration - last_used_time) > host_ttl_) {
297
15
    ENVOY_LOG(debug, "host='{}' TTL expired, removing", host);
298
15
    removeHost(host, primary_host, true);
299
17
  } else {
300
11
    startResolve(host, primary_host);
301
11
  }
302
26
}
303

            
304
void DnsCacheImpl::removeHost(const std::string& host, const PrimaryHostInfo& primary_host,
305
20
                              bool update_threads) {
306
  // If we need to erase the host, hold onto the PrimaryHostInfo object that owns this callback.
307
  // This is defined at function scope so that it is only erased on function exit to avoid
308
  // use-after-free issues
309
20
  PrimaryHostInfoPtr host_to_erase;
310

            
311
  // If the host has no address then that means that the DnsCacheImpl has never
312
  // runAddUpdateCallbacks for this host, and thus the callback targets are not aware of it.
313
  // Therefore, runRemoveCallbacks should only be ran if the host's address != nullptr.
314
20
  if (primary_host.host_info_->address()) {
315
14
    runRemoveCallbacks(host);
316
14
  }
317
20
  {
318
20
    removeCacheEntry(host);
319
20
    absl::WriterMutexLock writer_lock{primary_hosts_lock_};
320
20
    auto host_it = primary_hosts_.find(host);
321
20
    ASSERT(host_it != primary_hosts_.end());
322
20
    host_to_erase = std::move(host_it->second);
323
20
    primary_hosts_.erase(host_it);
324
20
  }
325
  // In the case of force-remove and resolve, don't cancel outstanding resolve
326
  // callbacks on remove, as a resolve is pending.
327
20
  if (update_threads) {
328
15
    notifyThreads(host, primary_host.host_info_);
329
15
  }
330
20
}
331

            
332
4
void DnsCacheImpl::forceRefreshHosts() {
333
4
  ENVOY_LOG(debug, "beginning DNS cache force refresh");
334
  // Tell the underlying resolver to reset itself since we likely just went through a network
335
  // transition and parameters may have changed.
336
4
  resolver_->resetNetworking();
337

            
338
4
  absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
339
4
  for (auto& primary_host : primary_hosts_) {
340
    // Avoid holding the lock for longer than necessary by just triggering the refresh timer for
341
    // each host IFF the host is not already refreshing. Cancellation is assumed to be cheap for
342
    // resolvers.
343
3
    if (primary_host.second->active_query_ != nullptr) {
344
1
      primary_host.second->active_query_->cancel(
345
1
          Network::ActiveDnsQuery::CancelReason::QueryAbandoned);
346
1
      primary_host.second->active_query_ = nullptr;
347
1
      if (timeout_interval_.count() > 0) {
348
1
        primary_host.second->timeout_timer_->disableTimer();
349
1
      }
350
1
    }
351

            
352
3
    if (timeout_interval_.count() > 0) {
353
3
      ASSERT(!primary_host.second->timeout_timer_->enabled());
354
3
    }
355
3
    primary_host.second->refresh_timer_->enableTimer(std::chrono::milliseconds(0), nullptr);
356
3
    ENVOY_LOG_EVENT(debug, "force_refresh_host", "force refreshing host='{}'", primary_host.first);
357
3
  }
358
4
}
359

            
360
7
// Stores `ip_version` (possibly empty) as the IP version to remove, under
// ip_version_to_remove_lock_.
void DnsCacheImpl::setIpVersionToRemove(absl::optional<Network::Address::IpVersion> ip_version) {
  absl::MutexLock guard{ip_version_to_remove_lock_};
  ip_version_to_remove_ = ip_version;
}
364

            
365
// Returns a copy of the currently stored IP version to remove, read under
// ip_version_to_remove_lock_. The result is empty when no version is set.
absl::optional<Network::Address::IpVersion> DnsCacheImpl::getIpVersionToRemove() {
  absl::MutexLock guard{ip_version_to_remove_lock_};
  return ip_version_to_remove_;
}
369

            
370
2
void DnsCacheImpl::stop() {
371
2
  ENVOY_LOG(debug, "stopping DNS cache");
372
  // Tell the underlying resolver to reset itself since we likely just went through a network
373
  // transition and parameters may have changed.
374
2
  resolver_->resetNetworking();
375

            
376
2
  absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
377
2
  for (auto& primary_host : primary_hosts_) {
378
1
    if (primary_host.second->active_query_ != nullptr) {
379
1
      primary_host.second->active_query_->cancel(
380
1
          Network::ActiveDnsQuery::CancelReason::QueryAbandoned);
381
1
      primary_host.second->active_query_ = nullptr;
382
1
    }
383

            
384
1
    if (timeout_interval_.count() > 0) {
385
1
      primary_host.second->timeout_timer_->disableTimer();
386
1
      ASSERT(!primary_host.second->timeout_timer_->enabled());
387
1
    }
388
1
    primary_host.second->refresh_timer_->disableTimer();
389
1
    ENVOY_LOG_EVENT(debug, "stop_host", "stop host='{}'", primary_host.first);
390
1
  }
391
2
}
392

            
393
133
void DnsCacheImpl::startResolve(const std::string& host, PrimaryHostInfo& host_info) {
394
133
  ENVOY_LOG(debug, "starting main thread resolve for host='{}' dns='{}' port='{}' timeout='{}'",
395
133
            host, host_info.host_info_->resolvedHost(), host_info.port_, timeout_interval_.count());
396
133
  ASSERT(host_info.active_query_ == nullptr);
397

            
398
133
  stats_.dns_query_attempt_.inc();
399
133
  if (timeout_interval_.count() > 0) {
400
122
    host_info.timeout_timer_->enableTimer(timeout_interval_, nullptr);
401
122
  }
402
133
  host_info.active_query_ = resolver_->resolve(
403
133
      host_info.host_info_->resolvedHost(), dns_lookup_family_,
404
133
      [this, host](Network::DnsResolver::ResolutionStatus status, absl::string_view details,
405
133
                   std::list<Network::DnsResponse>&& response) {
406
122
        finishResolve(host, status, details, std::move(response));
407
122
      });
408
133
}
409

            
410
void DnsCacheImpl::finishResolve(const std::string& host,
411
                                 Network::DnsResolver::ResolutionStatus status,
412
                                 absl::string_view details,
413
                                 std::list<Network::DnsResponse>&& response,
414
                                 absl::optional<MonotonicTime> resolution_time,
415
145
                                 bool is_proxy_lookup, bool is_timeout) {
416
145
  ASSERT(main_thread_dispatcher_.isThreadSafe());
417
145
  if (Runtime::runtimeFeatureEnabled(
418
145
          "envoy.reloadable_features.dns_cache_set_ip_version_to_remove")) {
419
8
    {
420
8
      absl::MutexLock lock{ip_version_to_remove_lock_};
421
8
      if (ip_version_to_remove_.has_value()) {
422
6
        if (config_.preresolve_hostnames_size() > 0) {
423
1
          IS_ENVOY_BUG(
424
1
              "Unable to delete IP version addresses when DNS preresolve hostnames are not empty.");
425
5
        } else {
426
5
          response.remove_if([ip_version_to_remove =
427
7
                                  *ip_version_to_remove_](const Network::DnsResponse& dns_resp) {
428
            // Ignore the loopback address because a socket interface can still support both IPv4
429
            // and IPv6 but has no outgoing IPv4/IPv6 connectivity.
430
7
            return !Network::Utility::isLoopbackAddress(*dns_resp.addrInfo().address_) &&
431
7
                   dns_resp.addrInfo().address_->ip()->version() == ip_version_to_remove;
432
7
          });
433
5
        }
434
6
      }
435
8
    }
436
8
  }
437
145
  ENVOY_LOG_EVENT(debug, "dns_cache_finish_resolve",
438
145
                  "main thread resolve complete for host '{}': {}", host,
439
145
                  accumulateToString<Network::DnsResponse>(response, [](const auto& dns_response) {
440
145
                    return dns_response.addrInfo().address_->asString();
441
145
                  }));
442
145
  const bool from_cache = resolution_time.has_value();
443

            
444
  // Functions like this one that modify primary_hosts_ are only called in the main thread so we
445
  // know it is safe to use the PrimaryHostInfo pointers outside of the lock.
446
145
  auto* primary_host_info = [&]() {
447
145
    absl::ReaderMutexLock reader_lock{primary_hosts_lock_};
448
145
    const auto primary_host_it = primary_hosts_.find(host);
449
145
    ASSERT(primary_host_it != primary_hosts_.end());
450
145
    return primary_host_it->second.get();
451
145
  }();
452

            
453
145
  std::string details_with_maybe_trace = std::string(details);
454
145
  if (primary_host_info != nullptr && primary_host_info->active_query_ != nullptr) {
455
123
    if (enable_dfp_dns_trace_) {
456
2
      std::string traces = primary_host_info->active_query_->getTraces();
457
2
      details_with_maybe_trace = absl::StrCat(details, ":", traces);
458
2
    }
459
    // `cancel` must be called last because the `ActiveQuery` will be destroyed afterward.
460
123
    if (is_timeout) {
461
5
      primary_host_info->active_query_->cancel(Network::ActiveDnsQuery::CancelReason::Timeout);
462
5
    }
463
123
  }
464
145
  bool failure = status == Network::DnsResolver::ResolutionStatus::Failure || response.empty();
465
145
  details_with_maybe_trace = absl::StrCat(
466
145
      (failure ? "dns_resolution_failure{" : ""),
467
145
      StringUtil::replaceAllEmptySpace(details_with_maybe_trace), (failure ? "}" : ""));
468

            
469
145
  bool first_resolve = false;
470

            
471
145
  if (!from_cache) {
472
128
    first_resolve = !primary_host_info->host_info_->firstResolveComplete();
473
128
    if (timeout_interval_.count() > 0) {
474
120
      primary_host_info->timeout_timer_->disableTimer();
475
120
    }
476
128
    primary_host_info->active_query_ = nullptr;
477

            
478
128
    if (status == Network::DnsResolver::ResolutionStatus::Failure) {
479
30
      stats_.dns_query_failure_.inc();
480
99
    } else {
481
98
      stats_.dns_query_success_.inc();
482
98
    }
483
128
  }
484

            
485
145
  auto address_list = DnsUtils::generateAddressList(response, primary_host_info->port_);
486
  // Only the change the address if:
487
  // 1) The new address is valid &&
488
  // 2a) The host doesn't yet have an address ||
489
  // 2b) The host has a changed address.
490
  //
491
  // This means that once a host gets an address it will stick even in the case of a subsequent
492
  // resolution failure.
493
145
  bool address_changed = false;
494
145
  auto current_address = primary_host_info->host_info_->address();
495

            
496
145
  if (!resolution_time.has_value()) {
497
128
    resolution_time = main_thread_dispatcher_.timeSource().monotonicTime();
498
128
  }
499
145
  std::chrono::seconds dns_ttl =
500
145
      std::chrono::duration_cast<std::chrono::seconds>(refresh_interval_);
501

            
502
  // If the DNS resolver successfully resolved with an empty response list, the dns cache does not
503
  // update. This ensures that a potentially previously resolved address does not stabilize back to
504
  // 0 hosts.
505
145
  if (!address_list.empty()) {
506
    // Update the cache entry and staleness any time the ttl changes.
507
111
    if (!from_cache) {
508
94
      addCacheEntry(host, address_list, response.front().addrInfo().ttl_);
509
94
    }
510
    // Arbitrarily cap DNS re-resolution at min_refresh_interval_ to avoid constant DNS queries.
511
111
    dns_ttl = std::max<std::chrono::seconds>(
512
111
        std::chrono::duration_cast<std::chrono::seconds>(min_refresh_interval_),
513
111
        response.front().addrInfo().ttl_);
514
111
    primary_host_info->host_info_->updateStale(resolution_time.value(), dns_ttl);
515
111
  }
516

            
517
145
  bool should_update_cache =
518
145
      !address_list.empty() &&
519
145
      DnsUtils::listChanged(address_list, primary_host_info->host_info_->addressList());
520
  // If this was a proxy lookup it's OK to send a null address resolution as
521
  // long as this isn't a transition from non-null to null address.
522
145
  should_update_cache |= is_proxy_lookup && !current_address;
523

            
524
145
  if (should_update_cache) {
525
106
    primary_host_info->host_info_->setAddresses(std::move(address_list), details_with_maybe_trace,
526
106
                                                status);
527
106
    ENVOY_LOG_EVENT(debug, "dns_cache_update_address",
528
106
                    "host '{}' address has changed from {} to {}", host,
529
106
                    current_address ? current_address->asStringView() : "<empty>",
530
106
                    primary_host_info->host_info_->address()
531
106
                        ? primary_host_info->host_info_->address()->asStringView()
532
106
                        : "<empty>");
533

            
534
106
    absl::Status host_status = runAddUpdateCallbacks(host, primary_host_info->host_info_);
535
106
    ENVOY_BUG(host_status.ok(),
536
106
              absl::StrCat("Failed to update DFP host due to ", host_status.message()));
537
106
    primary_host_info->host_info_->setFirstResolveComplete();
538
106
    address_changed = true;
539
106
    stats_.host_address_changed_.inc();
540
107
  } else if (current_address == nullptr) {
541
    // We only set details here if current address is null because but
542
    // non-null->null resolutions we don't update the address so will use a
543
    // previously resolved address + details.
544
31
    primary_host_info->host_info_->setDetails(details_with_maybe_trace);
545
31
    primary_host_info->host_info_->setResolutionStatus(status);
546
31
  }
547

            
548
145
  if (first_resolve) {
549
114
    primary_host_info->host_info_->setFirstResolveComplete();
550
114
  }
551
145
  if (first_resolve || (address_changed && !primary_host_info->host_info_->isStale())) {
552
129
    notifyThreads(host, primary_host_info->host_info_);
553
129
  }
554

            
555
145
  runResolutionCompleteCallbacks(host, primary_host_info->host_info_, status);
556

            
557
  // Kick off the refresh timer.
558
145
  if (status == Network::DnsResolver::ResolutionStatus::Completed) {
559
115
    primary_host_info->failure_backoff_strategy_->reset();
560
115
    primary_host_info->refresh_timer_->enableTimer(dns_ttl);
561
115
    ENVOY_LOG(debug, "DNS refresh rate reset for host '{}', refresh rate {} ms", host,
562
115
              dns_ttl.count() * 1000);
563
116
  } else {
564
30
    if (!config_.disable_dns_refresh_on_failure()) {
565
24
      const uint64_t refresh_interval =
566
24
          primary_host_info->failure_backoff_strategy_->nextBackOffMs();
567
24
      primary_host_info->refresh_timer_->enableTimer(std::chrono::milliseconds(refresh_interval));
568
24
      ENVOY_LOG(debug, "DNS refresh rate reset for host '{}', (failure) refresh rate {} ms", host,
569
24
                refresh_interval);
570
24
    }
571
30
  }
572
145
}
573

            
574
// Calls onDnsHostAddOrUpdate(host, host_info) on every registered update callback, in list
// order. RETURN_IF_NOT_OK ends the loop early with a callback's status if that status is not OK;
// otherwise OkStatus is returned.
absl::Status DnsCacheImpl::runAddUpdateCallbacks(const std::string& host,
                                                 const DnsHostInfoSharedPtr& host_info) {
  for (auto* registered : update_callbacks_) {
    auto& target = registered->callbacks_;
    RETURN_IF_NOT_OK(target.onDnsHostAddOrUpdate(host, host_info));
  }
  return absl::OkStatus();
}
581

            
582
void DnsCacheImpl::runResolutionCompleteCallbacks(const std::string& host,
583
                                                  const DnsHostInfoSharedPtr& host_info,
584
145
                                                  Network::DnsResolver::ResolutionStatus status) {
585
145
  for (auto* callbacks : update_callbacks_) {
586
127
    callbacks->callbacks_.onDnsResolutionComplete(host, host_info, status);
587
127
  }
588
145
}
589

            
590
14
void DnsCacheImpl::runRemoveCallbacks(const std::string& host) {
591
14
  for (auto* callbacks : update_callbacks_) {
592
14
    callbacks->callbacks_.onDnsHostRemove(host);
593
14
  }
594
14
}
595

            
596
void DnsCacheImpl::notifyThreads(const std::string& host,
597
144
                                 const DnsHostInfoImplSharedPtr& resolved_info) {
598
144
  auto shared_info = std::make_shared<HostMapUpdateInfo>(host, resolved_info);
599
238
  tls_slot_.runOnAllThreads([shared_info](OptRef<ThreadLocalHostInfo> local_host_info) {
600
238
    local_host_info->onHostMapUpdate(shared_info);
601
238
  });
602
144
}
603

            
604
271
// Cancels every load handle that is still pending on this thread, for all hosts.
DnsCacheImpl::ThreadLocalHostInfo::~ThreadLocalHostInfo() {
  for (const auto& host_entry : pending_resolutions_) {
    const auto& outstanding_handles = host_entry.second;
    for (auto handle : outstanding_handles) {
      handle->cancel();
    }
  }
}
612

            
613
void DnsCacheImpl::ThreadLocalHostInfo::onHostMapUpdate(
614
238
    const HostMapUpdateInfoSharedPtr& resolved_host) {
615
238
  auto host_it = pending_resolutions_.find(resolved_host->host_);
616
238
  if (host_it != pending_resolutions_.end()) {
617
    // Calling the onLoadDnsCacheComplete may trigger more host resolutions adding more elements
618
    // to the `pending_resolutions_` map, potentially invalidating the host_it iterator. So we
619
    // copy the list of handles to a local variable before cleaning up the map.
620
107
    std::list<LoadDnsCacheEntryHandleImpl*> completed_resolutions(std::move(host_it->second));
621
107
    pending_resolutions_.erase(host_it);
622
109
    for (auto* resolution : completed_resolutions) {
623
109
      auto& callbacks = resolution->callbacks_;
624
109
      resolution->cancel();
625
109
      callbacks.onLoadDnsCacheComplete(resolved_host->info_);
626
109
    }
627
107
  }
628
238
}
629

            
630
// Builds the main-thread bookkeeping for one cached host: the refresh and timeout timers
// (created on the parent's main thread dispatcher), the shared DnsHostInfoImpl, and the failure
// backoff strategy derived from the parent's config and refresh interval. Also bumps the
// `host_added` and `num_hosts` stats.
DnsCacheImpl::PrimaryHostInfo::PrimaryHostInfo(DnsCacheImpl& parent,
                                               absl::string_view host_to_resolve, uint16_t port,
                                               bool is_ip_address,
                                               const Event::TimerCb& refresh_timer_cb,
                                               const Event::TimerCb& timeout_timer_cb)
    : parent_(parent), port_(port),
      refresh_timer_(parent.main_thread_dispatcher_.createTimer(refresh_timer_cb)),
      timeout_timer_(parent.main_thread_dispatcher_.createTimer(timeout_timer_cb)),
      host_info_(std::make_shared<DnsHostInfoImpl>(parent, host_to_resolve, is_ip_address)),
      failure_backoff_strategy_(
          Config::Utility::prepareDnsRefreshStrategy<
              envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig>(
              parent.config_, parent.refresh_interval_.count(), parent.random_generator_)) {
  auto& cache_stats = parent_.stats_;
  cache_stats.host_added_.inc();
  cache_stats.num_hosts_.inc();
}
646

            
647
140
// Records the removal of this host in the parent's stats: increments `host_removed` and
// decrements `num_hosts`.
DnsCacheImpl::PrimaryHostInfo::~PrimaryHostInfo() {
  auto& cache_stats = parent_.stats_;
  cache_stats.host_removed_.inc();
  cache_stats.num_hosts_.dec();
}
651

            
652
void DnsCacheImpl::addCacheEntry(
653
    const std::string& host,
654
    const std::vector<Network::Address::InstanceConstSharedPtr>& address_list,
655
94
    const std::chrono::seconds ttl) {
656
94
  if (!key_value_store_ || address_list.empty()) {
657
42
    return;
658
42
  }
659
52
  MonotonicTime now = main_thread_dispatcher_.timeSource().monotonicTime();
660
52
  uint64_t seconds_since_epoch =
661
52
      std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count();
662
54
  std::string value = absl::StrJoin(address_list, "\n", [&](std::string* out, const auto& addr) {
663
54
    absl::StrAppend(out, addr->asString(), "|", ttl.count(), "|", seconds_since_epoch);
664
54
  });
665
52
  key_value_store_->addOrUpdate(host, value, absl::nullopt);
666
52
}
667

            
668
20
void DnsCacheImpl::removeCacheEntry(const std::string& host) {
669
20
  if (!key_value_store_) {
670
8
    return;
671
8
  }
672
12
  key_value_store_->remove(host);
673
12
}
674

            
675
// Parses one line of a persisted cache entry of the form
// "<address:port>|<ttl seconds>|<timestamp seconds>" (the format written by addCacheEntry()).
//
// @param value the cache line to parse.
// @param resolution_time in/out: set to the reconstructed resolution time whenever this line's
//        timestamp token parses. The caller may pass the same optional for several lines.
// @return the parsed DnsResponse, or absl::nullopt if the line does not have exactly three
//         tokens, or if the address, the TTL (must be a non-zero integer) or the timestamp of
//         THIS line is invalid.
absl::optional<Network::DnsResponse>
DnsCacheImpl::parseValue(absl::string_view value, absl::optional<MonotonicTime>& resolution_time) {
  const auto parts = StringUtil::splitToken(value, "|");
  if (parts.size() != 3) {
    ENVOY_LOG(warn, "Incorrect number of tokens in the cache line");
    return {};
  }

  // Keep parsing after an individual failure so that every bad field gets its own warning.
  const Network::Address::InstanceConstSharedPtr address =
      Network::Utility::parseInternetAddressAndPortNoThrow(std::string(parts[0]));
  if (address == nullptr) {
    ENVOY_LOG(warn, "{} is not a valid address", parts[0]);
  }

  std::chrono::seconds ttl(0);
  uint64_t ttl_int = 0;
  if (absl::SimpleAtoi(parts[1], &ttl_int) && ttl_int != 0) {
    ttl = std::chrono::seconds(ttl_int);
  } else {
    ENVOY_LOG(warn, "{} is not a valid ttl", parts[1]);
  }

  // Track the timestamp validity of this line explicitly. `resolution_time` may already hold a
  // value from a previously parsed line, so `resolution_time.has_value()` alone cannot tell
  // whether this line's timestamp was valid.
  bool timestamp_parsed = false;
  uint64_t epoch_int = 0;
  if (absl::SimpleAtoi(parts[2], &epoch_int)) {
    // Read the clock once so that both terms below refer to the same instant.
    const MonotonicTime now = main_thread_dispatcher_.timeSource().monotonicTime();
    const std::chrono::seconds seconds_since_epoch =
        std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch());
    resolution_time = now - (seconds_since_epoch - std::chrono::seconds(epoch_int));
    timestamp_parsed = true;
  }

  if (address == nullptr || ttl == std::chrono::seconds(0) || !timestamp_parsed) {
    ENVOY_LOG(warn, "Unable to parse cache line '{}'", value);
    return {};
  }
  return Network::DnsResponse(address, ttl);
}
708

            
709
void DnsCacheImpl::loadCacheEntries(
710
168
    const envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig& config) {
711
168
  if (!config.has_key_value_config()) {
712
92
    return;
713
92
  }
714
76
  auto& factory =
715
76
      Config::Utility::getAndCheckFactory<KeyValueStoreFactory>(config.key_value_config().config());
716
76
  key_value_store_ = factory.createStore(config.key_value_config(), validation_visitor_,
717
76
                                         main_thread_dispatcher_, file_system_);
718
80
  KeyValueStore::ConstIterateCb load = [this](const std::string& key, const std::string& value) {
719
22
    absl::optional<MonotonicTime> resolution_time;
720
22
    std::list<Network::DnsResponse> responses;
721
22
    const auto addresses = StringUtil::splitToken(value, "\n");
722
26
    for (absl::string_view address_line : addresses) {
723
26
      absl::optional<Network::DnsResponse> response = parseValue(address_line, resolution_time);
724
26
      if (!response.has_value()) {
725
4
        return KeyValueStore::Iterate::Break;
726
4
      }
727
22
      responses.emplace_back(response.value());
728
22
    }
729
18
    if (responses.empty()) {
730
1
      return KeyValueStore::Iterate::Break;
731
1
    }
732
17
    createHost(key, responses.front().addrInfo().address_->ip()->port());
733
17
    ENVOY_LOG_EVENT(
734
17
        debug, "dns_cache_load_finished", "persistent dns cache load complete for host '{}': {}",
735
17
        key, accumulateToString<Network::DnsResponse>(responses, [](const auto& dns_response) {
736
17
          return dns_response.addrInfo().address_->asString();
737
17
        }));
738
17
    finishResolve(key, Network::DnsResolver::ResolutionStatus::Completed, "from_cache",
739
17
                  std::move(responses), resolution_time);
740
17
    stats_.cache_load_.inc();
741
17
    return KeyValueStore::Iterate::Continue;
742
18
  };
743
76
  key_value_store_->iterate(load);
744
76
}
745

            
746
// Creates the host info for `resolved_host`. The stale-at time starts out as the current
// monotonic time, and the last-used time is initialized via touch().
DnsCacheImpl::DnsHostInfoImpl::DnsHostInfoImpl(DnsCacheImpl& parent,
                                               absl::string_view resolved_host, bool is_ip_address)
    : parent_(parent), resolved_host_(resolved_host), is_ip_address_(is_ip_address),
      stale_at_time_(parent.main_thread_dispatcher_.timeSource().monotonicTime()) {
  // Mark the host as used at creation time.
  touch();
}
752

            
753
410
// Returns the first address in the resolved address list, or nullptr when the list is empty.
// The list is read under the reader lock.
Network::Address::InstanceConstSharedPtr DnsCacheImpl::DnsHostInfoImpl::address() const {
  absl::ReaderMutexLock read_lock{resolve_lock_};
  if (address_list_.empty()) {
    return nullptr;
  }
  return address_list_.front();
}
758

            
759
std::vector<Network::Address::InstanceConstSharedPtr>
760
186
DnsCacheImpl::DnsHostInfoImpl::addressList() const {
761
186
  std::vector<Network::Address::InstanceConstSharedPtr> ret;
762
186
  absl::ReaderMutexLock lock{resolve_lock_};
763
186
  ret = address_list_;
764
186
  return ret;
765
186
}
766

            
767
238
// Returns the host name this entry was created for.
const std::string& DnsCacheImpl::DnsHostInfoImpl::resolvedHost() const {
  return resolved_host_;
}
768

            
769
103
// Returns the `is_ip_address` flag this entry was constructed with.
bool DnsCacheImpl::DnsHostInfoImpl::isIpAddress() const {
  return is_ip_address_;
}
770

            
771
229
void DnsCacheImpl::DnsHostInfoImpl::touch() {
772
229
  last_used_time_ = parent_.main_thread_dispatcher_.timeSource().monotonicTime().time_since_epoch();
773
229
}
774

            
775
void DnsCacheImpl::DnsHostInfoImpl::updateStale(MonotonicTime resolution_time,
776
111
                                                std::chrono::seconds ttl) {
777
111
  stale_at_time_ = resolution_time + ttl;
778
111
}
779

            
780
23
bool DnsCacheImpl::DnsHostInfoImpl::isStale() {
781
23
  return parent_.main_thread_dispatcher_.timeSource().monotonicTime() >
782
23
         static_cast<MonotonicTime>(stale_at_time_);
783
23
}
784

            
785
void DnsCacheImpl::DnsHostInfoImpl::setAddresses(
786
    std::vector<Network::Address::InstanceConstSharedPtr>&& list, absl::string_view details,
787
106
    Network::DnsResolver::ResolutionStatus resolution_status) {
788
106
  absl::WriterMutexLock lock{resolve_lock_};
789
106
  address_list_ = std::move(list);
790
106
  details_ = details;
791
106
  resolution_status_ = resolution_status;
792
106
}
793

            
794
31
// Stores `details` as the resolution details string, under the writer lock.
void DnsCacheImpl::DnsHostInfoImpl::setDetails(absl::string_view details) {
  absl::WriterMutexLock write_lock{resolve_lock_};
  details_ = details;
}
798

            
799
74
std::string DnsCacheImpl::DnsHostInfoImpl::details() {
800
74
  absl::ReaderMutexLock lock{resolve_lock_};
801
74
  return details_;
802
74
}
803

            
804
26
std::chrono::steady_clock::duration DnsCacheImpl::DnsHostInfoImpl::lastUsedTime() const {
805
26
  return last_used_time_.load();
806
26
}
807

            
808
169
bool DnsCacheImpl::DnsHostInfoImpl::firstResolveComplete() const {
809
169
  absl::ReaderMutexLock lock{resolve_lock_};
810
169
  return first_resolve_complete_;
811
169
}
812

            
813
220
void DnsCacheImpl::DnsHostInfoImpl::setFirstResolveComplete() {
814
220
  absl::WriterMutexLock lock{resolve_lock_};
815
220
  first_resolve_complete_ = true;
816
220
}
817

            
818
void DnsCacheImpl::DnsHostInfoImpl::setResolutionStatus(
819
31
    Network::DnsResolver::ResolutionStatus resolution_status) {
820
31
  absl::WriterMutexLock lock{resolve_lock_};
821
31
  resolution_status_ = resolution_status;
822
31
}
823

            
824
5
// Returns the status recorded for the latest resolution of this host.
// This is a read-only accessor, so it takes the reader lock like the other getters on this
// class (address(), addressList(), details(), firstResolveComplete()) rather than the exclusive
// writer lock, which would needlessly serialize concurrent readers.
Network::DnsResolver::ResolutionStatus DnsCacheImpl::DnsHostInfoImpl::resolutionStatus() const {
  absl::ReaderMutexLock lock{resolve_lock_};
  return resolution_status_;
}
828

            
829
} // namespace DynamicForwardProxy
830
} // namespace Common
831
} // namespace Extensions
832
} // namespace Envoy