Coverage Report

Created: 2024-09-19 09:45

/proc/self/cwd/source/server/server.cc
Line
Count
Source
1
#include "source/server/server.h"
2
3
#include <csignal>
4
#include <cstdint>
5
#include <ctime>
6
#include <functional>
7
#include <memory>
8
#include <string>
9
10
#include "envoy/admin/v3/config_dump.pb.h"
11
#include "envoy/common/exception.h"
12
#include "envoy/common/time.h"
13
#include "envoy/config/bootstrap/v3/bootstrap.pb.h"
14
#include "envoy/config/bootstrap/v3/bootstrap.pb.validate.h"
15
#include "envoy/event/dispatcher.h"
16
#include "envoy/event/signal.h"
17
#include "envoy/event/timer.h"
18
#include "envoy/network/dns.h"
19
#include "envoy/registry/registry.h"
20
#include "envoy/server/bootstrap_extension_config.h"
21
#include "envoy/server/instance.h"
22
#include "envoy/server/options.h"
23
#include "envoy/stats/histogram.h"
24
#include "envoy/stats/stats.h"
25
#include "envoy/upstream/cluster_manager.h"
26
27
#include "source/common/api/api_impl.h"
28
#include "source/common/api/os_sys_calls_impl.h"
29
#include "source/common/common/enum_to_int.h"
30
#include "source/common/common/mutex_tracer_impl.h"
31
#include "source/common/common/utility.h"
32
#include "source/common/config/utility.h"
33
#include "source/common/config/well_known_names.h"
34
#include "source/common/config/xds_resource.h"
35
#include "source/common/http/codes.h"
36
#include "source/common/http/headers.h"
37
#include "source/common/local_info/local_info_impl.h"
38
#include "source/common/network/address_impl.h"
39
#include "source/common/network/dns_resolver/dns_factory_util.h"
40
#include "source/common/network/socket_interface.h"
41
#include "source/common/network/socket_interface_impl.h"
42
#include "source/common/protobuf/utility.h"
43
#include "source/common/runtime/runtime_impl.h"
44
#include "source/common/runtime/runtime_keys.h"
45
#include "source/common/signal/fatal_error_handler.h"
46
#include "source/common/stats/stats_matcher_impl.h"
47
#include "source/common/stats/tag_producer_impl.h"
48
#include "source/common/stats/thread_local_store.h"
49
#include "source/common/stats/timespan_impl.h"
50
#include "source/common/tls/context_manager_impl.h"
51
#include "source/common/upstream/cluster_manager_impl.h"
52
#include "source/common/version/version.h"
53
#include "source/server/configuration_impl.h"
54
#include "source/server/listener_hooks.h"
55
#include "source/server/listener_manager_factory.h"
56
#include "source/server/regex_engine.h"
57
#include "source/server/utils.h"
58
59
namespace Envoy {
60
namespace Server {
61
62
namespace {
63
4.98k
std::unique_ptr<ConnectionHandler> getHandler(Event::Dispatcher& dispatcher) {
64
65
4.98k
  auto* factory = Config::Utility::getFactoryByName<ConnectionHandlerFactory>(
66
4.98k
      "envoy.connection_handler.default");
67
4.98k
  if (factory) {
68
4.98k
    return factory->createConnectionHandler(dispatcher, absl::nullopt);
69
4.98k
  }
70
0
  ENVOY_LOG_MISC(debug, "Unable to find envoy.connection_handler.default factory");
71
0
  return nullptr;
72
4.98k
}
73
74
} // namespace
75
76
InstanceBase::InstanceBase(Init::Manager& init_manager, const Options& options,
77
                           Event::TimeSystem& time_system, ListenerHooks& hooks,
78
                           HotRestart& restarter, Stats::StoreRoot& store,
79
                           Thread::BasicLockable& access_log_lock,
80
                           Random::RandomGeneratorPtr&& random_generator,
81
                           ThreadLocal::Instance& tls, Thread::ThreadFactory& thread_factory,
82
                           Filesystem::Instance& file_system,
83
                           std::unique_ptr<ProcessContext> process_context,
84
                           Buffer::WatermarkFactorySharedPtr watermark_factory)
85
    : init_manager_(init_manager), live_(false), options_(options),
86
      validation_context_(options_.allowUnknownStaticFields(),
87
                          !options.rejectUnknownDynamicFields(),
88
                          options.ignoreUnknownDynamicFields()),
89
      time_source_(time_system), restarter_(restarter), start_time_(time(nullptr)),
90
      original_start_time_(start_time_), stats_store_(store), thread_local_(tls),
91
      random_generator_(std::move(random_generator)),
92
      api_(new Api::Impl(
93
          thread_factory, store, time_system, file_system, *random_generator_, bootstrap_,
94
          process_context ? ProcessContextOptRef(std::ref(*process_context)) : absl::nullopt,
95
          watermark_factory)),
96
      dispatcher_(api_->allocateDispatcher("main_thread")),
97
      access_log_manager_(options.fileFlushIntervalMsec(), *api_, *dispatcher_, access_log_lock,
98
                          store),
99
      handler_(getHandler(*dispatcher_)), worker_factory_(thread_local_, *api_, hooks),
100
      mutex_tracer_(options.mutexTracingEnabled() ? &Envoy::MutexTracerImpl::getOrCreateTracer()
101
                                                  : nullptr),
102
      grpc_context_(store.symbolTable()), http_context_(store.symbolTable()),
103
      router_context_(store.symbolTable()), process_context_(std::move(process_context)),
104
      hooks_(hooks), quic_stat_names_(store.symbolTable()), server_contexts_(*this),
105
4.98k
      enable_reuse_port_default_(true), stats_flush_in_progress_(false) {}
106
107
4.98k
InstanceBase::~InstanceBase() {
108
4.98k
  terminate();
109
110
  // Stop logging to file before the AccessLogManager and its dependencies are
111
  // destructed to avoid crashing at shutdown.
112
4.98k
  file_logger_.reset();
113
114
  // Destruct the ListenerManager explicitly, before InstanceBase's local init_manager_ is
115
  // destructed.
116
  //
117
  // The ListenerManager's DestinationPortsMap contains FilterChainSharedPtrs. There is a rare race
118
  // condition where one of these FilterChains contains an HttpConnectionManager, which contains an
119
  // RdsRouteConfigProvider, which contains an RdsRouteConfigSubscriptionSharedPtr. Since
120
  // RdsRouteConfigSubscription is an Init::Target, ~RdsRouteConfigSubscription triggers a callback
121
  // set at initialization, which goes to unregister it from the top-level InitManager, which has
122
  // already been destructed (use-after-free) causing a segfault.
123
4.98k
  ENVOY_LOG(debug, "destroying listener manager");
124
4.98k
  listener_manager_.reset();
125
4.98k
  ENVOY_LOG(debug, "destroyed listener manager");
126
4.98k
  dispatcher_->shutdown();
127
128
#ifdef ENVOY_PERFETTO
129
  if (tracing_session_ != nullptr) {
130
    // Flush the trace data.
131
    perfetto::TrackEvent::Flush();
132
    // Disable tracing and block until tracing has stopped.
133
    tracing_session_->StopBlocking();
134
    close(tracing_fd_);
135
  }
136
#endif
137
4.98k
}
138
139
32.4k
Upstream::ClusterManager& InstanceBase::clusterManager() {
140
32.4k
  ASSERT(config_.clusterManager() != nullptr);
141
32.4k
  return *config_.clusterManager();
142
32.4k
}
143
144
0
const Upstream::ClusterManager& InstanceBase::clusterManager() const {
145
0
  ASSERT(config_.clusterManager() != nullptr);
146
0
  return *config_.clusterManager();
147
0
}
148
149
0
void InstanceBase::drainListeners(OptRef<const Network::ExtraShutdownListenerOptions> options) {
150
0
  ENVOY_LOG(info, "closing and draining listeners");
151
0
  listener_manager_->stopListeners(ListenerManager::StopListenersType::All,
152
0
                                   options.has_value() ? *options
153
0
                                                       : Network::ExtraShutdownListenerOptions{});
154
0
  drain_manager_->startDrainSequence([] {});
155
0
}
156
157
4.48k
void InstanceBase::failHealthcheck(bool fail) {
158
4.48k
  live_.store(!fail);
159
4.48k
  server_stats_->live_.set(live_.load());
160
4.48k
}
161
162
MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store,
163
                                       Upstream::ClusterManager& cluster_manager,
164
3.03k
                                       TimeSource& time_source) {
165
3.03k
  store.forEachSinkedCounter(
166
3.03k
      [this](std::size_t size) {
167
3.03k
        snapped_counters_.reserve(size);
168
3.03k
        counters_.reserve(size);
169
3.03k
      },
170
744k
      [this](Stats::Counter& counter) {
171
744k
        snapped_counters_.push_back(Stats::CounterSharedPtr(&counter));
172
744k
        counters_.push_back({counter.latch(), counter});
173
744k
      });
174
175
3.03k
  store.forEachSinkedGauge(
176
3.03k
      [this](std::size_t size) {
177
3.03k
        snapped_gauges_.reserve(size);
178
3.03k
        gauges_.reserve(size);
179
3.03k
      },
180
217k
      [this](Stats::Gauge& gauge) {
181
217k
        snapped_gauges_.push_back(Stats::GaugeSharedPtr(&gauge));
182
217k
        gauges_.push_back(gauge);
183
217k
      });
184
185
3.03k
  store.forEachSinkedHistogram(
186
3.03k
      [this](std::size_t size) {
187
1.97k
        snapped_histograms_.reserve(size);
188
1.97k
        histograms_.reserve(size);
189
1.97k
      },
190
3.03k
      [this](Stats::ParentHistogram& histogram) {
191
42
        snapped_histograms_.push_back(Stats::ParentHistogramSharedPtr(&histogram));
192
42
        histograms_.push_back(histogram);
193
42
      });
194
195
3.03k
  store.forEachSinkedTextReadout(
196
3.03k
      [this](std::size_t size) {
197
3.03k
        snapped_text_readouts_.reserve(size);
198
3.03k
        text_readouts_.reserve(size);
199
3.03k
      },
200
3.07k
      [this](Stats::TextReadout& text_readout) {
201
3.07k
        snapped_text_readouts_.push_back(Stats::TextReadoutSharedPtr(&text_readout));
202
3.07k
        text_readouts_.push_back(text_readout);
203
3.07k
      });
204
205
3.03k
  Upstream::HostUtility::forEachHostMetric(
206
3.03k
      cluster_manager,
207
3.03k
      [this](Stats::PrimitiveCounterSnapshot&& metric) {
208
0
        host_counters_.emplace_back(std::move(metric));
209
0
      },
210
3.03k
      [this](Stats::PrimitiveGaugeSnapshot&& metric) {
211
0
        host_gauges_.emplace_back(std::move(metric));
212
0
      });
213
214
3.03k
  snapshot_time_ = time_source.systemTime();
215
3.03k
}
216
217
void InstanceUtil::flushMetricsToSinks(const std::list<Stats::SinkPtr>& sinks, Stats::Store& store,
218
3.03k
                                       Upstream::ClusterManager& cm, TimeSource& time_source) {
219
  // Create a snapshot and flush to all sinks.
220
  // NOTE: Even if there are no sinks, creating the snapshot has the important property that it
221
  //       latches all counters on a periodic basis. The hot restart code assumes this is being
222
  //       done so this should not be removed.
223
3.03k
  MetricSnapshotImpl snapshot(store, cm, time_source);
224
3.03k
  for (const auto& sink : sinks) {
225
0
    sink->flush(snapshot);
226
0
  }
227
3.03k
}
228
229
3.03k
void InstanceBase::flushStats() {
230
3.03k
  if (stats_flush_in_progress_) {
231
0
    ENVOY_LOG(debug, "skipping stats flush as flush is already in progress");
232
0
    server_stats_->dropped_stat_flushes_.inc();
233
0
    return;
234
0
  }
235
236
3.03k
  stats_flush_in_progress_ = true;
237
3.03k
  ENVOY_LOG(debug, "flushing stats");
238
  // If Envoy is not fully initialized, workers will not be started and the mergeHistograms
239
  // completion callback is not called immediately. As a result, server stats will not be
240
  // updated and flushed to stat sinks. So skip the mergeHistograms call if workers are
241
  // not started yet.
242
3.03k
  if (initManager().state() == Init::Manager::State::Initialized) {
243
    // A shutdown initiated before this callback may prevent this from being called as per
244
    // the semantics documented in ThreadLocal's runOnAllThreads method.
245
1.97k
    stats_store_.mergeHistograms([this]() -> void { flushStatsInternal(); });
246
1.97k
  } else {
247
1.05k
    ENVOY_LOG(debug, "Envoy is not fully initialized, skipping histogram merge and flushing stats");
248
1.05k
    flushStatsInternal();
249
1.05k
  }
250
3.03k
}
251
252
5.00k
void InstanceBase::updateServerStats() {
253
  // mergeParentStatsIfAny() does nothing and returns a struct of 0s if there is no parent.
254
5.00k
  HotRestart::ServerStatsFromParent parent_stats = restarter_.mergeParentStatsIfAny(stats_store_);
255
256
5.00k
  server_stats_->uptime_.set(time(nullptr) - original_start_time_);
257
5.00k
  server_stats_->memory_allocated_.set(Memory::Stats::totalCurrentlyAllocated() +
258
5.00k
                                       parent_stats.parent_memory_allocated_);
259
5.00k
  server_stats_->memory_heap_size_.set(Memory::Stats::totalCurrentlyReserved());
260
5.00k
  server_stats_->memory_physical_size_.set(Memory::Stats::totalPhysicalBytes());
261
5.00k
  if (!options().hotRestartDisabled()) {
262
1.05k
    server_stats_->parent_connections_.set(parent_stats.parent_connections_);
263
1.05k
  }
264
5.00k
  server_stats_->total_connections_.set(listener_manager_->numConnections() +
265
5.00k
                                        parent_stats.parent_connections_);
266
5.00k
  server_stats_->days_until_first_cert_expiring_.set(
267
5.00k
      sslContextManager().daysUntilFirstCertExpires().value_or(0));
268
269
5.00k
  auto secs_until_ocsp_response_expires =
270
5.00k
      sslContextManager().secondsUntilFirstOcspResponseExpires();
271
5.00k
  if (secs_until_ocsp_response_expires) {
272
0
    server_stats_->seconds_until_first_ocsp_response_expiring_.set(
273
0
        secs_until_ocsp_response_expires.value());
274
0
  }
275
5.00k
  server_stats_->state_.set(
276
5.00k
      enumToInt(Utility::serverState(initManager().state(), healthCheckFailed())));
277
5.00k
  server_stats_->stats_recent_lookups_.set(
278
5.00k
      stats_store_.symbolTable().getRecentLookups([](absl::string_view, uint64_t) {}));
279
5.00k
}
280
281
3.03k
void InstanceBase::flushStatsInternal() {
282
3.03k
  updateServerStats();
283
3.03k
  auto& stats_config = config_.statsConfig();
284
3.03k
  InstanceUtil::flushMetricsToSinks(stats_config.sinks(), stats_store_, clusterManager(),
285
3.03k
                                    timeSource());
286
  // TODO(ramaraochavali): consider adding different flush interval for histograms.
287
3.03k
  if (stat_flush_timer_ != nullptr) {
288
3.03k
    stat_flush_timer_->enableTimer(stats_config.flushInterval());
289
3.03k
  }
290
291
3.03k
  stats_flush_in_progress_ = false;
292
3.03k
}
293
294
6.35k
bool InstanceBase::healthCheckFailed() { return !live_.load(); }
295
296
0
ProcessContextOptRef InstanceBase::processContext() {
297
0
  if (process_context_ == nullptr) {
298
0
    return absl::nullopt;
299
0
  }
300
301
0
  return *process_context_;
302
0
}
303
304
namespace {
305
306
0
bool canBeRegisteredAsInlineHeader(const Http::LowerCaseString& header_name) {
307
  // 'set-cookie' cannot currently be registered as an inline header.
308
0
  if (header_name == Http::Headers::get().SetCookie) {
309
0
    return false;
310
0
  }
311
0
  return true;
312
0
}
313
314
absl::Status
315
4.98k
registerCustomInlineHeadersFromBootstrap(const envoy::config::bootstrap::v3::Bootstrap& bootstrap) {
316
4.98k
  for (const auto& inline_header : bootstrap.inline_headers()) {
317
0
    const Http::LowerCaseString lower_case_name(inline_header.inline_header_name());
318
0
    if (!canBeRegisteredAsInlineHeader(lower_case_name)) {
319
0
      return absl::InvalidArgumentError(
320
0
          fmt::format("Header {} cannot be registered as an inline header.",
321
0
                      inline_header.inline_header_name()));
322
0
    }
323
0
    switch (inline_header.inline_header_type()) {
324
0
    case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_HEADER:
325
0
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
326
0
          Http::RequestHeaderMap::header_map_type>(lower_case_name);
327
0
      break;
328
0
    case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_TRAILER:
329
0
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
330
0
          Http::RequestTrailerMap::header_map_type>(lower_case_name);
331
0
      break;
332
0
    case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_HEADER:
333
0
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
334
0
          Http::ResponseHeaderMap::header_map_type>(lower_case_name);
335
0
      break;
336
0
    case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_TRAILER:
337
0
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
338
0
          Http::ResponseTrailerMap::header_map_type>(lower_case_name);
339
0
      break;
340
0
    default:
341
0
      PANIC("not implemented");
342
0
    }
343
0
  }
344
4.98k
  return absl::OkStatus();
345
4.98k
}
346
347
} // namespace
348
349
absl::Status InstanceUtil::loadBootstrapConfig(
350
    envoy::config::bootstrap::v3::Bootstrap& bootstrap, const Options& options,
351
10.5k
    ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api) {
352
10.5k
  const std::string& config_path = options.configPath();
353
10.5k
  const std::string& config_yaml = options.configYaml();
354
10.5k
  const envoy::config::bootstrap::v3::Bootstrap& config_proto = options.configProto();
355
356
  // At least one of config_path, config_yaml, or config_proto should be specified.
357
10.5k
  if (config_path.empty() && config_yaml.empty() && config_proto.ByteSizeLong() == 0) {
358
0
    return absl::InvalidArgumentError(
359
0
        "At least one of --config-path or --config-yaml or Options::configProto() "
360
0
        "should be non-empty");
361
0
  }
362
363
10.5k
  if (!config_path.empty()) {
364
10.5k
    MessageUtil::loadFromFile(config_path, bootstrap, validation_visitor, api);
365
10.5k
  }
366
10.5k
  if (!config_yaml.empty()) {
367
0
    envoy::config::bootstrap::v3::Bootstrap bootstrap_override;
368
0
#ifdef ENVOY_ENABLE_YAML
369
0
    MessageUtil::loadFromYaml(config_yaml, bootstrap_override, validation_visitor);
370
    // TODO(snowp): The fact that we do a merge here doesn't seem to be covered under test.
371
#else
372
    // Treat the yaml as proto
373
    Protobuf::TextFormat::ParseFromString(config_yaml, &bootstrap_override);
374
#endif
375
0
    bootstrap.MergeFrom(bootstrap_override);
376
0
  }
377
10.5k
  if (config_proto.ByteSizeLong() != 0) {
378
0
    bootstrap.MergeFrom(config_proto);
379
0
  }
380
10.5k
  MessageUtil::validate(bootstrap, validation_visitor);
381
10.5k
  return absl::OkStatus();
382
10.5k
}
383
384
void InstanceBase::initialize(Network::Address::InstanceConstSharedPtr local_address,
385
4.98k
                              ComponentFactory& component_factory) {
386
4.98k
  std::function set_up_logger = [&] {
387
0
    TRY_ASSERT_MAIN_THREAD {
388
0
      file_logger_ = std::make_unique<Logger::FileSinkDelegate>(
389
0
          options_.logPath(), access_log_manager_, Logger::Registry::getSink());
390
0
    }
391
0
    END_TRY
392
0
    CATCH(const EnvoyException& e, {
393
0
      throw EnvoyException(
394
0
          fmt::format("Failed to open log-file '{}'. e.what(): {}", options_.logPath(), e.what()));
395
0
    });
396
0
  };
397
398
4.98k
  TRY_ASSERT_MAIN_THREAD {
399
4.98k
    if (!options_.logPath().empty()) {
400
0
      set_up_logger();
401
0
    }
402
4.98k
    restarter_.initialize(*dispatcher_, *this);
403
4.98k
    drain_manager_ = component_factory.createDrainManager(*this);
404
4.98k
    THROW_IF_NOT_OK(initializeOrThrow(std::move(local_address), component_factory));
405
4.98k
  }
406
4.98k
  END_TRY
407
4.98k
  MULTI_CATCH(
408
4.98k
      const EnvoyException& e,
409
4.98k
      {
410
4.98k
        ENVOY_LOG(critical, "error initializing config '{} {} {}': {}",
411
4.98k
                  options_.configProto().DebugString(), options_.configYaml(),
412
4.98k
                  options_.configPath(), e.what());
413
4.98k
        terminate();
414
4.98k
        throw;
415
4.98k
      },
416
4.98k
      {
417
4.98k
        ENVOY_LOG(critical, "error initializing due to unknown exception");
418
4.98k
        terminate();
419
4.98k
        throw;
420
4.98k
      });
421
3.04k
}
422
423
absl::Status InstanceBase::initializeOrThrow(Network::Address::InstanceConstSharedPtr local_address,
424
4.98k
                                             ComponentFactory& component_factory) {
425
4.98k
  ENVOY_LOG(info, "initializing epoch {} (base id={}, hot restart version={})",
426
4.98k
            options_.restartEpoch(), restarter_.baseId(), restarter_.version());
427
428
4.98k
  ENVOY_LOG(info, "statically linked extensions:");
429
366k
  for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) {
430
366k
    ENVOY_LOG(info, "  {}: {}", ext.first, absl::StrJoin(ext.second->registeredNames(), ", "));
431
366k
  }
432
433
  // The main thread is registered for thread local updates so that code that does not care
434
  // whether it runs on the main thread or on workers can still use TLS.
435
  // We do this as early as possible because it has no side effects and ensures that the
436
  // TLS always contains a valid main thread dispatcher when TLS is used.
437
4.98k
  thread_local_.registerThread(*dispatcher_, true);
438
439
  // Handle configuration that needs to take place prior to the main configuration load.
440
4.98k
  RETURN_IF_NOT_OK(InstanceUtil::loadBootstrapConfig(
441
4.98k
      bootstrap_, options_, messageValidationContext().staticValidationVisitor(), *api_));
442
4.98k
  bootstrap_config_update_time_ = time_source_.systemTime();
443
444
4.98k
  if (bootstrap_.has_application_log_config()) {
445
515
    RETURN_IF_NOT_OK(
446
515
        Utility::assertExclusiveLogFormatMethod(options_, bootstrap_.application_log_config()));
447
515
    RETURN_IF_NOT_OK(Utility::maybeSetApplicationLogFormat(bootstrap_.application_log_config()));
448
513
  }
449
450
#ifdef ENVOY_PERFETTO
451
  perfetto::TracingInitArgs args;
452
  // Include in-process events only.
453
  args.backends = perfetto::kInProcessBackend;
454
  perfetto::Tracing::Initialize(args);
455
  perfetto::TrackEvent::Register();
456
457
  // Prepare a configuration for a new "Perfetto" tracing session.
458
  perfetto::TraceConfig cfg;
459
  // TODO(rojkov): make the tracer configurable with either "Perfetto"'s native
460
  // message or custom one embedded into Bootstrap.
461
  cfg.add_buffers()->set_size_kb(1024);
462
  auto* ds_cfg = cfg.add_data_sources()->mutable_config();
463
  ds_cfg->set_name("track_event");
464
465
  const std::string pftrace_path =
466
      PROTOBUF_GET_STRING_OR_DEFAULT(bootstrap_, perf_tracing_file_path, "envoy.pftrace");
467
  // Instantiate a new tracing session.
468
  tracing_session_ = perfetto::Tracing::NewTrace();
469
  tracing_fd_ = open(pftrace_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
470
  if (tracing_fd_ == -1) {
471
    return absl::InvalidArgumentError(
472
        fmt::format("unable to open tracing file {}: {}", pftrace_path, errorDetails(errno)));
473
  }
474
  // Configure the tracing session.
475
  tracing_session_->Setup(cfg, tracing_fd_);
476
  // Enable tracing and block until tracing has started.
477
  tracing_session_->StartBlocking();
478
#endif
479
480
  // Immediately after the bootstrap has been loaded, override the header prefix, if configured to
481
  // do so. This must be set before any other code block references the HeaderValues ConstSingleton.
482
4.98k
  if (!bootstrap_.header_prefix().empty()) {
483
    // setPrefix has a release assert verifying that setPrefix() is not called after prefix() has been used.
484
0
    ThreadSafeSingleton<Http::PrefixValue>::get().setPrefix(bootstrap_.header_prefix().c_str());
485
0
  }
486
487
  // Register Custom O(1) headers from bootstrap.
488
4.98k
  RETURN_IF_NOT_OK(registerCustomInlineHeadersFromBootstrap(bootstrap_));
489
490
4.98k
  ENVOY_LOG(info, "HTTP header map info:");
491
19.9k
  for (const auto& info : Http::HeaderMapImplUtility::getAllHeaderMapImplInfo()) {
492
19.9k
    ENVOY_LOG(info, "  {}: {} bytes: {}", info.name_, info.size_,
493
19.9k
              absl::StrJoin(info.registered_headers_, ","));
494
19.9k
  }
495
496
  // Initialize the regex engine and inject it into the singleton.
497
  // Needs to happen before stats store initialization because the stats
498
  // matcher config can include regexes.
499
4.98k
  regex_engine_ = createRegexEngine(
500
4.98k
      bootstrap_, messageValidationContext().staticValidationVisitor(), serverFactoryContext());
501
502
  // Needs to happen as early as possible in the instantiation to preempt the objects that require
503
  // stats.
504
4.98k
  auto producer_or_error =
505
4.98k
      Stats::TagProducerImpl::createTagProducer(bootstrap_.stats_config(), options_.statsTags());
506
4.98k
  RETURN_IF_NOT_OK_REF(producer_or_error.status());
507
4.61k
  stats_store_.setTagProducer(std::move(producer_or_error.value()));
508
4.61k
  stats_store_.setStatsMatcher(std::make_unique<Stats::StatsMatcherImpl>(
509
4.61k
      bootstrap_.stats_config(), stats_store_.symbolTable(), server_contexts_));
510
4.61k
  stats_store_.setHistogramSettings(
511
4.61k
      std::make_unique<Stats::HistogramSettingsImpl>(bootstrap_.stats_config(), server_contexts_));
512
513
4.61k
  const std::string server_stats_prefix = "server.";
514
4.61k
  const std::string server_compilation_settings_stats_prefix = "server.compilation_settings";
515
4.61k
  server_stats_ = std::make_unique<ServerStats>(
516
4.61k
      ServerStats{ALL_SERVER_STATS(POOL_COUNTER_PREFIX(stats_store_, server_stats_prefix),
517
4.61k
                                   POOL_GAUGE_PREFIX(stats_store_, server_stats_prefix),
518
4.61k
                                   POOL_HISTOGRAM_PREFIX(stats_store_, server_stats_prefix))});
519
4.61k
  server_compilation_settings_stats_ =
520
4.61k
      std::make_unique<CompilationSettings::ServerCompilationSettingsStats>(
521
4.61k
          CompilationSettings::ServerCompilationSettingsStats{ALL_SERVER_COMPILATION_SETTINGS_STATS(
522
4.61k
              POOL_COUNTER_PREFIX(stats_store_, server_compilation_settings_stats_prefix),
523
4.61k
              POOL_GAUGE_PREFIX(stats_store_, server_compilation_settings_stats_prefix),
524
4.61k
              POOL_HISTOGRAM_PREFIX(stats_store_, server_compilation_settings_stats_prefix))});
525
4.61k
  validation_context_.setCounters(server_stats_->static_unknown_fields_,
526
4.61k
                                  server_stats_->dynamic_unknown_fields_,
527
4.61k
                                  server_stats_->wip_protos_);
528
529
4.61k
  memory_allocator_manager_ = std::make_unique<Memory::AllocatorManager>(
530
4.61k
      *api_, *stats_store_.rootScope(), bootstrap_.memory_allocator_manager());
531
532
4.61k
  initialization_timer_ = std::make_unique<Stats::HistogramCompletableTimespanImpl>(
533
4.61k
      server_stats_->initialization_time_ms_, timeSource());
534
4.61k
  server_stats_->concurrency_.set(options_.concurrency());
535
4.61k
  if (!options().hotRestartDisabled()) {
536
2.49k
    server_stats_->hot_restart_epoch_.set(options_.restartEpoch());
537
2.49k
  }
538
4.61k
  InstanceBase::failHealthcheck(false);
539
540
  // Check if the bootstrap has a server version override set; if so, use that as the
541
  // 'server.version' stat.
542
4.61k
  uint64_t version_int;
543
4.61k
  if (bootstrap_.stats_server_version_override().value() > 0) {
544
101
    version_int = bootstrap_.stats_server_version_override().value();
545
4.51k
  } else {
546
4.51k
    if (!StringUtil::atoull(VersionInfo::revision().substr(0, 6).c_str(), version_int, 16)) {
547
0
      return absl::InvalidArgumentError("compiled GIT SHA is invalid. Invalid build.");
548
0
    }
549
4.51k
  }
550
4.61k
  server_stats_->version_.set(version_int);
551
4.61k
  if (VersionInfo::sslFipsCompliant()) {
552
0
    server_compilation_settings_stats_->fips_mode_.set(1);
553
4.61k
  } else {
554
    // Set this explicitly so that the "used" flag is set and the value can be pushed to stats sinks.
555
4.61k
    server_compilation_settings_stats_->fips_mode_.set(0);
556
4.61k
  }
557
558
  // If the user has set user_agent_name in the bootstrap config, use it.
559
  // Default to "envoy" if unset.
560
4.61k
  if (bootstrap_.node().user_agent_name().empty()) {
561
4.18k
    bootstrap_.mutable_node()->set_user_agent_name("envoy");
562
4.18k
  }
563
564
  // If the user has set user_agent_build_version in the bootstrap config, use it.
565
  // Default to the internal server version.
566
4.61k
  if (!bootstrap_.node().user_agent_build_version().has_version()) {
567
4.47k
    *bootstrap_.mutable_node()->mutable_user_agent_build_version() = VersionInfo::buildVersion();
568
4.47k
  }
569
570
318k
  for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) {
571
318k
    auto registered_types = ext.second->registeredTypes();
572
1.20M
    for (const auto& name : ext.second->allRegisteredNames()) {
573
1.20M
      auto* extension = bootstrap_.mutable_node()->add_extensions();
574
1.20M
      extension->set_name(std::string(name));
575
1.20M
      extension->set_category(ext.first);
576
1.20M
      auto const version = ext.second->getFactoryVersion(name);
577
1.20M
      if (version) {
578
0
        *extension->mutable_version() = version.value();
579
0
      }
580
1.20M
      extension->set_disabled(ext.second->isFactoryDisabled(name));
581
1.20M
      auto it = registered_types.find(name);
582
1.20M
      if (it != registered_types.end()) {
583
805k
        std::sort(it->second.begin(), it->second.end());
584
864k
        for (const auto& type_url : it->second) {
585
864k
          extension->add_type_urls(type_url);
586
864k
        }
587
805k
      }
588
1.20M
    }
589
318k
  }
590
591
4.61k
  local_info_ = std::make_unique<LocalInfo::LocalInfoImpl>(
592
4.61k
      stats().symbolTable(), bootstrap_.node(), bootstrap_.node_context_params(), local_address,
593
4.61k
      options_.serviceZone(), options_.serviceClusterName(), options_.serviceNodeName());
594
595
4.61k
  absl::Status creation_status;
596
4.61k
  Configuration::InitialImpl initial_config(bootstrap_, creation_status);
597
4.61k
  RETURN_IF_NOT_OK(creation_status);
598
599
  // Learn original_start_time_ if our parent is still around to inform us of it.
600
4.61k
  const auto parent_admin_shutdown_response = restarter_.sendParentAdminShutdownRequest();
601
4.61k
  if (parent_admin_shutdown_response.has_value()) {
602
0
    original_start_time_ = parent_admin_shutdown_response.value().original_start_time_;
603
    // TODO(soulxu): This is added for switching the reuse port default value to true (#17259).
604
    // It ensures the same default value during the hot restart. This can be removed when
605
    // everyone switches to the new default value.
606
0
    enable_reuse_port_default_ =
607
0
        parent_admin_shutdown_response.value().enable_reuse_port_default_ ? true : false;
608
0
  }
609
610
4.61k
  OptRef<Server::ConfigTracker> config_tracker;
611
4.61k
#ifdef ENVOY_ADMIN_FUNCTIONALITY
612
4.61k
  admin_ = std::make_shared<AdminImpl>(initial_config.admin().profilePath(), *this,
613
4.61k
                                       initial_config.admin().ignoreGlobalConnLimit());
614
615
4.61k
  config_tracker = admin_->getConfigTracker();
616
4.61k
#endif
617
4.61k
  secret_manager_ = std::make_unique<Secret::SecretManagerImpl>(config_tracker);
618
619
4.61k
  loadServerFlags(initial_config.flagsPath());
620
621
  // Initialize the overload manager early so other modules can register for actions.
622
4.61k
  auto overload_manager_or_error = createOverloadManager();
623
4.61k
  RETURN_IF_NOT_OK(overload_manager_or_error.status());
624
4.61k
  overload_manager_ = std::move(*overload_manager_or_error);
625
4.61k
  null_overload_manager_ = createNullOverloadManager();
626
627
4.61k
  maybeCreateHeapShrinker();
628
629
4.61k
  for (const auto& bootstrap_extension : bootstrap_.bootstrap_extensions()) {
630
1
    auto& factory = Config::Utility::getAndCheckFactory<Configuration::BootstrapExtensionFactory>(
631
1
        bootstrap_extension);
632
1
    auto config = Config::Utility::translateAnyToFactoryConfig(
633
1
        bootstrap_extension.typed_config(), messageValidationContext().staticValidationVisitor(),
634
1
        factory);
635
1
    bootstrap_extensions_.push_back(
636
1
        factory.createBootstrapExtension(*config, serverFactoryContext()));
637
1
  }
638
639
  // Register the fatal actions.
640
4.61k
  {
641
4.61k
    FatalAction::FatalActionPtrList safe_actions;
642
4.61k
    FatalAction::FatalActionPtrList unsafe_actions;
643
4.61k
    for (const auto& action_config : bootstrap_.fatal_actions()) {
644
18
      auto& factory =
645
18
          Config::Utility::getAndCheckFactory<Server::Configuration::FatalActionFactory>(
646
18
              action_config.config());
647
18
      auto action = factory.createFatalActionFromProto(action_config, this);
648
649
18
      if (action->isAsyncSignalSafe()) {
650
0
        safe_actions.push_back(std::move(action));
651
18
      } else {
652
18
        unsafe_actions.push_back(std::move(action));
653
18
      }
654
18
    }
655
4.61k
    Envoy::FatalErrorHandler::registerFatalActions(
656
4.61k
        std::move(safe_actions), std::move(unsafe_actions), api_->threadFactory());
657
4.61k
  }
658
659
4.61k
  if (!bootstrap_.default_socket_interface().empty()) {
660
218
    auto& sock_name = bootstrap_.default_socket_interface();
661
218
    auto sock = const_cast<Network::SocketInterface*>(Network::socketInterface(sock_name));
662
218
    if (sock != nullptr) {
663
0
      Network::SocketInterfaceSingleton::clear();
664
0
      Network::SocketInterfaceSingleton::initialize(sock);
665
0
    }
666
218
  }
667
668
4.61k
  ListenerManagerFactory* listener_manager_factory = nullptr;
669
4.61k
  if (bootstrap_.has_listener_manager()) {
670
2
    listener_manager_factory = Config::Utility::getAndCheckFactory<ListenerManagerFactory>(
671
2
        bootstrap_.listener_manager(), false);
672
4.61k
  } else {
673
4.61k
    listener_manager_factory = &Config::Utility::getAndCheckFactoryByName<ListenerManagerFactory>(
674
4.61k
        Config::ServerExtensionValues::get().DEFAULT_LISTENER);
675
4.61k
  }
676
677
  // Workers get created first so they register for thread local updates.
678
4.61k
  listener_manager_ = listener_manager_factory->createListenerManager(
679
4.61k
      *this, nullptr, worker_factory_, bootstrap_.enable_dispatcher_stats(), quic_stat_names_);
680
681
  // We can now initialize stats for threading.
682
4.61k
  stats_store_.initializeThreading(*dispatcher_, thread_local_);
683
684
  // It's now safe to start writing stats from the main thread's dispatcher.
685
4.61k
  if (bootstrap_.enable_dispatcher_stats()) {
686
170
    dispatcher_->initializeStats(*stats_store_.rootScope(), "server.");
687
170
  }
688
689
  // The broad order of initialization from this point on is the following:
690
  // 1. Statically provisioned configuration (bootstrap) is loaded.
691
  // 2. Cluster manager is created and all primary clusters (i.e. with endpoint assignments
692
  //    provisioned statically in bootstrap, discovered through DNS or file based CDS) are
693
  //    initialized.
694
  // 3. Various services are initialized and configured using the bootstrap config.
695
  // 4. RTDS is initialized using primary clusters. This allows runtime overrides to be fully
696
  //    configured before the rest of xDS configuration is provisioned.
697
  // 5. Secondary clusters (with endpoint assignments provisioned by xDS servers) are initialized.
698
  // 6. The rest of the dynamic configuration is provisioned.
699
  //
700
  // Please note: this order requires that RTDS is provisioned using a primary cluster. If RTDS is
701
  // provisioned through ADS then ADS must use a primary cluster as well. This invariant is enforced
702
  // during RTDS initialization and invalid configuration will be rejected.
703
704
  // Runtime gets initialized before the main configuration since during main configuration
705
  // load things may grab a reference to the loader for later use.
706
4.61k
  runtime_ = component_factory.createRuntime(*this, initial_config);
707
4.61k
  validation_context_.setRuntime(runtime());
708
709
4.61k
  if (!runtime().snapshot().getBoolean("envoy.disallow_global_stats", false)) {
710
4.40k
    assert_action_registration_ = Assert::addDebugAssertionFailureRecordAction(
711
4.40k
        [this](const char*) { server_stats_->debug_assertion_failures_.inc(); });
712
4.40k
    envoy_bug_action_registration_ = Assert::addEnvoyBugFailureRecordAction(
713
4.40k
        [this](const char*) { server_stats_->envoy_bug_failures_.inc(); });
714
4.40k
  }
715
716
4.61k
  if (initial_config.admin().address()) {
717
1.97k
#ifdef ENVOY_ADMIN_FUNCTIONALITY
718
    // The admin instance will always be created if admin support is not compiled out.
719
1.97k
    RELEASE_ASSERT(admin_ != nullptr, "Admin instance should be created but actually not.");
720
1.97k
    auto typed_admin = dynamic_cast<AdminImpl*>(admin_.get());
721
1.97k
    RELEASE_ASSERT(typed_admin != nullptr, "Admin implementation is not an AdminImpl.");
722
1.97k
    initial_config.initAdminAccessLog(bootstrap_, typed_admin->factoryContext());
723
1.97k
    admin_->startHttpListener(initial_config.admin().accessLogs(), initial_config.admin().address(),
724
1.97k
                              initial_config.admin().socketOptions());
725
#else
726
    return absl::InvalidArgumentError("Admin address configured but admin support compiled out");
727
#endif
728
2.63k
  } else {
729
2.63k
    ENVOY_LOG(info, "No admin address given, so no admin HTTP server started.");
730
2.63k
  }
731
4.61k
  if (admin_) {
732
4.40k
    config_tracker_entry_ = admin_->getConfigTracker().add(
733
4.40k
        "bootstrap", [this](const Matchers::StringMatcher&) { return dumpBootstrapConfig(); });
734
4.40k
  }
735
4.61k
  if (initial_config.admin().address()) {
736
1.97k
    admin_->addListenerToHandler(handler_.get());
737
1.97k
  }
738
739
  // Once we have runtime we can initialize the SSL context manager.
740
4.61k
  ssl_context_manager_ =
741
4.61k
      std::make_unique<Extensions::TransportSockets::Tls::ContextManagerImpl>(server_contexts_);
742
743
4.61k
  http_server_properties_cache_manager_ =
744
4.61k
      std::make_unique<Http::HttpServerPropertiesCacheManagerImpl>(
745
4.61k
          serverFactoryContext(), messageValidationContext().staticValidationVisitor(),
746
4.61k
          thread_local_);
747
748
4.61k
  cluster_manager_factory_ = std::make_unique<Upstream::ProdClusterManagerFactory>(
749
4.61k
      serverFactoryContext(), stats_store_, thread_local_, http_context_,
750
4.61k
      [this]() -> Network::DnsResolverSharedPtr { return this->getOrCreateDnsResolver(); },
751
4.61k
      *ssl_context_manager_, *secret_manager_, quic_stat_names_, *this);
752
753
  // Now the configuration gets parsed. The configuration may start setting
754
  // thread local data per above. See MainImpl::initialize() for why ConfigImpl
755
  // is constructed as part of the InstanceBase and then populated once
756
  // cluster_manager_factory_ is available.
757
4.61k
  RETURN_IF_NOT_OK(config_.initialize(bootstrap_, *this, *cluster_manager_factory_));
758
759
  // Instruct the listener manager to create the LDS provider if needed. This must be done later
760
  // because various items do not yet exist when the listener manager is created.
761
3.61k
  if (bootstrap_.dynamic_resources().has_lds_config() ||
762
3.61k
      !bootstrap_.dynamic_resources().lds_resources_locator().empty()) {
763
2.27k
    std::unique_ptr<xds::core::v3::ResourceLocator> lds_resources_locator;
764
2.27k
    if (!bootstrap_.dynamic_resources().lds_resources_locator().empty()) {
765
11
      lds_resources_locator = std::make_unique<xds::core::v3::ResourceLocator>(
766
11
          THROW_OR_RETURN_VALUE(Config::XdsResourceIdentifier::decodeUrl(
767
11
                                    bootstrap_.dynamic_resources().lds_resources_locator()),
768
11
                                xds::core::v3::ResourceLocator));
769
11
    }
770
2.27k
    listener_manager_->createLdsApi(bootstrap_.dynamic_resources().lds_config(),
771
2.27k
                                    lds_resources_locator.get());
772
2.27k
  }
773
774
  // We have to defer RTDS initialization until after the cluster manager is
775
  // instantiated (which in turn relies on runtime...).
776
3.61k
  RETURN_IF_NOT_OK(runtime().initialize(clusterManager()));
777
778
3.44k
  clusterManager().setPrimaryClustersInitializedCb(
779
3.44k
      [this]() { onClusterManagerPrimaryInitializationComplete(); });
780
781
3.44k
  auto& stats_config = config_.statsConfig();
782
3.44k
  for (const Stats::SinkPtr& sink : stats_config.sinks()) {
783
0
    stats_store_.addSink(*sink);
784
0
  }
785
3.44k
  if (!stats_config.flushOnAdmin()) {
786
    // Some of the stat sinks may need dispatcher support so don't flush until the main loop starts.
787
    // Just set up the timer.
788
3.02k
    stat_flush_timer_ = dispatcher_->createTimer([this]() -> void { flushStats(); });
789
3.02k
    stat_flush_timer_->enableTimer(stats_config.flushInterval());
790
3.02k
  }
791
792
  // Now that we are initialized, notify the bootstrap extensions.
793
3.44k
  for (auto&& bootstrap_extension : bootstrap_extensions_) {
794
0
    bootstrap_extension->onServerInitialized();
795
0
  }
796
797
  // GuardDog (deadlock detection) object and thread setup before workers are
798
  // started and before our own run() loop runs.
799
3.44k
  main_thread_guard_dog_ = maybeCreateGuardDog("main_thread");
800
3.44k
  worker_guard_dog_ = maybeCreateGuardDog("workers");
801
3.44k
  return absl::OkStatus();
802
3.61k
}
803
804
3.08k
void InstanceBase::onClusterManagerPrimaryInitializationComplete() {
805
  // If RTDS was not configured, the `onRuntimeReady` callback is immediately invoked.
806
3.08k
  runtime().startRtdsSubscriptions([this]() { onRuntimeReady(); });
807
3.08k
}
808
809
2.79k
void InstanceBase::onRuntimeReady() {
810
  // Begin initializing secondary clusters after RTDS configuration has been applied.
811
  // Initializing can throw exceptions, so catch these.
812
2.79k
  TRY_ASSERT_MAIN_THREAD {
813
2.79k
    THROW_IF_NOT_OK(clusterManager().initializeSecondaryClusters(bootstrap_));
814
2.79k
  }
815
2.79k
  END_TRY
816
2.79k
  CATCH(const EnvoyException& e, {
817
2.79k
    ENVOY_LOG(warn, "Skipping initialization of secondary cluster: {}", e.what());
818
2.79k
    shutdown();
819
2.79k
  });
820
821
2.79k
  if (bootstrap_.has_hds_config()) {
822
262
    const auto& hds_config = bootstrap_.hds_config();
823
262
    async_client_manager_ = std::make_unique<Grpc::AsyncClientManagerImpl>(
824
262
        *config_.clusterManager(), thread_local_, server_contexts_, grpc_context_.statNames(),
825
262
        bootstrap_.grpc_async_client_manager_config());
826
262
    TRY_ASSERT_MAIN_THREAD {
827
262
      THROW_IF_NOT_OK(Config::Utility::checkTransportVersion(hds_config));
828
      // HDS does not support xDS-Failover.
829
261
      auto factory_or_error = Config::Utility::factoryForGrpcApiConfigSource(
830
261
          *async_client_manager_, hds_config, *stats_store_.rootScope(), false, 0);
831
261
      THROW_IF_NOT_OK_REF(factory_or_error.status());
832
212
      hds_delegate_ = std::make_unique<Upstream::HdsDelegate>(
833
212
          serverFactoryContext(), *stats_store_.rootScope(),
834
212
          factory_or_error.value()->createUncachedRawAsyncClient(), stats_store_,
835
212
          *ssl_context_manager_, info_factory_);
836
212
    }
837
212
    END_TRY
838
262
    CATCH(const EnvoyException& e, {
839
262
      ENVOY_LOG(warn, "Skipping initialization of HDS cluster: {}", e.what());
840
262
      shutdown();
841
262
    });
842
262
  }
843
844
  // TODO (nezdolik): Fully deprecate this runtime key in the next release.
845
2.79k
  if (runtime().snapshot().get(Runtime::Keys::GlobalMaxCxRuntimeKey)) {
846
0
    ENVOY_LOG(warn,
847
0
              "Usage of the deprecated runtime key {}, consider switching to "
848
0
              "`envoy.resource_monitors.global_downstream_max_connections` instead."
849
0
              "This runtime key will be removed in future.",
850
0
              Runtime::Keys::GlobalMaxCxRuntimeKey);
851
0
  }
852
2.79k
}
853
854
1.97k
void InstanceBase::startWorkers() {
855
  // The callback will be called after workers are started.
856
1.97k
  THROW_IF_NOT_OK(
857
1.97k
      listener_manager_->startWorkers(makeOptRefFromPtr(worker_guard_dog_.get()), [this]() {
858
1.97k
        if (isShutdown()) {
859
1.97k
          return;
860
1.97k
        }
861
862
1.97k
        initialization_timer_->complete();
863
        // Update server stats as soon as initialization is done.
864
1.97k
        updateServerStats();
865
1.97k
        workers_started_ = true;
866
1.97k
        hooks_.onWorkersStarted();
867
        // At this point we are ready to take traffic and all listening ports are up. Notify our
868
        // parent if applicable that they can stop listening and drain.
869
1.97k
        restarter_.drainParentListeners();
870
1.97k
        drain_manager_->startParentShutdownSequence();
871
1.97k
      }));
872
1.97k
}
873
874
Runtime::LoaderPtr InstanceUtil::createRuntime(Instance& server,
875
4.45k
                                               Server::Configuration::Initial& config) {
876
4.45k
#ifdef ENVOY_ENABLE_YAML
877
4.45k
  ENVOY_LOG(info, "runtime: {}", MessageUtil::getYamlStringFromMessage(config.runtime()));
878
4.45k
#endif
879
4.45k
  absl::StatusOr<std::unique_ptr<Runtime::LoaderImpl>> loader = Runtime::LoaderImpl::create(
880
4.45k
      server.dispatcher(), server.threadLocal(), config.runtime(), server.localInfo(),
881
4.45k
      server.stats(), server.api().randomGenerator(),
882
4.45k
      server.messageValidationContext().dynamicValidationVisitor(), server.api());
883
4.45k
  THROW_IF_NOT_OK(loader.status());
884
4.40k
  return std::move(loader.value());
885
4.45k
}
886
887
4.47k
void InstanceBase::loadServerFlags(const absl::optional<std::string>& flags_path) {
888
4.47k
  if (!flags_path) {
889
3.92k
    return;
890
3.92k
  }
891
892
552
  ENVOY_LOG(info, "server flags path: {}", flags_path.value());
893
552
  if (api_->fileSystem().fileExists(flags_path.value() + "/drain")) {
894
8
    ENVOY_LOG(info, "starting server in drain mode");
895
8
    InstanceBase::failHealthcheck(true);
896
8
  }
897
552
}
898
899
RunHelper::RunHelper(Instance& instance, const Options& options, Event::Dispatcher& dispatcher,
900
                     Upstream::ClusterManager& cm, AccessLog::AccessLogManager& access_log_manager,
901
                     Init::Manager& init_manager, OverloadManager& overload_manager,
902
                     OverloadManager& null_overload_manager, std::function<void()> post_init_cb)
903
1.97k
    : init_watcher_("RunHelper", [&instance, post_init_cb]() {
904
1.97k
        if (!instance.isShutdown()) {
905
1.97k
          post_init_cb();
906
1.97k
        }
907
1.97k
      }) {
908
  // Set up signals.
909
  // Since signals are not supported on Windows, we have an internal definition for `SIGTERM`.
910
  // On POSIX it resolves as expected to SIGTERM.
911
  // On Windows we use it internally for all the console events that indicate that we should
912
  // terminate the process.
913
1.97k
  if (options.signalHandlingEnabled()) {
914
1.97k
    sigterm_ = dispatcher.listenForSignal(ENVOY_SIGTERM, [&instance]() {
915
0
      ENVOY_LOG(warn, "caught ENVOY_SIGTERM");
916
0
      instance.shutdown();
917
0
    });
918
1.97k
#ifndef WIN32
919
1.97k
    sigint_ = dispatcher.listenForSignal(SIGINT, [&instance]() {
920
0
      ENVOY_LOG(warn, "caught SIGINT");
921
0
      instance.shutdown();
922
0
    });
923
924
1.97k
    sig_usr_1_ = dispatcher.listenForSignal(SIGUSR1, [&access_log_manager]() {
925
0
      ENVOY_LOG(info, "caught SIGUSR1. Reopening access logs.");
926
0
      access_log_manager.reopen();
927
0
    });
928
929
1.97k
    sig_hup_ = dispatcher.listenForSignal(SIGHUP, []() {
930
0
      ENVOY_LOG(warn, "caught and eating SIGHUP. See documentation for how to hot restart.");
931
0
    });
932
1.97k
#endif
933
1.97k
  }
934
935
  // Start overload manager before workers.
936
1.97k
  overload_manager.start();
937
1.97k
  null_overload_manager.start();
938
939
  // If there is no global limit to the number of active connections, warn on startup.
940
1.97k
  if (!overload_manager.getThreadLocalOverloadState().isResourceMonitorEnabled(
941
1.97k
          Server::OverloadProactiveResourceName::GlobalDownstreamMaxConnections)) {
942
1.97k
    ENVOY_LOG(
943
1.97k
        warn,
944
1.97k
        "There is no configured limit to the number of allowed active downstream "
945
1.97k
        "connections. Configure a "
946
1.97k
        "limit in `envoy.resource_monitors.global_downstream_max_connections` resource monitor.");
947
1.97k
  }
948
949
  // Register for cluster manager init notification. We don't start serving worker traffic until
950
  // upstream clusters are initialized which may involve running the event loop. Note however that
951
  // this can fire immediately if all clusters have already initialized. Also note that we need
952
  // to guard against shutdown at two different levels since SIGTERM can come in once the run loop
953
  // starts.
954
1.97k
  cm.setInitializedCb([&instance, &init_manager, &cm, this]() {
955
1.97k
    if (instance.isShutdown()) {
956
0
      return;
957
0
    }
958
959
1.97k
    const auto type_url = Config::getTypeUrl<envoy::config::route::v3::RouteConfiguration>();
960
    // Pause RDS to ensure that we don't send any requests until we've
961
    // subscribed to all the RDS resources. The subscriptions happen in the init callbacks,
962
    // so we pause RDS until we've completed all the callbacks.
963
1.97k
    Config::ScopedResume maybe_resume_rds;
964
1.97k
    if (cm.adsMux()) {
965
1.97k
      maybe_resume_rds = cm.adsMux()->pause(type_url);
966
1.97k
    }
967
968
1.97k
    ENVOY_LOG(info, "all clusters initialized. initializing init manager");
969
1.97k
    init_manager.initialize(init_watcher_);
970
971
    // Now that we've executed all the init callbacks we can resume RDS
972
    // as we've subscribed to all the statically defined RDS resources.
973
    // This is done by tearing down the maybe_resume_rds Cleanup object.
974
1.97k
  });
975
1.97k
}
976
977
1.97k
void InstanceBase::run() {
978
  // RunHelper exists primarily to facilitate testing of how we respond to early shutdown during
979
  // startup (see RunHelperTest in server_test.cc).
980
1.97k
  const auto run_helper =
981
1.97k
      RunHelper(*this, options_, *dispatcher_, clusterManager(), access_log_manager_, init_manager_,
982
1.97k
                overloadManager(), nullOverloadManager(), [this] {
983
1.97k
                  notifyCallbacksForStage(Stage::PostInit);
984
1.97k
                  startWorkers();
985
1.97k
                });
986
987
  // Run the main dispatch loop waiting to exit.
988
1.97k
  ENVOY_LOG(info, "starting main dispatch loop");
989
1.97k
  WatchDogSharedPtr watchdog;
990
1.97k
  if (main_thread_guard_dog_) {
991
1.97k
    watchdog = main_thread_guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(),
992
1.97k
                                                      "main_thread", *dispatcher_);
993
1.97k
  }
994
1.97k
  dispatcher_->post([this] { notifyCallbacksForStage(Stage::Startup); });
995
1.97k
  dispatcher_->run(Event::Dispatcher::RunType::Block);
996
1.97k
  ENVOY_LOG(info, "main dispatch loop exited");
997
1.97k
  if (main_thread_guard_dog_) {
998
1.97k
    main_thread_guard_dog_->stopWatching(watchdog);
999
1.97k
  }
1000
1.97k
  watchdog.reset();
1001
1002
1.97k
  terminate();
1003
1.97k
}
1004
1005
8.90k
void InstanceBase::terminate() {
1006
8.90k
  if (terminated_) {
1007
3.92k
    return;
1008
3.92k
  }
1009
4.98k
  terminated_ = true;
1010
1011
  // Before starting to shut down anything else, stop slot destruction updates.
1012
4.98k
  thread_local_.shutdownGlobalThreading();
1013
1014
  // Before the workers start exiting we should disable stat threading.
1015
4.98k
  stats_store_.shutdownThreading();
1016
1017
  // TODO: figure out the correct fix: https://github.com/envoyproxy/envoy/issues/15072.
1018
4.98k
  std::vector<std::string> muxes = {
1019
4.98k
      "envoy.config_mux.new_grpc_mux_factory", "envoy.config_mux.grpc_mux_factory",
1020
4.98k
      "envoy.config_mux.delta_grpc_mux_factory", "envoy.config_mux.sotw_grpc_mux_factory"};
1021
19.9k
  for (const auto& name : muxes) {
1022
19.9k
    auto* factory = Config::Utility::getFactoryByName<Config::MuxFactory>(name);
1023
19.9k
    if (factory) {
1024
19.9k
      factory->shutdownAll();
1025
19.9k
    }
1026
19.9k
  }
1027
1028
4.98k
  if (overload_manager_) {
1029
4.47k
    overload_manager_->stop();
1030
4.47k
  }
1031
1032
  // Shut down all the workers now that the main dispatch loop is done.
1033
4.98k
  if (listener_manager_ != nullptr) {
1034
4.45k
    listener_manager_->stopWorkers();
1035
4.45k
  }
1036
1037
  // Only flush if we have not been hot restarted.
1038
4.98k
  if (stat_flush_timer_) {
1039
3.02k
    flushStats();
1040
3.02k
  }
1041
1042
4.98k
  if (config_.clusterManager() != nullptr) {
1043
4.31k
    config_.clusterManager()->shutdown();
1044
4.31k
  }
1045
4.98k
  handler_.reset();
1046
4.98k
  thread_local_.shutdownThread();
1047
4.98k
  restarter_.shutdown();
1048
4.98k
  ENVOY_LOG(info, "exiting");
1049
4.98k
  ENVOY_FLUSH_LOG();
1050
4.98k
  FatalErrorHandler::clearFatalActionsOnTerminate();
1051
4.98k
}
1052
1053
65.2k
Runtime::Loader& InstanceBase::runtime() { return *runtime_; }
1054
1055
2.10k
void InstanceBase::shutdown() {
1056
2.10k
  ENVOY_LOG(info, "shutting down server instance");
1057
2.10k
  shutdown_ = true;
1058
2.10k
  restarter_.sendParentTerminateRequest();
1059
2.10k
  notifyCallbacksForStage(Stage::ShutdownExit, [this] { dispatcher_->exit(); });
1060
2.10k
}
1061
1062
0
void InstanceBase::shutdownAdmin() {
1063
0
  ENVOY_LOG(warn, "shutting down admin due to child startup");
1064
0
  stat_flush_timer_.reset();
1065
0
  handler_->stopListeners();
1066
0
  if (admin_) {
1067
0
    admin_->closeSocket();
1068
0
  }
1069
1070
  // If we still have a parent, it should be terminated now that we have a child.
1071
0
  ENVOY_LOG(warn, "terminating parent process");
1072
0
  restarter_.sendParentTerminateRequest();
1073
0
}
1074
1075
ServerLifecycleNotifier::HandlePtr InstanceBase::registerCallback(Stage stage,
1076
0
                                                                  StageCallback callback) {
1077
0
  auto& callbacks = stage_callbacks_[stage];
1078
0
  return std::make_unique<LifecycleCallbackHandle<StageCallback>>(callbacks, callback);
1079
0
}
1080
1081
ServerLifecycleNotifier::HandlePtr
1082
0
InstanceBase::registerCallback(Stage stage, StageCallbackWithCompletion callback) {
1083
0
  ASSERT(stage == Stage::ShutdownExit);
1084
0
  auto& callbacks = stage_completable_callbacks_[stage];
1085
0
  return std::make_unique<LifecycleCallbackHandle<StageCallbackWithCompletion>>(callbacks,
1086
0
                                                                                callback);
1087
0
}
1088
1089
6.05k
void InstanceBase::notifyCallbacksForStage(Stage stage, std::function<void()> completion_cb) {
1090
6.05k
  ASSERT_IS_MAIN_OR_TEST_THREAD();
1091
6.05k
  const auto stage_it = stage_callbacks_.find(stage);
1092
6.05k
  if (stage_it != stage_callbacks_.end()) {
1093
0
    LifecycleNotifierCallbacks& callbacks = stage_it->second;
1094
0
    for (auto callback_it = callbacks.begin(); callback_it != callbacks.end();) {
1095
0
      StageCallback callback = *callback_it;
1096
      // Increment the iterator before invoking the callback in case the
1097
      // callback deletes the handle which will unregister itself and
1098
      // invalidate this iterator if we're still pointing at it.
1099
0
      ++callback_it;
1100
0
      callback();
1101
0
    }
1102
0
  }
1103
1104
  // Wrap completion_cb so that it only gets invoked when all callbacks for this stage
1105
  // have finished their work.
1106
6.05k
  std::shared_ptr<void> cb_guard(
1107
6.05k
      new Cleanup([this, completion_cb]() { dispatcher_->post(completion_cb); }));
1108
1109
  // Registrations which take a completion callback are typically implemented by executing a
1110
  // callback on all worker threads using Slot::runOnAllThreads, which will hang indefinitely if
1111
  // worker threads have not been started, so we need to skip notifications if Envoy is shut down
1112
  // early, before workers have started.
1113
6.05k
  if (workers_started_) {
1114
3.93k
    const auto it2 = stage_completable_callbacks_.find(stage);
1115
3.93k
    if (it2 != stage_completable_callbacks_.end()) {
1116
0
      ENVOY_LOG(info, "Notifying {} callback(s) with completion.", it2->second.size());
1117
0
      for (const StageCallbackWithCompletion& callback : it2->second) {
1118
0
        callback([cb_guard] {});
1119
0
      }
1120
0
    }
1121
3.93k
  }
1122
6.05k
}
1123
1124
0
ProtobufTypes::MessagePtr InstanceBase::dumpBootstrapConfig() {
1125
0
  auto config_dump = std::make_unique<envoy::admin::v3::BootstrapConfigDump>();
1126
0
  config_dump->mutable_bootstrap()->MergeFrom(bootstrap_);
1127
0
  TimestampUtil::systemClockToTimestamp(bootstrap_config_update_time_,
1128
0
                                        *(config_dump->mutable_last_updated()));
1129
0
  return config_dump;
1130
0
}
1131
1132
0
Network::DnsResolverSharedPtr InstanceBase::getOrCreateDnsResolver() {
1133
0
  if (!dns_resolver_) {
1134
0
    envoy::config::core::v3::TypedExtensionConfig typed_dns_resolver_config;
1135
0
    Network::DnsResolverFactory& dns_resolver_factory =
1136
0
        Network::createDnsResolverFactoryFromProto(bootstrap_, typed_dns_resolver_config);
1137
0
    dns_resolver_ = THROW_OR_RETURN_VALUE(
1138
0
        dns_resolver_factory.createDnsResolver(dispatcher(), api(), typed_dns_resolver_config),
1139
0
        Network::DnsResolverSharedPtr);
1140
0
  }
1141
0
  return dns_resolver_;
1142
0
}
1143
1144
3.75k
bool InstanceBase::enableReusePortDefault() { return enable_reuse_port_default_; }
1145
1146
} // namespace Server
1147
} // namespace Envoy