1
#include "source/server/server.h"
2

            
3
#include <csignal>
4
#include <cstdint>
5
#include <ctime>
6
#include <functional>
7
#include <memory>
8
#include <string>
9

            
10
#include "envoy/admin/v3/config_dump.pb.h"
11
#include "envoy/common/exception.h"
12
#include "envoy/common/time.h"
13
#include "envoy/config/bootstrap/v3/bootstrap.pb.h"
14
#include "envoy/config/bootstrap/v3/bootstrap.pb.validate.h"
15
#include "envoy/event/dispatcher.h"
16
#include "envoy/event/signal.h"
17
#include "envoy/event/timer.h"
18
#include "envoy/network/dns.h"
19
#include "envoy/registry/registry.h"
20
#include "envoy/server/bootstrap_extension_config.h"
21
#include "envoy/server/instance.h"
22
#include "envoy/server/options.h"
23
#include "envoy/stats/histogram.h"
24
#include "envoy/stats/stats.h"
25
#include "envoy/upstream/cluster_manager.h"
26

            
27
#include "source/common/api/api_impl.h"
28
#include "source/common/api/os_sys_calls_impl.h"
29
#include "source/common/common/enum_to_int.h"
30
#include "source/common/common/mutex_tracer_impl.h"
31
#include "source/common/common/notification.h"
32
#include "source/common/common/utility.h"
33
#include "source/common/config/utility.h"
34
#include "source/common/config/well_known_names.h"
35
#include "source/common/config/xds_manager_impl.h"
36
#include "source/common/config/xds_resource.h"
37
#include "source/common/http/codes.h"
38
#include "source/common/http/headers.h"
39
#include "source/common/local_info/local_info_impl.h"
40
#include "source/common/network/address_impl.h"
41
#include "source/common/network/dns_resolver/dns_factory_util.h"
42
#include "source/common/network/socket_interface.h"
43
#include "source/common/network/socket_interface_impl.h"
44
#include "source/common/protobuf/utility.h"
45
#include "source/common/runtime/runtime_impl.h"
46
#include "source/common/runtime/runtime_keys.h"
47
#include "source/common/signal/fatal_error_handler.h"
48
#include "source/common/stats/stats_matcher_impl.h"
49
#include "source/common/stats/tag_producer_impl.h"
50
#include "source/common/stats/thread_local_store.h"
51
#include "source/common/stats/timespan_impl.h"
52
#include "source/common/tls/context_manager_impl.h"
53
#include "source/common/upstream/cluster_manager_impl.h"
54
#include "source/common/version/version.h"
55
#include "source/server/configuration_impl.h"
56
#include "source/server/listener_hooks.h"
57
#include "source/server/listener_manager_factory.h"
58
#include "source/server/regex_engine.h"
59
#include "source/server/utils.h"
60

            
61
namespace Envoy {
62
namespace Server {
63

            
64
namespace {
65
10691
// Builds the default connection handler for the given dispatcher. All standard
// builds register "envoy.connection_handler.default", so the nullptr path is
// defensive only.
std::unique_ptr<ConnectionHandler> getHandler(Event::Dispatcher& dispatcher) {
  auto* factory = Config::Utility::getFactoryByName<ConnectionHandlerFactory>(
      "envoy.connection_handler.default");
  if (factory == nullptr) {
    ENVOY_LOG_MISC(debug, "Unable to find envoy.connection_handler.default factory");
    return nullptr;
  }
  return factory->createConnectionHandler(dispatcher, absl::nullopt);
}
75

            
76
} // namespace
77

            
78
// Constructs the core server instance. Nearly all work happens in the member
// initializer list; initialization order follows member declaration order, and
// several members depend on earlier ones (api_ on random_generator_,
// dispatcher_ on api_, access_log_manager_/handler_ on dispatcher_), so the
// list must not be reordered.
InstanceBase::InstanceBase(Init::Manager& init_manager, const Options& options,
                           Event::TimeSystem& time_system, ListenerHooks& hooks,
                           HotRestart& restarter, Stats::StoreRoot& store,
                           Thread::BasicLockable& access_log_lock,
                           Random::RandomGeneratorPtr&& random_generator,
                           ThreadLocal::Instance& tls, Thread::ThreadFactory& thread_factory,
                           Filesystem::Instance& file_system,
                           std::unique_ptr<ProcessContext> process_context,
                           Buffer::WatermarkFactorySharedPtr watermark_factory)
    : init_manager_(init_manager), live_(false), options_(options),
      // Validation knobs are taken from the command-line options; note the
      // deliberate negation: the context stores "allow", options store "reject".
      validation_context_(options_.allowUnknownStaticFields(),
                          !options.rejectUnknownDynamicFields(),
                          options.ignoreUnknownDynamicFields(), options.skipDeprecatedLogs()),
      time_source_(time_system), restarter_(restarter), start_time_(time(nullptr)),
      original_start_time_(start_time_), stats_store_(store), thread_local_(tls),
      random_generator_(std::move(random_generator)),
      // api_ must be built before dispatcher_ below. The optional process
      // context is passed by reference wrapper here; ownership moves into
      // process_context_ further down.
      api_(new Api::Impl(
          thread_factory, store, time_system, file_system, *random_generator_, bootstrap_,
          process_context ? ProcessContextOptRef(std::ref(*process_context)) : absl::nullopt,
          watermark_factory)),
      dispatcher_(api_->allocateDispatcher("main_thread")),
      access_log_manager_(options.fileFlushIntervalMsec(), options.fileFlushMinSizeKB(), *api_,
                          *dispatcher_, access_log_lock, store),
      handler_(getHandler(*dispatcher_)), worker_factory_(thread_local_, *api_, hooks),
      // Mutex contention tracing is opt-in via the command line.
      mutex_tracer_(options.mutexTracingEnabled() ? &Envoy::MutexTracerImpl::getOrCreateTracer()
                                                  : nullptr),
      grpc_context_(store.symbolTable()), http_context_(store.symbolTable()),
      router_context_(store.symbolTable()), process_context_(std::move(process_context)),
      hooks_(hooks), quic_stat_names_(store.symbolTable()), server_contexts_(*this),
      enable_reuse_port_default_(true), stats_flush_in_progress_(false) {
  // Register the server factory context on the main thread.
  Configuration::ServerFactoryContextInstance::initialize(&server_contexts_);
}
111

            
112
10691
// Tears down the server. The destruction order here is deliberate and
// documented inline; do not reorder these statements.
InstanceBase::~InstanceBase() {
  terminate();

  // Clear the server factory context on the main thread.
  Configuration::ServerFactoryContextInstance::clear();

  // Stop logging to file before all the AccessLogManager and its dependencies are
  // destructed to avoid crashing at shutdown.
  file_logger_.reset();

  // Destruct the ListenerManager explicitly, before InstanceBase's local init_manager_ is
  // destructed.
  //
  // The ListenerManager's DestinationPortsMap contains FilterChainSharedPtrs. There is a rare race
  // condition where one of these FilterChains contains an HttpConnectionManager, which contains an
  // RdsRouteConfigProvider, which contains an RdsRouteConfigSubscriptionSharedPtr. Since
  // RdsRouteConfigSubscription is an Init::Target, ~RdsRouteConfigSubscription triggers a callback
  // set at initialization, which goes to unregister it from the top-level InitManager, which has
  // already been destructed (use-after-free) causing a segfault.
  ENVOY_LOG(debug, "destroying listener manager");
  listener_manager_.reset();
  ENVOY_LOG(debug, "destroyed listener manager");
  // Shut down the main dispatcher only after the listener manager is gone so
  // no listener callbacks can run against a dead event loop.
  dispatcher_->shutdown();
  ENVOY_LOG(debug, "shut down dispatcher");

#ifdef ENVOY_PERFETTO
  if (tracing_session_ != nullptr) {
    // Flush the trace data.
    perfetto::TrackEvent::Flush();
    // Disable tracing and block until tracing has stopped.
    tracing_session_->StopBlocking();
    close(tracing_fd_);
  }
#endif
}
147

            
148
294001
// Returns the cluster manager. It is created during configuration load, so
// callers must not invoke this before the server configuration is initialized.
Upstream::ClusterManager& InstanceBase::clusterManager() {
  auto* cluster_manager = config_.clusterManager();
  ASSERT(cluster_manager != nullptr);
  return *cluster_manager;
}
152

            
153
8
// Const overload of clusterManager(); same precondition: configuration must be
// loaded before this is called.
const Upstream::ClusterManager& InstanceBase::clusterManager() const {
  const auto* cluster_manager = config_.clusterManager();
  ASSERT(cluster_manager != nullptr);
  return *cluster_manager;
}
157

            
158
2
// Stops every listener and kicks off a full drain sequence. When no extra
// shutdown options are supplied, default-constructed options are used.
void InstanceBase::drainListeners(OptRef<const Network::ExtraShutdownListenerOptions> options) {
  ENVOY_LOG(info, "closing and draining listeners");
  const Network::ExtraShutdownListenerOptions default_options{};
  listener_manager_->stopListeners(ListenerManager::StopListenersType::All,
                                   options.has_value() ? *options : default_options);
  drain_manager_->startDrainSequence(Network::DrainDirection::All, []() {});
}
165

            
166
10689
// Flips the server's liveness state: fail == true marks the server as failing
// health checks. The stats gauge is set from a re-load of the atomic so the
// gauge always mirrors the flag's current value.
void InstanceBase::failHealthcheck(bool fail) {
  live_.store(!fail);
  server_stats_->live_.set(live_.load());
}
170

            
171
// Captures a point-in-time snapshot of all sinked stats (counters, gauges,
// histograms, text readouts) plus per-host primitive metrics. Each pass first
// reserves capacity via the size callback, then records both a shared pointer
// (snapped_*, keeping the stat alive for the snapshot's lifetime) and a
// reference/value entry used by sinks during flush.
MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store,
                                       Upstream::ClusterManager& cluster_manager,
                                       TimeSource& time_source) {
  store.forEachSinkedCounter(
      [this](std::size_t size) {
        snapped_counters_.reserve(size);
        counters_.reserve(size);
      },
      [this](Stats::Counter& counter) {
        snapped_counters_.push_back(Stats::CounterSharedPtr(&counter));
        // NOTE(review): latch() appears to yield the per-period delta for this
        // counter — confirm against Stats::Counter's contract.
        counters_.push_back({counter.latch(), counter});
      });

  store.forEachSinkedGauge(
      [this](std::size_t size) {
        snapped_gauges_.reserve(size);
        gauges_.reserve(size);
      },
      [this](Stats::Gauge& gauge) {
        snapped_gauges_.push_back(Stats::GaugeSharedPtr(&gauge));
        gauges_.push_back(gauge);
      });

  store.forEachSinkedHistogram(
      [this](std::size_t size) {
        snapped_histograms_.reserve(size);
        histograms_.reserve(size);
      },
      [this](Stats::ParentHistogram& histogram) {
        snapped_histograms_.push_back(Stats::ParentHistogramSharedPtr(&histogram));
        histograms_.push_back(histogram);
      });

  store.forEachSinkedTextReadout(
      [this](std::size_t size) {
        snapped_text_readouts_.reserve(size);
        text_readouts_.reserve(size);
      },
      [this](Stats::TextReadout& text_readout) {
        snapped_text_readouts_.push_back(Stats::TextReadoutSharedPtr(&text_readout));
        text_readouts_.push_back(text_readout);
      });

  // Per-host metrics are snapshotted by value (moved in), not by pointer.
  Upstream::HostUtility::forEachHostMetric(
      cluster_manager,
      [this](Stats::PrimitiveCounterSnapshot&& metric) {
        host_counters_.emplace_back(std::move(metric));
      },
      [this](Stats::PrimitiveGaugeSnapshot&& metric) {
        host_gauges_.emplace_back(std::move(metric));
      });

  // Timestamp is taken last, after all stats have been collected.
  snapshot_time_ = time_source.systemTime();
}
225

            
226
// Builds a metric snapshot and flushes it to every configured sink.
void InstanceUtil::flushMetricsToSinks(const std::list<Stats::SinkPtr>& sinks, Stats::Store& store,
                                       Upstream::ClusterManager& cm, TimeSource& time_source) {
  // Create a snapshot and flush to all sinks.
  // NOTE: Even if there are no sinks, creating the snapshot has the important property that it
  //       latches all counters on a periodic basis. The hot restart code assumes this is being
  //       done so this should not be removed.
  MetricSnapshotImpl snapshot(store, cm, time_source);
  for (const auto& sink : sinks) {
    sink->flush(snapshot);
  }
}
237

            
238
11902
void InstanceBase::flushStats() {
239
11902
  if (stats_flush_in_progress_) {
240
4
    ENVOY_LOG(debug, "skipping stats flush as flush is already in progress");
241
4
    server_stats_->dropped_stat_flushes_.inc();
242
4
    return;
243
4
  }
244

            
245
11898
  stats_flush_in_progress_ = true;
246
11898
  ENVOY_LOG(debug, "flushing stats");
247
  // If Envoy is not fully initialized, workers will not be started and mergeHistograms
248
  // completion callback is not called immediately. As a result of this server stats will
249
  // not be updated and flushed to stat sinks. So skip mergeHistograms call if workers are
250
  // not started yet.
251
11898
  if (initManager().state() == Init::Manager::State::Initialized) {
252
    // A shutdown initiated before this callback may prevent this from being called as per
253
    // the semantics documented in ThreadLocal's runOnAllThreads method.
254
11575
    stats_store_.mergeHistograms([this]() -> void { flushStatsInternal(); });
255
11663
  } else {
256
323
    ENVOY_LOG(debug, "Envoy is not fully initialized, skipping histogram merge and flushing stats");
257
323
    flushStatsInternal();
258
323
  }
259
11898
}
260

            
261
22365
// Refreshes the server.* gauges (uptime, memory, connections, cert expiry,
// state) prior to a flush. Parent stats are merged in during hot restart.
void InstanceBase::updateServerStats() {
  // mergeParentStatsIfAny() does nothing and returns a struct of 0s if there is no parent.
  HotRestart::ServerStatsFromParent parent_stats = restarter_.mergeParentStatsIfAny(stats_store_);

  // Uptime is measured from the original (pre-hot-restart) start time.
  server_stats_->uptime_.set(time(nullptr) - original_start_time_);
  server_stats_->memory_allocated_.set(Memory::Stats::totalCurrentlyAllocated() +
                                       parent_stats.parent_memory_allocated_);
  server_stats_->memory_heap_size_.set(Memory::Stats::totalCurrentlyReserved());
  server_stats_->memory_physical_size_.set(Memory::Stats::totalPhysicalBytes());
  // Parent connection count is only meaningful when hot restart is enabled.
  if (!options().hotRestartDisabled()) {
    server_stats_->parent_connections_.set(parent_stats.parent_connections_);
  }
  server_stats_->total_connections_.set(listener_manager_->numConnections() +
                                        parent_stats.parent_connections_);
  // 0 is reported when no certificate expiry is known.
  server_stats_->days_until_first_cert_expiring_.set(
      sslContextManager().daysUntilFirstCertExpires().value_or(0));

  auto secs_until_ocsp_response_expires =
      sslContextManager().secondsUntilFirstOcspResponseExpires();
  // Unlike cert expiry above, the OCSP gauge is left untouched when unknown.
  if (secs_until_ocsp_response_expires) {
    server_stats_->seconds_until_first_ocsp_response_expiring_.set(
        secs_until_ocsp_response_expires.value());
  }
  server_stats_->state_.set(
      enumToInt(Utility::serverState(initManager().state(), healthCheckFailed())));
  // The no-op callback is required by the API; only the lookup count is wanted.
  server_stats_->stats_recent_lookups_.set(
      stats_store_.symbolTable().getRecentLookups([](absl::string_view, uint64_t) {}));
}
289

            
290
11891
void InstanceBase::flushStatsInternal() {
291
11891
  updateServerStats();
292
11891
  auto& stats_config = config_.statsConfig();
293
11891
  InstanceUtil::flushMetricsToSinks(stats_config.sinks(), stats_store_, clusterManager(),
294
11891
                                    timeSource());
295
11891
  if (const auto evict_on_flush = stats_config.evictOnFlush(); evict_on_flush > 0) {
296
5
    stats_eviction_counter_ = (stats_eviction_counter_ + 1) % evict_on_flush;
297
5
    if (stats_eviction_counter_ == 0) {
298
2
      stats_store_.evictUnused();
299
2
    }
300
5
  }
301
  // TODO(ramaraochavali): consider adding different flush interval for histograms.
302
11891
  if (stat_flush_timer_ != nullptr) {
303
11888
    stat_flush_timer_->enableTimer(stats_config.flushInterval());
304
11888
  }
305

            
306
11891
  stats_flush_in_progress_ = false;
307
11891
}
308

            
309
69659
// The server fails health checks exactly when it is not live.
bool InstanceBase::healthCheckFailed() {
  const bool currently_live = live_.load();
  return !currently_live;
}
310

            
311
4
// Returns the optional process context supplied at construction, or nullopt
// when none was provided.
ProcessContextOptRef InstanceBase::processContext() {
  if (process_context_ != nullptr) {
    return *process_context_;
  }
  return absl::nullopt;
}
318

            
319
namespace {
320

            
321
5
// 'set-cookie' cannot currently be registered as an inline header; everything
// else is allowed.
bool canBeRegisteredAsInlineHeader(const Http::LowerCaseString& header_name) {
  return !(header_name == Http::Headers::get().SetCookie);
}
328

            
329
// Registers each custom inline header declared in the bootstrap with the
// process-wide CustomInlineHeaderRegistry, dispatching on the header's map
// type (request/response x header/trailer). Returns InvalidArgumentError for
// headers that cannot be inlined (currently only 'set-cookie').
absl::Status
registerCustomInlineHeadersFromBootstrap(const envoy::config::bootstrap::v3::Bootstrap& bootstrap) {
  for (const auto& inline_header : bootstrap.inline_headers()) {
    const Http::LowerCaseString lower_case_name(inline_header.inline_header_name());
    if (!canBeRegisteredAsInlineHeader(lower_case_name)) {
      return absl::InvalidArgumentError(
          fmt::format("Header {} cannot be registered as an inline header.",
                      inline_header.inline_header_name()));
    }
    switch (inline_header.inline_header_type()) {
    case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_HEADER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::RequestHeaderMap::header_map_type>(lower_case_name);
      break;
    case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_TRAILER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::RequestTrailerMap::header_map_type>(lower_case_name);
      break;
    case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_HEADER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::ResponseHeaderMap::header_map_type>(lower_case_name);
      break;
    case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_TRAILER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::ResponseTrailerMap::header_map_type>(lower_case_name);
      break;
    default:
      // Unrecognized enum values (proto sentinel values) are a programming
      // error, not a configuration error.
      PANIC("not implemented");
    }
  }
  return absl::OkStatus();
}
361

            
362
} // namespace
363

            
364
// Loads the bootstrap proto from the three possible option sources, merging in
// order: file (--config-path), then YAML (--config-yaml), then an in-process
// proto (Options::configProto()). Later sources override earlier ones via
// proto MergeFrom. Validates the merged result before returning.
absl::Status InstanceUtil::loadBootstrapConfig(
    envoy::config::bootstrap::v3::Bootstrap& bootstrap, const Options& options,
    ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api) {
  const std::string& config_path = options.configPath();
  const std::string& config_yaml = options.configYaml();
  const envoy::config::bootstrap::v3::Bootstrap& config_proto = options.configProto();

  // One of config_path and config_yaml or bootstrap should be specified.
  if (config_path.empty() && config_yaml.empty() && config_proto.ByteSizeLong() == 0) {
    return absl::InvalidArgumentError(
        "At least one of --config-path or --config-yaml or Options::configProto() "
        "should be non-empty");
  }

  if (!config_path.empty()) {
    RETURN_IF_NOT_OK(MessageUtil::loadFromFile(config_path, bootstrap, validation_visitor, api));
  }
  if (!config_yaml.empty()) {
    envoy::config::bootstrap::v3::Bootstrap bootstrap_override;
#ifdef ENVOY_ENABLE_YAML
    MessageUtil::loadFromYaml(config_yaml, bootstrap_override, validation_visitor);
    // TODO(snowp): The fact that we do a merge here doesn't seem to be covered under test.
#else
    // Treat the yaml as proto
    // NOTE(review): ParseFromString's boolean result is ignored here, so a
    // malformed config string is silently dropped in non-YAML builds — confirm
    // whether this is intentional.
    Protobuf::TextFormat::ParseFromString(config_yaml, &bootstrap_override);
#endif
    bootstrap.MergeFrom(bootstrap_override);
  }
  if (config_proto.ByteSizeLong() != 0) {
    bootstrap.MergeFrom(config_proto);
  }
  // Validate only the fully merged bootstrap.
  MessageUtil::validate(bootstrap, validation_visitor);
  return absl::OkStatus();
}
398

            
399
10671
void InstanceUtil::raiseFileLimits() {
400
10671
  if (!Runtime::runtimeFeatureEnabled("envoy.restart_features.raise_file_limits")) {
401
    return;
402
  }
403
10671
  struct rlimit rlim;
404
10671
  if (const auto result = Api::OsSysCallsSingleton::get().getrlimit(RLIMIT_NOFILE, &rlim);
405
10671
      result.return_value_ != 0) {
406
1
    ENVOY_LOG(warn, "Failed to read file descriptor limit, error {}.", errorDetails(result.errno_));
407
1
    return;
408
1
  }
409
10670
  const auto old = rlim.rlim_cur;
410
10670
  if (old == rlim.rlim_max) {
411
10668
    return;
412
10668
  }
413
2
  rlim.rlim_cur = rlim.rlim_max;
414
2
  if (const auto result = Api::OsSysCallsSingleton::get().setrlimit(RLIMIT_NOFILE, &rlim);
415
2
      result.return_value_ != 0) {
416
1
    ENVOY_LOG(warn, "Failed to raise file descriptor limit to maximum, error {}.",
417
1
              errorDetails(result.errno_));
418
1
    return;
419
1
  }
420
1
  ENVOY_LOG(info, "Raised file descriptor limits from {} to {}.", old, rlim.rlim_max);
421
1
}
422

            
423
// Top-level initialization entry point. Sets up optional file logging, hot
// restart, and the drain manager, then delegates the bulk of configuration
// loading to initializeOrThrow(). Any exception (Envoy or unknown) is logged
// with the offending config, the server is terminated, and the exception is
// rethrown to the caller.
void InstanceBase::initialize(Network::Address::InstanceConstSharedPtr local_address,
                              ComponentFactory& component_factory) {
  // Deferred so it only runs when --log-path is actually set (checked below).
  std::function set_up_logger = [&] {
    TRY_ASSERT_MAIN_THREAD {
      file_logger_ = THROW_OR_RETURN_VALUE(
          Logger::FileSinkDelegate::create(options_.logPath(), access_log_manager_,
                                           Logger::Registry::getSink()),
          std::unique_ptr<Logger::FileSinkDelegate>);
    }
    END_TRY
    CATCH(const EnvoyException& e, {
      // Re-throw with the log path included for a clearer startup error.
      throw EnvoyException(
          fmt::format("Failed to open log-file '{}'. e.what(): {}", options_.logPath(), e.what()));
    });
  };

  TRY_ASSERT_MAIN_THREAD {
    if (!options_.logPath().empty()) {
      set_up_logger();
    }
    restarter_.initialize(*dispatcher_, *this);
    drain_manager_ = component_factory.createDrainManager(*this);
    THROW_IF_NOT_OK(initializeOrThrow(std::move(local_address), component_factory));
  }
  END_TRY
  // Both catch arms terminate the server before rethrowing; the second arm
  // handles non-Envoy (unknown) exception types.
  MULTI_CATCH(
      const EnvoyException& e,
      {
        ENVOY_LOG(critical, "error `{}` initializing config '{} {} {}'", e.what(),
                  options_.configProto().DebugString(), options_.configYaml(),
                  options_.configPath());
        terminate();
        throw;
      },
      {
        ENVOY_LOG(critical, "error initializing due to unknown exception");
        terminate();
        throw;
      });
}
463

            
464
absl::Status InstanceBase::initializeOrThrow(Network::Address::InstanceConstSharedPtr local_address,
465
10687
                                             ComponentFactory& component_factory) {
466
10687
  ENVOY_LOG(info, "initializing epoch {} (base id={}, hot restart version={})",
467
10687
            options_.restartEpoch(), restarter_.baseId(), restarter_.version());
468

            
469
10687
  ENVOY_LOG(info, "statically linked extensions:");
470
498324
  for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) {
471
498324
    ENVOY_LOG(info, "  {}: {}", ext.first, absl::StrJoin(ext.second->registeredNames(), ", "));
472
498324
  }
473

            
474
  // The main thread is registered for thread local updates so that code that does not care
475
  // whether it runs on the main thread or on workers can still use TLS.
476
  // We do this as early as possible because this has no side effect and could ensure that the
477
  // TLS always contains a valid main thread dispatcher when TLS is used.
478
10687
  thread_local_.registerThread(*dispatcher_, true);
479

            
480
  // Handle configuration that needs to take place prior to the main configuration load.
481
10687
  RETURN_IF_NOT_OK(InstanceUtil::loadBootstrapConfig(
482
10687
      bootstrap_, options_, messageValidationContext().staticValidationVisitor(), *api_));
483
10685
  bootstrap_config_update_time_ = time_source_.systemTime();
484

            
485
10685
  if (bootstrap_.has_application_log_config()) {
486
5
    RETURN_IF_NOT_OK(
487
5
        Utility::assertExclusiveLogFormatMethod(options_, bootstrap_.application_log_config()));
488
4
    RETURN_IF_NOT_OK(Utility::maybeSetApplicationLogFormat(bootstrap_.application_log_config()));
489
2
  }
490

            
491
#ifdef ENVOY_PERFETTO
492
  perfetto::TracingInitArgs args;
493
  // Include in-process events only.
494
  args.backends = perfetto::kInProcessBackend;
495
  perfetto::Tracing::Initialize(args);
496
  perfetto::TrackEvent::Register();
497

            
498
  // Prepare a configuration for a new "Perfetto" tracing session.
499
  perfetto::TraceConfig cfg;
500
  // TODO(rojkov): make the tracer configurable with either "Perfetto"'s native
501
  // message or custom one embedded into Bootstrap.
502
  cfg.add_buffers()->set_size_kb(1024);
503
  auto* ds_cfg = cfg.add_data_sources()->mutable_config();
504
  ds_cfg->set_name("track_event");
505

            
506
  const std::string pftrace_path =
507
      PROTOBUF_GET_STRING_OR_DEFAULT(bootstrap_, perf_tracing_file_path, "envoy.pftrace");
508
  // Instantiate a new tracing session.
509
  tracing_session_ = perfetto::Tracing::NewTrace();
510
  tracing_fd_ = open(pftrace_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
511
  if (tracing_fd_ == -1) {
512
    return absl::InvalidArgumentError(
513
        fmt::format("unable to open tracing file {}: {}", pftrace_path, errorDetails(errno)));
514
  }
515
  // Configure the tracing session.
516
  tracing_session_->Setup(cfg, tracing_fd_);
517
  // Enable tracing and block until tracing has started.
518
  tracing_session_->StartBlocking();
519
#endif
520

            
521
  // Immediate after the bootstrap has been loaded, override the header prefix, if configured to
522
  // do so. This must be set before any other code block references the HeaderValues ConstSingleton.
523
10682
  if (!bootstrap_.header_prefix().empty()) {
524
    // setPrefix has a release assert verifying that setPrefix() is not called after prefix()
525
1
    ThreadSafeSingleton<Http::PrefixValue>::get().setPrefix(bootstrap_.header_prefix().c_str());
526
1
  }
527

            
528
  // Register Custom O(1) headers from bootstrap.
529
10682
  RETURN_IF_NOT_OK(registerCustomInlineHeadersFromBootstrap(bootstrap_));
530

            
531
10681
  ENVOY_LOG(info, "HTTP header map info:");
532
42684
  for (const auto& info : Http::HeaderMapImplUtility::getAllHeaderMapImplInfo()) {
533
42684
    ENVOY_LOG(info, "  {}: {} bytes: {}", info.name_, info.size_,
534
42684
              absl::StrJoin(info.registered_headers_, ","));
535
42684
  }
536

            
537
  // Initialize the regex engine and inject to singleton.
538
  // Needs to happen before stats store initialization because the stats
539
  // matcher config can include regexes.
540
10681
  regex_engine_ = createRegexEngine(
541
10681
      bootstrap_, messageValidationContext().staticValidationVisitor(), serverFactoryContext());
542

            
543
  // Needs to happen as early as possible in the instantiation to preempt the objects that require
544
  // stats.
545
10681
  auto producer_or_error =
546
10681
      Stats::TagProducerImpl::createTagProducer(bootstrap_.stats_config(), options_.statsTags());
547
10681
  RETURN_IF_NOT_OK_REF(producer_or_error.status());
548
10681
  stats_store_.setTagProducer(std::move(producer_or_error.value()));
549
10681
  stats_store_.setStatsMatcher(std::make_unique<Stats::StatsMatcherImpl>(
550
10681
      bootstrap_.stats_config(), stats_store_.symbolTable(), server_contexts_));
551
10681
  stats_store_.setHistogramSettings(
552
10681
      std::make_unique<Stats::HistogramSettingsImpl>(bootstrap_.stats_config(), server_contexts_));
553

            
554
10681
  const std::string server_stats_prefix = "server.";
555
10681
  const std::string server_compilation_settings_stats_prefix = "server.compilation_settings";
556
10681
  server_stats_ = std::make_unique<ServerStats>(
557
10681
      ServerStats{ALL_SERVER_STATS(POOL_COUNTER_PREFIX(stats_store_, server_stats_prefix),
558
10681
                                   POOL_GAUGE_PREFIX(stats_store_, server_stats_prefix),
559
10681
                                   POOL_HISTOGRAM_PREFIX(stats_store_, server_stats_prefix))});
560
10681
  server_compilation_settings_stats_ =
561
10681
      std::make_unique<CompilationSettings::ServerCompilationSettingsStats>(
562
10681
          CompilationSettings::ServerCompilationSettingsStats{ALL_SERVER_COMPILATION_SETTINGS_STATS(
563
10681
              POOL_COUNTER_PREFIX(stats_store_, server_compilation_settings_stats_prefix),
564
10681
              POOL_GAUGE_PREFIX(stats_store_, server_compilation_settings_stats_prefix),
565
10681
              POOL_HISTOGRAM_PREFIX(stats_store_, server_compilation_settings_stats_prefix))});
566
10681
  validation_context_.setCounters(server_stats_->static_unknown_fields_,
567
10681
                                  server_stats_->dynamic_unknown_fields_,
568
10681
                                  server_stats_->wip_protos_);
569

            
570
10681
  memory_allocator_manager_ =
571
10681
      std::make_unique<Memory::AllocatorManager>(*api_, bootstrap_.memory_allocator_manager());
572

            
573
10681
  initialization_timer_ = std::make_unique<Stats::HistogramCompletableTimespanImpl>(
574
10681
      server_stats_->initialization_time_ms_, timeSource());
575
10681
  server_stats_->concurrency_.set(options_.concurrency());
576
10681
  if (!options().hotRestartDisabled()) {
577
76
    server_stats_->hot_restart_epoch_.set(options_.restartEpoch());
578
76
  }
579
10681
  InstanceBase::failHealthcheck(false);
580

            
581
  // Check if bootstrap has server version override set, if yes, we should use that as
582
  // 'server.version' stat.
583
10681
  uint64_t version_int;
584
10681
  if (bootstrap_.stats_server_version_override().value() > 0) {
585
2
    version_int = bootstrap_.stats_server_version_override().value();
586
10679
  } else {
587
10679
    if (!StringUtil::atoull(VersionInfo::revision().substr(0, 6).c_str(), version_int, 16)) {
588
      return absl::InvalidArgumentError("compiled GIT SHA is invalid. Invalid build.");
589
    }
590
10679
  }
591
10681
  server_stats_->version_.set(version_int);
592
10681
  if (VersionInfo::sslFipsCompliant()) {
593
    server_compilation_settings_stats_->fips_mode_.set(1);
594
10681
  } else {
595
    // Set this explicitly so that "used" flag is set so that it can be pushed to stats sinks.
596
10681
    server_compilation_settings_stats_->fips_mode_.set(0);
597
10681
  }
598

            
599
  // If user has set user_agent_name in the bootstrap config, use it.
600
  // Default to "envoy" if unset.
601
10681
  if (bootstrap_.node().user_agent_name().empty()) {
602
10670
    bootstrap_.mutable_node()->set_user_agent_name("envoy");
603
10670
  }
604

            
605
  // If user has set user_agent_build_version in the bootstrap config, use it.
606
  // Default to the internal server version.
607
10681
  if (!bootstrap_.node().user_agent_build_version().has_version()) {
608
10670
    *bootstrap_.mutable_node()->mutable_user_agent_build_version() = VersionInfo::buildVersion();
609
10670
  }
610

            
611
497564
  for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) {
612
497564
    auto registered_types = ext.second->registeredTypes();
613
1490946
    for (const auto& name : ext.second->allRegisteredNames()) {
614
1490946
      auto* extension = bootstrap_.mutable_node()->add_extensions();
615
1490946
      extension->set_name(std::string(name));
616
1490946
      extension->set_category(ext.first);
617
1490946
      auto const version = ext.second->getFactoryVersion(name);
618
1490946
      if (version) {
619
        *extension->mutable_version() = version.value();
620
      }
621
1490946
      extension->set_disabled(ext.second->isFactoryDisabled(name));
622
1490946
      auto it = registered_types.find(name);
623
1490946
      if (it != registered_types.end()) {
624
817545
        std::sort(it->second.begin(), it->second.end());
625
834036
        for (const auto& type_url : it->second) {
626
834036
          extension->add_type_urls(type_url);
627
834036
        }
628
817545
      }
629
1490946
    }
630
497564
  }
631

            
632
10681
  local_info_ = std::make_unique<LocalInfo::LocalInfoImpl>(
633
10681
      stats().symbolTable(), bootstrap_.node(), bootstrap_.node_context_params(), local_address,
634
10681
      options_.serviceZone(), options_.serviceClusterName(), options_.serviceNodeName());
635

            
636
10681
  absl::Status creation_status;
637
10681
  Configuration::InitialImpl initial_config(bootstrap_, creation_status);
638
10681
  RETURN_IF_NOT_OK(creation_status);
639

            
640
  // Learn original_start_time_ if our parent is still around to inform us of it.
641
10681
  const auto parent_admin_shutdown_response = restarter_.sendParentAdminShutdownRequest();
642
10681
  if (parent_admin_shutdown_response.has_value()) {
643
    original_start_time_ = parent_admin_shutdown_response.value().original_start_time_;
644
    // TODO(soulxu): This is added for switching the reuse port default value as true (#17259).
645
    // It ensures the same default value during the hot restart. This can be removed when
646
    // everyone switches to the new default value.
647
    enable_reuse_port_default_ =
648
        parent_admin_shutdown_response.value().enable_reuse_port_default_ ? true : false;
649
  }
650

            
651
10681
  OptRef<Server::ConfigTracker> config_tracker;
652
10681
#ifdef ENVOY_ADMIN_FUNCTIONALITY
653
10681
  auto admin_impl = std::make_shared<AdminImpl>(initial_config.admin().profilePath(), *this,
654
10681
                                                initial_config.admin().ignoreGlobalConnLimit());
655

            
656
10837
  for (const auto& allowlisted_path : bootstrap_.admin().allow_paths()) {
657
195
    admin_impl->addAllowlistedPath(
658
195
        std::make_unique<Matchers::StringMatcherImpl>(allowlisted_path, server_contexts_));
659
195
  }
660

            
661
10681
  admin_ = admin_impl;
662
10681
  config_tracker = admin_->getConfigTracker();
663
10681
#endif
664
10681
  secret_manager_ = std::make_unique<Secret::SecretManagerImpl>(config_tracker);
665

            
666
10681
  loadServerFlags(initial_config.flagsPath());
667

            
668
  // Initialize the overload manager early so other modules can register for actions.
669
10681
  auto overload_manager_or_error = createOverloadManager();
670
10681
  RETURN_IF_NOT_OK(overload_manager_or_error.status());
671
10681
  overload_manager_ = std::move(*overload_manager_or_error);
672
10681
  null_overload_manager_ = createNullOverloadManager();
673

            
674
10681
  maybeCreateHeapShrinker();
675

            
676
10713
  for (const auto& bootstrap_extension : bootstrap_.bootstrap_extensions()) {
677
97
    auto& factory = Config::Utility::getAndCheckFactory<Configuration::BootstrapExtensionFactory>(
678
97
        bootstrap_extension);
679
97
    auto config = Config::Utility::translateAnyToFactoryConfig(
680
97
        bootstrap_extension.typed_config(), messageValidationContext().staticValidationVisitor(),
681
97
        factory);
682
97
    bootstrap_extensions_.push_back(
683
97
        factory.createBootstrapExtension(*config, serverFactoryContext()));
684
97
  }
685

            
686
  // Register the fatal actions.
687
10681
  {
688
10681
    FatalAction::FatalActionPtrList safe_actions;
689
10681
    FatalAction::FatalActionPtrList unsafe_actions;
690
10681
    for (const auto& action_config : bootstrap_.fatal_actions()) {
691
      auto& factory =
692
          Config::Utility::getAndCheckFactory<Server::Configuration::FatalActionFactory>(
693
              action_config.config());
694
      auto action = factory.createFatalActionFromProto(action_config, this);
695

            
696
      if (action->isAsyncSignalSafe()) {
697
        safe_actions.push_back(std::move(action));
698
      } else {
699
        unsafe_actions.push_back(std::move(action));
700
      }
701
    }
702
10681
    Envoy::FatalErrorHandler::registerFatalActions(
703
10681
        std::move(safe_actions), std::move(unsafe_actions), api_->threadFactory());
704
10681
  }
705

            
706
10681
  if (!bootstrap_.default_socket_interface().empty()) {
707
6
    auto& sock_name = bootstrap_.default_socket_interface();
708
6
    auto sock = const_cast<Network::SocketInterface*>(Network::socketInterface(sock_name));
709
6
    if (sock != nullptr) {
710
6
      Network::SocketInterfaceSingleton::clear();
711
6
      Network::SocketInterfaceSingleton::initialize(sock);
712
6
    }
713
6
  }
714

            
715
10681
  ListenerManagerFactory* listener_manager_factory = nullptr;
716
10681
  if (bootstrap_.has_listener_manager()) {
717
    listener_manager_factory = Config::Utility::getAndCheckFactory<ListenerManagerFactory>(
718
        bootstrap_.listener_manager(), false);
719
10681
  } else {
720
10681
    listener_manager_factory = &Config::Utility::getAndCheckFactoryByName<ListenerManagerFactory>(
721
10681
        Config::ServerExtensionValues::get().DEFAULT_LISTENER);
722
10681
  }
723

            
724
  // Workers get created first so they register for thread local updates.
725
10681
  listener_manager_ = listener_manager_factory->createListenerManager(
726
10681
      *this, nullptr, worker_factory_, bootstrap_.enable_dispatcher_stats(), quic_stat_names_);
727

            
728
  // We can now initialize stats for threading.
729
10681
  stats_store_.initializeThreading(*dispatcher_, thread_local_);
730

            
731
  // It's now safe to start writing stats from the main thread's dispatcher.
732
10681
  if (bootstrap_.enable_dispatcher_stats()) {
733
6
    dispatcher_->initializeStats(*stats_store_.rootScope(), "server.");
734
6
  }
735

            
736
  // The broad order of initialization from this point on is the following:
737
  // 1. Statically provisioned configuration (bootstrap) are loaded.
738
  // 2. Cluster manager is created and all primary clusters (i.e. with endpoint assignments
739
  //    provisioned statically in bootstrap, discovered through DNS or file based CDS) are
740
  //    initialized.
741
  // 3. Various services are initialized and configured using the bootstrap config.
742
  // 4. RTDS is initialized using primary clusters. This  allows runtime overrides to be fully
743
  //    configured before the rest of xDS configuration is provisioned.
744
  // 5. Secondary clusters (with endpoint assignments provisioned by xDS servers) are initialized.
745
  // 6. The rest of the dynamic configuration is provisioned.
746
  //
747
  // Please note: this order requires that RTDS is provisioned using a primary cluster. If RTDS is
748
  // provisioned through ADS then ADS must use primary cluster as well. This invariant is enforced
749
  // during RTDS initialization and invalid configuration will be rejected.
750

            
751
  // Runtime gets initialized before the main configuration since during main configuration
752
  // load things may grab a reference to the loader for later use.
753
10681
  runtime_ = component_factory.createRuntime(*this, initial_config);
754
10681
  validation_context_.setRuntime(runtime());
755

            
756
10681
#ifndef WIN32
757
  // Envoy automatically raises soft file limits, but we do it here in order to allow
758
  // a runtime override to disable this feature. Once the feature defaults to always on,
759
  // we can move this as the first thing to occur during the process initialization.
760
10681
  InstanceUtil::raiseFileLimits();
761
10681
#endif
762

            
763
10681
  if (!runtime().snapshot().getBoolean("envoy.disallow_global_stats", false)) {
764
10663
    assert_action_registration_ = Assert::addDebugAssertionFailureRecordAction(
765
10663
        [this](const char*) { server_stats_->debug_assertion_failures_.inc(); });
766
10663
    envoy_bug_action_registration_ = Assert::addEnvoyBugFailureRecordAction(
767
10663
        [this](const char*) { server_stats_->envoy_bug_failures_.inc(); });
768
10663
    envoy_notification_registration_ = Notification::addEnvoyNotificationRecordAction(
769
10663
        [this](absl::string_view) { server_stats_->envoy_notifications_.inc(); });
770
10663
  }
771

            
772
10681
  if (initial_config.admin().address()) {
773
10633
#ifdef ENVOY_ADMIN_FUNCTIONALITY
774
    // Admin instance always be created if admin support is not compiled out.
775
10633
    RELEASE_ASSERT(admin_ != nullptr, "Admin instance should be created but actually not.");
776
10633
    auto typed_admin = dynamic_cast<AdminImpl*>(admin_.get());
777
10633
    RELEASE_ASSERT(typed_admin != nullptr, "Admin implementation is not an AdminImpl.");
778
10633
    initial_config.initAdminAccessLog(bootstrap_, typed_admin->factoryContext());
779
10633
    ENVOY_LOG(info, "Starting admin HTTP server at {}",
780
10633
              initial_config.admin().address()->asString());
781
10633
    admin_->startHttpListener(initial_config.admin().accessLogs(), initial_config.admin().address(),
782
10633
                              initial_config.admin().socketOptions());
783
#else
784
    return absl::InvalidArgumentError("Admin address configured but admin support compiled out");
785
#endif
786
10651
  } else {
787
48
    ENVOY_LOG(info, "No admin address given, so no admin HTTP server started.");
788
48
  }
789
10681
  if (admin_) {
790
10667
    config_tracker_entry_ = admin_->getConfigTracker().add(
791
10667
        "bootstrap", [this](const Matchers::StringMatcher&) { return dumpBootstrapConfig(); });
792
10667
  }
793
10681
  if (initial_config.admin().address()) {
794
10633
    admin_->addListenerToHandler(handler_.get());
795
10633
  }
796

            
797
  // Once we have runtime we can initialize the SSL context manager.
798
10681
  ssl_context_manager_ =
799
10681
      std::make_unique<Extensions::TransportSockets::Tls::ContextManagerImpl>(server_contexts_);
800

            
801
10681
  http_server_properties_cache_manager_ =
802
10681
      std::make_unique<Http::HttpServerPropertiesCacheManagerImpl>(
803
10681
          serverFactoryContext(), messageValidationContext().staticValidationVisitor(),
804
10681
          thread_local_);
805

            
806
  // Create the xDS-Manager that will be passed to the cluster manager when it
807
  // is initialized below.
808
10681
  xds_manager_ = std::make_unique<Config::XdsManagerImpl>(*dispatcher_, *api_, stats_store_,
809
10681
                                                          *local_info_, validation_context_, *this);
810

            
811
10681
  cluster_manager_factory_ = std::make_unique<Upstream::ProdClusterManagerFactory>(
812
10681
      serverFactoryContext(),
813
10684
      [this]() -> Network::DnsResolverSharedPtr { return this->getOrCreateDnsResolver(); },
814
10681
      quic_stat_names_);
815

            
816
  // Now that the worker thread are initialized, notify the bootstrap extensions.
817
10713
  for (auto&& bootstrap_extension : bootstrap_extensions_) {
818
95
    bootstrap_extension->onWorkerThreadInitialized();
819
95
  }
820

            
821
  // Now the configuration gets parsed. The configuration may start setting
822
  // thread local data per above. See MainImpl::initialize() for why ConfigImpl
823
  // is constructed as part of the InstanceBase and then populated once
824
  // cluster_manager_factory_ is available.
825
10681
  RETURN_IF_NOT_OK(config_.initialize(bootstrap_, *this, *cluster_manager_factory_));
826

            
827
  // Instruct the listener manager to create the LDS provider if needed. This must be done later
828
  // because various items do not yet exist when the listener manager is created.
829
10679
  if (bootstrap_.dynamic_resources().has_lds_config() ||
830
10679
      !bootstrap_.dynamic_resources().lds_resources_locator().empty()) {
831
10342
    std::unique_ptr<xds::core::v3::ResourceLocator> lds_resources_locator;
832
10342
    if (!bootstrap_.dynamic_resources().lds_resources_locator().empty()) {
833
15
      lds_resources_locator = std::make_unique<xds::core::v3::ResourceLocator>(
834
15
          THROW_OR_RETURN_VALUE(Config::XdsResourceIdentifier::decodeUrl(
835
15
                                    bootstrap_.dynamic_resources().lds_resources_locator()),
836
15
                                xds::core::v3::ResourceLocator));
837
15
    }
838
10342
    listener_manager_->createLdsApi(bootstrap_.dynamic_resources().lds_config(),
839
10342
                                    lds_resources_locator.get());
840
10342
  }
841

            
842
  // We have to defer RTDS initialization until after the cluster manager is
843
  // instantiated (which in turn relies on runtime...).
844
10679
  RETURN_IF_NOT_OK(runtime().initialize(clusterManager()));
845

            
846
10678
  clusterManager().setPrimaryClustersInitializedCb(
847
10678
      [this]() { onClusterManagerPrimaryInitializationComplete(); });
848

            
849
10678
  auto& stats_config = config_.statsConfig();
850
10678
  for (const Stats::SinkPtr& sink : stats_config.sinks()) {
851
20
    stats_store_.addSink(*sink);
852
20
  }
853
10678
  if (!stats_config.flushOnAdmin()) {
854
    // Some of the stat sinks may need dispatcher support so don't flush until the main loop starts.
855
    // Just setup the timer.
856
10933
    stat_flush_timer_ = dispatcher_->createTimer([this]() -> void { flushStats(); });
857
10662
    stat_flush_timer_->enableTimer(stats_config.flushInterval());
858
10662
  }
859

            
860
  // Now that we are initialized, notify the bootstrap extensions.
861
10710
  for (auto&& bootstrap_extension : bootstrap_extensions_) {
862
95
    bootstrap_extension->onServerInitialized(*this);
863
95
  }
864

            
865
  // GuardDog (deadlock detection) object and thread setup before workers are
866
  // started and before our own run() loop runs.
867
10678
  main_thread_guard_dog_ = maybeCreateGuardDog("main_thread", config_.mainThreadWatchdogConfig());
868
10678
  if (Runtime::runtimeFeatureEnabled("envoy.restart_features.worker_threads_watchdog_fix")) {
869
10656
    worker_guard_dog_ = maybeCreateGuardDog("workers", config_.workerWatchdogConfig());
870
10656
  } else {
871
22
    worker_guard_dog_ = maybeCreateGuardDog("workers", config_.mainThreadWatchdogConfig());
872
22
  }
873
10678
  return absl::OkStatus();
874
10679
}
875

            
876
10653
// Called once all primary clusters have finished initializing. Starts the RTDS
// subscriptions; `onRuntimeReady()` runs once those subscriptions are ready.
void InstanceBase::onClusterManagerPrimaryInitializationComplete() {
  // If RTDS was not configured the `onRuntimeReady` callback is immediately invoked.
  runtime().startRtdsSubscriptions([this]() { onRuntimeReady(); });
}
880

            
881
10653
// Invoked once RTDS (if configured) has delivered its initial runtime
// snapshot. Initializes secondary clusters, optionally sets up the HDS
// delegate, and warns about a deprecated runtime key.
void InstanceBase::onRuntimeReady() {
  // Begin initializing secondary clusters after RTDS configuration has been applied.
  // Initializing can throw exceptions, so catch these.
  TRY_ASSERT_MAIN_THREAD {
    THROW_IF_NOT_OK(clusterManager().initializeSecondaryClusters(bootstrap_));
  }
  END_TRY
  CATCH(const EnvoyException& e, {
    ENVOY_LOG(warn, "Skipping initialization of secondary cluster: {}", e.what());
    // Failure to initialize secondary clusters triggers a server shutdown.
    shutdown();
  });

  if (bootstrap_.has_hds_config()) {
    const auto& hds_config = bootstrap_.hds_config();
    async_client_manager_ = std::make_unique<Grpc::AsyncClientManagerImpl>(
        bootstrap_.grpc_async_client_manager_config(), server_contexts_, grpc_context_.statNames());
    TRY_ASSERT_MAIN_THREAD {
      THROW_IF_NOT_OK(Config::Utility::checkTransportVersion(hds_config));
      // HDS does not support xDS-Failover.
      auto factory_or_error = Config::Utility::factoryForGrpcApiConfigSource(
          *async_client_manager_, hds_config, *stats_store_.rootScope(), false, 0, false);
      THROW_IF_NOT_OK_REF(factory_or_error.status());
      hds_delegate_ = maybeCreateHdsDelegate(
          serverFactoryContext(), *stats_store_.rootScope(),
          THROW_OR_RETURN_VALUE(factory_or_error.value()->createUncachedRawAsyncClient(),
                                Grpc::RawAsyncClientPtr),
          stats_store_, *ssl_context_manager_);
    }
    END_TRY
    CATCH(const EnvoyException& e,
          { ENVOY_LOG(warn, "Skipping initialization of HDS cluster: {}", e.what()); });
    // If the delegate could not be created above, shut the server down.
    if (!hds_delegate_) {
      shutdown();
    }
  }

  // TODO (nezdolik): Fully deprecate this runtime key in the next release.
  if (runtime().snapshot().get(Runtime::Keys::GlobalMaxCxRuntimeKey)) {
    if (!options_.skipDeprecatedLogs()) {
      ENVOY_LOG(warn,
                "Usage of the deprecated runtime key {}, consider switching to "
                "`envoy.resource_monitors.global_downstream_max_connections` instead."
                "This runtime key will be removed in future.",
                Runtime::Keys::GlobalMaxCxRuntimeKey);
    }
  }
}
928

            
929
10483
void InstanceBase::startWorkers() {
930
  // The callback will be called after workers are started.
931
10483
  THROW_IF_NOT_OK(
932
10483
      listener_manager_->startWorkers(makeOptRefFromPtr(worker_guard_dog_.get()), [this]() {
933
10483
        if (isShutdown()) {
934
10483
          return;
935
10483
        }
936

            
937
10483
        initialization_timer_->complete();
938
        // Update server stats as soon as initialization is done.
939
10483
        updateServerStats();
940
10483
        workers_started_ = true;
941
10483
        hooks_.onWorkersStarted();
942
        // At this point we are ready to take traffic and all listening ports are up. Notify our
943
        // parent if applicable that they can stop listening and drain.
944
10483
        restarter_.drainParentListeners();
945
10483
        drain_manager_->startParentShutdownSequence();
946
10483
      }));
947
10483
}
948

            
949
// Builds the runtime loader from the initial (bootstrap) configuration.
// Throws (via THROW_IF_NOT_OK_REF) if the loader cannot be created.
Runtime::LoaderPtr InstanceUtil::createRuntime(Instance& server,
                                               Server::Configuration::Initial& config) {
#ifdef ENVOY_ENABLE_YAML
  ENVOY_LOG(info, "runtime: {}", MessageUtil::getYamlStringFromMessage(config.runtime()));
#endif
  // LoaderImpl::create() returns absl::StatusOr<std::unique_ptr<Runtime::LoaderImpl>>.
  auto loader_or_error = Runtime::LoaderImpl::create(
      server.dispatcher(), server.threadLocal(), config.runtime(), server.localInfo(),
      server.stats(), server.api().randomGenerator(),
      server.messageValidationContext().dynamicValidationVisitor(), server.api());
  THROW_IF_NOT_OK_REF(loader_or_error.status());
  return std::move(loader_or_error.value());
}
961

            
962
10671
// Applies startup "server flags" from an optional on-disk directory. Currently
// the only recognized flag is a `drain` file, which fails health checks.
void InstanceBase::loadServerFlags(const absl::optional<std::string>& flags_path) {
  if (!flags_path.has_value()) {
    return;
  }

  const std::string& path = *flags_path;
  ENVOY_LOG(info, "server flags path: {}", path);
  // Presence of a "drain" file puts the server into drain mode from startup.
  if (api_->fileSystem().fileExists(path + "/drain")) {
    ENVOY_LOG(info, "starting server in drain mode");
    InstanceBase::failHealthcheck(true);
  }
}
973

            
974
// Wires up process signal handling, starts the overload managers, and
// registers the cluster-manager init callback that ultimately drives
// `post_init_cb` through the init manager.
RunHelper::RunHelper(Instance& instance, const Options& options, Event::Dispatcher& dispatcher,
                     Config::XdsManager& xds_manager, Upstream::ClusterManager& cm,
                     AccessLog::AccessLogManager& access_log_manager, Init::Manager& init_manager,
                     OverloadManager& overload_manager, OverloadManager& null_overload_manager,
                     std::function<void()> post_init_cb)
    : init_watcher_("RunHelper", [&instance, post_init_cb]() {
        // Skip post-init work if shutdown started while initialization was in flight.
        if (!instance.isShutdown()) {
          post_init_cb();
        }
      }) {
  // Setup signals.
  // Since signals are not supported on Windows we have an internal definition for `SIGTERM`
  // On POSIX it resolves as expected to SIGTERM
  // On Windows we use it internally for all the console events that indicate that we should
  // terminate the process.
  if (options.signalHandlingEnabled()) {
    sigterm_ = dispatcher.listenForSignal(ENVOY_SIGTERM, [&instance]() {
      ENVOY_LOG(warn, "caught ENVOY_SIGTERM");
      instance.shutdown();
    });
#ifndef WIN32
    sigint_ = dispatcher.listenForSignal(SIGINT, [&instance]() {
      ENVOY_LOG(warn, "caught SIGINT");
      instance.shutdown();
    });

    sig_usr_1_ = dispatcher.listenForSignal(SIGUSR1, [&access_log_manager]() {
      ENVOY_LOG(info, "caught SIGUSR1. Reopening access logs.");
      access_log_manager.reopen();
    });
    sig_hup_ = dispatcher.listenForSignal(SIGHUP, []() {
      ENVOY_LOG(warn, "caught and eating SIGHUP. See documentation for how to hot restart.");
    });
#endif
  }
  // Start overload manager before workers.
  overload_manager.start();
  null_overload_manager.start();
  // If there is no global limit to the number of active connections, warn on startup.
  if (!overload_manager.getThreadLocalOverloadState().isResourceMonitorEnabled(
          Server::OverloadProactiveResourceName::GlobalDownstreamMaxConnections)) {
    ENVOY_LOG(
        warn,
        "There is no configured limit to the number of allowed active downstream "
        "connections. Configure a "
        "limit in `envoy.resource_monitors.global_downstream_max_connections` resource monitor.");
  }
  // Register for cluster manager init notification. We don't start serving worker traffic until
  // upstream clusters are initialized which may involve running the event loop. Note however that
  // this can fire immediately if all clusters have already initialized. Also note that we need
  // to guard against shutdown at two different levels since SIGTERM can come in once the run loop
  // starts.
  cm.setInitializedCb([&instance, &init_manager, &xds_manager, this]() {
    if (instance.isShutdown()) {
      return;
    }
    const auto type_url = Config::getTypeUrl<envoy::config::route::v3::RouteConfiguration>();
    // Pause RDS to ensure that we don't send any requests until we've
    // subscribed to all the RDS resources. The subscriptions happen in the init callbacks,
    // so we pause RDS until we've completed all the callbacks.
    Config::ScopedResume resume_rds = xds_manager.pause(type_url);
    ENVOY_LOG(info, "all clusters initialized. initializing init manager");
    init_manager.initialize(init_watcher_);
    // Now that we're execute all the init callbacks we can resume RDS
    // as we've subscribed to all the statically defined RDS resources.
    // This is done by tearing down the maybe_resume_rds Cleanup object.
  });
}
10605
// Main-thread entry point: registers lifecycle callbacks, runs the dispatcher
// until shutdown() exits it, then tears the server down via terminate().
void InstanceBase::run() {
  // RunHelper exists primarily to facilitate testing of how we respond to early shutdown during
  // startup (see RunHelperTest in server_test.cc).
  const auto run_helper =
      RunHelper(*this, options_, *dispatcher_, xdsManager(), clusterManager(), access_log_manager_,
                init_manager_, overloadManager(), nullOverloadManager(), [this] {
                  notifyCallbacksForStage(Stage::PostInit);
                  startWorkers();
                });
  // Run the main dispatch loop waiting to exit.
  ENVOY_LOG(info, "starting main dispatch loop");
  WatchDogSharedPtr watchdog;
  if (main_thread_guard_dog_) {
    watchdog = main_thread_guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(),
                                                      "main_thread", *dispatcher_);
  }
  main_dispatch_loop_started_.store(true);
  // Startup-stage notifications run from inside the dispatch loop once it starts.
  dispatcher_->post([this] { notifyCallbacksForStage(Stage::Startup); });
  dispatcher_->run(Event::Dispatcher::RunType::Block);
  ENVOY_LOG(info, "main dispatch loop exited");
  // Stop watching the main thread before releasing the watchdog handle.
  if (main_thread_guard_dog_) {
    main_thread_guard_dog_->stopWatching(watchdog);
  }
  watchdog.reset();
  terminate();
}
21322
// Tears the server down in a fixed order. Idempotent: a second call returns
// immediately. The ordering of the steps below is load-bearing — see the
// per-step comments.
void InstanceBase::terminate() {
  if (terminated_) {
    return;
  }
  terminated_ = true;
  // Before starting to shutdown anything else, stop slot destruction updates.
  thread_local_.shutdownGlobalThreading();
  // Before the workers start exiting we should disable stat threading.
  stats_store_.shutdownThreading();
  // TODO: figure out the correct fix: https://github.com/envoyproxy/envoy/issues/15072.
  // Shut down every registered gRPC mux implementation (absent factories are skipped).
  std::vector<std::string> muxes = {
      "envoy.config_mux.new_grpc_mux_factory", "envoy.config_mux.grpc_mux_factory",
      "envoy.config_mux.delta_grpc_mux_factory", "envoy.config_mux.sotw_grpc_mux_factory"};
  for (const auto& name : muxes) {
    auto* factory = Config::Utility::getFactoryByName<Config::MuxFactory>(name);
    if (factory) {
      factory->shutdownAll();
    }
  }
  if (overload_manager_) {
    overload_manager_->stop();
  }
  // Shutdown all the workers now that the main dispatch loop is done.
  if (listener_manager_ != nullptr) {
    listener_manager_->stopWorkers();
  }
  // Only flush if we have not been hot restarted.
  if (stat_flush_timer_) {
    flushStats();
  }
  if (config_.clusterManager() != nullptr) {
    config_.clusterManager()->shutdown();
  }
  handler_.reset();
  thread_local_.shutdownThread();
  restarter_.shutdown();
  ENVOY_LOG(info, "exiting");
  ENVOY_FLUSH_LOG();
  FatalErrorHandler::clearFatalActionsOnTerminate();
}
260555
// Returns the runtime loader. Dereferences `runtime_` unconditionally, so it
// must only be called after the loader has been created during initialization.
Runtime::Loader& InstanceBase::runtime() { return *runtime_; }
10608
// Initiates server shutdown: marks the instance shut down, asks a hot-restart
// parent (if any) to terminate, and exits the main dispatch loop once the
// ShutdownExit stage callbacks have completed.
void InstanceBase::shutdown() {
  ENVOY_LOG(info, "shutting down server instance");
  shutdown_ = true;
  restarter_.sendParentTerminateRequest();
  // The dispatcher exit is deferred until all ShutdownExit callbacks finish.
  notifyCallbacksForStage(Stage::ShutdownExit, [this] { dispatcher_->exit(); });
}
// Hot-restart path: a child process has started, so stop stat flushing,
// listeners, and the admin socket, then tell any remaining parent to exit.
void InstanceBase::shutdownAdmin() {
  ENVOY_LOG(warn, "shutting down admin due to child startup");
  stat_flush_timer_.reset();
  handler_->stopListeners();
  if (admin_) {
    admin_->closeSocket();
  }
  // If we still have a parent, it should be terminated now that we have a child.
  ENVOY_LOG(warn, "terminating parent process");
  restarter_.sendParentTerminateRequest();
}
// Registers a plain lifecycle callback for `stage`. The returned handle
// unregisters the callback when destroyed.
ServerLifecycleNotifier::HandlePtr InstanceBase::registerCallback(Stage stage,
                                                                  StageCallback callback) {
  // operator[] creates the callback list for this stage on first use.
  return std::make_unique<LifecycleCallbackHandle<StageCallback>>(stage_callbacks_[stage],
                                                                  callback);
}
// Registers a completion-style lifecycle callback. Only the ShutdownExit stage
// supports completion callbacks; the handle unregisters on destruction.
ServerLifecycleNotifier::HandlePtr
InstanceBase::registerCallback(Stage stage, StageCallbackWithCompletion callback) {
  ASSERT(stage == Stage::ShutdownExit);
  // operator[] creates the callback list for this stage on first use.
  return std::make_unique<LifecycleCallbackHandle<StageCallbackWithCompletion>>(
      stage_completable_callbacks_[stage], callback);
}
31696
// Runs all callbacks registered for `stage`, then posts `completion_cb` to the
// dispatcher once every completion-style callback has signaled completion.
void InstanceBase::notifyCallbacksForStage(Stage stage, std::function<void()> completion_cb) {
  ASSERT_IS_MAIN_OR_TEST_THREAD();
  const auto stage_it = stage_callbacks_.find(stage);
  if (stage_it != stage_callbacks_.end()) {
    LifecycleNotifierCallbacks& callbacks = stage_it->second;
    for (auto callback_it = callbacks.begin(); callback_it != callbacks.end();) {
      StageCallback callback = *callback_it;
      // Increment the iterator before invoking the callback in case the
      // callback deletes the handle which will unregister itself and
      // invalidate this iterator if we're still pointing at it.
      ++callback_it;
      callback();
    }
  }
  // Wrap completion_cb so that it only gets invoked when all callbacks for this stage
  // have finished their work.
  std::shared_ptr<void> cb_guard(
      new Cleanup([this, completion_cb]() { dispatcher_->post(completion_cb); }));
  // Registrations which take a completion callback are typically implemented by executing a
  // callback on all worker threads using Slot::runOnAllThreads which will hang indefinitely if
  // worker threads have not been started so we need to skip notifications if envoy is shutdown
  // early before workers have started.
  if (workers_started_) {
    const auto it2 = stage_completable_callbacks_.find(stage);
    if (it2 != stage_completable_callbacks_.end()) {
      ENVOY_LOG(info, "Notifying {} callback(s) with completion.", it2->second.size());
      for (const StageCallbackWithCompletion& callback : it2->second) {
        // Each callback receives a closure holding a copy of cb_guard; the
        // Cleanup (and thus completion_cb) fires when the last copy is gone.
        callback([cb_guard] {});
      }
    }
  }
}
55
// Admin `/config_dump` handler for the "bootstrap" entry: returns a
// BootstrapConfigDump holding a copy of the bootstrap proto plus the
// last-updated timestamp.
ProtobufTypes::MessagePtr InstanceBase::dumpBootstrapConfig() {
  auto dump = std::make_unique<envoy::admin::v3::BootstrapConfigDump>();
  dump->mutable_bootstrap()->MergeFrom(bootstrap_);
  TimestampUtil::systemClockToTimestamp(bootstrap_config_update_time_,
                                        *dump->mutable_last_updated());
  return dump;
}
89
// Lazily creates (and then caches) the DNS resolver configured by the
// bootstrap. Subsequent calls return the cached instance.
Network::DnsResolverSharedPtr InstanceBase::getOrCreateDnsResolver() {
  if (dns_resolver_ != nullptr) {
    return dns_resolver_;
  }
  envoy::config::core::v3::TypedExtensionConfig typed_dns_resolver_config;
  // Selects the resolver factory from the bootstrap and fills in its config.
  auto& dns_resolver_factory =
      Network::createDnsResolverFactoryFromProto(bootstrap_, typed_dns_resolver_config);
  dns_resolver_ = THROW_OR_RETURN_VALUE(
      dns_resolver_factory.createDnsResolver(dispatcher(), api(), typed_dns_resolver_config),
      Network::DnsResolverSharedPtr);
  return dns_resolver_;
}
10323
// Default for listener reuse-port behavior; may have been inherited from a
// hot-restart parent during initialization (see the TODO referencing #17259).
bool InstanceBase::enableReusePortDefault() { return enable_reuse_port_default_; }
} // namespace Server
} // namespace Envoy