/proc/self/cwd/source/server/server.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/server/server.h" |
2 | | |
3 | | #include <csignal> |
4 | | #include <cstdint> |
5 | | #include <ctime> |
6 | | #include <functional> |
7 | | #include <memory> |
8 | | #include <string> |
9 | | |
10 | | #include "envoy/admin/v3/config_dump.pb.h" |
11 | | #include "envoy/common/exception.h" |
12 | | #include "envoy/common/time.h" |
13 | | #include "envoy/config/bootstrap/v3/bootstrap.pb.h" |
14 | | #include "envoy/config/bootstrap/v3/bootstrap.pb.validate.h" |
15 | | #include "envoy/event/dispatcher.h" |
16 | | #include "envoy/event/signal.h" |
17 | | #include "envoy/event/timer.h" |
18 | | #include "envoy/network/dns.h" |
19 | | #include "envoy/registry/registry.h" |
20 | | #include "envoy/server/bootstrap_extension_config.h" |
21 | | #include "envoy/server/instance.h" |
22 | | #include "envoy/server/options.h" |
23 | | #include "envoy/stats/histogram.h" |
24 | | #include "envoy/stats/stats.h" |
25 | | #include "envoy/upstream/cluster_manager.h" |
26 | | |
27 | | #include "source/common/api/api_impl.h" |
28 | | #include "source/common/api/os_sys_calls_impl.h" |
29 | | #include "source/common/common/enum_to_int.h" |
30 | | #include "source/common/common/mutex_tracer_impl.h" |
31 | | #include "source/common/common/utility.h" |
32 | | #include "source/common/config/utility.h" |
33 | | #include "source/common/config/well_known_names.h" |
34 | | #include "source/common/config/xds_resource.h" |
35 | | #include "source/common/http/codes.h" |
36 | | #include "source/common/http/headers.h" |
37 | | #include "source/common/local_info/local_info_impl.h" |
38 | | #include "source/common/memory/stats.h" |
39 | | #include "source/common/network/address_impl.h" |
40 | | #include "source/common/network/dns_resolver/dns_factory_util.h" |
41 | | #include "source/common/network/socket_interface.h" |
42 | | #include "source/common/network/socket_interface_impl.h" |
43 | | #include "source/common/network/tcp_listener_impl.h" |
44 | | #include "source/common/protobuf/utility.h" |
45 | | #include "source/common/router/rds_impl.h" |
46 | | #include "source/common/runtime/runtime_impl.h" |
47 | | #include "source/common/signal/fatal_error_handler.h" |
48 | | #include "source/common/singleton/manager_impl.h" |
49 | | #include "source/common/stats/thread_local_store.h" |
50 | | #include "source/common/stats/timespan_impl.h" |
51 | | #include "source/common/upstream/cluster_manager_impl.h" |
52 | | #include "source/common/version/version.h" |
53 | | #include "source/server/configuration_impl.h" |
54 | | #include "source/server/guarddog_impl.h" |
55 | | #include "source/server/listener_hooks.h" |
56 | | #include "source/server/listener_manager_factory.h" |
57 | | #include "source/server/regex_engine.h" |
58 | | #include "source/server/ssl_context_manager.h" |
59 | | #include "source/server/utils.h" |
60 | | |
61 | | namespace Envoy { |
62 | | namespace Server { |
63 | | namespace { |
64 | 5.48k | std::unique_ptr<ConnectionHandler> getHandler(Event::Dispatcher& dispatcher) { |
65 | | |
66 | 5.48k | auto* factory = Config::Utility::getFactoryByName<ConnectionHandlerFactory>( |
67 | 5.48k | "envoy.connection_handler.default"); |
68 | 5.48k | if (factory) { |
69 | 5.48k | return factory->createConnectionHandler(dispatcher, absl::nullopt); |
70 | 5.48k | } |
71 | 0 | ENVOY_LOG_MISC(debug, "Unable to find envoy.connection_handler.default factory"); |
72 | 0 | return nullptr; |
73 | 5.48k | } |
74 | | |
75 | | } // namespace |
76 | | |
77 | | InstanceBase::InstanceBase(Init::Manager& init_manager, const Options& options, |
78 | | Event::TimeSystem& time_system, ListenerHooks& hooks, |
79 | | HotRestart& restarter, Stats::StoreRoot& store, |
80 | | Thread::BasicLockable& access_log_lock, |
81 | | Random::RandomGeneratorPtr&& random_generator, |
82 | | ThreadLocal::Instance& tls, Thread::ThreadFactory& thread_factory, |
83 | | Filesystem::Instance& file_system, |
84 | | std::unique_ptr<ProcessContext> process_context, |
85 | | Buffer::WatermarkFactorySharedPtr watermark_factory) |
86 | | : init_manager_(init_manager), live_(false), options_(options), |
87 | | validation_context_(options_.allowUnknownStaticFields(), |
88 | | !options.rejectUnknownDynamicFields(), |
89 | | options.ignoreUnknownDynamicFields()), |
90 | | time_source_(time_system), restarter_(restarter), start_time_(time(nullptr)), |
91 | | original_start_time_(start_time_), stats_store_(store), thread_local_(tls), |
92 | | random_generator_(std::move(random_generator)), |
93 | | api_(new Api::Impl( |
94 | | thread_factory, store, time_system, file_system, *random_generator_, bootstrap_, |
95 | | process_context ? ProcessContextOptRef(std::ref(*process_context)) : absl::nullopt, |
96 | | watermark_factory)), |
97 | | dispatcher_(api_->allocateDispatcher("main_thread")), |
98 | | access_log_manager_(options.fileFlushIntervalMsec(), *api_, *dispatcher_, access_log_lock, |
99 | | store), |
100 | | singleton_manager_(new Singleton::ManagerImpl(api_->threadFactory())), |
101 | | handler_(getHandler(*dispatcher_)), worker_factory_(thread_local_, *api_, hooks), |
102 | | mutex_tracer_(options.mutexTracingEnabled() ? &Envoy::MutexTracerImpl::getOrCreateTracer() |
103 | | : nullptr), |
104 | | grpc_context_(store.symbolTable()), http_context_(store.symbolTable()), |
105 | | router_context_(store.symbolTable()), process_context_(std::move(process_context)), |
106 | | hooks_(hooks), quic_stat_names_(store.symbolTable()), server_contexts_(*this), |
107 | 5.48k | enable_reuse_port_default_(true), stats_flush_in_progress_(false) {} |
108 | | |
109 | 5.48k | InstanceBase::~InstanceBase() { |
110 | 5.48k | terminate(); |
111 | | |
112 | | // Stop logging to file before all the AccessLogManager and its dependencies are |
113 | | // destructed to avoid crashing at shutdown. |
114 | 5.48k | file_logger_.reset(); |
115 | | |
116 | | // Destruct the ListenerManager explicitly, before InstanceBase's local init_manager_ is |
117 | | // destructed. |
118 | | // |
119 | | // The ListenerManager's DestinationPortsMap contains FilterChainSharedPtrs. There is a rare race |
120 | | // condition where one of these FilterChains contains an HttpConnectionManager, which contains an |
121 | | // RdsRouteConfigProvider, which contains an RdsRouteConfigSubscriptionSharedPtr. Since |
122 | | // RdsRouteConfigSubscription is an Init::Target, ~RdsRouteConfigSubscription triggers a callback |
123 | | // set at initialization, which goes to unregister it from the top-level InitManager, which has |
124 | | // already been destructed (use-after-free) causing a segfault. |
125 | 5.48k | ENVOY_LOG(debug, "destroying listener manager"); |
126 | 5.48k | listener_manager_.reset(); |
127 | 5.48k | ENVOY_LOG(debug, "destroyed listener manager"); |
128 | 5.48k | dispatcher_->shutdown(); |
129 | | |
130 | | #ifdef ENVOY_PERFETTO |
131 | | if (tracing_session_ != nullptr) { |
132 | | // Flush the trace data. |
133 | | perfetto::TrackEvent::Flush(); |
134 | | // Disable tracing and block until tracing has stopped. |
135 | | tracing_session_->StopBlocking(); |
136 | | close(tracing_fd_); |
137 | | } |
138 | | #endif |
139 | 5.48k | } |
140 | | |
141 | 44.5k | Upstream::ClusterManager& InstanceBase::clusterManager() { |
142 | 44.5k | ASSERT(config_.clusterManager() != nullptr); |
143 | 44.5k | return *config_.clusterManager(); |
144 | 44.5k | } |
145 | | |
146 | 0 | const Upstream::ClusterManager& InstanceBase::clusterManager() const { |
147 | 0 | ASSERT(config_.clusterManager() != nullptr); |
148 | 0 | return *config_.clusterManager(); |
149 | 0 | } |
150 | | |
151 | 0 | void InstanceBase::drainListeners(OptRef<const Network::ExtraShutdownListenerOptions> options) { |
152 | 0 | ENVOY_LOG(info, "closing and draining listeners"); |
153 | 0 | listener_manager_->stopListeners(ListenerManager::StopListenersType::All, |
154 | 0 | options.has_value() ? *options |
155 | 0 | : Network::ExtraShutdownListenerOptions{}); |
156 | 0 | drain_manager_->startDrainSequence([] {}); |
157 | 0 | } |
158 | | |
159 | 5.35k | void InstanceBase::failHealthcheck(bool fail) { |
160 | 5.35k | live_.store(!fail); |
161 | 5.35k | server_stats_->live_.set(live_.load()); |
162 | 5.35k | } |
163 | | |
164 | | MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, |
165 | | Upstream::ClusterManager& cluster_manager, |
166 | 3.78k | TimeSource& time_source) { |
167 | 3.78k | store.forEachSinkedCounter( |
168 | 3.78k | [this](std::size_t size) { |
169 | 3.78k | snapped_counters_.reserve(size); |
170 | 3.78k | counters_.reserve(size); |
171 | 3.78k | }, |
172 | 970k | [this](Stats::Counter& counter) { |
173 | 970k | snapped_counters_.push_back(Stats::CounterSharedPtr(&counter)); |
174 | 970k | counters_.push_back({counter.latch(), counter}); |
175 | 970k | }); |
176 | | |
177 | 3.78k | store.forEachSinkedGauge( |
178 | 3.78k | [this](std::size_t size) { |
179 | 3.78k | snapped_gauges_.reserve(size); |
180 | 3.78k | gauges_.reserve(size); |
181 | 3.78k | }, |
182 | 274k | [this](Stats::Gauge& gauge) { |
183 | 274k | snapped_gauges_.push_back(Stats::GaugeSharedPtr(&gauge)); |
184 | 274k | gauges_.push_back(gauge); |
185 | 274k | }); |
186 | | |
187 | 3.78k | store.forEachSinkedHistogram( |
188 | 3.78k | [this](std::size_t size) { |
189 | 2.66k | snapped_histograms_.reserve(size); |
190 | 2.66k | histograms_.reserve(size); |
191 | 2.66k | }, |
192 | 3.78k | [this](Stats::ParentHistogram& histogram) { |
193 | 221 | snapped_histograms_.push_back(Stats::ParentHistogramSharedPtr(&histogram)); |
194 | 221 | histograms_.push_back(histogram); |
195 | 221 | }); |
196 | | |
197 | 3.78k | store.forEachSinkedTextReadout( |
198 | 3.78k | [this](std::size_t size) { |
199 | 3.78k | snapped_text_readouts_.reserve(size); |
200 | 3.78k | text_readouts_.reserve(size); |
201 | 3.78k | }, |
202 | 3.78k | [this](Stats::TextReadout& text_readout) { |
203 | 3.16k | snapped_text_readouts_.push_back(Stats::TextReadoutSharedPtr(&text_readout)); |
204 | 3.16k | text_readouts_.push_back(text_readout); |
205 | 3.16k | }); |
206 | | |
207 | 3.78k | Upstream::HostUtility::forEachHostMetric( |
208 | 3.78k | cluster_manager, |
209 | 3.78k | [this](Stats::PrimitiveCounterSnapshot&& metric) { |
210 | 0 | host_counters_.emplace_back(std::move(metric)); |
211 | 0 | }, |
212 | 3.78k | [this](Stats::PrimitiveGaugeSnapshot&& metric) { |
213 | 0 | host_gauges_.emplace_back(std::move(metric)); |
214 | 0 | }); |
215 | | |
216 | 3.78k | snapshot_time_ = time_source.systemTime(); |
217 | 3.78k | } |
218 | | |
219 | | void InstanceUtil::flushMetricsToSinks(const std::list<Stats::SinkPtr>& sinks, Stats::Store& store, |
220 | 3.78k | Upstream::ClusterManager& cm, TimeSource& time_source) { |
221 | | // Create a snapshot and flush to all sinks. |
222 | | // NOTE: Even if there are no sinks, creating the snapshot has the important property that it |
223 | | // latches all counters on a periodic basis. The hot restart code assumes this is being |
224 | | // done so this should not be removed. |
225 | 3.78k | MetricSnapshotImpl snapshot(store, cm, time_source); |
226 | 3.78k | for (const auto& sink : sinks) { |
227 | 0 | sink->flush(snapshot); |
228 | 0 | } |
229 | 3.78k | } |
230 | | |
231 | 3.78k | void InstanceBase::flushStats() { |
232 | 3.78k | if (stats_flush_in_progress_) { |
233 | 0 | ENVOY_LOG(debug, "skipping stats flush as flush is already in progress"); |
234 | 0 | server_stats_->dropped_stat_flushes_.inc(); |
235 | 0 | return; |
236 | 0 | } |
237 | | |
238 | 3.78k | stats_flush_in_progress_ = true; |
239 | 3.78k | ENVOY_LOG(debug, "flushing stats"); |
240 | | // If Envoy is not fully initialized, workers will not be started and mergeHistograms |
241 | | // completion callback is not called immediately. As a result of this server stats will |
242 | | // not be updated and flushed to stat sinks. So skip mergeHistograms call if workers are |
243 | | // not started yet. |
244 | 3.78k | if (initManager().state() == Init::Manager::State::Initialized) { |
245 | | // A shutdown initiated before this callback may prevent this from being called as per |
246 | | // the semantics documented in ThreadLocal's runOnAllThreads method. |
247 | 2.66k | stats_store_.mergeHistograms([this]() -> void { flushStatsInternal(); }); |
248 | 2.66k | } else { |
249 | 1.11k | ENVOY_LOG(debug, "Envoy is not fully initialized, skipping histogram merge and flushing stats"); |
250 | 1.11k | flushStatsInternal(); |
251 | 1.11k | } |
252 | 3.78k | } |
253 | | |
254 | 6.43k | void InstanceBase::updateServerStats() { |
255 | | // mergeParentStatsIfAny() does nothing and returns a struct of 0s if there is no parent. |
256 | 6.43k | HotRestart::ServerStatsFromParent parent_stats = restarter_.mergeParentStatsIfAny(stats_store_); |
257 | | |
258 | 6.43k | server_stats_->uptime_.set(time(nullptr) - original_start_time_); |
259 | 6.43k | server_stats_->memory_allocated_.set(Memory::Stats::totalCurrentlyAllocated() + |
260 | 6.43k | parent_stats.parent_memory_allocated_); |
261 | 6.43k | server_stats_->memory_heap_size_.set(Memory::Stats::totalCurrentlyReserved()); |
262 | 6.43k | server_stats_->memory_physical_size_.set(Memory::Stats::totalPhysicalBytes()); |
263 | 6.43k | server_stats_->parent_connections_.set(parent_stats.parent_connections_); |
264 | 6.43k | server_stats_->total_connections_.set(listener_manager_->numConnections() + |
265 | 6.43k | parent_stats.parent_connections_); |
266 | 6.43k | server_stats_->days_until_first_cert_expiring_.set( |
267 | 6.43k | sslContextManager().daysUntilFirstCertExpires().value_or(0)); |
268 | | |
269 | 6.43k | auto secs_until_ocsp_response_expires = |
270 | 6.43k | sslContextManager().secondsUntilFirstOcspResponseExpires(); |
271 | 6.43k | if (secs_until_ocsp_response_expires) { |
272 | 0 | server_stats_->seconds_until_first_ocsp_response_expiring_.set( |
273 | 0 | secs_until_ocsp_response_expires.value()); |
274 | 0 | } |
275 | 6.43k | server_stats_->state_.set( |
276 | 6.43k | enumToInt(Utility::serverState(initManager().state(), healthCheckFailed()))); |
277 | 6.43k | server_stats_->stats_recent_lookups_.set( |
278 | 6.43k | stats_store_.symbolTable().getRecentLookups([](absl::string_view, uint64_t) {})); |
279 | 6.43k | } |
280 | | |
281 | 3.78k | void InstanceBase::flushStatsInternal() { |
282 | 3.78k | updateServerStats(); |
283 | 3.78k | auto& stats_config = config_.statsConfig(); |
284 | 3.78k | InstanceUtil::flushMetricsToSinks(stats_config.sinks(), stats_store_, clusterManager(), |
285 | 3.78k | timeSource()); |
286 | | // TODO(ramaraochavali): consider adding different flush interval for histograms. |
287 | 3.78k | if (stat_flush_timer_ != nullptr) { |
288 | 3.78k | stat_flush_timer_->enableTimer(stats_config.flushInterval()); |
289 | 3.78k | } |
290 | | |
291 | 3.78k | stats_flush_in_progress_ = false; |
292 | 3.78k | } |
293 | | |
294 | 8.06k | bool InstanceBase::healthCheckFailed() { return !live_.load(); } |
295 | | |
296 | 19 | ProcessContextOptRef InstanceBase::processContext() { |
297 | 19 | if (process_context_ == nullptr) { |
298 | 19 | return absl::nullopt; |
299 | 19 | } |
300 | | |
301 | 0 | return *process_context_; |
302 | 19 | } |
303 | | |
304 | | namespace { |
305 | | |
306 | 0 | bool canBeRegisteredAsInlineHeader(const Http::LowerCaseString& header_name) { |
307 | | // 'set-cookie' cannot currently be registered as an inline header. |
308 | 0 | if (header_name == Http::Headers::get().SetCookie) { |
309 | 0 | return false; |
310 | 0 | } |
311 | 0 | return true; |
312 | 0 | } |
313 | | |
314 | | void registerCustomInlineHeadersFromBootstrap( |
315 | 5.48k | const envoy::config::bootstrap::v3::Bootstrap& bootstrap) { |
316 | 5.48k | for (const auto& inline_header : bootstrap.inline_headers()) { |
317 | 0 | const Http::LowerCaseString lower_case_name(inline_header.inline_header_name()); |
318 | 0 | if (!canBeRegisteredAsInlineHeader(lower_case_name)) { |
319 | 0 | throwEnvoyExceptionOrPanic(fmt::format("Header {} cannot be registered as an inline header.", |
320 | 0 | inline_header.inline_header_name())); |
321 | 0 | } |
322 | 0 | switch (inline_header.inline_header_type()) { |
323 | 0 | case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_HEADER: |
324 | 0 | Http::CustomInlineHeaderRegistry::registerInlineHeader< |
325 | 0 | Http::RequestHeaderMap::header_map_type>(lower_case_name); |
326 | 0 | break; |
327 | 0 | case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_TRAILER: |
328 | 0 | Http::CustomInlineHeaderRegistry::registerInlineHeader< |
329 | 0 | Http::RequestTrailerMap::header_map_type>(lower_case_name); |
330 | 0 | break; |
331 | 0 | case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_HEADER: |
332 | 0 | Http::CustomInlineHeaderRegistry::registerInlineHeader< |
333 | 0 | Http::ResponseHeaderMap::header_map_type>(lower_case_name); |
334 | 0 | break; |
335 | 0 | case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_TRAILER: |
336 | 0 | Http::CustomInlineHeaderRegistry::registerInlineHeader< |
337 | 0 | Http::ResponseTrailerMap::header_map_type>(lower_case_name); |
338 | 0 | break; |
339 | 0 | default: |
340 | 0 | PANIC("not implemented"); |
341 | 0 | } |
342 | 0 | } |
343 | 5.48k | } |
344 | | |
345 | | } // namespace |
346 | | |
347 | | void InstanceUtil::loadBootstrapConfig(envoy::config::bootstrap::v3::Bootstrap& bootstrap, |
348 | | const Options& options, |
349 | | ProtobufMessage::ValidationVisitor& validation_visitor, |
350 | 12.1k | Api::Api& api) { |
351 | 12.1k | const std::string& config_path = options.configPath(); |
352 | 12.1k | const std::string& config_yaml = options.configYaml(); |
353 | 12.1k | const envoy::config::bootstrap::v3::Bootstrap& config_proto = options.configProto(); |
354 | | |
355 | | // One of config_path and config_yaml or bootstrap should be specified. |
356 | 12.1k | if (config_path.empty() && config_yaml.empty() && config_proto.ByteSizeLong() == 0) { |
357 | 0 | throwEnvoyExceptionOrPanic( |
358 | 0 | "At least one of --config-path or --config-yaml or Options::configProto() " |
359 | 0 | "should be non-empty"); |
360 | 0 | } |
361 | | |
362 | 12.1k | if (!config_path.empty()) { |
363 | 12.1k | #ifdef ENVOY_ENABLE_YAML |
364 | 12.1k | MessageUtil::loadFromFile(config_path, bootstrap, validation_visitor, api); |
365 | | #else |
366 | | if (!config_path.empty()) { |
367 | | throwEnvoyExceptionOrPanic("Cannot load from file with YAML disabled\n"); |
368 | | } |
369 | | UNREFERENCED_PARAMETER(api); |
370 | | #endif |
371 | 12.1k | } |
372 | 12.1k | if (!config_yaml.empty()) { |
373 | 0 | #ifdef ENVOY_ENABLE_YAML |
374 | 0 | envoy::config::bootstrap::v3::Bootstrap bootstrap_override; |
375 | 0 | MessageUtil::loadFromYaml(config_yaml, bootstrap_override, validation_visitor); |
376 | | // TODO(snowp): The fact that we do a merge here doesn't seem to be covered under test. |
377 | 0 | bootstrap.MergeFrom(bootstrap_override); |
378 | | #else |
379 | | throwEnvoyExceptionOrPanic("Cannot load from YAML with YAML disabled\n"); |
380 | | #endif |
381 | 0 | } |
382 | 12.1k | if (config_proto.ByteSizeLong() != 0) { |
383 | 0 | bootstrap.MergeFrom(config_proto); |
384 | 0 | } |
385 | 12.1k | MessageUtil::validate(bootstrap, validation_visitor); |
386 | 12.1k | } |
387 | | |
388 | | void InstanceBase::initialize(Network::Address::InstanceConstSharedPtr local_address, |
389 | 5.48k | ComponentFactory& component_factory) { |
390 | 5.48k | std::function set_up_logger = [&] { |
391 | 0 | TRY_ASSERT_MAIN_THREAD { |
392 | 0 | file_logger_ = std::make_unique<Logger::FileSinkDelegate>( |
393 | 0 | options_.logPath(), access_log_manager_, Logger::Registry::getSink()); |
394 | 0 | } |
395 | 0 | END_TRY |
396 | 0 | CATCH(const EnvoyException& e, { |
397 | 0 | throw EnvoyException( |
398 | 0 | fmt::format("Failed to open log-file '{}'. e.what(): {}", options_.logPath(), e.what())); |
399 | 0 | }); |
400 | 0 | }; |
401 | | |
402 | 5.48k | TRY_ASSERT_MAIN_THREAD { |
403 | 5.48k | if (!options_.logPath().empty()) { |
404 | 0 | set_up_logger(); |
405 | 0 | } |
406 | 5.48k | restarter_.initialize(*dispatcher_, *this); |
407 | 5.48k | drain_manager_ = component_factory.createDrainManager(*this); |
408 | 5.48k | initializeOrThrow(std::move(local_address), component_factory); |
409 | 5.48k | } |
410 | 5.48k | END_TRY |
411 | 5.48k | MULTI_CATCH( |
412 | 5.48k | const EnvoyException& e, |
413 | 5.48k | { |
414 | 5.48k | ENVOY_LOG(critical, "error initializing config '{} {} {}': {}", |
415 | 5.48k | options_.configProto().DebugString(), options_.configYaml(), |
416 | 5.48k | options_.configPath(), e.what()); |
417 | 5.48k | terminate(); |
418 | 5.48k | throw; |
419 | 5.48k | }, |
420 | 5.48k | { |
421 | 5.48k | ENVOY_LOG(critical, "error initializing due to unknown exception"); |
422 | 5.48k | terminate(); |
423 | 5.48k | throw; |
424 | 5.48k | }); |
425 | 3.81k | } |
426 | | |
427 | | void InstanceBase::initializeOrThrow(Network::Address::InstanceConstSharedPtr local_address, |
428 | 5.48k | ComponentFactory& component_factory) { |
429 | 5.48k | ENVOY_LOG(info, "initializing epoch {} (base id={}, hot restart version={})", |
430 | 5.48k | options_.restartEpoch(), restarter_.baseId(), restarter_.version()); |
431 | | |
432 | 5.48k | ENVOY_LOG(info, "statically linked extensions:"); |
433 | 319k | for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) { |
434 | 319k | ENVOY_LOG(info, " {}: {}", ext.first, absl::StrJoin(ext.second->registeredNames(), ", ")); |
435 | 319k | } |
436 | | |
437 | | // Handle configuration that needs to take place prior to the main configuration load. |
438 | 5.48k | InstanceUtil::loadBootstrapConfig(bootstrap_, options_, |
439 | 5.48k | messageValidationContext().staticValidationVisitor(), *api_); |
440 | 5.48k | bootstrap_config_update_time_ = time_source_.systemTime(); |
441 | | |
442 | 5.48k | if (bootstrap_.has_application_log_config()) { |
443 | 43 | THROW_IF_NOT_OK( |
444 | 43 | Utility::assertExclusiveLogFormatMethod(options_, bootstrap_.application_log_config())); |
445 | 43 | THROW_IF_NOT_OK(Utility::maybeSetApplicationLogFormat(bootstrap_.application_log_config())); |
446 | 43 | } |
447 | | |
448 | | #ifdef ENVOY_PERFETTO |
449 | | perfetto::TracingInitArgs args; |
450 | | // Include in-process events only. |
451 | | args.backends = perfetto::kInProcessBackend; |
452 | | perfetto::Tracing::Initialize(args); |
453 | | perfetto::TrackEvent::Register(); |
454 | | |
455 | | // Prepare a configuration for a new "Perfetto" tracing session. |
456 | | perfetto::TraceConfig cfg; |
457 | | // TODO(rojkov): make the tracer configurable with either "Perfetto"'s native |
458 | | // message or custom one embedded into Bootstrap. |
459 | | cfg.add_buffers()->set_size_kb(1024); |
460 | | auto* ds_cfg = cfg.add_data_sources()->mutable_config(); |
461 | | ds_cfg->set_name("track_event"); |
462 | | |
463 | | const std::string pftrace_path = |
464 | | PROTOBUF_GET_STRING_OR_DEFAULT(bootstrap_, perf_tracing_file_path, "envoy.pftrace"); |
465 | | // Instantiate a new tracing session. |
466 | | tracing_session_ = perfetto::Tracing::NewTrace(); |
467 | | tracing_fd_ = open(pftrace_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600); |
468 | | if (tracing_fd_ == -1) { |
469 | | throwEnvoyExceptionOrPanic( |
470 | | fmt::format("unable to open tracing file {}: {}", pftrace_path, errorDetails(errno))); |
471 | | } |
472 | | // Configure the tracing session. |
473 | | tracing_session_->Setup(cfg, tracing_fd_); |
474 | | // Enable tracing and block until tracing has started. |
475 | | tracing_session_->StartBlocking(); |
476 | | #endif |
477 | | |
478 | | // Immediate after the bootstrap has been loaded, override the header prefix, if configured to |
479 | | // do so. This must be set before any other code block references the HeaderValues ConstSingleton. |
480 | 5.48k | if (!bootstrap_.header_prefix().empty()) { |
481 | | // setPrefix has a release assert verifying that setPrefix() is not called after prefix() |
482 | 0 | ThreadSafeSingleton<Http::PrefixValue>::get().setPrefix(bootstrap_.header_prefix().c_str()); |
483 | 0 | } |
484 | | |
485 | | // Register Custom O(1) headers from bootstrap. |
486 | 5.48k | registerCustomInlineHeadersFromBootstrap(bootstrap_); |
487 | | |
488 | 5.48k | ENVOY_LOG(info, "HTTP header map info:"); |
489 | 21.9k | for (const auto& info : Http::HeaderMapImplUtility::getAllHeaderMapImplInfo()) { |
490 | 21.9k | ENVOY_LOG(info, " {}: {} bytes: {}", info.name_, info.size_, |
491 | 21.9k | absl::StrJoin(info.registered_headers_, ",")); |
492 | 21.9k | } |
493 | | |
494 | | // Initialize the regex engine and inject to singleton. |
495 | | // Needs to happen before stats store initialization because the stats |
496 | | // matcher config can include regexes. |
497 | 5.48k | regex_engine_ = createRegexEngine( |
498 | 5.48k | bootstrap_, messageValidationContext().staticValidationVisitor(), serverFactoryContext()); |
499 | | |
500 | | // Needs to happen as early as possible in the instantiation to preempt the objects that require |
501 | | // stats. |
502 | 5.48k | stats_store_.setTagProducer(Config::Utility::createTagProducer(bootstrap_, options_.statsTags())); |
503 | 5.48k | stats_store_.setStatsMatcher( |
504 | 5.48k | Config::Utility::createStatsMatcher(bootstrap_, stats_store_.symbolTable())); |
505 | 5.48k | stats_store_.setHistogramSettings(Config::Utility::createHistogramSettings(bootstrap_)); |
506 | | |
507 | 5.48k | const std::string server_stats_prefix = "server."; |
508 | 5.48k | const std::string server_compilation_settings_stats_prefix = "server.compilation_settings"; |
509 | 5.48k | server_stats_ = std::make_unique<ServerStats>( |
510 | 5.48k | ServerStats{ALL_SERVER_STATS(POOL_COUNTER_PREFIX(stats_store_, server_stats_prefix), |
511 | 5.48k | POOL_GAUGE_PREFIX(stats_store_, server_stats_prefix), |
512 | 5.48k | POOL_HISTOGRAM_PREFIX(stats_store_, server_stats_prefix))}); |
513 | 5.48k | server_compilation_settings_stats_ = |
514 | 5.48k | std::make_unique<CompilationSettings::ServerCompilationSettingsStats>( |
515 | 5.48k | CompilationSettings::ServerCompilationSettingsStats{ALL_SERVER_COMPILATION_SETTINGS_STATS( |
516 | 5.48k | POOL_COUNTER_PREFIX(stats_store_, server_compilation_settings_stats_prefix), |
517 | 5.48k | POOL_GAUGE_PREFIX(stats_store_, server_compilation_settings_stats_prefix), |
518 | 5.48k | POOL_HISTOGRAM_PREFIX(stats_store_, server_compilation_settings_stats_prefix))}); |
519 | 5.48k | validation_context_.setCounters(server_stats_->static_unknown_fields_, |
520 | 5.48k | server_stats_->dynamic_unknown_fields_, |
521 | 5.48k | server_stats_->wip_protos_); |
522 | | |
523 | 5.48k | initialization_timer_ = std::make_unique<Stats::HistogramCompletableTimespanImpl>( |
524 | 5.48k | server_stats_->initialization_time_ms_, timeSource()); |
525 | 5.48k | server_stats_->concurrency_.set(options_.concurrency()); |
526 | 5.48k | server_stats_->hot_restart_epoch_.set(options_.restartEpoch()); |
527 | 5.48k | InstanceBase::failHealthcheck(false); |
528 | | |
529 | | // Check if bootstrap has server version override set, if yes, we should use that as |
530 | | // 'server.version' stat. |
531 | 5.48k | uint64_t version_int; |
532 | 5.48k | if (bootstrap_.stats_server_version_override().value() > 0) { |
533 | 75 | version_int = bootstrap_.stats_server_version_override().value(); |
534 | 5.41k | } else { |
535 | 5.41k | if (!StringUtil::atoull(VersionInfo::revision().substr(0, 6).c_str(), version_int, 16)) { |
536 | 0 | throwEnvoyExceptionOrPanic("compiled GIT SHA is invalid. Invalid build."); |
537 | 0 | } |
538 | 5.41k | } |
539 | 5.48k | server_stats_->version_.set(version_int); |
540 | 5.48k | if (VersionInfo::sslFipsCompliant()) { |
541 | 0 | server_compilation_settings_stats_->fips_mode_.set(1); |
542 | 5.48k | } else { |
543 | | // Set this explicitly so that "used" flag is set so that it can be pushed to stats sinks. |
544 | 5.48k | server_compilation_settings_stats_->fips_mode_.set(0); |
545 | 5.48k | } |
546 | | |
547 | | // If user has set user_agent_name in the bootstrap config, use it. |
548 | | // Default to "envoy" if unset. |
549 | 5.48k | if (bootstrap_.node().user_agent_name().empty()) { |
550 | 5.23k | bootstrap_.mutable_node()->set_user_agent_name("envoy"); |
551 | 5.23k | } |
552 | | |
553 | | // If user has set user_agent_build_version in the bootstrap config, use it. |
554 | | // Default to the internal server version. |
555 | 5.48k | if (!bootstrap_.node().user_agent_build_version().has_version()) { |
556 | 5.34k | *bootstrap_.mutable_node()->mutable_user_agent_build_version() = VersionInfo::buildVersion(); |
557 | 5.34k | } |
558 | | |
559 | 308k | for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) { |
560 | 308k | auto registered_types = ext.second->registeredTypes(); |
561 | 1.19M | for (const auto& name : ext.second->allRegisteredNames()) { |
562 | 1.19M | auto* extension = bootstrap_.mutable_node()->add_extensions(); |
563 | 1.19M | extension->set_name(std::string(name)); |
564 | 1.19M | extension->set_category(ext.first); |
565 | 1.19M | auto const version = ext.second->getFactoryVersion(name); |
566 | 1.19M | if (version) { |
567 | 0 | *extension->mutable_version() = version.value(); |
568 | 0 | } |
569 | 1.19M | extension->set_disabled(ext.second->isFactoryDisabled(name)); |
570 | 1.19M | auto it = registered_types.find(name); |
571 | 1.19M | if (it != registered_types.end()) { |
572 | 780k | std::sort(it->second.begin(), it->second.end()); |
573 | 838k | for (const auto& type_url : it->second) { |
574 | 838k | extension->add_type_urls(type_url); |
575 | 838k | } |
576 | 780k | } |
577 | 1.19M | } |
578 | 308k | } |
579 | | |
580 | 5.48k | local_info_ = std::make_unique<LocalInfo::LocalInfoImpl>( |
581 | 5.48k | stats().symbolTable(), bootstrap_.node(), bootstrap_.node_context_params(), local_address, |
582 | 5.48k | options_.serviceZone(), options_.serviceClusterName(), options_.serviceNodeName()); |
583 | | |
584 | 5.48k | Configuration::InitialImpl initial_config(bootstrap_); |
585 | | |
586 | | // Learn original_start_time_ if our parent is still around to inform us of it. |
587 | 5.48k | const auto parent_admin_shutdown_response = restarter_.sendParentAdminShutdownRequest(); |
588 | 5.48k | if (parent_admin_shutdown_response.has_value()) { |
589 | 0 | original_start_time_ = parent_admin_shutdown_response.value().original_start_time_; |
590 | | // TODO(soulxu): This is added for switching the reuse port default value as true (#17259). |
591 | | // It ensures the same default value during the hot restart. This can be removed when |
592 | | // everyone switches to the new default value. |
593 | 0 | enable_reuse_port_default_ = |
594 | 0 | parent_admin_shutdown_response.value().enable_reuse_port_default_ ? true : false; |
595 | 0 | } |
596 | | |
597 | 5.48k | OptRef<Server::ConfigTracker> config_tracker; |
598 | 5.48k | #ifdef ENVOY_ADMIN_FUNCTIONALITY |
599 | 5.48k | admin_ = std::make_unique<AdminImpl>(initial_config.admin().profilePath(), *this, |
600 | 5.48k | initial_config.admin().ignoreGlobalConnLimit()); |
601 | | |
602 | 5.48k | config_tracker = admin_->getConfigTracker(); |
603 | 5.48k | #endif |
604 | 5.48k | secret_manager_ = std::make_unique<Secret::SecretManagerImpl>(config_tracker); |
605 | | |
606 | 5.48k | loadServerFlags(initial_config.flagsPath()); |
607 | | |
608 | | // Initialize the overload manager early so other modules can register for actions. |
609 | 5.48k | overload_manager_ = std::make_unique<OverloadManagerImpl>( |
610 | 5.48k | *dispatcher_, *stats_store_.rootScope(), thread_local_, bootstrap_.overload_manager(), |
611 | 5.48k | messageValidationContext().staticValidationVisitor(), *api_, options_); |
612 | | |
613 | 5.48k | maybeCreateHeapShrinker(); |
614 | | |
615 | 5.48k | for (const auto& bootstrap_extension : bootstrap_.bootstrap_extensions()) { |
616 | 5 | auto& factory = Config::Utility::getAndCheckFactory<Configuration::BootstrapExtensionFactory>( |
617 | 5 | bootstrap_extension); |
618 | 5 | auto config = Config::Utility::translateAnyToFactoryConfig( |
619 | 5 | bootstrap_extension.typed_config(), messageValidationContext().staticValidationVisitor(), |
620 | 5 | factory); |
621 | 5 | bootstrap_extensions_.push_back( |
622 | 5 | factory.createBootstrapExtension(*config, serverFactoryContext())); |
623 | 5 | } |
624 | | |
625 | | // Register the fatal actions. |
626 | 5.48k | { |
627 | 5.48k | FatalAction::FatalActionPtrList safe_actions; |
628 | 5.48k | FatalAction::FatalActionPtrList unsafe_actions; |
629 | 5.48k | for (const auto& action_config : bootstrap_.fatal_actions()) { |
630 | 9 | auto& factory = |
631 | 9 | Config::Utility::getAndCheckFactory<Server::Configuration::FatalActionFactory>( |
632 | 9 | action_config.config()); |
633 | 9 | auto action = factory.createFatalActionFromProto(action_config, this); |
634 | | |
635 | 9 | if (action->isAsyncSignalSafe()) { |
636 | 0 | safe_actions.push_back(std::move(action)); |
637 | 9 | } else { |
638 | 9 | unsafe_actions.push_back(std::move(action)); |
639 | 9 | } |
640 | 9 | } |
641 | 5.48k | Envoy::FatalErrorHandler::registerFatalActions( |
642 | 5.48k | std::move(safe_actions), std::move(unsafe_actions), api_->threadFactory()); |
643 | 5.48k | } |
644 | | |
645 | 5.48k | if (!bootstrap_.default_socket_interface().empty()) { |
646 | 294 | auto& sock_name = bootstrap_.default_socket_interface(); |
647 | 294 | auto sock = const_cast<Network::SocketInterface*>(Network::socketInterface(sock_name)); |
648 | 294 | if (sock != nullptr) { |
649 | 0 | Network::SocketInterfaceSingleton::clear(); |
650 | 0 | Network::SocketInterfaceSingleton::initialize(sock); |
651 | 0 | } |
652 | 294 | } |
653 | | |
654 | 5.48k | ListenerManagerFactory* listener_manager_factory = nullptr; |
655 | 5.48k | if (bootstrap_.has_listener_manager()) { |
656 | 0 | listener_manager_factory = Config::Utility::getAndCheckFactory<ListenerManagerFactory>( |
657 | 0 | bootstrap_.listener_manager(), false); |
658 | 5.48k | } else { |
659 | 5.48k | listener_manager_factory = &Config::Utility::getAndCheckFactoryByName<ListenerManagerFactory>( |
660 | 5.48k | Config::ServerExtensionValues::get().DEFAULT_LISTENER); |
661 | 5.48k | } |
662 | | |
663 | | // Workers get created first so they register for thread local updates. |
664 | 5.48k | listener_manager_ = listener_manager_factory->createListenerManager( |
665 | 5.48k | *this, nullptr, worker_factory_, bootstrap_.enable_dispatcher_stats(), quic_stat_names_); |
666 | | |
667 | | // The main thread is also registered for thread local updates so that code that does not care |
668 | | // whether it runs on the main thread or on workers can still use TLS. |
669 | 5.48k | thread_local_.registerThread(*dispatcher_, true); |
670 | | |
671 | | // We can now initialize stats for threading. |
672 | 5.48k | stats_store_.initializeThreading(*dispatcher_, thread_local_); |
673 | | |
674 | | // It's now safe to start writing stats from the main thread's dispatcher. |
675 | 5.48k | if (bootstrap_.enable_dispatcher_stats()) { |
676 | 148 | dispatcher_->initializeStats(*stats_store_.rootScope(), "server."); |
677 | 148 | } |
678 | | |
679 | | // The broad order of initialization from this point on is the following: |
680 | | // 1. Statically provisioned configuration (bootstrap) are loaded. |
681 | | // 2. Cluster manager is created and all primary clusters (i.e. with endpoint assignments |
682 | | // provisioned statically in bootstrap, discovered through DNS or file based CDS) are |
683 | | // initialized. |
684 | | // 3. Various services are initialized and configured using the bootstrap config. |
685 | | // 4. RTDS is initialized using primary clusters. This allows runtime overrides to be fully |
686 | | // configured before the rest of xDS configuration is provisioned. |
687 | | // 5. Secondary clusters (with endpoint assignments provisioned by xDS servers) are initialized. |
688 | | // 6. The rest of the dynamic configuration is provisioned. |
689 | | // |
690 | | // Please note: this order requires that RTDS is provisioned using a primary cluster. If RTDS is |
691 | | // provisioned through ADS then ADS must use primary cluster as well. This invariant is enforced |
692 | | // during RTDS initialization and invalid configuration will be rejected. |
693 | | |
694 | | // Runtime gets initialized before the main configuration since during main configuration |
695 | | // load things may grab a reference to the loader for later use. |
696 | 5.48k | runtime_ = component_factory.createRuntime(*this, initial_config); |
697 | | |
698 | 5.48k | initial_config.initAdminAccessLog(bootstrap_, *this); |
699 | 5.48k | validation_context_.setRuntime(runtime()); |
700 | | |
701 | 5.48k | if (!runtime().snapshot().getBoolean("envoy.disallow_global_stats", false)) { |
702 | 5.27k | assert_action_registration_ = Assert::addDebugAssertionFailureRecordAction( |
703 | 5.27k | [this](const char*) { server_stats_->debug_assertion_failures_.inc(); }); |
704 | 5.27k | envoy_bug_action_registration_ = Assert::addEnvoyBugFailureRecordAction( |
705 | 5.27k | [this](const char*) { server_stats_->envoy_bug_failures_.inc(); }); |
706 | 5.27k | } |
707 | | |
708 | 5.48k | if (initial_config.admin().address()) { |
709 | 2.64k | if (!admin_) { |
710 | 0 | throwEnvoyExceptionOrPanic("Admin address configured but admin support compiled out"); |
711 | 0 | } |
712 | 2.64k | admin_->startHttpListener(initial_config.admin().accessLogs(), options_.adminAddressPath(), |
713 | 2.64k | initial_config.admin().address(), |
714 | 2.64k | initial_config.admin().socketOptions(), |
715 | 2.64k | stats_store_.createScope("listener.admin.")); |
716 | 2.84k | } else { |
717 | 2.84k | ENVOY_LOG(warn, "No admin address given, so no admin HTTP server started."); |
718 | 2.84k | } |
719 | 5.48k | if (admin_) { |
720 | 5.27k | config_tracker_entry_ = admin_->getConfigTracker().add( |
721 | 5.27k | "bootstrap", [this](const Matchers::StringMatcher&) { return dumpBootstrapConfig(); }); |
722 | 5.27k | } |
723 | 5.48k | if (initial_config.admin().address()) { |
724 | 2.64k | admin_->addListenerToHandler(handler_.get()); |
725 | 2.64k | } |
726 | | |
727 | | // Once we have runtime we can initialize the SSL context manager. |
728 | 5.48k | ssl_context_manager_ = createContextManager("ssl_context_manager", time_source_); |
729 | | |
730 | 5.48k | cluster_manager_factory_ = std::make_unique<Upstream::ProdClusterManagerFactory>( |
731 | 5.48k | serverFactoryContext(), stats_store_, thread_local_, http_context_, |
732 | 5.48k | [this]() -> Network::DnsResolverSharedPtr { return this->getOrCreateDnsResolver(); }, |
733 | 5.48k | *ssl_context_manager_, *secret_manager_, quic_stat_names_, *this); |
734 | | |
735 | | // Now the configuration gets parsed. The configuration may start setting |
736 | | // thread local data per above. See MainImpl::initialize() for why ConfigImpl |
737 | | // is constructed as part of the InstanceBase and then populated once |
738 | | // cluster_manager_factory_ is available. |
739 | 5.48k | config_.initialize(bootstrap_, *this, *cluster_manager_factory_); |
740 | | |
741 | | // Instruct the listener manager to create the LDS provider if needed. This must be done later |
742 | | // because various items do not yet exist when the listener manager is created. |
743 | 5.48k | if (bootstrap_.dynamic_resources().has_lds_config() || |
744 | 5.48k | !bootstrap_.dynamic_resources().lds_resources_locator().empty()) { |
745 | 2.88k | std::unique_ptr<xds::core::v3::ResourceLocator> lds_resources_locator; |
746 | 2.88k | if (!bootstrap_.dynamic_resources().lds_resources_locator().empty()) { |
747 | 10 | lds_resources_locator = |
748 | 10 | std::make_unique<xds::core::v3::ResourceLocator>(Config::XdsResourceIdentifier::decodeUrl( |
749 | 10 | bootstrap_.dynamic_resources().lds_resources_locator())); |
750 | 10 | } |
751 | 2.88k | listener_manager_->createLdsApi(bootstrap_.dynamic_resources().lds_config(), |
752 | 2.88k | lds_resources_locator.get()); |
753 | 2.88k | } |
754 | | |
755 | | // We have to defer RTDS initialization until after the cluster manager is |
756 | | // instantiated (which in turn relies on runtime...). |
757 | 5.48k | runtime().initialize(clusterManager()); |
758 | | |
759 | 5.48k | clusterManager().setPrimaryClustersInitializedCb( |
760 | 5.48k | [this]() { onClusterManagerPrimaryInitializationComplete(); }); |
761 | | |
762 | 5.48k | auto& stats_config = config_.statsConfig(); |
763 | 5.48k | for (const Stats::SinkPtr& sink : stats_config.sinks()) { |
764 | 0 | stats_store_.addSink(*sink); |
765 | 0 | } |
766 | 5.48k | if (!stats_config.flushOnAdmin()) { |
767 | | // Some of the stat sinks may need dispatcher support so don't flush until the main loop starts. |
768 | | // Just setup the timer. |
769 | 3.76k | stat_flush_timer_ = dispatcher_->createTimer([this]() -> void { flushStats(); }); |
770 | 3.76k | stat_flush_timer_->enableTimer(stats_config.flushInterval()); |
771 | 3.76k | } |
772 | | |
773 | | // Now that we are initialized, notify the bootstrap extensions. |
774 | 5.48k | for (auto&& bootstrap_extension : bootstrap_extensions_) { |
775 | 0 | bootstrap_extension->onServerInitialized(); |
776 | 0 | } |
777 | | |
778 | | // GuardDog (deadlock detection) object and thread setup before workers are |
779 | | // started and before our own run() loop runs. |
780 | 5.48k | main_thread_guard_dog_ = std::make_unique<Server::GuardDogImpl>( |
781 | 5.48k | *stats_store_.rootScope(), config_.mainThreadWatchdogConfig(), *api_, "main_thread"); |
782 | 5.48k | worker_guard_dog_ = std::make_unique<Server::GuardDogImpl>( |
783 | 5.48k | *stats_store_.rootScope(), config_.workerWatchdogConfig(), *api_, "workers"); |
784 | 5.48k | } |
785 | | |
786 | 3.83k | void InstanceBase::onClusterManagerPrimaryInitializationComplete() { |
787 | | // If RTDS was not configured the `onRuntimeReady` callback is immediately invoked. |
788 | 3.83k | runtime().startRtdsSubscriptions([this]() { onRuntimeReady(); }); |
789 | 3.83k | } |
790 | | |
791 | 3.72k | void InstanceBase::onRuntimeReady() { |
792 | | // Begin initializing secondary clusters after RTDS configuration has been applied. |
793 | | // Initializing can throw exceptions, so catch these. |
794 | 3.72k | TRY_ASSERT_MAIN_THREAD { clusterManager().initializeSecondaryClusters(bootstrap_); } |
795 | 3.72k | END_TRY |
796 | 3.72k | CATCH(const EnvoyException& e, { |
797 | 3.72k | ENVOY_LOG(warn, "Skipping initialization of secondary cluster: {}", e.what()); |
798 | 3.72k | shutdown(); |
799 | 3.72k | }); |
800 | | |
801 | 3.72k | if (bootstrap_.has_hds_config()) { |
802 | 308 | const auto& hds_config = bootstrap_.hds_config(); |
803 | 308 | async_client_manager_ = std::make_unique<Grpc::AsyncClientManagerImpl>( |
804 | 308 | *config_.clusterManager(), thread_local_, time_source_, *api_, grpc_context_.statNames(), |
805 | 308 | bootstrap_.grpc_async_client_manager_config()); |
806 | 308 | TRY_ASSERT_MAIN_THREAD { |
807 | 308 | THROW_IF_NOT_OK(Config::Utility::checkTransportVersion(hds_config)); |
808 | 269 | hds_delegate_ = std::make_unique<Upstream::HdsDelegate>( |
809 | 269 | serverFactoryContext(), *stats_store_.rootScope(), |
810 | 269 | Config::Utility::factoryForGrpcApiConfigSource(*async_client_manager_, hds_config, |
811 | 269 | *stats_store_.rootScope(), false) |
812 | 269 | ->createUncachedRawAsyncClient(), |
813 | 269 | stats_store_, *ssl_context_manager_, info_factory_); |
814 | 269 | } |
815 | 269 | END_TRY |
816 | 308 | CATCH(const EnvoyException& e, { |
817 | 308 | ENVOY_LOG(warn, "Skipping initialization of HDS cluster: {}", e.what()); |
818 | 308 | shutdown(); |
819 | 308 | }); |
820 | 308 | } |
821 | 3.72k | } |
822 | | |
823 | 2.64k | void InstanceBase::startWorkers() { |
824 | | // The callback will be called after workers are started. |
825 | 2.64k | listener_manager_->startWorkers(*worker_guard_dog_, [this]() { |
826 | 2.64k | if (isShutdown()) { |
827 | 0 | return; |
828 | 0 | } |
829 | | |
830 | 2.64k | initialization_timer_->complete(); |
831 | | // Update server stats as soon as initialization is done. |
832 | 2.64k | updateServerStats(); |
833 | 2.64k | workers_started_ = true; |
834 | 2.64k | hooks_.onWorkersStarted(); |
835 | | // At this point we are ready to take traffic and all listening ports are up. Notify our |
836 | | // parent if applicable that they can stop listening and drain. |
837 | 2.64k | restarter_.drainParentListeners(); |
838 | 2.64k | drain_manager_->startParentShutdownSequence(); |
839 | 2.64k | }); |
840 | 2.64k | } |
841 | | |
// Builds the runtime loader from the initial (bootstrap) configuration. When YAML support is
// compiled in, the effective runtime config is logged at startup for operator visibility.
Runtime::LoaderPtr InstanceUtil::createRuntime(Instance& server,
                                               Server::Configuration::Initial& config) {
#ifdef ENVOY_ENABLE_YAML
  ENVOY_LOG(info, "runtime: {}", MessageUtil::getYamlStringFromMessage(config.runtime()));
#endif
  return std::make_unique<Runtime::LoaderImpl>(
      server.dispatcher(), server.threadLocal(), config.runtime(), server.localInfo(),
      server.stats(), server.api().randomGenerator(),
      server.messageValidationContext().dynamicValidationVisitor(), server.api());
}
852 | | |
853 | 5.34k | void InstanceBase::loadServerFlags(const absl::optional<std::string>& flags_path) { |
854 | 5.34k | if (!flags_path) { |
855 | 4.89k | return; |
856 | 4.89k | } |
857 | | |
858 | 458 | ENVOY_LOG(info, "server flags path: {}", flags_path.value()); |
859 | 458 | if (api_->fileSystem().fileExists(flags_path.value() + "/drain")) { |
860 | 2 | ENVOY_LOG(info, "starting server in drain mode"); |
861 | 2 | InstanceBase::failHealthcheck(true); |
862 | 2 | } |
863 | 458 | } |
864 | | |
// Orchestrates the start-of-serving sequence: installs process signal handlers, starts the
// overload manager, warns when no global downstream connection limit is configured, and arranges
// for `post_init_cb` to run once the cluster manager and init manager have fully initialized.
RunHelper::RunHelper(Instance& instance, const Options& options, Event::Dispatcher& dispatcher,
                     Upstream::ClusterManager& cm, AccessLog::AccessLogManager& access_log_manager,
                     Init::Manager& init_manager, OverloadManager& overload_manager,
                     std::function<void()> post_init_cb)
    // The init watcher fires when the init manager finishes; skip post-init work if a shutdown
    // arrived in the meantime.
    : init_watcher_("RunHelper", [&instance, post_init_cb]() {
        if (!instance.isShutdown()) {
          post_init_cb();
        }
      }) {
  // Setup signals.
  // Since signals are not supported on Windows we have an internal definition for `SIGTERM`
  // On POSIX it resolves as expected to SIGTERM
  // On Windows we use it internally for all the console events that indicate that we should
  // terminate the process.
  if (options.signalHandlingEnabled()) {
    sigterm_ = dispatcher.listenForSignal(ENVOY_SIGTERM, [&instance]() {
      ENVOY_LOG(warn, "caught ENVOY_SIGTERM");
      instance.shutdown();
    });
#ifndef WIN32
    sigint_ = dispatcher.listenForSignal(SIGINT, [&instance]() {
      ENVOY_LOG(warn, "caught SIGINT");
      instance.shutdown();
    });

    sig_usr_1_ = dispatcher.listenForSignal(SIGUSR1, [&access_log_manager]() {
      ENVOY_LOG(info, "caught SIGUSR1. Reopening access logs.");
      access_log_manager.reopen();
    });

    sig_hup_ = dispatcher.listenForSignal(SIGHUP, []() {
      ENVOY_LOG(warn, "caught and eating SIGHUP. See documentation for how to hot restart.");
    });
#endif
  }

  // Start overload manager before workers.
  overload_manager.start();

  // If there is no global limit to the number of active connections, warn on startup.
  if (!overload_manager.getThreadLocalOverloadState().isResourceMonitorEnabled(
          Server::OverloadProactiveResourceName::GlobalDownstreamMaxConnections) &&
      !instance.runtime().snapshot().get(Network::TcpListenerImpl::GlobalMaxCxRuntimeKey)) {
    ENVOY_LOG(warn, "There is no configured limit to the number of allowed active downstream "
                    "connections. Configure a "
                    "limit in `envoy.resource_monitors.downstream_connections` resource monitor.");
  }

  // Register for cluster manager init notification. We don't start serving worker traffic until
  // upstream clusters are initialized which may involve running the event loop. Note however that
  // this can fire immediately if all clusters have already initialized. Also note that we need
  // to guard against shutdown at two different levels since SIGTERM can come in once the run loop
  // starts.
  cm.setInitializedCb([&instance, &init_manager, &cm, this]() {
    if (instance.isShutdown()) {
      return;
    }

    const auto type_url = Config::getTypeUrl<envoy::config::route::v3::RouteConfiguration>();
    // Pause RDS to ensure that we don't send any requests until we've
    // subscribed to all the RDS resources. The subscriptions happen in the init callbacks,
    // so we pause RDS until we've completed all the callbacks.
    Config::ScopedResume maybe_resume_rds;
    if (cm.adsMux()) {
      maybe_resume_rds = cm.adsMux()->pause(type_url);
    }

    ENVOY_LOG(info, "all clusters initialized. initializing init manager");
    init_manager.initialize(init_watcher_);

    // Now that we're execute all the init callbacks we can resume RDS
    // as we've subscribed to all the statically defined RDS resources.
    // This is done by tearing down the maybe_resume_rds Cleanup object.
  });
}
940 | | |
941 | 2.64k | void InstanceBase::run() { |
942 | | // RunHelper exists primarily to facilitate testing of how we respond to early shutdown during |
943 | | // startup (see RunHelperTest in server_test.cc). |
944 | 2.64k | const auto run_helper = RunHelper(*this, options_, *dispatcher_, clusterManager(), |
945 | 2.64k | access_log_manager_, init_manager_, overloadManager(), [this] { |
946 | 2.64k | notifyCallbacksForStage(Stage::PostInit); |
947 | 2.64k | startWorkers(); |
948 | 2.64k | }); |
949 | | |
950 | | // Run the main dispatch loop waiting to exit. |
951 | 2.64k | ENVOY_LOG(info, "starting main dispatch loop"); |
952 | 2.64k | auto watchdog = main_thread_guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), |
953 | 2.64k | "main_thread", *dispatcher_); |
954 | 2.64k | dispatcher_->post([this] { notifyCallbacksForStage(Stage::Startup); }); |
955 | 2.64k | dispatcher_->run(Event::Dispatcher::RunType::Block); |
956 | 2.64k | ENVOY_LOG(info, "main dispatch loop exited"); |
957 | 2.64k | main_thread_guard_dog_->stopWatching(watchdog); |
958 | 2.64k | watchdog.reset(); |
959 | | |
960 | 2.64k | terminate(); |
961 | 2.64k | } |
962 | | |
963 | 9.81k | void InstanceBase::terminate() { |
964 | 9.81k | if (terminated_) { |
965 | 4.32k | return; |
966 | 4.32k | } |
967 | 5.48k | terminated_ = true; |
968 | | |
969 | | // Before starting to shutdown anything else, stop slot destruction updates. |
970 | 5.48k | thread_local_.shutdownGlobalThreading(); |
971 | | |
972 | | // Before the workers start exiting we should disable stat threading. |
973 | 5.48k | stats_store_.shutdownThreading(); |
974 | | |
975 | | // TODO: figure out the correct fix: https://github.com/envoyproxy/envoy/issues/15072. |
976 | 5.48k | std::vector<std::string> muxes = { |
977 | 5.48k | "envoy.config_mux.new_grpc_mux_factory", "envoy.config_mux.grpc_mux_factory", |
978 | 5.48k | "envoy.config_mux.delta_grpc_mux_factory", "envoy.config_mux.sotw_grpc_mux_factory"}; |
979 | 21.9k | for (const auto& name : muxes) { |
980 | 21.9k | auto* factory = Config::Utility::getFactoryByName<Config::MuxFactory>(name); |
981 | 21.9k | if (factory) { |
982 | 21.9k | factory->shutdownAll(); |
983 | 21.9k | } |
984 | 21.9k | } |
985 | | |
986 | 5.48k | if (overload_manager_) { |
987 | 5.34k | overload_manager_->stop(); |
988 | 5.34k | } |
989 | | |
990 | | // Shutdown all the workers now that the main dispatch loop is done. |
991 | 5.48k | if (listener_manager_ != nullptr) { |
992 | 5.33k | listener_manager_->stopWorkers(); |
993 | 5.33k | } |
994 | | |
995 | | // Only flush if we have not been hot restarted. |
996 | 5.48k | if (stat_flush_timer_) { |
997 | 3.76k | flushStats(); |
998 | 3.76k | } |
999 | | |
1000 | 5.48k | if (config_.clusterManager() != nullptr) { |
1001 | 5.10k | config_.clusterManager()->shutdown(); |
1002 | 5.10k | } |
1003 | 5.48k | handler_.reset(); |
1004 | 5.48k | thread_local_.shutdownThread(); |
1005 | 5.48k | restarter_.shutdown(); |
1006 | 5.48k | ENVOY_LOG(info, "exiting"); |
1007 | 5.48k | ENVOY_FLUSH_LOG(); |
1008 | 5.48k | FatalErrorHandler::clearFatalActionsOnTerminate(); |
1009 | 5.48k | } |
1010 | | |
1011 | 88.4k | Runtime::Loader& InstanceBase::runtime() { return *runtime_; } |
1012 | | |
1013 | 2.81k | void InstanceBase::shutdown() { |
1014 | 2.81k | ENVOY_LOG(info, "shutting down server instance"); |
1015 | 2.81k | shutdown_ = true; |
1016 | 2.81k | restarter_.sendParentTerminateRequest(); |
1017 | 2.81k | notifyCallbacksForStage(Stage::ShutdownExit, [this] { dispatcher_->exit(); }); |
1018 | 2.81k | } |
1019 | | |
// Hot-restart path: invoked when a child process starts up. Stops stat flushing and listeners in
// this process and closes the admin socket so the child can take them over.
void InstanceBase::shutdownAdmin() {
  ENVOY_LOG(warn, "shutting down admin due to child startup");
  stat_flush_timer_.reset();
  handler_->stopListeners();
  if (admin_) {
    admin_->closeSocket();
  }

  // If we still have a parent, it should be terminated now that we have a child.
  ENVOY_LOG(warn, "terminating parent process");
  restarter_.sendParentTerminateRequest();
}
1032 | | |
1033 | | ServerLifecycleNotifier::HandlePtr InstanceBase::registerCallback(Stage stage, |
1034 | 0 | StageCallback callback) { |
1035 | 0 | auto& callbacks = stage_callbacks_[stage]; |
1036 | 0 | return std::make_unique<LifecycleCallbackHandle<StageCallback>>(callbacks, callback); |
1037 | 0 | } |
1038 | | |
1039 | | ServerLifecycleNotifier::HandlePtr |
1040 | 0 | InstanceBase::registerCallback(Stage stage, StageCallbackWithCompletion callback) { |
1041 | 0 | ASSERT(stage == Stage::ShutdownExit); |
1042 | 0 | auto& callbacks = stage_completable_callbacks_[stage]; |
1043 | 0 | return std::make_unique<LifecycleCallbackHandle<StageCallbackWithCompletion>>(callbacks, |
1044 | 0 | callback); |
1045 | 0 | } |
1046 | | |
1047 | 8.11k | void InstanceBase::notifyCallbacksForStage(Stage stage, std::function<void()> completion_cb) { |
1048 | 8.11k | ASSERT_IS_MAIN_OR_TEST_THREAD(); |
1049 | 8.11k | const auto it = stage_callbacks_.find(stage); |
1050 | 8.11k | if (it != stage_callbacks_.end()) { |
1051 | 0 | for (const StageCallback& callback : it->second) { |
1052 | 0 | callback(); |
1053 | 0 | } |
1054 | 0 | } |
1055 | | |
1056 | | // Wrap completion_cb so that it only gets invoked when all callbacks for this stage |
1057 | | // have finished their work. |
1058 | 8.11k | std::shared_ptr<void> cb_guard( |
1059 | 8.11k | new Cleanup([this, completion_cb]() { dispatcher_->post(completion_cb); })); |
1060 | | |
1061 | | // Registrations which take a completion callback are typically implemented by executing a |
1062 | | // callback on all worker threads using Slot::runOnAllThreads which will hang indefinitely if |
1063 | | // worker threads have not been started so we need to skip notifications if envoy is shutdown |
1064 | | // early before workers have started. |
1065 | 8.11k | if (workers_started_) { |
1066 | 5.28k | const auto it2 = stage_completable_callbacks_.find(stage); |
1067 | 5.28k | if (it2 != stage_completable_callbacks_.end()) { |
1068 | 0 | ENVOY_LOG(info, "Notifying {} callback(s) with completion.", it2->second.size()); |
1069 | 0 | for (const StageCallbackWithCompletion& callback : it2->second) { |
1070 | 0 | callback([cb_guard] {}); |
1071 | 0 | } |
1072 | 0 | } |
1073 | 5.28k | } |
1074 | 8.11k | } |
1075 | | |
1076 | 2.64k | ProtobufTypes::MessagePtr InstanceBase::dumpBootstrapConfig() { |
1077 | 2.64k | auto config_dump = std::make_unique<envoy::admin::v3::BootstrapConfigDump>(); |
1078 | 2.64k | config_dump->mutable_bootstrap()->MergeFrom(bootstrap_); |
1079 | 2.64k | TimestampUtil::systemClockToTimestamp(bootstrap_config_update_time_, |
1080 | 2.64k | *(config_dump->mutable_last_updated())); |
1081 | 2.64k | return config_dump; |
1082 | 2.64k | } |
1083 | | |
// Lazily constructs the shared DNS resolver on first use and returns the cached instance on
// subsequent calls. The resolver implementation is chosen from the bootstrap's typed DNS
// resolver configuration.
Network::DnsResolverSharedPtr InstanceBase::getOrCreateDnsResolver() {
  if (!dns_resolver_) {
    envoy::config::core::v3::TypedExtensionConfig typed_dns_resolver_config;
    // createDnsResolverFactoryFromProto also fills in typed_dns_resolver_config for use below.
    Network::DnsResolverFactory& dns_resolver_factory =
        Network::createDnsResolverFactoryFromProto(bootstrap_, typed_dns_resolver_config);
    dns_resolver_ =
        dns_resolver_factory.createDnsResolver(dispatcher(), api(), typed_dns_resolver_config);
  }
  return dns_resolver_;
}
1094 | | |
1095 | 4.97k | bool InstanceBase::enableReusePortDefault() { return enable_reuse_port_default_; } |
1096 | | |
1097 | | } // namespace Server |
1098 | | } // namespace Envoy |