/proc/self/cwd/source/server/server.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/server/server.h" |
2 | | |
3 | | #include <csignal> |
4 | | #include <cstdint> |
5 | | #include <ctime> |
6 | | #include <functional> |
7 | | #include <memory> |
8 | | #include <string> |
9 | | |
10 | | #include "envoy/admin/v3/config_dump.pb.h" |
11 | | #include "envoy/common/exception.h" |
12 | | #include "envoy/common/time.h" |
13 | | #include "envoy/config/bootstrap/v3/bootstrap.pb.h" |
14 | | #include "envoy/config/bootstrap/v3/bootstrap.pb.validate.h" |
15 | | #include "envoy/event/dispatcher.h" |
16 | | #include "envoy/event/signal.h" |
17 | | #include "envoy/event/timer.h" |
18 | | #include "envoy/network/dns.h" |
19 | | #include "envoy/registry/registry.h" |
20 | | #include "envoy/server/bootstrap_extension_config.h" |
21 | | #include "envoy/server/instance.h" |
22 | | #include "envoy/server/options.h" |
23 | | #include "envoy/stats/histogram.h" |
24 | | #include "envoy/stats/stats.h" |
25 | | #include "envoy/upstream/cluster_manager.h" |
26 | | |
27 | | #include "source/common/api/api_impl.h" |
28 | | #include "source/common/api/os_sys_calls_impl.h" |
29 | | #include "source/common/common/enum_to_int.h" |
30 | | #include "source/common/common/mutex_tracer_impl.h" |
31 | | #include "source/common/common/utility.h" |
32 | | #include "source/common/config/utility.h" |
33 | | #include "source/common/config/well_known_names.h" |
34 | | #include "source/common/config/xds_resource.h" |
35 | | #include "source/common/http/codes.h" |
36 | | #include "source/common/http/headers.h" |
37 | | #include "source/common/local_info/local_info_impl.h" |
38 | | #include "source/common/network/address_impl.h" |
39 | | #include "source/common/network/dns_resolver/dns_factory_util.h" |
40 | | #include "source/common/network/socket_interface.h" |
41 | | #include "source/common/network/socket_interface_impl.h" |
42 | | #include "source/common/protobuf/utility.h" |
43 | | #include "source/common/runtime/runtime_impl.h" |
44 | | #include "source/common/runtime/runtime_keys.h" |
45 | | #include "source/common/signal/fatal_error_handler.h" |
46 | | #include "source/common/stats/stats_matcher_impl.h" |
47 | | #include "source/common/stats/tag_producer_impl.h" |
48 | | #include "source/common/stats/thread_local_store.h" |
49 | | #include "source/common/stats/timespan_impl.h" |
50 | | #include "source/common/tls/context_manager_impl.h" |
51 | | #include "source/common/upstream/cluster_manager_impl.h" |
52 | | #include "source/common/version/version.h" |
53 | | #include "source/server/configuration_impl.h" |
54 | | #include "source/server/listener_hooks.h" |
55 | | #include "source/server/listener_manager_factory.h" |
56 | | #include "source/server/regex_engine.h" |
57 | | #include "source/server/utils.h" |
58 | | |
59 | | namespace Envoy { |
60 | | namespace Server { |
61 | | |
62 | | namespace { |
63 | 4.98k | std::unique_ptr<ConnectionHandler> getHandler(Event::Dispatcher& dispatcher) { |
64 | | |
65 | 4.98k | auto* factory = Config::Utility::getFactoryByName<ConnectionHandlerFactory>( |
66 | 4.98k | "envoy.connection_handler.default"); |
67 | 4.98k | if (factory) { |
68 | 4.98k | return factory->createConnectionHandler(dispatcher, absl::nullopt); |
69 | 4.98k | } |
70 | 0 | ENVOY_LOG_MISC(debug, "Unable to find envoy.connection_handler.default factory"); |
71 | 0 | return nullptr; |
72 | 4.98k | } |
73 | | |
74 | | } // namespace |
75 | | |
// Wires together the core server singletons. NOTE: the member-initializer list
// executes in member *declaration* order (server.h), and several initializers
// depend on earlier members (api_ needs random_generator_, dispatcher_ needs
// api_, handler_ needs dispatcher_), so that declaration order is load-bearing.
InstanceBase::InstanceBase(Init::Manager& init_manager, const Options& options,
                           Event::TimeSystem& time_system, ListenerHooks& hooks,
                           HotRestart& restarter, Stats::StoreRoot& store,
                           Thread::BasicLockable& access_log_lock,
                           Random::RandomGeneratorPtr&& random_generator,
                           ThreadLocal::Instance& tls, Thread::ThreadFactory& thread_factory,
                           Filesystem::Instance& file_system,
                           std::unique_ptr<ProcessContext> process_context,
                           Buffer::WatermarkFactorySharedPtr watermark_factory)
    : init_manager_(init_manager), live_(false), options_(options),
      // Unknown-field handling is derived from command-line options; note the
      // second argument is intentionally negated (allow == !reject).
      validation_context_(options_.allowUnknownStaticFields(),
                          !options.rejectUnknownDynamicFields(),
                          options.ignoreUnknownDynamicFields()),
      time_source_(time_system), restarter_(restarter), start_time_(time(nullptr)),
      original_start_time_(start_time_), stats_store_(store), thread_local_(tls),
      random_generator_(std::move(random_generator)),
      // process_context is consumed twice: a non-owning ref here (if present)
      // and ownership transfer into process_context_ further down.
      api_(new Api::Impl(
          thread_factory, store, time_system, file_system, *random_generator_, bootstrap_,
          process_context ? ProcessContextOptRef(std::ref(*process_context)) : absl::nullopt,
          watermark_factory)),
      dispatcher_(api_->allocateDispatcher("main_thread")),
      access_log_manager_(options.fileFlushIntervalMsec(), *api_, *dispatcher_, access_log_lock,
                          store),
      handler_(getHandler(*dispatcher_)), worker_factory_(thread_local_, *api_, hooks),
      // Mutex contention tracing is opt-in via --enable-mutex-tracing.
      mutex_tracer_(options.mutexTracingEnabled() ? &Envoy::MutexTracerImpl::getOrCreateTracer()
                                                  : nullptr),
      grpc_context_(store.symbolTable()), http_context_(store.symbolTable()),
      router_context_(store.symbolTable()), process_context_(std::move(process_context)),
      hooks_(hooks), quic_stat_names_(store.symbolTable()), server_contexts_(*this),
      enable_reuse_port_default_(true), stats_flush_in_progress_(false) {}
106 | | |
// Tears the server down in a dependency-safe order; see inline notes for why
// the file logger and listener manager must be destroyed explicitly before the
// implicitly-destructed members.
InstanceBase::~InstanceBase() {
  terminate();

  // Stop logging to file before all the AccessLogManager and its dependencies are
  // destructed to avoid crashing at shutdown.
  file_logger_.reset();

  // Destruct the ListenerManager explicitly, before InstanceBase's local init_manager_ is
  // destructed.
  //
  // The ListenerManager's DestinationPortsMap contains FilterChainSharedPtrs. There is a rare race
  // condition where one of these FilterChains contains an HttpConnectionManager, which contains an
  // RdsRouteConfigProvider, which contains an RdsRouteConfigSubscriptionSharedPtr. Since
  // RdsRouteConfigSubscription is an Init::Target, ~RdsRouteConfigSubscription triggers a callback
  // set at initialization, which goes to unregister it from the top-level InitManager, which has
  // already been destructed (use-after-free) causing a segfault.
  ENVOY_LOG(debug, "destroying listener manager");
  listener_manager_.reset();
  ENVOY_LOG(debug, "destroyed listener manager");
  // Shut the main dispatcher down last so the resets above can still post/run
  // any pending cleanup callbacks.
  dispatcher_->shutdown();

#ifdef ENVOY_PERFETTO
  if (tracing_session_ != nullptr) {
    // Flush the trace data.
    perfetto::TrackEvent::Flush();
    // Disable tracing and block until tracing has stopped.
    tracing_session_->StopBlocking();
    close(tracing_fd_);
  }
#endif
}
138 | | |
139 | 32.4k | Upstream::ClusterManager& InstanceBase::clusterManager() { |
140 | 32.4k | ASSERT(config_.clusterManager() != nullptr); |
141 | 32.4k | return *config_.clusterManager(); |
142 | 32.4k | } |
143 | | |
144 | 0 | const Upstream::ClusterManager& InstanceBase::clusterManager() const { |
145 | 0 | ASSERT(config_.clusterManager() != nullptr); |
146 | 0 | return *config_.clusterManager(); |
147 | 0 | } |
148 | | |
149 | 0 | void InstanceBase::drainListeners(OptRef<const Network::ExtraShutdownListenerOptions> options) { |
150 | 0 | ENVOY_LOG(info, "closing and draining listeners"); |
151 | 0 | listener_manager_->stopListeners(ListenerManager::StopListenersType::All, |
152 | 0 | options.has_value() ? *options |
153 | 0 | : Network::ExtraShutdownListenerOptions{}); |
154 | 0 | drain_manager_->startDrainSequence([] {}); |
155 | 0 | } |
156 | | |
157 | 4.48k | void InstanceBase::failHealthcheck(bool fail) { |
158 | 4.48k | live_.store(!fail); |
159 | 4.48k | server_stats_->live_.set(live_.load()); |
160 | 4.48k | } |
161 | | |
// Builds a point-in-time snapshot of every sink-visible stat. For each metric
// family the first lambda pre-reserves storage (the store supplies the size)
// and the second records both a shared_ptr — pinning the metric's lifetime for
// the duration of the flush — and the value/reference the sinks will read.
MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store,
                                       Upstream::ClusterManager& cluster_manager,
                                       TimeSource& time_source) {
  store.forEachSinkedCounter(
      [this](std::size_t size) {
        snapped_counters_.reserve(size);
        counters_.reserve(size);
      },
      [this](Stats::Counter& counter) {
        snapped_counters_.push_back(Stats::CounterSharedPtr(&counter));
        // latch() presumably captures-and-resets the since-last-flush delta
        // (the flush path relies on this latching) — confirm in Counter docs.
        counters_.push_back({counter.latch(), counter});
      });

  store.forEachSinkedGauge(
      [this](std::size_t size) {
        snapped_gauges_.reserve(size);
        gauges_.reserve(size);
      },
      [this](Stats::Gauge& gauge) {
        snapped_gauges_.push_back(Stats::GaugeSharedPtr(&gauge));
        gauges_.push_back(gauge);
      });

  store.forEachSinkedHistogram(
      [this](std::size_t size) {
        snapped_histograms_.reserve(size);
        histograms_.reserve(size);
      },
      [this](Stats::ParentHistogram& histogram) {
        snapped_histograms_.push_back(Stats::ParentHistogramSharedPtr(&histogram));
        histograms_.push_back(histogram);
      });

  store.forEachSinkedTextReadout(
      [this](std::size_t size) {
        snapped_text_readouts_.reserve(size);
        text_readouts_.reserve(size);
      },
      [this](Stats::TextReadout& text_readout) {
        snapped_text_readouts_.push_back(Stats::TextReadoutSharedPtr(&text_readout));
        text_readouts_.push_back(text_readout);
      });

  // Per-host primitive metrics are copied by value (snapshots), not pinned.
  Upstream::HostUtility::forEachHostMetric(
      cluster_manager,
      [this](Stats::PrimitiveCounterSnapshot&& metric) {
        host_counters_.emplace_back(std::move(metric));
      },
      [this](Stats::PrimitiveGaugeSnapshot&& metric) {
        host_gauges_.emplace_back(std::move(metric));
      });

  // Timestamp taken after collection so it reflects snapshot completion.
  snapshot_time_ = time_source.systemTime();
}
216 | | |
217 | | void InstanceUtil::flushMetricsToSinks(const std::list<Stats::SinkPtr>& sinks, Stats::Store& store, |
218 | 3.03k | Upstream::ClusterManager& cm, TimeSource& time_source) { |
219 | | // Create a snapshot and flush to all sinks. |
220 | | // NOTE: Even if there are no sinks, creating the snapshot has the important property that it |
221 | | // latches all counters on a periodic basis. The hot restart code assumes this is being |
222 | | // done so this should not be removed. |
223 | 3.03k | MetricSnapshotImpl snapshot(store, cm, time_source); |
224 | 3.03k | for (const auto& sink : sinks) { |
225 | 0 | sink->flush(snapshot); |
226 | 0 | } |
227 | 3.03k | } |
228 | | |
// Kicks off one stats-flush cycle. Re-entrancy guard: if the previous flush is
// still waiting on the cross-thread histogram merge, this cycle is dropped
// (and counted in dropped_stat_flushes) instead of stacking up.
void InstanceBase::flushStats() {
  if (stats_flush_in_progress_) {
    ENVOY_LOG(debug, "skipping stats flush as flush is already in progress");
    server_stats_->dropped_stat_flushes_.inc();
    return;
  }

  // Cleared again in flushStatsInternal() once the flush completes.
  stats_flush_in_progress_ = true;
  ENVOY_LOG(debug, "flushing stats");
  // If Envoy is not fully initialized, workers will not be started and mergeHistograms
  // completion callback is not called immediately. As a result of this server stats will
  // not be updated and flushed to stat sinks. So skip mergeHistograms call if workers are
  // not started yet.
  if (initManager().state() == Init::Manager::State::Initialized) {
    // A shutdown initiated before this callback may prevent this from being called as per
    // the semantics documented in ThreadLocal's runOnAllThreads method.
    stats_store_.mergeHistograms([this]() -> void { flushStatsInternal(); });
  } else {
    ENVOY_LOG(debug, "Envoy is not fully initialized, skipping histogram merge and flushing stats");
    flushStatsInternal();
  }
}
251 | | |
// Refreshes the gauges under the "server." prefix (uptime, memory, connection
// totals, cert/OCSP expiry, init state, symbol-table lookups). Called from the
// stats-flush path.
void InstanceBase::updateServerStats() {
  // mergeParentStatsIfAny() does nothing and returns a struct of 0s if there is no parent.
  HotRestart::ServerStatsFromParent parent_stats = restarter_.mergeParentStatsIfAny(stats_store_);

  // Uptime is measured from the original process start time, which survives
  // hot restarts (see original_start_time_ handoff during initialization).
  server_stats_->uptime_.set(time(nullptr) - original_start_time_);
  server_stats_->memory_allocated_.set(Memory::Stats::totalCurrentlyAllocated() +
                                       parent_stats.parent_memory_allocated_);
  server_stats_->memory_heap_size_.set(Memory::Stats::totalCurrentlyReserved());
  server_stats_->memory_physical_size_.set(Memory::Stats::totalPhysicalBytes());
  if (!options().hotRestartDisabled()) {
    server_stats_->parent_connections_.set(parent_stats.parent_connections_);
  }
  // Total connections include those still owned by a hot-restart parent.
  server_stats_->total_connections_.set(listener_manager_->numConnections() +
                                        parent_stats.parent_connections_);
  server_stats_->days_until_first_cert_expiring_.set(
      sslContextManager().daysUntilFirstCertExpires().value_or(0));

  auto secs_until_ocsp_response_expires =
      sslContextManager().secondsUntilFirstOcspResponseExpires();
  if (secs_until_ocsp_response_expires) {
    server_stats_->seconds_until_first_ocsp_response_expiring_.set(
        secs_until_ocsp_response_expires.value());
  }
  server_stats_->state_.set(
      enumToInt(Utility::serverState(initManager().state(), healthCheckFailed())));
  // The no-op callback is only there to satisfy getRecentLookups()'s
  // signature; we want just the returned count.
  server_stats_->stats_recent_lookups_.set(
      stats_store_.symbolTable().getRecentLookups([](absl::string_view, uint64_t) {}));
}
280 | | |
281 | 3.03k | void InstanceBase::flushStatsInternal() { |
282 | 3.03k | updateServerStats(); |
283 | 3.03k | auto& stats_config = config_.statsConfig(); |
284 | 3.03k | InstanceUtil::flushMetricsToSinks(stats_config.sinks(), stats_store_, clusterManager(), |
285 | 3.03k | timeSource()); |
286 | | // TODO(ramaraochavali): consider adding different flush interval for histograms. |
287 | 3.03k | if (stat_flush_timer_ != nullptr) { |
288 | 3.03k | stat_flush_timer_->enableTimer(stats_config.flushInterval()); |
289 | 3.03k | } |
290 | | |
291 | 3.03k | stats_flush_in_progress_ = false; |
292 | 3.03k | } |
293 | | |
294 | 6.35k | bool InstanceBase::healthCheckFailed() { return !live_.load(); } |
295 | | |
296 | 0 | ProcessContextOptRef InstanceBase::processContext() { |
297 | 0 | if (process_context_ == nullptr) { |
298 | 0 | return absl::nullopt; |
299 | 0 | } |
300 | | |
301 | 0 | return *process_context_; |
302 | 0 | } |
303 | | |
304 | | namespace { |
305 | | |
306 | 0 | bool canBeRegisteredAsInlineHeader(const Http::LowerCaseString& header_name) { |
307 | | // 'set-cookie' cannot currently be registered as an inline header. |
308 | 0 | if (header_name == Http::Headers::get().SetCookie) { |
309 | 0 | return false; |
310 | 0 | } |
311 | 0 | return true; |
312 | 0 | } |
313 | | |
// Registers each bootstrap-declared custom header with the O(1) inline-header
// registry for the matching header-map type. Returns InvalidArgumentError for
// headers that cannot be inlined (currently only 'set-cookie'); registration
// itself is a process-global side effect.
absl::Status
registerCustomInlineHeadersFromBootstrap(const envoy::config::bootstrap::v3::Bootstrap& bootstrap) {
  for (const auto& inline_header : bootstrap.inline_headers()) {
    const Http::LowerCaseString lower_case_name(inline_header.inline_header_name());
    if (!canBeRegisteredAsInlineHeader(lower_case_name)) {
      return absl::InvalidArgumentError(
          fmt::format("Header {} cannot be registered as an inline header.",
                      inline_header.inline_header_name()));
    }
    // Dispatch on the declared map type. The default arm PANICs rather than
    // returning a status — presumably proto validation makes it unreachable.
    switch (inline_header.inline_header_type()) {
    case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_HEADER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::RequestHeaderMap::header_map_type>(lower_case_name);
      break;
    case envoy::config::bootstrap::v3::CustomInlineHeader::REQUEST_TRAILER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::RequestTrailerMap::header_map_type>(lower_case_name);
      break;
    case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_HEADER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::ResponseHeaderMap::header_map_type>(lower_case_name);
      break;
    case envoy::config::bootstrap::v3::CustomInlineHeader::RESPONSE_TRAILER:
      Http::CustomInlineHeaderRegistry::registerInlineHeader<
          Http::ResponseTrailerMap::header_map_type>(lower_case_name);
      break;
    default:
      PANIC("not implemented");
    }
  }
  return absl::OkStatus();
}
346 | | |
347 | | } // namespace |
348 | | |
// Loads the bootstrap proto from up to three sources, merged in increasing
// precedence: the --config-path file, then --config-yaml, then
// Options::configProto(). At least one must be non-empty. The merged result is
// validated with |validation_visitor| before returning OK.
absl::Status InstanceUtil::loadBootstrapConfig(
    envoy::config::bootstrap::v3::Bootstrap& bootstrap, const Options& options,
    ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api) {
  const std::string& config_path = options.configPath();
  const std::string& config_yaml = options.configYaml();
  const envoy::config::bootstrap::v3::Bootstrap& config_proto = options.configProto();

  // One of config_path and config_yaml or bootstrap should be specified.
  if (config_path.empty() && config_yaml.empty() && config_proto.ByteSizeLong() == 0) {
    return absl::InvalidArgumentError(
        "At least one of --config-path or --config-yaml or Options::configProto() "
        "should be non-empty");
  }

  if (!config_path.empty()) {
    MessageUtil::loadFromFile(config_path, bootstrap, validation_visitor, api);
  }
  if (!config_yaml.empty()) {
    envoy::config::bootstrap::v3::Bootstrap bootstrap_override;
#ifdef ENVOY_ENABLE_YAML
    MessageUtil::loadFromYaml(config_yaml, bootstrap_override, validation_visitor);
    // TODO(snowp): The fact that we do a merge here doesn't seem to be covered under test.
#else
    // Treat the yaml as proto
    Protobuf::TextFormat::ParseFromString(config_yaml, &bootstrap_override);
#endif
    // MergeFrom: later sources override scalar fields and append repeated ones.
    bootstrap.MergeFrom(bootstrap_override);
  }
  if (config_proto.ByteSizeLong() != 0) {
    bootstrap.MergeFrom(config_proto);
  }
  MessageUtil::validate(bootstrap, validation_visitor);
  return absl::OkStatus();
}
383 | | |
// Entry point for server startup. Optionally redirects logging to a file,
// initializes the hot-restarter and drain manager, then delegates the real
// work to initializeOrThrow(). On any failure it logs the offending config,
// tears the partially-built server down via terminate(), and rethrows.
void InstanceBase::initialize(Network::Address::InstanceConstSharedPtr local_address,
                              ComponentFactory& component_factory) {
  // Deferred so the file sink is only created when --log-path is set; failures
  // are rewrapped with the offending path for a clearer startup error.
  std::function set_up_logger = [&] {
    TRY_ASSERT_MAIN_THREAD {
      file_logger_ = std::make_unique<Logger::FileSinkDelegate>(
          options_.logPath(), access_log_manager_, Logger::Registry::getSink());
    }
    END_TRY
    CATCH(const EnvoyException& e, {
      throw EnvoyException(
          fmt::format("Failed to open log-file '{}'. e.what(): {}", options_.logPath(), e.what()));
    });
  };

  TRY_ASSERT_MAIN_THREAD {
    if (!options_.logPath().empty()) {
      set_up_logger();
    }
    restarter_.initialize(*dispatcher_, *this);
    drain_manager_ = component_factory.createDrainManager(*this);
    THROW_IF_NOT_OK(initializeOrThrow(std::move(local_address), component_factory));
  }
  END_TRY
  // Both arms clean up with terminate() and rethrow; the first also dumps the
  // effective configuration sources for debugging.
  MULTI_CATCH(
      const EnvoyException& e,
      {
        ENVOY_LOG(critical, "error initializing config '{} {} {}': {}",
                  options_.configProto().DebugString(), options_.configYaml(),
                  options_.configPath(), e.what());
        terminate();
        throw;
      },
      {
        ENVOY_LOG(critical, "error initializing due to unknown exception");
        terminate();
        throw;
      });
}
422 | | |
423 | | absl::Status InstanceBase::initializeOrThrow(Network::Address::InstanceConstSharedPtr local_address, |
424 | 4.98k | ComponentFactory& component_factory) { |
425 | 4.98k | ENVOY_LOG(info, "initializing epoch {} (base id={}, hot restart version={})", |
426 | 4.98k | options_.restartEpoch(), restarter_.baseId(), restarter_.version()); |
427 | | |
428 | 4.98k | ENVOY_LOG(info, "statically linked extensions:"); |
429 | 366k | for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) { |
430 | 366k | ENVOY_LOG(info, " {}: {}", ext.first, absl::StrJoin(ext.second->registeredNames(), ", ")); |
431 | 366k | } |
432 | | |
433 | | // The main thread is registered for thread local updates so that code that does not care |
434 | | // whether it runs on the main thread or on workers can still use TLS. |
435 | | // We do this as early as possible because this has no side effect and could ensure that the |
436 | | // TLS always contains a valid main thread dispatcher when TLS is used. |
437 | 4.98k | thread_local_.registerThread(*dispatcher_, true); |
438 | | |
439 | | // Handle configuration that needs to take place prior to the main configuration load. |
440 | 4.98k | RETURN_IF_NOT_OK(InstanceUtil::loadBootstrapConfig( |
441 | 4.98k | bootstrap_, options_, messageValidationContext().staticValidationVisitor(), *api_)); |
442 | 4.98k | bootstrap_config_update_time_ = time_source_.systemTime(); |
443 | | |
444 | 4.98k | if (bootstrap_.has_application_log_config()) { |
445 | 515 | RETURN_IF_NOT_OK( |
446 | 515 | Utility::assertExclusiveLogFormatMethod(options_, bootstrap_.application_log_config())); |
447 | 515 | RETURN_IF_NOT_OK(Utility::maybeSetApplicationLogFormat(bootstrap_.application_log_config())); |
448 | 513 | } |
449 | | |
450 | | #ifdef ENVOY_PERFETTO |
451 | | perfetto::TracingInitArgs args; |
452 | | // Include in-process events only. |
453 | | args.backends = perfetto::kInProcessBackend; |
454 | | perfetto::Tracing::Initialize(args); |
455 | | perfetto::TrackEvent::Register(); |
456 | | |
457 | | // Prepare a configuration for a new "Perfetto" tracing session. |
458 | | perfetto::TraceConfig cfg; |
459 | | // TODO(rojkov): make the tracer configurable with either "Perfetto"'s native |
460 | | // message or custom one embedded into Bootstrap. |
461 | | cfg.add_buffers()->set_size_kb(1024); |
462 | | auto* ds_cfg = cfg.add_data_sources()->mutable_config(); |
463 | | ds_cfg->set_name("track_event"); |
464 | | |
465 | | const std::string pftrace_path = |
466 | | PROTOBUF_GET_STRING_OR_DEFAULT(bootstrap_, perf_tracing_file_path, "envoy.pftrace"); |
467 | | // Instantiate a new tracing session. |
468 | | tracing_session_ = perfetto::Tracing::NewTrace(); |
469 | | tracing_fd_ = open(pftrace_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600); |
470 | | if (tracing_fd_ == -1) { |
471 | | return absl::InvalidArgumentError( |
472 | | fmt::format("unable to open tracing file {}: {}", pftrace_path, errorDetails(errno))); |
473 | | } |
474 | | // Configure the tracing session. |
475 | | tracing_session_->Setup(cfg, tracing_fd_); |
476 | | // Enable tracing and block until tracing has started. |
477 | | tracing_session_->StartBlocking(); |
478 | | #endif |
479 | | |
480 | | // Immediate after the bootstrap has been loaded, override the header prefix, if configured to |
481 | | // do so. This must be set before any other code block references the HeaderValues ConstSingleton. |
482 | 4.98k | if (!bootstrap_.header_prefix().empty()) { |
483 | | // setPrefix has a release assert verifying that setPrefix() is not called after prefix() |
484 | 0 | ThreadSafeSingleton<Http::PrefixValue>::get().setPrefix(bootstrap_.header_prefix().c_str()); |
485 | 0 | } |
486 | | |
487 | | // Register Custom O(1) headers from bootstrap. |
488 | 4.98k | RETURN_IF_NOT_OK(registerCustomInlineHeadersFromBootstrap(bootstrap_)); |
489 | | |
490 | 4.98k | ENVOY_LOG(info, "HTTP header map info:"); |
491 | 19.9k | for (const auto& info : Http::HeaderMapImplUtility::getAllHeaderMapImplInfo()) { |
492 | 19.9k | ENVOY_LOG(info, " {}: {} bytes: {}", info.name_, info.size_, |
493 | 19.9k | absl::StrJoin(info.registered_headers_, ",")); |
494 | 19.9k | } |
495 | | |
496 | | // Initialize the regex engine and inject to singleton. |
497 | | // Needs to happen before stats store initialization because the stats |
498 | | // matcher config can include regexes. |
499 | 4.98k | regex_engine_ = createRegexEngine( |
500 | 4.98k | bootstrap_, messageValidationContext().staticValidationVisitor(), serverFactoryContext()); |
501 | | |
502 | | // Needs to happen as early as possible in the instantiation to preempt the objects that require |
503 | | // stats. |
504 | 4.98k | auto producer_or_error = |
505 | 4.98k | Stats::TagProducerImpl::createTagProducer(bootstrap_.stats_config(), options_.statsTags()); |
506 | 4.98k | RETURN_IF_NOT_OK_REF(producer_or_error.status()); |
507 | 4.61k | stats_store_.setTagProducer(std::move(producer_or_error.value())); |
508 | 4.61k | stats_store_.setStatsMatcher(std::make_unique<Stats::StatsMatcherImpl>( |
509 | 4.61k | bootstrap_.stats_config(), stats_store_.symbolTable(), server_contexts_)); |
510 | 4.61k | stats_store_.setHistogramSettings( |
511 | 4.61k | std::make_unique<Stats::HistogramSettingsImpl>(bootstrap_.stats_config(), server_contexts_)); |
512 | | |
513 | 4.61k | const std::string server_stats_prefix = "server."; |
514 | 4.61k | const std::string server_compilation_settings_stats_prefix = "server.compilation_settings"; |
515 | 4.61k | server_stats_ = std::make_unique<ServerStats>( |
516 | 4.61k | ServerStats{ALL_SERVER_STATS(POOL_COUNTER_PREFIX(stats_store_, server_stats_prefix), |
517 | 4.61k | POOL_GAUGE_PREFIX(stats_store_, server_stats_prefix), |
518 | 4.61k | POOL_HISTOGRAM_PREFIX(stats_store_, server_stats_prefix))}); |
519 | 4.61k | server_compilation_settings_stats_ = |
520 | 4.61k | std::make_unique<CompilationSettings::ServerCompilationSettingsStats>( |
521 | 4.61k | CompilationSettings::ServerCompilationSettingsStats{ALL_SERVER_COMPILATION_SETTINGS_STATS( |
522 | 4.61k | POOL_COUNTER_PREFIX(stats_store_, server_compilation_settings_stats_prefix), |
523 | 4.61k | POOL_GAUGE_PREFIX(stats_store_, server_compilation_settings_stats_prefix), |
524 | 4.61k | POOL_HISTOGRAM_PREFIX(stats_store_, server_compilation_settings_stats_prefix))}); |
525 | 4.61k | validation_context_.setCounters(server_stats_->static_unknown_fields_, |
526 | 4.61k | server_stats_->dynamic_unknown_fields_, |
527 | 4.61k | server_stats_->wip_protos_); |
528 | | |
529 | 4.61k | memory_allocator_manager_ = std::make_unique<Memory::AllocatorManager>( |
530 | 4.61k | *api_, *stats_store_.rootScope(), bootstrap_.memory_allocator_manager()); |
531 | | |
532 | 4.61k | initialization_timer_ = std::make_unique<Stats::HistogramCompletableTimespanImpl>( |
533 | 4.61k | server_stats_->initialization_time_ms_, timeSource()); |
534 | 4.61k | server_stats_->concurrency_.set(options_.concurrency()); |
535 | 4.61k | if (!options().hotRestartDisabled()) { |
536 | 2.49k | server_stats_->hot_restart_epoch_.set(options_.restartEpoch()); |
537 | 2.49k | } |
538 | 4.61k | InstanceBase::failHealthcheck(false); |
539 | | |
540 | | // Check if bootstrap has server version override set, if yes, we should use that as |
541 | | // 'server.version' stat. |
542 | 4.61k | uint64_t version_int; |
543 | 4.61k | if (bootstrap_.stats_server_version_override().value() > 0) { |
544 | 101 | version_int = bootstrap_.stats_server_version_override().value(); |
545 | 4.51k | } else { |
546 | 4.51k | if (!StringUtil::atoull(VersionInfo::revision().substr(0, 6).c_str(), version_int, 16)) { |
547 | 0 | return absl::InvalidArgumentError("compiled GIT SHA is invalid. Invalid build."); |
548 | 0 | } |
549 | 4.51k | } |
550 | 4.61k | server_stats_->version_.set(version_int); |
551 | 4.61k | if (VersionInfo::sslFipsCompliant()) { |
552 | 0 | server_compilation_settings_stats_->fips_mode_.set(1); |
553 | 4.61k | } else { |
554 | | // Set this explicitly so that "used" flag is set so that it can be pushed to stats sinks. |
555 | 4.61k | server_compilation_settings_stats_->fips_mode_.set(0); |
556 | 4.61k | } |
557 | | |
558 | | // If user has set user_agent_name in the bootstrap config, use it. |
559 | | // Default to "envoy" if unset. |
560 | 4.61k | if (bootstrap_.node().user_agent_name().empty()) { |
561 | 4.18k | bootstrap_.mutable_node()->set_user_agent_name("envoy"); |
562 | 4.18k | } |
563 | | |
564 | | // If user has set user_agent_build_version in the bootstrap config, use it. |
565 | | // Default to the internal server version. |
566 | 4.61k | if (!bootstrap_.node().user_agent_build_version().has_version()) { |
567 | 4.47k | *bootstrap_.mutable_node()->mutable_user_agent_build_version() = VersionInfo::buildVersion(); |
568 | 4.47k | } |
569 | | |
570 | 318k | for (const auto& ext : Envoy::Registry::FactoryCategoryRegistry::registeredFactories()) { |
571 | 318k | auto registered_types = ext.second->registeredTypes(); |
572 | 1.20M | for (const auto& name : ext.second->allRegisteredNames()) { |
573 | 1.20M | auto* extension = bootstrap_.mutable_node()->add_extensions(); |
574 | 1.20M | extension->set_name(std::string(name)); |
575 | 1.20M | extension->set_category(ext.first); |
576 | 1.20M | auto const version = ext.second->getFactoryVersion(name); |
577 | 1.20M | if (version) { |
578 | 0 | *extension->mutable_version() = version.value(); |
579 | 0 | } |
580 | 1.20M | extension->set_disabled(ext.second->isFactoryDisabled(name)); |
581 | 1.20M | auto it = registered_types.find(name); |
582 | 1.20M | if (it != registered_types.end()) { |
583 | 805k | std::sort(it->second.begin(), it->second.end()); |
584 | 864k | for (const auto& type_url : it->second) { |
585 | 864k | extension->add_type_urls(type_url); |
586 | 864k | } |
587 | 805k | } |
588 | 1.20M | } |
589 | 318k | } |
590 | | |
591 | 4.61k | local_info_ = std::make_unique<LocalInfo::LocalInfoImpl>( |
592 | 4.61k | stats().symbolTable(), bootstrap_.node(), bootstrap_.node_context_params(), local_address, |
593 | 4.61k | options_.serviceZone(), options_.serviceClusterName(), options_.serviceNodeName()); |
594 | | |
595 | 4.61k | absl::Status creation_status; |
596 | 4.61k | Configuration::InitialImpl initial_config(bootstrap_, creation_status); |
597 | 4.61k | RETURN_IF_NOT_OK(creation_status); |
598 | | |
599 | | // Learn original_start_time_ if our parent is still around to inform us of it. |
600 | 4.61k | const auto parent_admin_shutdown_response = restarter_.sendParentAdminShutdownRequest(); |
601 | 4.61k | if (parent_admin_shutdown_response.has_value()) { |
602 | 0 | original_start_time_ = parent_admin_shutdown_response.value().original_start_time_; |
603 | | // TODO(soulxu): This is added for switching the reuse port default value as true (#17259). |
604 | | // It ensures the same default value during the hot restart. This can be removed when |
605 | | // everyone switches to the new default value. |
606 | 0 | enable_reuse_port_default_ = |
607 | 0 | parent_admin_shutdown_response.value().enable_reuse_port_default_ ? true : false; |
608 | 0 | } |
609 | | |
610 | 4.61k | OptRef<Server::ConfigTracker> config_tracker; |
611 | 4.61k | #ifdef ENVOY_ADMIN_FUNCTIONALITY |
612 | 4.61k | admin_ = std::make_shared<AdminImpl>(initial_config.admin().profilePath(), *this, |
613 | 4.61k | initial_config.admin().ignoreGlobalConnLimit()); |
614 | | |
615 | 4.61k | config_tracker = admin_->getConfigTracker(); |
616 | 4.61k | #endif |
617 | 4.61k | secret_manager_ = std::make_unique<Secret::SecretManagerImpl>(config_tracker); |
618 | | |
619 | 4.61k | loadServerFlags(initial_config.flagsPath()); |
620 | | |
621 | | // Initialize the overload manager early so other modules can register for actions. |
622 | 4.61k | auto overload_manager_or_error = createOverloadManager(); |
623 | 4.61k | RETURN_IF_NOT_OK(overload_manager_or_error.status()); |
624 | 4.61k | overload_manager_ = std::move(*overload_manager_or_error); |
625 | 4.61k | null_overload_manager_ = createNullOverloadManager(); |
626 | | |
627 | 4.61k | maybeCreateHeapShrinker(); |
628 | | |
629 | 4.61k | for (const auto& bootstrap_extension : bootstrap_.bootstrap_extensions()) { |
630 | 1 | auto& factory = Config::Utility::getAndCheckFactory<Configuration::BootstrapExtensionFactory>( |
631 | 1 | bootstrap_extension); |
632 | 1 | auto config = Config::Utility::translateAnyToFactoryConfig( |
633 | 1 | bootstrap_extension.typed_config(), messageValidationContext().staticValidationVisitor(), |
634 | 1 | factory); |
635 | 1 | bootstrap_extensions_.push_back( |
636 | 1 | factory.createBootstrapExtension(*config, serverFactoryContext())); |
637 | 1 | } |
638 | | |
639 | | // Register the fatal actions. |
640 | 4.61k | { |
641 | 4.61k | FatalAction::FatalActionPtrList safe_actions; |
642 | 4.61k | FatalAction::FatalActionPtrList unsafe_actions; |
643 | 4.61k | for (const auto& action_config : bootstrap_.fatal_actions()) { |
644 | 18 | auto& factory = |
645 | 18 | Config::Utility::getAndCheckFactory<Server::Configuration::FatalActionFactory>( |
646 | 18 | action_config.config()); |
647 | 18 | auto action = factory.createFatalActionFromProto(action_config, this); |
648 | | |
649 | 18 | if (action->isAsyncSignalSafe()) { |
650 | 0 | safe_actions.push_back(std::move(action)); |
651 | 18 | } else { |
652 | 18 | unsafe_actions.push_back(std::move(action)); |
653 | 18 | } |
654 | 18 | } |
655 | 4.61k | Envoy::FatalErrorHandler::registerFatalActions( |
656 | 4.61k | std::move(safe_actions), std::move(unsafe_actions), api_->threadFactory()); |
657 | 4.61k | } |
658 | | |
659 | 4.61k | if (!bootstrap_.default_socket_interface().empty()) { |
660 | 218 | auto& sock_name = bootstrap_.default_socket_interface(); |
661 | 218 | auto sock = const_cast<Network::SocketInterface*>(Network::socketInterface(sock_name)); |
662 | 218 | if (sock != nullptr) { |
663 | 0 | Network::SocketInterfaceSingleton::clear(); |
664 | 0 | Network::SocketInterfaceSingleton::initialize(sock); |
665 | 0 | } |
666 | 218 | } |
667 | | |
668 | 4.61k | ListenerManagerFactory* listener_manager_factory = nullptr; |
669 | 4.61k | if (bootstrap_.has_listener_manager()) { |
670 | 2 | listener_manager_factory = Config::Utility::getAndCheckFactory<ListenerManagerFactory>( |
671 | 2 | bootstrap_.listener_manager(), false); |
672 | 4.61k | } else { |
673 | 4.61k | listener_manager_factory = &Config::Utility::getAndCheckFactoryByName<ListenerManagerFactory>( |
674 | 4.61k | Config::ServerExtensionValues::get().DEFAULT_LISTENER); |
675 | 4.61k | } |
676 | | |
677 | | // Workers get created first so they register for thread local updates. |
678 | 4.61k | listener_manager_ = listener_manager_factory->createListenerManager( |
679 | 4.61k | *this, nullptr, worker_factory_, bootstrap_.enable_dispatcher_stats(), quic_stat_names_); |
680 | | |
681 | | // We can now initialize stats for threading. |
682 | 4.61k | stats_store_.initializeThreading(*dispatcher_, thread_local_); |
683 | | |
684 | | // It's now safe to start writing stats from the main thread's dispatcher. |
685 | 4.61k | if (bootstrap_.enable_dispatcher_stats()) { |
686 | 170 | dispatcher_->initializeStats(*stats_store_.rootScope(), "server."); |
687 | 170 | } |
688 | | |
689 | | // The broad order of initialization from this point on is the following: |
690 | | // 1. Statically provisioned configuration (bootstrap) are loaded. |
691 | | // 2. Cluster manager is created and all primary clusters (i.e. with endpoint assignments |
692 | | // provisioned statically in bootstrap, discovered through DNS or file based CDS) are |
693 | | // initialized. |
694 | | // 3. Various services are initialized and configured using the bootstrap config. |
695 | | // 4. RTDS is initialized using primary clusters. This allows runtime overrides to be fully |
696 | | // configured before the rest of xDS configuration is provisioned. |
697 | | // 5. Secondary clusters (with endpoint assignments provisioned by xDS servers) are initialized. |
698 | | // 6. The rest of the dynamic configuration is provisioned. |
699 | | // |
700 | | // Please note: this order requires that RTDS is provisioned using a primary cluster. If RTDS is |
701 | | // provisioned through ADS then ADS must use primary cluster as well. This invariant is enforced |
702 | | // during RTDS initialization and invalid configuration will be rejected. |
703 | | |
704 | | // Runtime gets initialized before the main configuration since during main configuration |
705 | | // load things may grab a reference to the loader for later use. |
706 | 4.61k | runtime_ = component_factory.createRuntime(*this, initial_config); |
707 | 4.61k | validation_context_.setRuntime(runtime()); |
708 | | |
709 | 4.61k | if (!runtime().snapshot().getBoolean("envoy.disallow_global_stats", false)) { |
710 | 4.40k | assert_action_registration_ = Assert::addDebugAssertionFailureRecordAction( |
711 | 4.40k | [this](const char*) { server_stats_->debug_assertion_failures_.inc(); }); |
712 | 4.40k | envoy_bug_action_registration_ = Assert::addEnvoyBugFailureRecordAction( |
713 | 4.40k | [this](const char*) { server_stats_->envoy_bug_failures_.inc(); }); |
714 | 4.40k | } |
715 | | |
716 | 4.61k | if (initial_config.admin().address()) { |
717 | 1.97k | #ifdef ENVOY_ADMIN_FUNCTIONALITY |
718 | | // Admin instance always be created if admin support is not compiled out. |
719 | 1.97k | RELEASE_ASSERT(admin_ != nullptr, "Admin instance should be created but actually not."); |
720 | 1.97k | auto typed_admin = dynamic_cast<AdminImpl*>(admin_.get()); |
721 | 1.97k | RELEASE_ASSERT(typed_admin != nullptr, "Admin implementation is not an AdminImpl."); |
722 | 1.97k | initial_config.initAdminAccessLog(bootstrap_, typed_admin->factoryContext()); |
723 | 1.97k | admin_->startHttpListener(initial_config.admin().accessLogs(), initial_config.admin().address(), |
724 | 1.97k | initial_config.admin().socketOptions()); |
725 | | #else |
726 | | return absl::InvalidArgumentError("Admin address configured but admin support compiled out"); |
727 | | #endif |
728 | 2.63k | } else { |
729 | 2.63k | ENVOY_LOG(info, "No admin address given, so no admin HTTP server started."); |
730 | 2.63k | } |
731 | 4.61k | if (admin_) { |
732 | 4.40k | config_tracker_entry_ = admin_->getConfigTracker().add( |
733 | 4.40k | "bootstrap", [this](const Matchers::StringMatcher&) { return dumpBootstrapConfig(); }); |
734 | 4.40k | } |
735 | 4.61k | if (initial_config.admin().address()) { |
736 | 1.97k | admin_->addListenerToHandler(handler_.get()); |
737 | 1.97k | } |
738 | | |
739 | | // Once we have runtime we can initialize the SSL context manager. |
740 | 4.61k | ssl_context_manager_ = |
741 | 4.61k | std::make_unique<Extensions::TransportSockets::Tls::ContextManagerImpl>(server_contexts_); |
742 | | |
743 | 4.61k | http_server_properties_cache_manager_ = |
744 | 4.61k | std::make_unique<Http::HttpServerPropertiesCacheManagerImpl>( |
745 | 4.61k | serverFactoryContext(), messageValidationContext().staticValidationVisitor(), |
746 | 4.61k | thread_local_); |
747 | | |
748 | 4.61k | cluster_manager_factory_ = std::make_unique<Upstream::ProdClusterManagerFactory>( |
749 | 4.61k | serverFactoryContext(), stats_store_, thread_local_, http_context_, |
750 | 4.61k | [this]() -> Network::DnsResolverSharedPtr { return this->getOrCreateDnsResolver(); }, |
751 | 4.61k | *ssl_context_manager_, *secret_manager_, quic_stat_names_, *this); |
752 | | |
753 | | // Now the configuration gets parsed. The configuration may start setting |
754 | | // thread local data per above. See MainImpl::initialize() for why ConfigImpl |
755 | | // is constructed as part of the InstanceBase and then populated once |
756 | | // cluster_manager_factory_ is available. |
757 | 4.61k | RETURN_IF_NOT_OK(config_.initialize(bootstrap_, *this, *cluster_manager_factory_)); |
758 | | |
759 | | // Instruct the listener manager to create the LDS provider if needed. This must be done later |
760 | | // because various items do not yet exist when the listener manager is created. |
761 | 3.61k | if (bootstrap_.dynamic_resources().has_lds_config() || |
762 | 3.61k | !bootstrap_.dynamic_resources().lds_resources_locator().empty()) { |
763 | 2.27k | std::unique_ptr<xds::core::v3::ResourceLocator> lds_resources_locator; |
764 | 2.27k | if (!bootstrap_.dynamic_resources().lds_resources_locator().empty()) { |
765 | 11 | lds_resources_locator = std::make_unique<xds::core::v3::ResourceLocator>( |
766 | 11 | THROW_OR_RETURN_VALUE(Config::XdsResourceIdentifier::decodeUrl( |
767 | 11 | bootstrap_.dynamic_resources().lds_resources_locator()), |
768 | 11 | xds::core::v3::ResourceLocator)); |
769 | 11 | } |
770 | 2.27k | listener_manager_->createLdsApi(bootstrap_.dynamic_resources().lds_config(), |
771 | 2.27k | lds_resources_locator.get()); |
772 | 2.27k | } |
773 | | |
774 | | // We have to defer RTDS initialization until after the cluster manager is |
775 | | // instantiated (which in turn relies on runtime...). |
776 | 3.61k | RETURN_IF_NOT_OK(runtime().initialize(clusterManager())); |
777 | | |
778 | 3.44k | clusterManager().setPrimaryClustersInitializedCb( |
779 | 3.44k | [this]() { onClusterManagerPrimaryInitializationComplete(); }); |
780 | | |
781 | 3.44k | auto& stats_config = config_.statsConfig(); |
782 | 3.44k | for (const Stats::SinkPtr& sink : stats_config.sinks()) { |
783 | 0 | stats_store_.addSink(*sink); |
784 | 0 | } |
785 | 3.44k | if (!stats_config.flushOnAdmin()) { |
786 | | // Some of the stat sinks may need dispatcher support so don't flush until the main loop starts. |
787 | | // Just setup the timer. |
788 | 3.02k | stat_flush_timer_ = dispatcher_->createTimer([this]() -> void { flushStats(); }); |
789 | 3.02k | stat_flush_timer_->enableTimer(stats_config.flushInterval()); |
790 | 3.02k | } |
791 | | |
792 | | // Now that we are initialized, notify the bootstrap extensions. |
793 | 3.44k | for (auto&& bootstrap_extension : bootstrap_extensions_) { |
794 | 0 | bootstrap_extension->onServerInitialized(); |
795 | 0 | } |
796 | | |
797 | | // GuardDog (deadlock detection) object and thread setup before workers are |
798 | | // started and before our own run() loop runs. |
799 | 3.44k | main_thread_guard_dog_ = maybeCreateGuardDog("main_thread"); |
800 | 3.44k | worker_guard_dog_ = maybeCreateGuardDog("workers"); |
801 | 3.44k | return absl::OkStatus(); |
802 | 3.61k | } |
803 | | |
// Invoked once all primary clusters have finished initializing. Starts the
// RTDS subscriptions; the runtime loader calls `onRuntimeReady` when they are
// applied (immediately if RTDS is not configured).
void InstanceBase::onClusterManagerPrimaryInitializationComplete() {
  // If RTDS was not configured the `onRuntimeReady` callback is immediately invoked.
  runtime().startRtdsSubscriptions([this]() { onRuntimeReady(); });
}
808 | | |
// Continues server initialization once runtime (including any RTDS overrides)
// is fully available: initializes secondary clusters, then the Health
// Discovery Service (HDS) delegate if configured. Any EnvoyException during
// either step logs a warning and begins server shutdown.
void InstanceBase::onRuntimeReady() {
  // Begin initializing secondary clusters after RTDS configuration has been applied.
  // Initializing can throw exceptions, so catch these.
  TRY_ASSERT_MAIN_THREAD {
    THROW_IF_NOT_OK(clusterManager().initializeSecondaryClusters(bootstrap_));
  }
  END_TRY
  CATCH(const EnvoyException& e, {
    // A failure here is treated as fatal: log and shut down rather than run
    // with partially initialized clusters.
    ENVOY_LOG(warn, "Skipping initialization of secondary cluster: {}", e.what());
    shutdown();
  });

  if (bootstrap_.has_hds_config()) {
    const auto& hds_config = bootstrap_.hds_config();
    async_client_manager_ = std::make_unique<Grpc::AsyncClientManagerImpl>(
        *config_.clusterManager(), thread_local_, server_contexts_, grpc_context_.statNames(),
        bootstrap_.grpc_async_client_manager_config());
    TRY_ASSERT_MAIN_THREAD {
      THROW_IF_NOT_OK(Config::Utility::checkTransportVersion(hds_config));
      // HDS does not support xDS-Failover.
      auto factory_or_error = Config::Utility::factoryForGrpcApiConfigSource(
          *async_client_manager_, hds_config, *stats_store_.rootScope(), false, 0);
      THROW_IF_NOT_OK_REF(factory_or_error.status());
      hds_delegate_ = std::make_unique<Upstream::HdsDelegate>(
          serverFactoryContext(), *stats_store_.rootScope(),
          factory_or_error.value()->createUncachedRawAsyncClient(), stats_store_,
          *ssl_context_manager_, info_factory_);
    }
    END_TRY
    CATCH(const EnvoyException& e, {
      ENVOY_LOG(warn, "Skipping initialization of HDS cluster: {}", e.what());
      shutdown();
    });
  }

  // TODO (nezdolik): Fully deprecate this runtime key in the next release.
  if (runtime().snapshot().get(Runtime::Keys::GlobalMaxCxRuntimeKey)) {
    ENVOY_LOG(warn,
              "Usage of the deprecated runtime key {}, consider switching to "
              "`envoy.resource_monitors.global_downstream_max_connections` instead."
              "This runtime key will be removed in future.",
              Runtime::Keys::GlobalMaxCxRuntimeKey);
  }
}
853 | | |
// Starts all worker threads via the listener manager. The lambda runs after
// every worker has started: it finalizes initialization stats and, during hot
// restart, signals the parent process that it can drain and shut down.
void InstanceBase::startWorkers() {
  // The callback will be called after workers are started.
  THROW_IF_NOT_OK(
      listener_manager_->startWorkers(makeOptRefFromPtr(worker_guard_dog_.get()), [this]() {
        // The server may have been shut down while the workers were starting;
        // in that case skip the post-start work entirely.
        if (isShutdown()) {
          return;
        }

        initialization_timer_->complete();
        // Update server stats as soon as initialization is done.
        updateServerStats();
        workers_started_ = true;
        hooks_.onWorkersStarted();
        // At this point we are ready to take traffic and all listening ports are up. Notify our
        // parent if applicable that they can stop listening and drain.
        restarter_.drainParentListeners();
        drain_manager_->startParentShutdownSequence();
      }));
}
873 | | |
874 | | Runtime::LoaderPtr InstanceUtil::createRuntime(Instance& server, |
875 | 4.45k | Server::Configuration::Initial& config) { |
876 | 4.45k | #ifdef ENVOY_ENABLE_YAML |
877 | 4.45k | ENVOY_LOG(info, "runtime: {}", MessageUtil::getYamlStringFromMessage(config.runtime())); |
878 | 4.45k | #endif |
879 | 4.45k | absl::StatusOr<std::unique_ptr<Runtime::LoaderImpl>> loader = Runtime::LoaderImpl::create( |
880 | 4.45k | server.dispatcher(), server.threadLocal(), config.runtime(), server.localInfo(), |
881 | 4.45k | server.stats(), server.api().randomGenerator(), |
882 | 4.45k | server.messageValidationContext().dynamicValidationVisitor(), server.api()); |
883 | 4.45k | THROW_IF_NOT_OK(loader.status()); |
884 | 4.40k | return std::move(loader.value()); |
885 | 4.45k | } |
886 | | |
887 | 4.47k | void InstanceBase::loadServerFlags(const absl::optional<std::string>& flags_path) { |
888 | 4.47k | if (!flags_path) { |
889 | 3.92k | return; |
890 | 3.92k | } |
891 | | |
892 | 552 | ENVOY_LOG(info, "server flags path: {}", flags_path.value()); |
893 | 552 | if (api_->fileSystem().fileExists(flags_path.value() + "/drain")) { |
894 | 8 | ENVOY_LOG(info, "starting server in drain mode"); |
895 | 8 | InstanceBase::failHealthcheck(true); |
896 | 8 | } |
897 | 552 | } |
898 | | |
// RunHelper wires together the pieces needed to move an initialized server to
// a serving state: process signal handlers, overload manager startup, and the
// cluster manager / init manager sequencing that gates worker startup through
// `post_init_cb`.
RunHelper::RunHelper(Instance& instance, const Options& options, Event::Dispatcher& dispatcher,
                     Upstream::ClusterManager& cm, AccessLog::AccessLogManager& access_log_manager,
                     Init::Manager& init_manager, OverloadManager& overload_manager,
                     OverloadManager& null_overload_manager, std::function<void()> post_init_cb)
    : init_watcher_("RunHelper", [&instance, post_init_cb]() {
        // Guard against a shutdown that raced with init completing.
        if (!instance.isShutdown()) {
          post_init_cb();
        }
      }) {
  // Setup signals.
  // Since signals are not supported on Windows we have an internal definition for `SIGTERM`
  // On POSIX it resolves as expected to SIGTERM
  // On Windows we use it internally for all the console events that indicate that we should
  // terminate the process.
  if (options.signalHandlingEnabled()) {
    sigterm_ = dispatcher.listenForSignal(ENVOY_SIGTERM, [&instance]() {
      ENVOY_LOG(warn, "caught ENVOY_SIGTERM");
      instance.shutdown();
    });
#ifndef WIN32
    sigint_ = dispatcher.listenForSignal(SIGINT, [&instance]() {
      ENVOY_LOG(warn, "caught SIGINT");
      instance.shutdown();
    });

    // SIGUSR1 reopens access logs (e.g. after external log rotation).
    sig_usr_1_ = dispatcher.listenForSignal(SIGUSR1, [&access_log_manager]() {
      ENVOY_LOG(info, "caught SIGUSR1. Reopening access logs.");
      access_log_manager.reopen();
    });

    // SIGHUP is deliberately ignored; hot restart is the supported reload path.
    sig_hup_ = dispatcher.listenForSignal(SIGHUP, []() {
      ENVOY_LOG(warn, "caught and eating SIGHUP. See documentation for how to hot restart.");
    });
#endif
  }

  // Start overload manager before workers.
  overload_manager.start();
  null_overload_manager.start();

  // If there is no global limit to the number of active connections, warn on startup.
  if (!overload_manager.getThreadLocalOverloadState().isResourceMonitorEnabled(
          Server::OverloadProactiveResourceName::GlobalDownstreamMaxConnections)) {
    ENVOY_LOG(
        warn,
        "There is no configured limit to the number of allowed active downstream "
        "connections. Configure a "
        "limit in `envoy.resource_monitors.global_downstream_max_connections` resource monitor.");
  }

  // Register for cluster manager init notification. We don't start serving worker traffic until
  // upstream clusters are initialized which may involve running the event loop. Note however that
  // this can fire immediately if all clusters have already initialized. Also note that we need
  // to guard against shutdown at two different levels since SIGTERM can come in once the run loop
  // starts.
  cm.setInitializedCb([&instance, &init_manager, &cm, this]() {
    if (instance.isShutdown()) {
      return;
    }

    const auto type_url = Config::getTypeUrl<envoy::config::route::v3::RouteConfiguration>();
    // Pause RDS to ensure that we don't send any requests until we've
    // subscribed to all the RDS resources. The subscriptions happen in the init callbacks,
    // so we pause RDS until we've completed all the callbacks.
    Config::ScopedResume maybe_resume_rds;
    if (cm.adsMux()) {
      maybe_resume_rds = cm.adsMux()->pause(type_url);
    }

    ENVOY_LOG(info, "all clusters initialized. initializing init manager");
    init_manager.initialize(init_watcher_);

    // Now that we're execute all the init callbacks we can resume RDS
    // as we've subscribed to all the statically defined RDS resources.
    // This is done by tearing down the maybe_resume_rds Cleanup object.
  });
}
976 | | |
// Main entry point after initialization: sets up signal handling and init
// sequencing via RunHelper, runs the dispatcher until shutdown is requested,
// then tears the server down via terminate().
void InstanceBase::run() {
  // RunHelper exists primarily to facilitate testing of how we respond to early shutdown during
  // startup (see RunHelperTest in server_test.cc).
  const auto run_helper =
      RunHelper(*this, options_, *dispatcher_, clusterManager(), access_log_manager_, init_manager_,
                overloadManager(), nullOverloadManager(), [this] {
                  notifyCallbacksForStage(Stage::PostInit);
                  startWorkers();
                });

  // Run the main dispatch loop waiting to exit.
  ENVOY_LOG(info, "starting main dispatch loop");
  WatchDogSharedPtr watchdog;
  if (main_thread_guard_dog_) {
    // Register the main thread with the guard dog so stalls in the dispatch
    // loop can be detected.
    watchdog = main_thread_guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(),
                                                      "main_thread", *dispatcher_);
  }
  // Startup-stage callbacks run from within the loop, after it starts.
  dispatcher_->post([this] { notifyCallbacksForStage(Stage::Startup); });
  dispatcher_->run(Event::Dispatcher::RunType::Block);
  ENVOY_LOG(info, "main dispatch loop exited");
  if (main_thread_guard_dog_) {
    main_thread_guard_dog_->stopWatching(watchdog);
  }
  watchdog.reset();

  terminate();
}
1004 | | |
// Tears the server down in dependency order. Idempotent: repeated calls are
// no-ops. Individual components are null-checked because termination may run
// after a partially completed initialization.
void InstanceBase::terminate() {
  if (terminated_) {
    return;
  }
  terminated_ = true;

  // Before starting to shutdown anything else, stop slot destruction updates.
  thread_local_.shutdownGlobalThreading();

  // Before the workers start exiting we should disable stat threading.
  stats_store_.shutdownThreading();

  // TODO: figure out the correct fix: https://github.com/envoyproxy/envoy/issues/15072.
  std::vector<std::string> muxes = {
      "envoy.config_mux.new_grpc_mux_factory", "envoy.config_mux.grpc_mux_factory",
      "envoy.config_mux.delta_grpc_mux_factory", "envoy.config_mux.sotw_grpc_mux_factory"};
  for (const auto& name : muxes) {
    auto* factory = Config::Utility::getFactoryByName<Config::MuxFactory>(name);
    if (factory) {
      factory->shutdownAll();
    }
  }

  if (overload_manager_) {
    overload_manager_->stop();
  }

  // Shutdown all the workers now that the main dispatch loop is done.
  if (listener_manager_ != nullptr) {
    listener_manager_->stopWorkers();
  }

  // Only flush if we have not been hot restarted.
  if (stat_flush_timer_) {
    flushStats();
  }

  if (config_.clusterManager() != nullptr) {
    config_.clusterManager()->shutdown();
  }
  handler_.reset();
  thread_local_.shutdownThread();
  restarter_.shutdown();
  ENVOY_LOG(info, "exiting");
  ENVOY_FLUSH_LOG();
  FatalErrorHandler::clearFatalActionsOnTerminate();
}
1052 | | |
// Accessor for the runtime loader. Dereferences `runtime_` unconditionally, so
// it is only valid after the loader has been created during initialization.
Runtime::Loader& InstanceBase::runtime() { return *runtime_; }
1054 | | |
// Initiates server shutdown: sets the shutdown flag, requests termination of a
// hot-restart parent (if any), then runs the ShutdownExit stage callbacks and
// finally exits the main dispatch loop.
void InstanceBase::shutdown() {
  ENVOY_LOG(info, "shutting down server instance");
  shutdown_ = true;
  restarter_.sendParentTerminateRequest();
  // The dispatcher exit is deferred until the ShutdownExit callbacks have
  // completed (see notifyCallbacksForStage).
  notifyCallbacksForStage(Stage::ShutdownExit, [this] { dispatcher_->exit(); });
}
1061 | | |
// Invoked during hot restart when a child process has started: stops stat
// flushing and listeners and closes the admin socket so the child can take
// over, then requests termination of our own parent if one remains.
void InstanceBase::shutdownAdmin() {
  ENVOY_LOG(warn, "shutting down admin due to child startup");
  // The flush timer may be null (flush-on-admin mode); reset() is safe either way.
  stat_flush_timer_.reset();
  handler_->stopListeners();
  if (admin_) {
    admin_->closeSocket();
  }

  // If we still have a parent, it should be terminated now that we have a child.
  ENVOY_LOG(warn, "terminating parent process");
  restarter_.sendParentTerminateRequest();
}
1074 | | |
1075 | | ServerLifecycleNotifier::HandlePtr InstanceBase::registerCallback(Stage stage, |
1076 | 0 | StageCallback callback) { |
1077 | 0 | auto& callbacks = stage_callbacks_[stage]; |
1078 | 0 | return std::make_unique<LifecycleCallbackHandle<StageCallback>>(callbacks, callback); |
1079 | 0 | } |
1080 | | |
1081 | | ServerLifecycleNotifier::HandlePtr |
1082 | 0 | InstanceBase::registerCallback(Stage stage, StageCallbackWithCompletion callback) { |
1083 | 0 | ASSERT(stage == Stage::ShutdownExit); |
1084 | 0 | auto& callbacks = stage_completable_callbacks_[stage]; |
1085 | 0 | return std::make_unique<LifecycleCallbackHandle<StageCallbackWithCompletion>>(callbacks, |
1086 | 0 | callback); |
1087 | 0 | } |
1088 | | |
// Runs every registered lifecycle callback for `stage`. Plain callbacks run
// synchronously on this thread. Completion-style callbacks share a guard
// object; when the last reference to it is released, `completion_cb` is
// posted back onto the main dispatcher.
void InstanceBase::notifyCallbacksForStage(Stage stage, std::function<void()> completion_cb) {
  ASSERT_IS_MAIN_OR_TEST_THREAD();
  const auto stage_it = stage_callbacks_.find(stage);
  if (stage_it != stage_callbacks_.end()) {
    LifecycleNotifierCallbacks& callbacks = stage_it->second;
    for (auto callback_it = callbacks.begin(); callback_it != callbacks.end();) {
      StageCallback callback = *callback_it;
      // Increment the iterator before invoking the callback in case the
      // callback deletes the handle which will unregister itself and
      // invalidate this iterator if we're still pointing at it.
      ++callback_it;
      callback();
    }
  }

  // Wrap completion_cb so that it only gets invoked when all callbacks for this stage
  // have finished their work.
  std::shared_ptr<void> cb_guard(
      new Cleanup([this, completion_cb]() { dispatcher_->post(completion_cb); }));

  // Registrations which take a completion callback are typically implemented by executing a
  // callback on all worker threads using Slot::runOnAllThreads which will hang indefinitely if
  // worker threads have not been started so we need to skip notifications if envoy is shutdown
  // early before workers have started.
  if (workers_started_) {
    const auto it2 = stage_completable_callbacks_.find(stage);
    if (it2 != stage_completable_callbacks_.end()) {
      ENVOY_LOG(info, "Notifying {} callback(s) with completion.", it2->second.size());
      for (const StageCallbackWithCompletion& callback : it2->second) {
        // Each callback captures a copy of cb_guard; completion_cb fires only
        // after all copies (and the local one) are destroyed.
        callback([cb_guard] {});
      }
    }
  }
}
1123 | | |
1124 | 0 | ProtobufTypes::MessagePtr InstanceBase::dumpBootstrapConfig() { |
1125 | 0 | auto config_dump = std::make_unique<envoy::admin::v3::BootstrapConfigDump>(); |
1126 | 0 | config_dump->mutable_bootstrap()->MergeFrom(bootstrap_); |
1127 | 0 | TimestampUtil::systemClockToTimestamp(bootstrap_config_update_time_, |
1128 | 0 | *(config_dump->mutable_last_updated())); |
1129 | 0 | return config_dump; |
1130 | 0 | } |
1131 | | |
// Lazily creates the DNS resolver on first use and caches it in
// `dns_resolver_`. NOTE(review): no synchronization here — presumably only
// called from the main thread; confirm against callers.
Network::DnsResolverSharedPtr InstanceBase::getOrCreateDnsResolver() {
  if (!dns_resolver_) {
    envoy::config::core::v3::TypedExtensionConfig typed_dns_resolver_config;
    // Selects a resolver factory from the bootstrap config and fills in
    // `typed_dns_resolver_config` as a side effect.
    Network::DnsResolverFactory& dns_resolver_factory =
        Network::createDnsResolverFactoryFromProto(bootstrap_, typed_dns_resolver_config);
    dns_resolver_ = THROW_OR_RETURN_VALUE(
        dns_resolver_factory.createDnsResolver(dispatcher(), api(), typed_dns_resolver_config),
        Network::DnsResolverSharedPtr);
  }
  return dns_resolver_;
}
1143 | | |
// Whether listeners should enable reuse_port by default. During hot restart
// this value is inherited from the parent process to keep behavior consistent
// across the restart (see #17259).
bool InstanceBase::enableReusePortDefault() { return enable_reuse_port_default_; }
1145 | | |
1146 | | } // namespace Server |
1147 | | } // namespace Envoy |