Line data Source code
1 : #include "source/extensions/health_checkers/http/health_checker_impl.h"
2 :
3 : #include <cstdint>
4 : #include <iterator>
5 : #include <memory>
6 :
7 : #include "envoy/config/core/v3/health_check.pb.h"
8 : #include "envoy/data/core/v3/health_check_event.pb.h"
9 : #include "envoy/server/health_checker_config.h"
10 : #include "envoy/type/v3/http.pb.h"
11 : #include "envoy/type/v3/range.pb.h"
12 :
13 : #include "source/common/buffer/zero_copy_input_stream_impl.h"
14 : #include "source/common/common/empty_string.h"
15 : #include "source/common/common/enum_to_int.h"
16 : #include "source/common/common/macros.h"
17 : #include "source/common/config/utility.h"
18 : #include "source/common/config/well_known_names.h"
19 : #include "source/common/grpc/common.h"
20 : #include "source/common/http/header_map_impl.h"
21 : #include "source/common/http/header_utility.h"
22 : #include "source/common/network/address_impl.h"
23 : #include "source/common/network/socket_impl.h"
24 : #include "source/common/network/utility.h"
25 : #include "source/common/router/router.h"
26 : #include "source/common/runtime/runtime_features.h"
27 : #include "source/common/upstream/host_utility.h"
28 :
29 : #include "absl/strings/match.h"
30 : #include "absl/strings/str_cat.h"
31 :
32 : namespace Envoy {
33 : namespace Upstream {
34 :
35 : namespace {
36 :
37 : envoy::config::core::v3::RequestMethod
38 24 : getMethod(const envoy::config::core::v3::RequestMethod config_method) {
39 24 : if (config_method == envoy::config::core::v3::METHOD_UNSPECIFIED) {
40 24 : return envoy::config::core::v3::GET;
41 24 : }
42 :
43 0 : return config_method;
44 24 : }
45 :
46 : } // namespace
47 :
48 : Upstream::HealthCheckerSharedPtr HttpHealthCheckerFactory::createCustomHealthChecker(
49 : const envoy::config::core::v3::HealthCheck& config,
50 0 : Server::Configuration::HealthCheckerFactoryContext& context) {
51 0 : return std::make_shared<ProdHttpHealthCheckerImpl>(
52 0 : context.cluster(), config, context.mainThreadDispatcher(), context.runtime(),
53 0 : context.api().randomGenerator(), context.eventLogger());
54 0 : }
55 :
56 : REGISTER_FACTORY(HttpHealthCheckerFactory, Server::Configuration::CustomHealthCheckerFactory);
57 :
58 : HttpHealthCheckerImpl::HttpHealthCheckerImpl(const Cluster& cluster,
59 : const envoy::config::core::v3::HealthCheck& config,
60 : Event::Dispatcher& dispatcher,
61 : Runtime::Loader& runtime,
62 : Random::RandomGenerator& random,
63 : HealthCheckEventLoggerPtr&& event_logger)
64 : : HealthCheckerImplBase(cluster, config, dispatcher, runtime, random, std::move(event_logger)),
65 : path_(config.http_health_check().path()), host_value_(config.http_health_check().host()),
66 : method_(getMethod(config.http_health_check().method())),
67 : response_buffer_size_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(
68 : config.http_health_check(), response_buffer_size, kDefaultMaxBytesInBuffer)),
69 : request_headers_parser_(
70 : Router::HeaderParser::configure(config.http_health_check().request_headers_to_add(),
71 : config.http_health_check().request_headers_to_remove())),
72 : http_status_checker_(config.http_health_check().expected_statuses(),
73 : config.http_health_check().retriable_statuses(),
74 : static_cast<uint64_t>(Http::Code::OK)),
75 : codec_client_type_(codecClientType(config.http_health_check().codec_client_type())),
76 25 : random_generator_(random) {
77 25 : auto bytes_or_error = PayloadMatcher::loadProtoBytes(config.http_health_check().receive());
78 25 : THROW_IF_STATUS_NOT_OK(bytes_or_error, throw);
79 25 : receive_bytes_ = bytes_or_error.value();
80 25 : if (config.http_health_check().has_service_name_matcher()) {
81 20 : service_name_matcher_.emplace(config.http_health_check().service_name_matcher());
82 20 : }
83 :
84 25 : if (response_buffer_size_ != 0 && !receive_bytes_.empty()) {
85 1 : uint64_t total = 0;
86 1 : for (auto const& bytes : receive_bytes_) {
87 1 : total += bytes.size();
88 1 : }
89 1 : if (total > response_buffer_size_) {
90 0 : throw EnvoyException(fmt::format(
91 0 : "The expected response length '{}' is over than http health response buffer size '{}'",
92 0 : total, response_buffer_size_));
93 0 : }
94 1 : }
95 25 : }
96 :
97 : HttpHealthCheckerImpl::HttpStatusChecker::HttpStatusChecker(
98 : const Protobuf::RepeatedPtrField<envoy::type::v3::Int64Range>& expected_statuses,
99 : const Protobuf::RepeatedPtrField<envoy::type::v3::Int64Range>& retriable_statuses,
100 24 : uint64_t default_expected_status) {
101 24 : for (const auto& status_range : expected_statuses) {
102 2 : const auto start = static_cast<uint64_t>(status_range.start());
103 2 : const auto end = static_cast<uint64_t>(status_range.end());
104 :
105 2 : validateRange(start, end, "expected");
106 :
107 2 : expected_ranges_.emplace_back(std::make_pair(start, end));
108 2 : }
109 :
110 24 : if (expected_ranges_.empty()) {
111 22 : expected_ranges_.emplace_back(
112 22 : std::make_pair(default_expected_status, default_expected_status + 1));
113 22 : }
114 :
115 24 : for (const auto& status_range : retriable_statuses) {
116 0 : const auto start = static_cast<uint64_t>(status_range.start());
117 0 : const auto end = static_cast<uint64_t>(status_range.end());
118 :
119 0 : validateRange(start, end, "retriable");
120 :
121 0 : retriable_ranges_.emplace_back(std::make_pair(start, end));
122 0 : }
123 24 : }
124 :
125 : void HttpHealthCheckerImpl::HttpStatusChecker::validateRange(uint64_t start, uint64_t end,
126 2 : absl::string_view range_type) {
127 2 : if (start >= end) {
128 0 : throw EnvoyException(fmt::format("Invalid http {} status range: expecting start < "
129 0 : "end, but found start={} and end={}",
130 0 : range_type, start, end));
131 0 : }
132 :
133 2 : if (start < 100) {
134 1 : throw EnvoyException(
135 1 : fmt::format("Invalid http {} status range: expecting start >= 100, but found start={}",
136 1 : range_type, start));
137 1 : }
138 :
139 1 : if (end > 600) {
140 1 : throw EnvoyException(fmt::format(
141 1 : "Invalid http {} status range: expecting end <= 600, but found end={}", range_type, end));
142 1 : }
143 1 : }
144 :
145 1 : bool HttpHealthCheckerImpl::HttpStatusChecker::inRetriableRanges(uint64_t http_status) const {
146 1 : return inRanges(http_status, retriable_ranges_);
147 1 : }
148 :
149 18 : bool HttpHealthCheckerImpl::HttpStatusChecker::inExpectedRanges(uint64_t http_status) const {
150 18 : return inRanges(http_status, expected_ranges_);
151 18 : }
152 :
153 : bool HttpHealthCheckerImpl::HttpStatusChecker::inRanges(
154 19 : uint64_t http_status, const std::vector<std::pair<uint64_t, uint64_t>>& ranges) {
155 19 : for (const auto& range : ranges) {
156 18 : if (http_status >= range.first && http_status < range.second) {
157 17 : return true;
158 17 : }
159 18 : }
160 :
161 2 : return false;
162 19 : }
163 :
164 40 : Http::Protocol codecClientTypeToProtocol(Http::CodecType codec_client_type) {
165 40 : switch (codec_client_type) {
166 40 : case Http::CodecType::HTTP1:
167 40 : return Http::Protocol::Http11;
168 0 : case Http::CodecType::HTTP2:
169 0 : return Http::Protocol::Http2;
170 0 : case Http::CodecType::HTTP3:
171 0 : return Http::Protocol::Http3;
172 40 : }
173 0 : PANIC_DUE_TO_CORRUPT_ENUM
174 0 : }
175 :
176 18 : Http::Protocol HttpHealthCheckerImpl::protocol() const {
177 18 : return codecClientTypeToProtocol(codec_client_type_);
178 18 : }
179 :
180 : HttpHealthCheckerImpl::HttpActiveHealthCheckSession::HttpActiveHealthCheckSession(
181 : HttpHealthCheckerImpl& parent, const HostSharedPtr& host)
182 : : ActiveHealthCheckSession(parent, host), parent_(parent),
183 : response_body_(std::make_unique<Buffer::OwnedImpl>()),
184 : hostname_(
185 : HealthCheckerFactory::getHostname(host, parent_.host_value_, parent_.cluster_.info())),
186 :
187 : local_connection_info_provider_(std::make_shared<Network::ConnectionInfoSetterImpl>(
188 : Network::Utility::getCanonicalIpv4LoopbackAddress(),
189 : Network::Utility::getCanonicalIpv4LoopbackAddress())),
190 : protocol_(codecClientTypeToProtocol(parent_.codec_client_type_)), expect_reset_(false),
191 22 : reuse_connection_(false), request_in_flight_(false) {}
192 :
193 22 : HttpHealthCheckerImpl::HttpActiveHealthCheckSession::~HttpActiveHealthCheckSession() {
194 22 : ASSERT(client_ == nullptr);
195 22 : }
196 :
197 22 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onDeferredDelete() {
198 22 : if (client_) {
199 : // If there is an active request it will get reset, so make sure we ignore the reset.
200 15 : expect_reset_ = true;
201 15 : client_->close(Network::ConnectionCloseType::Abort);
202 15 : }
203 22 : }
204 :
205 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::decodeHeaders(
206 18 : Http::ResponseHeaderMapPtr&& headers, bool end_stream) {
207 18 : ASSERT(!response_headers_);
208 18 : response_headers_ = std::move(headers);
209 18 : if (end_stream) {
210 17 : onResponseComplete();
211 17 : }
212 18 : }
213 :
214 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::decodeData(Buffer::Instance& data,
215 1 : bool end_stream) {
216 1 : if (parent_.response_buffer_size_ != 0) {
217 1 : if (!parent_.receive_bytes_.empty() &&
218 1 : response_body_->length() < parent_.response_buffer_size_) {
219 1 : response_body_->move(data, parent_.response_buffer_size_ - response_body_->length());
220 1 : }
221 1 : } else {
222 0 : if (!parent_.receive_bytes_.empty()) {
223 0 : response_body_->move(data, data.length());
224 0 : }
225 0 : }
226 :
227 1 : if (end_stream) {
228 1 : onResponseComplete();
229 1 : }
230 1 : }
231 :
232 31 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onEvent(Network::ConnectionEvent event) {
233 31 : if (event == Network::ConnectionEvent::RemoteClose ||
234 31 : event == Network::ConnectionEvent::LocalClose) {
235 : // For the raw disconnect event, we are either between intervals in which case we already have
236 : // a timer setup, or we did the close or got a reset, in which case we already setup a new
237 : // timer. There is nothing to do here other than blow away the client.
238 31 : response_headers_.reset();
239 31 : response_body_->drain(response_body_->length());
240 31 : parent_.dispatcher_.deferredDelete(std::move(client_));
241 31 : }
242 31 : }
243 :
244 : // TODO(lilika) : Support connection pooling
245 32 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onInterval() {
246 32 : if (!client_) {
247 31 : Upstream::Host::CreateConnectionData conn =
248 31 : host_->createHealthCheckConnection(parent_.dispatcher_, parent_.transportSocketOptions(),
249 31 : parent_.transportSocketMatchMetadata().get());
250 31 : client_.reset(parent_.createCodecClient(conn));
251 31 : client_->addConnectionCallbacks(connection_callback_impl_);
252 31 : client_->setCodecConnectionCallbacks(http_connection_callback_impl_);
253 31 : expect_reset_ = false;
254 31 : reuse_connection_ = parent_.reuse_connection_;
255 31 : }
256 :
257 32 : Http::RequestEncoder* request_encoder = &client_->newStream(*this);
258 32 : request_encoder->getStream().addCallbacks(*this);
259 32 : request_in_flight_ = true;
260 :
261 32 : const auto request_headers = Http::createHeaderMap<Http::RequestHeaderMapImpl>(
262 32 : {{Http::Headers::get().Method, envoy::config::core::v3::RequestMethod_Name(parent_.method_)},
263 32 : {Http::Headers::get().Host, hostname_},
264 32 : {Http::Headers::get().Path, parent_.path_},
265 32 : {Http::Headers::get().UserAgent, Http::Headers::get().UserAgentValues.EnvoyHealthChecker}});
266 32 : Router::FilterUtility::setUpstreamScheme(
267 32 : *request_headers,
268 : // Here there is no downstream connection so scheme will be based on
269 : // upstream crypto
270 32 : host_->transportSocketFactory().implementsSecureTransport());
271 32 : StreamInfo::StreamInfoImpl stream_info(protocol_, parent_.dispatcher_.timeSource(),
272 32 : local_connection_info_provider_);
273 32 : stream_info.setUpstreamInfo(std::make_shared<StreamInfo::UpstreamInfoImpl>());
274 32 : stream_info.upstreamInfo()->setUpstreamHost(host_);
275 32 : parent_.request_headers_parser_->evaluateHeaders(*request_headers, stream_info);
276 32 : auto status = request_encoder->encodeHeaders(*request_headers, true);
277 : // Encoding will only fail if required request headers are missing.
278 32 : ASSERT(status.ok());
279 32 : }
280 :
281 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResetStream(Http::StreamResetReason,
282 14 : absl::string_view) {
283 14 : request_in_flight_ = false;
284 14 : ENVOY_CONN_LOG(debug, "connection/stream error health_flags={}", *client_,
285 14 : HostUtility::healthFlagsToString(*host_));
286 14 : if (expect_reset_) {
287 9 : return;
288 9 : }
289 :
290 5 : if (client_ && !reuse_connection_) {
291 0 : client_->close(Network::ConnectionCloseType::Abort);
292 0 : }
293 :
294 5 : handleFailure(envoy::data::core::v3::NETWORK);
295 5 : }
296 :
297 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onGoAway(
298 0 : Http::GoAwayErrorCode error_code) {
299 0 : ENVOY_CONN_LOG(debug, "connection going away goaway_code={}, health_flags={}", *client_,
300 0 : static_cast<int>(error_code), HostUtility::healthFlagsToString(*host_));
301 :
302 0 : if (request_in_flight_ && error_code == Http::GoAwayErrorCode::NoError) {
303 : // The server is starting a graceful shutdown. Allow the in flight request
304 : // to finish without treating this as a health check error, and then
305 : // reconnect.
306 0 : reuse_connection_ = false;
307 0 : return;
308 0 : }
309 :
310 0 : if (request_in_flight_) {
311 : // Record this as a failed health check.
312 0 : handleFailure(envoy::data::core::v3::NETWORK);
313 0 : }
314 :
315 0 : if (client_) {
316 0 : expect_reset_ = true;
317 0 : client_->close(Network::ConnectionCloseType::Abort);
318 0 : }
319 0 : }
320 :
321 : HttpHealthCheckerImpl::HttpActiveHealthCheckSession::HealthCheckResult
322 18 : HttpHealthCheckerImpl::HttpActiveHealthCheckSession::healthCheckResult() {
323 18 : const uint64_t response_code = Http::Utility::getResponseStatus(*response_headers_);
324 18 : ENVOY_CONN_LOG(debug, "hc response_code={} health_flags={}", *client_, response_code,
325 18 : HostUtility::healthFlagsToString(*host_));
326 :
327 18 : if (!parent_.receive_bytes_.empty()) {
328 : // If the expected response is set, check the first 1024 bytes of actual response if contains
329 : // the expected response.
330 1 : if (!PayloadMatcher::match(parent_.receive_bytes_, *response_body_)) {
331 0 : if (response_headers_->EnvoyImmediateHealthCheckFail() != nullptr) {
332 0 : host_->healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL);
333 0 : }
334 0 : return HealthCheckResult::Failed;
335 0 : }
336 1 : ENVOY_CONN_LOG(debug, "hc http response body healthcheck passed", *client_);
337 1 : }
338 :
339 18 : if (!parent_.http_status_checker_.inExpectedRanges(response_code)) {
340 : // If the HTTP response code would indicate failure AND the immediate health check
341 : // failure header is set, exclude the host from LB.
342 : // TODO(mattklein123): We could consider doing this check for any HTTP response code, but this
343 : // seems like the least surprising behavior and we could consider relaxing this in the future.
344 : // TODO(mattklein123): This will not force a host set rebuild of the host was already failed.
345 : // This is something we could do in the future but seems unnecessary right now.
346 1 : if (response_headers_->EnvoyImmediateHealthCheckFail() != nullptr) {
347 0 : host_->healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL);
348 0 : }
349 :
350 1 : if (parent_.http_status_checker_.inRetriableRanges(response_code)) {
351 0 : return HealthCheckResult::Retriable;
352 1 : } else {
353 1 : return HealthCheckResult::Failed;
354 1 : }
355 1 : }
356 :
357 17 : const auto degraded = response_headers_->EnvoyDegraded() != nullptr;
358 :
359 17 : if (parent_.service_name_matcher_.has_value() &&
360 17 : parent_.runtime_.snapshot().featureEnabled("health_check.verify_cluster", 100UL)) {
361 3 : parent_.stats_.verify_cluster_.inc();
362 3 : std::string service_cluster_healthchecked =
363 3 : response_headers_->EnvoyUpstreamHealthCheckedCluster()
364 3 : ? std::string(response_headers_->getEnvoyUpstreamHealthCheckedClusterValue())
365 3 : : EMPTY_STRING;
366 3 : if (parent_.service_name_matcher_->match(service_cluster_healthchecked)) {
367 0 : return degraded ? HealthCheckResult::Degraded : HealthCheckResult::Succeeded;
368 3 : } else {
369 3 : return HealthCheckResult::Failed;
370 3 : }
371 3 : }
372 :
373 14 : return degraded ? HealthCheckResult::Degraded : HealthCheckResult::Succeeded;
374 17 : }
375 :
376 18 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResponseComplete() {
377 18 : request_in_flight_ = false;
378 :
379 18 : switch (healthCheckResult()) {
380 13 : case HealthCheckResult::Succeeded:
381 13 : handleSuccess(false);
382 13 : break;
383 1 : case HealthCheckResult::Degraded:
384 1 : handleSuccess(true);
385 1 : break;
386 4 : case HealthCheckResult::Failed:
387 4 : handleFailure(envoy::data::core::v3::ACTIVE, /*retriable=*/false);
388 4 : break;
389 0 : case HealthCheckResult::Retriable:
390 0 : handleFailure(envoy::data::core::v3::ACTIVE, /*retriable=*/true);
391 0 : break;
392 18 : }
393 :
394 18 : if (shouldClose()) {
395 5 : client_->close(Network::ConnectionCloseType::Abort);
396 5 : }
397 :
398 18 : response_headers_.reset();
399 18 : response_body_->drain(response_body_->length());
400 18 : }
401 :
402 : // It is possible for this session to have been deferred destroyed inline in handleFailure()
403 : // above so make sure we still have a connection that we might need to close.
404 18 : bool HttpHealthCheckerImpl::HttpActiveHealthCheckSession::shouldClose() const {
405 18 : if (client_ == nullptr) {
406 0 : return false;
407 0 : }
408 :
409 18 : if (!reuse_connection_) {
410 2 : return true;
411 2 : }
412 :
413 16 : return Http::HeaderUtility::shouldCloseConnection(client_->protocol(), *response_headers_);
414 18 : }
415 :
416 4 : void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onTimeout() {
417 4 : request_in_flight_ = false;
418 4 : if (client_) {
419 4 : ENVOY_CONN_LOG(debug, "connection/stream timeout health_flags={}", *client_,
420 4 : HostUtility::healthFlagsToString(*host_));
421 :
422 : // If there is an active request it will get reset, so make sure we ignore the reset.
423 4 : expect_reset_ = true;
424 :
425 4 : client_->close(Network::ConnectionCloseType::Abort);
426 4 : }
427 4 : }
428 :
429 : Http::CodecType
430 22 : HttpHealthCheckerImpl::codecClientType(const envoy::type::v3::CodecClientType& type) {
431 22 : switch (type) {
432 0 : PANIC_ON_PROTO_ENUM_SENTINEL_VALUES;
433 0 : case envoy::type::v3::HTTP3:
434 0 : return Http::CodecType::HTTP3;
435 0 : case envoy::type::v3::HTTP2:
436 0 : return Http::CodecType::HTTP2;
437 22 : case envoy::type::v3::HTTP1:
438 22 : return Http::CodecType::HTTP1;
439 22 : }
440 0 : PANIC_DUE_TO_CORRUPT_ENUM
441 0 : }
442 :
443 : Http::CodecClient*
444 0 : ProdHttpHealthCheckerImpl::createCodecClient(Upstream::Host::CreateConnectionData& data) {
445 0 : return new Http::CodecClientProd(codec_client_type_, std::move(data.connection_),
446 0 : data.host_description_, dispatcher_, random_generator_,
447 0 : transportSocketOptions());
448 0 : }
449 :
450 : } // namespace Upstream
451 : } // namespace Envoy
|