/src/pdns/pdns/dnsdistdist/dnsdist.hh
Line | Count | Source |
1 | | /* |
2 | | * This file is part of PowerDNS or dnsdist. |
3 | | * Copyright -- PowerDNS.COM B.V. and its contributors |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of version 2 of the GNU General Public License as |
7 | | * published by the Free Software Foundation. |
8 | | * |
9 | | * In addition, for the avoidance of any doubt, permission is granted to |
10 | | * link this program with OpenSSL and to (re)distribute the binaries |
11 | | * produced as the result of such linking. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU General Public License |
19 | | * along with this program; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
21 | | */ |
22 | | #pragma once |
23 | | |
24 | | #include "config.h" |
25 | | |
26 | | #include <atomic> |
27 | | #include <condition_variable> |
28 | | #include <memory> |
29 | | #include <mutex> |
30 | | #include <string> |
31 | | #include <thread> |
32 | | #include <time.h> |
33 | | #include <unistd.h> |
34 | | #include <unordered_map> |
35 | | |
36 | | #include "bpf-filter.hh" |
37 | | #include "circular_buffer.hh" |
38 | | #include "dnsdist-idstate.hh" |
39 | | #include "dnsdist-lbpolicies.hh" |
40 | | #include "dnsdist-protocols.hh" |
41 | | #include "dnsname.hh" |
42 | | #include "dnsdist-doh-common.hh" |
43 | | #include "doq.hh" |
44 | | #include "doh3.hh" |
45 | | #include "ednsoptions.hh" |
46 | | #include "iputils.hh" |
47 | | #include "misc.hh" |
48 | | #include "mplexer.hh" |
49 | | #include "noinitvector.hh" |
50 | | #include "uuid-utils.hh" |
51 | | #include "proxy-protocol.hh" |
52 | | #include "stat_t.hh" |
53 | | |
54 | | uint64_t uptimeOfProcess(const std::string& str); |
55 | | |
56 | | using QTag = std::unordered_map<string, string>; |
57 | | |
58 | | class IncomingTCPConnectionState; |
59 | | |
60 | | class TCPQuerySender; |
61 | | |
62 | | struct ClientState; |
63 | | |
64 | | struct DNSQuestion |
65 | | { |
66 | | DNSQuestion(InternalQueryState& ids_, PacketBuffer& data_); |
67 | | DNSQuestion(const DNSQuestion&) = delete; |
68 | | DNSQuestion& operator=(const DNSQuestion&) = delete; |
69 | | DNSQuestion(DNSQuestion&&) = default; |
70 | 0 | virtual ~DNSQuestion() = default; |
71 | | |
72 | | std::string getTrailingData() const; |
73 | | bool setTrailingData(const std::string&); |
74 | | const PacketBuffer& getData() const |
75 | 0 | { |
76 | 0 | return data; |
77 | 0 | } |
78 | | PacketBuffer& getMutableData() |
79 | 0 | { |
80 | 0 | return data; |
81 | 0 | } |
82 | | |
83 | | bool editHeader(const std::function<bool(dnsheader&)>& editFunction); |
84 | | |
85 | | const dnsheader_aligned getHeader() const |
86 | 0 | { |
87 | 0 | if (data.size() < sizeof(dnsheader)) { |
88 | 0 | throw std::runtime_error("Trying to access the dnsheader of a too small (" + std::to_string(data.size()) + ") DNSQuestion buffer"); |
89 | 0 | } |
90 | 0 | dnsheader_aligned dh(data.data()); |
91 | 0 | return dh; |
92 | 0 | } |
93 | | |
94 | | /* this function is not safe against unaligned access, you should |
95 | | use editHeader() instead, but we need it for the Lua bindings */ |
96 | | dnsheader* getMutableHeader() const |
97 | 0 | { |
98 | 0 | if (data.size() < sizeof(dnsheader)) { |
99 | 0 | throw std::runtime_error("Trying to access the dnsheader of a too small (" + std::to_string(data.size()) + ") DNSQuestion buffer"); |
100 | 0 | } |
101 | 0 | // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) |
102 | 0 | return reinterpret_cast<dnsheader*>(data.data()); |
103 | 0 | } |
104 | | |
105 | | bool hasRoomFor(size_t more) const |
106 | 0 | { |
107 | 0 | return data.size() <= getMaximumSize() && (getMaximumSize() - data.size()) >= more; |
108 | 0 | } |
109 | | |
110 | | size_t getMaximumSize() const |
111 | 0 | { |
112 | 0 | if (overTCP()) { |
113 | 0 | return std::numeric_limits<uint16_t>::max(); |
114 | 0 | } |
115 | 0 | return 4096; |
116 | 0 | } |
117 | | |
118 | | dnsdist::Protocol getProtocol() const |
119 | 0 | { |
120 | 0 | return ids.protocol; |
121 | 0 | } |
122 | | |
123 | | bool overTCP() const |
124 | 0 | { |
125 | 0 | return !(ids.protocol == dnsdist::Protocol::DoUDP || ids.protocol == dnsdist::Protocol::DNSCryptUDP); |
126 | 0 | } |
127 | | |
128 | | void setTag(std::string&& key, std::string&& value) |
129 | 0 | { |
130 | 0 | if (!ids.qTag) { |
131 | 0 | ids.qTag = std::make_unique<QTag>(); |
132 | 0 | } |
133 | 0 | ids.qTag->insert_or_assign(std::move(key), std::move(value)); |
134 | 0 | } |
135 | | |
136 | | void setTag(const std::string& key, const std::string& value) |
137 | 0 | { |
138 | 0 | if (!ids.qTag) { |
139 | 0 | ids.qTag = std::make_unique<QTag>(); |
140 | 0 | } |
141 | 0 | ids.qTag->insert_or_assign(key, value); |
142 | 0 | } |
143 | | |
144 | | void setTag(const std::string& key, std::string&& value) |
145 | 0 | { |
146 | 0 | if (!ids.qTag) { |
147 | 0 | ids.qTag = std::make_unique<QTag>(); |
148 | 0 | } |
149 | 0 | ids.qTag->insert_or_assign(key, std::move(value)); |
150 | 0 | } |
151 | | |
152 | | const struct timespec& getQueryRealTime() const |
153 | 0 | { |
154 | 0 | return ids.queryRealTime.d_start; |
155 | 0 | } |
156 | | |
157 | | bool isAsynchronous() const |
158 | 0 | { |
159 | 0 | return asynchronous; |
160 | 0 | } |
161 | | |
162 | | std::shared_ptr<IncomingTCPConnectionState> getIncomingTCPState() const |
163 | 0 | { |
164 | 0 | return d_incomingTCPState; |
165 | 0 | } |
166 | | |
167 | | ClientState* getFrontend() const |
168 | 0 | { |
169 | 0 | return ids.cs; |
170 | 0 | } |
171 | | |
172 | | protected: |
173 | | PacketBuffer& data; |
174 | | |
175 | | public: |
176 | | InternalQueryState& ids; |
177 | | std::unique_ptr<Netmask> ecs{nullptr}; |
178 | | std::string sni; /* Server Name Indication, if any (DoT or DoH) */ |
179 | | mutable std::unique_ptr<EDNSOptionViewMap> ednsOptions; /* this needs to be mutable because it is parsed just in time, when DNSQuestion is read-only */ |
180 | | std::shared_ptr<IncomingTCPConnectionState> d_incomingTCPState{nullptr}; |
181 | | std::unique_ptr<std::vector<ProxyProtocolValue>> proxyProtocolValues{nullptr}; |
182 | | uint16_t ecsPrefixLength; |
183 | | uint8_t ednsRCode{0}; |
184 | | bool ecsOverride; |
185 | | bool useECS{true}; |
186 | | bool asynchronous{false}; |
187 | | bool d_selfGeneratedHandledEDNS{false}; |
188 | | }; |
189 | | |
190 | | struct DownstreamState; |
191 | | |
192 | | struct DNSResponse : DNSQuestion |
193 | | { |
194 | | DNSResponse(InternalQueryState& ids_, PacketBuffer& data_, const std::shared_ptr<DownstreamState>& downstream) : |
195 | 0 | DNSQuestion(ids_, data_), d_downstream(downstream) {} |
196 | | DNSResponse(const DNSResponse&) = delete; |
197 | | DNSResponse& operator=(const DNSResponse&) = delete; |
198 | | DNSResponse(DNSResponse&&) = default; |
199 | | |
200 | | const std::shared_ptr<DownstreamState>& d_downstream; |
201 | | }; |
202 | | |
203 | | using pdns::stat_t; |
204 | | |
205 | | class BasicQPSLimiter |
206 | | { |
207 | | public: |
208 | | BasicQPSLimiter(unsigned int burst) : |
209 | | d_tokens(burst) |
210 | 0 | { |
211 | 0 | d_prev.start(); |
212 | 0 | } |
213 | | |
214 | | virtual ~BasicQPSLimiter() |
215 | 0 | { |
216 | 0 | } |
217 | | |
218 | | bool check(unsigned int rate, unsigned int burst) const // this is not quite fair |
219 | 0 | { |
220 | 0 | if (checkOnly(rate, burst)) { |
221 | 0 | addHit(); |
222 | 0 | return true; |
223 | 0 | } |
224 | 0 |
|
225 | 0 | return false; |
226 | 0 | } |
227 | | |
228 | | bool checkOnly(unsigned int rate, unsigned int burst) const // this is not quite fair |
229 | 0 | { |
230 | 0 | auto delta = d_prev.udiffAndSet(); |
231 | 0 |
|
232 | 0 | if (delta > 0.0) { // time, frequently, does go backwards.. |
233 | 0 | d_tokens += 1.0 * rate * (delta / 1000000.0); |
234 | 0 | } |
235 | 0 |
|
236 | 0 | if (d_tokens > burst) { |
237 | 0 | d_tokens = burst; |
238 | 0 | } |
239 | 0 |
|
240 | 0 | bool ret = false; |
241 | 0 | if (d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise |
242 | 0 | ret = true; |
243 | 0 | } |
244 | 0 |
|
245 | 0 | return ret; |
246 | 0 | } |
247 | | |
248 | | virtual void addHit() const |
249 | 0 | { |
250 | 0 | --d_tokens; |
251 | 0 | } |
252 | | |
253 | | bool seenSince(const struct timespec& cutOff) const |
254 | 0 | { |
255 | 0 | return cutOff < d_prev.d_start; |
256 | 0 | } |
257 | | |
258 | | protected: |
259 | | mutable StopWatch d_prev; |
260 | | mutable double d_tokens{0.0}; |
261 | | }; |
262 | | |
263 | | class QPSLimiter : public BasicQPSLimiter |
264 | | { |
265 | | public: |
266 | | QPSLimiter(unsigned int rate, unsigned int burst) : |
267 | | BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false) |
268 | 0 | { |
269 | 0 | d_prev.start(); |
270 | 0 | } |
271 | | |
272 | | unsigned int getRate() const |
273 | 0 | { |
274 | 0 | return d_passthrough ? 0 : d_rate; |
275 | 0 | } |
276 | | |
277 | | bool check() const // this is not quite fair |
278 | 0 | { |
279 | 0 | if (d_passthrough) { |
280 | 0 | return true; |
281 | 0 | } |
282 | 0 |
|
283 | 0 | return BasicQPSLimiter::check(d_rate, d_burst); |
284 | 0 | } |
285 | | |
286 | | bool checkOnly() const |
287 | 0 | { |
288 | 0 | if (d_passthrough) { |
289 | 0 | return true; |
290 | 0 | } |
291 | 0 |
|
292 | 0 | return BasicQPSLimiter::checkOnly(d_rate, d_burst); |
293 | 0 | } |
294 | | |
295 | | void addHit() const override |
296 | 0 | { |
297 | 0 | if (!d_passthrough) { |
298 | 0 | --d_tokens; |
299 | 0 | } |
300 | 0 | } |
301 | | |
302 | | private: |
303 | | unsigned int d_rate{0}; |
304 | | unsigned int d_burst{0}; |
305 | | bool d_passthrough{true}; |
306 | | }; |
307 | | |
308 | | class XskPacket; |
309 | | class XskSocket; |
310 | | class XskWorker; |
311 | | |
312 | | class DNSCryptContext; |
313 | | |
314 | | struct ClientState |
315 | | { |
316 | | ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_, bool enableProxyProtocol) : |
317 | | cpus(cpus_), interface(itfName), local(local_), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort), d_enableProxyProtocol(enableProxyProtocol) |
318 | 0 | { |
319 | 0 | } |
320 | | |
321 | | stat_t queries{0}; |
322 | | stat_t nonCompliantQueries{0}; |
323 | | mutable stat_t responses{0}; |
324 | | mutable stat_t tcpDiedReadingQuery{0}; |
325 | | mutable stat_t tcpDiedSendingResponse{0}; |
326 | | mutable stat_t tcpGaveUp{0}; |
327 | | mutable stat_t tcpClientTimeouts{0}; |
328 | | mutable stat_t tcpDownstreamTimeouts{0}; |
329 | | /* current number of connections to this frontend */ |
330 | | mutable stat_t tcpCurrentConnections{0}; |
331 | | /* maximum number of concurrent connections to this frontend reached */ |
332 | | mutable stat_t tcpMaxConcurrentConnections{0}; |
333 | | stat_t tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption |
334 | | stat_t tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket |
335 | | stat_t tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired) |
336 | | stat_t tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one |
337 | | stat_t tls10queries{0}; // valid DNS queries received via TLSv1.0 |
338 | | stat_t tls11queries{0}; // valid DNS queries received via TLSv1.1 |
339 | | stat_t tls12queries{0}; // valid DNS queries received via TLSv1.2 |
340 | | stat_t tls13queries{0}; // valid DNS queries received via TLSv1.3 |
341 | | stat_t tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version |
342 | | pdns::stat_double_t tcpAvgIOsPerConnection{0.0}; |
343 | | pdns::stat_double_t tcpAvgQueriesPerConnection{0.0}; |
344 | | /* in ms */ |
345 | | pdns::stat_double_t tcpAvgConnectionDuration{0.0}; |
346 | | std::set<int> cpus; |
347 | | std::string interface; |
348 | | ComboAddress local; |
349 | | std::vector<std::pair<ComboAddress, int>> d_additionalAddresses; |
350 | | std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr}; |
351 | | std::shared_ptr<TLSFrontend> tlsFrontend{nullptr}; |
352 | | std::shared_ptr<DOHFrontend> dohFrontend{nullptr}; |
353 | | std::shared_ptr<DOQFrontend> doqFrontend{nullptr}; |
354 | | std::shared_ptr<DOH3Frontend> doh3Frontend{nullptr}; |
355 | | std::shared_ptr<BPFFilter> d_filter{nullptr}; |
356 | | std::shared_ptr<XskWorker> xskInfo{nullptr}; |
357 | | std::shared_ptr<XskWorker> xskInfoResponder{nullptr}; |
358 | | size_t d_maxInFlightQueriesPerConn{1}; |
359 | | size_t d_tcpConcurrentConnectionsLimit{0}; |
360 | | int udpFD{-1}; |
361 | | int tcpFD{-1}; |
362 | | int tcpListenQueueSize{SOMAXCONN}; |
363 | | int fastOpenQueueSize{0}; |
364 | | bool muted{false}; |
365 | | bool tcp; |
366 | | bool reuseport; |
367 | | bool d_enableProxyProtocol{true}; // the global proxy protocol ACL still applies |
368 | | bool ready{false}; |
369 | | |
370 | | int getSocket() const |
371 | 0 | { |
372 | 0 | return udpFD != -1 ? udpFD : tcpFD; |
373 | 0 | } |
374 | | |
375 | | bool isUDP() const |
376 | 0 | { |
377 | 0 | return udpFD != -1; |
378 | 0 | } |
379 | | |
380 | | bool isTCP() const |
381 | 0 | { |
382 | 0 | return udpFD == -1; |
383 | 0 | } |
384 | | |
385 | | bool isDoH() const |
386 | 0 | { |
387 | 0 | return dohFrontend != nullptr; |
388 | 0 | } |
389 | | |
390 | | bool hasTLS() const |
391 | 0 | { |
392 | 0 | return tlsFrontend != nullptr || (dohFrontend != nullptr && dohFrontend->isHTTPS()); |
393 | 0 | } |
394 | | |
395 | | const std::shared_ptr<const TLSFrontend> getTLSFrontend() const |
396 | 0 | { |
397 | 0 | if (tlsFrontend != nullptr) { |
398 | 0 | return tlsFrontend; |
399 | 0 | } |
400 | 0 | if (dohFrontend) { |
401 | 0 | return dohFrontend->d_tlsContext; |
402 | 0 | } |
403 | 0 | throw std::runtime_error("Trying to get a TLS frontend from a non-TLS ClientState"); |
404 | 0 | } |
405 | | |
406 | | dnsdist::Protocol getProtocol() const |
407 | 0 | { |
408 | 0 | if (dnscryptCtx) { |
409 | 0 | if (udpFD != -1) { |
410 | 0 | return dnsdist::Protocol::DNSCryptUDP; |
411 | 0 | } |
412 | 0 | return dnsdist::Protocol::DNSCryptTCP; |
413 | 0 | } |
414 | 0 | if (isDoH()) { |
415 | 0 | return dnsdist::Protocol::DoH; |
416 | 0 | } |
417 | 0 | else if (hasTLS()) { |
418 | 0 | return dnsdist::Protocol::DoT; |
419 | 0 | } |
420 | 0 | else if (doqFrontend != nullptr) { |
421 | 0 | return dnsdist::Protocol::DoQ; |
422 | 0 | } |
423 | 0 | else if (doh3Frontend != nullptr) { |
424 | 0 | return dnsdist::Protocol::DoH3; |
425 | 0 | } |
426 | 0 | else if (udpFD != -1) { |
427 | 0 | return dnsdist::Protocol::DoUDP; |
428 | 0 | } |
429 | 0 | else { |
430 | 0 | return dnsdist::Protocol::DoTCP; |
431 | 0 | } |
432 | 0 | } |
433 | | |
434 | | std::string getType() const |
435 | 0 | { |
436 | 0 | std::string result = udpFD != -1 ? "UDP" : "TCP"; |
437 | 0 |
|
438 | 0 | if (doqFrontend) { |
439 | 0 | result += " (DNS over QUIC)"; |
440 | 0 | } |
441 | 0 | else if (doh3Frontend) { |
442 | 0 | result += " (DNS over HTTP/3)"; |
443 | 0 | } |
444 | 0 | else if (dohFrontend) { |
445 | 0 | if (dohFrontend->isHTTPS()) { |
446 | 0 | result += " (DNS over HTTPS)"; |
447 | 0 | } |
448 | 0 | else { |
449 | 0 | result += " (DNS over HTTP)"; |
450 | 0 | } |
451 | 0 | } |
452 | 0 | else if (tlsFrontend) { |
453 | 0 | result += " (DNS over TLS)"; |
454 | 0 | } |
455 | 0 | else if (dnscryptCtx) { |
456 | 0 | result += " (DNSCrypt)"; |
457 | 0 | } |
458 | 0 |
|
459 | 0 | return result; |
460 | 0 | } |
461 | | |
462 | | void detachFilter(int socket) |
463 | 0 | { |
464 | 0 | if (d_filter) { |
465 | 0 | d_filter->removeSocket(socket); |
466 | 0 | d_filter = nullptr; |
467 | 0 | } |
468 | 0 | } |
469 | | |
470 | | void attachFilter(shared_ptr<BPFFilter>& bpf, int socket) |
471 | 0 | { |
472 | 0 | detachFilter(socket); |
473 | 0 |
|
474 | 0 | bpf->addSocket(socket); |
475 | 0 | d_filter = bpf; |
476 | 0 | } |
477 | | |
478 | | void detachFilter() |
479 | 0 | { |
480 | 0 | if (d_filter) { |
481 | 0 | detachFilter(getSocket()); |
482 | 0 | for (const auto& [addr, socket] : d_additionalAddresses) { |
483 | 0 | (void)addr; |
484 | 0 | if (socket != -1) { |
485 | 0 | detachFilter(socket); |
486 | 0 | } |
487 | 0 | } |
488 | 0 |
|
489 | 0 | d_filter = nullptr; |
490 | 0 | } |
491 | 0 | } |
492 | | |
493 | | void attachFilter(shared_ptr<BPFFilter>& bpf) |
494 | 0 | { |
495 | 0 | detachFilter(); |
496 | 0 |
|
497 | 0 | bpf->addSocket(getSocket()); |
498 | 0 | for (const auto& [addr, socket] : d_additionalAddresses) { |
499 | 0 | (void)addr; |
500 | 0 | if (socket != -1) { |
501 | 0 | bpf->addSocket(socket); |
502 | 0 | } |
503 | 0 | } |
504 | 0 | d_filter = bpf; |
505 | 0 | } |
506 | | |
507 | | void updateTCPMetrics(size_t nbQueries, uint64_t durationMs, size_t nbIOs) |
508 | 0 | { |
509 | 0 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); |
510 | 0 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); |
511 | 0 | tcpAvgIOsPerConnection = (99.0 * tcpAvgIOsPerConnection / 100.0) + (nbIOs / 100.0); |
512 | 0 | } |
513 | | }; |
514 | | |
515 | | struct CrossProtocolQuery; |
516 | | class FDMultiplexer; |
517 | | |
518 | | struct DownstreamState : public std::enable_shared_from_this<DownstreamState> |
519 | | { |
520 | | DownstreamState(const DownstreamState&) = delete; |
521 | | DownstreamState(DownstreamState&&) = delete; |
522 | | DownstreamState& operator=(const DownstreamState&) = delete; |
523 | | DownstreamState& operator=(DownstreamState&&) = delete; |
524 | | |
525 | | typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t; |
526 | | enum class Availability : uint8_t |
527 | | { |
528 | | Up, |
529 | | Down, |
530 | | Auto |
531 | | }; |
532 | | enum class HealthCheckMode : uint8_t |
533 | | { |
534 | | Active, |
535 | | Lazy |
536 | | }; |
537 | | enum class LazyHealthCheckMode : uint8_t |
538 | | { |
539 | | TimeoutOnly, |
540 | | TimeoutOrServFail |
541 | | }; |
542 | | |
543 | | struct BaseConfig |
544 | | { |
545 | | BaseConfig() |
546 | 0 | { |
547 | 0 | } |
548 | | BaseConfig(const ComboAddress& remote_) : |
549 | | remote(remote_) |
550 | 0 | { |
551 | 0 | } |
552 | | |
553 | | TLSContextParameters d_tlsParams; |
554 | | set<string> pools; |
555 | | std::set<int> d_cpus; |
556 | | checkfunc_t checkFunction; |
557 | | std::optional<boost::uuids::uuid> id; |
558 | | DNSName checkName{"a.root-servers.net."}; |
559 | | ComboAddress remote; |
560 | | ComboAddress sourceAddr; |
561 | | std::string sourceItfName; |
562 | | std::string d_tlsSubjectName; |
563 | | std::string d_dohPath; |
564 | | std::string name; |
565 | | std::string nameWithAddr; |
566 | | #ifdef HAVE_XSK |
567 | | std::array<uint8_t, 6> sourceMACAddr; |
568 | | std::array<uint8_t, 6> destMACAddr; |
569 | | #endif /* HAVE_XSK */ |
570 | | size_t d_numberOfSockets{1}; |
571 | | size_t d_maxInFlightQueriesPerConn{1}; |
572 | | size_t d_tcpConcurrentConnectionsLimit{0}; |
573 | | int order{1}; |
574 | | int d_weight{1}; |
575 | | int tcpConnectTimeout{5}; |
576 | | int tcpRecvTimeout{30}; |
577 | | int tcpSendTimeout{30}; |
578 | | int d_qpsLimit{0}; |
579 | | unsigned int sourceItf{0}; |
580 | | QType checkType{QType::A}; |
581 | | uint16_t checkClass{QClass::IN}; |
582 | | uint16_t d_retries{5}; |
583 | | uint16_t d_lazyHealthCheckSampleSize{100}; |
584 | | uint16_t d_lazyHealthCheckMinSampleCount{1}; |
585 | | uint16_t d_lazyHealthCheckFailedInterval{30}; |
586 | | uint16_t d_lazyHealthCheckMaxBackOff{3600}; |
587 | | uint8_t d_lazyHealthCheckThreshold{20}; |
588 | | LazyHealthCheckMode d_lazyHealthCheckMode{LazyHealthCheckMode::TimeoutOrServFail}; |
589 | | uint8_t udpTimeout{0}; |
590 | | uint8_t dscp{0}; |
591 | | Availability d_availability{Availability::Auto}; |
592 | | HealthCheckMode d_healthCheckMode{HealthCheckMode::Active}; |
593 | | bool d_tlsSubjectIsAddr{false}; |
594 | | bool mustResolve{false}; |
595 | | bool useECS{false}; |
596 | | bool useProxyProtocol{false}; |
597 | | bool d_proxyProtocolAdvertiseTLS{false}; |
598 | | bool setCD{false}; |
599 | | bool disableZeroScope{false}; |
600 | | bool tcpFastOpen{false}; |
601 | | bool ipBindAddrNoPort{true}; |
602 | | bool reconnectOnUp{false}; |
603 | | bool d_tcpCheck{false}; |
604 | | bool d_tcpOnly{false}; |
605 | | bool d_addXForwardedHeaders{false}; // for DoH backends |
606 | | bool d_lazyHealthCheckUseExponentialBackOff{false}; |
607 | | bool d_upgradeToLazyHealthChecks{false}; |
608 | | }; |
609 | | |
610 | | struct Config : public BaseConfig |
611 | | { |
612 | | Config() : |
613 | | BaseConfig() |
614 | 0 | { |
615 | 0 | } |
616 | | Config(const ComboAddress& remote_) : |
617 | | BaseConfig(remote_) |
618 | 0 | { |
619 | 0 | } |
620 | | Config(const Config& c) : |
621 | | BaseConfig(c) |
622 | 0 | { |
623 | 0 | checkInterval.store(c.checkInterval.load()); |
624 | 0 | checkTimeout.store(c.checkTimeout.load()); |
625 | 0 | maxCheckFailures.store(c.maxCheckFailures.load()); |
626 | 0 | minRiseSuccesses.store(c.minRiseSuccesses.load()); |
627 | 0 | } |
628 | | |
629 | | std::atomic<unsigned int> checkInterval{1}; |
630 | | std::atomic<uint16_t> checkTimeout{1000}; /* in milliseconds */ |
631 | | std::atomic<uint8_t> maxCheckFailures{1}; |
632 | | std::atomic<uint8_t> minRiseSuccesses{1}; |
633 | | }; |
634 | | |
635 | | struct HealthCheckMetrics |
636 | | { |
637 | | stat_t d_failures{0}; |
638 | | stat_t d_timeOuts{0}; |
639 | | stat_t d_parseErrors{0}; |
640 | | stat_t d_networkErrors{0}; |
641 | | stat_t d_mismatchErrors{0}; |
642 | | stat_t d_invalidResponseErrors{0}; |
643 | | }; |
644 | | |
645 | | DownstreamState(DownstreamState::Config&& config, std::shared_ptr<TLSCtx> tlsCtx, bool connect); |
646 | | DownstreamState(const ComboAddress& remote) : |
647 | | DownstreamState(DownstreamState::Config(remote), nullptr, false) |
648 | 0 | { |
649 | 0 | } |
650 | | |
651 | | ~DownstreamState(); |
652 | | |
653 | | Config d_config; |
654 | | HealthCheckMetrics d_healthCheckMetrics; |
655 | | stat_t sendErrors{0}; |
656 | | stat_t outstanding{0}; |
657 | | stat_t reuseds{0}; |
658 | | stat_t queries{0}; |
659 | | stat_t responses{0}; |
660 | | stat_t nonCompliantResponses{0}; |
661 | | struct |
662 | | { |
663 | | stat_t sendErrors{0}; |
664 | | stat_t reuseds{0}; |
665 | | stat_t queries{0}; |
666 | | } prev; |
667 | | stat_t tcpDiedSendingQuery{0}; |
668 | | stat_t tcpDiedReadingResponse{0}; |
669 | | stat_t tcpGaveUp{0}; |
670 | | stat_t tcpReadTimeouts{0}; |
671 | | stat_t tcpWriteTimeouts{0}; |
672 | | stat_t tcpConnectTimeouts{0}; |
673 | | /* current number of connections to this backend */ |
674 | | stat_t tcpCurrentConnections{0}; |
675 | | /* maximum number of concurrent connections to this backend reached */ |
676 | | stat_t tcpMaxConcurrentConnections{0}; |
677 | | /* number of times we had to enforce the maximum concurrent connections limit */ |
678 | | stat_t tcpTooManyConcurrentConnections{0}; |
679 | | stat_t tcpReusedConnections{0}; |
680 | | stat_t tcpNewConnections{0}; |
681 | | stat_t tlsResumptions{0}; |
682 | | pdns::stat_double_t tcpAvgQueriesPerConnection{0.0}; |
683 | | /* in ms */ |
684 | | pdns::stat_double_t tcpAvgConnectionDuration{0.0}; |
685 | | pdns::stat_double_t queryLoad{0.0}; |
686 | | pdns::stat_double_t dropRate{0.0}; |
687 | | |
688 | | SharedLockGuarded<std::vector<unsigned int>> hashes; |
689 | | LockGuarded<std::unique_ptr<FDMultiplexer>> mplexer{nullptr}; |
690 | | |
691 | | private: |
692 | | LockGuarded<std::map<uint16_t, IDState>> d_idStatesMap; |
693 | | vector<IDState> idStates; |
694 | | |
695 | | struct LazyHealthCheckStats |
696 | | { |
697 | | boost::circular_buffer<bool> d_lastResults; |
698 | | time_t d_nextCheck{0}; |
699 | | enum class LazyStatus : uint8_t |
700 | | { |
701 | | Healthy = 0, |
702 | | PotentialFailure, |
703 | | Failed |
704 | | }; |
705 | | LazyStatus d_status{LazyStatus::Healthy}; |
706 | | }; |
707 | | LockGuarded<LazyHealthCheckStats> d_lazyHealthCheckStats; |
708 | | |
709 | | public: |
710 | | std::shared_ptr<TLSCtx> d_tlsCtx{nullptr}; |
711 | | std::vector<int> sockets; |
712 | | StopWatch sw; |
713 | | std::optional<QPSLimiter> d_qpsLimiter; |
714 | | #ifdef HAVE_XSK |
715 | | std::vector<std::shared_ptr<XskWorker>> d_xskInfos; |
716 | | std::vector<std::shared_ptr<XskSocket>> d_xskSockets; |
717 | | #endif |
718 | | std::atomic<uint64_t> idOffset{0}; |
719 | | std::atomic<double> d_healthCheckLatency{0.0}; |
720 | | size_t socketsOffset{0}; |
721 | | double latencyUsec{0.0}; |
722 | | double latencyUsecTCP{0.0}; |
723 | | unsigned int d_nextCheck{0}; |
724 | | uint16_t currentCheckFailures{0}; |
725 | | std::atomic<bool> hashesComputed{false}; |
726 | | std::atomic<bool> connected{false}; |
727 | | std::atomic<bool> upStatus{false}; |
728 | | |
729 | | private: |
730 | | void handleUDPTimeout(IDState& ids); |
731 | | void updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled, std::optional<time_t> currentTime = std::nullopt); |
732 | | void connectUDPSockets(); |
733 | | #ifdef HAVE_XSK |
734 | | void addXSKDestination(int fd); |
735 | | void removeXSKDestination(int fd); |
736 | | #endif /* HAVE_XSK */ |
737 | | |
738 | | std::mutex connectLock; |
739 | | std::condition_variable d_connectedWait; |
740 | | #ifdef HAVE_XSK |
741 | | SharedLockGuarded<std::vector<ComboAddress>> d_socketSourceAddresses; |
742 | | #endif |
743 | | std::atomic_flag threadStarted; |
744 | | uint8_t consecutiveSuccessfulChecks{0}; |
745 | | bool d_stopped{false}; |
746 | | |
747 | | public: |
748 | | static bool parseSourceParameter(const std::string& source, Config& config); |
749 | | static bool parseAvailabilityConfigFromStr(DownstreamState::Config& config, const std::string& str); |
750 | | |
751 | | void updateStatisticsInfo() |
752 | 0 | { |
753 | 0 | auto delta = sw.udiffAndSet() / 1000000.0; |
754 | 0 | queryLoad.store(1.0 * (queries.load() - prev.queries.load()) / delta); |
755 | 0 | dropRate.store(1.0 * (reuseds.load() - prev.reuseds.load()) / delta); |
756 | 0 | prev.queries.store(queries.load()); |
757 | 0 | prev.reuseds.store(reuseds.load()); |
758 | 0 | } |
759 | | void start(); |
760 | | |
761 | | bool isUp() const |
762 | 0 | { |
763 | 0 | if (d_config.d_availability == Availability::Down) { |
764 | 0 | return false; |
765 | 0 | } |
766 | 0 | else if (d_config.d_availability == Availability::Up) { |
767 | 0 | return true; |
768 | 0 | } |
769 | 0 | return upStatus.load(std::memory_order_relaxed); |
770 | 0 | } |
771 | | |
772 | | void setUp() |
773 | 0 | { |
774 | 0 | d_config.d_availability = Availability::Up; |
775 | 0 | } |
776 | | |
777 | | void setUpStatus(bool newStatus) |
778 | 0 | { |
779 | 0 | upStatus.store(newStatus); |
780 | 0 | if (!newStatus) { |
781 | 0 | latencyUsec = 0.0; |
782 | 0 | latencyUsecTCP = 0.0; |
783 | 0 | } |
784 | 0 | } |
785 | | void setDown() |
786 | 0 | { |
787 | 0 | d_config.d_availability = Availability::Down; |
788 | 0 | latencyUsec = 0.0; |
789 | 0 | latencyUsecTCP = 0.0; |
790 | 0 | } |
791 | | void setAuto() |
792 | 0 | { |
793 | 0 | d_config.d_availability = Availability::Auto; |
794 | 0 | } |
795 | | void setActiveAuto() |
796 | 0 | { |
797 | 0 | d_config.d_availability = Availability::Auto; |
798 | 0 | d_config.d_healthCheckMode = HealthCheckMode::Active; |
799 | 0 | } |
800 | | void setLazyAuto() |
801 | 0 | { |
802 | 0 | d_config.d_availability = Availability::Auto; |
803 | 0 | d_config.d_healthCheckMode = HealthCheckMode::Lazy; |
804 | 0 | d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthCheckSampleSize); |
805 | 0 | } |
806 | | bool healthCheckRequired(std::optional<time_t> currentTime = std::nullopt); |
807 | | |
808 | | const string& getName() const |
809 | 0 | { |
810 | 0 | return d_config.name; |
811 | 0 | } |
812 | | const string& getNameWithAddr() const |
813 | 0 | { |
814 | 0 | return d_config.nameWithAddr; |
815 | 0 | } |
816 | | void setName(const std::string& newName) |
817 | 0 | { |
818 | 0 | d_config.name = newName; |
819 | 0 | d_config.nameWithAddr = newName.empty() ? d_config.remote.toStringWithPort() : (d_config.name + " (" + d_config.remote.toStringWithPort() + ")"); |
820 | 0 | } |
821 | | |
822 | | string getStatus() const |
823 | 0 | { |
824 | 0 | string status; |
825 | 0 | if (d_config.d_availability == DownstreamState::Availability::Up) { |
826 | 0 | status = "UP"; |
827 | 0 | } |
828 | 0 | else if (d_config.d_availability == DownstreamState::Availability::Down) { |
829 | 0 | status = "DOWN"; |
830 | 0 | } |
831 | 0 | else { |
832 | 0 | status = (upStatus.load(std::memory_order_relaxed) ? "up" : "down"); |
833 | 0 | } |
834 | 0 | return status; |
835 | 0 | } |
836 | | |
837 | | bool reconnect(bool initialAttempt = false); |
838 | | void waitUntilConnected(); |
839 | | void hash(); |
840 | | void setId(const boost::uuids::uuid& newId); |
841 | | void setWeight(int newWeight); |
842 | | void stop(); |
843 | | bool isStopped() const |
844 | 0 | { |
845 | 0 | return d_stopped; |
846 | 0 | } |
847 | | const boost::uuids::uuid& getID() const |
848 | 0 | { |
849 | 0 | return *d_config.id; |
850 | 0 | } |
851 | | |
852 | | void updateTCPMetrics(size_t nbQueries, uint64_t durationMs) |
853 | 0 | { |
854 | 0 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); |
855 | 0 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); |
856 | 0 | } |
857 | | |
858 | | void updateTCPLatency(double udiff) |
859 | 0 | { |
860 | 0 | latencyUsecTCP = (127.0 * latencyUsecTCP / 128.0) + udiff / 128.0; |
861 | 0 | } |
862 | | |
863 | | void incQueriesCount() |
864 | 0 | { |
865 | 0 | ++queries; |
866 | 0 | if (d_qpsLimiter) { |
867 | 0 | d_qpsLimiter->addHit(); |
868 | 0 | } |
869 | 0 | } |
870 | | |
871 | | void incCurrentConnectionsCount(); |
872 | | |
873 | | bool doHealthcheckOverTCP() const |
874 | 0 | { |
875 | 0 | return d_config.d_tcpOnly || d_config.d_tcpCheck || d_tlsCtx != nullptr; |
876 | 0 | } |
877 | | |
878 | | [[nodiscard]] bool isTCPOnly() const |
879 | 0 | { |
880 | 0 | return d_config.d_tcpOnly || d_tlsCtx != nullptr; |
881 | 0 | } |
882 | | |
883 | | bool isDoH() const |
884 | 0 | { |
885 | 0 | return !d_config.d_dohPath.empty(); |
886 | 0 | } |
887 | | |
888 | | bool passCrossProtocolQuery(std::unique_ptr<CrossProtocolQuery>&& cpq); |
889 | | int pickSocketForSending(); |
890 | | void pickSocketsReadyForReceiving(std::vector<int>& ready); |
891 | | void handleUDPTimeouts(); |
892 | | void reportTimeoutOrError(); |
893 | | void reportResponse(uint8_t rcode); |
894 | | void submitHealthCheckResult(bool initial, bool newResult); |
895 | | time_t getNextLazyHealthCheck(); |
896 | | uint16_t saveState(InternalQueryState&&); |
897 | | void restoreState(uint16_t id, InternalQueryState&&); |
898 | | std::optional<InternalQueryState> getState(uint16_t id); |
899 | | |
900 | | #ifdef HAVE_XSK |
901 | | void registerXsk(std::vector<std::shared_ptr<XskSocket>>& xsks); |
902 | | [[nodiscard]] ComboAddress pickSourceAddressForSending(); |
903 | | #endif /* HAVE_XSK */ |
904 | | |
905 | | dnsdist::Protocol getProtocol() const |
906 | 0 | { |
907 | 0 | if (isDoH()) { |
908 | 0 | return dnsdist::Protocol::DoH; |
909 | 0 | } |
910 | 0 | if (d_tlsCtx != nullptr) { |
911 | 0 | return dnsdist::Protocol::DoT; |
912 | 0 | } |
913 | 0 | if (isTCPOnly()) { |
914 | 0 | return dnsdist::Protocol::DoTCP; |
915 | 0 | } |
916 | 0 | return dnsdist::Protocol::DoUDP; |
917 | 0 | } |
918 | | |
919 | | double getRelevantLatencyUsec() const |
920 | 0 | { |
921 | 0 | if (isTCPOnly()) { |
922 | 0 | return latencyUsecTCP; |
923 | 0 | } |
924 | 0 | return latencyUsec; |
925 | 0 | } |
926 | | |
/* Declared here, defined out of line. Presumably related to the d_qpsLimiter
   member used by incQueriesCount() -- confirm. */
unsigned int getQPSLimit() const;
928 | | }; |
929 | | |
/* Takes the backend state as a shared_ptr by value, so the callee holds its own
   reference to the DownstreamState. Judging by the name this is a thread entry
   point -- confirm. */
void responderThread(std::shared_ptr<DownstreamState> dss);
931 | | |
/* Flag values for the EDNS header. EDNS_HEADER_FLAG_DO has only bit 15 set
   (0x8000 == 32768); going by its name it stands for the EDNS "DO" flag. */
enum ednsHeaderFlags
{
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000
};
937 | | |
938 | | extern shared_ptr<BPFFilter> g_defaultBPFFilter; |
939 | | |
/* Receives a read-only vector of raw (non-owning) ClientState pointers. Judging by
   the name this is a thread entry point -- confirm. */
void tcpAcceptorThread(const std::vector<ClientState*>& states);
941 | | |
/* Lua side-effect tracking. Per the per-function notes below, the tracked state is
   either indeterminate, "no side effect", or "side effect"; once a side effect has
   been reported it overrides any "no side effect" declaration. */
void setLuaNoSideEffect(); // if nothing has been declared yet, record that there are no side effects
void setLuaSideEffect(); // report a side effect, cancelling all "no side effect" declarations
bool getLuaNoSideEffect(); // true if there were only explicit declarations of "no side effect"
void resetLuaSideEffect(); // reset to the indeterminate state
946 | | |
/* Takes the raw response together with the expected qname/qtype/qclass and the
   backend it came from. `allowEmptyResponse` presumably controls whether an empty
   response is accepted -- confirm in the implementation. */
bool responseContentMatches(const PacketBuffer& response, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const std::shared_ptr<DownstreamState>& remote, bool allowEmptyResponse);

/* The DNS header is read-only here, while the ClientState is passed by non-const
   reference and may therefore be modified by the callee. */
bool checkQueryHeaders(const struct dnsheader& dnsHeader, ClientState& clientState);
950 | | |
/* Forward declaration: the two functions below only need references / pointers to
   DNSCryptQuery, not its full definition. */
class DNSCryptQuery;

/* `packet` and `response` are both passed by non-const reference, so either may be
   modified by the callee. */
bool handleDNSCryptQuery(PacketBuffer& packet, DNSCryptQuery& query, bool tcp, time_t now, PacketBuffer& response);
/* `dnsCryptQuery` is a unique_ptr passed by non-const reference, so the callee may
   set or replace it. */
bool checkDNSCryptQuery(const ClientState& clientState, PacketBuffer& query, std::unique_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
955 | | |
/* Result type of processQuery() and processQueryAfterRules(), stored in a single
   byte. The numeric values follow declaration order, starting at 0. */
enum class ProcessQueryResult : uint8_t
{
  Drop = 0,
  SendAnswer = 1,
  PassToBackend = 2,
  Asynchronous = 3
};
963 | | |
964 | | #include "dnsdist-actions.hh" |
965 | | #include "dnsdist-rule-chains.hh" |
966 | | |
/* Query / response processing entry points, declared here and defined elsewhere.
   The notes are limited to what the signatures show. */

/* Both query functions report their outcome as a ProcessQueryResult and receive the
   backend as a non-const shared_ptr reference, so the callee can set it. */
ProcessQueryResult processQuery(DNSQuestion& dnsQuestion, std::shared_ptr<DownstreamState>& selectedBackend);
ProcessQueryResult processQueryAfterRules(DNSQuestion& dnsQuestion, std::shared_ptr<DownstreamState>& outgoingBackend);
bool processResponse(PacketBuffer& response, DNSResponse& dnsResponse, bool muted);
/* `ruleresult` and `drop` are out-parameters (non-const references). */
bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dnsQuestion, std::string& ruleresult, bool& drop);
bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse, bool muted);
/* The query state is handed over by rvalue reference. */
bool processResponderPacket(std::shared_ptr<DownstreamState>& dss, PacketBuffer& response, InternalQueryState&& ids);
bool applyRulesToResponse(const std::vector<dnsdist::rules::ResponseRuleAction>& respRuleActions, DNSResponse& dnsResponse);
974 | | |
/* `actuallySend` defaults to true; passing false presumably performs the assignment
   without sending the query -- confirm in the implementation. */
bool assignOutgoingUDPQueryToBackend(std::shared_ptr<DownstreamState>& downstream, uint16_t queryID, DNSQuestion& dnsQuestion, PacketBuffer& query, bool actuallySend = true);

/* `healthCheck` defaults to false. Returns a ssize_t, presumably the number of bytes
   sent or a negative value on error, as with send(2) -- confirm. */
ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& backend, const int socketDesc, const PacketBuffer& request, bool healthCheck = false);
bool sendUDPResponse(int origFD, const PacketBuffer& response, const int delayMsec, const ComboAddress& origDest, const ComboAddress& origRemote);
/* Two overloads: the first takes qname, qtype and the incoming protocol explicitly,
   the second takes an InternalQueryState instead (presumably the source of the same
   information -- confirm). */
void handleResponseSent(const DNSName& qname, const QType& qtype, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol outgoingProtocol, dnsdist::Protocol incomingProtocol, bool fromBackend);
void handleResponseSent(const InternalQueryState& ids, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol outgoingProtocol, bool fromBackend);
bool handleTimeoutResponseRules(const std::vector<dnsdist::rules::ResponseRuleAction>& rules, InternalQueryState& ids, const std::shared_ptr<DownstreamState>& ds, const std::shared_ptr<TCPQuerySender>& sender);
void handleServerStateChange(const std::string& nameWithAddr, bool newResult);