/src/bind9/lib/isc/netmgr/tcp.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) Internet Systems Consortium, Inc. ("ISC") |
3 | | * |
4 | | * SPDX-License-Identifier: MPL-2.0 |
5 | | * |
6 | | * This Source Code Form is subject to the terms of the Mozilla Public |
7 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
8 | | * file, you can obtain one at https://mozilla.org/MPL/2.0/. |
9 | | * |
10 | | * See the COPYRIGHT file distributed with this work for additional |
11 | | * information regarding copyright ownership. |
12 | | */ |
13 | | |
14 | | #include <libgen.h> |
15 | | #include <unistd.h> |
16 | | |
17 | | #include <isc/async.h> |
18 | | #include <isc/atomic.h> |
19 | | #include <isc/barrier.h> |
20 | | #include <isc/buffer.h> |
21 | | #include <isc/errno.h> |
22 | | #include <isc/log.h> |
23 | | #include <isc/magic.h> |
24 | | #include <isc/mem.h> |
25 | | #include <isc/netmgr.h> |
26 | | #include <isc/quota.h> |
27 | | #include <isc/random.h> |
28 | | #include <isc/refcount.h> |
29 | | #include <isc/region.h> |
30 | | #include <isc/result.h> |
31 | | #include <isc/sockaddr.h> |
32 | | #include <isc/stdtime.h> |
33 | | #include <isc/thread.h> |
34 | | #include <isc/util.h> |
35 | | #include <isc/uv.h> |
36 | | |
37 | | #include "../loop_p.h" |
38 | | #include "netmgr-int.h" |
39 | | |
40 | | static atomic_uint_fast32_t last_tcpquota_log = 0; |
41 | | |
42 | | static bool |
43 | 0 | can_log_tcp_quota(void) { |
44 | 0 | isc_stdtime_t last; |
45 | 0 | isc_stdtime_t now = isc_stdtime_now(); |
46 | 0 | last = atomic_exchange_relaxed(&last_tcpquota_log, now); |
47 | 0 | if (now != last) { |
48 | 0 | return true; |
49 | 0 | } |
50 | | |
51 | 0 | return false; |
52 | 0 | } |
53 | | |
54 | | static isc_result_t |
55 | | tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req); |
56 | | |
57 | | static isc_result_t |
58 | | tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req); |
59 | | static void |
60 | | tcp_connect_cb(uv_connect_t *uvreq, int status); |
61 | | static void |
62 | | tcp_stop_cb(uv_handle_t *handle); |
63 | | |
64 | | static void |
65 | | tcp_connection_cb(uv_stream_t *server, int status); |
66 | | |
67 | | static void |
68 | | tcp_close_cb(uv_handle_t *uvhandle); |
69 | | |
70 | | static isc_result_t |
71 | | accept_connection(isc_nmsocket_t *ssock); |
72 | | |
73 | | static void |
74 | | quota_accept_cb(void *arg); |
75 | | |
76 | | static void |
77 | | tcp_dbg_log(const isc_nmsocket_t *sock, const isc_result_t result, |
78 | 0 | const char *msg) { |
79 | 0 | const int level = ISC_LOG_DEBUG(99); |
80 | |
|
81 | 0 | if (!isc_log_wouldlog(level)) { |
82 | 0 | return; |
83 | 0 | } |
84 | | |
85 | 0 | char err_msg[256]; |
86 | 0 | char peer_sabuf[ISC_SOCKADDR_FORMATSIZE]; |
87 | 0 | char local_sabuf[ISC_SOCKADDR_FORMATSIZE]; |
88 | 0 | const bool has_peer_info = !sock->accepting && sock->recv_cb != NULL; |
89 | |
|
90 | 0 | err_msg[0] = peer_sabuf[0] = local_sabuf[0] = '\0'; |
91 | |
|
92 | 0 | isc_sockaddr_format(&sock->iface, local_sabuf, sizeof(local_sabuf)); |
93 | |
|
94 | 0 | if (has_peer_info) { |
95 | 0 | isc_sockaddr_format(&sock->peer, peer_sabuf, |
96 | 0 | sizeof(peer_sabuf)); |
97 | 0 | } |
98 | |
|
99 | 0 | if (result != ISC_R_SUCCESS) { |
100 | 0 | snprintf(err_msg, sizeof(err_msg), " (error: %s)", |
101 | 0 | isc_result_totext(result)); |
102 | 0 | } |
103 | |
|
104 | 0 | isc_log_write(ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR, level, |
105 | 0 | " (%s%son %s): %s%s", peer_sabuf, |
106 | 0 | has_peer_info ? " " : "", local_sabuf, msg, err_msg); |
107 | 0 | } |
108 | | |
109 | | static isc_result_t |
110 | 0 | tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { |
111 | 0 | isc__networker_t *worker = NULL; |
112 | 0 | isc_result_t result = ISC_R_UNSET; |
113 | 0 | int r; |
114 | |
|
115 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
116 | 0 | REQUIRE(VALID_UVREQ(req)); |
117 | |
|
118 | 0 | REQUIRE(sock->tid == isc_tid()); |
119 | |
|
120 | 0 | worker = sock->worker; |
121 | |
|
122 | 0 | sock->connecting = true; |
123 | | |
124 | | /* 2 minute timeout */ |
125 | 0 | result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000); |
126 | 0 | RUNTIME_CHECK(result == ISC_R_SUCCESS); |
127 | |
|
128 | 0 | r = uv_tcp_init(&worker->loop->loop, &sock->uv_handle.tcp); |
129 | 0 | UV_RUNTIME_CHECK(uv_tcp_init, r); |
130 | 0 | uv_handle_set_data(&sock->uv_handle.handle, sock); |
131 | |
|
132 | 0 | r = uv_timer_init(&worker->loop->loop, &sock->read_timer); |
133 | 0 | UV_RUNTIME_CHECK(uv_timer_init, r); |
134 | 0 | uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); |
135 | |
|
136 | 0 | r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd); |
137 | 0 | if (r != 0) { |
138 | 0 | isc__nm_closesocket(sock->fd); |
139 | 0 | isc__nm_incstats(sock, STATID_OPENFAIL); |
140 | 0 | return isc_uverr2result(r); |
141 | 0 | } |
142 | 0 | isc__nm_incstats(sock, STATID_OPEN); |
143 | |
|
144 | 0 | if (req->local.length != 0) { |
145 | 0 | r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0); |
146 | 0 | if (r != 0) { |
147 | 0 | isc__nm_incstats(sock, STATID_BINDFAIL); |
148 | 0 | return isc_uverr2result(r); |
149 | 0 | } |
150 | 0 | } |
151 | | |
152 | 0 | isc__nm_set_network_buffers(&sock->uv_handle.handle); |
153 | |
|
154 | 0 | uv_handle_set_data(&req->uv_req.handle, req); |
155 | 0 | r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, |
156 | 0 | &req->peer.type.sa, tcp_connect_cb); |
157 | 0 | if (r != 0) { |
158 | 0 | isc__nm_incstats(sock, STATID_CONNECTFAIL); |
159 | 0 | return isc_uverr2result(r); |
160 | 0 | } |
161 | | |
162 | 0 | uv_handle_set_data((uv_handle_t *)&sock->read_timer, |
163 | 0 | &req->uv_req.connect); |
164 | 0 | isc__nmsocket_timer_start(sock); |
165 | |
|
166 | 0 | return ISC_R_SUCCESS; |
167 | 0 | } |
168 | | |
169 | | static void |
170 | 0 | tcp_connect_cb(uv_connect_t *uvreq, int status) { |
171 | 0 | isc_result_t result = ISC_R_UNSET; |
172 | 0 | isc__nm_uvreq_t *req = NULL; |
173 | 0 | isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle); |
174 | 0 | struct sockaddr_storage ss; |
175 | 0 | isc__networker_t *worker = NULL; |
176 | 0 | int r; |
177 | |
|
178 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
179 | 0 | REQUIRE(sock->tid == isc_tid()); |
180 | |
|
181 | 0 | worker = sock->worker; |
182 | |
|
183 | 0 | req = uv_handle_get_data((uv_handle_t *)uvreq); |
184 | |
|
185 | 0 | REQUIRE(VALID_UVREQ(req)); |
186 | 0 | REQUIRE(VALID_NMHANDLE(req->handle)); |
187 | |
|
188 | 0 | INSIST(sock->connecting); |
189 | |
|
190 | 0 | if (sock->timedout || status == UV_ETIMEDOUT) { |
191 | | /* Connection timed-out */ |
192 | 0 | result = ISC_R_TIMEDOUT; |
193 | 0 | goto error; |
194 | 0 | } else if (isc__nm_closing(worker)) { |
195 | | /* Network manager shutting down */ |
196 | 0 | result = ISC_R_SHUTTINGDOWN; |
197 | 0 | goto error; |
198 | 0 | } else if (isc__nmsocket_closing(sock)) { |
199 | | /* Connection canceled */ |
200 | 0 | result = ISC_R_CANCELED; |
201 | 0 | goto error; |
202 | 0 | } else if (status == UV_EADDRINUSE) { |
203 | | /* |
204 | | * On FreeBSD the TCP connect() call sometimes results in a |
205 | | * spurious transient EADDRINUSE. Try a few more times before |
206 | | * giving up. |
207 | | */ |
208 | 0 | if (--req->connect_tries > 0) { |
209 | 0 | r = uv_tcp_connect(&req->uv_req.connect, |
210 | 0 | &sock->uv_handle.tcp, |
211 | 0 | &req->peer.type.sa, tcp_connect_cb); |
212 | 0 | if (r != 0) { |
213 | 0 | result = isc_uverr2result(r); |
214 | 0 | goto error; |
215 | 0 | } |
216 | 0 | return; |
217 | 0 | } |
218 | 0 | result = isc_uverr2result(status); |
219 | 0 | goto error; |
220 | 0 | } else if (status != 0) { |
221 | 0 | result = isc_uverr2result(status); |
222 | 0 | goto error; |
223 | 0 | } |
224 | | |
225 | 0 | isc__nmsocket_timer_stop(sock); |
226 | 0 | uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); |
227 | |
|
228 | 0 | isc__nm_incstats(sock, STATID_CONNECT); |
229 | 0 | r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss, |
230 | 0 | &(int){ sizeof(ss) }); |
231 | 0 | if (r != 0) { |
232 | 0 | result = isc_uverr2result(r); |
233 | 0 | goto error; |
234 | 0 | } |
235 | | |
236 | 0 | sock->connecting = false; |
237 | 0 | sock->connected = true; |
238 | |
|
239 | 0 | result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss); |
240 | 0 | RUNTIME_CHECK(result == ISC_R_SUCCESS); |
241 | |
|
242 | 0 | isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false); |
243 | |
|
244 | 0 | return; |
245 | 0 | error: |
246 | 0 | isc__nm_failed_connect_cb(sock, req, result, false); |
247 | 0 | } |
248 | | |
249 | | void |
250 | | isc_nm_tcpconnect(isc_sockaddr_t *local, isc_sockaddr_t *peer, |
251 | | isc_nm_cb_t connect_cb, void *connect_cbarg, |
252 | 0 | unsigned int timeout) { |
253 | 0 | isc_result_t result = ISC_R_SUCCESS; |
254 | 0 | isc_nmsocket_t *sock = NULL; |
255 | 0 | isc__nm_uvreq_t *req = NULL; |
256 | 0 | sa_family_t sa_family; |
257 | 0 | isc__networker_t *worker = isc__networker_current(); |
258 | 0 | uv_os_sock_t fd = -1; |
259 | |
|
260 | 0 | REQUIRE(local != NULL); |
261 | 0 | REQUIRE(peer != NULL); |
262 | |
|
263 | 0 | if (isc__nm_closing(worker)) { |
264 | 0 | connect_cb(NULL, ISC_R_SHUTTINGDOWN, connect_cbarg); |
265 | 0 | return; |
266 | 0 | } |
267 | | |
268 | 0 | sa_family = peer->type.sa.sa_family; |
269 | |
|
270 | 0 | result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd); |
271 | 0 | if (result != ISC_R_SUCCESS) { |
272 | 0 | connect_cb(NULL, result, connect_cbarg); |
273 | 0 | return; |
274 | 0 | } |
275 | | |
276 | 0 | sock = isc_mempool_get(worker->nmsocket_pool); |
277 | 0 | isc__nmsocket_init(sock, worker, isc_nm_tcpsocket, local, NULL); |
278 | |
|
279 | 0 | sock->connect_timeout = timeout; |
280 | 0 | sock->fd = fd; |
281 | 0 | sock->client = true; |
282 | |
|
283 | 0 | req = isc__nm_uvreq_get(sock); |
284 | 0 | req->cb.connect = connect_cb; |
285 | 0 | req->cbarg = connect_cbarg; |
286 | 0 | req->peer = *peer; |
287 | 0 | req->local = *local; |
288 | 0 | req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface); |
289 | |
|
290 | 0 | (void)isc__nm_socket_min_mtu(sock->fd, sa_family); |
291 | 0 | (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG); |
292 | |
|
293 | 0 | sock->active = true; |
294 | |
|
295 | 0 | result = tcp_connect_direct(sock, req); |
296 | 0 | if (result != ISC_R_SUCCESS) { |
297 | 0 | sock->active = false; |
298 | 0 | isc__nm_tcp_close(sock); |
299 | 0 | isc__nm_connectcb(sock, req, result, true); |
300 | 0 | } |
301 | | |
302 | | /* |
303 | | * The sock is now attached to the handle. |
304 | | */ |
305 | 0 | isc__nmsocket_detach(&sock); |
306 | 0 | } |
307 | | |
308 | | static uv_os_sock_t |
309 | 0 | isc__nm_tcp_lb_socket(sa_family_t sa_family) { |
310 | 0 | isc_result_t result; |
311 | 0 | uv_os_sock_t sock; |
312 | |
|
313 | 0 | result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock); |
314 | 0 | RUNTIME_CHECK(result == ISC_R_SUCCESS); |
315 | |
|
316 | 0 | (void)isc__nm_socket_v6only(sock, sa_family); |
317 | | |
318 | | /* FIXME: set mss */ |
319 | |
|
320 | 0 | result = isc__nm_socket_reuse(sock, 1); |
321 | 0 | RUNTIME_CHECK(result == ISC_R_SUCCESS); |
322 | |
|
323 | 0 | if (isc__netmgr->load_balance_sockets) { |
324 | 0 | result = isc__nm_socket_reuse_lb(sock); |
325 | 0 | RUNTIME_CHECK(result == ISC_R_SUCCESS); |
326 | 0 | } |
327 | |
|
328 | 0 | return sock; |
329 | 0 | } |
330 | | |
331 | | static void |
332 | 0 | start_tcp_child_job(void *arg) { |
333 | 0 | isc_nmsocket_t *sock = arg; |
334 | |
|
335 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
336 | 0 | REQUIRE(VALID_NMSOCK(sock->parent)); |
337 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
338 | 0 | REQUIRE(sock->tid == isc_tid()); |
339 | |
|
340 | 0 | sa_family_t sa_family = sock->iface.type.sa.sa_family; |
341 | 0 | int r, flags = 0; |
342 | 0 | isc_result_t result = ISC_R_UNSET; |
343 | 0 | isc_loop_t *loop = sock->worker->loop; |
344 | 0 | struct sockaddr_storage ss; |
345 | |
|
346 | 0 | (void)isc__nm_socket_min_mtu(sock->fd, sa_family); |
347 | 0 | (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG); |
348 | |
|
349 | 0 | r = uv_tcp_init(&loop->loop, &sock->uv_handle.tcp); |
350 | 0 | UV_RUNTIME_CHECK(uv_tcp_init, r); |
351 | 0 | uv_handle_set_data(&sock->uv_handle.handle, sock); |
352 | | /* This keeps the socket alive after everything else is gone */ |
353 | 0 | isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL }); |
354 | |
|
355 | 0 | r = uv_timer_init(&loop->loop, &sock->read_timer); |
356 | 0 | UV_RUNTIME_CHECK(uv_timer_init, r); |
357 | 0 | uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); |
358 | |
|
359 | 0 | r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd); |
360 | 0 | if (r < 0) { |
361 | 0 | isc__nm_closesocket(sock->fd); |
362 | 0 | isc__nm_incstats(sock, STATID_OPENFAIL); |
363 | 0 | goto done; |
364 | 0 | } |
365 | 0 | isc__nm_incstats(sock, STATID_OPEN); |
366 | |
|
367 | 0 | if (sa_family == AF_INET6) { |
368 | 0 | flags = UV_TCP_IPV6ONLY; |
369 | 0 | } |
370 | |
|
371 | 0 | if (isc__netmgr->load_balance_sockets) { |
372 | 0 | r = isc__nm_tcp_freebind(&sock->uv_handle.tcp, |
373 | 0 | &sock->iface.type.sa, flags); |
374 | 0 | if (r < 0) { |
375 | 0 | isc__nm_incstats(sock, STATID_BINDFAIL); |
376 | 0 | goto done; |
377 | 0 | } |
378 | 0 | } else if (sock->tid == 0) { |
379 | 0 | r = isc__nm_tcp_freebind(&sock->uv_handle.tcp, |
380 | 0 | &sock->iface.type.sa, flags); |
381 | 0 | if (r < 0) { |
382 | 0 | isc__nm_incstats(sock, STATID_BINDFAIL); |
383 | 0 | goto done; |
384 | 0 | } |
385 | 0 | sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; |
386 | 0 | } else { |
387 | | /* The socket is already bound, just copy the flags */ |
388 | 0 | sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; |
389 | 0 | } |
390 | | |
391 | 0 | isc__nm_set_network_buffers(&sock->uv_handle.handle); |
392 | | |
393 | | /* |
394 | | * The callback will run in the same thread uv_listen() was called |
395 | | * from, so a race with tcp_connection_cb() isn't possible. |
396 | | */ |
397 | 0 | r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog, |
398 | 0 | tcp_connection_cb); |
399 | 0 | if (r != 0) { |
400 | 0 | isc__nmsocket_log(sock, ISC_LOG_ERROR, "uv_listen failed: %s", |
401 | 0 | isc_result_totext(isc_uverr2result(r))); |
402 | 0 | isc__nm_incstats(sock, STATID_BINDFAIL); |
403 | 0 | goto done; |
404 | 0 | } |
405 | | |
406 | 0 | if (sock->tid == 0) { |
407 | 0 | r = uv_tcp_getsockname(&sock->uv_handle.tcp, |
408 | 0 | (struct sockaddr *)&ss, |
409 | 0 | &(int){ sizeof(ss) }); |
410 | 0 | if (r != 0) { |
411 | 0 | goto done; |
412 | 0 | } |
413 | | |
414 | 0 | result = isc_sockaddr_fromsockaddr(&sock->parent->iface, |
415 | 0 | (struct sockaddr *)&ss); |
416 | 0 | if (result != ISC_R_SUCCESS) { |
417 | 0 | goto done_result; |
418 | 0 | } |
419 | 0 | } |
420 | | |
421 | 0 | done: |
422 | 0 | result = isc_uverr2result(r); |
423 | |
|
424 | 0 | done_result: |
425 | 0 | if (result != ISC_R_SUCCESS) { |
426 | 0 | sock->pquota = NULL; |
427 | 0 | } |
428 | |
|
429 | 0 | sock->result = result; |
430 | |
|
431 | 0 | REQUIRE(!loop->paused); |
432 | |
|
433 | 0 | if (sock->tid != 0) { |
434 | 0 | isc_barrier_wait(&sock->parent->listen_barrier); |
435 | 0 | } |
436 | 0 | } |
437 | | |
438 | | static void |
439 | | start_tcp_child(isc_sockaddr_t *iface, isc_nmsocket_t *sock, uv_os_sock_t fd, |
440 | 0 | isc_tid_t tid) { |
441 | 0 | isc_nmsocket_t *csock = &sock->children[tid]; |
442 | 0 | isc__networker_t *worker = isc__networker_get(tid); |
443 | |
|
444 | 0 | isc__nmsocket_init(csock, worker, isc_nm_tcpsocket, iface, sock); |
445 | 0 | csock->accept_cb = sock->accept_cb; |
446 | 0 | csock->accept_cbarg = sock->accept_cbarg; |
447 | 0 | csock->backlog = sock->backlog; |
448 | | |
449 | | /* |
450 | | * Quota isn't attached, just assigned. |
451 | | */ |
452 | 0 | csock->pquota = sock->pquota; |
453 | |
|
454 | 0 | if (isc__netmgr->load_balance_sockets) { |
455 | 0 | UNUSED(fd); |
456 | 0 | csock->fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); |
457 | 0 | } else { |
458 | 0 | csock->fd = dup(fd); |
459 | 0 | } |
460 | 0 | REQUIRE(csock->fd >= 0); |
461 | |
|
462 | 0 | if (tid == 0) { |
463 | 0 | start_tcp_child_job(csock); |
464 | 0 | } else { |
465 | 0 | isc_async_run(worker->loop, start_tcp_child_job, csock); |
466 | 0 | } |
467 | 0 | } |
468 | | |
469 | | isc_result_t |
470 | | isc_nm_listentcp(uint32_t workers, isc_sockaddr_t *iface, |
471 | | isc_nm_accept_cb_t accept_cb, void *accept_cbarg, int backlog, |
472 | 0 | isc_quota_t *quota, isc_nmsocket_t **sockp) { |
473 | 0 | isc_nmsocket_t *sock = NULL; |
474 | 0 | uv_os_sock_t fd = -1; |
475 | 0 | isc_result_t result = ISC_R_UNSET; |
476 | 0 | isc__networker_t *worker = isc__networker_get(0); |
477 | |
|
478 | 0 | REQUIRE(isc_tid() == 0); |
479 | |
|
480 | 0 | if (workers == 0) { |
481 | 0 | workers = isc__netmgr->nloops; |
482 | 0 | } |
483 | 0 | REQUIRE(workers <= isc__netmgr->nloops); |
484 | |
|
485 | 0 | sock = isc_mempool_get(worker->nmsocket_pool); |
486 | 0 | isc__nmsocket_init(sock, worker, isc_nm_tcplistener, iface, NULL); |
487 | |
|
488 | 0 | sock->nchildren = (workers == ISC_NM_LISTEN_ALL) |
489 | 0 | ? (uint32_t)isc__netmgr->nloops |
490 | 0 | : workers; |
491 | 0 | sock->children = isc_mem_cget(worker->mctx, sock->nchildren, |
492 | 0 | sizeof(sock->children[0])); |
493 | |
|
494 | 0 | isc__nmsocket_barrier_init(sock); |
495 | |
|
496 | 0 | sock->accept_cb = accept_cb; |
497 | 0 | sock->accept_cbarg = accept_cbarg; |
498 | 0 | sock->backlog = backlog; |
499 | 0 | sock->pquota = quota; |
500 | |
|
501 | 0 | if (!isc__netmgr->load_balance_sockets) { |
502 | 0 | fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); |
503 | 0 | } |
504 | |
|
505 | 0 | start_tcp_child(iface, sock, fd, 0); |
506 | 0 | result = sock->children[0].result; |
507 | 0 | INSIST(result != ISC_R_UNSET); |
508 | |
|
509 | 0 | for (size_t i = 1; i < sock->nchildren; i++) { |
510 | 0 | start_tcp_child(iface, sock, fd, i); |
511 | 0 | } |
512 | |
|
513 | 0 | isc_barrier_wait(&sock->listen_barrier); |
514 | |
|
515 | 0 | if (!isc__netmgr->load_balance_sockets) { |
516 | 0 | isc__nm_closesocket(fd); |
517 | 0 | } |
518 | | |
519 | | /* |
520 | | * If any of the child sockets have failed then isc_nm_listentcp |
521 | | * fails. |
522 | | */ |
523 | 0 | for (size_t i = 1; i < sock->nchildren; i++) { |
524 | 0 | if (result == ISC_R_SUCCESS && |
525 | 0 | sock->children[i].result != ISC_R_SUCCESS) |
526 | 0 | { |
527 | 0 | result = sock->children[i].result; |
528 | 0 | } |
529 | 0 | } |
530 | |
|
531 | 0 | if (result != ISC_R_SUCCESS) { |
532 | 0 | sock->active = false; |
533 | 0 | isc__nm_tcp_stoplistening(sock); |
534 | 0 | isc_nmsocket_close(&sock); |
535 | |
|
536 | 0 | return result; |
537 | 0 | } |
538 | | |
539 | 0 | sock->active = true; |
540 | |
|
541 | 0 | *sockp = sock; |
542 | 0 | return ISC_R_SUCCESS; |
543 | 0 | } |
544 | | |
545 | | static void |
546 | 0 | tcp_connection_cb(uv_stream_t *server, int status) { |
547 | 0 | isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server); |
548 | 0 | isc_result_t result; |
549 | |
|
550 | 0 | REQUIRE(ssock->accept_cb != NULL); |
551 | |
|
552 | 0 | if (status != 0) { |
553 | 0 | result = isc_uverr2result(status); |
554 | 0 | tcp_dbg_log(ssock, result, |
555 | 0 | "TCP peer connection attempt early failure"); |
556 | 0 | goto done; |
557 | 0 | } |
558 | | |
559 | 0 | REQUIRE(VALID_NMSOCK(ssock)); |
560 | 0 | REQUIRE(ssock->tid == isc_tid()); |
561 | |
|
562 | 0 | if (isc__nmsocket_closing(ssock)) { |
563 | 0 | result = ISC_R_CANCELED; |
564 | 0 | goto done; |
565 | 0 | } |
566 | | |
567 | | /* Prepare the child socket */ |
568 | 0 | isc_nmsocket_t *csock = isc_mempool_get(ssock->worker->nmsocket_pool); |
569 | 0 | isc__nmsocket_init(csock, ssock->worker, isc_nm_tcpsocket, |
570 | 0 | &ssock->iface, NULL); |
571 | 0 | isc__nmsocket_attach(ssock, &csock->server); |
572 | |
|
573 | 0 | tcp_dbg_log(csock, ISC_R_SUCCESS, "TCP peer connection attempt"); |
574 | |
|
575 | 0 | if (csock->server->pquota != NULL) { |
576 | 0 | result = isc_quota_acquire_cb(csock->server->pquota, |
577 | 0 | &csock->quotacb, quota_accept_cb, |
578 | 0 | csock); |
579 | 0 | if (result == ISC_R_QUOTA) { |
580 | 0 | csock->quota_accept_ts = isc_time_monotonic(); |
581 | 0 | isc__nm_incstats(ssock, STATID_ACCEPTFAIL); |
582 | 0 | goto done; |
583 | 0 | } |
584 | 0 | } |
585 | | |
586 | 0 | result = accept_connection(csock); |
587 | 0 | done: |
588 | 0 | isc__nm_accept_connection_log(ssock, result, can_log_tcp_quota()); |
589 | 0 | } |
590 | | |
591 | | static void |
592 | 0 | stop_tcp_child_job(void *arg) { |
593 | 0 | isc_nmsocket_t *sock = arg; |
594 | |
|
595 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
596 | 0 | REQUIRE(sock->tid == isc_tid()); |
597 | 0 | REQUIRE(sock->parent != NULL); |
598 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
599 | 0 | REQUIRE(!sock->closing); |
600 | |
|
601 | 0 | sock->active = false; |
602 | 0 | sock->closing = true; |
603 | | |
604 | | /* |
605 | | * The order of the close operations is important here: the uv_close() |
606 | | * calls get scheduled in reverse order, so we need to close the timer |
607 | | * last, so that it's gone by the time we destroy the socket. |
608 | | */ |
609 | | |
610 | | /* 2. close the listening socket */ |
611 | 0 | isc__nmsocket_clearcb(sock); |
612 | 0 | isc__nm_stop_reading(sock); |
613 | 0 | uv_close(&sock->uv_handle.handle, tcp_stop_cb); |
614 | | |
615 | | /* 1. close the read timer */ |
616 | 0 | isc__nmsocket_timer_stop(sock); |
617 | 0 | uv_close((uv_handle_t *)&sock->read_timer, NULL); |
618 | |
|
619 | 0 | REQUIRE(!sock->worker->loop->paused); |
620 | 0 | isc_barrier_wait(&sock->parent->stop_barrier); |
621 | 0 | } |
622 | | |
623 | | static void |
624 | 0 | stop_tcp_child(isc_nmsocket_t *sock) { |
625 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
626 | |
|
627 | 0 | if (sock->tid == 0) { |
628 | 0 | stop_tcp_child_job(sock); |
629 | 0 | } else { |
630 | 0 | isc_async_run(sock->worker->loop, stop_tcp_child_job, sock); |
631 | 0 | } |
632 | 0 | } |
633 | | |
634 | | void |
635 | 0 | isc__nm_tcp_stoplistening(isc_nmsocket_t *sock) { |
636 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
637 | 0 | REQUIRE(sock->type == isc_nm_tcplistener); |
638 | 0 | REQUIRE(sock->tid == isc_tid()); |
639 | 0 | REQUIRE(sock->tid == 0); |
640 | 0 | REQUIRE(!sock->closing); |
641 | |
|
642 | 0 | sock->closing = true; |
643 | | |
644 | | /* Mark the parent socket inactive */ |
645 | 0 | sock->active = false; |
646 | | |
647 | | /* Stop all the other threads' children */ |
648 | 0 | for (size_t i = 1; i < sock->nchildren; i++) { |
649 | 0 | stop_tcp_child(&sock->children[i]); |
650 | 0 | } |
651 | | |
652 | | /* Stop the child for the main thread */ |
653 | 0 | stop_tcp_child(&sock->children[0]); |
654 | | |
655 | | /* Stop the parent */ |
656 | 0 | sock->closed = true; |
657 | |
|
658 | 0 | isc__nmsocket_prep_destroy(sock); |
659 | 0 | } |
660 | | |
661 | | static void |
662 | 0 | tcp_stop_cb(uv_handle_t *handle) { |
663 | 0 | isc_nmsocket_t *sock = uv_handle_get_data(handle); |
664 | 0 | uv_handle_set_data(handle, NULL); |
665 | |
|
666 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
667 | 0 | REQUIRE(sock->tid == isc_tid()); |
668 | 0 | REQUIRE(sock->closing); |
669 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
670 | 0 | REQUIRE(!sock->closed); |
671 | |
|
672 | 0 | sock->closed = true; |
673 | |
|
674 | 0 | isc__nm_incstats(sock, STATID_CLOSE); |
675 | |
|
676 | 0 | isc__nmsocket_detach(&sock); |
677 | 0 | } |
678 | | |
679 | | void |
680 | | isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, |
681 | 0 | bool async) { |
682 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
683 | 0 | REQUIRE(result != ISC_R_SUCCESS); |
684 | |
|
685 | 0 | isc__nmsocket_timer_stop(sock); |
686 | 0 | isc__nm_stop_reading(sock); |
687 | 0 | sock->reading = false; |
688 | |
|
689 | 0 | if (sock->recv_cb != NULL) { |
690 | 0 | isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL); |
691 | 0 | isc__nmsocket_clearcb(sock); |
692 | 0 | isc__nm_readcb(sock, req, result, async); |
693 | 0 | } |
694 | |
|
695 | 0 | isc__nmsocket_prep_destroy(sock); |
696 | 0 | } |
697 | | |
698 | | void |
699 | 0 | isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { |
700 | 0 | isc_nmsocket_t *sock; |
701 | 0 | isc_result_t result; |
702 | |
|
703 | 0 | REQUIRE(VALID_NMHANDLE(handle)); |
704 | 0 | REQUIRE(VALID_NMSOCK(handle->sock)); |
705 | |
|
706 | 0 | sock = handle->sock; |
707 | |
|
708 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
709 | 0 | REQUIRE(sock->statichandle == handle); |
710 | |
|
711 | 0 | sock->recv_cb = cb; |
712 | 0 | sock->recv_cbarg = cbarg; |
713 | | |
714 | | /* Initialize the timer */ |
715 | 0 | if (sock->read_timeout == 0) { |
716 | 0 | sock->read_timeout = |
717 | 0 | sock->keepalive |
718 | 0 | ? atomic_load_relaxed(&isc__netmgr->keepalive) |
719 | 0 | : atomic_load_relaxed(&isc__netmgr->idle); |
720 | 0 | } |
721 | |
|
722 | 0 | if (isc__nmsocket_closing(sock)) { |
723 | 0 | CLEANUP(ISC_R_CANCELED); |
724 | 0 | } |
725 | | |
726 | 0 | if (!sock->reading_throttled) { |
727 | 0 | CHECK(isc__nm_start_reading(sock)); |
728 | 0 | } |
729 | | |
730 | 0 | sock->reading = true; |
731 | |
|
732 | 0 | if (!sock->manual_read_timer) { |
733 | 0 | isc__nmsocket_timer_start(sock); |
734 | 0 | } |
735 | |
|
736 | 0 | return; |
737 | 0 | cleanup: |
738 | 0 | isc__nm_tcp_failed_read_cb(sock, result, true); |
739 | 0 | } |
740 | | |
741 | | void |
742 | 0 | isc__nm_tcp_read_stop(isc_nmhandle_t *handle) { |
743 | 0 | REQUIRE(VALID_NMHANDLE(handle)); |
744 | 0 | REQUIRE(VALID_NMSOCK(handle->sock)); |
745 | |
|
746 | 0 | isc_nmsocket_t *sock = handle->sock; |
747 | |
|
748 | 0 | if (!sock->manual_read_timer) { |
749 | 0 | isc__nmsocket_timer_stop(sock); |
750 | 0 | } |
751 | 0 | isc__nm_stop_reading(sock); |
752 | 0 | sock->reading = false; |
753 | |
|
754 | 0 | return; |
755 | 0 | } |
756 | | |
757 | | void |
758 | 0 | isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { |
759 | 0 | isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream); |
760 | 0 | isc__nm_uvreq_t *req = NULL; |
761 | |
|
762 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
763 | 0 | REQUIRE(sock->tid == isc_tid()); |
764 | 0 | REQUIRE(buf != NULL); |
765 | |
|
766 | 0 | if (isc__nmsocket_closing(sock)) { |
767 | 0 | isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED, false); |
768 | 0 | goto free; |
769 | 0 | } |
770 | | |
771 | 0 | if (nread < 0) { |
772 | 0 | if (nread != UV_EOF) { |
773 | 0 | isc__nm_incstats(sock, STATID_RECVFAIL); |
774 | 0 | } |
775 | |
|
776 | 0 | isc__nm_tcp_failed_read_cb(sock, isc_uverr2result(nread), |
777 | 0 | false); |
778 | |
|
779 | 0 | goto free; |
780 | 0 | } |
781 | | |
782 | 0 | req = isc__nm_get_read_req(sock, NULL); |
783 | | |
784 | | /* |
785 | | * The callback will be called synchronously because the |
786 | | * result is ISC_R_SUCCESS, so we don't need to retain |
787 | | * the buffer |
788 | | */ |
789 | 0 | req->uvbuf.base = buf->base; |
790 | 0 | req->uvbuf.len = nread; |
791 | |
|
792 | 0 | if (!sock->client) { |
793 | 0 | sock->read_timeout = |
794 | 0 | sock->keepalive |
795 | 0 | ? atomic_load_relaxed(&isc__netmgr->keepalive) |
796 | 0 | : atomic_load_relaxed(&isc__netmgr->idle); |
797 | 0 | } |
798 | |
|
799 | 0 | isc__nm_readcb(sock, req, ISC_R_SUCCESS, false); |
800 | |
|
801 | 0 | if (!sock->client && sock->reading) { |
802 | | /* |
803 | | * Stop reading if we have accumulated enough bytes in the send |
804 | | * queue; this means that the TCP client is not reading back the |
805 | | * data we are sending to it, and there's no reason to continue |
806 | | * processing more incoming DNS messages if the client is not |
807 | | * reading back the responses. |
808 | | */ |
809 | 0 | size_t write_queue_size = |
810 | 0 | uv_stream_get_write_queue_size(&sock->uv_handle.stream); |
811 | |
|
812 | 0 | if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { |
813 | 0 | isc__nmsocket_log( |
814 | 0 | sock, ISC_LOG_DEBUG(3), |
815 | 0 | "throttling TCP connection, the other side is " |
816 | 0 | "not reading the data (%zu)", |
817 | 0 | write_queue_size); |
818 | 0 | sock->reading_throttled = true; |
819 | 0 | isc__nm_stop_reading(sock); |
820 | 0 | } |
821 | 0 | } else if (uv_is_active(&sock->uv_handle.handle) && |
822 | 0 | !sock->manual_read_timer) |
823 | 0 | { |
824 | | /* The readcb could have paused the reading */ |
825 | | /* The timer will be updated */ |
826 | 0 | isc__nmsocket_timer_restart(sock); |
827 | 0 | } |
828 | |
|
829 | 0 | free: |
830 | 0 | if (nread < 0) { |
831 | | /* |
832 | | * The buffer may be a null buffer on error. |
833 | | */ |
834 | 0 | if (buf->base == NULL && buf->len == 0) { |
835 | 0 | return; |
836 | 0 | } |
837 | 0 | } |
838 | | |
839 | 0 | isc__nm_free_uvbuf(sock, buf); |
840 | 0 | } |
841 | | |
842 | | /* |
843 | | * This is called after we get a quota_accept_cb() callback. |
844 | | */ |
845 | | static void |
846 | 0 | tcpaccept_cb(void *arg) { |
847 | 0 | isc_nmsocket_t *csock = arg; |
848 | 0 | isc_nmsocket_t *ssock = csock->server; |
849 | |
|
850 | 0 | REQUIRE(VALID_NMSOCK(csock)); |
851 | 0 | REQUIRE(csock->tid == isc_tid()); |
852 | |
|
853 | 0 | isc_result_t result = accept_connection(csock); |
854 | 0 | isc__nm_accept_connection_log(ssock, result, can_log_tcp_quota()); |
855 | 0 | isc__nmsocket_detach(&csock); |
856 | 0 | } |
857 | | |
858 | | static void |
859 | 0 | quota_accept_cb(void *arg) { |
860 | 0 | isc_nmsocket_t *csock = arg; |
861 | 0 | isc_nmsocket_t *ssock = csock->server; |
862 | |
|
863 | 0 | REQUIRE(VALID_NMSOCK(csock)); |
864 | | |
865 | | /* |
866 | | * This needs to be asynchronous, because the quota might have been |
867 | | * released by a different child socket. |
868 | | */ |
869 | 0 | if (csock->tid == isc_tid()) { |
870 | 0 | isc_result_t result = accept_connection(csock); |
871 | 0 | isc__nm_accept_connection_log(ssock, result, |
872 | 0 | can_log_tcp_quota()); |
873 | 0 | } else { |
874 | 0 | isc__nmsocket_attach(csock, &(isc_nmsocket_t *){ NULL }); |
875 | 0 | isc_async_run(csock->worker->loop, tcpaccept_cb, csock); |
876 | 0 | } |
877 | 0 | } |
878 | | |
879 | | static isc_result_t |
880 | 0 | accept_connection(isc_nmsocket_t *csock) { |
881 | 0 | int r; |
882 | 0 | isc_result_t result; |
883 | 0 | struct sockaddr_storage ss; |
884 | 0 | isc_sockaddr_t local; |
885 | 0 | isc_nmhandle_t *handle = NULL; |
886 | |
|
887 | 0 | REQUIRE(VALID_NMSOCK(csock)); |
888 | 0 | REQUIRE(VALID_NMSOCK(csock->server)); |
889 | 0 | REQUIRE(csock->tid == isc_tid()); |
890 | |
|
891 | 0 | csock->accepting = true; |
892 | 0 | csock->accept_cb = csock->server->accept_cb; |
893 | 0 | csock->accept_cbarg = csock->server->accept_cbarg; |
894 | 0 | csock->recv_cb = csock->server->recv_cb; |
895 | 0 | csock->recv_cbarg = csock->server->recv_cbarg; |
896 | 0 | csock->read_timeout = atomic_load_relaxed(&isc__netmgr->init); |
897 | |
|
898 | 0 | r = uv_tcp_init(&csock->worker->loop->loop, &csock->uv_handle.tcp); |
899 | 0 | UV_RUNTIME_CHECK(uv_tcp_init, r); |
900 | 0 | uv_handle_set_data(&csock->uv_handle.handle, csock); |
901 | |
|
902 | 0 | r = uv_timer_init(&csock->worker->loop->loop, &csock->read_timer); |
903 | 0 | UV_RUNTIME_CHECK(uv_timer_init, r); |
904 | 0 | uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock); |
905 | |
|
906 | 0 | if (csock->server->pquota != NULL) { |
907 | 0 | isc__nm_incstats(csock, STATID_CLIENTS); |
908 | 0 | } |
909 | | |
910 | | /* |
911 | | * We need to initialize the TCP handle and the timer before failing, |
912 | | * because isc__nm_tcp_close() can't handle an uninitialized TCP nmsocket. |
913 | | */ |
914 | 0 | if (isc__nmsocket_closing(csock)) { |
915 | 0 | CLEANUP(ISC_R_CANCELED); |
916 | 0 | } |
917 | | |
918 | 0 | r = uv_accept(&csock->server->uv_handle.stream, |
919 | 0 | &csock->uv_handle.stream); |
920 | 0 | if (r != 0) { |
921 | 0 | result = isc_uverr2result(r); |
922 | 0 | goto cleanup; |
923 | 0 | } |
924 | | |
925 | | /* Check that the connection has not expired */ |
926 | 0 | if (csock->quota_accept_ts != 0) { |
927 | | /* The timestamp is given in nanoseconds */ |
928 | 0 | const uint64_t time_elapsed_ms = |
929 | 0 | (isc_time_monotonic() - csock->quota_accept_ts) / |
930 | 0 | NS_PER_MS; |
931 | |
|
932 | 0 | if (time_elapsed_ms >= csock->read_timeout) { |
933 | | /* |
934 | | * At this point we have received a connection from a |
935 | | * queue of accepted connections (via uv_accept()), but |
936 | | * it has expired. We cannot do anything better than |
937 | | * drop it on the floor at this point. |
938 | | */ |
939 | 0 | CLEANUP(ISC_R_TIMEDOUT); |
940 | 0 | } else { |
941 | | /* Adjust the initial read timeout accordingly */ |
942 | 0 | csock->read_timeout -= time_elapsed_ms; |
943 | 0 | } |
944 | 0 | } |
945 | | |
946 | 0 | r = uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *)&ss, |
947 | 0 | &(int){ sizeof(ss) }); |
948 | 0 | if (r != 0) { |
949 | 0 | result = isc_uverr2result(r); |
950 | 0 | goto cleanup; |
951 | 0 | } |
952 | | |
953 | 0 | CHECK(isc_sockaddr_fromsockaddr(&csock->peer, (struct sockaddr *)&ss)); |
954 | |
|
955 | 0 | r = uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *)&ss, |
956 | 0 | &(int){ sizeof(ss) }); |
957 | 0 | if (r != 0) { |
958 | 0 | result = isc_uverr2result(r); |
959 | 0 | goto cleanup; |
960 | 0 | } |
961 | | |
962 | 0 | CHECK(isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss)); |
963 | |
|
964 | 0 | handle = isc__nmhandle_get(csock, NULL, &local); |
965 | |
|
966 | 0 | result = csock->accept_cb(handle, ISC_R_SUCCESS, csock->accept_cbarg); |
967 | 0 | if (result != ISC_R_SUCCESS) { |
968 | 0 | isc_nmhandle_detach(&handle); |
969 | 0 | goto cleanup; |
970 | 0 | } |
971 | | |
972 | 0 | csock->accepting = false; |
973 | |
|
974 | 0 | tcp_dbg_log(csock, ISC_R_SUCCESS, "TCP connection has been accepted"); |
975 | |
|
976 | 0 | isc__nm_incstats(csock, STATID_ACCEPT); |
977 | | |
978 | | /* |
979 | | * The acceptcb needs to attach to the handle if it wants to keep the |
980 | | * connection alive |
981 | | */ |
982 | 0 | isc_nmhandle_detach(&handle); |
983 | | |
984 | | /* |
985 | | * sock is now attached to the handle. |
986 | | */ |
987 | 0 | isc__nmsocket_detach(&csock); |
988 | |
|
989 | 0 | return ISC_R_SUCCESS; |
990 | | |
991 | 0 | cleanup: |
992 | 0 | csock->active = false; |
993 | 0 | csock->accepting = false; |
994 | |
|
995 | 0 | if (result != ISC_R_NOTCONNECTED) { |
996 | | /* IGNORE: The client disconnected before we could accept */ |
997 | 0 | isc__nmsocket_log(csock, ISC_LOG_ERROR, |
998 | 0 | "Accepting TCP connection failed: %s", |
999 | 0 | isc_result_totext(result)); |
1000 | 0 | } |
1001 | |
|
1002 | 0 | tcp_dbg_log(csock, result, "TCP connection has NOT been accepted"); |
1003 | |
|
1004 | 0 | isc__nmsocket_prep_destroy(csock); |
1005 | |
|
1006 | 0 | isc__nmsocket_detach(&csock); |
1007 | |
|
1008 | 0 | return result; |
1009 | 0 | } |
1010 | | |
1011 | | static void |
1012 | | tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, isc_nm_cb_t cb, |
1013 | 0 | void *cbarg, const bool dnsmsg) { |
1014 | 0 | REQUIRE(VALID_NMHANDLE(handle)); |
1015 | 0 | REQUIRE(VALID_NMSOCK(handle->sock)); |
1016 | |
|
1017 | 0 | isc_nmsocket_t *sock = handle->sock; |
1018 | 0 | isc_result_t result; |
1019 | 0 | isc__nm_uvreq_t *uvreq = NULL; |
1020 | |
|
1021 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
1022 | 0 | REQUIRE(sock->tid == isc_tid()); |
1023 | |
|
1024 | 0 | uvreq = isc__nm_uvreq_get(sock); |
1025 | 0 | if (dnsmsg) { |
1026 | 0 | *(uint16_t *)uvreq->tcplen = htons(region->length); |
1027 | 0 | } |
1028 | 0 | uvreq->uvbuf.base = (char *)region->base; |
1029 | 0 | uvreq->uvbuf.len = region->length; |
1030 | |
|
1031 | 0 | isc_nmhandle_attach(handle, &uvreq->handle); |
1032 | |
|
1033 | 0 | uvreq->cb.send = cb; |
1034 | 0 | uvreq->cbarg = cbarg; |
1035 | |
|
1036 | 0 | if (sock->write_timeout == 0) { |
1037 | 0 | sock->write_timeout = |
1038 | 0 | sock->keepalive |
1039 | 0 | ? atomic_load_relaxed(&isc__netmgr->keepalive) |
1040 | 0 | : atomic_load_relaxed(&isc__netmgr->idle); |
1041 | 0 | } |
1042 | |
|
1043 | 0 | result = tcp_send_direct(sock, uvreq); |
1044 | 0 | if (result != ISC_R_SUCCESS) { |
1045 | 0 | isc__nm_incstats(sock, STATID_SENDFAIL); |
1046 | 0 | isc__nm_failed_send_cb(sock, uvreq, result, true); |
1047 | 0 | } |
1048 | |
|
1049 | 0 | return; |
1050 | 0 | } |
1051 | | |
1052 | | void |
1053 | | isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, |
1054 | 0 | isc_nm_cb_t cb, void *cbarg) { |
1055 | 0 | tcp_send(handle, region, cb, cbarg, false); |
1056 | 0 | } |
1057 | | |
1058 | | void |
1059 | | isc__nm_tcp_senddns(isc_nmhandle_t *handle, const isc_region_t *region, |
1060 | 0 | isc_nm_cb_t cb, void *cbarg) { |
1061 | 0 | tcp_send(handle, region, cb, cbarg, true); |
1062 | 0 | } |
1063 | | |
1064 | | static void |
1065 | 0 | tcp_maybe_restart_reading(isc_nmsocket_t *sock) { |
1066 | 0 | if (!sock->client && sock->reading && |
1067 | 0 | !uv_is_active(&sock->uv_handle.handle)) |
1068 | 0 | { |
1069 | | /* |
1070 | | * Restart reading if we have less data in the send queue than |
1071 | | * the send buffer size; this means that the TCP client has |
1072 | | * started reading some data again. Starting to read when we go |
1073 | | * under the limit, instead of waiting until all the data has been |
1074 | | * flushed, allows faster recovery (in case there was |
1075 | | * congestion and now there isn't). |
1076 | | */ |
1077 | 0 | size_t write_queue_size = |
1078 | 0 | uv_stream_get_write_queue_size(&sock->uv_handle.stream); |
1079 | 0 | if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { |
1080 | 0 | isc__nmsocket_log( |
1081 | 0 | sock, ISC_LOG_DEBUG(3), |
1082 | 0 | "resuming TCP connection, the other side " |
1083 | 0 | "is reading the data again (%zu)", |
1084 | 0 | write_queue_size); |
1085 | 0 | isc__nm_start_reading(sock); |
1086 | 0 | sock->reading_throttled = false; |
1087 | 0 | } |
1088 | 0 | } |
1089 | 0 | } |
1090 | | |
1091 | | static void |
1092 | 0 | tcp_send_cb(uv_write_t *req, int status) { |
1093 | 0 | isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; |
1094 | 0 | isc_nmsocket_t *sock = NULL; |
1095 | |
|
1096 | 0 | REQUIRE(VALID_UVREQ(uvreq)); |
1097 | 0 | REQUIRE(VALID_NMSOCK(uvreq->sock)); |
1098 | |
|
1099 | 0 | sock = uvreq->sock; |
1100 | |
|
1101 | 0 | isc_nm_timer_stop(uvreq->timer); |
1102 | 0 | isc_nm_timer_detach(&uvreq->timer); |
1103 | |
|
1104 | 0 | if (status < 0) { |
1105 | 0 | isc__nm_incstats(sock, STATID_SENDFAIL); |
1106 | 0 | isc__nm_failed_send_cb(sock, uvreq, isc_uverr2result(status), |
1107 | 0 | false); |
1108 | 0 | if (!sock->client && sock->reading) { |
1109 | | /* |
1110 | | * As we are resuming reading, it is not throttled |
1111 | | * anymore (technically). |
1112 | | */ |
1113 | 0 | sock->reading_throttled = false; |
1114 | 0 | isc__nm_start_reading(sock); |
1115 | 0 | isc__nmsocket_reset(sock); |
1116 | 0 | } |
1117 | 0 | return; |
1118 | 0 | } |
1119 | | |
1120 | 0 | isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); |
1121 | 0 | tcp_maybe_restart_reading(sock); |
1122 | 0 | } |
1123 | | |
1124 | | static isc_result_t |
1125 | 0 | tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { |
1126 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
1127 | 0 | REQUIRE(VALID_UVREQ(req)); |
1128 | 0 | REQUIRE(sock->tid == isc_tid()); |
1129 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
1130 | |
|
1131 | 0 | int r; |
1132 | 0 | uv_buf_t bufs[2] = { { 0 }, { 0 } }; /* ugly, but required for old GCC |
1133 | | versions */ |
1134 | 0 | size_t nbufs = 1; |
1135 | |
|
1136 | 0 | if (isc__nmsocket_closing(sock)) { |
1137 | 0 | return ISC_R_CANCELED; |
1138 | 0 | } |
1139 | | |
1140 | | /* Check if we are not trying to send a DNS message */ |
1141 | 0 | if (*(uint16_t *)req->tcplen == 0) { |
1142 | 0 | bufs[0].base = req->uvbuf.base; |
1143 | 0 | bufs[0].len = req->uvbuf.len; |
1144 | |
|
1145 | 0 | r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs); |
1146 | |
|
1147 | 0 | if (r == (int)(bufs[0].len)) { |
1148 | | /* Wrote everything */ |
1149 | 0 | isc__nm_sendcb(sock, req, ISC_R_SUCCESS, true); |
1150 | 0 | tcp_maybe_restart_reading(sock); |
1151 | 0 | return ISC_R_SUCCESS; |
1152 | 0 | } else if (r > 0) { |
1153 | 0 | bufs[0].base += (size_t)r; |
1154 | 0 | bufs[0].len -= (size_t)r; |
1155 | 0 | } else if (!(r == UV_ENOSYS || r == UV_EAGAIN)) { |
1156 | 0 | return isc_uverr2result(r); |
1157 | 0 | } |
1158 | 0 | } else { |
1159 | 0 | nbufs = 2; |
1160 | 0 | bufs[0].base = req->tcplen; |
1161 | 0 | bufs[0].len = 2; |
1162 | 0 | bufs[1].base = req->uvbuf.base; |
1163 | 0 | bufs[1].len = req->uvbuf.len; |
1164 | |
|
1165 | 0 | r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs); |
1166 | |
|
1167 | 0 | if (r == (int)(bufs[0].len + bufs[1].len)) { |
1168 | | /* Wrote everything */ |
1169 | 0 | isc__nm_sendcb(sock, req, ISC_R_SUCCESS, true); |
1170 | 0 | tcp_maybe_restart_reading(sock); |
1171 | 0 | return ISC_R_SUCCESS; |
1172 | 0 | } else if (r == 1) { |
1173 | | /* Partial write of DNSMSG length */ |
1174 | 0 | bufs[0].base = req->tcplen + 1; |
1175 | 0 | bufs[0].len = 1; |
1176 | 0 | } else if (r > 0) { |
1177 | | /* Partial write of DNSMSG */ |
1178 | 0 | nbufs = 1; |
1179 | 0 | bufs[0].base = req->uvbuf.base + (r - 2); |
1180 | 0 | bufs[0].len = req->uvbuf.len - (r - 2); |
1181 | 0 | } else if (!(r == UV_ENOSYS || r == UV_EAGAIN)) { |
1182 | 0 | return isc_uverr2result(r); |
1183 | 0 | } |
1184 | 0 | } |
1185 | | |
1186 | 0 | if (!sock->client && sock->reading) { |
1187 | 0 | sock->reading_throttled = true; |
1188 | 0 | isc__nm_stop_reading(sock); |
1189 | 0 | } |
1190 | |
|
1191 | 0 | isc__nmsocket_log(sock, ISC_LOG_DEBUG(3), |
1192 | 0 | "%sthe other side is not " |
1193 | 0 | "reading the data, switching to uv_write()", |
1194 | 0 | !sock->client && sock->reading |
1195 | 0 | ? "throttling TCP connection, " |
1196 | 0 | : ""); |
1197 | |
|
1198 | 0 | r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, bufs, nbufs, |
1199 | 0 | tcp_send_cb); |
1200 | 0 | if (r < 0) { |
1201 | 0 | return isc_uverr2result(r); |
1202 | 0 | } |
1203 | | |
1204 | 0 | isc_nm_timer_create(req->handle, isc__nmsocket_writetimeout_cb, req, |
1205 | 0 | &req->timer); |
1206 | 0 | if (sock->write_timeout > 0) { |
1207 | 0 | isc_nm_timer_start(req->timer, sock->write_timeout); |
1208 | 0 | } |
1209 | |
|
1210 | 0 | return ISC_R_SUCCESS; |
1211 | 0 | } |
1212 | | |
1213 | | static void |
1214 | 0 | tcp_close_sock(isc_nmsocket_t *sock) { |
1215 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
1216 | 0 | REQUIRE(sock->tid == isc_tid()); |
1217 | 0 | REQUIRE(sock->closing); |
1218 | 0 | REQUIRE(!sock->closed); |
1219 | |
|
1220 | 0 | sock->closed = true; |
1221 | 0 | sock->connected = false; |
1222 | |
|
1223 | 0 | isc__nm_incstats(sock, STATID_CLOSE); |
1224 | |
|
1225 | 0 | if (sock->server != NULL) { |
1226 | 0 | if (sock->server->pquota != NULL) { |
1227 | 0 | isc__nm_decstats(sock, STATID_CLIENTS); |
1228 | 0 | isc_quota_release(sock->server->pquota); |
1229 | 0 | } |
1230 | 0 | isc__nmsocket_detach(&sock->server); |
1231 | 0 | } |
1232 | |
|
1233 | 0 | tcp_dbg_log(sock, ISC_R_SUCCESS, "TCP connection closed"); |
1234 | |
|
1235 | 0 | isc__nmsocket_prep_destroy(sock); |
1236 | 0 | } |
1237 | | |
1238 | | static void |
1239 | 0 | tcp_close_cb(uv_handle_t *handle) { |
1240 | 0 | isc_nmsocket_t *sock = uv_handle_get_data(handle); |
1241 | 0 | uv_handle_set_data(handle, NULL); |
1242 | |
|
1243 | 0 | tcp_close_sock(sock); |
1244 | 0 | } |
1245 | | |
1246 | | void |
1247 | 0 | isc__nm_tcp_close(isc_nmsocket_t *sock) { |
1248 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
1249 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
1250 | 0 | REQUIRE(!isc__nmsocket_active(sock)); |
1251 | 0 | REQUIRE(sock->tid == isc_tid()); |
1252 | 0 | REQUIRE(sock->parent == NULL); |
1253 | 0 | REQUIRE(!sock->closing); |
1254 | |
|
1255 | 0 | sock->closing = true; |
1256 | | |
1257 | | /* |
1258 | | * The order of the close operations is important here: the uv_close() |
1259 | | * calls get scheduled in reverse order, so we need to close the timer |
1260 | | * last, so that it's gone by the time we destroy the socket. |
1261 | | */ |
1262 | |
|
1263 | 0 | if (!uv_is_closing(&sock->uv_handle.handle)) { |
1264 | | /* Normal order of operation */ |
1265 | | |
1266 | | /* 2. close the socket + destroy the socket in callback */ |
1267 | 0 | isc__nmsocket_clearcb(sock); |
1268 | 0 | isc__nm_stop_reading(sock); |
1269 | 0 | sock->reading = false; |
1270 | 0 | uv_close(&sock->uv_handle.handle, tcp_close_cb); |
1271 | | |
1272 | | /* 1. close the timer */ |
1273 | 0 | isc__nmsocket_timer_stop(sock); |
1274 | 0 | uv_close((uv_handle_t *)&sock->read_timer, NULL); |
1275 | 0 | } else { |
1276 | | /* The socket was already closed elsewhere */ |
1277 | | |
1278 | | /* 1. close the timer + destroy the socket in callback */ |
1279 | 0 | isc__nmsocket_timer_stop(sock); |
1280 | 0 | uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); |
1281 | 0 | uv_close((uv_handle_t *)&sock->read_timer, tcp_close_cb); |
1282 | 0 | } |
1283 | 0 | } |
1284 | | |
1285 | | static void |
1286 | 0 | tcp_close_connect_cb(uv_handle_t *handle) { |
1287 | 0 | isc_nmsocket_t *sock = uv_handle_get_data(handle); |
1288 | |
|
1289 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
1290 | |
|
1291 | 0 | REQUIRE(sock->tid == isc_tid()); |
1292 | |
|
1293 | 0 | isc__nmsocket_prep_destroy(sock); |
1294 | 0 | isc__nmsocket_detach(&sock); |
1295 | 0 | } |
1296 | | |
1297 | | void |
1298 | 0 | isc__nm_tcp_shutdown(isc_nmsocket_t *sock) { |
1299 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
1300 | 0 | REQUIRE(sock->tid == isc_tid()); |
1301 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
1302 | | |
1303 | | /* |
1304 | | * If the socket is active, mark it inactive and |
1305 | | * continue. If it isn't active, stop now. |
1306 | | */ |
1307 | 0 | if (!sock->active) { |
1308 | 0 | return; |
1309 | 0 | } |
1310 | 0 | sock->active = false; |
1311 | |
|
1312 | 0 | INSIST(!sock->accepting); |
1313 | |
|
1314 | 0 | if (sock->connecting) { |
1315 | 0 | isc_nmsocket_t *tsock = NULL; |
1316 | 0 | isc__nmsocket_attach(sock, &tsock); |
1317 | 0 | uv_close(&sock->uv_handle.handle, tcp_close_connect_cb); |
1318 | 0 | return; |
1319 | 0 | } |
1320 | | |
1321 | | /* There's a handle attached to the socket (from accept or connect) */ |
1322 | 0 | if (sock->statichandle) { |
1323 | 0 | isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false); |
1324 | 0 | return; |
1325 | 0 | } |
1326 | | |
1327 | | /* Destroy the non-listening socket */ |
1328 | 0 | if (sock->parent == NULL) { |
1329 | 0 | isc__nmsocket_prep_destroy(sock); |
1330 | 0 | return; |
1331 | 0 | } |
1332 | | |
1333 | | /* Destroy the listening socket if on the same loop */ |
1334 | 0 | if (sock->tid == sock->parent->tid) { |
1335 | 0 | isc__nmsocket_prep_destroy(sock->parent); |
1336 | 0 | } |
1337 | 0 | } |
1338 | | |
1339 | | void |
1340 | 0 | isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { |
1341 | 0 | isc_nmsocket_t *sock; |
1342 | |
|
1343 | 0 | REQUIRE(VALID_NMHANDLE(handle)); |
1344 | 0 | sock = handle->sock; |
1345 | 0 | REQUIRE(VALID_NMSOCK(sock)); |
1346 | 0 | REQUIRE(sock->type == isc_nm_tcpsocket); |
1347 | 0 | REQUIRE(sock->tid == isc_tid()); |
1348 | 0 | REQUIRE(!uv_is_active(&sock->uv_handle.handle)); |
1349 | |
|
1350 | 0 | sock->manual_read_timer = manual; |
1351 | 0 | } |