/src/h2o/lib/common/socket/evloop.c.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014-2016 DeNA Co., Ltd., Kazuho Oku, Fastly, Inc. |
3 | | * |
4 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | | * of this software and associated documentation files (the "Software"), to |
6 | | * deal in the Software without restriction, including without limitation the |
7 | | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
8 | | * sell copies of the Software, and to permit persons to whom the Software is |
9 | | * furnished to do so, subject to the following conditions: |
10 | | * |
11 | | * The above copyright notice and this permission notice shall be included in |
12 | | * all copies or substantial portions of the Software. |
13 | | * |
14 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
19 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
20 | | * IN THE SOFTWARE. |
21 | | */ |
22 | | #include <netinet/in.h> |
23 | | #include <netinet/tcp.h> |
24 | | #include <stdlib.h> |
25 | | #include <sys/time.h> |
26 | | #include <sys/uio.h> |
27 | | #include <unistd.h> |
28 | | #if H2O_USE_KTLS |
29 | | #include <linux/tls.h> |
30 | | #endif |
31 | | #include "cloexec.h" |
32 | | #include "h2o/linklist.h" |
33 | | |
34 | | #if !defined(H2O_USE_ACCEPT4) |
35 | | #ifdef __linux__ |
36 | | #if defined(__ANDROID__) && __ANDROID_API__ < 21 |
37 | | #define H2O_USE_ACCEPT4 0 |
38 | | #else |
39 | | #define H2O_USE_ACCEPT4 1 |
40 | | #endif |
41 | | #elif __FreeBSD__ >= 10 |
42 | | #define H2O_USE_ACCEPT4 1 |
43 | | #else |
44 | | #define H2O_USE_ACCEPT4 0 |
45 | | #endif |
46 | | #endif |
47 | | |
/**
 * evloop-specific extension of `h2o_socket_t`; allocated by `create_socket`
 */
struct st_h2o_evloop_socket_t {
    h2o_socket_t super;
    /* file descriptor owned by this socket object; -1 once closed or exported */
    int fd;
    /* combination of H2O_SOCKET_FLAG_* bits */
    int _flags;
    h2o_evloop_t *loop;
    /* upper bound passed to `on_read_core` per read-ready event */
    size_t max_read_size;
    /* intrusive list link; points to itself when not on the pending list (see `link_to_pending`) */
    struct st_h2o_evloop_socket_t *_next_pending;
    /* intrusive list link; points to itself when not on the statechanged list (see `link_to_statechanged`) */
    struct st_h2o_evloop_socket_t *_next_statechanged;
    /* bookkeeping used by `do_write` to cap bytes written per invocation of `h2o_evloop_run` */
    struct {
        uint64_t prev_loop;
        uint64_t cur_loop;
        uint64_t cur_run_count;
    } bytes_written;
    /**
     * vector to be sent (or vec.callbacks is NULL when not used)
     */
    h2o_sendvec_t sendvec;
};
66 | | |
67 | | static void link_to_pending(struct st_h2o_evloop_socket_t *sock); |
68 | | static void link_to_statechanged(struct st_h2o_evloop_socket_t *sock); |
69 | | static void write_pending(struct st_h2o_evloop_socket_t *sock); |
70 | | static h2o_evloop_t *create_evloop(size_t sz); |
71 | | static void update_now(h2o_evloop_t *loop); |
72 | | static int32_t adjust_max_wait(h2o_evloop_t *loop, int32_t max_wait); |
73 | | |
74 | | /* functions to be defined in the backends */ |
75 | | static int evloop_do_proceed(h2o_evloop_t *loop, int32_t max_wait); |
76 | | static void evloop_do_dispose(h2o_evloop_t *loop); |
77 | | static void evloop_do_on_socket_create(struct st_h2o_evloop_socket_t *sock); |
78 | | static int evloop_do_on_socket_close(struct st_h2o_evloop_socket_t *sock); |
79 | | static void evloop_do_on_socket_export(struct st_h2o_evloop_socket_t *sock); |
80 | | |
81 | | #if H2O_USE_POLL || H2O_USE_EPOLL || H2O_USE_KQUEUE |
82 | | /* explicitly specified */ |
83 | | #else |
84 | | #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) |
85 | | #define H2O_USE_KQUEUE 1 |
86 | | #elif defined(__linux) |
87 | | #define H2O_USE_EPOLL 1 |
88 | | #if defined(SO_ZEROCOPY) && defined(SO_EE_ORIGIN_ZEROCOPY) && defined(MSG_ZEROCOPY) |
89 | | #define H2O_USE_MSG_ZEROCOPY 1 |
90 | | #endif |
91 | | #else |
92 | | #define H2O_USE_POLL 1 |
93 | | #endif |
94 | | #endif |
95 | | #if !defined(H2O_USE_MSG_ZEROCOPY) |
96 | | #define H2O_USE_MSG_ZEROCOPY 0 |
97 | | #endif |
98 | | |
99 | | #if H2O_USE_POLL |
100 | | #include "evloop/poll.c.h" |
101 | | #elif H2O_USE_EPOLL |
102 | | #include "evloop/epoll.c.h" |
103 | | #elif H2O_USE_KQUEUE |
104 | | #include "evloop/kqueue.c.h" |
105 | | #else |
106 | | #error "poller not specified" |
107 | | #endif |
108 | | |
size_t h2o_evloop_socket_max_read_size = 1024 * 1024;  /* by default, we read up to 1MB at once */
size_t h2o_evloop_socket_max_write_size = 1024 * 1024; /* by default, we write up to 1MB at once */
111 | | |
112 | | void link_to_pending(struct st_h2o_evloop_socket_t *sock) |
113 | 47.5k | { |
114 | 47.5k | if (sock->_next_pending == sock) { |
115 | 46.8k | struct st_h2o_evloop_socket_t **slot = (sock->_flags & H2O_SOCKET_FLAG_IS_ACCEPTED_CONNECTION) != 0 |
116 | 46.8k | ? &sock->loop->_pending_as_server |
117 | 46.8k | : &sock->loop->_pending_as_client; |
118 | 46.8k | sock->_next_pending = *slot; |
119 | 46.8k | *slot = sock; |
120 | 46.8k | } |
121 | 47.5k | } |
122 | | |
123 | | void link_to_statechanged(struct st_h2o_evloop_socket_t *sock) |
124 | 55.1k | { |
125 | 55.1k | if (sock->_next_statechanged == sock) { |
126 | 37.5k | sock->_next_statechanged = NULL; |
127 | 37.5k | *sock->loop->_statechanged.tail_ref = sock; |
128 | 37.5k | sock->loop->_statechanged.tail_ref = &sock->_next_statechanged; |
129 | 37.5k | } |
130 | 55.1k | } |
131 | | |
/**
 * Reads from `fd` into `*input` until EAGAIN, EOF, short read, or `max_bytes` have been consumed.
 * Returns NULL on success, or one of the `h2o_socket_error_*` constants on failure.
 */
static const char *on_read_core(int fd, h2o_buffer_t **input, size_t max_bytes)
{
    ssize_t read_so_far = 0;

    while (1) {
        ssize_t rret;
        /* reserve at least 4KB (or less if the caller's limit is smaller); the buffer may hand out more */
        h2o_iovec_t buf = h2o_buffer_try_reserve(input, max_bytes < 4096 ? max_bytes : 4096);
        if (buf.base == NULL) {
            /* memory allocation failed */
            return h2o_socket_error_out_of_memory;
        }
        /* clamp the read size to something representable as an int (read(2) caps at INT_MAX anyway) and to `max_bytes` */
        size_t read_size = buf.len <= INT_MAX / 2 ? buf.len : INT_MAX / 2 + 1;
        if (read_size > max_bytes)
            read_size = max_bytes;
        /* retry on EINTR */
        while ((rret = read(fd, buf.base, read_size)) == -1 && errno == EINTR)
            ;
        if (rret == -1) {
            if (errno == EAGAIN)
                break;
            else
                return h2o_socket_error_io;
        } else if (rret == 0) {
            /* EOF: report closure only if nothing was read in this invocation */
            if (read_so_far == 0)
                return h2o_socket_error_closed; /* TODO notify close */
            break;
        }
        (*input)->size += rret;
        /* a short read means the kernel buffer has been drained; stop instead of issuing another syscall */
        if (buf.len != rret)
            break;
        read_so_far += rret;
        if (read_so_far >= max_bytes)
            break;
    }
    return NULL;
}
167 | | |
/**
 * Writes the given vector using sendmsg(2), adjusting `*bufs` / `*bufcnt` to point to the unwritten remainder.
 * Returns the number of bytes partially written into the (new) first buffer, 0 when all supplied buffers were written or the
 * socket would block (check `*bufcnt` to distinguish), or SIZE_MAX on I/O error.
 */
static size_t write_vecs(struct st_h2o_evloop_socket_t *sock, h2o_iovec_t **bufs, size_t *bufcnt, int sendmsg_flags)
{
    ssize_t wret;

    while (*bufcnt != 0) {
        /* write */
        int iovcnt = *bufcnt < IOV_MAX ? (int)*bufcnt : IOV_MAX;
        struct msghdr msg;
        do {
            /* rebuild msg each attempt; retried on EINTR */
            msg = (struct msghdr){.msg_iov = (struct iovec *)*bufs, .msg_iovlen = iovcnt};
        } while ((wret = sendmsg(sock->fd, &msg, sendmsg_flags)) == -1 && errno == EINTR);
        SOCKET_PROBE(WRITEV, &sock->super, wret);
        H2O_LOG_SOCK(writev, &sock->super, { PTLS_LOG_ELEMENT_SIGNED(ret, wret); });

        /* EAGAIN is not an error; it just means "nothing more can be written now" */
        if (wret == -1)
            return errno == EAGAIN ? 0 : SIZE_MAX;

        /* adjust the buffer, doing the write once again only if all IOV_MAX buffers being supplied were fully written */
        while ((*bufs)->len <= wret) {
            wret -= (*bufs)->len;
            ++*bufs;
            --*bufcnt;
            if (*bufcnt == 0) {
                assert(wret == 0);
                return 0;
            }
        }
        if (wret != 0) {
            /* first remaining buffer was written partially; report the offset so the caller can adjust it */
            return wret;
        } else if (iovcnt < IOV_MAX) {
            /* all buffers passed to sendmsg were written and nothing was held back by the IOV_MAX cap; kernel buffer is likely
             * full, so stop here */
            return 0;
        }
    }

    return 0;
}
204 | | |
/**
 * Writes the given vector, transparently encrypting it first when userspace TLS is in use. On return, `*bufs` / `*bufcnt` refer
 * to the cleartext not yet turned into TLS records. Returns the number of bytes of the first remaining buffer that have already
 * been consumed, or SIZE_MAX on fatal error.
 */
static size_t write_core(struct st_h2o_evloop_socket_t *sock, h2o_iovec_t **bufs, size_t *bufcnt)
{
    /* cleartext path: no TLS, or TLS records are generated by the kernel (kTLS) */
    if (sock->super.ssl == NULL || sock->super.ssl->offload == H2O_SOCKET_SSL_OFFLOAD_ON) {
        if (sock->super.ssl != NULL)
            assert(!has_pending_ssl_bytes(sock->super.ssl));
        return write_vecs(sock, bufs, bufcnt, 0);
    }

    /* SSL: flatten given vector if that has not been done yet; `*bufs` is guaranteed to have one slot available at the end; see
     * `do_write_with_sendvec`, `init_write_buf`. */
    if (sock->sendvec.callbacks != NULL) {
        size_t veclen = flatten_sendvec(&sock->super, &sock->sendvec);
        if (veclen == SIZE_MAX)
            return SIZE_MAX;
        sock->sendvec.callbacks = NULL;
        (*bufs)[(*bufcnt)++] = h2o_iovec_init(sock->super._write_buf.flattened, veclen);
    }

    /* continue encrypting and writing, until we run out of data */
    size_t first_buf_written = 0;
    while (1) {
        /* write bytes already encrypted, if any */
        if (has_pending_ssl_bytes(sock->super.ssl)) {
            h2o_iovec_t encbuf = h2o_iovec_init(sock->super.ssl->output.buf.base + sock->super.ssl->output.pending_off,
                                                sock->super.ssl->output.buf.off - sock->super.ssl->output.pending_off);
            h2o_iovec_t *encbufs = &encbuf;
            size_t encbufcnt = 1, enc_written;
            int sendmsg_flags = 0;
#if H2O_USE_MSG_ZEROCOPY
            /* Use zero copy if amount of data to be written is no less than 4KB, and if the memory can be returned to
             * `h2o_socket_zerocopy_buffer_allocator`. Latter is a short-cut. It is only under exceptional conditions (e.g., TLS
             * stack adding a post-handshake message) that we'd see the buffer grow to a size that cannot be returned to the
             * recycling allocator.
             * Even though https://www.kernel.org/doc/html/v5.17/networking/msg_zerocopy.html recommends 10KB, 4KB has been chosen
             * as the threshold, because we are likely to be using the non-temporal aesgcm engine and tx-nocache-copy, in which case
             * copying sendmsg is going to be more costly than what the kernel documentation assumes. In a synthetic benchmark,
             * changing from 16KB to 4KB increased the throughput by ~10%. */
            if (sock->super.ssl->output.allocated_for_zerocopy && encbuf.len >= 4096 &&
                sock->super.ssl->output.buf.capacity == h2o_socket_zerocopy_buffer_allocator.conf->memsize)
                sendmsg_flags = MSG_ZEROCOPY;
#endif
            if ((enc_written = write_vecs(sock, &encbufs, &encbufcnt, sendmsg_flags)) == SIZE_MAX) {
                dispose_ssl_output_buffer(sock->super.ssl);
                return SIZE_MAX;
            }
            /* the kernel retains a reference to the buffer whenever MSG_ZEROCOPY was used and bytes were accepted */
            if (sendmsg_flags != 0 && (encbufcnt == 0 || enc_written > 0)) {
                zerocopy_buffers_push(sock->super._zerocopy, sock->super.ssl->output.buf.base);
                if (!sock->super.ssl->output.zerocopy_owned) {
                    sock->super.ssl->output.zerocopy_owned = 1;
                    ++h2o_socket_num_zerocopy_buffers_inflight;
                }
            }
            /* if write is incomplete, record the advance and bail out */
            if (encbufcnt != 0) {
                sock->super.ssl->output.pending_off += enc_written;
                break;
            }
            /* succeeded in writing all the encrypted data; free the buffer */
            dispose_ssl_output_buffer(sock->super.ssl);
        }
        /* bail out if complete */
        if (*bufcnt == 0 && sock->sendvec.callbacks == NULL)
            break;
        /* convert more cleartext to TLS records if possible, or bail out on fatal error */
        if ((first_buf_written = generate_tls_records(&sock->super, bufs, bufcnt, first_buf_written)) == SIZE_MAX)
            break;
        /* as an optimization, if we have a flattened vector, release memory as soon as they have been encrypted */
        if (*bufcnt == 0 && sock->super._write_buf.flattened != NULL) {
            h2o_mem_free_recycle(&h2o_socket_ssl_buffer_allocator, sock->super._write_buf.flattened);
            sock->super._write_buf.flattened = NULL;
        }
    }

    return first_buf_written;
}
280 | | |
281 | | /** |
282 | | * Sends contents of sendvec, and returns if operation has been successful, either completely or partially. Upon completion, |
283 | | * `sendvec.vec.callbacks` is reset to NULL. |
284 | | */ |
285 | | static int sendvec_core(struct st_h2o_evloop_socket_t *sock) |
286 | 0 | { |
287 | 0 | size_t bytes_sent; |
288 | |
|
289 | 0 | assert(sock->sendvec.len != 0); |
290 | | |
291 | | /* send, and return an error if failed */ |
292 | 0 | if ((bytes_sent = sock->sendvec.callbacks->send_(&sock->sendvec, sock->fd, sock->sendvec.len)) == SIZE_MAX) |
293 | 0 | return 0; |
294 | | |
295 | | /* update offset, and return if we are not done yet */ |
296 | 0 | if (sock->sendvec.len != 0) |
297 | 0 | return 1; |
298 | | |
299 | | /* operation complete; mark as such */ |
300 | 0 | sock->sendvec.callbacks = NULL; |
301 | 0 | return 1; |
302 | 0 | } |
303 | | |
/**
 * Invoked by the event loop when the socket has become writable while a write was in flight; flushes buffered data / sendvec and
 * schedules the write-completion notification once everything (or an error) has been observed.
 */
void write_pending(struct st_h2o_evloop_socket_t *sock)
{
    assert(sock->super._cb.write != NULL);

    /* write from buffer, if we have anything */
    int ssl_needs_flatten = sock->sendvec.callbacks != NULL && sock->super.ssl != NULL
#if H2O_USE_KTLS
                            && sock->super.ssl->offload != H2O_SOCKET_SSL_OFFLOAD_ON
#endif
        ;
    if (sock->super._write_buf.cnt != 0 || has_pending_ssl_bytes(sock->super.ssl) || ssl_needs_flatten) {
        size_t first_buf_written;
        if ((first_buf_written = write_core(sock, &sock->super._write_buf.bufs, &sock->super._write_buf.cnt)) != SIZE_MAX) {
            /* return if there's still pending data, adjusting buf[0] if necessary */
            if (sock->super._write_buf.cnt != 0) {
                sock->super._write_buf.bufs[0].base += first_buf_written;
                sock->super._write_buf.bufs[0].len -= first_buf_written;
                return;
            } else if (has_pending_ssl_bytes(sock->super.ssl)) {
                return;
            }
        }
    }

    /* either completed or failed */
    dispose_write_buf(&sock->super);

    /* send the vector, if we have one and if all buffered writes are complete */
    if (sock->sendvec.callbacks != NULL && sock->super._write_buf.cnt == 0 && !has_pending_ssl_bytes(sock->super.ssl)) {
        /* send, and upon partial send, return without changing state for another round */
        if (sendvec_core(sock) && sock->sendvec.callbacks != NULL)
            return;
    }

    /* operation completed or failed, schedule notification; a non-zero `_write_buf.cnt` here signals failure (see
     * `report_early_write_error`) */
    SOCKET_PROBE(WRITE_COMPLETE, &sock->super, sock->super._write_buf.cnt == 0 && !has_pending_ssl_bytes(sock->super.ssl));
    H2O_LOG_SOCK(write_complete, &sock->super,
                 { PTLS_LOG_ELEMENT_BOOL(success, sock->super._write_buf.cnt == 0 && !has_pending_ssl_bytes(sock->super.ssl)); });
    sock->bytes_written.cur_loop = sock->super.bytes_written;
    sock->_flags |= H2O_SOCKET_FLAG_IS_WRITE_NOTIFY;
    link_to_pending(sock);
    link_to_statechanged(sock); /* might need to disable the write polling */
}
347 | | |
/**
 * Invoked by the event loop when the socket becomes readable; reads available bytes (unless DONT_READ is set), decrypts them when
 * TLS is established, and always invokes the read callback.
 */
static void read_on_ready(struct st_h2o_evloop_socket_t *sock)
{
    const char *err = 0;
    size_t prev_size = sock->super.input->size;

    if ((sock->_flags & H2O_SOCKET_FLAG_DONT_READ) != 0)
        goto Notify;

    /* read into the cleartext buffer directly, or into the encrypted-input buffer when TLS is in use */
    if ((err = on_read_core(sock->fd, sock->super.ssl == NULL ? &sock->super.input : &sock->super.ssl->input.encrypted,
                            sock->max_read_size)) != NULL)
        goto Notify;

    /* decrypt only after the handshake is done (handshake.cb == NULL) */
    if (sock->super.ssl != NULL && sock->super.ssl->handshake.cb == NULL)
        err = decode_ssl_input(&sock->super);

Notify:
    /* the application may get notified even if no new data is available. The
     * behavior is intentional; it is designed as such so that the applications
     * can update their timeout counters when a partial SSL record arrives.
     */
    sock->super.bytes_read += sock->super.input->size - prev_size;
    sock->super._cb.read(&sock->super, err);
}
371 | | |
372 | | void do_dispose_socket(h2o_socket_t *_sock) |
373 | 13.2k | { |
374 | 13.2k | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
375 | | |
376 | 13.2k | dispose_write_buf(&sock->super); |
377 | | |
378 | 13.2k | sock->_flags = H2O_SOCKET_FLAG_IS_DISPOSED | (sock->_flags & H2O_SOCKET_FLAG__EPOLL_IS_REGISTERED); |
379 | | |
380 | | /* Give backends chance to do the necessary cleanup, as well as giving them chance to switch to their own disposal method; e.g., |
381 | | * shutdown(SHUT_RDWR) with delays to reclaim all zero copy buffers. */ |
382 | 13.2k | if (evloop_do_on_socket_close(sock)) |
383 | 0 | return; |
384 | | |
385 | | /* immediate close */ |
386 | 13.2k | if (sock->fd != -1) { |
387 | 13.2k | close(sock->fd); |
388 | 13.2k | sock->fd = -1; |
389 | 13.2k | } |
390 | 13.2k | link_to_statechanged(sock); |
391 | 13.2k | } |
392 | | |
393 | | void report_early_write_error(h2o_socket_t *_sock) |
394 | 419 | { |
395 | 419 | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
396 | | |
397 | | /* fill in _wreq.bufs with fake data to indicate error */ |
398 | 419 | sock->super._write_buf.bufs = sock->super._write_buf.smallbufs; |
399 | 419 | sock->super._write_buf.cnt = 1; |
400 | 419 | *sock->super._write_buf.bufs = h2o_iovec_init(H2O_STRLIT("deadbeef")); |
401 | 419 | sock->_flags |= H2O_SOCKET_FLAG_IS_WRITE_NOTIFY; |
402 | 419 | link_to_pending(sock); |
403 | 419 | } |
404 | | |
/**
 * Entry point for writes: attempts an immediate write (subject to the per-loop-iteration byte cap), falling back to buffering the
 * remainder and letting the poller drive `write_pending`.
 */
void do_write(h2o_socket_t *_sock, h2o_iovec_t *bufs, size_t bufcnt)
{
    struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock;
    size_t first_buf_written;

    /* Don't write too much; if more than 1MB have been already written in the current invocation of `h2o_evloop_run`, wait until
     * the event loop notifies us that the socket is writable. */
    if (sock->bytes_written.cur_run_count != sock->loop->run_count) {
        /* first write in this run of the loop; reset the accounting window */
        sock->bytes_written.prev_loop = sock->bytes_written.cur_loop;
        sock->bytes_written.cur_run_count = sock->loop->run_count;
    } else if (sock->bytes_written.cur_loop - sock->bytes_written.prev_loop >= h2o_evloop_socket_max_write_size) {
        /* cap reached; buffer everything and defer to the poller */
        init_write_buf(&sock->super, bufs, bufcnt, 0);
        goto Schedule_Write;
    }

    /* try to write now */
    if ((first_buf_written = write_core(sock, &bufs, &bufcnt)) == SIZE_MAX) {
        report_early_write_error(&sock->super);
        return;
    }
    if (bufcnt == 0 && !has_pending_ssl_bytes(sock->super.ssl)) {
        /* write complete, schedule the callback */
        if (sock->super._write_buf.flattened != NULL) {
            h2o_mem_free_recycle(&h2o_socket_ssl_buffer_allocator, sock->super._write_buf.flattened);
            sock->super._write_buf.flattened = NULL;
        }
        /* if a sendvec remains, try sending it now; partial progress defers the rest to the poller */
        if (sock->sendvec.callbacks != NULL) {
            if (!sendvec_core(sock)) {
                report_early_write_error(&sock->super);
                return;
            }
            if (sock->sendvec.callbacks != NULL)
                goto Schedule_Write;
        }
        sock->bytes_written.cur_loop = sock->super.bytes_written;
        sock->_flags |= H2O_SOCKET_FLAG_IS_WRITE_NOTIFY;
        link_to_pending(sock);
        return;
    }

    /* setup the buffer to send pending data */
    init_write_buf(&sock->super, bufs, bufcnt, first_buf_written);

Schedule_Write:
    link_to_statechanged(sock);
}
451 | | |
452 | | static int can_tls_offload(h2o_socket_t *sock) |
453 | 0 | { |
454 | | #if H2O_USE_KTLS |
455 | | if (sock->ssl->offload != H2O_SOCKET_SSL_OFFLOAD_NONE && sock->ssl->ptls != NULL) { |
456 | | ptls_cipher_suite_t *cipher = ptls_get_cipher(sock->ssl->ptls); |
457 | | switch (cipher->id) { |
458 | | case PTLS_CIPHER_SUITE_AES_128_GCM_SHA256: |
459 | | case PTLS_CIPHER_SUITE_AES_256_GCM_SHA384: |
460 | | return 1; |
461 | | default: |
462 | | break; |
463 | | } |
464 | | } |
465 | | #endif |
466 | |
|
467 | 0 | return 0; |
468 | 0 | } |
469 | | |
#if H2O_USE_KTLS
/**
 * Attempts to switch the TX side of an established TLS/1.3 AES-GCM connection to kernel TLS, resolving the OFFLOAD_TBD state to
 * either OFFLOAD_ON or OFFLOAD_NONE. Key material is wiped from the stack before returning.
 */
static void switch_to_ktls(struct st_h2o_evloop_socket_t *sock)
{
    assert(sock->super.ssl->offload == H2O_SOCKET_SSL_OFFLOAD_TBD);

    /* Postpone the decision, when we are still in the early stages of the connection, as we want to use userspace TLS for
     * generating small TLS records. TODO: integrate with TLS record size calculation logic. */
    if (sock->super.bytes_written < 65536)
        return;

    /* load the key to the kernel */
    struct {
        uint8_t key[PTLS_MAX_SECRET_SIZE];
        uint8_t iv[PTLS_MAX_DIGEST_SIZE];
        uint64_t seq;
        union {
            struct tls12_crypto_info_aes_gcm_128 aesgcm128;
            struct tls12_crypto_info_aes_gcm_256 aesgcm256;
        } tx_params;
        size_t tx_params_size;
    } keys;

    /* at the moment, only TLS/1.3 connections using aes-gcm is supported */
    if (sock->super.ssl->ptls == NULL)
        goto Fail;
    ptls_cipher_suite_t *cipher = ptls_get_cipher(sock->super.ssl->ptls);
    switch (cipher->id) {
    case PTLS_CIPHER_SUITE_AES_128_GCM_SHA256:
    case PTLS_CIPHER_SUITE_AES_256_GCM_SHA384:
        break;
    default:
        goto Fail;
    }
    if (ptls_get_traffic_keys(sock->super.ssl->ptls, 1, keys.key, keys.iv, &keys.seq) != 0)
        goto Fail;
    keys.seq = htobe64(keys.seq); /* converted to big endian ASAP */

/* populates `keys.tx_params.target` from the extracted key/iv/seq; the 12-byte TLS/1.3 IV splits into a 4-byte salt and an
 * 8-byte per-record IV, as required by the kernel's tls12_crypto_info layout */
#define SETUP_TX_PARAMS(target, type)                                                                                              \
    do {                                                                                                                           \
        keys.tx_params.target.info.version = TLS_1_3_VERSION;                                                                      \
        keys.tx_params.target.info.cipher_type = type;                                                                             \
        H2O_BUILD_ASSERT(sizeof(keys.tx_params.target.key) == cipher->aead->key_size);                                             \
        memcpy(keys.tx_params.target.key, keys.key, cipher->aead->key_size);                                                       \
        H2O_BUILD_ASSERT(cipher->aead->iv_size == 12);                                                                             \
        H2O_BUILD_ASSERT(sizeof(keys.tx_params.target.salt) == 4);                                                                 \
        memcpy(keys.tx_params.target.salt, keys.iv, 4);                                                                            \
        H2O_BUILD_ASSERT(sizeof(keys.tx_params.target.iv) == 8);                                                                   \
        memcpy(keys.tx_params.target.iv, keys.iv + 4, 8);                                                                          \
        H2O_BUILD_ASSERT(sizeof(keys.tx_params.target.rec_seq) == sizeof(keys.seq));                                               \
        memcpy(keys.tx_params.target.rec_seq, &keys.seq, sizeof(keys.seq));                                                        \
        keys.tx_params_size = sizeof(keys.tx_params.target);                                                                       \
    } while (0)
    switch (cipher->id) {
    case PTLS_CIPHER_SUITE_AES_128_GCM_SHA256:
        SETUP_TX_PARAMS(aesgcm128, TLS_CIPHER_AES_GCM_128);
        break;
    case PTLS_CIPHER_SUITE_AES_256_GCM_SHA384:
        SETUP_TX_PARAMS(aesgcm256, TLS_CIPHER_AES_GCM_256);
        break;
    default:
        goto Fail;
    }
#undef SETUP_TX_PARAMS

    /* set to kernel */
    if (setsockopt(sock->fd, SOL_TCP, TCP_ULP, "tls", sizeof("tls")) != 0)
        goto Fail;
    if (setsockopt(sock->fd, SOL_TLS, TLS_TX, &keys.tx_params, keys.tx_params_size) != 0)
        goto Fail;
    sock->super.ssl->offload = H2O_SOCKET_SSL_OFFLOAD_ON;

Exit:
    /* wipe key material regardless of outcome */
    ptls_clear_memory(&keys, sizeof(keys));
    return;

Fail:
    sock->super.ssl->offload = H2O_SOCKET_SSL_OFFLOAD_NONE;
    goto Exit;
}
#endif
550 | | |
/**
 * `bufs` should be an array capable of storing `bufcnt + 1` objects, as we will be flattening `sendvec` at the end of `bufs`
 * before encryption; see `write_core`.
 * Returns a boolean indicating whether the sendvec-based path could be used; a zero return tells the caller to fall back to an
 * ordinary write.
 */
static int do_write_with_sendvec(h2o_socket_t *_sock, h2o_iovec_t *bufs, size_t bufcnt, h2o_sendvec_t *sendvec)
{
    struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock;

    assert(sendvec->callbacks->read_ != NULL);
    assert(sock->sendvec.callbacks == NULL); /* only one in-flight sendvec per socket */

    /* If userspace TLS is used, rely on `read_` which is a mandatory callback. Otherwise, rely on `send_` if it is available. */
    if (sock->super.ssl != NULL) {
#if H2O_USE_KTLS
        if (sock->super.ssl->offload == H2O_SOCKET_SSL_OFFLOAD_TBD)
            switch_to_ktls(sock);
        if (sock->super.ssl->offload == H2O_SOCKET_SSL_OFFLOAD_ON && sendvec->callbacks->send_ == NULL)
            return 0;
#endif
    } else {
        if (sendvec->callbacks->send_ == NULL)
            return 0;
    }

    /* handling writes with sendvec, here */
    sock->sendvec = *sendvec;
    do_write(&sock->super, bufs, bufcnt);

    return 1;
}
581 | | |
582 | | int h2o_socket_get_fd(h2o_socket_t *_sock) |
583 | 2.88k | { |
584 | 2.88k | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
585 | 2.88k | return sock->fd; |
586 | 2.88k | } |
587 | | |
588 | | void do_read_start(h2o_socket_t *_sock) |
589 | 27.7k | { |
590 | 27.7k | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
591 | | |
592 | 27.7k | link_to_statechanged(sock); |
593 | 27.7k | } |
594 | | |
595 | | void do_read_stop(h2o_socket_t *_sock) |
596 | 14.1k | { |
597 | 14.1k | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
598 | | |
599 | 14.1k | sock->_flags &= ~H2O_SOCKET_FLAG_IS_READ_READY; |
600 | 14.1k | link_to_statechanged(sock); |
601 | 14.1k | } |
602 | | |
603 | | void h2o_socket_dont_read(h2o_socket_t *_sock, int dont_read) |
604 | 0 | { |
605 | 0 | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
606 | |
|
607 | 0 | if (dont_read) { |
608 | 0 | sock->_flags |= H2O_SOCKET_FLAG_DONT_READ; |
609 | 0 | } else { |
610 | 0 | sock->_flags &= ~H2O_SOCKET_FLAG_DONT_READ; |
611 | 0 | } |
612 | 0 | } |
613 | | |
614 | | int do_export(h2o_socket_t *_sock, h2o_socket_export_t *info) |
615 | 0 | { |
616 | 0 | struct st_h2o_evloop_socket_t *sock = (void *)_sock; |
617 | |
|
618 | 0 | assert((sock->_flags & H2O_SOCKET_FLAG_IS_DISPOSED) == 0); |
619 | 0 | evloop_do_on_socket_export(sock); |
620 | 0 | sock->_flags = H2O_SOCKET_FLAG_IS_DISPOSED | (sock->_flags & H2O_SOCKET_FLAG__EPOLL_IS_REGISTERED); |
621 | |
|
622 | 0 | info->fd = sock->fd; |
623 | 0 | sock->fd = -1; |
624 | |
|
625 | 0 | return 0; |
626 | 0 | } |
627 | | |
628 | | h2o_socket_t *do_import(h2o_loop_t *loop, h2o_socket_export_t *info) |
629 | 0 | { |
630 | 0 | return h2o_evloop_socket_create(loop, info->fd, 0); |
631 | 0 | } |
632 | | |
633 | | h2o_loop_t *h2o_socket_get_loop(h2o_socket_t *_sock) |
634 | 2.88k | { |
635 | 2.88k | struct st_h2o_evloop_socket_t *sock = (void *)_sock; |
636 | 2.88k | return sock->loop; |
637 | 2.88k | } |
638 | | |
639 | | socklen_t get_sockname_uncached(h2o_socket_t *_sock, struct sockaddr *sa) |
640 | 0 | { |
641 | 0 | struct st_h2o_evloop_socket_t *sock = (void *)_sock; |
642 | 0 | socklen_t len = sizeof(struct sockaddr_storage); |
643 | 0 | if (getsockname(sock->fd, sa, &len) != 0) |
644 | 0 | return 0; |
645 | 0 | return len; |
646 | 0 | } |
647 | | |
648 | | socklen_t get_peername_uncached(h2o_socket_t *_sock, struct sockaddr *sa) |
649 | 1.02k | { |
650 | 1.02k | struct st_h2o_evloop_socket_t *sock = (void *)_sock; |
651 | 1.02k | socklen_t len = sizeof(struct sockaddr_storage); |
652 | 1.02k | if (getpeername(sock->fd, sa, &len) != 0) |
653 | 0 | return 0; |
654 | 1.02k | return len; |
655 | 1.02k | } |
656 | | |
657 | | static struct st_h2o_evloop_socket_t *create_socket(h2o_evloop_t *loop, int fd, int flags) |
658 | 13.2k | { |
659 | 13.2k | struct st_h2o_evloop_socket_t *sock; |
660 | | |
661 | 13.2k | sock = h2o_mem_alloc(sizeof(*sock)); |
662 | 13.2k | memset(sock, 0, sizeof(*sock)); |
663 | 13.2k | h2o_buffer_init(&sock->super.input, &h2o_socket_buffer_prototype); |
664 | 13.2k | sock->loop = loop; |
665 | 13.2k | sock->fd = fd; |
666 | 13.2k | sock->_flags = flags; |
667 | 13.2k | sock->max_read_size = h2o_evloop_socket_max_read_size; /* by default, we read up to 1MB at once */ |
668 | 13.2k | sock->_next_pending = sock; |
669 | 13.2k | sock->_next_statechanged = sock; |
670 | | |
671 | 13.2k | evloop_do_on_socket_create(sock); |
672 | | |
673 | 13.2k | return sock; |
674 | 13.2k | } |
675 | | |
/**
 * Sets TCP_NODELAY if the given file descriptor is likely to be a TCP socket. The intent of this function is to reduce number of
 * unnecessary system calls: when the address family proves the socket cannot be TCP (neither AF_INET nor AF_INET6), skip the
 * setsockopt altogether; when in doubt (`sa == NULL`), just issue it.
 */
static void set_nodelay_if_likely_tcp(int fd, struct sockaddr *sa)
{
    if (sa != NULL) {
        int maybe_tcp = sa->sa_family == AF_INET || sa->sa_family == AF_INET6;
        if (!maybe_tcp)
            return;
    }

    int on = 1;
    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
}
689 | | |
690 | | h2o_socket_t *h2o_evloop_socket_create(h2o_evloop_t *loop, int fd, int flags) |
691 | 10.6k | { |
692 | | /* It is the reponsibility of the event loop to modify the properties of a socket for its use (e.g., set O_NONBLOCK). */ |
693 | 10.6k | fcntl(fd, F_SETFL, O_NONBLOCK); |
694 | 10.6k | set_nodelay_if_likely_tcp(fd, NULL); |
695 | | |
696 | 10.6k | return &create_socket(loop, fd, flags)->super; |
697 | 10.6k | } |
698 | | |
/**
 * Accepts a connection on the given listener, returning a new socket object (with peername and log-state initialized) or NULL on
 * failure. Uses accept4(2) where available; otherwise falls back to cloexec_accept + fcntl.
 */
h2o_socket_t *h2o_evloop_socket_accept(h2o_socket_t *_listener)
{
    struct st_h2o_evloop_socket_t *listener = (struct st_h2o_evloop_socket_t *)_listener;
    int fd;
    h2o_socket_t *sock;
    union {
        struct sockaddr sa;
        struct sockaddr_in sin4;
        struct sockaddr_in6 sin6;
    } peeraddr;
    socklen_t peeraddrlen = sizeof(peeraddr);

#if H2O_USE_ACCEPT4
    if ((fd = accept4(listener->fd, &peeraddr.sa, &peeraddrlen, SOCK_NONBLOCK | SOCK_CLOEXEC)) == -1)
        return NULL;
    sock = &create_socket(listener->loop, fd, H2O_SOCKET_FLAG_IS_ACCEPTED_CONNECTION)->super;
#else
    if ((fd = cloexec_accept(listener->fd, &peeraddr.sa, &peeraddrlen)) == -1)
        return NULL;
    fcntl(fd, F_SETFL, O_NONBLOCK);
    sock = &create_socket(listener->loop, fd, H2O_SOCKET_FLAG_IS_ACCEPTED_CONNECTION)->super;
#endif
    /* a length larger than the union means the address was truncated; treat it as unknown */
    if (peeraddrlen <= sizeof(peeraddr)) {
        h2o_socket_setpeername(sock, &peeraddr.sa, peeraddrlen);
    } else {
        peeraddr.sa.sa_family = AF_UNSPEC;
    }

    /* note: even on linux, the accepted socket might not inherit TCP_NODELAY from the listening socket; see
     * https://github.com/h2o/h2o/pull/2542#issuecomment-760700859 */
    set_nodelay_if_likely_tcp(fd, &peeraddr.sa);

    ptls_log_init_conn_state(&sock->_log_state, ptls_openssl_random_bytes);
    switch (peeraddr.sa.sa_family) {
    case AF_INET: /* store as v6-mapped v4 address */
        ptls_build_v4_mapped_v6_address(&sock->_log_state.address, &peeraddr.sin4.sin_addr);
        break;
    case AF_INET6:
        sock->_log_state.address = peeraddr.sin6.sin6_addr;
        break;
    default:
        break;
    }

    return sock;
}
745 | | |
746 | | h2o_socket_t *h2o_socket_connect(h2o_loop_t *loop, struct sockaddr *addr, socklen_t addrlen, h2o_socket_cb cb, const char **err) |
747 | 2.51k | { |
748 | 2.51k | int fd, connect_ret; |
749 | 2.51k | struct st_h2o_evloop_socket_t *sock; |
750 | | |
751 | 2.51k | if ((fd = cloexec_socket(addr->sa_family, SOCK_STREAM, 0)) == -1) { |
752 | 0 | if (err != NULL) { |
753 | 0 | *err = h2o_socket_error_socket_fail; |
754 | 0 | } |
755 | 0 | return NULL; |
756 | 0 | } |
757 | 2.51k | fcntl(fd, F_SETFL, O_NONBLOCK); |
758 | | |
759 | 2.51k | if (!((connect_ret = connect(fd, addr, addrlen)) == 0 || errno == EINPROGRESS)) { |
760 | 0 | if (err != NULL) |
761 | 0 | *err = h2o_socket_get_error_string(errno, h2o_socket_error_conn_fail); |
762 | 0 | close(fd); |
763 | 0 | return NULL; |
764 | 0 | } |
765 | | |
766 | 2.51k | sock = create_socket(loop, fd, H2O_SOCKET_FLAG_IS_CONNECTING); |
767 | 2.51k | set_nodelay_if_likely_tcp(fd, addr); |
768 | | |
769 | 2.51k | if (connect_ret == 0) { |
770 | | /* connection has been established synchronously; notify the fact without going back to epoll */ |
771 | 2.51k | sock->_flags |= H2O_SOCKET_FLAG_IS_WRITE_NOTIFY | H2O_SOCKET_FLAG_IS_CONNECTING_CONNECTED; |
772 | 2.51k | sock->super._cb.write = cb; |
773 | 2.51k | link_to_pending(sock); |
774 | 2.51k | } else { |
775 | 0 | h2o_socket_notify_write(&sock->super, cb); |
776 | 0 | } |
777 | 2.51k | return &sock->super; |
778 | 2.51k | } |
779 | | |
780 | | void h2o_evloop_socket_set_max_read_size(h2o_socket_t *_sock, size_t max_size) |
781 | 4.40k | { |
782 | 4.40k | struct st_h2o_evloop_socket_t *sock = (void *)_sock; |
783 | 4.40k | sock->max_read_size = max_size; |
784 | 4.40k | } |
785 | | |
786 | | h2o_evloop_t *create_evloop(size_t sz) |
787 | 1 | { |
788 | 1 | h2o_evloop_t *loop = h2o_mem_alloc(sz); |
789 | | |
790 | 1 | memset(loop, 0, sz); |
791 | 1 | loop->_statechanged.tail_ref = &loop->_statechanged.head; |
792 | 1 | update_now(loop); |
793 | | /* 3 levels * 32-slots => 1 second goes into 2nd, becomes O(N) above approx. 31 seconds */ |
794 | 1 | loop->_timeouts = h2o_timerwheel_create(3, loop->_now_millisec); |
795 | | |
796 | 1 | return loop; |
797 | 1 | } |
798 | | |
799 | | void update_now(h2o_evloop_t *loop) |
800 | 100k | { |
801 | 100k | gettimeofday(&loop->_tv_at, NULL); |
802 | 100k | loop->_now_nanosec = ((uint64_t)loop->_tv_at.tv_sec * 1000000 + loop->_tv_at.tv_usec) * 1000; |
803 | 100k | loop->_now_millisec = loop->_now_nanosec / 1000000; |
804 | 100k | } |
805 | | |
806 | | int32_t adjust_max_wait(h2o_evloop_t *loop, int32_t max_wait) |
807 | 37.6k | { |
808 | 37.6k | uint64_t wake_at = h2o_timerwheel_get_wake_at(loop->_timeouts); |
809 | | |
810 | 37.6k | update_now(loop); |
811 | | |
812 | 37.6k | if (wake_at <= loop->_now_millisec) { |
813 | 0 | max_wait = 0; |
814 | 37.6k | } else { |
815 | 37.6k | uint64_t delta = wake_at - loop->_now_millisec; |
816 | 37.6k | if (delta < max_wait) |
817 | 6 | max_wait = (int32_t)delta; |
818 | 37.6k | } |
819 | | |
820 | 37.6k | return max_wait; |
821 | 37.6k | } |
822 | | |
823 | | void h2o_socket_notify_write(h2o_socket_t *_sock, h2o_socket_cb cb) |
824 | 26 | { |
825 | 26 | struct st_h2o_evloop_socket_t *sock = (struct st_h2o_evloop_socket_t *)_sock; |
826 | 26 | assert(sock->super._cb.write == NULL); |
827 | 26 | assert(sock->super._write_buf.cnt == 0); |
828 | 26 | assert(!has_pending_ssl_bytes(sock->super.ssl)); |
829 | | |
830 | 26 | sock->super._cb.write = cb; |
831 | 26 | link_to_statechanged(sock); |
832 | 26 | } |
833 | | |
/* Invokes the read/write callbacks of a socket picked off one of the pending lists by `run_pending`. */
static void run_socket(struct st_h2o_evloop_socket_t *sock)
{
    if ((sock->_flags & H2O_SOCKET_FLAG_IS_DISPOSED) != 0) {
        /* is freed in updatestates phase */
        return;
    }

    /* deliver the read event; the flag is cleared before the callback, which may re-arm the socket */
    if ((sock->_flags & H2O_SOCKET_FLAG_IS_READ_READY) != 0) {
        sock->_flags &= ~H2O_SOCKET_FLAG_IS_READ_READY;
        read_on_ready(sock);
    }

    if ((sock->_flags & H2O_SOCKET_FLAG_IS_WRITE_NOTIFY) != 0) {
        const char *err = NULL;
        assert(sock->super._cb.write != NULL);
        sock->_flags &= ~H2O_SOCKET_FLAG_IS_WRITE_NOTIFY;
        if (sock->super._write_buf.cnt != 0 || has_pending_ssl_bytes(sock->super.ssl) || sock->sendvec.callbacks != NULL) {
            /* error: write-complete fired while data remains unsent; drop the buffered data and report an I/O error */
            err = h2o_socket_error_io;
            sock->super._write_buf.cnt = 0;
            if (has_pending_ssl_bytes(sock->super.ssl))
                dispose_ssl_output_buffer(sock->super.ssl);
            sock->sendvec.callbacks = NULL;
        } else if ((sock->_flags & H2O_SOCKET_FLAG_IS_CONNECTING) != 0) {
            /* completion of connect; determine error if we do not know whether the connection has been successfully established */
            if ((sock->_flags & H2O_SOCKET_FLAG_IS_CONNECTING_CONNECTED) == 0) {
                int so_err = 0;
                socklen_t l = sizeof(so_err);
                so_err = 0;
                /* SO_ERROR yields the deferred result of the non-blocking connect */
                if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, &so_err, &l) != 0 || so_err != 0)
                    err = h2o_socket_get_error_string(so_err, h2o_socket_error_conn_fail);
            }
            sock->_flags &= ~(H2O_SOCKET_FLAG_IS_CONNECTING | H2O_SOCKET_FLAG_IS_CONNECTING_CONNECTED);
        }
        on_write_complete(&sock->super, err);
    }
}
871 | | |
/* Runs the callbacks of all sockets on the two pending lists. The client-side list is fully drained on every outer
 * iteration, while only one server-side socket is processed per iteration — giving client-side events priority. */
static void run_pending(h2o_evloop_t *loop)
{
    struct st_h2o_evloop_socket_t *sock;

    while (loop->_pending_as_server != NULL || loop->_pending_as_client != NULL) {
        while ((sock = loop->_pending_as_client) != NULL) {
            loop->_pending_as_client = sock->_next_pending;
            sock->_next_pending = sock; /* a self-pointing link marks the socket as "not pending" */
            run_socket(sock);
        }
        if ((sock = loop->_pending_as_server) != NULL) {
            loop->_pending_as_server = sock->_next_pending;
            sock->_next_pending = sock;
            run_socket(sock);
        }
    }
}
889 | | |
/* Tears down an event loop: closes all sockets still pending, frees their memory, disposes backend-specific state, and
 * releases the loop itself. All application timers must have been destroyed beforehand. */
void h2o_evloop_destroy(h2o_evloop_t *loop)
{
    struct st_h2o_evloop_socket_t *sock;

    /* timeouts are governed by the application and MUST be destroyed prior to destroying the loop */
    assert(h2o_timerwheel_get_wake_at(loop->_timeouts) == UINT64_MAX);

    /* dispose all sockets still linked on the pending lists */
    while ((sock = loop->_pending_as_client) != NULL) {
        loop->_pending_as_client = sock->_next_pending;
        sock->_next_pending = sock; /* a self-pointing link marks the socket as "not pending" */
        h2o_socket_close((h2o_socket_t *)sock);
    }
    while ((sock = loop->_pending_as_server) != NULL) {
        loop->_pending_as_server = sock->_next_pending;
        sock->_next_pending = sock;
        h2o_socket_close((h2o_socket_t *)sock);
    }

    /* now that all sockets have been disposed of and placed on the state-changed list,
     * their memory can be freed by walking `_next_statechanged` */
    while ((sock = loop->_statechanged.head) != NULL) {
        loop->_statechanged.head = sock->_next_statechanged;
        free(sock);
    }

    /* dispose backend-specific data */
    evloop_do_dispose(loop);

    /* lastly we need to free loop memory */
    h2o_timerwheel_destroy(loop->_timeouts);
    free(loop);
}
924 | | |
/* Runs one iteration of the event loop: polls for I/O (waiting at most `max_wait` milliseconds), runs the pending
 * socket callbacks, then runs expired timers. Returns 0 on success, -1 if the poller failed. */
int h2o_evloop_run(h2o_evloop_t *loop, int32_t max_wait)
{
    ++loop->run_count;

    /* Update socket states, poll, set readable flags, perform pending writes. */
    if (evloop_do_proceed(loop, max_wait) != 0)
        return -1;

    /* Run the pending callbacks. */
    run_pending(loop);

    /* Run the expired timers at the same time invoking pending callbacks for every timer callback. This is a locality
     * optimization; handles things like timeout -> write -> on_write_complete for each object.
     * Expired timers are fetched and run at most 10 times, after which `h2o_evloop_run` returns even if there is a
     * pending immediate timer. By doing so, we guarantee that the server can make progress by polling the socket, doing
     * I/O, as well as running other operations coded in the caller of `h2o_evloop_run`, even if there is broken code
     * that registers an immediate timer perpetually. */
    for (int i = 0; i < 10; ++i) {
        h2o_linklist_t expired;
        h2o_linklist_init_anchor(&expired);
        h2o_timerwheel_get_expired(loop->_timeouts, loop->_now_millisec, &expired);
        if (h2o_linklist_is_empty(&expired))
            break;
        do {
            h2o_timerwheel_entry_t *timer = H2O_STRUCT_FROM_MEMBER(h2o_timerwheel_entry_t, _link, expired.next);
            h2o_linklist_unlink(&timer->_link);
            timer->cb(timer);
            /* flush callbacks made pending by this timer before moving on to the next one */
            run_pending(loop);
        } while (!h2o_linklist_is_empty(&expired));
    }

    assert(loop->_pending_as_client == NULL);
    assert(loop->_pending_as_server == NULL);

    /* update the busy-time sliding counter, if it is running */
    if (h2o_sliding_counter_is_running(&loop->exec_time_nanosec_counter)) {
        update_now(loop);
        h2o_sliding_counter_stop(&loop->exec_time_nanosec_counter, loop->_now_nanosec);
    }

    return 0;
}