/src/h2o/lib/http3/common.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2018 Fastly, Kazuho Oku |
3 | | * |
4 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | | * of this software and associated documentation files (the "Software"), to |
6 | | * deal in the Software without restriction, including without limitation the |
7 | | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
8 | | * sell copies of the Software, and to permit persons to whom the Software is |
9 | | * furnished to do so, subject to the following conditions: |
10 | | * |
11 | | * The above copyright notice and this permission notice shall be included in |
12 | | * all copies or substantial portions of the Software. |
13 | | * |
14 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
19 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
20 | | * IN THE SOFTWARE. |
21 | | */ |
22 | | #ifdef __APPLE__ |
23 | | #define __APPLE_USE_RFC_3542 /* to use IPV6_PKTINFO */ |
24 | | #endif |
25 | | #include <errno.h> |
26 | | #include <sys/types.h> |
27 | | #include <netinet/in.h> |
28 | | #include <netinet/ip.h> |
29 | | #include <netinet/udp.h> |
30 | | #include <pthread.h> |
31 | | #include <stdio.h> |
32 | | #include <sys/socket.h> |
33 | | #include <unistd.h> |
34 | | #include "picotls/openssl.h" |
35 | | #include "h2o.h" |
36 | | #include "h2o/string_.h" |
37 | | #include "h2o/http3_common.h" |
38 | | #include "h2o/http3_internal.h" |
39 | | #include "h2o/multithread.h" |
40 | | #include "../probes_.h" |
41 | | |
42 | | h2o_quic_conn_t h2o_quic_accept_conn_decryption_failed; /* sentinel: an acceptor returns the address of this object (instead of a real connection) when the Initial packet could not be decrypted */ |
43 | | h2o_http3_conn_t h2o_http3_accept_conn_closed; /* NOTE(review): presumably a sentinel as well (connection closed while being accepted) -- confirm against the users of this symbol */ |
44 | | |
45 | | struct st_h2o_http3_ingress_unistream_t { /* receive-side state attached to a unidirectional stream that was not initiated by this endpoint */ |
46 | | /** |
47 | | * back pointer to the quicly stream whose `data` member refers to this object |
48 | | */ |
49 | | quicly_stream_t *quic; |
50 | | /** |
51 | | * bytes received on the stream that have not yet been consumed by `handle_input` |
52 | | */ |
53 | | h2o_buffer_t *recvbuf; |
54 | | /** |
55 | | * A callback that passes unparsed input to be handled. `src` is set to NULL when receiving a reset. Otherwise, the callback |
   | | * advances `*src` past the bytes it has consumed; bytes left between `*src` and `src_end` are retained in `recvbuf`. The |
   | | * callback is replaced once the type of the stream is determined. |
56 | | */ |
57 | | void (*handle_input)(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src, |
58 | | const uint8_t *src_end, int is_eos); |
59 | | }; |
60 | | |
61 | | const char h2o_http3_err_frame_too_large[] = "HTTP/3 frame is too large"; /* error description used for HTTP/3 frames that are too large */ |
62 | | |
63 | | const ptls_iovec_t h2o_http3_alpn[3] = {{(void *)H2O_STRLIT("h3")}, {(void *)H2O_STRLIT("h3-29")}, {(void *)H2O_STRLIT("h3-27")}}; /* ALPN identifiers of HTTP/3: "h3" plus two draft identifiers */ |
64 | | |
65 | | static void report_sendmsg_errors(h2o_error_reporter_t *reporter, uint64_t total_successes, uint64_t cur_successes) |
66 | 0 | { |
67 | 0 | char errstr[256]; |
68 | 0 | fprintf(stderr, "sendmsg failed %" PRIu64 " time%s, succeeded: %" PRIu64 " time%s, over the last minute: %s\n", |
69 | 0 | reporter->cur_errors, reporter->cur_errors > 1 ? "s" : "", cur_successes, cur_successes > 1 ? "s" : "", |
70 | 0 | h2o_strerror_r((int)reporter->data, errstr, sizeof(errstr))); |
71 | 0 | } |
72 | | |
73 | | static h2o_error_reporter_t track_sendmsg = H2O_ERROR_REPORTER_INITIALIZER(report_sendmsg_errors); |
74 | | |
75 | | #if !H2O_USE_LIBUV |
76 | | h2o_socket_t *h2o_quic_create_client_socket(h2o_loop_t *loop, int family) |
77 | 0 | { |
78 | 0 | int fd, on = 1; |
79 | 0 | quicly_address_t addr = {.sa.sa_family = family}; |
80 | 0 | socklen_t addrlen; |
81 | |
|
82 | 0 | if ((fd = socket(family, SOCK_DGRAM, 0)) == -1) |
83 | 0 | return NULL; |
84 | | |
85 | 0 | switch (family) { |
86 | 0 | case AF_INET: |
87 | 0 | #ifdef IP_RECVTOS |
88 | 0 | setsockopt(fd, IPPROTO_IP, IP_RECVTOS, &on, sizeof(on)); |
89 | 0 | #endif |
90 | 0 | addrlen = sizeof(addr.sin); |
91 | 0 | break; |
92 | 0 | case AF_INET6: |
93 | 0 | #ifdef IPV6_V6ONLY |
94 | 0 | if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) != 0) |
95 | 0 | goto Error; |
96 | 0 | #endif |
97 | 0 | #ifdef IPV6_RECVTCLASS |
98 | 0 | setsockopt(fd, IPPROTO_IPV6, IPV6_RECVTCLASS, &on, sizeof(on)); |
99 | 0 | #endif |
100 | 0 | addrlen = sizeof(addr.sin6); |
101 | 0 | break; |
102 | 0 | default: |
103 | 0 | assert(!"unexpected address family"); |
104 | 0 | goto Error; |
105 | 0 | } |
106 | | |
107 | 0 | if (bind(fd, &addr.sa, addrlen) != 0) |
108 | 0 | goto Error; |
109 | | |
110 | 0 | return h2o_evloop_socket_create(loop, fd, H2O_SOCKET_FLAG_DONT_READ); |
111 | | |
112 | 0 | Error: { |
113 | 0 | int saved_errno = errno; |
114 | 0 | close(fd); |
115 | 0 | errno = saved_errno; |
116 | 0 | } |
117 | 0 | return NULL; |
118 | 0 | } |
119 | | #endif |
120 | | |
121 | | int h2o_quic_send_datagrams(h2o_quic_ctx_t *ctx, quicly_address_t *dest, quicly_address_t *src, struct iovec *datagrams, |
122 | | size_t num_datagrams, uint8_t ecn) |
123 | 0 | { |
124 | 0 | union { |
125 | 0 | struct cmsghdr hdr; |
126 | 0 | char buf[ |
127 | 0 | #ifdef IPV6_PKTINFO |
128 | 0 | CMSG_SPACE(sizeof(struct in6_pktinfo)) |
129 | | #elif defined(IP_PKTINFO) |
130 | | CMSG_SPACE(sizeof(struct in_pktinfo)) |
131 | | #elif defined(IP_SENDSRCADDR) |
132 | | CMSG_SPACE(sizeof(struct in_addr)) |
133 | | #else |
134 | | CMSG_SPACE(1) |
135 | | #endif |
136 | 0 | #ifdef UDP_SEGMENT |
137 | 0 | + CMSG_SPACE(sizeof(uint16_t)) |
138 | 0 | #endif |
139 | | + CMSG_SPACE(sizeof(int)) /* IP_TOS or IPV6_TCLASS */ |
140 | | + CMSG_SPACE(1) /* sentry */ |
141 | 0 | ]; |
142 | 0 | } cmsgbuf = {.buf = {} /* zero-cleared so that CMSG_NXTHDR can be used for locating the *next* cmsghdr */}; |
143 | 0 | struct msghdr mess = { |
144 | 0 | .msg_name = &dest->sa, |
145 | 0 | .msg_namelen = quicly_get_socklen(&dest->sa), |
146 | 0 | .msg_control = cmsgbuf.buf, |
147 | 0 | .msg_controllen = sizeof(cmsgbuf.buf), |
148 | 0 | }; |
149 | 0 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mess); |
150 | 0 | h2o_quic_socket_t *sock; |
151 | 0 | int ret; |
152 | |
|
153 | 0 | #define PUSH_CMSG(level, type, value) \ |
154 | 0 | do { \ |
155 | 0 | cmsg->cmsg_level = (level); \ |
156 | 0 | cmsg->cmsg_type = (type); \ |
157 | 0 | cmsg->cmsg_len = CMSG_LEN(sizeof(value)); \ |
158 | 0 | memcpy(CMSG_DATA(cmsg), &value, sizeof(value)); \ |
159 | 0 | cmsg = CMSG_NXTHDR(&mess, cmsg); \ |
160 | 0 | } while (0) |
161 | |
|
162 | 0 | if (ctx->sock.addr.ss_family == dest->sa.sa_family) { |
163 | 0 | sock = &ctx->sock; |
164 | 0 | } else if (ctx->sock_alt_family.sock != NULL && ctx->sock_alt_family.addr.ss_family == dest->sa.sa_family) { |
165 | 0 | sock = &ctx->sock_alt_family; |
166 | 0 | } else { |
167 | 0 | return 0; |
168 | 0 | } |
169 | | |
170 | | /* first CMSG is the source address */ |
171 | 0 | if (src->sa.sa_family != AF_UNSPEC) { |
172 | 0 | switch (src->sa.sa_family) { |
173 | 0 | case AF_INET: { |
174 | 0 | #if defined(IP_PKTINFO) |
175 | 0 | if (*sock->port != src->sin.sin_port) |
176 | 0 | return 0; |
177 | 0 | struct in_pktinfo info = {.ipi_spec_dst = src->sin.sin_addr}; |
178 | 0 | PUSH_CMSG(IPPROTO_IP, IP_PKTINFO, info); |
179 | | #elif defined(IP_SENDSRCADDR) |
180 | | if (*sock->port != src->sin.sin_port) |
181 | | return 0; |
182 | | struct sockaddr_in *fdaddr = (struct sockaddr_in *)&sock->addr; |
183 | | assert(fdaddr->sin_family == AF_INET); |
184 | | if (fdaddr->sin_addr.s_addr == INADDR_ANY) |
185 | | PUSH_CMSG(IPPROTO_IP, IP_SENDSRCADDR, src->sin.sin_addr); |
186 | | #else |
187 | | h2o_fatal("IP_PKTINFO not available"); |
188 | | #endif |
189 | 0 | } break; |
190 | 0 | case AF_INET6: |
191 | 0 | #ifdef IPV6_PKTINFO |
192 | 0 | if (*sock->port != src->sin6.sin6_port) |
193 | 0 | return 0; |
194 | 0 | struct in6_pktinfo info = {.ipi6_addr = src->sin6.sin6_addr}; |
195 | 0 | PUSH_CMSG(IPPROTO_IPV6, IPV6_PKTINFO, info); |
196 | | #else |
197 | | h2o_fatal("IPV6_PKTINFO not available"); |
198 | | #endif |
199 | 0 | break; |
200 | 0 | default: |
201 | 0 | h2o_fatal("unexpected address family"); |
202 | 0 | break; |
203 | 0 | } |
204 | 0 | } |
205 | | |
206 | | /* next CMSG is UDP_SEGMENT size (for GSO); assert that the input follows the expected pattern (see the doc-comment of |
207 | | * `quicly_send`), then set the CMSG and convert `datagrams` into one. */ |
208 | 0 | for (size_t i = 1; i < num_datagrams; ++i) { |
209 | 0 | assert(datagrams[i - 1].iov_base + datagrams[i - 1].iov_len == datagrams[i].iov_base); |
210 | 0 | assert(i == num_datagrams - 1 || datagrams[i].iov_len == datagrams[0].iov_len); |
211 | 0 | } |
212 | 0 | #ifdef UDP_SEGMENT |
213 | 0 | struct iovec gso_iovec; |
214 | 0 | if (num_datagrams > 1 && ctx->use_gso) { |
215 | 0 | uint16_t segsize = (uint16_t)datagrams[0].iov_len; |
216 | 0 | PUSH_CMSG(SOL_UDP, UDP_SEGMENT, segsize); |
217 | 0 | gso_iovec = (struct iovec){ |
218 | 0 | .iov_base = datagrams[0].iov_base, |
219 | 0 | .iov_len = datagrams[num_datagrams - 1].iov_base + datagrams[num_datagrams - 1].iov_len - datagrams[0].iov_base, |
220 | 0 | }; |
221 | 0 | datagrams = &gso_iovec; |
222 | 0 | num_datagrams = 1; |
223 | 0 | } |
224 | 0 | #endif |
225 | |
|
226 | 0 | if (ecn != 0) { |
227 | 0 | int tos = ecn; /* IPV6_TCLASS uses int, and draft-ietf-tsvwg-udp-ecn-05 says IP_TOS assumes int too, on all platforms */ |
228 | 0 | switch (dest->sa.sa_family) { |
229 | 0 | case AF_INET: |
230 | 0 | PUSH_CMSG(IPPROTO_IP, IP_TOS, tos); |
231 | 0 | break; |
232 | 0 | case AF_INET6: |
233 | 0 | #ifdef IPV6_TCLASS |
234 | 0 | PUSH_CMSG(IPPROTO_IPV6, IPV6_TCLASS, tos); |
235 | 0 | #endif |
236 | 0 | break; |
237 | 0 | default: |
238 | 0 | break; |
239 | 0 | } |
240 | 0 | } |
241 | | |
242 | | /* commit CMSG length */ |
243 | 0 | if ((mess.msg_controllen = (socklen_t)((char *)cmsg - (char *)cmsgbuf.buf)) == 0) |
244 | 0 | mess.msg_control = NULL; |
245 | | |
246 | | /* send datagrams */ |
247 | 0 | for (size_t i = 0; i < num_datagrams; ++i) { |
248 | 0 | mess.msg_iov = datagrams + i; |
249 | 0 | mess.msg_iovlen = 1; |
250 | 0 | while ((ret = (int)sendmsg(h2o_socket_get_fd(sock->sock), &mess, 0)) == -1 && errno == EINTR) |
251 | 0 | ; |
252 | 0 | if (ret == -1) |
253 | 0 | goto SendmsgError; |
254 | 0 | } |
255 | | |
256 | 0 | h2o_error_reporter_record_success(&track_sendmsg); |
257 | |
|
258 | 0 | return 1; |
259 | | |
260 | 0 | SendmsgError: |
261 | | /* The UDP stack returns EINVAL (linux) or EADDRNOTAVAIL (darwin, and presumably other BSD) when it was unable to use the |
262 | | * designated source address. We communicate that back to the caller so that the connection can be closed immediately. */ |
263 | 0 | if (src->sa.sa_family != AF_UNSPEC && (errno == EINVAL || errno == EADDRNOTAVAIL)) |
264 | 0 | return 0; |
265 | | |
266 | | /* Temporary failure to send a packet is not a permanent error fo the connection. (TODO do we want do something more |
267 | | * specific?) */ |
268 | | |
269 | | /* Log the number of failed invocations once per minute, if there has been such a failure. */ |
270 | 0 | h2o_error_reporter_record_error(ctx->loop, &track_sendmsg, 60000, errno); |
271 | |
|
272 | 0 | return 1; |
273 | |
|
274 | 0 | #undef PUSH_CMSG |
275 | 0 | } |
276 | | |
277 | | static inline const h2o_http3_conn_callbacks_t *get_callbacks(h2o_http3_conn_t *conn) |
278 | 0 | { |
279 | 0 | return (const h2o_http3_conn_callbacks_t *)conn->super.callbacks; |
280 | 0 | } |
281 | | |
282 | | static void ingress_unistream_on_destroy(quicly_stream_t *qs, quicly_error_t err) |
283 | 0 | { |
284 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = qs->data; |
285 | 0 | h2o_buffer_dispose(&stream->recvbuf); |
286 | 0 | free(stream); |
287 | 0 | } |
288 | | |
289 | | static void ingress_unistream_on_receive(quicly_stream_t *qs, size_t off, const void *input, size_t len) |
290 | 0 | { |
291 | 0 | h2o_http3_conn_t *conn = *quicly_get_data(qs->conn); |
292 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = qs->data; |
293 | | |
294 | | /* save received data */ |
295 | 0 | h2o_http3_update_recvbuf(&stream->recvbuf, off, input, len); |
296 | | |
297 | | /* determine bytes that can be handled */ |
298 | 0 | const uint8_t *src = (const uint8_t *)stream->recvbuf->bytes, |
299 | 0 | *src_end = src + quicly_recvstate_bytes_available(&stream->quic->recvstate); |
300 | 0 | if (src == src_end && !quicly_recvstate_transfer_complete(&stream->quic->recvstate)) |
301 | 0 | return; |
302 | | |
303 | | /* handle the bytes */ |
304 | 0 | stream->handle_input(conn, stream, &src, src_end, quicly_recvstate_transfer_complete(&stream->quic->recvstate)); |
305 | 0 | if (quicly_get_state(conn->super.quic) >= QUICLY_STATE_CLOSING) |
306 | 0 | return; |
307 | | |
308 | | /* remove bytes that have been consumed */ |
309 | 0 | size_t bytes_consumed = src - (const uint8_t *)stream->recvbuf->bytes; |
310 | 0 | if (bytes_consumed != 0) { |
311 | 0 | h2o_buffer_consume(&stream->recvbuf, bytes_consumed); |
312 | 0 | quicly_stream_sync_recvbuf(stream->quic, bytes_consumed); |
313 | 0 | } |
314 | 0 | } |
315 | | |
316 | | static void ingress_unistream_on_receive_reset(quicly_stream_t *qs, quicly_error_t err) |
317 | 0 | { |
318 | 0 | h2o_http3_conn_t *conn = *quicly_get_data(qs->conn); |
319 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = qs->data; |
320 | |
|
321 | 0 | stream->handle_input(conn, stream, NULL, NULL, 1); |
322 | 0 | } |
323 | | |
324 | | static void qpack_encoder_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, |
325 | | const uint8_t **src, const uint8_t *src_end, int is_eos) |
326 | 0 | { |
327 | 0 | if (src == NULL || is_eos) { |
328 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
329 | 0 | return; |
330 | 0 | } |
331 | | |
332 | 0 | int64_t *unblocked_stream_ids; |
333 | 0 | size_t num_unblocked; |
334 | 0 | int ret; |
335 | 0 | const char *err_desc = NULL; |
336 | 0 | if ((ret = h2o_qpack_decoder_handle_input(conn->qpack.dec, &unblocked_stream_ids, &num_unblocked, src, src_end, &err_desc)) != |
337 | 0 | 0) { |
338 | 0 | h2o_quic_close_connection(&conn->super, ret, err_desc); |
339 | 0 | return; |
340 | 0 | } |
341 | | |
342 | | /* TODO handle unblocked streams */ |
343 | 0 | } |
344 | | |
345 | | static void qpack_decoder_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, |
346 | | const uint8_t **src, const uint8_t *src_end, int is_eos) |
347 | 0 | { |
348 | 0 | if (src == NULL || is_eos) { |
349 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
350 | 0 | return; |
351 | 0 | } |
352 | | |
353 | 0 | int ret; |
354 | 0 | const char *err_desc = NULL; |
355 | 0 | if ((ret = h2o_qpack_encoder_handle_input(conn->qpack.enc, src, src_end, &err_desc)) != 0) |
356 | 0 | h2o_quic_close_connection(&conn->super, ret, err_desc); |
357 | 0 | } |
358 | | |
359 | | static void control_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, |
360 | | const uint8_t **src, const uint8_t *src_end, int is_eos) |
361 | 0 | { |
362 | 0 | if (src == NULL || is_eos) { |
363 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
364 | 0 | return; |
365 | 0 | } |
366 | | |
367 | 0 | do { |
368 | 0 | h2o_http3_read_frame_t frame; |
369 | 0 | quicly_error_t ret; |
370 | 0 | const char *err_desc = NULL; |
371 | |
|
372 | 0 | if ((ret = h2o_http3_read_frame(&frame, quicly_is_client(conn->super.quic), H2O_HTTP3_STREAM_TYPE_CONTROL, |
373 | 0 | conn->max_frame_payload_size, src, src_end, &err_desc)) != 0) { |
374 | 0 | if (ret != H2O_HTTP3_ERROR_INCOMPLETE) |
375 | 0 | h2o_quic_close_connection(&conn->super, ret, err_desc); |
376 | 0 | break; |
377 | 0 | } |
378 | 0 | if (h2o_http3_has_received_settings(conn) == (frame.type == H2O_HTTP3_FRAME_TYPE_SETTINGS) || |
379 | 0 | frame.type == H2O_HTTP3_FRAME_TYPE_DATA) { |
380 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_FRAME_UNEXPECTED, NULL); |
381 | 0 | break; |
382 | 0 | } |
383 | 0 | get_callbacks(conn)->handle_control_stream_frame(conn, frame.type, frame.payload, frame.length); |
384 | 0 | if (quicly_get_state(conn->super.quic) >= QUICLY_STATE_CLOSING) |
385 | 0 | break; |
386 | 0 | } while (*src != src_end); |
387 | 0 | } |
388 | | |
389 | | static void discard_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src, |
390 | | const uint8_t *src_end, int is_eos) |
391 | 0 | { |
392 | 0 | if (src == NULL) |
393 | 0 | return; |
394 | 0 | *src = src_end; |
395 | 0 | } |
396 | | |
397 | | static void unknown_type_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src, |
398 | | const uint8_t *src_end, int is_eos) |
399 | 0 | { |
400 | 0 | uint64_t type; |
401 | | |
402 | | /* resets are allowed at least until the type is being determined */ |
403 | 0 | if (src == NULL) |
404 | 0 | return; |
405 | | |
406 | | /* read the type, or just return if incomplete */ |
407 | 0 | if ((type = quicly_decodev(src, src_end)) == UINT64_MAX) |
408 | 0 | return; |
409 | | |
410 | 0 | switch (type) { |
411 | 0 | case H2O_HTTP3_STREAM_TYPE_CONTROL: |
412 | 0 | conn->_control_streams.ingress.control = stream; |
413 | 0 | stream->handle_input = control_stream_handle_input; |
414 | 0 | break; |
415 | 0 | case H2O_HTTP3_STREAM_TYPE_QPACK_ENCODER: |
416 | 0 | conn->_control_streams.ingress.qpack_encoder = stream; |
417 | 0 | stream->handle_input = qpack_encoder_stream_handle_input; |
418 | 0 | break; |
419 | 0 | case H2O_HTTP3_STREAM_TYPE_QPACK_DECODER: |
420 | 0 | conn->_control_streams.ingress.qpack_decoder = stream; |
421 | 0 | stream->handle_input = qpack_decoder_stream_handle_input; |
422 | 0 | break; |
423 | 0 | default: |
424 | 0 | quicly_request_stop(stream->quic, H2O_HTTP3_ERROR_STREAM_CREATION); |
425 | 0 | stream->handle_input = discard_handle_input; |
426 | 0 | break; |
427 | 0 | } |
428 | | |
429 | 0 | return stream->handle_input(conn, stream, src, src_end, is_eos); |
430 | 0 | } |
431 | | |
432 | | static void egress_unistream_on_destroy(quicly_stream_t *qs, quicly_error_t err) |
433 | 19.9k | { |
434 | 19.9k | struct st_h2o_http3_egress_unistream_t *stream = qs->data; |
435 | 19.9k | h2o_buffer_dispose(&stream->sendbuf); |
436 | 19.9k | free(stream); |
437 | 19.9k | } |
438 | | |
439 | | static void egress_unistream_on_send_shift(quicly_stream_t *qs, size_t delta) |
440 | 0 | { |
441 | 0 | struct st_h2o_http3_egress_unistream_t *stream = qs->data; |
442 | 0 | h2o_buffer_consume(&stream->sendbuf, delta); |
443 | 0 | } |
444 | | |
445 | | static void egress_unistream_on_send_emit(quicly_stream_t *qs, size_t off, void *dst, size_t *len, int *wrote_all) |
446 | 19.9k | { |
447 | 19.9k | struct st_h2o_http3_egress_unistream_t *stream = qs->data; |
448 | | |
449 | 19.9k | if (*len >= stream->sendbuf->size - off) { |
450 | 19.9k | *len = stream->sendbuf->size - off; |
451 | 19.9k | *wrote_all = 1; |
452 | 19.9k | } else { |
453 | 0 | *wrote_all = 0; |
454 | 0 | } |
455 | 19.9k | memcpy(dst, stream->sendbuf->bytes + off, *len); |
456 | 19.9k | } |
457 | | |
458 | | static void egress_unistream_on_send_stop(quicly_stream_t *qs, quicly_error_t err) |
459 | 0 | { |
460 | 0 | struct st_h2o_http3_conn_t *conn = *quicly_get_data(qs->conn); |
461 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
462 | 0 | } |
463 | | |
464 | | void h2o_http3_on_create_unidirectional_stream(quicly_stream_t *qs) |
465 | 19.9k | { |
466 | 19.9k | if (quicly_stream_is_self_initiated(qs)) { |
467 | | /* create egress unistream */ |
468 | 19.9k | static const quicly_stream_callbacks_t callbacks = {egress_unistream_on_destroy, egress_unistream_on_send_shift, |
469 | 19.9k | egress_unistream_on_send_emit, egress_unistream_on_send_stop}; |
470 | 19.9k | struct st_h2o_http3_egress_unistream_t *stream = h2o_mem_alloc(sizeof(*stream)); |
471 | 19.9k | qs->data = stream; |
472 | 19.9k | qs->callbacks = &callbacks; |
473 | 19.9k | stream->quic = qs; |
474 | 19.9k | h2o_buffer_init(&stream->sendbuf, &h2o_socket_buffer_prototype); |
475 | 19.9k | } else { |
476 | | /* create ingress unistream */ |
477 | 0 | static const quicly_stream_callbacks_t callbacks = { |
478 | 0 | ingress_unistream_on_destroy, NULL, NULL, NULL, ingress_unistream_on_receive, ingress_unistream_on_receive_reset}; |
479 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = h2o_mem_alloc(sizeof(*stream)); |
480 | 0 | qs->data = stream; |
481 | 0 | qs->callbacks = &callbacks; |
482 | 0 | stream->quic = qs; |
483 | 0 | h2o_buffer_init(&stream->recvbuf, &h2o_socket_buffer_prototype); |
484 | 0 | stream->handle_input = unknown_type_handle_input; |
485 | 0 | } |
486 | 19.9k | } |
487 | | |
488 | | static quicly_error_t open_egress_unistream(h2o_http3_conn_t *conn, struct st_h2o_http3_egress_unistream_t **stream, |
489 | | h2o_iovec_t initial_bytes) |
490 | 19.9k | { |
491 | 19.9k | quicly_stream_t *qs; |
492 | 19.9k | quicly_error_t ret; |
493 | | |
494 | 19.9k | if ((ret = quicly_open_stream(conn->super.quic, &qs, 1)) != 0) |
495 | 0 | return ret; |
496 | 19.9k | *stream = qs->data; |
497 | 19.9k | assert((*stream)->quic == qs); |
498 | | |
499 | 19.9k | h2o_buffer_append(&(*stream)->sendbuf, initial_bytes.base, initial_bytes.len); |
500 | 19.9k | return quicly_stream_sync_sendbuf((*stream)->quic, 1); |
501 | 19.9k | } |
502 | | |
503 | | static uint8_t *accept_hashkey_flatten_address(uint8_t *p, quicly_address_t *addr) |
504 | 0 | { |
505 | 0 | switch (addr->sa.sa_family) { |
506 | 0 | case AF_INET: |
507 | 0 | *p++ = 4; |
508 | 0 | memcpy(p, &addr->sin.sin_addr.s_addr, 4); |
509 | 0 | p += 4; |
510 | 0 | memcpy(p, &addr->sin.sin_port, 2); |
511 | 0 | p += 2; |
512 | 0 | break; |
513 | 0 | case AF_INET6: |
514 | 0 | *p++ = 6; |
515 | 0 | memcpy(p, addr->sin6.sin6_addr.s6_addr, 16); |
516 | 0 | p += 16; |
517 | 0 | memcpy(p, &addr->sin.sin_port, 2); |
518 | 0 | p += 2; |
519 | 0 | break; |
520 | 0 | case AF_UNSPEC: |
521 | 0 | *p++ = 0; |
522 | 0 | break; |
523 | 0 | default: |
524 | 0 | h2o_fatal("unknown protocol family"); |
525 | 0 | break; |
526 | 0 | } |
527 | 0 | return p; |
528 | 0 | } |
529 | | |
530 | | static uint64_t calc_accept_hashkey(quicly_address_t *destaddr, quicly_address_t *srcaddr, ptls_iovec_t src_cid) |
531 | 0 | { |
532 | | /* prepare key */ |
533 | 0 | static __thread EVP_CIPHER_CTX *cipher = NULL; |
534 | 0 | if (cipher == NULL) { |
535 | 0 | static uint8_t key[PTLS_AES128_KEY_SIZE]; |
536 | 0 | H2O_MULTITHREAD_ONCE({ ptls_openssl_random_bytes(key, sizeof(key)); }); |
537 | 0 | cipher = EVP_CIPHER_CTX_new(); |
538 | 0 | EVP_EncryptInit_ex(cipher, EVP_aes_128_cbc(), NULL, key, NULL); |
539 | 0 | } |
540 | |
|
541 | 0 | uint8_t buf[(1 + 16 + 2) * 2 + QUICLY_MAX_CID_LEN_V1 + PTLS_AES_BLOCK_SIZE] = {0}; |
542 | 0 | uint8_t *p = buf; |
543 | | |
544 | | /* build plaintext to encrypt */ |
545 | 0 | p = accept_hashkey_flatten_address(p, destaddr); |
546 | 0 | p = accept_hashkey_flatten_address(p, srcaddr); |
547 | 0 | memcpy(p, src_cid.base, src_cid.len); |
548 | 0 | p += src_cid.len; |
549 | 0 | assert(p <= buf + sizeof(buf)); |
550 | 0 | size_t bytes_to_encrypt = ((p - buf) + PTLS_AES_BLOCK_SIZE - 1) / PTLS_AES_BLOCK_SIZE * PTLS_AES_BLOCK_SIZE; |
551 | 0 | assert(bytes_to_encrypt <= sizeof(buf)); |
552 | | |
553 | 0 | { /* encrypt */ |
554 | 0 | EVP_EncryptInit_ex(cipher, NULL, NULL, NULL, NULL); |
555 | 0 | int bytes_encrypted = 0, ret = EVP_EncryptUpdate(cipher, buf, &bytes_encrypted, buf, (int)bytes_to_encrypt); |
556 | 0 | assert(ret); |
557 | 0 | assert(bytes_encrypted == bytes_to_encrypt); |
558 | 0 | } |
559 | | |
560 | | /* use the last `size_t` bytes of the CBC output as the result */ |
561 | 0 | uint64_t result; |
562 | 0 | memcpy(&result, buf + bytes_to_encrypt - sizeof(result), sizeof(result)); |
563 | | /* avoid 0 (used as nonexist) */ |
564 | 0 | if (result == 0) |
565 | 0 | result = 1; |
566 | 0 | return result; |
567 | 0 | } |
568 | | |
569 | | static void drop_from_acceptmap(h2o_quic_ctx_t *ctx, h2o_quic_conn_t *conn) |
570 | 6.64k | { |
571 | 6.64k | if (conn->_accept_hashkey != 0) { |
572 | 0 | khint_t iter; |
573 | 0 | if ((iter = kh_get_h2o_quic_acceptmap(ctx->conns_accepting, conn->_accept_hashkey)) != kh_end(ctx->conns_accepting)) |
574 | 0 | kh_del_h2o_quic_acceptmap(ctx->conns_accepting, iter); |
575 | 0 | conn->_accept_hashkey = 0; |
576 | 0 | } |
577 | 6.64k | } |
578 | | |
579 | | static void send_version_negotiation(h2o_quic_ctx_t *ctx, quicly_address_t *destaddr, ptls_iovec_t dest_cid, |
580 | | quicly_address_t *srcaddr, ptls_iovec_t src_cid, const uint32_t *versions) |
581 | 0 | { |
582 | 0 | uint8_t payload[QUICLY_MIN_CLIENT_INITIAL_SIZE]; |
583 | 0 | size_t payload_size = quicly_send_version_negotiation(ctx->quic, dest_cid, src_cid, versions, payload); |
584 | 0 | assert(payload_size != SIZE_MAX); |
585 | 0 | struct iovec vec = {.iov_base = payload, .iov_len = payload_size}; |
586 | 0 | h2o_quic_send_datagrams(ctx, destaddr, srcaddr, &vec, 1, 0); |
587 | 0 | return; |
588 | 0 | } |
589 | | |
590 | | static void process_packets(h2o_quic_ctx_t *ctx, quicly_address_t *destaddr, quicly_address_t *srcaddr, uint8_t ttl, |
591 | | quicly_decoded_packet_t *packets, size_t num_packets) |
592 | 0 | { |
593 | 0 | h2o_quic_conn_t *conn = NULL; |
594 | 0 | size_t accepted_packet_index = SIZE_MAX; |
595 | |
|
596 | 0 | assert(num_packets != 0); |
597 | | |
598 | 0 | if (ctx->quic_stats != NULL) { |
599 | 0 | ctx->quic_stats->packet_received += num_packets; |
600 | 0 | } |
601 | |
|
602 | | #if H2O_USE_DTRACE |
603 | | if (PTLS_UNLIKELY(H2O_H3_PACKET_RECEIVE_ENABLED())) { |
604 | | for (size_t i = 0; i != num_packets; ++i) |
605 | | H2O_H3_PACKET_RECEIVE(&destaddr->sa, &srcaddr->sa, packets[i].octets.base, packets[i].octets.len); |
606 | | } |
607 | | #endif |
608 | |
|
609 | 0 | if (packets[0].cid.src.len > QUICLY_MAX_CID_LEN_V1) |
610 | 0 | return; |
611 | | |
612 | | /* find the matching connection, by first looking at the CID (all packets as client, or Handshake, 1-RTT packets as server) */ |
613 | 0 | if (packets[0].cid.dest.plaintext.node_id == ctx->next_cid->node_id && |
614 | 0 | packets[0].cid.dest.plaintext.thread_id == ctx->next_cid->thread_id) { |
615 | 0 | khiter_t iter = kh_get_h2o_quic_idmap(ctx->conns_by_id, packets[0].cid.dest.plaintext.master_id); |
616 | 0 | if (iter != kh_end(ctx->conns_by_id)) { |
617 | 0 | conn = kh_val(ctx->conns_by_id, iter); |
618 | | /* drop long header packets with different 4-tuple than the original, or if the incoming packet might be the first- |
619 | | * flight from the client, advance to state lookup using `conns_accepting` */ |
620 | 0 | if (!quicly_is_destination(conn->quic, &destaddr->sa, &srcaddr->sa, packets)) { |
621 | 0 | if (!packets[0].cid.dest.might_be_client_generated) |
622 | 0 | return; |
623 | 0 | conn = NULL; |
624 | 0 | } |
625 | 0 | } else if (!packets[0].cid.dest.might_be_client_generated) { |
626 | | /* send stateless reset when we could not find a matching connection for a 1 RTT packet */ |
627 | 0 | if (packets[0].octets.len >= QUICLY_STATELESS_RESET_PACKET_MIN_LEN) { |
628 | 0 | uint8_t payload[QUICLY_MIN_CLIENT_INITIAL_SIZE]; |
629 | 0 | size_t payload_size = quicly_send_stateless_reset(ctx->quic, packets[0].cid.dest.encrypted.base, payload); |
630 | 0 | if (payload_size != SIZE_MAX) { |
631 | 0 | struct iovec vec = {.iov_base = payload, .iov_len = payload_size}; |
632 | 0 | h2o_quic_send_datagrams(ctx, srcaddr, destaddr, &vec, 1, 0); |
633 | 0 | } |
634 | 0 | } |
635 | 0 | return; |
636 | 0 | } |
637 | 0 | } else if (!packets[0].cid.dest.might_be_client_generated) { |
638 | | /* forward 1-RTT packets belonging to different nodes, threads */ |
639 | 0 | if (ttl == 0) |
640 | 0 | return; |
641 | 0 | uint64_t offending_node_id = packets[0].cid.dest.plaintext.node_id; |
642 | 0 | if (ctx->forward_packets != NULL && ctx->forward_packets(ctx, &offending_node_id, packets[0].cid.dest.plaintext.thread_id, |
643 | 0 | destaddr, srcaddr, ttl, packets, num_packets)) |
644 | 0 | return; |
645 | | /* non-authenticating 1-RTT packets are potentially stateless resets (FIXME handle them, note that we need to use a hashdos- |
646 | | * resistant hash map that also meets constant-time comparison requirements) */ |
647 | 0 | return; |
648 | 0 | } |
649 | | |
650 | 0 | if (conn == NULL) { |
651 | | /* Initial or 0-RTT packet, use 4-tuple to match the thread and the connection */ |
652 | 0 | assert(packets[0].cid.dest.might_be_client_generated); |
653 | 0 | uint64_t accept_hashkey = calc_accept_hashkey(destaddr, srcaddr, packets[0].cid.src); |
654 | 0 | if (ctx->accept_thread_divisor != 0) { |
655 | 0 | uint32_t offending_thread = accept_hashkey % ctx->accept_thread_divisor; |
656 | 0 | if (offending_thread != ctx->next_cid->thread_id) { |
657 | 0 | if (ctx->forward_packets != NULL) |
658 | 0 | ctx->forward_packets(ctx, NULL, offending_thread, destaddr, srcaddr, ttl, packets, num_packets); |
659 | 0 | return; |
660 | 0 | } |
661 | 0 | } |
662 | 0 | khiter_t iter = kh_get_h2o_quic_acceptmap(ctx->conns_accepting, accept_hashkey); |
663 | 0 | if (iter == kh_end(ctx->conns_accepting)) { |
664 | | /* a new connection for this thread (at least on this process); accept or delegate to newer process */ |
665 | 0 | if (ctx->acceptor != NULL) { |
666 | 0 | if (packets[0].version != 0 && !quicly_is_supported_version(packets[0].version)) { |
667 | 0 | send_version_negotiation(ctx, srcaddr, packets[0].cid.src, destaddr, packets[0].cid.dest.encrypted, |
668 | 0 | quicly_supported_versions); |
669 | 0 | return; |
670 | 0 | } |
671 | 0 | } else { |
672 | | /* This is the offending thread but it is not accepting, which means that the process (or the thread) is not acting |
673 | | * as a server (likely gracefully shutting down). Let the application process forward the packet to the next |
674 | | * generation. */ |
675 | 0 | if (ctx->forward_packets != NULL && |
676 | 0 | ctx->forward_packets(ctx, NULL, ctx->next_cid->thread_id, destaddr, srcaddr, ttl, packets, num_packets)) |
677 | 0 | return; |
678 | | /* If not forwarded, send rejection to the peer. A Version Negotiation packet that carries only a greasing version |
679 | | * number is used for the purpose, hoping that that signal will trigger immediate downgrade to HTTP/2, across the |
680 | | * broad spectrum of the client implementations than if CONNECTION_REFUSED is being used. */ |
681 | 0 | if (packets[0].version != 0) { |
682 | 0 | static const uint32_t no_versions[] = {0}; |
683 | 0 | send_version_negotiation(ctx, srcaddr, packets[0].cid.src, destaddr, packets[0].cid.dest.encrypted, |
684 | 0 | no_versions); |
685 | 0 | } |
686 | 0 | return; |
687 | 0 | } |
688 | | /* try to accept any of the Initial packets being received */ |
689 | 0 | size_t i; |
690 | 0 | for (i = 0; i != num_packets; ++i) { |
691 | 0 | if ((packets[i].octets.base[0] & QUICLY_PACKET_TYPE_BITMASK) == QUICLY_PACKET_TYPE_INITIAL && |
692 | 0 | (conn = ctx->acceptor(ctx, destaddr, srcaddr, packets + i)) != NULL) { |
693 | | /* non-null generally means success, except for H2O_QUIC_ACCEPT_CONN_DECRYPTION_FAILED */ |
694 | 0 | if (conn == &h2o_quic_accept_conn_decryption_failed) { |
695 | | /* failed to decrypt Initial packet <=> it could belong to a connection on a different node; forward it to |
696 | | * the destination being claimed by the DCID */ |
697 | 0 | uint64_t offending_node_id = packets[i].cid.dest.plaintext.node_id; |
698 | 0 | uint32_t offending_thread_id = packets[i].cid.dest.plaintext.thread_id; |
699 | 0 | if (ctx->forward_packets != NULL && ttl > 0 && |
700 | 0 | (offending_node_id != ctx->next_cid->node_id || offending_thread_id != ctx->next_cid->thread_id)) |
701 | 0 | ctx->forward_packets(ctx, &offending_node_id, offending_thread_id, destaddr, srcaddr, ttl, packets, |
702 | 0 | num_packets); |
703 | 0 | return; |
704 | 0 | } |
705 | 0 | break; |
706 | 0 | } |
707 | 0 | } |
708 | 0 | if (conn == NULL) |
709 | 0 | return; |
710 | 0 | accepted_packet_index = i; |
711 | 0 | conn->_accept_hashkey = accept_hashkey; |
712 | 0 | int r; |
713 | 0 | iter = kh_put_h2o_quic_acceptmap(conn->ctx->conns_accepting, accept_hashkey, &r); |
714 | 0 | assert(iter != kh_end(conn->ctx->conns_accepting)); |
715 | 0 | kh_val(conn->ctx->conns_accepting, iter) = conn; |
716 | 0 | } else { |
717 | | /* likely have found a connection in `conns_accepting` */ |
718 | 0 | conn = kh_val(ctx->conns_accepting, iter); |
719 | 0 | assert(conn != NULL); |
720 | 0 | assert(!quicly_is_client(conn->quic)); |
721 | 0 | if (!quicly_is_destination(conn->quic, &destaddr->sa, &srcaddr->sa, packets)) { |
722 | 0 | uint64_t offending_node_id = packets[0].cid.dest.plaintext.node_id; |
723 | 0 | uint32_t offending_thread_id = packets[0].cid.dest.plaintext.thread_id; |
724 | 0 | if (offending_node_id != ctx->next_cid->node_id || offending_thread_id != ctx->next_cid->thread_id) { |
725 | | /* accept key matches to a connection being established, but DCID doesn't -- likely a second (or later) Initial |
726 | | * that is supposed to be handled by another node. forward it. */ |
727 | 0 | if (ttl == 0) |
728 | 0 | return; |
729 | 0 | if (ctx->forward_packets != NULL) |
730 | 0 | ctx->forward_packets(ctx, &offending_node_id, offending_thread_id, destaddr, srcaddr, ttl, packets, |
731 | 0 | num_packets); |
732 | 0 | } |
733 | | /* regardless of forwarding outcome, we need to drop this packet as it is not for us */ |
734 | 0 | return; |
735 | 0 | } |
736 | 0 | } |
737 | 0 | } |
738 | | |
739 | | /* receive packets to the found connection */ |
740 | 0 | for (size_t i = 0; i != num_packets; ++i) { |
741 | 0 | if (i != accepted_packet_index) { |
742 | 0 | quicly_error_t ret = quicly_receive(conn->quic, &destaddr->sa, &srcaddr->sa, packets + i); |
743 | 0 | switch (ret) { |
744 | 0 | case QUICLY_ERROR_STATE_EXHAUSTION: |
745 | 0 | case PTLS_ERROR_NO_MEMORY: |
746 | 0 | fprintf(stderr, "%s: `quicly_receive()` returned ret:%" PRId64 "\n", __func__, ret); |
747 | 0 | conn->callbacks->destroy_connection(conn); |
748 | 0 | return; |
749 | 0 | } |
750 | 0 | if (ret != QUICLY_ERROR_PACKET_IGNORED && ret != QUICLY_ERROR_DECRYPTION_FAILED) { |
751 | 0 | if (ctx->quic_stats != NULL) { |
752 | 0 | ++ctx->quic_stats->packet_processed; |
753 | 0 | } |
754 | 0 | } |
755 | 0 | } |
756 | 0 | } |
757 | | |
758 | 0 | h2o_quic_schedule_timer(conn); |
759 | 0 | if (ctx->notify_conn_update != NULL) |
760 | 0 | ctx->notify_conn_update(ctx, conn); |
761 | 0 | } |
762 | | /* Reads up to PTLS_ELEMENTSOF(dgrams) UDP datagrams from `sock`, decodes them as QUIC packets, and hands them to `process_packets` in groups that share the same addresses, TTL and DCID. */
763 | | void h2o_quic_read_socket(h2o_quic_ctx_t *ctx, h2o_socket_t *sock)
764 | 0 | {
765 | 0 | struct {
766 | 0 | quicly_address_t destaddr, srcaddr;
767 | 0 | struct iovec vec;
768 | 0 | uint8_t ttl;
769 | 0 | uint8_t ecn;
770 | 0 | union {
771 | 0 | struct cmsghdr _align; /* naturally align the contents of controlbuf (which are of type cmsghdr) */
772 | 0 | char controlbuf[
773 | 0 | #ifdef IPV6_PKTINFO
774 | 0 | CMSG_SPACE(sizeof(struct in6_pktinfo))
775 | | #elif defined(IP_PKTINFO)
776 | | CMSG_SPACE(sizeof(struct in_pktinfo))
777 | | #elif defined(IP_RECVDSTADDR)
778 | | CMSG_SPACE(sizeof(struct in_addr))
779 | | #else
780 | | CMSG_SPACE(1)
781 | | #endif
782 | 0 | #if defined(IPV6_TCLASS) || defined(IP_TOS) || defined(IP_RECVTOS)
783 | | + CMSG_SPACE(sizeof(int)) /* IPv6 uses int, which is bigger than uint8_t used by IPv4 */
784 | 0 | #endif
785 | 0 | ];
786 | 0 | };
787 | 0 | uint8_t buf[1600];
788 | 0 | } dgrams[10];
789 | 0 | #ifdef __linux__
790 | 0 | struct mmsghdr mess[PTLS_ELEMENTSOF(dgrams)];
791 | | #else
792 | | struct {
793 | | struct msghdr msg_hdr;
794 | | } mess[PTLS_ELEMENTSOF(dgrams)];
795 | | #endif
796 |

797 | 0 | #define INIT_DGRAMS(i) \
798 | 0 | do { \
799 | 0 | mess[i].msg_hdr = (struct msghdr){ \
800 | 0 | .msg_name = &dgrams[i].srcaddr, \
801 | 0 | .msg_namelen = sizeof(dgrams[i].srcaddr), \
802 | 0 | .msg_iov = &dgrams[i].vec, \
803 | 0 | .msg_iovlen = 1, \
804 | 0 | .msg_control = &dgrams[i].controlbuf, \
805 | 0 | .msg_controllen = sizeof(dgrams[i].controlbuf), \
806 | 0 | }; \
807 | 0 | memset(&dgrams[i].destaddr, 0, sizeof(dgrams[i].destaddr)); \
808 | 0 | dgrams[i].vec.iov_base = dgrams[i].buf; \
809 | 0 | dgrams[i].vec.iov_len = sizeof(dgrams[i].buf); \
810 | 0 | } while (0)
811 | |
812 | | /* If the socket is either ctx->sock or ctx->sock_alt_family, the destination port is that of the socket. Otherwise, the
813 | | * preprocess_packet callback takes care, therefore the value can be bogus. */
814 | 0 | in_port_t dst_port = ctx->sock_alt_family.sock == sock ? *ctx->sock_alt_family.port : *ctx->sock.port;
815 |

816 | 0 | int fd = h2o_socket_get_fd(sock);
817 | 0 | size_t dgram_index, num_dgrams;
818 | |
819 | | /* Read datagrams. Sender should be provided an ACK every fraction of RTT, otherwise its behavior becomes bursty (assuming that
820 | | * pacing is not used), rather than packets being spread across entire round-trip. To minimize the chance of us entering such
821 | | * situation, number of datagrams being read at once is limited to `PTLS_ELEMENTSOF(dgrams)`. In other words, one ack is
822 | | * generated for no more than every 10 ack-eliciting packets being received, unless the ack-frequency extension is used. */
823 | 0 | #ifdef __linux__
824 | 0 | {
825 | 0 | int rret;
826 | 0 | do {
827 | 0 | for (dgram_index = 0; dgram_index < PTLS_ELEMENTSOF(dgrams); ++dgram_index)
828 | 0 | INIT_DGRAMS(dgram_index);
829 | 0 | } while ((rret = recvmmsg(fd, mess, PTLS_ELEMENTSOF(mess), 0, NULL)) < 0 && errno == EINTR);
830 | 0 | if (rret <= 0)
831 | 0 | goto Exit;
832 | 0 | num_dgrams = (size_t)rret;
833 | 0 | for (dgram_index = 0; dgram_index < num_dgrams; ++dgram_index)
834 | 0 | dgrams[dgram_index].vec.iov_len = (size_t)mess[dgram_index].msg_len;
835 | 0 | }
836 | | #else
837 | | for (dgram_index = 0; dgram_index < PTLS_ELEMENTSOF(dgrams); ++dgram_index) {
838 | | ssize_t rret;
839 | | do {
840 | | INIT_DGRAMS(dgram_index);
841 | | } while ((rret = recvmsg(fd, &mess[dgram_index].msg_hdr, 0)) < 0 && errno == EINTR);
842 | | if (rret < 0)
843 | | break;
844 | | dgrams[dgram_index].vec.iov_len = rret;
845 | | }
846 | | num_dgrams = dgram_index;
847 | | if (num_dgrams == 0)
848 | | goto Exit;
849 | | #endif
850 | |
851 | | /* normalize and store the obtained data into `dgrams` */
852 | 0 | for (dgram_index = 0; dgram_index < num_dgrams; ++dgram_index) {
853 | 0 | dgrams[dgram_index].ecn = 0;
854 | 0 | dgrams[dgram_index].destaddr.sa.sa_family = AF_UNSPEC;
855 | 0 | { /* fetch destination address */
856 | 0 | struct cmsghdr *cmsg;
857 | 0 | for (cmsg = CMSG_FIRSTHDR(&mess[dgram_index].msg_hdr); cmsg != NULL;
858 | 0 | cmsg = CMSG_NXTHDR(&mess[dgram_index].msg_hdr, cmsg)) {
859 | 0 | switch (cmsg->cmsg_level) {
860 | 0 | case IPPROTO_IP:
861 | 0 | switch (cmsg->cmsg_type) {
862 | 0 | #ifdef IP_PKTINFO
863 | 0 | case IP_PKTINFO:
864 | 0 | dgrams[dgram_index].destaddr.sin.sin_family = AF_INET;
865 | 0 | memcpy(&dgrams[dgram_index].destaddr.sin.sin_addr, CMSG_DATA(cmsg) + offsetof(struct in_pktinfo, ipi_addr),
866 | 0 | sizeof(struct in_addr));
867 | 0 | dgrams[dgram_index].destaddr.sin.sin_port = dst_port;
868 | 0 | break;
869 | 0 | #endif
870 | | #ifdef IP_RECVDSTADDR
871 | | case IP_RECVDSTADDR:
872 | | dgrams[dgram_index].destaddr.sin.sin_family = AF_INET;
873 | | memcpy(&dgrams[dgram_index].destaddr.sin.sin_addr, CMSG_DATA(cmsg), sizeof(struct in_addr));
874 | | dgrams[dgram_index].destaddr.sin.sin_port = dst_port;
875 | | break;
876 | | #endif
877 | 0 | #ifdef IP_RECVTOS
878 | | #ifdef __APPLE__
879 | | case IP_RECVTOS:
880 | | #else
881 | 0 | case IP_TOS:
882 | 0 | #endif
883 | | /* draft-ietf-tsvwg-udp-ecn-05 recommends using a byte on all platforms */
884 | 0 | dgrams[dgram_index].ecn = *(uint8_t *)CMSG_DATA(cmsg) & IPTOS_ECN_MASK;
885 | 0 | break;
886 | 0 | #endif
887 | 0 | default:
888 | 0 | break;
889 | 0 | }
890 | 0 | break;
891 | 0 | case IPPROTO_IPV6:
892 | 0 | switch (cmsg->cmsg_type) {
893 | 0 | #ifdef IPV6_PKTINFO
894 | 0 | case IPV6_PKTINFO:
895 | 0 | dgrams[dgram_index].destaddr.sin6.sin6_family = AF_INET6;
896 | 0 | memcpy(&dgrams[dgram_index].destaddr.sin6.sin6_addr,
897 | 0 | CMSG_DATA(cmsg) + offsetof(struct in6_pktinfo, ipi6_addr), sizeof(struct in6_addr));
898 | 0 | dgrams[dgram_index].destaddr.sin6.sin6_port = dst_port;
899 | 0 | break;
900 | 0 | #endif
901 | 0 | #ifdef IPV6_TCLASS
902 | 0 | case IPV6_TCLASS: {
903 | 0 | int optval;
904 | 0 | memcpy(&optval, CMSG_DATA(cmsg), sizeof(optval));
905 | 0 | dgrams[dgram_index].ecn = optval & IPTOS_ECN_MASK;
906 | 0 | } break;
907 | 0 | #endif
908 | 0 | default:
909 | 0 | break;
910 | 0 | }
911 | 0 | break;
912 | 0 | }
913 | 0 | }
914 | 0 | }
915 | 0 | dgrams[dgram_index].ttl = ctx->default_ttl;
916 | | /* preprocess (and drop the packet if it failed) */
917 | 0 | if (ctx->preprocess_packet != NULL &&
918 | 0 | !ctx->preprocess_packet(ctx, &mess[dgram_index].msg_hdr, &dgrams[dgram_index].destaddr, &dgrams[dgram_index].srcaddr,
919 | 0 | &dgrams[dgram_index].ttl)) {
920 | 0 | dgrams[dgram_index].vec.iov_len = 0; /* mark as unused */
921 | 0 | } else {
922 | 0 | assert(dgrams[dgram_index].srcaddr.sa.sa_family == AF_INET || dgrams[dgram_index].srcaddr.sa.sa_family == AF_INET6);
923 | 0 | }
924 | 0 | }
925 | |
926 | | /* convert dgrams to decoded packets and process them in group of (4-tuple, dcid) */
927 | 0 | quicly_decoded_packet_t packets[64];
928 | 0 | size_t packet_index = 0;
929 | 0 | dgram_index = 0;
930 | 0 | while (dgram_index < num_dgrams) {
931 | 0 | int has_decoded = 0; /* indicates if a decoded packet belonging to a different connection is stored at
932 | | * `packets[packet_index]` */
933 | | /* skip zero-sized datagrams (or the ones for which preprocessing failed) */
934 | 0 | if (dgrams[dgram_index].vec.iov_len == 0) {
935 | 0 | ++dgram_index;
936 | 0 | continue;
937 | 0 | }
938 | | /* dispatch packets in `packets`, if the datagram at dgram_index is from a different path */
939 | 0 | if (packet_index != 0) {
940 | 0 | assert(dgram_index != 0);
941 | | /* check source address. NOTE(review): the datagram at `dgram_index - 1` may be one that was skipped above (iov_len == 0) rather than the one that produced the buffered packets -- confirm this is intended */
942 | 0 | if (h2o_socket_compare_address(&dgrams[dgram_index - 1].srcaddr.sa, &dgrams[dgram_index].srcaddr.sa, 1) != 0)
943 | 0 | goto ProcessPackets;
944 | | /* check destination address, if available */
945 | 0 | if (dgrams[dgram_index - 1].destaddr.sa.sa_family == AF_UNSPEC &&
946 | 0 | dgrams[dgram_index].destaddr.sa.sa_family == AF_UNSPEC) {
947 | | /* ok */
948 | 0 | } else if (h2o_socket_compare_address(&dgrams[dgram_index - 1].destaddr.sa, &dgrams[dgram_index].destaddr.sa, 1) == 0) {
949 | | /* ok */
950 | 0 | } else {
951 | 0 | goto ProcessPackets;
952 | 0 | }
953 | | /* TTL should be same for dispatched packets */
954 | 0 | if (dgrams[dgram_index - 1].ttl != dgrams[dgram_index].ttl)
955 | 0 | goto ProcessPackets;
956 | 0 | }
957 | | /* decode the first packet */
958 | 0 | size_t payload_off = 0;
959 | 0 | if (quicly_decode_packet(ctx->quic, packets + packet_index, dgrams[dgram_index].vec.iov_base,
960 | 0 | dgrams[dgram_index].vec.iov_len, &payload_off) == SIZE_MAX) {
961 | 0 | ++dgram_index;
962 | 0 | goto ProcessPackets;
963 | 0 | }
964 | 0 | packets[packet_index].ecn = dgrams[dgram_index].ecn;
965 | | /* dispatch packets in `packets` if the DCID is different, setting the `has_decoded` flag */
966 | 0 | if (packet_index != 0) {
967 | 0 | const ptls_iovec_t *prev_dcid = &packets[packet_index - 1].cid.dest.encrypted,
968 | 0 | *cur_dcid = &packets[packet_index].cid.dest.encrypted;
969 | 0 | if (!(prev_dcid->len == cur_dcid->len && memcmp(prev_dcid->base, cur_dcid->base, prev_dcid->len) == 0)) {
970 | 0 | has_decoded = 1;
971 | 0 | ++dgram_index;
972 | 0 | goto ProcessPackets;
973 | 0 | }
974 | 0 | }
975 | 0 | ++packet_index;
976 | | /* add rest of the packets */
977 | 0 | while (payload_off < dgrams[dgram_index].vec.iov_len && packet_index < PTLS_ELEMENTSOF(packets)) {
978 | 0 | if (quicly_decode_packet(ctx->quic, packets + packet_index, dgrams[dgram_index].vec.iov_base,
979 | 0 | dgrams[dgram_index].vec.iov_len, &payload_off) == SIZE_MAX)
980 | 0 | break;
981 | 0 | packets[packet_index].ecn = dgrams[dgram_index].ecn;
982 | 0 | ++packet_index;
983 | 0 | }
984 | 0 | ++dgram_index;
985 | | /* if we have enough room for the next datagram, that is, the expected worst case of 4 packets in a coalesced datagram,
986 | | * continue */
987 | 0 | if (packet_index + 4 < PTLS_ELEMENTSOF(packets))
988 | 0 | continue;
989 | | /* flush the packets buffered so far; reached via `goto` on an address/TTL/DCID change or a decode failure, or by falling through when `packets` is nearly full */
990 | 0 | ProcessPackets:
991 | 0 | if (packet_index != 0) {
992 | 0 | process_packets(ctx, &dgrams[dgram_index - 1].destaddr, &dgrams[dgram_index - 1].srcaddr, dgrams[dgram_index - 1].ttl,
993 | 0 | packets, packet_index);
994 | 0 | if (has_decoded) {
995 | 0 | packets[0] = packets[packet_index];
996 | 0 | packet_index = 1;
997 | 0 | } else {
998 | 0 | packet_index = 0;
999 | 0 | }
1000 | 0 | }
1001 | 0 | }
1002 | 0 | if (packet_index != 0)
1003 | 0 | process_packets(ctx, &dgrams[dgram_index - 1].destaddr, &dgrams[dgram_index - 1].srcaddr, dgrams[dgram_index - 1].ttl,
1004 | 0 | packets, packet_index);
1005 |

1006 | 0 | Exit:;
1007 |

1008 | 0 | #undef INIT_DGRAMS
1009 | 0 | }
1010 | | /* Socket read callback: takes the QUIC context stored in `sock->data` and reads the pending datagrams. `err` is not inspected. */
1011 | | static void on_read(h2o_socket_t *sock, const char *err)
1012 | 0 | {
1013 | 0 | h2o_quic_ctx_t *ctx = sock->data;
1014 | 0 | h2o_quic_read_socket(ctx, sock);
1015 | 0 | }
1016 | | /* Initializes `*qsock` for `sock`: records the socket's local address, points `qsock->port` at the port field inside that address, stores `ctx` as the socket's user data, and starts reading with `on_read`. */
1017 | | static void setup_quic_socket(h2o_quic_ctx_t *ctx, h2o_quic_socket_t *qsock, h2o_socket_t *sock)
1018 | 0 | {
1019 | 0 | *qsock = (h2o_quic_socket_t){.sock = sock};
1020 | 0 | h2o_socket_getsockname(qsock->sock, (void *)&qsock->addr); /* NOTE(review): the return value is not checked */
1021 | 0 | switch (qsock->addr.ss_family) {
1022 | 0 | case AF_INET:
1023 | 0 | qsock->port = &((struct sockaddr_in *)&qsock->addr)->sin_port;
1024 | 0 | break;
1025 | 0 | case AF_INET6:
1026 | 0 | qsock->port = &((struct sockaddr_in6 *)&qsock->addr)->sin6_port;
1027 | 0 | break;
1028 | 0 | default:
1029 | 0 | assert(!"unexpected address family");
1030 | 0 | break;
1031 | 0 | }
1032 | 0 | qsock->sock->data = ctx;
1033 | 0 | h2o_socket_read_start(qsock->sock, on_read);
1034 | 0 | }
1035 | | /* Timer callback: recovers the connection that embeds `timeout` as its `_timeout` member and calls `h2o_quic_send` on it. */
1036 | | static void on_timeout(h2o_timer_t *timeout)
1037 | 18.6k | {
1038 | 18.6k | h2o_quic_conn_t *conn = H2O_STRUCT_FROM_MEMBER(h2o_quic_conn_t, _timeout, timeout);
1039 | 18.6k | h2o_quic_send(conn);
1040 | 18.6k | }
1041 | | /* Parses one HTTP/3 frame from `*_src`..`src_end` into `frame` and validates its type against `stream_type` and `is_client`. Returns 0 and advances `*_src` on success; otherwise returns an H2O_HTTP3_ERROR_* code and leaves `*_src` untouched. */
1042 | | int h2o_http3_read_frame(h2o_http3_read_frame_t *frame, int is_client, uint64_t stream_type, size_t max_frame_payload_size,
1043 | | const uint8_t **_src, const uint8_t *src_end, const char **err_desc)
1044 | 34.0k | {
1045 | 34.0k | const uint8_t *src = *_src;
1046 | | /* decode the frame type and length; `quicly_decodev` returning UINT64_MAX is reported as H2O_HTTP3_ERROR_INCOMPLETE */
1047 | 34.0k | if ((frame->type = quicly_decodev(&src, src_end)) == UINT64_MAX)
1048 | 49 | return H2O_HTTP3_ERROR_INCOMPLETE;
1049 | 33.9k | if ((frame->length = quicly_decodev(&src, src_end)) == UINT64_MAX)
1050 | 102 | return H2O_HTTP3_ERROR_INCOMPLETE;
1051 | 33.8k | frame->_header_size = (uint8_t)(src - *_src);
1052 | |
1053 | | /* read the content of the frame (unless it's a DATA frame); for DATA frames `payload` stays NULL and `src` stops right after the header */
1054 | 33.8k | frame->payload = NULL;
1055 | 33.8k | if (frame->type != H2O_HTTP3_FRAME_TYPE_DATA) {
1056 | 7.87k | if (frame->length > max_frame_payload_size) {
1057 | 198 | H2O_PROBE(H3_FRAME_RECEIVE, frame->type, NULL, frame->length);
1058 | 198 | PTLS_LOG(h2o, h3_frame_receive, {
1059 | 198 | PTLS_LOG_ELEMENT_UNSIGNED(frame_type, frame->type);
1060 | 198 | PTLS_LOG_ELEMENT_UNSIGNED(payload_len, frame->length);
1061 | 198 | });
1062 | 198 | *err_desc = h2o_http3_err_frame_too_large;
1063 | 198 | return H2O_HTTP3_ERROR_GENERAL_PROTOCOL; /* FIXME is this the correct code? */
1064 | 198 | }
1065 | 7.67k | if (src_end - src < frame->length)
1066 | 51 | return H2O_HTTP3_ERROR_INCOMPLETE;
1067 | 7.62k | frame->payload = src;
1068 | 7.62k | src += frame->length;
1069 | 7.62k | }
1070 | |
1071 | 33.6k | H2O_PROBE(H3_FRAME_RECEIVE, frame->type, frame->payload, frame->length);
1072 | 33.6k | PTLS_LOG(h2o, h3_frame_receive, {
1073 | 33.6k | PTLS_LOG_ELEMENT_UNSIGNED(frame_type, frame->type);
1074 | 33.6k | if (frame->payload != NULL) {
1075 | 33.6k | PTLS_LOG_APPDATA_ELEMENT_HEXDUMP(payload, frame->payload, frame->length);
1076 | 33.6k | } else {
1077 | 33.6k | PTLS_LOG_ELEMENT_UNSIGNED(payload_len, frame->length);
1078 | 33.6k | }
1079 | 33.6k | });
1080 | |
1081 | | /* validate frame type: each FRAME() row states on which stream type, and from which peer role, the frame is accepted */
1082 | 33.6k | switch (frame->type) {
1083 | 0 | #define FRAME(id, req_clnt, req_srvr, ctl_clnt, ctl_srvr) \
1084 | 32.1k | case H2O_HTTP3_FRAME_TYPE_##id: \
1085 | 32.1k | switch (stream_type) { \
1086 | 32.1k | case H2O_HTTP3_STREAM_TYPE_REQUEST: \
1087 | 32.1k | if (req_clnt && !is_client) \
1088 | 32.1k | goto Validation_Success; \
1089 | 32.1k | if (req_srvr && is_client) \
1090 | 11 | goto Validation_Success; \
1091 | 11 | break; \
1092 | 11 | case H2O_HTTP3_STREAM_TYPE_CONTROL: \
1093 | 0 | if (ctl_clnt && !is_client) \
1094 | 0 | goto Validation_Success; \
1095 | 0 | if (ctl_srvr && is_client) \
1096 | 0 | goto Validation_Success; \
1097 | 0 | break; \
1098 | 0 | default: \
1099 | 0 | h2o_fatal("unexpected stream type"); \
1100 | 0 | break; \
1101 | 32.1k | } \
1102 | 32.1k | break
1103 | | /* clang-format off */
1104 | | /* +-------------------------+-------------+-------------+
1105 | | * | | req-stream | ctrl-stream |
1106 | | * | frame +------+------+------+------+
1107 | | * | |client|server|client|server|
1108 | | * +-------------------------+------+------+------+------+ */
1109 | 26.0k | FRAME( DATA , 1 , 1 , 0 , 0 );
1110 | 6.12k | FRAME( HEADERS , 1 , 1 , 0 , 0 );
1111 | 1 | FRAME( CANCEL_PUSH , 0 , 0 , 1 , 1 );
1112 | 3 | FRAME( SETTINGS , 0 , 0 , 1 , 1 );
1113 | 3 | FRAME( PUSH_PROMISE , 0 , 1 , 0 , 0 );
1114 | 3 | FRAME( GOAWAY , 0 , 0 , 1 , 1 );
1115 | 1 | FRAME( MAX_PUSH_ID , 0 , 0 , 1 , 0 );
1116 | 1 | FRAME( PRIORITY_UPDATE_REQUEST , 0 , 0 , 1 , 0 );
1117 | 1 | FRAME( PRIORITY_UPDATE_PUSH , 0 , 0 , 1 , 0 );
1118 | | /* +-------------------------+------+------+------+------+ */
1119 | | /* clang-format on */
1120 | 1 | #undef FRAME
1121 | 1.48k | default:
1122 | | /* ignore extension frames that we do not handle */
1123 | 1.48k | goto Validation_Success;
1124 | 33.6k | }
1125 | 11 | return H2O_HTTP3_ERROR_FRAME_UNEXPECTED;
1126 | 33.6k | Validation_Success:;
1127 | | /* commit: only a frame that passed validation advances the caller's read position */
1128 | 33.6k | *_src = src;
1129 | 33.6k | return 0;
1130 | 33.6k | }
1131 | | /* Initializes `*ctx` from the arguments, creates the two (empty) connection maps, and starts reading on `sock` and, if non-NULL, on `sock_alt_family`. Members not named in the initializer are zeroed. */
1132 | | void h2o_quic_init_context(h2o_quic_ctx_t *ctx, h2o_loop_t *loop, h2o_socket_t *sock, h2o_socket_t *sock_alt_family,
1133 | | quicly_context_t *quic, quicly_cid_plaintext_t *next_cid, h2o_quic_accept_cb acceptor,
1134 | | h2o_quic_notify_connection_update_cb notify_conn_update, uint8_t use_gso, h2o_quic_stats_t *quic_stats)
1135 | 0 | {
1136 | 0 | assert(quic->stream_open != NULL);
1137 | |
1138 | 0 | *ctx = (h2o_quic_ctx_t){
1139 | 0 | .loop = loop,
1140 | 0 | .quic = quic,
1141 | 0 | .next_cid = next_cid,
1142 | 0 | .conns_by_id = kh_init_h2o_quic_idmap(),
1143 | 0 | .conns_accepting = kh_init_h2o_quic_acceptmap(),
1144 | 0 | .notify_conn_update = notify_conn_update,
1145 | 0 | .acceptor = acceptor,
1146 | 0 | .use_gso = use_gso,
1147 | 0 | .quic_stats = quic_stats,
1148 | 0 | };
1149 | 0 | setup_quic_socket(ctx, &ctx->sock, sock);
1150 | 0 | if (sock_alt_family != NULL) {
1151 | 0 | setup_quic_socket(ctx, &ctx->sock_alt_family, sock_alt_family);
1152 | 0 | }
1153 | 0 | }
1154 | | /* Releases the resources held by `ctx`: closes its socket(s) and destroys both connection maps. Asserts that neither map holds a connection. */
1155 | | void h2o_quic_dispose_context(h2o_quic_ctx_t *ctx)
1156 | 0 | {
1157 | 0 | assert(kh_size(ctx->conns_by_id) == 0);
1158 | 0 | assert(kh_size(ctx->conns_accepting) == 0);
1159 | |
1160 | 0 | h2o_socket_close(ctx->sock.sock);
1161 | 0 | if (ctx->sock_alt_family.sock != NULL)
1162 | 0 | h2o_socket_close(ctx->sock_alt_family.sock);
1163 | 0 | kh_destroy_h2o_quic_idmap(ctx->conns_by_id);
1164 | 0 | kh_destroy_h2o_quic_acceptmap(ctx->conns_accepting);
1165 | 0 | }
1166 | | /* Stores the packet-forwarding configuration in `ctx`: the accept-thread divisor, the default TTL, and the forward / preprocess callbacks. */
1167 | | void h2o_quic_set_forwarding_context(h2o_quic_ctx_t *ctx, uint32_t accept_thread_divisor, uint8_t ttl,
1168 | | h2o_quic_forward_packets_cb forward_cb, h2o_quic_preprocess_packet_cb preprocess_cb)
1169 | 0 | {
1170 | 0 | ctx->accept_thread_divisor = accept_thread_divisor;
1171 | 0 | ctx->forward_packets = forward_cb;
1172 | 0 | ctx->default_ttl = ttl;
1173 | 0 | ctx->preprocess_packet = preprocess_cb;
1174 | 0 | }
1175 | | /* Closes `conn` according to its quicly state: destroys it at once in FIRSTFLIGHT; calls `quicly_close` with `err` / `reason_phrase` and reschedules the timer in CONNECTED; does nothing in any other state. */
1176 | | void h2o_quic_close_connection(h2o_quic_conn_t *conn, quicly_error_t err, const char *reason_phrase)
1177 | 4.23k | {
1178 | 4.23k | switch (quicly_get_state(conn->quic)) {
1179 | 0 | case QUICLY_STATE_FIRSTFLIGHT: /* FIXME why is this separate? */
1180 | 0 | conn->callbacks->destroy_connection(conn);
1181 | 0 | break;
1182 | 4.23k | case QUICLY_STATE_CONNECTED:
1183 | 4.23k | quicly_close(conn->quic, err, reason_phrase);
1184 | 4.23k | h2o_quic_schedule_timer(conn);
1185 | 4.23k | break;
1186 | 0 | default:
1187 | | /* only need to wait for the socket close */
1188 | 0 | break;
1189 | 4.23k | }
1190 | 4.23k | }
1191 | | /* Calls `h2o_quic_close_connection(conn, 0, NULL)` on every connection registered in `ctx->conns_by_id`, then asserts that `ctx->conns_accepting` is empty. */
1192 | | void h2o_quic_close_all_connections(h2o_quic_ctx_t *ctx)
1193 | 0 | {
1194 | 0 | h2o_quic_conn_t *conn;
1195 |

1196 | 0 | kh_foreach_value(ctx->conns_by_id, conn, { h2o_quic_close_connection(conn, 0, NULL); });
1197 | | /* closing a connection should also remove an entry from conns_accepting */
1198 | 0 | assert(kh_size(ctx->conns_accepting) == 0);
1199 | 0 | }
1200 | | /* Returns the number of connections currently registered in `ctx->conns_by_id`. */
1201 | | size_t h2o_quic_num_connections(h2o_quic_ctx_t *ctx)
1202 | 0 | {
1203 | | /* throughout its lifetime, a connection is always registered to both conns_by_id and conns_accepting,
1204 | | thus counting conns_by_id is enough */
1205 | 0 | return kh_size(ctx->conns_by_id);
1206 | 0 | }
1207 | | /* Initializes `*conn`: the first three members are set positionally to `ctx`, NULL and `callbacks`, the remaining members are zeroed, and the timer is set up to invoke `on_timeout`. */
1208 | | void h2o_quic_init_conn(h2o_quic_conn_t *conn, h2o_quic_ctx_t *ctx, const h2o_quic_conn_callbacks_t *callbacks)
1209 | 6.64k | {
1210 | 6.64k | *conn = (h2o_quic_conn_t){ctx, NULL, callbacks};
1211 | 6.64k | h2o_timer_init(&conn->_timeout, on_timeout);
1212 | 6.64k | }
1213 | | /* Tears down `conn`: when a quicly connection is attached, removes it from `conns_by_id`, calls `drop_from_acceptmap`, and frees the quicly connection; the timer is unlinked in every case. */
1214 | | void h2o_quic_dispose_conn(h2o_quic_conn_t *conn)
1215 | 6.64k | {
1216 | 6.64k | if (conn->quic != NULL) {
1217 | 6.64k | khiter_t iter;
1218 | | /* unregister from maps */
1219 | 6.64k | if ((iter = kh_get_h2o_quic_idmap(conn->ctx->conns_by_id, quicly_get_master_id(conn->quic)->master_id)) !=
1220 | 6.64k | kh_end(conn->ctx->conns_by_id))
1221 | 6.64k | kh_del_h2o_quic_idmap(conn->ctx->conns_by_id, iter);
1222 | 6.64k | drop_from_acceptmap(conn->ctx, conn);
1223 | 6.64k | quicly_free(conn->quic);
1224 | 6.64k | }
1225 | 6.64k | h2o_timer_unlink(&conn->_timeout);
1226 | 6.64k | }
1227 | | /* Attaches `quic` to `conn` (which must not have one yet), makes the quicly back pointer refer to `conn`, and registers `conn` in `conns_by_id` under the connection's master id. */
1228 | | void h2o_quic_setup(h2o_quic_conn_t *conn, quicly_conn_t *quic)
1229 | 6.64k | {
1230 | | /* Setup relation between `h2o_quic_conn_t` and `quicly_conn_t`. At this point, `conn` will not have `quic` associated, though
1231 | | * the back pointer might have already been set up (see how we call `quicly_accept`). */
1232 | 6.64k | assert(conn->quic == NULL);
1233 | 6.64k | void **backptr = quicly_get_data(quic);
1234 | 6.64k | if (*backptr == NULL) {
1235 | 6.64k | *backptr = conn;
1236 | 6.64k | } else {
1237 | 0 | assert(*backptr == conn);
1238 | 0 | }
1239 | 6.64k | conn->quic = quic;
1240 | |
1241 | | /* register to the idmap */
1242 | 6.64k | int r;
1243 | 6.64k | khiter_t iter = kh_put_h2o_quic_idmap(conn->ctx->conns_by_id, quicly_get_master_id(conn->quic)->master_id, &r);
1244 | 6.64k | assert(iter != kh_end(conn->ctx->conns_by_id));
1245 | 6.64k | kh_val(conn->ctx->conns_by_id, iter) = conn;
1246 | 6.64k | }
1247 | | /* Initializes the embedded `h2o_quic_conn_t` (`conn->super`), zeroes the bytes of `*conn` from offset `sizeof(conn->super)` to the end, then stores `qpack_ctx` and `max_frame_payload_size`. */
1248 | | void h2o_http3_init_conn(h2o_http3_conn_t *conn, h2o_quic_ctx_t *ctx, const h2o_http3_conn_callbacks_t *callbacks,
1249 | | const h2o_http3_qpack_context_t *qpack_ctx, size_t max_frame_payload_size)
1250 | 6.64k | {
1251 | 6.64k | h2o_quic_init_conn(&conn->super, ctx, &callbacks->super);
1252 | 6.64k | memset((char *)conn + sizeof(conn->super), 0, sizeof(*conn) - sizeof(conn->super));
1253 | 6.64k | conn->qpack.ctx = qpack_ctx;
1254 | 6.64k | conn->max_frame_payload_size = max_frame_payload_size;
1255 | 6.64k | }
1256 | | /* Destroys the QPACK decoder and encoder when they exist, then disposes the embedded QUIC connection. */
1257 | | void h2o_http3_dispose_conn(h2o_http3_conn_t *conn)
1258 | 6.64k | {
1259 | 6.64k | if (conn->qpack.dec != NULL)
1260 | 6.64k | h2o_qpack_destroy_decoder(conn->qpack.dec);
1261 | 6.64k | if (conn->qpack.enc != NULL)
1262 | 0 | h2o_qpack_destroy_encoder(conn->qpack.enc);
1263 | 6.64k | h2o_quic_dispose_conn(&conn->super);
1264 | 6.64k | }
1265 | | /* Writes the first bytes of the egress control stream (the stream type followed by a SETTINGS frame) into `bytebuf` and returns their length. Asserts that the output fit into `capacity` without a heap allocation. */
1266 | | static size_t build_firstflight(h2o_http3_conn_t *conn, uint8_t *bytebuf, size_t capacity)
1267 | 6.64k | {
1268 | 6.64k | ptls_buffer_t buf;
1269 | 6.64k | int ret = 0; /* presumably referenced, together with the `Exit` label, by the ptls_buffer_push_* macros -- TODO confirm */
1270 | |
1271 | 6.64k | ptls_buffer_init(&buf, bytebuf, capacity);
1272 | |
1273 | | /* push stream type */
1274 | 6.64k | ptls_buffer_push_quicint(&buf, H2O_HTTP3_STREAM_TYPE_CONTROL);
1275 | |
1276 | | /* push SETTINGS frame */
1277 | 6.64k | ptls_buffer_push_quicint(&buf, H2O_HTTP3_FRAME_TYPE_SETTINGS);
1278 | 6.64k | ptls_buffer_push_block(&buf, -1, {
1279 | 6.64k | quicly_context_t *qctx = quicly_get_context(conn->super.quic);
1280 | 6.64k | if (qctx->transport_params.max_datagram_frame_size != 0) {
1281 | | // advertise that we are prepared to receive both RFC and draft-03 datagram formats
1282 | 6.64k | ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_H3_DATAGRAM);
1283 | 6.64k | ptls_buffer_push_quicint(&buf, 1);
1284 | 6.64k | ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_H3_DATAGRAM_DRAFT03);
1285 | 6.64k | ptls_buffer_push_quicint(&buf, 1);
1286 | 6.64k | };
1287 | 6.64k | ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_ENABLE_CONNECT_PROTOCOL);
1288 | 6.64k | ptls_buffer_push_quicint(&buf, 1);
1289 | 6.64k | });
1290 | |
1291 | 6.64k | assert(!buf.is_allocated);
1292 | 6.64k | return buf.off;
1293 | |
1294 | 0 | Exit:
1295 | 0 | h2o_fatal("unreachable");
1296 | 6.64k | }
1297 | | /* Binds `quic` to `conn` and marks the connection open. Unless the quicly state is already past CONNECTED, also creates the QPACK decoder and opens the egress control, QPACK-encoder and QPACK-decoder unidirectional streams. Returns 0, or the error from opening a stream. */
1298 | | quicly_error_t h2o_http3_setup(h2o_http3_conn_t *conn, quicly_conn_t *quic)
1299 | 6.64k | {
1300 | 6.64k | quicly_error_t ret;
1301 | |
1302 | 6.64k | h2o_quic_setup(&conn->super, quic);
1303 | 6.64k | conn->state = H2O_HTTP3_CONN_STATE_OPEN;
1304 | |
1305 | | /* setup h3 objects, only when the connection state has been created */
1306 | 6.64k | if (quicly_get_state(quic) > QUICLY_STATE_CONNECTED)
1307 | 0 | goto Exit;
1308 | |
1309 | | /* create decoder with the table size set to zero; see SETTINGS sent below. */
1310 | 6.64k | conn->qpack.dec = h2o_qpack_create_decoder(0, 100 /* FIXME */);
1311 | |
1312 | 6.64k | { /* open control streams, send SETTINGS */
1313 | 6.64k | uint8_t firstflight[32];
1314 | 6.64k | size_t firstflight_len = build_firstflight(conn, firstflight, sizeof(firstflight));
1315 | 6.64k | if ((ret = open_egress_unistream(conn, &conn->_control_streams.egress.control,
1316 | 6.64k | h2o_iovec_init(firstflight, firstflight_len))) != 0)
1317 | 0 | return ret;
1318 | 6.64k | }
1319 | |
1320 | 6.64k | { /* open QPACK encoder & decoder streams */
1321 | 6.64k | static const uint8_t encoder_first_flight[] = {H2O_HTTP3_STREAM_TYPE_QPACK_ENCODER};
1322 | 6.64k | static const uint8_t decoder_first_flight[] = {H2O_HTTP3_STREAM_TYPE_QPACK_DECODER};
1323 | 6.64k | if ((ret = open_egress_unistream(conn, &conn->_control_streams.egress.qpack_encoder,
1324 | 6.64k | h2o_iovec_init(encoder_first_flight, sizeof(encoder_first_flight)))) != 0 ||
1325 | 6.64k | (ret = open_egress_unistream(conn, &conn->_control_streams.egress.qpack_decoder,
1326 | 6.64k | h2o_iovec_init(decoder_first_flight, sizeof(decoder_first_flight)))) != 0)
1327 | 0 | return ret;
1328 | 6.64k | }
1329 | | /* the timer is scheduled on the success paths only; the early `return ret` paths above skip it */
1330 | 6.64k | Exit:
1331 | 6.64k | h2o_quic_schedule_timer(&conn->super);
1332 | 6.64k | return 0;
1333 | 6.64k | }
1334 | | /* Asks quicly for up to PTLS_ELEMENTSOF(datagrams) datagrams and sends them. Returns 1 after rescheduling the timer, or 0 when the connection was destroyed (state exhaustion or free-connection); any other quicly error is fatal. */
1335 | | quicly_error_t h2o_quic_send(h2o_quic_conn_t *conn)
1336 | 25.2k | {
1337 | 25.2k | quicly_address_t dest, src;
1338 | 25.2k | struct iovec datagrams[10];
1339 | 25.2k | size_t num_datagrams = PTLS_ELEMENTSOF(datagrams);
1340 | 25.2k | uint8_t datagram_buf[1500 * PTLS_ELEMENTSOF(datagrams)];
1341 | |
1342 | 25.2k | quicly_error_t ret = quicly_send(conn->quic, &dest, &src, datagrams, &num_datagrams, datagram_buf, sizeof(datagram_buf));
1343 | 25.2k | switch (ret) {
1344 | 18.6k | case 0:
1345 | 18.6k | if (num_datagrams != 0 &&
1346 | 0 | !h2o_quic_send_datagrams(conn->ctx, &dest, &src, datagrams, num_datagrams, quicly_send_get_ecn_bits(conn->quic))) {
1347 | | /* FIXME close the connection immediately */
1348 | 0 | break;
1349 | 0 | }
1350 | 18.6k | break;
1351 | 18.6k | case QUICLY_ERROR_STATE_EXHAUSTION:
1352 | 6.64k | case QUICLY_ERROR_FREE_CONNECTION:
1353 | 6.64k | conn->callbacks->destroy_connection(conn);
1354 | 6.64k | return 0;
1355 | 0 | default:
1356 | 0 | h2o_fatal("quicly_send returned %" PRId64, ret);
1357 | 25.2k | }
1358 | |
1359 | 18.6k | h2o_quic_schedule_timer(conn);
1360 | | /* NOTE(review): the value returned is a 1/0 flag (1 here, 0 after the connection was destroyed) even though the declared type is quicly_error_t */
1361 | 18.6k | return 1;
1362 | 25.2k | }
1363 | | /* Copies `len` bytes from `src` to offset `off` of `*buf`, growing the buffer and raising its size to `off + len` first when that exceeds the current size. */
1364 | | void h2o_http3_update_recvbuf(h2o_buffer_t **buf, size_t off, const void *src, size_t len)
1365 | 6.64k | {
1366 | 6.64k | size_t new_size = off + len;
1367 | | /* NOTE(review): the result of `h2o_buffer_reserve` is not checked before the size is raised and the copy is made */
1368 | 6.64k | if ((*buf)->size < new_size) {
1369 | 6.64k | h2o_buffer_reserve(buf, new_size - (*buf)->size);
1370 | 6.64k | (*buf)->size = new_size;
1371 | 6.64k | }
1372 | 6.64k | memcpy((*buf)->bytes + off, src, len);
1373 | 6.64k | }
1374 | | /* (Re)arms `conn->_timeout` for the time returned by `quicly_get_first_timeout`, using a delay of zero when that time is not in the future. */
1375 | | void h2o_quic_schedule_timer(h2o_quic_conn_t *conn)
1376 | 33.6k | {
1377 | 33.6k | int64_t timeout = quicly_get_first_timeout(conn->quic);
1378 | 33.6k | if (h2o_timer_is_linked(&conn->_timeout)) {
1379 | | #if !H2O_USE_LIBUV /* optimization to skip registering a timer specifying the same time */
1380 | 15.0k | if (timeout == conn->_timeout.expire_at)
1381 | 14.9k | return;
1382 | 91 | #endif
1383 | 91 | h2o_timer_unlink(&conn->_timeout);
1384 | 91 | }
1385 | 18.7k | uint64_t now = h2o_now(conn->ctx->loop), delay = now < timeout ? timeout - now : 0;
1386 | 18.7k | h2o_timer_link(conn->ctx->loop, delay, &conn->_timeout);
1387 | 18.7k | }
1388 | | /* Parses a SETTINGS payload as a sequence of (id, value) pairs, records the settings it recognizes, and creates the QPACK encoder. Returns 0, or H2O_HTTP3_ERROR_FRAME with `*err_desc` set when the payload is malformed. */
1389 | | int h2o_http3_handle_settings_frame(h2o_http3_conn_t *conn, const uint8_t *payload, size_t length, const char **err_desc)
1390 | 0 | {
1391 | 0 | const uint8_t *src = payload, *src_end = src + length;
1392 | 0 | uint32_t header_table_size = 0;
1393 | 0 | uint64_t blocked_streams = 0;
1394 |

1395 | 0 | assert(!h2o_http3_has_received_settings(conn));
1396 | |
1397 | 0 | while (src != src_end) {
1398 | 0 | uint64_t id;
1399 | 0 | uint64_t value;
1400 | 0 | if ((id = quicly_decodev(&src, src_end)) == UINT64_MAX)
1401 | 0 | goto Malformed;
1402 | 0 | if ((value = quicly_decodev(&src, src_end)) == UINT64_MAX)
1403 | 0 | goto Malformed;
1404 | 0 | switch (id) {
1405 | 0 | case H2O_HTTP3_SETTINGS_MAX_FIELD_SECTION_SIZE:
1406 | 0 | conn->peer_settings.max_field_section_size = value;
1407 | 0 | break;
1408 | 0 | case H2O_HTTP3_SETTINGS_QPACK_MAX_TABLE_CAPACITY: /* capped at our own `encoder_table_capacity` */
1409 | 0 | header_table_size =
1410 | 0 | value < conn->qpack.ctx->encoder_table_capacity ? (uint32_t)value : conn->qpack.ctx->encoder_table_capacity;
1411 | 0 | break;
1412 | 0 | case H2O_HTTP3_SETTINGS_QPACK_BLOCKED_STREAMS:
1413 | 0 | blocked_streams = value;
1414 | 0 | break;
1415 | 0 | case H2O_HTTP3_SETTINGS_H3_DATAGRAM:
1416 | 0 | case H2O_HTTP3_SETTINGS_H3_DATAGRAM_DRAFT03:
1417 | 0 | switch (value) {
1418 | 0 | case 0:
1419 | 0 | break;
1420 | 0 | case 1: {
1421 | 0 | const quicly_transport_parameters_t *remote_tp = quicly_get_remote_transport_parameters(conn->super.quic);
1422 | 0 | if (remote_tp->max_datagram_frame_size == 0) /* a value of 1 is rejected when the peer's transport parameters carry a zero max_datagram_frame_size */
1423 | 0 | goto Malformed;
1424 | 0 | conn->peer_settings.h3_datagram = 1;
1425 | 0 | } break;
1426 | 0 | default:
1427 | 0 | goto Malformed;
1428 | 0 | }
1429 | 0 | break;
1430 | 0 | default: /* settings with any other id are ignored */
1431 | 0 | break;
1432 | 0 | }
1433 | 0 | }
1434 | |
1435 | 0 | conn->qpack.enc = h2o_qpack_create_encoder(header_table_size, blocked_streams);
1436 | 0 | return 0;
1437 | 0 | Malformed:
1438 | 0 | *err_desc = "malformed SETTINGS frame";
1439 | 0 | return H2O_HTTP3_ERROR_FRAME;
1440 | 0 | }
1441 | | |
1442 | | void h2o_http3_send_qpack_stream_cancel(h2o_http3_conn_t *conn, quicly_stream_id_t stream_id) |
1443 | 0 | { |
1444 | 0 | struct st_h2o_http3_egress_unistream_t *stream = conn->_control_streams.egress.qpack_decoder; |
1445 | | |
1446 | | /* allocate and write */ |
1447 | 0 | h2o_iovec_t buf = h2o_buffer_reserve(&stream->sendbuf, stream->sendbuf->size + H2O_HPACK_ENCODE_INT_MAX_LENGTH); |
1448 | 0 | assert(buf.base != NULL); |
1449 | 0 | stream->sendbuf->size += h2o_qpack_decoder_send_stream_cancel(conn->qpack.dec, (uint8_t *)buf.base, stream_id); |
1450 | | |
1451 | | /* notify the transport */ |
1452 | 0 | H2O_HTTP3_CHECK_SUCCESS(quicly_stream_sync_sendbuf(stream->quic, 1) == 0); |
1453 | 0 | } |
1454 | | |
1455 | | void h2o_http3_send_qpack_header_ack(h2o_http3_conn_t *conn, const void *bytes, size_t len) |
1456 | 0 | { |
1457 | 0 | struct st_h2o_http3_egress_unistream_t *stream = conn->_control_streams.egress.qpack_encoder; |
1458 | |
|
1459 | 0 | assert(stream != NULL); |
1460 | 0 | h2o_buffer_append(&stream->sendbuf, bytes, len); |
1461 | 0 | H2O_HTTP3_CHECK_SUCCESS(quicly_stream_sync_sendbuf(stream->quic, 1)); |
1462 | 0 | } |
1463 | | |
1464 | | void h2o_http3_send_shutdown_goaway_frame(h2o_http3_conn_t *conn) |
1465 | 0 | { |
1466 | | /* There is a moment where the transport-level close has been initiated while st_h2o_http3_server_conn_t remains. |
1467 | | * Check QUIC connection state to skip sending GOAWAY in such a case. */ |
1468 | 0 | if (conn->state < H2O_HTTP3_CONN_STATE_HALF_CLOSED && quicly_get_state(conn->super.quic) == QUICLY_STATE_CONNECTED) { |
1469 | | /* advertise the maximum stream ID to indicate that we will no longer accept new requests. |
1470 | | * HTTP/3 draft section 5.2.8 -- |
1471 | | * "An endpoint that is attempting to gracefully shut down a connection can send a GOAWAY frame with a value set to the |
1472 | | * maximum possible value (2^62-4 for servers, 2^62-1 for clients). This ensures that the peer stops creating new |
1473 | | * requests or pushes." */ |
1474 | 0 | h2o_http3_send_goaway_frame(conn, (UINT64_C(1) << 62) - 4); |
1475 | 0 | } |
1476 | 0 | } |
1477 | | |
1478 | | void h2o_http3_send_goaway_frame(h2o_http3_conn_t *conn, uint64_t stream_or_push_id) |
1479 | 0 | { |
1480 | 0 | size_t cap = h2o_http3_goaway_frame_capacity(stream_or_push_id); |
1481 | 0 | h2o_iovec_t alloced = h2o_buffer_reserve(&conn->_control_streams.egress.control->sendbuf, cap); |
1482 | 0 | h2o_http3_encode_goaway_frame((uint8_t *)alloced.base, stream_or_push_id); |
1483 | 0 | conn->_control_streams.egress.control->sendbuf->size += cap; |
1484 | 0 | quicly_stream_sync_sendbuf(conn->_control_streams.egress.control->quic, 1); |
1485 | 0 | } |
1486 | | |
1487 | | void h2o_http3_send_h3_datagrams(h2o_http3_conn_t *conn, uint64_t flow_id, h2o_iovec_t *datagrams, size_t num_datagrams) |
1488 | 0 | { |
1489 | 0 | for (size_t i = 0; i < num_datagrams; ++i) { |
1490 | 0 | h2o_iovec_t *src = datagrams + i; |
1491 | 0 | uint8_t buf[quicly_encodev_capacity(flow_id) + src->len], *p = buf; |
1492 | 0 | p = quicly_encodev(p, flow_id); |
1493 | 0 | memcpy(p, src->base, src->len); |
1494 | 0 | p += src->len; |
1495 | 0 | ptls_iovec_t payload = ptls_iovec_init(buf, p - buf); |
1496 | 0 | quicly_send_datagram_frames(conn->super.quic, &payload, 1); |
1497 | 0 | } |
1498 | |
|
1499 | 0 | h2o_quic_schedule_timer(&conn->super); |
1500 | 0 | } |
1501 | | |
1502 | | uint64_t h2o_http3_decode_h3_datagram(h2o_iovec_t *payload, const void *_src, size_t len) |
1503 | 0 | { |
1504 | 0 | const uint8_t *src = _src, *end = src + len; |
1505 | 0 | uint64_t flow_id; |
1506 | |
|
1507 | 0 | if ((flow_id = ptls_decode_quicint(&src, end)) != UINT64_MAX) |
1508 | 0 | *payload = h2o_iovec_init(src, end - src); |
1509 | 0 | return flow_id; |
1510 | 0 | } |