/src/h2o/lib/http3/common.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2018 Fastly, Kazuho Oku |
3 | | * |
4 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | | * of this software and associated documentation files (the "Software"), to |
6 | | * deal in the Software without restriction, including without limitation the |
7 | | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
8 | | * sell copies of the Software, and to permit persons to whom the Software is |
9 | | * furnished to do so, subject to the following conditions: |
10 | | * |
11 | | * The above copyright notice and this permission notice shall be included in |
12 | | * all copies or substantial portions of the Software. |
13 | | * |
14 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
19 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
20 | | * IN THE SOFTWARE. |
21 | | */ |
22 | | #ifdef __APPLE__ |
23 | | #define __APPLE_USE_RFC_3542 /* to use IPV6_PKTINFO */ |
24 | | #endif |
25 | | #include <errno.h> |
26 | | #include <sys/types.h> |
27 | | #include <netinet/in.h> |
28 | | #include <netinet/udp.h> |
29 | | #include <pthread.h> |
30 | | #include <stdio.h> |
31 | | #include <sys/socket.h> |
32 | | #include "picotls/openssl.h" |
33 | | #include "h2o.h" |
34 | | #include "h2o/string_.h" |
35 | | #include "h2o/http3_common.h" |
36 | | #include "h2o/http3_internal.h" |
37 | | #include "h2o/multithread.h" |
38 | | #include "../probes_.h" |
39 | | |
/* Sentinel object: the acceptor callback returns the address of this object (rather than a real connection) to report that the
 * Initial packet could not be decrypted. Only the address is compared. */
h2o_quic_conn_t h2o_quic_accept_conn_decryption_failed;
/* NOTE(review): presumably a sentinel in the same spirit, telling the caller that an accepted connection has already been closed;
 * it is not referenced in this part of the file -- confirm against its users. */
h2o_http3_conn_t h2o_http3_accept_conn_closed;
42 | | |
/**
 * State attached to a unidirectional stream opened by the peer.
 */
struct st_h2o_http3_ingress_unistream_t {
    /**
     * back pointer
     */
    quicly_stream_t *quic;
    /**
     * Buffer that retains the bytes received on the stream until the handler consumes them.
     */
    h2o_buffer_t *recvbuf;
    /**
     * A callback that passes unparsed input to be handled. `src` is set to NULL when receiving a reset. Otherwise, the callback
     * advances `*src` past the bytes it has consumed; the caller then drops those bytes from `recvbuf`.
     */
    void (*handle_input)(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src,
                         const uint8_t *src_end, int is_eos);
};
58 | | |
/* human-readable description used when reporting an oversized HTTP/3 frame */
const char h2o_http3_err_frame_too_large[] = "HTTP/3 frame is too large";

/* protocol identifiers (ALPN, going by the name) of HTTP/3 and of the two draft versions "h3-29" and "h3-27" */
const ptls_iovec_t h2o_http3_alpn[3] = {{(void *)H2O_STRLIT("h3")}, {(void *)H2O_STRLIT("h3-29")}, {(void *)H2O_STRLIT("h3-27")}};
62 | | |
63 | | static void report_sendmsg_errors(h2o_error_reporter_t *reporter, uint64_t total_successes, uint64_t cur_successes) |
64 | 0 | { |
65 | 0 | char errstr[256]; |
66 | 0 | fprintf(stderr, "sendmsg failed %" PRIu64 " time%s, succeeded: %" PRIu64 " time%s, over the last minute: %s\n", |
67 | 0 | reporter->cur_errors, reporter->cur_errors > 1 ? "s" : "", cur_successes, cur_successes > 1 ? "s" : "", |
68 | 0 | h2o_strerror_r((int)reporter->data, errstr, sizeof(errstr))); |
69 | 0 | } |
70 | | |
/* file-wide tracker of `sendmsg` outcomes; `report_sendmsg_errors` is the callback that prints the summary */
static h2o_error_reporter_t track_sendmsg = H2O_ERROR_REPORTER_INITIALIZER(report_sendmsg_errors);
72 | | |
73 | | int h2o_quic_send_datagrams(h2o_quic_ctx_t *ctx, quicly_address_t *dest, quicly_address_t *src, struct iovec *datagrams, |
74 | | size_t num_datagrams) |
75 | 0 | { |
76 | 0 | union { |
77 | 0 | struct cmsghdr hdr; |
78 | 0 | char buf[ |
79 | 0 | #ifdef IPV6_PKTINFO |
80 | 0 | CMSG_SPACE(sizeof(struct in6_pktinfo)) |
81 | | #elif defined(IP_PKTINFO) |
82 | | CMSG_SPACE(sizeof(struct in_pktinfo)) |
83 | | #elif defined(IP_SENDSRCADDR) |
84 | | CMSG_SPACE(sizeof(struct in_addr)) |
85 | | #else |
86 | | CMSG_SPACE(1) |
87 | | #endif |
88 | 0 | #ifdef UDP_SEGMENT |
89 | 0 | + CMSG_SPACE(sizeof(uint16_t)) |
90 | 0 | #endif |
91 | | + CMSG_SPACE(1) /* sentry */ |
92 | 0 | ]; |
93 | 0 | } cmsgbuf = {.buf = {} /* zero-cleared so that CMSG_NXTHDR can be used for locating the *next* cmsghdr */}; |
94 | 0 | struct msghdr mess = { |
95 | 0 | .msg_name = &dest->sa, |
96 | 0 | .msg_namelen = quicly_get_socklen(&dest->sa), |
97 | 0 | .msg_control = cmsgbuf.buf, |
98 | 0 | .msg_controllen = sizeof(cmsgbuf.buf), |
99 | 0 | }; |
100 | 0 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mess); |
101 | 0 | int ret; |
102 | |
|
103 | 0 | #define PUSH_CMSG(level, type, value) \ |
104 | 0 | do { \ |
105 | 0 | cmsg->cmsg_level = (level); \ |
106 | 0 | cmsg->cmsg_type = (type); \ |
107 | 0 | cmsg->cmsg_len = CMSG_LEN(sizeof(value)); \ |
108 | 0 | memcpy(CMSG_DATA(cmsg), &value, sizeof(value)); \ |
109 | 0 | cmsg = CMSG_NXTHDR(&mess, cmsg); \ |
110 | 0 | } while (0) |
111 | | |
112 | | /* first CMSG is the source address */ |
113 | 0 | if (src->sa.sa_family != AF_UNSPEC) { |
114 | 0 | switch (src->sa.sa_family) { |
115 | 0 | case AF_INET: { |
116 | 0 | #if defined(IP_PKTINFO) |
117 | 0 | if (*ctx->sock.port != src->sin.sin_port) |
118 | 0 | return 0; |
119 | 0 | struct in_pktinfo info = {.ipi_spec_dst = src->sin.sin_addr}; |
120 | 0 | PUSH_CMSG(IPPROTO_IP, IP_PKTINFO, info); |
121 | | #elif defined(IP_SENDSRCADDR) |
122 | | if (*ctx->sock.port != src->sin.sin_port) |
123 | | return 0; |
124 | | struct sockaddr_in *fdaddr = (struct sockaddr_in *)&ctx->sock.addr; |
125 | | assert(fdaddr->sin_family == AF_INET); |
126 | | if (fdaddr->sin_addr.s_addr == INADDR_ANY) |
127 | | PUSH_CMSG(IPPROTO_IP, IP_SENDSRCADDR, src->sin.sin_addr); |
128 | | #else |
129 | | h2o_fatal("IP_PKTINFO not available"); |
130 | | #endif |
131 | 0 | } break; |
132 | 0 | case AF_INET6: |
133 | 0 | #ifdef IPV6_PKTINFO |
134 | 0 | if (*ctx->sock.port != src->sin6.sin6_port) |
135 | 0 | return 0; |
136 | 0 | struct in6_pktinfo info = {.ipi6_addr = src->sin6.sin6_addr}; |
137 | 0 | PUSH_CMSG(IPPROTO_IPV6, IPV6_PKTINFO, info); |
138 | | #else |
139 | | h2o_fatal("IPV6_PKTINFO not available"); |
140 | | #endif |
141 | 0 | break; |
142 | 0 | default: |
143 | 0 | h2o_fatal("unexpected address family"); |
144 | 0 | break; |
145 | 0 | } |
146 | 0 | } |
147 | | |
148 | | /* next CMSG is UDP_SEGMENT size (for GSO); assert that the input follows the expected pattern (see the doc-comment of |
149 | | * `quicly_send`), then set the CMSG and convert `datagrams` into one. */ |
150 | 0 | for (size_t i = 1; i < num_datagrams; ++i) { |
151 | 0 | assert(datagrams[i - 1].iov_base + datagrams[i - 1].iov_len == datagrams[i].iov_base); |
152 | 0 | assert(i == num_datagrams - 1 || datagrams[i].iov_len == datagrams[0].iov_len); |
153 | 0 | } |
154 | 0 | #ifdef UDP_SEGMENT |
155 | 0 | struct iovec gso_iovec; |
156 | 0 | if (num_datagrams > 1 && ctx->use_gso) { |
157 | 0 | uint16_t segsize = (uint16_t)datagrams[0].iov_len; |
158 | 0 | PUSH_CMSG(SOL_UDP, UDP_SEGMENT, segsize); |
159 | 0 | gso_iovec = (struct iovec){ |
160 | 0 | .iov_base = datagrams[0].iov_base, |
161 | 0 | .iov_len = datagrams[num_datagrams - 1].iov_base + datagrams[num_datagrams - 1].iov_len - datagrams[0].iov_base, |
162 | 0 | }; |
163 | 0 | datagrams = &gso_iovec; |
164 | 0 | num_datagrams = 1; |
165 | 0 | } |
166 | 0 | #endif |
167 | | |
168 | | /* commit CMSG length */ |
169 | 0 | if ((mess.msg_controllen = (socklen_t)((char *)cmsg - (char *)cmsgbuf.buf)) == 0) |
170 | 0 | mess.msg_control = NULL; |
171 | | |
172 | | /* send datagrams */ |
173 | 0 | for (size_t i = 0; i < num_datagrams; ++i) { |
174 | 0 | mess.msg_iov = datagrams + i; |
175 | 0 | mess.msg_iovlen = 1; |
176 | 0 | while ((ret = (int)sendmsg(h2o_socket_get_fd(ctx->sock.sock), &mess, 0)) == -1 && errno == EINTR) |
177 | 0 | ; |
178 | 0 | if (ret == -1) |
179 | 0 | goto SendmsgError; |
180 | 0 | } |
181 | | |
182 | 0 | h2o_error_reporter_record_success(&track_sendmsg); |
183 | |
|
184 | 0 | return 1; |
185 | | |
186 | 0 | SendmsgError: |
187 | | /* The UDP stack returns EINVAL (linux) or EADDRNOTAVAIL (darwin, and presumably other BSD) when it was unable to use the |
188 | | * designated source address. We communicate that back to the caller so that the connection can be closed immediately. */ |
189 | 0 | if (src->sa.sa_family != AF_UNSPEC && (errno == EINVAL || errno == EADDRNOTAVAIL)) |
190 | 0 | return 0; |
191 | | |
192 | | /* Temporary failure to send a packet is not a permanent error fo the connection. (TODO do we want do something more |
193 | | * specific?) */ |
194 | | |
195 | | /* Log the number of failed invocations once per minute, if there has been such a failure. */ |
196 | 0 | h2o_error_reporter_record_error(ctx->loop, &track_sendmsg, 60000, errno); |
197 | |
|
198 | 0 | return 1; |
199 | |
|
200 | 0 | #undef PUSH_CMSG |
201 | 0 | } |
202 | | |
203 | | static inline const h2o_http3_conn_callbacks_t *get_callbacks(h2o_http3_conn_t *conn) |
204 | 0 | { |
205 | 0 | return (const h2o_http3_conn_callbacks_t *)conn->super.callbacks; |
206 | 0 | } |
207 | | |
208 | | static void ingress_unistream_on_destroy(quicly_stream_t *qs, quicly_error_t err) |
209 | 0 | { |
210 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = qs->data; |
211 | 0 | h2o_buffer_dispose(&stream->recvbuf); |
212 | 0 | free(stream); |
213 | 0 | } |
214 | | |
215 | | static void ingress_unistream_on_receive(quicly_stream_t *qs, size_t off, const void *input, size_t len) |
216 | 0 | { |
217 | 0 | h2o_http3_conn_t *conn = *quicly_get_data(qs->conn); |
218 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = qs->data; |
219 | | |
220 | | /* save received data */ |
221 | 0 | h2o_http3_update_recvbuf(&stream->recvbuf, off, input, len); |
222 | | |
223 | | /* determine bytes that can be handled */ |
224 | 0 | const uint8_t *src = (const uint8_t *)stream->recvbuf->bytes, |
225 | 0 | *src_end = src + quicly_recvstate_bytes_available(&stream->quic->recvstate); |
226 | 0 | if (src == src_end && !quicly_recvstate_transfer_complete(&stream->quic->recvstate)) |
227 | 0 | return; |
228 | | |
229 | | /* handle the bytes */ |
230 | 0 | stream->handle_input(conn, stream, &src, src_end, quicly_recvstate_transfer_complete(&stream->quic->recvstate)); |
231 | 0 | if (quicly_get_state(conn->super.quic) >= QUICLY_STATE_CLOSING) |
232 | 0 | return; |
233 | | |
234 | | /* remove bytes that have been consumed */ |
235 | 0 | size_t bytes_consumed = src - (const uint8_t *)stream->recvbuf->bytes; |
236 | 0 | if (bytes_consumed != 0) { |
237 | 0 | h2o_buffer_consume(&stream->recvbuf, bytes_consumed); |
238 | 0 | quicly_stream_sync_recvbuf(stream->quic, bytes_consumed); |
239 | 0 | } |
240 | 0 | } |
241 | | |
242 | | static void ingress_unistream_on_receive_reset(quicly_stream_t *qs, quicly_error_t err) |
243 | 0 | { |
244 | 0 | h2o_http3_conn_t *conn = *quicly_get_data(qs->conn); |
245 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = qs->data; |
246 | |
|
247 | 0 | stream->handle_input(conn, stream, NULL, NULL, 1); |
248 | 0 | } |
249 | | |
250 | | static void qpack_encoder_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, |
251 | | const uint8_t **src, const uint8_t *src_end, int is_eos) |
252 | 0 | { |
253 | 0 | if (src == NULL || is_eos) { |
254 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
255 | 0 | return; |
256 | 0 | } |
257 | | |
258 | 0 | int64_t *unblocked_stream_ids; |
259 | 0 | size_t num_unblocked; |
260 | 0 | int ret; |
261 | 0 | const char *err_desc = NULL; |
262 | 0 | if ((ret = h2o_qpack_decoder_handle_input(conn->qpack.dec, &unblocked_stream_ids, &num_unblocked, src, src_end, &err_desc)) != |
263 | 0 | 0) { |
264 | 0 | h2o_quic_close_connection(&conn->super, ret, err_desc); |
265 | 0 | return; |
266 | 0 | } |
267 | | |
268 | | /* TODO handle unblocked streams */ |
269 | 0 | } |
270 | | |
271 | | static void qpack_decoder_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, |
272 | | const uint8_t **src, const uint8_t *src_end, int is_eos) |
273 | 0 | { |
274 | 0 | if (src == NULL || is_eos) { |
275 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
276 | 0 | return; |
277 | 0 | } |
278 | | |
279 | 0 | int ret; |
280 | 0 | const char *err_desc = NULL; |
281 | 0 | if ((ret = h2o_qpack_encoder_handle_input(conn->qpack.enc, src, src_end, &err_desc)) != 0) |
282 | 0 | h2o_quic_close_connection(&conn->super, ret, err_desc); |
283 | 0 | } |
284 | | |
285 | | static void control_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, |
286 | | const uint8_t **src, const uint8_t *src_end, int is_eos) |
287 | 0 | { |
288 | 0 | if (src == NULL || is_eos) { |
289 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
290 | 0 | return; |
291 | 0 | } |
292 | | |
293 | 0 | do { |
294 | 0 | h2o_http3_read_frame_t frame; |
295 | 0 | quicly_error_t ret; |
296 | 0 | const char *err_desc = NULL; |
297 | |
|
298 | 0 | if ((ret = h2o_http3_read_frame(&frame, quicly_is_client(conn->super.quic), H2O_HTTP3_STREAM_TYPE_CONTROL, |
299 | 0 | conn->max_frame_payload_size, src, src_end, &err_desc)) != 0) { |
300 | 0 | if (ret != H2O_HTTP3_ERROR_INCOMPLETE) |
301 | 0 | h2o_quic_close_connection(&conn->super, ret, err_desc); |
302 | 0 | break; |
303 | 0 | } |
304 | 0 | if (h2o_http3_has_received_settings(conn) == (frame.type == H2O_HTTP3_FRAME_TYPE_SETTINGS) || |
305 | 0 | frame.type == H2O_HTTP3_FRAME_TYPE_DATA) { |
306 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_FRAME_UNEXPECTED, NULL); |
307 | 0 | break; |
308 | 0 | } |
309 | 0 | get_callbacks(conn)->handle_control_stream_frame(conn, frame.type, frame.payload, frame.length); |
310 | 0 | if (quicly_get_state(conn->super.quic) >= QUICLY_STATE_CLOSING) |
311 | 0 | break; |
312 | 0 | } while (*src != src_end); |
313 | 0 | } |
314 | | |
315 | | static void discard_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src, |
316 | | const uint8_t *src_end, int is_eos) |
317 | 0 | { |
318 | 0 | if (src == NULL) |
319 | 0 | return; |
320 | 0 | *src = src_end; |
321 | 0 | } |
322 | | |
323 | | static void unknown_type_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src, |
324 | | const uint8_t *src_end, int is_eos) |
325 | 0 | { |
326 | 0 | uint64_t type; |
327 | | |
328 | | /* resets are allowed at least until the type is being determined */ |
329 | 0 | if (src == NULL) |
330 | 0 | return; |
331 | | |
332 | | /* read the type, or just return if incomplete */ |
333 | 0 | if ((type = quicly_decodev(src, src_end)) == UINT64_MAX) |
334 | 0 | return; |
335 | | |
336 | 0 | switch (type) { |
337 | 0 | case H2O_HTTP3_STREAM_TYPE_CONTROL: |
338 | 0 | conn->_control_streams.ingress.control = stream; |
339 | 0 | stream->handle_input = control_stream_handle_input; |
340 | 0 | break; |
341 | 0 | case H2O_HTTP3_STREAM_TYPE_QPACK_ENCODER: |
342 | 0 | conn->_control_streams.ingress.qpack_encoder = stream; |
343 | 0 | stream->handle_input = qpack_encoder_stream_handle_input; |
344 | 0 | break; |
345 | 0 | case H2O_HTTP3_STREAM_TYPE_QPACK_DECODER: |
346 | 0 | conn->_control_streams.ingress.qpack_decoder = stream; |
347 | 0 | stream->handle_input = qpack_decoder_stream_handle_input; |
348 | 0 | break; |
349 | 0 | default: |
350 | 0 | quicly_request_stop(stream->quic, H2O_HTTP3_ERROR_STREAM_CREATION); |
351 | 0 | stream->handle_input = discard_handle_input; |
352 | 0 | break; |
353 | 0 | } |
354 | | |
355 | 0 | return stream->handle_input(conn, stream, src, src_end, is_eos); |
356 | 0 | } |
357 | | |
358 | | static void egress_unistream_on_destroy(quicly_stream_t *qs, quicly_error_t err) |
359 | 18.1k | { |
360 | 18.1k | struct st_h2o_http3_egress_unistream_t *stream = qs->data; |
361 | 18.1k | h2o_buffer_dispose(&stream->sendbuf); |
362 | 18.1k | free(stream); |
363 | 18.1k | } |
364 | | |
365 | | static void egress_unistream_on_send_shift(quicly_stream_t *qs, size_t delta) |
366 | 0 | { |
367 | 0 | struct st_h2o_http3_egress_unistream_t *stream = qs->data; |
368 | 0 | h2o_buffer_consume(&stream->sendbuf, delta); |
369 | 0 | } |
370 | | |
371 | | static void egress_unistream_on_send_emit(quicly_stream_t *qs, size_t off, void *dst, size_t *len, int *wrote_all) |
372 | 18.1k | { |
373 | 18.1k | struct st_h2o_http3_egress_unistream_t *stream = qs->data; |
374 | | |
375 | 18.1k | if (*len >= stream->sendbuf->size - off) { |
376 | 18.1k | *len = stream->sendbuf->size - off; |
377 | 18.1k | *wrote_all = 1; |
378 | 18.1k | } else { |
379 | 0 | *wrote_all = 0; |
380 | 0 | } |
381 | 18.1k | memcpy(dst, stream->sendbuf->bytes + off, *len); |
382 | 18.1k | } |
383 | | |
384 | | static void egress_unistream_on_send_stop(quicly_stream_t *qs, quicly_error_t err) |
385 | 0 | { |
386 | 0 | struct st_h2o_http3_conn_t *conn = *quicly_get_data(qs->conn); |
387 | 0 | h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL); |
388 | 0 | } |
389 | | |
390 | | void h2o_http3_on_create_unidirectional_stream(quicly_stream_t *qs) |
391 | 18.1k | { |
392 | 18.1k | if (quicly_stream_is_self_initiated(qs)) { |
393 | | /* create egress unistream */ |
394 | 18.1k | static const quicly_stream_callbacks_t callbacks = {egress_unistream_on_destroy, egress_unistream_on_send_shift, |
395 | 18.1k | egress_unistream_on_send_emit, egress_unistream_on_send_stop}; |
396 | 18.1k | struct st_h2o_http3_egress_unistream_t *stream = h2o_mem_alloc(sizeof(*stream)); |
397 | 18.1k | qs->data = stream; |
398 | 18.1k | qs->callbacks = &callbacks; |
399 | 18.1k | stream->quic = qs; |
400 | 18.1k | h2o_buffer_init(&stream->sendbuf, &h2o_socket_buffer_prototype); |
401 | 18.1k | } else { |
402 | | /* create ingress unistream */ |
403 | 0 | static const quicly_stream_callbacks_t callbacks = { |
404 | 0 | ingress_unistream_on_destroy, NULL, NULL, NULL, ingress_unistream_on_receive, ingress_unistream_on_receive_reset}; |
405 | 0 | struct st_h2o_http3_ingress_unistream_t *stream = h2o_mem_alloc(sizeof(*stream)); |
406 | 0 | qs->data = stream; |
407 | 0 | qs->callbacks = &callbacks; |
408 | 0 | stream->quic = qs; |
409 | 0 | h2o_buffer_init(&stream->recvbuf, &h2o_socket_buffer_prototype); |
410 | 0 | stream->handle_input = unknown_type_handle_input; |
411 | 0 | } |
412 | 18.1k | } |
413 | | |
414 | | static quicly_error_t open_egress_unistream(h2o_http3_conn_t *conn, struct st_h2o_http3_egress_unistream_t **stream, |
415 | | h2o_iovec_t initial_bytes) |
416 | 18.1k | { |
417 | 18.1k | quicly_stream_t *qs; |
418 | 18.1k | quicly_error_t ret; |
419 | | |
420 | 18.1k | if ((ret = quicly_open_stream(conn->super.quic, &qs, 1)) != 0) |
421 | 0 | return ret; |
422 | 18.1k | *stream = qs->data; |
423 | 18.1k | assert((*stream)->quic == qs); |
424 | | |
425 | 18.1k | h2o_buffer_append(&(*stream)->sendbuf, initial_bytes.base, initial_bytes.len); |
426 | 18.1k | return quicly_stream_sync_sendbuf((*stream)->quic, 1); |
427 | 18.1k | } |
428 | | |
429 | | static uint8_t *accept_hashkey_flatten_address(uint8_t *p, quicly_address_t *addr) |
430 | 0 | { |
431 | 0 | switch (addr->sa.sa_family) { |
432 | 0 | case AF_INET: |
433 | 0 | *p++ = 4; |
434 | 0 | memcpy(p, &addr->sin.sin_addr.s_addr, 4); |
435 | 0 | p += 4; |
436 | 0 | memcpy(p, &addr->sin.sin_port, 2); |
437 | 0 | p += 2; |
438 | 0 | break; |
439 | 0 | case AF_INET6: |
440 | 0 | *p++ = 6; |
441 | 0 | memcpy(p, addr->sin6.sin6_addr.s6_addr, 16); |
442 | 0 | p += 16; |
443 | 0 | memcpy(p, &addr->sin.sin_port, 2); |
444 | 0 | p += 2; |
445 | 0 | break; |
446 | 0 | case AF_UNSPEC: |
447 | 0 | *p++ = 0; |
448 | 0 | break; |
449 | 0 | default: |
450 | 0 | h2o_fatal("unknown protocol family"); |
451 | 0 | break; |
452 | 0 | } |
453 | 0 | return p; |
454 | 0 | } |
455 | | |
456 | | static uint64_t calc_accept_hashkey(quicly_address_t *destaddr, quicly_address_t *srcaddr, ptls_iovec_t src_cid) |
457 | 0 | { |
458 | | /* prepare key */ |
459 | 0 | static __thread EVP_CIPHER_CTX *cipher = NULL; |
460 | 0 | if (cipher == NULL) { |
461 | 0 | static uint8_t key[PTLS_AES128_KEY_SIZE]; |
462 | 0 | H2O_MULTITHREAD_ONCE({ ptls_openssl_random_bytes(key, sizeof(key)); }); |
463 | 0 | cipher = EVP_CIPHER_CTX_new(); |
464 | 0 | EVP_EncryptInit_ex(cipher, EVP_aes_128_cbc(), NULL, key, NULL); |
465 | 0 | } |
466 | |
|
467 | 0 | uint8_t buf[(1 + 16 + 2) * 2 + QUICLY_MAX_CID_LEN_V1 + PTLS_AES_BLOCK_SIZE] = {0}; |
468 | 0 | uint8_t *p = buf; |
469 | | |
470 | | /* build plaintext to encrypt */ |
471 | 0 | p = accept_hashkey_flatten_address(p, destaddr); |
472 | 0 | p = accept_hashkey_flatten_address(p, srcaddr); |
473 | 0 | memcpy(p, src_cid.base, src_cid.len); |
474 | 0 | p += src_cid.len; |
475 | 0 | assert(p <= buf + sizeof(buf)); |
476 | 0 | size_t bytes_to_encrypt = ((p - buf) + PTLS_AES_BLOCK_SIZE - 1) / PTLS_AES_BLOCK_SIZE * PTLS_AES_BLOCK_SIZE; |
477 | 0 | assert(bytes_to_encrypt <= sizeof(buf)); |
478 | | |
479 | 0 | { /* encrypt */ |
480 | 0 | EVP_EncryptInit_ex(cipher, NULL, NULL, NULL, NULL); |
481 | 0 | int bytes_encrypted = 0, ret = EVP_EncryptUpdate(cipher, buf, &bytes_encrypted, buf, (int)bytes_to_encrypt); |
482 | 0 | assert(ret); |
483 | 0 | assert(bytes_encrypted == bytes_to_encrypt); |
484 | 0 | } |
485 | | |
486 | | /* use the last `size_t` bytes of the CBC output as the result */ |
487 | 0 | uint64_t result; |
488 | 0 | memcpy(&result, buf + bytes_to_encrypt - sizeof(result), sizeof(result)); |
489 | | /* avoid 0 (used as nonexist) */ |
490 | 0 | if (result == 0) |
491 | 0 | result = 1; |
492 | 0 | return result; |
493 | 0 | } |
494 | | |
495 | | static void drop_from_acceptmap(h2o_quic_ctx_t *ctx, h2o_quic_conn_t *conn) |
496 | 6.03k | { |
497 | 6.03k | if (conn->_accept_hashkey != 0) { |
498 | 0 | khint_t iter; |
499 | 0 | if ((iter = kh_get_h2o_quic_acceptmap(ctx->conns_accepting, conn->_accept_hashkey)) != kh_end(ctx->conns_accepting)) |
500 | 0 | kh_del_h2o_quic_acceptmap(ctx->conns_accepting, iter); |
501 | 0 | conn->_accept_hashkey = 0; |
502 | 0 | } |
503 | 6.03k | } |
504 | | |
505 | | static void send_version_negotiation(h2o_quic_ctx_t *ctx, quicly_address_t *destaddr, ptls_iovec_t dest_cid, |
506 | | quicly_address_t *srcaddr, ptls_iovec_t src_cid, const uint32_t *versions) |
507 | 0 | { |
508 | 0 | uint8_t payload[QUICLY_MIN_CLIENT_INITIAL_SIZE]; |
509 | 0 | size_t payload_size = quicly_send_version_negotiation(ctx->quic, dest_cid, src_cid, versions, payload); |
510 | 0 | assert(payload_size != SIZE_MAX); |
511 | 0 | struct iovec vec = {.iov_base = payload, .iov_len = payload_size}; |
512 | 0 | h2o_quic_send_datagrams(ctx, destaddr, srcaddr, &vec, 1); |
513 | 0 | return; |
514 | 0 | } |
515 | | |
/**
 * Handles the QUIC packets that were carried by one datagram: locates the connection they belong to (or accepts a new one, or
 * forwards the packets, or responds statelessly), then feeds the packets to that connection.
 *
 * All the routing decisions are made by looking at the first packet (`packets[0]`). The steps are:
 *   1. If the node ID and the thread ID found in the destination CID are those of this context, look up `ctx->conns_by_id` by the
 *      master ID.
 *   2. If no connection has been chosen and the destination CID might have been generated by the client, look up
 *      `ctx->conns_accepting` by the hash of the 4-tuple and the source CID; if that lookup fails too, call `ctx->acceptor` for the
 *      Initial packets until one of them is accepted.
 *   3. Pass every packet except the one consumed by the acceptor to `quicly_receive`, then reschedule the timer of the connection
 *      and call `ctx->notify_conn_update` if it is set.
 *
 * `num_packets` must not be zero. `ttl` is passed through to `ctx->forward_packets`; some of the forwarding paths give up when it
 * is zero.
 */
static void process_packets(h2o_quic_ctx_t *ctx, quicly_address_t *destaddr, quicly_address_t *srcaddr, uint8_t ttl,
                            quicly_decoded_packet_t *packets, size_t num_packets)
{
    h2o_quic_conn_t *conn = NULL;
    /* index of the packet that has been consumed by the acceptor, or SIZE_MAX if none */
    size_t accepted_packet_index = SIZE_MAX;

    assert(num_packets != 0);

    if (ctx->quic_stats != NULL) {
        ctx->quic_stats->packet_received += num_packets;
    }

#if H2O_USE_DTRACE
    if (PTLS_UNLIKELY(H2O_H3_PACKET_RECEIVE_ENABLED())) {
        for (size_t i = 0; i != num_packets; ++i)
            H2O_H3_PACKET_RECEIVE(&destaddr->sa, &srcaddr->sa, packets[i].octets.base, packets[i].octets.len);
    }
#endif

    /* silently drop packets carrying a source CID longer than the maximum */
    if (packets[0].cid.src.len > QUICLY_MAX_CID_LEN_V1)
        return;

    /* find the matching connection, by first looking at the CID (all packets as client, or Handshake, 1-RTT packets as server) */
    if (packets[0].cid.dest.plaintext.node_id == ctx->next_cid->node_id &&
        packets[0].cid.dest.plaintext.thread_id == ctx->next_cid->thread_id) {
        khiter_t iter = kh_get_h2o_quic_idmap(ctx->conns_by_id, packets[0].cid.dest.plaintext.master_id);
        if (iter != kh_end(ctx->conns_by_id)) {
            conn = kh_val(ctx->conns_by_id, iter);
            /* drop long header packets with different 4-tuple than the original, or if the incoming packet might be the first-
             * flight from the client, advance to state lookup using `conns_accepting` */
            if (!quicly_is_destination(conn->quic, &destaddr->sa, &srcaddr->sa, packets)) {
                if (!packets[0].cid.dest.might_be_client_generated)
                    return;
                conn = NULL;
            }
        } else if (!packets[0].cid.dest.might_be_client_generated) {
            /* send stateless reset when we could not find a matching connection for a 1 RTT packet */
            if (packets[0].octets.len >= QUICLY_STATELESS_RESET_PACKET_MIN_LEN) {
                uint8_t payload[QUICLY_MIN_CLIENT_INITIAL_SIZE];
                size_t payload_size = quicly_send_stateless_reset(ctx->quic, packets[0].cid.dest.encrypted.base, payload);
                if (payload_size != SIZE_MAX) {
                    struct iovec vec = {.iov_base = payload, .iov_len = payload_size};
                    /* the addresses are swapped, as this is a response to the packet being received */
                    h2o_quic_send_datagrams(ctx, srcaddr, destaddr, &vec, 1);
                }
            }
            return;
        }
    } else if (!packets[0].cid.dest.might_be_client_generated) {
        /* forward 1-RTT packets belonging to different nodes, threads */
        if (ttl == 0)
            return;
        uint64_t offending_node_id = packets[0].cid.dest.plaintext.node_id;
        if (ctx->forward_packets != NULL && ctx->forward_packets(ctx, &offending_node_id, packets[0].cid.dest.plaintext.thread_id,
                                                                 destaddr, srcaddr, ttl, packets, num_packets))
            return;
        /* non-authenticating 1-RTT packets are potentially stateless resets (FIXME handle them, note that we need to use a hashdos-
         * resistant hash map that also meets constant-time comparison requirements) */
        return;
    }

    if (conn == NULL) {
        /* Initial or 0-RTT packet, use 4-tuple to match the thread and the connection */
        assert(packets[0].cid.dest.might_be_client_generated);
        uint64_t accept_hashkey = calc_accept_hashkey(destaddr, srcaddr, packets[0].cid.src);
        if (ctx->accept_thread_divisor != 0) {
            /* the thread in charge is determined by the hashkey; hand the packets over if that is not us */
            uint32_t offending_thread = accept_hashkey % ctx->accept_thread_divisor;
            if (offending_thread != ctx->next_cid->thread_id) {
                if (ctx->forward_packets != NULL)
                    ctx->forward_packets(ctx, NULL, offending_thread, destaddr, srcaddr, ttl, packets, num_packets);
                return;
            }
        }
        khiter_t iter = kh_get_h2o_quic_acceptmap(ctx->conns_accepting, accept_hashkey);
        if (iter == kh_end(ctx->conns_accepting)) {
            /* a new connection for this thread (at least on this process); accept or delegate to newer process */
            if (ctx->acceptor != NULL) {
                if (packets[0].version != 0 && !quicly_is_supported_version(packets[0].version)) {
                    send_version_negotiation(ctx, srcaddr, packets[0].cid.src, destaddr, packets[0].cid.dest.encrypted,
                                             quicly_supported_versions);
                    return;
                }
            } else {
                /* This is the offending thread but it is not accepting, which means that the process (or the thread) is not acting
                 * as a server (likely gracefully shutting down). Let the application process forward the packet to the next
                 * generation. */
                if (ctx->forward_packets != NULL &&
                    ctx->forward_packets(ctx, NULL, ctx->next_cid->thread_id, destaddr, srcaddr, ttl, packets, num_packets))
                    return;
                /* If not forwarded, send rejection to the peer. A Version Negotiation packet that carries only a greasing version
                 * number is used for the purpose, hoping that that signal will trigger immediate downgrade to HTTP/2, across the
                 * broad spectrum of the client implementations than if CONNECTION_REFUSED is being used. */
                if (packets[0].version != 0) {
                    static const uint32_t no_versions[] = {0};
                    send_version_negotiation(ctx, srcaddr, packets[0].cid.src, destaddr, packets[0].cid.dest.encrypted,
                                             no_versions);
                }
                return;
            }
            /* try to accept any of the Initial packets being received */
            size_t i;
            for (i = 0; i != num_packets; ++i) {
                if ((packets[i].octets.base[0] & QUICLY_PACKET_TYPE_BITMASK) == QUICLY_PACKET_TYPE_INITIAL &&
                    (conn = ctx->acceptor(ctx, destaddr, srcaddr, packets + i)) != NULL) {
                    /* non-null generally means success, except for H2O_QUIC_ACCEPT_CONN_DECRYPTION_FAILED */
                    if (conn == &h2o_quic_accept_conn_decryption_failed) {
                        /* failed to decrypt Initial packet <=> it could belong to a connection on a different node; forward it to
                         * the destination being claimed by the DCID */
                        uint64_t offending_node_id = packets[i].cid.dest.plaintext.node_id;
                        uint32_t offending_thread_id = packets[i].cid.dest.plaintext.thread_id;
                        if (ctx->forward_packets != NULL && ttl > 0 &&
                            (offending_node_id != ctx->next_cid->node_id || offending_thread_id != ctx->next_cid->thread_id))
                            ctx->forward_packets(ctx, &offending_node_id, offending_thread_id, destaddr, srcaddr, ttl, packets,
                                                 num_packets);
                        return;
                    }
                    break;
                }
            }
            if (conn == NULL)
                return;
            /* register the new connection to `conns_accepting`, and remember which packet has been consumed by the acceptor */
            accepted_packet_index = i;
            conn->_accept_hashkey = accept_hashkey;
            int r;
            iter = kh_put_h2o_quic_acceptmap(conn->ctx->conns_accepting, accept_hashkey, &r);
            assert(iter != kh_end(conn->ctx->conns_accepting));
            kh_val(conn->ctx->conns_accepting, iter) = conn;
        } else {
            /* likely have found a connection in `conns_accepting` */
            conn = kh_val(ctx->conns_accepting, iter);
            assert(conn != NULL);
            assert(!quicly_is_client(conn->quic));
            if (!quicly_is_destination(conn->quic, &destaddr->sa, &srcaddr->sa, packets)) {
                uint64_t offending_node_id = packets[0].cid.dest.plaintext.node_id;
                uint32_t offending_thread_id = packets[0].cid.dest.plaintext.thread_id;
                if (offending_node_id != ctx->next_cid->node_id || offending_thread_id != ctx->next_cid->thread_id) {
                    /* accept key matches to a connection being established, but DCID doesn't -- likely a second (or later) Initial
                     * that is supposed to be handled by another node. forward it. */
                    if (ttl == 0)
                        return;
                    if (ctx->forward_packets != NULL)
                        ctx->forward_packets(ctx, &offending_node_id, offending_thread_id, destaddr, srcaddr, ttl, packets,
                                             num_packets);
                }
                /* regardless of forwarding outcome, we need to drop this packet as it is not for us */
                return;
            }
        }
    }

    /* receive packets to the found connection */
    for (size_t i = 0; i != num_packets; ++i) {
        /* the packet consumed by the acceptor must not be fed again */
        if (i != accepted_packet_index) {
            quicly_error_t ret = quicly_receive(conn->quic, &destaddr->sa, &srcaddr->sa, packets + i);
            switch (ret) {
            /* these two errors lead to destroying the connection immediately; the remaining packets are dropped */
            case QUICLY_ERROR_STATE_EXHAUSTION:
            case PTLS_ERROR_NO_MEMORY:
                fprintf(stderr, "%s: `quicly_receive()` returned ret:%" PRId64 "\n", __func__, ret);
                conn->callbacks->destroy_connection(conn);
                return;
            }
            /* count the packet as processed unless it was ignored or could not be decrypted */
            if (ret != QUICLY_ERROR_PACKET_IGNORED && ret != QUICLY_ERROR_DECRYPTION_FAILED) {
                if (ctx->quic_stats != NULL) {
                    ++ctx->quic_stats->packet_processed;
                }
            }
        }
    }

    h2o_quic_schedule_timer(conn);
    if (ctx->notify_conn_update != NULL)
        ctx->notify_conn_update(ctx, conn);
}
688 | | |
/**
 * Reads a batch of UDP datagrams from `sock` and dispatches the QUIC packets they carry.
 *
 * At most `PTLS_ELEMENTSOF(dgrams)` datagrams are read per call (one `recvmmsg` call on linux, repeated `recvmsg` calls
 * elsewhere). For each datagram the destination address is taken from the ancillary data when one of the supported control
 * messages is present (otherwise it is marked `AF_UNSPEC`), the TTL is initialized to `ctx->default_ttl`, and the optional
 * `ctx->preprocess_packet` callback is invoked; a datagram for which the callback returns false is dropped. The surviving
 * datagrams are decoded into QUIC packets, and consecutive packets that share the source address, destination address, TTL and
 * DCID are passed to `process_packets` as one group.
 */
void h2o_quic_read_socket(h2o_quic_ctx_t *ctx, h2o_socket_t *sock)
{
    /* per-datagram storage: addresses, the iovec handed to the kernel, TTL, ancillary data buffer, and the payload buffer */
    struct {
        quicly_address_t destaddr, srcaddr;
        struct iovec vec;
        uint8_t ttl;
        union {
            struct cmsghdr _align; /* naturally align the contents of controlbuf (which are of type cmsghdr) */
            char controlbuf[
#ifdef IPV6_PKTINFO
                CMSG_SPACE(sizeof(struct in6_pktinfo))
#elif defined(IP_PKTINFO)
                CMSG_SPACE(sizeof(struct in_pktinfo))
#elif defined(IP_RECVDSTADDR)
                CMSG_SPACE(sizeof(struct in_addr))
#else
                CMSG_SPACE(1)
#endif
            ];
        };
        uint8_t buf[1600];
    } dgrams[10];
#ifdef __linux__
    struct mmsghdr mess[PTLS_ELEMENTSOF(dgrams)];
#else
    struct {
        struct msghdr msg_hdr;
    } mess[PTLS_ELEMENTSOF(dgrams)];
#endif

/* (re)initializes `mess[i]` and `dgrams[i]` so that slot `i` can be passed to recvmsg / recvmmsg */
#define INIT_DGRAMS(i) \
    do { \
        mess[i].msg_hdr = (struct msghdr){ \
            .msg_name = &dgrams[i].srcaddr, \
            .msg_namelen = sizeof(dgrams[i].srcaddr), \
            .msg_iov = &dgrams[i].vec, \
            .msg_iovlen = 1, \
            .msg_control = &dgrams[i].controlbuf, \
            .msg_controllen = sizeof(dgrams[i].controlbuf), \
        }; \
        memset(&dgrams[i].destaddr, 0, sizeof(dgrams[i].destaddr)); \
        dgrams[i].vec.iov_base = dgrams[i].buf; \
        dgrams[i].vec.iov_len = sizeof(dgrams[i].buf); \
    } while (0)

    int fd = h2o_socket_get_fd(sock);
    size_t dgram_index, num_dgrams;

    /* Read datagrams. Sender should be provided an ACK every fraction of RTT, otherwise its behavior becomes bursty (assuming that
     * pacing is not used), rather than packets being spread across entire round-trip. To minimize the chance of us entering such
     * situation, number of datagrams being read at once is limited to `PTLS_ELEMENTSOF(dgrams)`. In other words, one ack is
     * generated for no more than every 10 ack-eliciting packets being received, unless the ack-frequency extension is used. */
#ifdef __linux__
    {
        int rret;
        /* all the slots are reinitialized before each attempt, including the retries caused by EINTR */
        do {
            for (dgram_index = 0; dgram_index < PTLS_ELEMENTSOF(dgrams); ++dgram_index)
                INIT_DGRAMS(dgram_index);
        } while ((rret = recvmmsg(fd, mess, PTLS_ELEMENTSOF(mess), 0, NULL)) < 0 && errno == EINTR);
        if (rret <= 0)
            goto Exit;
        num_dgrams = (size_t)rret;
        /* record the received size of each datagram in its iovec */
        for (dgram_index = 0; dgram_index < num_dgrams; ++dgram_index)
            dgrams[dgram_index].vec.iov_len = (size_t)mess[dgram_index].msg_len;
    }
#else
    for (dgram_index = 0; dgram_index < PTLS_ELEMENTSOF(dgrams); ++dgram_index) {
        ssize_t rret;
        do {
            INIT_DGRAMS(dgram_index);
        } while ((rret = recvmsg(fd, &mess[dgram_index].msg_hdr, 0)) < 0 && errno == EINTR);
        if (rret < 0)
            break;
        dgrams[dgram_index].vec.iov_len = rret;
    }
    num_dgrams = dgram_index;
    if (num_dgrams == 0)
        goto Exit;
#endif

    /* normalize and store the obtained data into `dgrams` */
    for (dgram_index = 0; dgram_index < num_dgrams; ++dgram_index) {
        { /* fetch destination address */
            struct cmsghdr *cmsg;
            for (cmsg = CMSG_FIRSTHDR(&mess[dgram_index].msg_hdr); cmsg != NULL;
                 cmsg = CMSG_NXTHDR(&mess[dgram_index].msg_hdr, cmsg)) {
#ifdef IP_PKTINFO
                if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_PKTINFO) {
                    dgrams[dgram_index].destaddr.sin.sin_family = AF_INET;
                    memcpy(&dgrams[dgram_index].destaddr.sin.sin_addr, CMSG_DATA(cmsg) + offsetof(struct in_pktinfo, ipi_addr),
                           sizeof(struct in_addr));
                    dgrams[dgram_index].destaddr.sin.sin_port = *ctx->sock.port;
                    goto DestAddrFound;
                }
#endif
#ifdef IP_RECVDSTADDR
                if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_RECVDSTADDR) {
                    dgrams[dgram_index].destaddr.sin.sin_family = AF_INET;
                    memcpy(&dgrams[dgram_index].destaddr.sin.sin_addr, CMSG_DATA(cmsg), sizeof(struct in_addr));
                    dgrams[dgram_index].destaddr.sin.sin_port = *ctx->sock.port;
                    goto DestAddrFound;
                }
#endif
#ifdef IPV6_PKTINFO
                if (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) {
                    dgrams[dgram_index].destaddr.sin6.sin6_family = AF_INET6;
                    memcpy(&dgrams[dgram_index].destaddr.sin6.sin6_addr, CMSG_DATA(cmsg) + offsetof(struct in6_pktinfo, ipi6_addr),
                           sizeof(struct in6_addr));
                    dgrams[dgram_index].destaddr.sin6.sin6_port = *ctx->sock.port;
                    goto DestAddrFound;
                }
#endif
            }
            /* none of the supported control messages were found; the destination address is unknown */
            dgrams[dgram_index].destaddr.sa.sa_family = AF_UNSPEC;
        DestAddrFound:;
        }
        dgrams[dgram_index].ttl = ctx->default_ttl;
        /* preprocess (and drop the packet if it failed) */
        if (ctx->preprocess_packet != NULL &&
            !ctx->preprocess_packet(ctx, &mess[dgram_index].msg_hdr, &dgrams[dgram_index].destaddr, &dgrams[dgram_index].srcaddr,
                                    &dgrams[dgram_index].ttl)) {
            dgrams[dgram_index].vec.iov_len = 0; /* mark as unused */
        } else {
            assert(dgrams[dgram_index].srcaddr.sa.sa_family == AF_INET || dgrams[dgram_index].srcaddr.sa.sa_family == AF_INET6);
        }
    }

    /* convert dgrams to decoded packets and process them in group of (4-tuple, dcid) */
    quicly_decoded_packet_t packets[64];
    size_t packet_index = 0; /* number of decoded packets accumulated in `packets` that are yet to be dispatched */
    dgram_index = 0;
    while (dgram_index < num_dgrams) {
        int has_decoded = 0; /* indicates if a decoded packet belonging to a different connection is stored at
                              * `packets[packet_index]` */
        /* skip zero-sized datagrams (or the ones for which preprocessing failed) */
        if (dgrams[dgram_index].vec.iov_len == 0) {
            ++dgram_index;
            continue;
        }
        /* dispatch packets in `packets`, if the datagram at dgram_index is from a different path */
        if (packet_index != 0) {
            assert(dgram_index != 0);
            /* check source address */
            if (h2o_socket_compare_address(&dgrams[dgram_index - 1].srcaddr.sa, &dgrams[dgram_index].srcaddr.sa, 1) != 0)
                goto ProcessPackets;
            /* check destination address, if available */
            if (dgrams[dgram_index - 1].destaddr.sa.sa_family == AF_UNSPEC &&
                dgrams[dgram_index].destaddr.sa.sa_family == AF_UNSPEC) {
                /* ok */
            } else if (h2o_socket_compare_address(&dgrams[dgram_index - 1].destaddr.sa, &dgrams[dgram_index].destaddr.sa, 1) == 0) {
                /* ok */
            } else {
                goto ProcessPackets;
            }
            /* TTL should be same for dispatched packets */
            if (dgrams[dgram_index - 1].ttl != dgrams[dgram_index].ttl)
                goto ProcessPackets;
        }
        /* decode the first packet */
        size_t payload_off = 0;
        if (quicly_decode_packet(ctx->quic, packets + packet_index, dgrams[dgram_index].vec.iov_base,
                                 dgrams[dgram_index].vec.iov_len, &payload_off) == SIZE_MAX) {
            /* the datagram is skipped; packets accumulated so far (if any) are dispatched */
            ++dgram_index;
            goto ProcessPackets;
        }
        /* dispatch packets in `packets` if the DCID is different, setting the `has_decoded` flag */
        if (packet_index != 0) {
            const ptls_iovec_t *prev_dcid = &packets[packet_index - 1].cid.dest.encrypted,
                               *cur_dcid = &packets[packet_index].cid.dest.encrypted;
            if (!(prev_dcid->len == cur_dcid->len && memcmp(prev_dcid->base, cur_dcid->base, prev_dcid->len) == 0)) {
                has_decoded = 1;
                ++dgram_index;
                goto ProcessPackets;
            }
        }
        ++packet_index;
        /* add rest of the packets */
        while (payload_off < dgrams[dgram_index].vec.iov_len && packet_index < PTLS_ELEMENTSOF(packets)) {
            if (quicly_decode_packet(ctx->quic, packets + packet_index, dgrams[dgram_index].vec.iov_base,
                                     dgrams[dgram_index].vec.iov_len, &payload_off) == SIZE_MAX)
                break;
            ++packet_index;
        }
        ++dgram_index;
        /* if we have enough room for the next datagram, that is, the expected worst case of 4 packets in a coalesced datagram,
         * continue */
        if (packet_index + 4 < PTLS_ELEMENTSOF(packets))
            continue;

    ProcessPackets:
        if (packet_index != 0) {
            process_packets(ctx, &dgrams[dgram_index - 1].destaddr, &dgrams[dgram_index - 1].srcaddr, dgrams[dgram_index - 1].ttl,
                            packets, packet_index);
            if (has_decoded) {
                /* carry over the packet that has already been decoded as the first entry of the next group */
                packets[0] = packets[packet_index];
                packet_index = 1;
            } else {
                packet_index = 0;
            }
        }
    }
    /* dispatch whatever remains once all the datagrams have been consumed */
    if (packet_index != 0)
        process_packets(ctx, &dgrams[dgram_index - 1].destaddr, &dgrams[dgram_index - 1].srcaddr, dgrams[dgram_index - 1].ttl,
                        packets, packet_index);

Exit:;

#undef INIT_DGRAMS
}
898 | | |
899 | | static void on_read(h2o_socket_t *sock, const char *err) |
900 | 0 | { |
901 | 0 | h2o_quic_ctx_t *ctx = sock->data; |
902 | 0 | h2o_quic_read_socket(ctx, sock); |
903 | 0 | } |
904 | | |
905 | | static void on_timeout(h2o_timer_t *timeout) |
906 | 16.9k | { |
907 | 16.9k | h2o_quic_conn_t *conn = H2O_STRUCT_FROM_MEMBER(h2o_quic_conn_t, _timeout, timeout); |
908 | 16.9k | h2o_quic_send(conn); |
909 | 16.9k | } |
910 | | |
/**
 * Parses one HTTP/3 frame starting at `*_src`.
 *
 * The frame type and the length are decoded first. For every frame type other than DATA, the entire payload has to be available
 * before `src_end` and must not exceed `max_frame_payload_size`; `frame->payload` then points to it. For DATA frames,
 * `frame->payload` is left NULL and only the header is consumed. Finally, the frame type is checked against the table of types
 * permitted for the given `stream_type` and role; frame types not listed in the table are accepted.
 *
 * @return 0 on success, with `*_src` advanced past the consumed bytes. `H2O_HTTP3_ERROR_INCOMPLETE` if more input is needed,
 *         `H2O_HTTP3_ERROR_GENERAL_PROTOCOL` (with `*err_desc` set) if the payload is too large, or
 *         `H2O_HTTP3_ERROR_FRAME_UNEXPECTED` if the frame is not permitted. `*_src` is left untouched on failure.
 */
int h2o_http3_read_frame(h2o_http3_read_frame_t *frame, int is_client, uint64_t stream_type, size_t max_frame_payload_size,
                         const uint8_t **_src, const uint8_t *src_end, const char **err_desc)
{
    const uint8_t *src = *_src;

    /* decode the frame header (type, length) */
    if ((frame->type = quicly_decodev(&src, src_end)) == UINT64_MAX)
        return H2O_HTTP3_ERROR_INCOMPLETE;
    if ((frame->length = quicly_decodev(&src, src_end)) == UINT64_MAX)
        return H2O_HTTP3_ERROR_INCOMPLETE;
    frame->_header_size = (uint8_t)(src - *_src);

    /* read the content of the frame (unless it's a DATA frame) */
    frame->payload = NULL;
    if (frame->type != H2O_HTTP3_FRAME_TYPE_DATA) {
        if (frame->length > max_frame_payload_size) {
            H2O_PROBE(H3_FRAME_RECEIVE, frame->type, NULL, frame->length);
            PTLS_LOG(h2o, h3_frame_receive, {
                PTLS_LOG_ELEMENT_UNSIGNED(frame_type, frame->type);
                PTLS_LOG_ELEMENT_UNSIGNED(payload_len, frame->length);
            });
            *err_desc = h2o_http3_err_frame_too_large;
            return H2O_HTTP3_ERROR_GENERAL_PROTOCOL; /* FIXME is this the correct code? */
        }
        if (src_end - src < frame->length)
            return H2O_HTTP3_ERROR_INCOMPLETE;
        frame->payload = src;
        src += frame->length;
    }

    H2O_PROBE(H3_FRAME_RECEIVE, frame->type, frame->payload, frame->length);
    PTLS_LOG(h2o, h3_frame_receive, {
        PTLS_LOG_ELEMENT_UNSIGNED(frame_type, frame->type);
        if (frame->payload != NULL) {
            PTLS_LOG_APPDATA_ELEMENT_HEXDUMP(payload, frame->payload, frame->length);
        } else {
            PTLS_LOG_ELEMENT_UNSIGNED(payload_len, frame->length);
        }
    });

    /* validate frame type */
    switch (frame->type) {
/* Expands to the `case` of one frame type. The `*_clnt` flags permit the frame when we are not the client, the `*_srvr` flags
 * permit it when we are the client. Breaking out of the outer switch leads to the FRAME_UNEXPECTED error below. */
#define FRAME(id, req_clnt, req_srvr, ctl_clnt, ctl_srvr) \
    case H2O_HTTP3_FRAME_TYPE_##id: \
        switch (stream_type) { \
        case H2O_HTTP3_STREAM_TYPE_REQUEST: \
            if (req_clnt && !is_client) \
                goto Validation_Success; \
            if (req_srvr && is_client) \
                goto Validation_Success; \
            break; \
        case H2O_HTTP3_STREAM_TYPE_CONTROL: \
            if (ctl_clnt && !is_client) \
                goto Validation_Success; \
            if (ctl_srvr && is_client) \
                goto Validation_Success; \
            break; \
        default: \
            h2o_fatal("unexpected stream type"); \
            break; \
        } \
        break
        /* clang-format off */
        /*   +-------------------------+-------------+-------------+
         *   |                         | req-stream  | ctrl-stream |
         *   |          frame          +------+------+------+------+
         *   |                         |client|server|client|server|
         *   +-------------------------+------+------+------+------+ */
        FRAME( DATA                    ,    1 ,    1 ,    0 ,    0 );
        FRAME( HEADERS                 ,    1 ,    1 ,    0 ,    0 );
        FRAME( CANCEL_PUSH             ,    0 ,    0 ,    1 ,    1 );
        FRAME( SETTINGS                ,    0 ,    0 ,    1 ,    1 );
        FRAME( PUSH_PROMISE            ,    0 ,    1 ,    0 ,    0 );
        FRAME( GOAWAY                  ,    0 ,    0 ,    1 ,    1 );
        FRAME( MAX_PUSH_ID             ,    0 ,    0 ,    1 ,    0 );
        FRAME( PRIORITY_UPDATE_REQUEST ,    0 ,    0 ,    1 ,    0 );
        FRAME( PRIORITY_UPDATE_PUSH    ,    0 ,    0 ,    1 ,    0 );
        /*   +-------------------------+------+------+------+------+ */
        /* clang-format on */
#undef FRAME
    default:
        /* ignore extension frames that we do not handle */
        goto Validation_Success;
    }
    return H2O_HTTP3_ERROR_FRAME_UNEXPECTED;
Validation_Success:;

    /* commit the consumed bytes only when the frame has been accepted */
    *_src = src;
    return 0;
}
1000 | | |
1001 | | void h2o_quic_init_context(h2o_quic_ctx_t *ctx, h2o_loop_t *loop, h2o_socket_t *sock, quicly_context_t *quic, |
1002 | | quicly_cid_plaintext_t *next_cid, h2o_quic_accept_cb acceptor, |
1003 | | h2o_quic_notify_connection_update_cb notify_conn_update, uint8_t use_gso, h2o_quic_stats_t *quic_stats) |
1004 | 0 | { |
1005 | 0 | assert(quic->stream_open != NULL); |
1006 | | |
1007 | 0 | *ctx = (h2o_quic_ctx_t){ |
1008 | 0 | .loop = loop, |
1009 | 0 | .sock = {.sock = sock}, |
1010 | 0 | .quic = quic, |
1011 | 0 | .next_cid = next_cid, |
1012 | 0 | .conns_by_id = kh_init_h2o_quic_idmap(), |
1013 | 0 | .conns_accepting = kh_init_h2o_quic_acceptmap(), |
1014 | 0 | .notify_conn_update = notify_conn_update, |
1015 | 0 | .acceptor = acceptor, |
1016 | 0 | .use_gso = use_gso, |
1017 | 0 | .quic_stats = quic_stats, |
1018 | 0 | }; |
1019 | 0 | ctx->sock.sock->data = ctx; |
1020 | 0 | ctx->sock.addrlen = h2o_socket_getsockname(ctx->sock.sock, (void *)&ctx->sock.addr); |
1021 | 0 | assert(ctx->sock.addrlen != 0); |
1022 | 0 | switch (ctx->sock.addr.ss_family) { |
1023 | 0 | case AF_INET: |
1024 | 0 | ctx->sock.port = &((struct sockaddr_in *)&ctx->sock.addr)->sin_port; |
1025 | 0 | break; |
1026 | 0 | case AF_INET6: |
1027 | 0 | ctx->sock.port = &((struct sockaddr_in6 *)&ctx->sock.addr)->sin6_port; |
1028 | 0 | break; |
1029 | 0 | default: |
1030 | 0 | assert(!"unexpected address family"); |
1031 | 0 | break; |
1032 | 0 | } |
1033 | | |
1034 | 0 | h2o_socket_read_start(ctx->sock.sock, on_read); |
1035 | 0 | } |
1036 | | |
1037 | | void h2o_quic_dispose_context(h2o_quic_ctx_t *ctx) |
1038 | 0 | { |
1039 | 0 | assert(kh_size(ctx->conns_by_id) == 0); |
1040 | 0 | assert(kh_size(ctx->conns_accepting) == 0); |
1041 | | |
1042 | 0 | h2o_socket_close(ctx->sock.sock); |
1043 | 0 | kh_destroy_h2o_quic_idmap(ctx->conns_by_id); |
1044 | 0 | kh_destroy_h2o_quic_acceptmap(ctx->conns_accepting); |
1045 | 0 | } |
1046 | | |
1047 | | void h2o_quic_set_forwarding_context(h2o_quic_ctx_t *ctx, uint32_t accept_thread_divisor, uint8_t ttl, |
1048 | | h2o_quic_forward_packets_cb forward_cb, h2o_quic_preprocess_packet_cb preprocess_cb) |
1049 | 0 | { |
1050 | 0 | ctx->accept_thread_divisor = accept_thread_divisor; |
1051 | 0 | ctx->forward_packets = forward_cb; |
1052 | 0 | ctx->default_ttl = ttl; |
1053 | 0 | ctx->preprocess_packet = preprocess_cb; |
1054 | 0 | } |
1055 | | |
1056 | | void h2o_quic_close_connection(h2o_quic_conn_t *conn, quicly_error_t err, const char *reason_phrase) |
1057 | 3.78k | { |
1058 | 3.78k | switch (quicly_get_state(conn->quic)) { |
1059 | 0 | case QUICLY_STATE_FIRSTFLIGHT: /* FIXME why is this separate? */ |
1060 | 0 | conn->callbacks->destroy_connection(conn); |
1061 | 0 | break; |
1062 | 3.78k | case QUICLY_STATE_CONNECTED: |
1063 | 3.78k | quicly_close(conn->quic, err, reason_phrase); |
1064 | 3.78k | h2o_quic_schedule_timer(conn); |
1065 | 3.78k | break; |
1066 | 0 | default: |
1067 | | /* only need to wait for the socket close */ |
1068 | 0 | break; |
1069 | 3.78k | } |
1070 | 3.78k | } |
1071 | | |
1072 | | void h2o_quic_close_all_connections(h2o_quic_ctx_t *ctx) |
1073 | 0 | { |
1074 | 0 | h2o_quic_conn_t *conn; |
1075 | |
|
1076 | 0 | kh_foreach_value(ctx->conns_by_id, conn, { h2o_quic_close_connection(conn, 0, NULL); }); |
1077 | | /* closing a connection should also remove an entry from conns_accepting */ |
1078 | 0 | assert(kh_size(ctx->conns_accepting) == 0); |
1079 | 0 | } |
1080 | | |
1081 | | size_t h2o_quic_num_connections(h2o_quic_ctx_t *ctx) |
1082 | 0 | { |
1083 | | /* throughout its lifetime, a connection is always registered to both conns_by_id and conns_accepting, |
1084 | | thus counting conns_by_id is enough */ |
1085 | 0 | return kh_size(ctx->conns_by_id); |
1086 | 0 | } |
1087 | | |
1088 | | void h2o_quic_init_conn(h2o_quic_conn_t *conn, h2o_quic_ctx_t *ctx, const h2o_quic_conn_callbacks_t *callbacks) |
1089 | 6.03k | { |
1090 | 6.03k | *conn = (h2o_quic_conn_t){ctx, NULL, callbacks}; |
1091 | 6.03k | h2o_timer_init(&conn->_timeout, on_timeout); |
1092 | 6.03k | } |
1093 | | |
1094 | | void h2o_quic_dispose_conn(h2o_quic_conn_t *conn) |
1095 | 6.03k | { |
1096 | 6.03k | if (conn->quic != NULL) { |
1097 | 6.03k | khiter_t iter; |
1098 | | /* unregister from maps */ |
1099 | 6.03k | if ((iter = kh_get_h2o_quic_idmap(conn->ctx->conns_by_id, quicly_get_master_id(conn->quic)->master_id)) != |
1100 | 6.03k | kh_end(conn->ctx->conns_by_id)) |
1101 | 6.03k | kh_del_h2o_quic_idmap(conn->ctx->conns_by_id, iter); |
1102 | 6.03k | drop_from_acceptmap(conn->ctx, conn); |
1103 | 6.03k | quicly_free(conn->quic); |
1104 | 6.03k | } |
1105 | 6.03k | h2o_timer_unlink(&conn->_timeout); |
1106 | 6.03k | } |
1107 | | |
1108 | | void h2o_quic_setup(h2o_quic_conn_t *conn, quicly_conn_t *quic) |
1109 | 6.03k | { |
1110 | | /* Setup relation between `h2o_quic_conn_t` and `quicly_conn_t`. At this point, `conn` will not have `quic` associated, though |
1111 | | * the back pointer might have alreday been set up (see how we call `quicly_accept`). */ |
1112 | 6.03k | assert(conn->quic == NULL); |
1113 | 6.03k | void **backptr = quicly_get_data(quic); |
1114 | 6.03k | if (*backptr == NULL) { |
1115 | 6.03k | *backptr = conn; |
1116 | 6.03k | } else { |
1117 | 0 | assert(*backptr == conn); |
1118 | 0 | } |
1119 | 6.03k | conn->quic = quic; |
1120 | | |
1121 | | /* register to the idmap */ |
1122 | 6.03k | int r; |
1123 | 6.03k | khiter_t iter = kh_put_h2o_quic_idmap(conn->ctx->conns_by_id, quicly_get_master_id(conn->quic)->master_id, &r); |
1124 | 6.03k | assert(iter != kh_end(conn->ctx->conns_by_id)); |
1125 | 6.03k | kh_val(conn->ctx->conns_by_id, iter) = conn; |
1126 | 6.03k | } |
1127 | | |
1128 | | void h2o_http3_init_conn(h2o_http3_conn_t *conn, h2o_quic_ctx_t *ctx, const h2o_http3_conn_callbacks_t *callbacks, |
1129 | | const h2o_http3_qpack_context_t *qpack_ctx, size_t max_frame_payload_size) |
1130 | 6.03k | { |
1131 | 6.03k | h2o_quic_init_conn(&conn->super, ctx, &callbacks->super); |
1132 | 6.03k | memset((char *)conn + sizeof(conn->super), 0, sizeof(*conn) - sizeof(conn->super)); |
1133 | 6.03k | conn->qpack.ctx = qpack_ctx; |
1134 | 6.03k | conn->max_frame_payload_size = max_frame_payload_size; |
1135 | 6.03k | } |
1136 | | |
1137 | | void h2o_http3_dispose_conn(h2o_http3_conn_t *conn) |
1138 | 6.03k | { |
1139 | 6.03k | if (conn->qpack.dec != NULL) |
1140 | 6.03k | h2o_qpack_destroy_decoder(conn->qpack.dec); |
1141 | 6.03k | if (conn->qpack.enc != NULL) |
1142 | 0 | h2o_qpack_destroy_encoder(conn->qpack.enc); |
1143 | 6.03k | h2o_quic_dispose_conn(&conn->super); |
1144 | 6.03k | } |
1145 | | |
/**
 * Builds the first bytes to be sent on the egress control stream, i.e. the stream type followed by a SETTINGS frame, into
 * `bytebuf` of size `capacity`, and returns the number of bytes written.
 *
 * The SETTINGS frame always enables the extended CONNECT protocol. The two H3_DATAGRAM settings are added only when
 * `max_datagram_frame_size` of the local transport parameters is non-zero. `bytebuf` has to be large enough; the function
 * asserts that the buffer did not spill over to the heap.
 */
static size_t build_firstflight(h2o_http3_conn_t *conn, uint8_t *bytebuf, size_t capacity)
{
    ptls_buffer_t buf;
    int ret = 0; /* NOTE(review): not referenced directly; presumably assigned by the `ptls_buffer_push_*` macros -- confirm */

    ptls_buffer_init(&buf, bytebuf, capacity);

    /* push stream type */
    ptls_buffer_push_quicint(&buf, H2O_HTTP3_STREAM_TYPE_CONTROL);

    /* push SETTINGS frame */
    ptls_buffer_push_quicint(&buf, H2O_HTTP3_FRAME_TYPE_SETTINGS);
    ptls_buffer_push_block(&buf, -1, {
        quicly_context_t *qctx = quicly_get_context(conn->super.quic);
        if (qctx->transport_params.max_datagram_frame_size != 0) {
            // advertise that we are prepared to receive both RFC and draft-03 datagram formats
            ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_H3_DATAGRAM);
            ptls_buffer_push_quicint(&buf, 1);
            ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_H3_DATAGRAM_DRAFT03);
            ptls_buffer_push_quicint(&buf, 1);
        };
        ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_ENABLE_CONNECT_PROTOCOL);
        ptls_buffer_push_quicint(&buf, 1);
    });

    /* everything must have fit into the buffer supplied by the caller */
    assert(!buf.is_allocated);
    return buf.off;

    /* NOTE(review): presumably the jump target of the `ptls_buffer_push_*` macros upon failure -- confirm */
Exit:
    h2o_fatal("unreachable");
}
1177 | | |
1178 | | quicly_error_t h2o_http3_setup(h2o_http3_conn_t *conn, quicly_conn_t *quic) |
1179 | 6.03k | { |
1180 | 6.03k | quicly_error_t ret; |
1181 | | |
1182 | 6.03k | h2o_quic_setup(&conn->super, quic); |
1183 | 6.03k | conn->state = H2O_HTTP3_CONN_STATE_OPEN; |
1184 | | |
1185 | | /* setup h3 objects, only when the connection state has been created */ |
1186 | 6.03k | if (quicly_get_state(quic) > QUICLY_STATE_CONNECTED) |
1187 | 0 | goto Exit; |
1188 | | |
1189 | | /* create decoder with the table size set to zero; see SETTINGS sent below. */ |
1190 | 6.03k | conn->qpack.dec = h2o_qpack_create_decoder(0, 100 /* FIXME */); |
1191 | | |
1192 | 6.03k | { /* open control streams, send SETTINGS */ |
1193 | 6.03k | uint8_t firstflight[32]; |
1194 | 6.03k | size_t firstflight_len = build_firstflight(conn, firstflight, sizeof(firstflight)); |
1195 | 6.03k | if ((ret = open_egress_unistream(conn, &conn->_control_streams.egress.control, |
1196 | 6.03k | h2o_iovec_init(firstflight, firstflight_len))) != 0) |
1197 | 0 | return ret; |
1198 | 6.03k | } |
1199 | | |
1200 | 6.03k | { /* open QPACK encoder & decoder streams */ |
1201 | 6.03k | static const uint8_t encoder_first_flight[] = {H2O_HTTP3_STREAM_TYPE_QPACK_ENCODER}; |
1202 | 6.03k | static const uint8_t decoder_first_flight[] = {H2O_HTTP3_STREAM_TYPE_QPACK_DECODER}; |
1203 | 6.03k | if ((ret = open_egress_unistream(conn, &conn->_control_streams.egress.qpack_encoder, |
1204 | 6.03k | h2o_iovec_init(encoder_first_flight, sizeof(encoder_first_flight)))) != 0 || |
1205 | 6.03k | (ret = open_egress_unistream(conn, &conn->_control_streams.egress.qpack_decoder, |
1206 | 6.03k | h2o_iovec_init(decoder_first_flight, sizeof(decoder_first_flight)))) != 0) |
1207 | 0 | return ret; |
1208 | 6.03k | } |
1209 | | |
1210 | 6.03k | Exit: |
1211 | 6.03k | h2o_quic_schedule_timer(&conn->super); |
1212 | 6.03k | return 0; |
1213 | 6.03k | } |
1214 | | |
1215 | | quicly_error_t h2o_quic_send(h2o_quic_conn_t *conn) |
1216 | 22.9k | { |
1217 | 22.9k | quicly_address_t dest, src; |
1218 | 22.9k | struct iovec datagrams[10]; |
1219 | 22.9k | size_t num_datagrams = PTLS_ELEMENTSOF(datagrams); |
1220 | 22.9k | uint8_t datagram_buf[1500 * PTLS_ELEMENTSOF(datagrams)]; |
1221 | | |
1222 | 22.9k | quicly_error_t ret = quicly_send(conn->quic, &dest, &src, datagrams, &num_datagrams, datagram_buf, sizeof(datagram_buf)); |
1223 | 22.9k | switch (ret) { |
1224 | 16.9k | case 0: |
1225 | 16.9k | if (num_datagrams != 0 && !h2o_quic_send_datagrams(conn->ctx, &dest, &src, datagrams, num_datagrams)) { |
1226 | | /* FIXME close the connection immediately */ |
1227 | 0 | break; |
1228 | 0 | } |
1229 | 16.9k | break; |
1230 | 16.9k | case QUICLY_ERROR_STATE_EXHAUSTION: |
1231 | 6.03k | case QUICLY_ERROR_FREE_CONNECTION: |
1232 | 6.03k | conn->callbacks->destroy_connection(conn); |
1233 | 6.03k | return 0; |
1234 | 0 | default: |
1235 | 0 | h2o_fatal("quicly_send returned %" PRId64, ret); |
1236 | 22.9k | } |
1237 | | |
1238 | 16.9k | h2o_quic_schedule_timer(conn); |
1239 | | |
1240 | 16.9k | return 1; |
1241 | 22.9k | } |
1242 | | |
1243 | | void h2o_http3_update_recvbuf(h2o_buffer_t **buf, size_t off, const void *src, size_t len) |
1244 | 6.03k | { |
1245 | 6.03k | size_t new_size = off + len; |
1246 | | |
1247 | 6.03k | if ((*buf)->size < new_size) { |
1248 | 6.03k | h2o_buffer_reserve(buf, new_size - (*buf)->size); |
1249 | 6.03k | (*buf)->size = new_size; |
1250 | 6.03k | } |
1251 | 6.03k | memcpy((*buf)->bytes + off, src, len); |
1252 | 6.03k | } |
1253 | | |
1254 | | void h2o_quic_schedule_timer(h2o_quic_conn_t *conn) |
1255 | 30.5k | { |
1256 | 30.5k | int64_t timeout = quicly_get_first_timeout(conn->quic); |
1257 | 30.5k | if (h2o_timer_is_linked(&conn->_timeout)) { |
1258 | | #if !H2O_USE_LIBUV /* optimization to skip registering a timer specifying the same time */ |
1259 | 13.6k | if (timeout == conn->_timeout.expire_at) |
1260 | 13.5k | return; |
1261 | 108 | #endif |
1262 | 108 | h2o_timer_unlink(&conn->_timeout); |
1263 | 108 | } |
1264 | 17.0k | uint64_t now = h2o_now(conn->ctx->loop), delay = now < timeout ? timeout - now : 0; |
1265 | 17.0k | h2o_timer_link(conn->ctx->loop, delay, &conn->_timeout); |
1266 | 17.0k | } |
1267 | | |
1268 | | int h2o_http3_handle_settings_frame(h2o_http3_conn_t *conn, const uint8_t *payload, size_t length, const char **err_desc) |
1269 | 0 | { |
1270 | 0 | const uint8_t *src = payload, *src_end = src + length; |
1271 | 0 | uint32_t header_table_size = 0; |
1272 | 0 | uint64_t blocked_streams = 0; |
1273 | |
|
1274 | 0 | assert(!h2o_http3_has_received_settings(conn)); |
1275 | | |
1276 | 0 | while (src != src_end) { |
1277 | 0 | uint64_t id; |
1278 | 0 | uint64_t value; |
1279 | 0 | if ((id = quicly_decodev(&src, src_end)) == UINT64_MAX) |
1280 | 0 | goto Malformed; |
1281 | 0 | if ((value = quicly_decodev(&src, src_end)) == UINT64_MAX) |
1282 | 0 | goto Malformed; |
1283 | 0 | switch (id) { |
1284 | 0 | case H2O_HTTP3_SETTINGS_MAX_FIELD_SECTION_SIZE: |
1285 | 0 | conn->peer_settings.max_field_section_size = value; |
1286 | 0 | break; |
1287 | 0 | case H2O_HTTP3_SETTINGS_QPACK_MAX_TABLE_CAPACITY: |
1288 | 0 | header_table_size = |
1289 | 0 | value < conn->qpack.ctx->encoder_table_capacity ? (uint32_t)value : conn->qpack.ctx->encoder_table_capacity; |
1290 | 0 | break; |
1291 | 0 | case H2O_HTTP3_SETTINGS_QPACK_BLOCKED_STREAMS: |
1292 | 0 | blocked_streams = value; |
1293 | 0 | break; |
1294 | 0 | case H2O_HTTP3_SETTINGS_H3_DATAGRAM: |
1295 | 0 | case H2O_HTTP3_SETTINGS_H3_DATAGRAM_DRAFT03: |
1296 | 0 | switch (value) { |
1297 | 0 | case 0: |
1298 | 0 | break; |
1299 | 0 | case 1: { |
1300 | 0 | const quicly_transport_parameters_t *remote_tp = quicly_get_remote_transport_parameters(conn->super.quic); |
1301 | 0 | if (remote_tp->max_datagram_frame_size == 0) |
1302 | 0 | goto Malformed; |
1303 | 0 | conn->peer_settings.h3_datagram = 1; |
1304 | 0 | } break; |
1305 | 0 | default: |
1306 | 0 | goto Malformed; |
1307 | 0 | } |
1308 | 0 | break; |
1309 | 0 | default: |
1310 | 0 | break; |
1311 | 0 | } |
1312 | 0 | } |
1313 | | |
1314 | 0 | conn->qpack.enc = h2o_qpack_create_encoder(header_table_size, blocked_streams); |
1315 | 0 | return 0; |
1316 | 0 | Malformed: |
1317 | 0 | *err_desc = "malformed SETTINGS frame"; |
1318 | 0 | return H2O_HTTP3_ERROR_FRAME; |
1319 | 0 | } |
1320 | | |
1321 | | void h2o_http3_send_qpack_stream_cancel(h2o_http3_conn_t *conn, quicly_stream_id_t stream_id) |
1322 | 0 | { |
1323 | 0 | struct st_h2o_http3_egress_unistream_t *stream = conn->_control_streams.egress.qpack_decoder; |
1324 | | |
1325 | | /* allocate and write */ |
1326 | 0 | h2o_iovec_t buf = h2o_buffer_reserve(&stream->sendbuf, stream->sendbuf->size + H2O_HPACK_ENCODE_INT_MAX_LENGTH); |
1327 | 0 | assert(buf.base != NULL); |
1328 | 0 | stream->sendbuf->size += h2o_qpack_decoder_send_stream_cancel(conn->qpack.dec, (uint8_t *)buf.base, stream_id); |
1329 | | |
1330 | | /* notify the transport */ |
1331 | 0 | H2O_HTTP3_CHECK_SUCCESS(quicly_stream_sync_sendbuf(stream->quic, 1) == 0); |
1332 | 0 | } |
1333 | | |
1334 | | void h2o_http3_send_qpack_header_ack(h2o_http3_conn_t *conn, const void *bytes, size_t len) |
1335 | 0 | { |
1336 | 0 | struct st_h2o_http3_egress_unistream_t *stream = conn->_control_streams.egress.qpack_encoder; |
1337 | |
|
1338 | 0 | assert(stream != NULL); |
1339 | 0 | h2o_buffer_append(&stream->sendbuf, bytes, len); |
1340 | 0 | H2O_HTTP3_CHECK_SUCCESS(quicly_stream_sync_sendbuf(stream->quic, 1)); |
1341 | 0 | } |
1342 | | |
1343 | | void h2o_http3_send_shutdown_goaway_frame(h2o_http3_conn_t *conn) |
1344 | 0 | { |
1345 | | /* There is a moment where the transport-level close has been initiated while st_h2o_http3_server_conn_t remains. |
1346 | | * Check QUIC connection state to skip sending GOAWAY in such a case. */ |
1347 | 0 | if (conn->state < H2O_HTTP3_CONN_STATE_HALF_CLOSED && quicly_get_state(conn->super.quic) == QUICLY_STATE_CONNECTED) { |
1348 | | /* advertise the maximum stream ID to indicate that we will no longer accept new requests. |
1349 | | * HTTP/3 draft section 5.2.8 -- |
1350 | | * "An endpoint that is attempting to gracefully shut down a connection can send a GOAWAY frame with a value set to the |
1351 | | * maximum possible value (2^62-4 for servers, 2^62-1 for clients). This ensures that the peer stops creating new |
1352 | | * requests or pushes." */ |
1353 | 0 | h2o_http3_send_goaway_frame(conn, (UINT64_C(1) << 62) - 4); |
1354 | 0 | } |
1355 | 0 | } |
1356 | | |
1357 | | void h2o_http3_send_goaway_frame(h2o_http3_conn_t *conn, uint64_t stream_or_push_id) |
1358 | 0 | { |
1359 | 0 | size_t cap = h2o_http3_goaway_frame_capacity(stream_or_push_id); |
1360 | 0 | h2o_iovec_t alloced = h2o_buffer_reserve(&conn->_control_streams.egress.control->sendbuf, cap); |
1361 | 0 | h2o_http3_encode_goaway_frame((uint8_t *)alloced.base, stream_or_push_id); |
1362 | 0 | conn->_control_streams.egress.control->sendbuf->size += cap; |
1363 | 0 | quicly_stream_sync_sendbuf(conn->_control_streams.egress.control->quic, 1); |
1364 | 0 | } |
1365 | | |
1366 | | void h2o_http3_send_h3_datagrams(h2o_http3_conn_t *conn, uint64_t flow_id, h2o_iovec_t *datagrams, size_t num_datagrams) |
1367 | 0 | { |
1368 | 0 | for (size_t i = 0; i < num_datagrams; ++i) { |
1369 | 0 | h2o_iovec_t *src = datagrams + i; |
1370 | 0 | uint8_t buf[quicly_encodev_capacity(flow_id) + src->len], *p = buf; |
1371 | 0 | p = quicly_encodev(p, flow_id); |
1372 | 0 | memcpy(p, src->base, src->len); |
1373 | 0 | p += src->len; |
1374 | 0 | ptls_iovec_t payload = ptls_iovec_init(buf, p - buf); |
1375 | 0 | quicly_send_datagram_frames(conn->super.quic, &payload, 1); |
1376 | 0 | } |
1377 | |
|
1378 | 0 | h2o_quic_schedule_timer(&conn->super); |
1379 | 0 | } |
1380 | | |
1381 | | uint64_t h2o_http3_decode_h3_datagram(h2o_iovec_t *payload, const void *_src, size_t len) |
1382 | 0 | { |
1383 | 0 | const uint8_t *src = _src, *end = src + len; |
1384 | 0 | uint64_t flow_id; |
1385 | |
|
1386 | 0 | if ((flow_id = ptls_decode_quicint(&src, end)) != UINT64_MAX) |
1387 | 0 | *payload = h2o_iovec_init(src, end - src); |
1388 | 0 | return flow_id; |
1389 | 0 | } |