Coverage Report

Created: 2026-05-30 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/h2o/lib/http3/common.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2018 Fastly, Kazuho Oku
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 * of this software and associated documentation files (the "Software"), to
6
 * deal in the Software without restriction, including without limitation the
7
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
 * sell copies of the Software, and to permit persons to whom the Software is
9
 * furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
 * IN THE SOFTWARE.
21
 */
22
#ifdef __APPLE__
23
#define __APPLE_USE_RFC_3542 /* to use IPV6_PKTINFO */
24
#endif
25
#include <errno.h>
26
#include <sys/types.h>
27
#include <netinet/in.h>
28
#include <netinet/ip.h>
29
#include <netinet/udp.h>
30
#include <pthread.h>
31
#include <stdio.h>
32
#include <sys/socket.h>
33
#include <unistd.h>
34
#include "picotls/openssl.h"
35
#include "h2o.h"
36
#include "h2o/string_.h"
37
#include "h2o/http3_common.h"
38
#include "h2o/http3_internal.h"
39
#include "h2o/multithread.h"
40
#include "../probes_.h"
41
42
h2o_quic_conn_t h2o_quic_accept_conn_decryption_failed;
43
h2o_http3_conn_t h2o_http3_accept_conn_closed;
44
45
struct st_h2o_http3_ingress_unistream_t {
46
    /**
47
     * back pointer
48
     */
49
    quicly_stream_t *quic;
50
    /**
51
     *
52
     */
53
    h2o_buffer_t *recvbuf;
54
    /**
55
     * A callback that passes unparsed input to be handled. `src` is set to NULL when receiving a reset.
56
     */
57
    void (*handle_input)(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src,
58
                         const uint8_t *src_end, int is_eos);
59
};
60
61
const char h2o_http3_err_frame_too_large[] = "HTTP/3 frame is too large";
62
63
const ptls_iovec_t h2o_http3_alpn[3] = {{(void *)H2O_STRLIT("h3")}, {(void *)H2O_STRLIT("h3-29")}, {(void *)H2O_STRLIT("h3-27")}};
64
65
static void report_sendmsg_errors(h2o_error_reporter_t *reporter, uint64_t total_successes, uint64_t cur_successes)
66
0
{
67
0
    char errstr[256];
68
0
    fprintf(stderr, "sendmsg failed %" PRIu64 " time%s, succeeded: %" PRIu64 " time%s, over the last minute: %s\n",
69
0
            reporter->cur_errors, reporter->cur_errors > 1 ? "s" : "", cur_successes, cur_successes > 1 ? "s" : "",
70
0
            h2o_strerror_r((int)reporter->data, errstr, sizeof(errstr)));
71
0
}
72
73
static h2o_error_reporter_t track_sendmsg = H2O_ERROR_REPORTER_INITIALIZER(report_sendmsg_errors);
74
75
#if !H2O_USE_LIBUV
76
h2o_socket_t *h2o_quic_create_client_socket(h2o_loop_t *loop, int family)
77
0
{
78
0
    int fd, on = 1;
79
0
    quicly_address_t addr = {.sa.sa_family = family};
80
0
    socklen_t addrlen;
81
82
0
    if ((fd = socket(family, SOCK_DGRAM, 0)) == -1)
83
0
        return NULL;
84
85
0
    switch (family) {
86
0
    case AF_INET:
87
0
#ifdef IP_RECVTOS
88
0
        setsockopt(fd, IPPROTO_IP, IP_RECVTOS, &on, sizeof(on));
89
0
#endif
90
0
        addrlen = sizeof(addr.sin);
91
0
        break;
92
0
    case AF_INET6:
93
0
#ifdef IPV6_V6ONLY
94
0
        if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) != 0)
95
0
            goto Error;
96
0
#endif
97
0
#ifdef IPV6_RECVTCLASS
98
0
        setsockopt(fd, IPPROTO_IPV6, IPV6_RECVTCLASS, &on, sizeof(on));
99
0
#endif
100
0
        addrlen = sizeof(addr.sin6);
101
0
        break;
102
0
    default:
103
0
        assert(!"unexpected address family");
104
0
        goto Error;
105
0
    }
106
107
0
    if (bind(fd, &addr.sa, addrlen) != 0)
108
0
        goto Error;
109
110
0
    return h2o_evloop_socket_create(loop, fd, H2O_SOCKET_FLAG_DONT_READ);
111
112
0
Error: {
113
0
    int saved_errno = errno;
114
0
    close(fd);
115
0
    errno = saved_errno;
116
0
}
117
0
    return NULL;
118
0
}
119
#endif
120
121
int h2o_quic_send_datagrams(h2o_quic_ctx_t *ctx, quicly_address_t *dest, quicly_address_t *src, struct iovec *datagrams,
122
                            size_t num_datagrams, uint8_t ecn)
123
0
{
124
0
    union {
125
0
        struct cmsghdr hdr;
126
0
        char buf[
127
0
#ifdef IPV6_PKTINFO
128
0
            CMSG_SPACE(sizeof(struct in6_pktinfo))
129
#elif defined(IP_PKTINFO)
130
            CMSG_SPACE(sizeof(struct in_pktinfo))
131
#elif defined(IP_SENDSRCADDR)
132
            CMSG_SPACE(sizeof(struct in_addr))
133
#else
134
            CMSG_SPACE(1)
135
#endif
136
0
#ifdef UDP_SEGMENT
137
0
            + CMSG_SPACE(sizeof(uint16_t))
138
0
#endif
139
            + CMSG_SPACE(sizeof(int)) /* IP_TOS or IPV6_TCLASS */
140
            + CMSG_SPACE(1)           /* sentry */
141
0
        ];
142
0
    } cmsgbuf = {.buf = {} /* zero-cleared so that CMSG_NXTHDR can be used for locating the *next* cmsghdr */};
143
0
    struct msghdr mess = {
144
0
        .msg_name = &dest->sa,
145
0
        .msg_namelen = quicly_get_socklen(&dest->sa),
146
0
        .msg_control = cmsgbuf.buf,
147
0
        .msg_controllen = sizeof(cmsgbuf.buf),
148
0
    };
149
0
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mess);
150
0
    h2o_quic_socket_t *sock;
151
0
    int ret;
152
153
0
#define PUSH_CMSG(level, type, value)                                                                                              \
154
0
    do {                                                                                                                           \
155
0
        cmsg->cmsg_level = (level);                                                                                                \
156
0
        cmsg->cmsg_type = (type);                                                                                                  \
157
0
        cmsg->cmsg_len = CMSG_LEN(sizeof(value));                                                                                  \
158
0
        memcpy(CMSG_DATA(cmsg), &value, sizeof(value));                                                                            \
159
0
        cmsg = CMSG_NXTHDR(&mess, cmsg);                                                                                           \
160
0
    } while (0)
161
162
0
    if (ctx->sock.addr.ss_family == dest->sa.sa_family) {
163
0
        sock = &ctx->sock;
164
0
    } else if (ctx->sock_alt_family.sock != NULL && ctx->sock_alt_family.addr.ss_family == dest->sa.sa_family) {
165
0
        sock = &ctx->sock_alt_family;
166
0
    } else {
167
0
        return 0;
168
0
    }
169
170
    /* first CMSG is the source address */
171
0
    if (src->sa.sa_family != AF_UNSPEC) {
172
0
        switch (src->sa.sa_family) {
173
0
        case AF_INET: {
174
0
#if defined(IP_PKTINFO)
175
0
            if (*sock->port != src->sin.sin_port)
176
0
                return 0;
177
0
            struct in_pktinfo info = {.ipi_spec_dst = src->sin.sin_addr};
178
0
            PUSH_CMSG(IPPROTO_IP, IP_PKTINFO, info);
179
#elif defined(IP_SENDSRCADDR)
180
            if (*sock->port != src->sin.sin_port)
181
                return 0;
182
            struct sockaddr_in *fdaddr = (struct sockaddr_in *)&sock->addr;
183
            assert(fdaddr->sin_family == AF_INET);
184
            if (fdaddr->sin_addr.s_addr == INADDR_ANY)
185
                PUSH_CMSG(IPPROTO_IP, IP_SENDSRCADDR, src->sin.sin_addr);
186
#else
187
            h2o_fatal("IP_PKTINFO not available");
188
#endif
189
0
        } break;
190
0
        case AF_INET6:
191
0
#ifdef IPV6_PKTINFO
192
0
            if (*sock->port != src->sin6.sin6_port)
193
0
                return 0;
194
0
            struct in6_pktinfo info = {.ipi6_addr = src->sin6.sin6_addr};
195
0
            PUSH_CMSG(IPPROTO_IPV6, IPV6_PKTINFO, info);
196
#else
197
            h2o_fatal("IPV6_PKTINFO not available");
198
#endif
199
0
            break;
200
0
        default:
201
0
            h2o_fatal("unexpected address family");
202
0
            break;
203
0
        }
204
0
    }
205
206
    /* next CMSG is UDP_SEGMENT size (for GSO); assert that the input follows the expected pattern (see the doc-comment of
207
     * `quicly_send`), then set the CMSG and convert `datagrams` into one. */
208
0
    for (size_t i = 1; i < num_datagrams; ++i) {
209
0
        assert(datagrams[i - 1].iov_base + datagrams[i - 1].iov_len == datagrams[i].iov_base);
210
0
        assert(i == num_datagrams - 1 || datagrams[i].iov_len == datagrams[0].iov_len);
211
0
    }
212
0
#ifdef UDP_SEGMENT
213
0
    struct iovec gso_iovec;
214
0
    if (num_datagrams > 1 && ctx->use_gso) {
215
0
        uint16_t segsize = (uint16_t)datagrams[0].iov_len;
216
0
        PUSH_CMSG(SOL_UDP, UDP_SEGMENT, segsize);
217
0
        gso_iovec = (struct iovec){
218
0
            .iov_base = datagrams[0].iov_base,
219
0
            .iov_len = datagrams[num_datagrams - 1].iov_base + datagrams[num_datagrams - 1].iov_len - datagrams[0].iov_base,
220
0
        };
221
0
        datagrams = &gso_iovec;
222
0
        num_datagrams = 1;
223
0
    }
224
0
#endif
225
226
0
    if (ecn != 0) {
227
0
        int tos = ecn; /* IPV6_TCLASS uses int, and draft-ietf-tsvwg-udp-ecn-05 says IP_TOS assumes int too, on all platforms */
228
0
        switch (dest->sa.sa_family) {
229
0
        case AF_INET:
230
0
            PUSH_CMSG(IPPROTO_IP, IP_TOS, tos);
231
0
            break;
232
0
        case AF_INET6:
233
0
#ifdef IPV6_TCLASS
234
0
            PUSH_CMSG(IPPROTO_IPV6, IPV6_TCLASS, tos);
235
0
#endif
236
0
            break;
237
0
        default:
238
0
            break;
239
0
        }
240
0
    }
241
242
    /* commit CMSG length */
243
0
    if ((mess.msg_controllen = (socklen_t)((char *)cmsg - (char *)cmsgbuf.buf)) == 0)
244
0
        mess.msg_control = NULL;
245
246
    /* send datagrams */
247
0
    for (size_t i = 0; i < num_datagrams; ++i) {
248
0
        mess.msg_iov = datagrams + i;
249
0
        mess.msg_iovlen = 1;
250
0
        while ((ret = (int)sendmsg(h2o_socket_get_fd(sock->sock), &mess, 0)) == -1 && errno == EINTR)
251
0
            ;
252
0
        if (ret == -1)
253
0
            goto SendmsgError;
254
0
    }
255
256
0
    h2o_error_reporter_record_success(&track_sendmsg);
257
258
0
    return 1;
259
260
0
SendmsgError:
261
    /* The UDP stack returns EINVAL (linux) or EADDRNOTAVAIL (darwin, and presumably other BSD) when it was unable to use the
262
     * designated source address.  We communicate that back to the caller so that the connection can be closed immediately. */
263
0
    if (src->sa.sa_family != AF_UNSPEC && (errno == EINVAL || errno == EADDRNOTAVAIL))
264
0
        return 0;
265
266
    /* Temporary failure to send a packet is not a permanent error fo the connection. (TODO do we want do something more
267
     * specific?) */
268
269
    /* Log the number of failed invocations once per minute, if there has been such a failure. */
270
0
    h2o_error_reporter_record_error(ctx->loop, &track_sendmsg, 60000, errno);
271
272
0
    return 1;
273
274
0
#undef PUSH_CMSG
275
0
}
276
277
static inline const h2o_http3_conn_callbacks_t *get_callbacks(h2o_http3_conn_t *conn)
278
0
{
279
0
    return (const h2o_http3_conn_callbacks_t *)conn->super.callbacks;
280
0
}
281
282
static void ingress_unistream_on_destroy(quicly_stream_t *qs, quicly_error_t err)
283
0
{
284
0
    struct st_h2o_http3_ingress_unistream_t *stream = qs->data;
285
0
    h2o_buffer_dispose(&stream->recvbuf);
286
0
    free(stream);
287
0
}
288
289
static void ingress_unistream_on_receive(quicly_stream_t *qs, size_t off, const void *input, size_t len)
290
0
{
291
0
    h2o_http3_conn_t *conn = *quicly_get_data(qs->conn);
292
0
    struct st_h2o_http3_ingress_unistream_t *stream = qs->data;
293
294
    /* save received data */
295
0
    h2o_http3_update_recvbuf(&stream->recvbuf, off, input, len);
296
297
    /* determine bytes that can be handled */
298
0
    const uint8_t *src = (const uint8_t *)stream->recvbuf->bytes,
299
0
                  *src_end = src + quicly_recvstate_bytes_available(&stream->quic->recvstate);
300
0
    if (src == src_end && !quicly_recvstate_transfer_complete(&stream->quic->recvstate))
301
0
        return;
302
303
    /* handle the bytes */
304
0
    stream->handle_input(conn, stream, &src, src_end, quicly_recvstate_transfer_complete(&stream->quic->recvstate));
305
0
    if (quicly_get_state(conn->super.quic) >= QUICLY_STATE_CLOSING)
306
0
        return;
307
308
    /* remove bytes that have been consumed */
309
0
    size_t bytes_consumed = src - (const uint8_t *)stream->recvbuf->bytes;
310
0
    if (bytes_consumed != 0) {
311
0
        h2o_buffer_consume(&stream->recvbuf, bytes_consumed);
312
0
        quicly_stream_sync_recvbuf(stream->quic, bytes_consumed);
313
0
    }
314
0
}
315
316
static void ingress_unistream_on_receive_reset(quicly_stream_t *qs, quicly_error_t err)
317
0
{
318
0
    h2o_http3_conn_t *conn = *quicly_get_data(qs->conn);
319
0
    struct st_h2o_http3_ingress_unistream_t *stream = qs->data;
320
321
0
    stream->handle_input(conn, stream, NULL, NULL, 1);
322
0
}
323
324
static void qpack_encoder_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream,
325
                                              const uint8_t **src, const uint8_t *src_end, int is_eos)
326
0
{
327
0
    if (src == NULL || is_eos) {
328
0
        h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL);
329
0
        return;
330
0
    }
331
332
0
    int64_t *unblocked_stream_ids;
333
0
    size_t num_unblocked;
334
0
    int ret;
335
0
    const char *err_desc = NULL;
336
0
    if ((ret = h2o_qpack_decoder_handle_input(conn->qpack.dec, &unblocked_stream_ids, &num_unblocked, src, src_end, &err_desc)) !=
337
0
        0) {
338
0
        h2o_quic_close_connection(&conn->super, ret, err_desc);
339
0
        return;
340
0
    }
341
342
    /* TODO handle unblocked streams */
343
0
}
344
345
static void qpack_decoder_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream,
346
                                              const uint8_t **src, const uint8_t *src_end, int is_eos)
347
0
{
348
0
    if (src == NULL || is_eos) {
349
0
        h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL);
350
0
        return;
351
0
    }
352
353
0
    int ret;
354
0
    const char *err_desc = NULL;
355
0
    if ((ret = h2o_qpack_encoder_handle_input(conn->qpack.enc, src, src_end, &err_desc)) != 0)
356
0
        h2o_quic_close_connection(&conn->super, ret, err_desc);
357
0
}
358
359
static void control_stream_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream,
360
                                        const uint8_t **src, const uint8_t *src_end, int is_eos)
361
0
{
362
0
    if (src == NULL || is_eos) {
363
0
        h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL);
364
0
        return;
365
0
    }
366
367
0
    do {
368
0
        h2o_http3_read_frame_t frame;
369
0
        quicly_error_t ret;
370
0
        const char *err_desc = NULL;
371
372
0
        if ((ret = h2o_http3_read_frame(&frame, quicly_is_client(conn->super.quic), H2O_HTTP3_STREAM_TYPE_CONTROL,
373
0
                                        conn->max_frame_payload_size, src, src_end, &err_desc)) != 0) {
374
0
            if (ret != H2O_HTTP3_ERROR_INCOMPLETE)
375
0
                h2o_quic_close_connection(&conn->super, ret, err_desc);
376
0
            break;
377
0
        }
378
0
        if (h2o_http3_has_received_settings(conn) == (frame.type == H2O_HTTP3_FRAME_TYPE_SETTINGS) ||
379
0
            frame.type == H2O_HTTP3_FRAME_TYPE_DATA) {
380
0
            h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_FRAME_UNEXPECTED, NULL);
381
0
            break;
382
0
        }
383
0
        get_callbacks(conn)->handle_control_stream_frame(conn, frame.type, frame.payload, frame.length);
384
0
        if (quicly_get_state(conn->super.quic) >= QUICLY_STATE_CLOSING)
385
0
            break;
386
0
    } while (*src != src_end);
387
0
}
388
389
static void discard_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src,
390
                                 const uint8_t *src_end, int is_eos)
391
0
{
392
0
    if (src == NULL)
393
0
        return;
394
0
    *src = src_end;
395
0
}
396
397
static void unknown_type_handle_input(h2o_http3_conn_t *conn, struct st_h2o_http3_ingress_unistream_t *stream, const uint8_t **src,
398
                                      const uint8_t *src_end, int is_eos)
399
0
{
400
0
    uint64_t type;
401
402
    /* resets are allowed at least until the type is being determined */
403
0
    if (src == NULL)
404
0
        return;
405
406
    /* read the type, or just return if incomplete */
407
0
    if ((type = quicly_decodev(src, src_end)) == UINT64_MAX)
408
0
        return;
409
410
0
    switch (type) {
411
0
    case H2O_HTTP3_STREAM_TYPE_CONTROL:
412
0
        conn->_control_streams.ingress.control = stream;
413
0
        stream->handle_input = control_stream_handle_input;
414
0
        break;
415
0
    case H2O_HTTP3_STREAM_TYPE_QPACK_ENCODER:
416
0
        conn->_control_streams.ingress.qpack_encoder = stream;
417
0
        stream->handle_input = qpack_encoder_stream_handle_input;
418
0
        break;
419
0
    case H2O_HTTP3_STREAM_TYPE_QPACK_DECODER:
420
0
        conn->_control_streams.ingress.qpack_decoder = stream;
421
0
        stream->handle_input = qpack_decoder_stream_handle_input;
422
0
        break;
423
0
    default:
424
0
        quicly_request_stop(stream->quic, H2O_HTTP3_ERROR_STREAM_CREATION);
425
0
        stream->handle_input = discard_handle_input;
426
0
        break;
427
0
    }
428
429
0
    return stream->handle_input(conn, stream, src, src_end, is_eos);
430
0
}
431
432
static void egress_unistream_on_destroy(quicly_stream_t *qs, quicly_error_t err)
433
19.9k
{
434
19.9k
    struct st_h2o_http3_egress_unistream_t *stream = qs->data;
435
19.9k
    h2o_buffer_dispose(&stream->sendbuf);
436
19.9k
    free(stream);
437
19.9k
}
438
439
static void egress_unistream_on_send_shift(quicly_stream_t *qs, size_t delta)
440
0
{
441
0
    struct st_h2o_http3_egress_unistream_t *stream = qs->data;
442
0
    h2o_buffer_consume(&stream->sendbuf, delta);
443
0
}
444
445
static void egress_unistream_on_send_emit(quicly_stream_t *qs, size_t off, void *dst, size_t *len, int *wrote_all)
446
19.9k
{
447
19.9k
    struct st_h2o_http3_egress_unistream_t *stream = qs->data;
448
449
19.9k
    if (*len >= stream->sendbuf->size - off) {
450
19.9k
        *len = stream->sendbuf->size - off;
451
19.9k
        *wrote_all = 1;
452
19.9k
    } else {
453
0
        *wrote_all = 0;
454
0
    }
455
19.9k
    memcpy(dst, stream->sendbuf->bytes + off, *len);
456
19.9k
}
457
458
static void egress_unistream_on_send_stop(quicly_stream_t *qs, quicly_error_t err)
459
0
{
460
0
    struct st_h2o_http3_conn_t *conn = *quicly_get_data(qs->conn);
461
0
    h2o_quic_close_connection(&conn->super, H2O_HTTP3_ERROR_CLOSED_CRITICAL_STREAM, NULL);
462
0
}
463
464
void h2o_http3_on_create_unidirectional_stream(quicly_stream_t *qs)
465
19.9k
{
466
19.9k
    if (quicly_stream_is_self_initiated(qs)) {
467
        /* create egress unistream */
468
19.9k
        static const quicly_stream_callbacks_t callbacks = {egress_unistream_on_destroy, egress_unistream_on_send_shift,
469
19.9k
                                                            egress_unistream_on_send_emit, egress_unistream_on_send_stop};
470
19.9k
        struct st_h2o_http3_egress_unistream_t *stream = h2o_mem_alloc(sizeof(*stream));
471
19.9k
        qs->data = stream;
472
19.9k
        qs->callbacks = &callbacks;
473
19.9k
        stream->quic = qs;
474
19.9k
        h2o_buffer_init(&stream->sendbuf, &h2o_socket_buffer_prototype);
475
19.9k
    } else {
476
        /* create ingress unistream */
477
0
        static const quicly_stream_callbacks_t callbacks = {
478
0
            ingress_unistream_on_destroy, NULL, NULL, NULL, ingress_unistream_on_receive, ingress_unistream_on_receive_reset};
479
0
        struct st_h2o_http3_ingress_unistream_t *stream = h2o_mem_alloc(sizeof(*stream));
480
0
        qs->data = stream;
481
0
        qs->callbacks = &callbacks;
482
0
        stream->quic = qs;
483
0
        h2o_buffer_init(&stream->recvbuf, &h2o_socket_buffer_prototype);
484
0
        stream->handle_input = unknown_type_handle_input;
485
0
    }
486
19.9k
}
487
488
static quicly_error_t open_egress_unistream(h2o_http3_conn_t *conn, struct st_h2o_http3_egress_unistream_t **stream,
489
                                            h2o_iovec_t initial_bytes)
490
19.9k
{
491
19.9k
    quicly_stream_t *qs;
492
19.9k
    quicly_error_t ret;
493
494
19.9k
    if ((ret = quicly_open_stream(conn->super.quic, &qs, 1)) != 0)
495
0
        return ret;
496
19.9k
    *stream = qs->data;
497
19.9k
    assert((*stream)->quic == qs);
498
499
19.9k
    h2o_buffer_append(&(*stream)->sendbuf, initial_bytes.base, initial_bytes.len);
500
19.9k
    return quicly_stream_sync_sendbuf((*stream)->quic, 1);
501
19.9k
}
502
503
static uint8_t *accept_hashkey_flatten_address(uint8_t *p, quicly_address_t *addr)
504
0
{
505
0
    switch (addr->sa.sa_family) {
506
0
    case AF_INET:
507
0
        *p++ = 4;
508
0
        memcpy(p, &addr->sin.sin_addr.s_addr, 4);
509
0
        p += 4;
510
0
        memcpy(p, &addr->sin.sin_port, 2);
511
0
        p += 2;
512
0
        break;
513
0
    case AF_INET6:
514
0
        *p++ = 6;
515
0
        memcpy(p, addr->sin6.sin6_addr.s6_addr, 16);
516
0
        p += 16;
517
0
        memcpy(p, &addr->sin.sin_port, 2);
518
0
        p += 2;
519
0
        break;
520
0
    case AF_UNSPEC:
521
0
        *p++ = 0;
522
0
        break;
523
0
    default:
524
0
        h2o_fatal("unknown protocol family");
525
0
        break;
526
0
    }
527
0
    return p;
528
0
}
529
530
static uint64_t calc_accept_hashkey(quicly_address_t *destaddr, quicly_address_t *srcaddr, ptls_iovec_t src_cid)
531
0
{
532
    /* prepare key */
533
0
    static __thread EVP_CIPHER_CTX *cipher = NULL;
534
0
    if (cipher == NULL) {
535
0
        static uint8_t key[PTLS_AES128_KEY_SIZE];
536
0
        H2O_MULTITHREAD_ONCE({ ptls_openssl_random_bytes(key, sizeof(key)); });
537
0
        cipher = EVP_CIPHER_CTX_new();
538
0
        EVP_EncryptInit_ex(cipher, EVP_aes_128_cbc(), NULL, key, NULL);
539
0
    }
540
541
0
    uint8_t buf[(1 + 16 + 2) * 2 + QUICLY_MAX_CID_LEN_V1 + PTLS_AES_BLOCK_SIZE] = {0};
542
0
    uint8_t *p = buf;
543
544
    /* build plaintext to encrypt */
545
0
    p = accept_hashkey_flatten_address(p, destaddr);
546
0
    p = accept_hashkey_flatten_address(p, srcaddr);
547
0
    memcpy(p, src_cid.base, src_cid.len);
548
0
    p += src_cid.len;
549
0
    assert(p <= buf + sizeof(buf));
550
0
    size_t bytes_to_encrypt = ((p - buf) + PTLS_AES_BLOCK_SIZE - 1) / PTLS_AES_BLOCK_SIZE * PTLS_AES_BLOCK_SIZE;
551
0
    assert(bytes_to_encrypt <= sizeof(buf));
552
553
0
    { /* encrypt */
554
0
        EVP_EncryptInit_ex(cipher, NULL, NULL, NULL, NULL);
555
0
        int bytes_encrypted = 0, ret = EVP_EncryptUpdate(cipher, buf, &bytes_encrypted, buf, (int)bytes_to_encrypt);
556
0
        assert(ret);
557
0
        assert(bytes_encrypted == bytes_to_encrypt);
558
0
    }
559
560
    /* use the last `size_t` bytes of the CBC output as the result */
561
0
    uint64_t result;
562
0
    memcpy(&result, buf + bytes_to_encrypt - sizeof(result), sizeof(result));
563
    /* avoid 0 (used as nonexist) */
564
0
    if (result == 0)
565
0
        result = 1;
566
0
    return result;
567
0
}
568
569
static void drop_from_acceptmap(h2o_quic_ctx_t *ctx, h2o_quic_conn_t *conn)
570
6.64k
{
571
6.64k
    if (conn->_accept_hashkey != 0) {
572
0
        khint_t iter;
573
0
        if ((iter = kh_get_h2o_quic_acceptmap(ctx->conns_accepting, conn->_accept_hashkey)) != kh_end(ctx->conns_accepting))
574
0
            kh_del_h2o_quic_acceptmap(ctx->conns_accepting, iter);
575
0
        conn->_accept_hashkey = 0;
576
0
    }
577
6.64k
}
578
579
static void send_version_negotiation(h2o_quic_ctx_t *ctx, quicly_address_t *destaddr, ptls_iovec_t dest_cid,
580
                                     quicly_address_t *srcaddr, ptls_iovec_t src_cid, const uint32_t *versions)
581
0
{
582
0
    uint8_t payload[QUICLY_MIN_CLIENT_INITIAL_SIZE];
583
0
    size_t payload_size = quicly_send_version_negotiation(ctx->quic, dest_cid, src_cid, versions, payload);
584
0
    assert(payload_size != SIZE_MAX);
585
0
    struct iovec vec = {.iov_base = payload, .iov_len = payload_size};
586
0
    h2o_quic_send_datagrams(ctx, destaddr, srcaddr, &vec, 1, 0);
587
0
    return;
588
0
}
589
590
static void process_packets(h2o_quic_ctx_t *ctx, quicly_address_t *destaddr, quicly_address_t *srcaddr, uint8_t ttl,
591
                            quicly_decoded_packet_t *packets, size_t num_packets)
592
0
{
593
0
    h2o_quic_conn_t *conn = NULL;
594
0
    size_t accepted_packet_index = SIZE_MAX;
595
596
0
    assert(num_packets != 0);
597
598
0
    if (ctx->quic_stats != NULL) {
599
0
        ctx->quic_stats->packet_received += num_packets;
600
0
    }
601
602
#if H2O_USE_DTRACE
603
    if (PTLS_UNLIKELY(H2O_H3_PACKET_RECEIVE_ENABLED())) {
604
        for (size_t i = 0; i != num_packets; ++i)
605
            H2O_H3_PACKET_RECEIVE(&destaddr->sa, &srcaddr->sa, packets[i].octets.base, packets[i].octets.len);
606
    }
607
#endif
608
609
0
    if (packets[0].cid.src.len > QUICLY_MAX_CID_LEN_V1)
610
0
        return;
611
612
    /* find the matching connection, by first looking at the CID (all packets as client, or Handshake, 1-RTT packets as server) */
613
0
    if (packets[0].cid.dest.plaintext.node_id == ctx->next_cid->node_id &&
614
0
        packets[0].cid.dest.plaintext.thread_id == ctx->next_cid->thread_id) {
615
0
        khiter_t iter = kh_get_h2o_quic_idmap(ctx->conns_by_id, packets[0].cid.dest.plaintext.master_id);
616
0
        if (iter != kh_end(ctx->conns_by_id)) {
617
0
            conn = kh_val(ctx->conns_by_id, iter);
618
            /* drop long header packets with different 4-tuple than the original, or if the incoming packet might be the first-
619
             * flight from the client, advance to state lookup using `cons_accepting` */
620
0
            if (!quicly_is_destination(conn->quic, &destaddr->sa, &srcaddr->sa, packets)) {
621
0
                if (!packets[0].cid.dest.might_be_client_generated)
622
0
                    return;
623
0
                conn = NULL;
624
0
            }
625
0
        } else if (!packets[0].cid.dest.might_be_client_generated) {
626
            /* send stateless reset when we could not find a matching connection for a 1 RTT packet */
627
0
            if (packets[0].octets.len >= QUICLY_STATELESS_RESET_PACKET_MIN_LEN) {
628
0
                uint8_t payload[QUICLY_MIN_CLIENT_INITIAL_SIZE];
629
0
                size_t payload_size = quicly_send_stateless_reset(ctx->quic, packets[0].cid.dest.encrypted.base, payload);
630
0
                if (payload_size != SIZE_MAX) {
631
0
                    struct iovec vec = {.iov_base = payload, .iov_len = payload_size};
632
0
                    h2o_quic_send_datagrams(ctx, srcaddr, destaddr, &vec, 1, 0);
633
0
                }
634
0
            }
635
0
            return;
636
0
        }
637
0
    } else if (!packets[0].cid.dest.might_be_client_generated) {
638
        /* forward 1-RTT packets belonging to different nodes, threads */
639
0
        if (ttl == 0)
640
0
            return;
641
0
        uint64_t offending_node_id = packets[0].cid.dest.plaintext.node_id;
642
0
        if (ctx->forward_packets != NULL && ctx->forward_packets(ctx, &offending_node_id, packets[0].cid.dest.plaintext.thread_id,
643
0
                                                                 destaddr, srcaddr, ttl, packets, num_packets))
644
0
            return;
645
        /* non-authenticating 1-RTT packets are potentially stateless resets (FIXME handle them, note that we need to use a hashdos-
646
         * resistant hash map that also meets constant-time comparison requirements) */
647
0
        return;
648
0
    }
649
650
0
    if (conn == NULL) {
651
        /* Initial or 0-RTT packet, use 4-tuple to match the thread and the connection */
652
0
        assert(packets[0].cid.dest.might_be_client_generated);
653
0
        uint64_t accept_hashkey = calc_accept_hashkey(destaddr, srcaddr, packets[0].cid.src);
654
0
        if (ctx->accept_thread_divisor != 0) {
655
0
            uint32_t offending_thread = accept_hashkey % ctx->accept_thread_divisor;
656
0
            if (offending_thread != ctx->next_cid->thread_id) {
657
0
                if (ctx->forward_packets != NULL)
658
0
                    ctx->forward_packets(ctx, NULL, offending_thread, destaddr, srcaddr, ttl, packets, num_packets);
659
0
                return;
660
0
            }
661
0
        }
662
0
        khiter_t iter = kh_get_h2o_quic_acceptmap(ctx->conns_accepting, accept_hashkey);
663
0
        if (iter == kh_end(ctx->conns_accepting)) {
664
            /* a new connection for this thread (at least on this process); accept or delegate to newer process */
665
0
            if (ctx->acceptor != NULL) {
666
0
                if (packets[0].version != 0 && !quicly_is_supported_version(packets[0].version)) {
667
0
                    send_version_negotiation(ctx, srcaddr, packets[0].cid.src, destaddr, packets[0].cid.dest.encrypted,
668
0
                                             quicly_supported_versions);
669
0
                    return;
670
0
                }
671
0
            } else {
672
                /* This is the offending thread but it is not accepting, which means that the process (or the thread) is not acting
673
                 * as a server (likely gracefully shutting down). Let the application process forward the packet to the next
674
                 * generation. */
675
0
                if (ctx->forward_packets != NULL &&
676
0
                    ctx->forward_packets(ctx, NULL, ctx->next_cid->thread_id, destaddr, srcaddr, ttl, packets, num_packets))
677
0
                    return;
678
                /* If not forwarded, send rejection to the peer. A Version Negotiation packet that carries only a greasing version
679
                 * number is used for the purpose, hoping that that signal will trigger immediate downgrade to HTTP/2, across the
680
                 * broad spectrum of the client implementations than if CONNECTION_REFUSED is being used. */
681
0
                if (packets[0].version != 0) {
682
0
                    static const uint32_t no_versions[] = {0};
683
0
                    send_version_negotiation(ctx, srcaddr, packets[0].cid.src, destaddr, packets[0].cid.dest.encrypted,
684
0
                                             no_versions);
685
0
                }
686
0
                return;
687
0
            }
688
            /* try to accept any of the Initial packets being received */
689
0
            size_t i;
690
0
            for (i = 0; i != num_packets; ++i) {
691
0
                if ((packets[i].octets.base[0] & QUICLY_PACKET_TYPE_BITMASK) == QUICLY_PACKET_TYPE_INITIAL &&
692
0
                    (conn = ctx->acceptor(ctx, destaddr, srcaddr, packets + i)) != NULL) {
693
                    /* non-null generally means success, except for H2O_QUIC_ACCEPT_CONN_DECRYPTION_FAILED */
694
0
                    if (conn == &h2o_quic_accept_conn_decryption_failed) {
695
                        /* failed to decrypt Initial packet <=> it could belong to a connection on a different node; forward it to
696
                         * the destination being claimed by the DCID */
697
0
                        uint64_t offending_node_id = packets[i].cid.dest.plaintext.node_id;
698
0
                        uint32_t offending_thread_id = packets[i].cid.dest.plaintext.thread_id;
699
0
                        if (ctx->forward_packets != NULL && ttl > 0 &&
700
0
                            (offending_node_id != ctx->next_cid->node_id || offending_thread_id != ctx->next_cid->thread_id))
701
0
                            ctx->forward_packets(ctx, &offending_node_id, offending_thread_id, destaddr, srcaddr, ttl, packets,
702
0
                                                 num_packets);
703
0
                        return;
704
0
                    }
705
0
                    break;
706
0
                }
707
0
            }
708
0
            if (conn == NULL)
709
0
                return;
710
0
            accepted_packet_index = i;
711
0
            conn->_accept_hashkey = accept_hashkey;
712
0
            int r;
713
0
            iter = kh_put_h2o_quic_acceptmap(conn->ctx->conns_accepting, accept_hashkey, &r);
714
0
            assert(iter != kh_end(conn->ctx->conns_accepting));
715
0
            kh_val(conn->ctx->conns_accepting, iter) = conn;
716
0
        } else {
717
            /* likely have found a connection in `conns_accepting` */
718
0
            conn = kh_val(ctx->conns_accepting, iter);
719
0
            assert(conn != NULL);
720
0
            assert(!quicly_is_client(conn->quic));
721
0
            if (!quicly_is_destination(conn->quic, &destaddr->sa, &srcaddr->sa, packets)) {
722
0
                uint64_t offending_node_id = packets[0].cid.dest.plaintext.node_id;
723
0
                uint32_t offending_thread_id = packets[0].cid.dest.plaintext.thread_id;
724
0
                if (offending_node_id != ctx->next_cid->node_id || offending_thread_id != ctx->next_cid->thread_id) {
725
                    /* accept key matches to a connection being established, but DCID doesn't -- likely a second (or later) Initial
726
                     * that is supposed to be handled by another node. forward it. */
727
0
                    if (ttl == 0)
728
0
                        return;
729
0
                    if (ctx->forward_packets != NULL)
730
0
                        ctx->forward_packets(ctx, &offending_node_id, offending_thread_id, destaddr, srcaddr, ttl, packets,
731
0
                                             num_packets);
732
0
                }
733
                /* regardless of forwarding outcome, we need to drop this packet as it is not for us */
734
0
                return;
735
0
            }
736
0
        }
737
0
    }
738
739
    /* receive packets to the found connection */
740
0
    for (size_t i = 0; i != num_packets; ++i) {
741
0
        if (i != accepted_packet_index) {
742
0
            quicly_error_t ret = quicly_receive(conn->quic, &destaddr->sa, &srcaddr->sa, packets + i);
743
0
            switch (ret) {
744
0
            case QUICLY_ERROR_STATE_EXHAUSTION:
745
0
            case PTLS_ERROR_NO_MEMORY:
746
0
                fprintf(stderr, "%s: `quicly_receive()` returned ret:%" PRId64 "\n", __func__, ret);
747
0
                conn->callbacks->destroy_connection(conn);
748
0
                return;
749
0
            }
750
0
            if (ret != QUICLY_ERROR_PACKET_IGNORED && ret != QUICLY_ERROR_DECRYPTION_FAILED) {
751
0
                if (ctx->quic_stats != NULL) {
752
0
                    ++ctx->quic_stats->packet_processed;
753
0
                }
754
0
            }
755
0
        }
756
0
    }
757
758
0
    h2o_quic_schedule_timer(conn);
759
0
    if (ctx->notify_conn_update != NULL)
760
0
        ctx->notify_conn_update(ctx, conn);
761
0
}
762
763
void h2o_quic_read_socket(h2o_quic_ctx_t *ctx, h2o_socket_t *sock)
764
0
{
765
0
    struct {
766
0
        quicly_address_t destaddr, srcaddr;
767
0
        struct iovec vec;
768
0
        uint8_t ttl;
769
0
        uint8_t ecn;
770
0
        union {
771
0
            struct cmsghdr _align; /* natrually align the contents of controlbuf (which are of type cmsghdr) */
772
0
            char controlbuf[
773
0
#ifdef IPV6_PKTINFO
774
0
                CMSG_SPACE(sizeof(struct in6_pktinfo))
775
#elif defined(IP_PKTINFO)
776
                CMSG_SPACE(sizeof(struct in_pktinfo))
777
#elif defined(IP_RECVDSTADDR)
778
                CMSG_SPACE(sizeof(struct in_addr))
779
#else
780
                CMSG_SPACE(1)
781
#endif
782
0
#if defined(IPV6_TCLASS) || defined(IP_TOS) || defined(IP_RECVTOS)
783
                + CMSG_SPACE(sizeof(int)) /* IPv6 uses int, which is bigger than uint8_t used by IPv4 */
784
0
#endif
785
0
            ];
786
0
        };
787
0
        uint8_t buf[1600];
788
0
    } dgrams[10];
789
0
#ifdef __linux__
790
0
    struct mmsghdr mess[PTLS_ELEMENTSOF(dgrams)];
791
#else
792
    struct {
793
        struct msghdr msg_hdr;
794
    } mess[PTLS_ELEMENTSOF(dgrams)];
795
#endif
796
797
0
#define INIT_DGRAMS(i)                                                                                                             \
798
0
    do {                                                                                                                           \
799
0
        mess[i].msg_hdr = (struct msghdr){                                                                                         \
800
0
            .msg_name = &dgrams[i].srcaddr,                                                                                        \
801
0
            .msg_namelen = sizeof(dgrams[i].srcaddr),                                                                              \
802
0
            .msg_iov = &dgrams[i].vec,                                                                                             \
803
0
            .msg_iovlen = 1,                                                                                                       \
804
0
            .msg_control = &dgrams[i].controlbuf,                                                                                  \
805
0
            .msg_controllen = sizeof(dgrams[i].controlbuf),                                                                        \
806
0
        };                                                                                                                         \
807
0
        memset(&dgrams[i].destaddr, 0, sizeof(dgrams[i].destaddr));                                                                \
808
0
        dgrams[i].vec.iov_base = dgrams[i].buf;                                                                                    \
809
0
        dgrams[i].vec.iov_len = sizeof(dgrams[i].buf);                                                                             \
810
0
    } while (0)
811
812
    /* If the socket is either ctx->sock or ctx->sock_alt_family, the destination port is that of the socket. Otherwise, the
813
     * preprocess_packet callback takes care, therefore the value can be bogus. */
814
0
    in_port_t dst_port = ctx->sock_alt_family.sock == sock ? *ctx->sock_alt_family.port : *ctx->sock.port;
815
816
0
    int fd = h2o_socket_get_fd(sock);
817
0
    size_t dgram_index, num_dgrams;
818
819
    /* Read datagrams. Sender should be provided an ACK every fraction of RTT, otherwise its behavior becomes bursty (assuming that
820
     * pacing is not used), rather than packets being spread across entire round-trip. To minimize the chance of us entering such
821
     * situation, number of datagrams being read at once is limited to `PTLS_ELEMENTSOF(dgrams)`. In other words, one ack is
822
     * generated for no more than every 10 ack-eliciting packets being received, unless the ack-frequency extension is used. */
823
0
#ifdef __linux__
824
0
    {
825
0
        int rret;
826
0
        do {
827
0
            for (dgram_index = 0; dgram_index < PTLS_ELEMENTSOF(dgrams); ++dgram_index)
828
0
                INIT_DGRAMS(dgram_index);
829
0
        } while ((rret = recvmmsg(fd, mess, PTLS_ELEMENTSOF(mess), 0, NULL)) < 0 && errno == EINTR);
830
0
        if (rret <= 0)
831
0
            goto Exit;
832
0
        num_dgrams = (size_t)rret;
833
0
        for (dgram_index = 0; dgram_index < num_dgrams; ++dgram_index)
834
0
            dgrams[dgram_index].vec.iov_len = (size_t)mess[dgram_index].msg_len;
835
0
    }
836
#else
837
    for (dgram_index = 0; dgram_index < PTLS_ELEMENTSOF(dgrams); ++dgram_index) {
838
        ssize_t rret;
839
        do {
840
            INIT_DGRAMS(dgram_index);
841
        } while ((rret = recvmsg(fd, &mess[dgram_index].msg_hdr, 0)) < 0 && errno == EINTR);
842
        if (rret < 0)
843
            break;
844
        dgrams[dgram_index].vec.iov_len = rret;
845
    }
846
    num_dgrams = dgram_index;
847
    if (num_dgrams == 0)
848
        goto Exit;
849
#endif
850
851
    /* normalize and store the obtained data into `dgrams` */
852
0
    for (dgram_index = 0; dgram_index < num_dgrams; ++dgram_index) {
853
0
        dgrams[dgram_index].ecn = 0;
854
0
        dgrams[dgram_index].destaddr.sa.sa_family = AF_UNSPEC;
855
0
        { /* fetch destination address */
856
0
            struct cmsghdr *cmsg;
857
0
            for (cmsg = CMSG_FIRSTHDR(&mess[dgram_index].msg_hdr); cmsg != NULL;
858
0
                 cmsg = CMSG_NXTHDR(&mess[dgram_index].msg_hdr, cmsg)) {
859
0
                switch (cmsg->cmsg_level) {
860
0
                case IPPROTO_IP:
861
0
                    switch (cmsg->cmsg_type) {
862
0
#ifdef IP_PKTINFO
863
0
                    case IP_PKTINFO:
864
0
                        dgrams[dgram_index].destaddr.sin.sin_family = AF_INET;
865
0
                        memcpy(&dgrams[dgram_index].destaddr.sin.sin_addr, CMSG_DATA(cmsg) + offsetof(struct in_pktinfo, ipi_addr),
866
0
                               sizeof(struct in_addr));
867
0
                        dgrams[dgram_index].destaddr.sin.sin_port = dst_port;
868
0
                        break;
869
0
#endif
870
#ifdef IP_RECVDSTADDR
871
                    case IP_RECVDSTADDR:
872
                        dgrams[dgram_index].destaddr.sin.sin_family = AF_INET;
873
                        memcpy(&dgrams[dgram_index].destaddr.sin.sin_addr, CMSG_DATA(cmsg), sizeof(struct in_addr));
874
                        dgrams[dgram_index].destaddr.sin.sin_port = dst_port;
875
                        break;
876
#endif
877
0
#ifdef IP_RECVTOS
878
#ifdef __APPLE__
879
                    case IP_RECVTOS:
880
#else
881
0
                    case IP_TOS:
882
0
#endif
883
                        /* draft-ietf-tsvwg-udp-ecn-05 recommends using a byte on all platforms */
884
0
                        dgrams[dgram_index].ecn = *(uint8_t *)CMSG_DATA(cmsg) & IPTOS_ECN_MASK;
885
0
                        break;
886
0
#endif
887
0
                    default:
888
0
                        break;
889
0
                    }
890
0
                    break;
891
0
                case IPPROTO_IPV6:
892
0
                    switch (cmsg->cmsg_type) {
893
0
#ifdef IPV6_PKTINFO
894
0
                    case IPV6_PKTINFO:
895
0
                        dgrams[dgram_index].destaddr.sin6.sin6_family = AF_INET6;
896
0
                        memcpy(&dgrams[dgram_index].destaddr.sin6.sin6_addr,
897
0
                               CMSG_DATA(cmsg) + offsetof(struct in6_pktinfo, ipi6_addr), sizeof(struct in6_addr));
898
0
                        dgrams[dgram_index].destaddr.sin6.sin6_port = dst_port;
899
0
                        break;
900
0
#endif
901
0
#ifdef IPV6_TCLASS
902
0
                    case IPV6_TCLASS: {
903
0
                        int optval;
904
0
                        memcpy(&optval, CMSG_DATA(cmsg), sizeof(optval));
905
0
                        dgrams[dgram_index].ecn = optval & IPTOS_ECN_MASK;
906
0
                    } break;
907
0
#endif
908
0
                    default:
909
0
                        break;
910
0
                    }
911
0
                    break;
912
0
                }
913
0
            }
914
0
        }
915
0
        dgrams[dgram_index].ttl = ctx->default_ttl;
916
        /* preprocess (and drop the packet if it failed) */
917
0
        if (ctx->preprocess_packet != NULL &&
918
0
            !ctx->preprocess_packet(ctx, &mess[dgram_index].msg_hdr, &dgrams[dgram_index].destaddr, &dgrams[dgram_index].srcaddr,
919
0
                                    &dgrams[dgram_index].ttl)) {
920
0
            dgrams[dgram_index].vec.iov_len = 0; /* mark as unused */
921
0
        } else {
922
0
            assert(dgrams[dgram_index].srcaddr.sa.sa_family == AF_INET || dgrams[dgram_index].srcaddr.sa.sa_family == AF_INET6);
923
0
        }
924
0
    }
925
926
    /* convert dgrams to decoded packets and process them in group of (4-tuple, dcid) */
927
0
    quicly_decoded_packet_t packets[64];
928
0
    size_t packet_index = 0;
929
0
    dgram_index = 0;
930
0
    while (dgram_index < num_dgrams) {
931
0
        int has_decoded = 0; /* indicates if a decoded packet belonging to a different connection is stored at
932
                              * `packets[packet_index]` */
933
        /* skip zero-sized datagrams (or the ones for which preprocessing failed) */
934
0
        if (dgrams[dgram_index].vec.iov_len == 0) {
935
0
            ++dgram_index;
936
0
            continue;
937
0
        }
938
        /* dispatch packets in `packets`, if the datagram at dgram_index is from a different path */
939
0
        if (packet_index != 0) {
940
0
            assert(dgram_index != 0);
941
            /* check source address */
942
0
            if (h2o_socket_compare_address(&dgrams[dgram_index - 1].srcaddr.sa, &dgrams[dgram_index].srcaddr.sa, 1) != 0)
943
0
                goto ProcessPackets;
944
            /* check destination address, if available */
945
0
            if (dgrams[dgram_index - 1].destaddr.sa.sa_family == AF_UNSPEC &&
946
0
                dgrams[dgram_index].destaddr.sa.sa_family == AF_UNSPEC) {
947
                /* ok */
948
0
            } else if (h2o_socket_compare_address(&dgrams[dgram_index - 1].destaddr.sa, &dgrams[dgram_index].destaddr.sa, 1) == 0) {
949
                /* ok */
950
0
            } else {
951
0
                goto ProcessPackets;
952
0
            }
953
            /* TTL should be same for dispatched packets */
954
0
            if (dgrams[dgram_index - 1].ttl != dgrams[dgram_index].ttl)
955
0
                goto ProcessPackets;
956
0
        }
957
        /* decode the first packet */
958
0
        size_t payload_off = 0;
959
0
        if (quicly_decode_packet(ctx->quic, packets + packet_index, dgrams[dgram_index].vec.iov_base,
960
0
                                 dgrams[dgram_index].vec.iov_len, &payload_off) == SIZE_MAX) {
961
0
            ++dgram_index;
962
0
            goto ProcessPackets;
963
0
        }
964
0
        packets[packet_index].ecn = dgrams[dgram_index].ecn;
965
        /* dispatch packets in `packets` if the DCID is different, setting the `has_decoded` flag */
966
0
        if (packet_index != 0) {
967
0
            const ptls_iovec_t *prev_dcid = &packets[packet_index - 1].cid.dest.encrypted,
968
0
                               *cur_dcid = &packets[packet_index].cid.dest.encrypted;
969
0
            if (!(prev_dcid->len == cur_dcid->len && memcmp(prev_dcid->base, cur_dcid->base, prev_dcid->len) == 0)) {
970
0
                has_decoded = 1;
971
0
                ++dgram_index;
972
0
                goto ProcessPackets;
973
0
            }
974
0
        }
975
0
        ++packet_index;
976
        /* add rest of the packets */
977
0
        while (payload_off < dgrams[dgram_index].vec.iov_len && packet_index < PTLS_ELEMENTSOF(packets)) {
978
0
            if (quicly_decode_packet(ctx->quic, packets + packet_index, dgrams[dgram_index].vec.iov_base,
979
0
                                     dgrams[dgram_index].vec.iov_len, &payload_off) == SIZE_MAX)
980
0
                break;
981
0
            packets[packet_index].ecn = dgrams[dgram_index].ecn;
982
0
            ++packet_index;
983
0
        }
984
0
        ++dgram_index;
985
        /* if we have enough room for the next datagram, that is, the expected worst case of 4 packets in a coalesced datagram,
986
         * continue */
987
0
        if (packet_index + 4 < PTLS_ELEMENTSOF(packets))
988
0
            continue;
989
990
0
    ProcessPackets:
991
0
        if (packet_index != 0) {
992
0
            process_packets(ctx, &dgrams[dgram_index - 1].destaddr, &dgrams[dgram_index - 1].srcaddr, dgrams[dgram_index - 1].ttl,
993
0
                            packets, packet_index);
994
0
            if (has_decoded) {
995
0
                packets[0] = packets[packet_index];
996
0
                packet_index = 1;
997
0
            } else {
998
0
                packet_index = 0;
999
0
            }
1000
0
        }
1001
0
    }
1002
0
    if (packet_index != 0)
1003
0
        process_packets(ctx, &dgrams[dgram_index - 1].destaddr, &dgrams[dgram_index - 1].srcaddr, dgrams[dgram_index - 1].ttl,
1004
0
                        packets, packet_index);
1005
1006
0
Exit:;
1007
1008
0
#undef INIT_DGRAMS
1009
0
}
1010
1011
static void on_read(h2o_socket_t *sock, const char *err)
1012
0
{
1013
0
    h2o_quic_ctx_t *ctx = sock->data;
1014
0
    h2o_quic_read_socket(ctx, sock);
1015
0
}
1016
1017
static void setup_quic_socket(h2o_quic_ctx_t *ctx, h2o_quic_socket_t *qsock, h2o_socket_t *sock)
1018
0
{
1019
0
    *qsock = (h2o_quic_socket_t){.sock = sock};
1020
0
    h2o_socket_getsockname(qsock->sock, (void *)&qsock->addr);
1021
0
    switch (qsock->addr.ss_family) {
1022
0
    case AF_INET:
1023
0
        qsock->port = &((struct sockaddr_in *)&qsock->addr)->sin_port;
1024
0
        break;
1025
0
    case AF_INET6:
1026
0
        qsock->port = &((struct sockaddr_in6 *)&qsock->addr)->sin6_port;
1027
0
        break;
1028
0
    default:
1029
0
        assert(!"unexpected address family");
1030
0
        break;
1031
0
    }
1032
0
    qsock->sock->data = ctx;
1033
0
    h2o_socket_read_start(qsock->sock, on_read);
1034
0
}
1035
1036
static void on_timeout(h2o_timer_t *timeout)
1037
18.6k
{
1038
18.6k
    h2o_quic_conn_t *conn = H2O_STRUCT_FROM_MEMBER(h2o_quic_conn_t, _timeout, timeout);
1039
18.6k
    h2o_quic_send(conn);
1040
18.6k
}
1041
1042
int h2o_http3_read_frame(h2o_http3_read_frame_t *frame, int is_client, uint64_t stream_type, size_t max_frame_payload_size,
1043
                         const uint8_t **_src, const uint8_t *src_end, const char **err_desc)
1044
34.0k
{
1045
34.0k
    const uint8_t *src = *_src;
1046
1047
34.0k
    if ((frame->type = quicly_decodev(&src, src_end)) == UINT64_MAX)
1048
49
        return H2O_HTTP3_ERROR_INCOMPLETE;
1049
33.9k
    if ((frame->length = quicly_decodev(&src, src_end)) == UINT64_MAX)
1050
102
        return H2O_HTTP3_ERROR_INCOMPLETE;
1051
33.8k
    frame->_header_size = (uint8_t)(src - *_src);
1052
1053
    /* read the content of the frame (unless it's a DATA frame) */
1054
33.8k
    frame->payload = NULL;
1055
33.8k
    if (frame->type != H2O_HTTP3_FRAME_TYPE_DATA) {
1056
7.87k
        if (frame->length > max_frame_payload_size) {
1057
198
            H2O_PROBE(H3_FRAME_RECEIVE, frame->type, NULL, frame->length);
1058
198
            PTLS_LOG(h2o, h3_frame_receive, {
1059
198
                PTLS_LOG_ELEMENT_UNSIGNED(frame_type, frame->type);
1060
198
                PTLS_LOG_ELEMENT_UNSIGNED(payload_len, frame->length);
1061
198
            });
1062
198
            *err_desc = h2o_http3_err_frame_too_large;
1063
198
            return H2O_HTTP3_ERROR_GENERAL_PROTOCOL; /* FIXME is this the correct code? */
1064
198
        }
1065
7.67k
        if (src_end - src < frame->length)
1066
51
            return H2O_HTTP3_ERROR_INCOMPLETE;
1067
7.62k
        frame->payload = src;
1068
7.62k
        src += frame->length;
1069
7.62k
    }
1070
1071
33.6k
    H2O_PROBE(H3_FRAME_RECEIVE, frame->type, frame->payload, frame->length);
1072
33.6k
    PTLS_LOG(h2o, h3_frame_receive, {
1073
33.6k
        PTLS_LOG_ELEMENT_UNSIGNED(frame_type, frame->type);
1074
33.6k
        if (frame->payload != NULL) {
1075
33.6k
            PTLS_LOG_APPDATA_ELEMENT_HEXDUMP(payload, frame->payload, frame->length);
1076
33.6k
        } else {
1077
33.6k
            PTLS_LOG_ELEMENT_UNSIGNED(payload_len, frame->length);
1078
33.6k
        }
1079
33.6k
    });
1080
1081
    /* validate frame type */
1082
33.6k
    switch (frame->type) {
1083
0
#define FRAME(id, req_clnt, req_srvr, ctl_clnt, ctl_srvr)                                                                          \
1084
32.1k
    case H2O_HTTP3_FRAME_TYPE_##id:                                                                                                \
1085
32.1k
        switch (stream_type) {                                                                                                     \
1086
32.1k
        case H2O_HTTP3_STREAM_TYPE_REQUEST:                                                                                        \
1087
32.1k
            if (req_clnt && !is_client)                                                                                            \
1088
32.1k
                goto Validation_Success;                                                                                           \
1089
32.1k
            if (req_srvr && is_client)                                                                                             \
1090
11
                goto Validation_Success;                                                                                           \
1091
11
            break;                                                                                                                 \
1092
11
        case H2O_HTTP3_STREAM_TYPE_CONTROL:                                                                                        \
1093
0
            if (ctl_clnt && !is_client)                                                                                            \
1094
0
                goto Validation_Success;                                                                                           \
1095
0
            if (ctl_srvr && is_client)                                                                                             \
1096
0
                goto Validation_Success;                                                                                           \
1097
0
            break;                                                                                                                 \
1098
0
        default:                                                                                                                   \
1099
0
            h2o_fatal("unexpected stream type");                                                                                   \
1100
0
            break;                                                                                                                 \
1101
32.1k
        }                                                                                                                          \
1102
32.1k
        break
1103
        /* clang-format off */
1104
        /*   +-------------------------+-------------+-------------+
1105
         *   |                         | req-stream  | ctrl-stream |
1106
         *   |          frame          +------+------+------+------+
1107
         *   |                         |client|server|client|server|
1108
         *   +-------------------------+------+------+------+------+ */
1109
26.0k
        FRAME( DATA                    ,    1 ,    1 ,    0 ,    0 );
1110
6.12k
        FRAME( HEADERS                 ,    1 ,    1 ,    0 ,    0 );
1111
1
        FRAME( CANCEL_PUSH             ,    0 ,    0 ,    1 ,    1 );
1112
3
        FRAME( SETTINGS                ,    0 ,    0 ,    1 ,    1 );
1113
3
        FRAME( PUSH_PROMISE            ,    0 ,    1 ,    0 ,    0 );
1114
3
        FRAME( GOAWAY                  ,    0 ,    0 ,    1 ,    1 );
1115
1
        FRAME( MAX_PUSH_ID             ,    0 ,    0 ,    1 ,    0 );
1116
1
        FRAME( PRIORITY_UPDATE_REQUEST ,    0 ,    0 ,    1 ,    0 );
1117
1
        FRAME( PRIORITY_UPDATE_PUSH    ,    0 ,    0 ,    1 ,    0 );
1118
        /*   +-------------------------+------+------+------+------+ */
1119
        /* clang-format on */
1120
1
#undef FRAME
1121
1.48k
    default:
1122
        /* ignore extension frames that we do not handle */
1123
1.48k
        goto Validation_Success;
1124
33.6k
    }
1125
11
    return H2O_HTTP3_ERROR_FRAME_UNEXPECTED;
1126
33.6k
Validation_Success:;
1127
1128
33.6k
    *_src = src;
1129
33.6k
    return 0;
1130
33.6k
}
1131
1132
void h2o_quic_init_context(h2o_quic_ctx_t *ctx, h2o_loop_t *loop, h2o_socket_t *sock, h2o_socket_t *sock_alt_family,
1133
                           quicly_context_t *quic, quicly_cid_plaintext_t *next_cid, h2o_quic_accept_cb acceptor,
1134
                           h2o_quic_notify_connection_update_cb notify_conn_update, uint8_t use_gso, h2o_quic_stats_t *quic_stats)
1135
0
{
1136
0
    assert(quic->stream_open != NULL);
1137
1138
0
    *ctx = (h2o_quic_ctx_t){
1139
0
        .loop = loop,
1140
0
        .quic = quic,
1141
0
        .next_cid = next_cid,
1142
0
        .conns_by_id = kh_init_h2o_quic_idmap(),
1143
0
        .conns_accepting = kh_init_h2o_quic_acceptmap(),
1144
0
        .notify_conn_update = notify_conn_update,
1145
0
        .acceptor = acceptor,
1146
0
        .use_gso = use_gso,
1147
0
        .quic_stats = quic_stats,
1148
0
    };
1149
0
    setup_quic_socket(ctx, &ctx->sock, sock);
1150
0
    if (sock_alt_family != NULL) {
1151
0
        setup_quic_socket(ctx, &ctx->sock_alt_family, sock_alt_family);
1152
0
    }
1153
0
}
1154
1155
void h2o_quic_dispose_context(h2o_quic_ctx_t *ctx)
1156
0
{
1157
0
    assert(kh_size(ctx->conns_by_id) == 0);
1158
0
    assert(kh_size(ctx->conns_accepting) == 0);
1159
1160
0
    h2o_socket_close(ctx->sock.sock);
1161
0
    if (ctx->sock_alt_family.sock != NULL)
1162
0
        h2o_socket_close(ctx->sock_alt_family.sock);
1163
0
    kh_destroy_h2o_quic_idmap(ctx->conns_by_id);
1164
0
    kh_destroy_h2o_quic_acceptmap(ctx->conns_accepting);
1165
0
}
1166
1167
void h2o_quic_set_forwarding_context(h2o_quic_ctx_t *ctx, uint32_t accept_thread_divisor, uint8_t ttl,
1168
                                     h2o_quic_forward_packets_cb forward_cb, h2o_quic_preprocess_packet_cb preprocess_cb)
1169
0
{
1170
0
    ctx->accept_thread_divisor = accept_thread_divisor;
1171
0
    ctx->forward_packets = forward_cb;
1172
0
    ctx->default_ttl = ttl;
1173
0
    ctx->preprocess_packet = preprocess_cb;
1174
0
}
1175
1176
void h2o_quic_close_connection(h2o_quic_conn_t *conn, quicly_error_t err, const char *reason_phrase)
1177
4.23k
{
1178
4.23k
    switch (quicly_get_state(conn->quic)) {
1179
0
    case QUICLY_STATE_FIRSTFLIGHT: /* FIXME why is this separate? */
1180
0
        conn->callbacks->destroy_connection(conn);
1181
0
        break;
1182
4.23k
    case QUICLY_STATE_CONNECTED:
1183
4.23k
        quicly_close(conn->quic, err, reason_phrase);
1184
4.23k
        h2o_quic_schedule_timer(conn);
1185
4.23k
        break;
1186
0
    default:
1187
        /* only need to wait for the socket close */
1188
0
        break;
1189
4.23k
    }
1190
4.23k
}
1191
1192
void h2o_quic_close_all_connections(h2o_quic_ctx_t *ctx)
1193
0
{
1194
0
    h2o_quic_conn_t *conn;
1195
1196
0
    kh_foreach_value(ctx->conns_by_id, conn, { h2o_quic_close_connection(conn, 0, NULL); });
1197
    /* closing a connection should also remove an entry from conns_accepting */
1198
0
    assert(kh_size(ctx->conns_accepting) == 0);
1199
0
}
1200
1201
size_t h2o_quic_num_connections(h2o_quic_ctx_t *ctx)
1202
0
{
1203
    /* throughout its lifetime, a connection is always registered to both conns_by_id and conns_accepting,
1204
       thus counting conns_by_id is enough */
1205
0
    return kh_size(ctx->conns_by_id);
1206
0
}
1207
1208
void h2o_quic_init_conn(h2o_quic_conn_t *conn, h2o_quic_ctx_t *ctx, const h2o_quic_conn_callbacks_t *callbacks)
1209
6.64k
{
1210
6.64k
    *conn = (h2o_quic_conn_t){ctx, NULL, callbacks};
1211
6.64k
    h2o_timer_init(&conn->_timeout, on_timeout);
1212
6.64k
}
1213
1214
void h2o_quic_dispose_conn(h2o_quic_conn_t *conn)
1215
6.64k
{
1216
6.64k
    if (conn->quic != NULL) {
1217
6.64k
        khiter_t iter;
1218
        /* unregister from maps */
1219
6.64k
        if ((iter = kh_get_h2o_quic_idmap(conn->ctx->conns_by_id, quicly_get_master_id(conn->quic)->master_id)) !=
1220
6.64k
            kh_end(conn->ctx->conns_by_id))
1221
6.64k
            kh_del_h2o_quic_idmap(conn->ctx->conns_by_id, iter);
1222
6.64k
        drop_from_acceptmap(conn->ctx, conn);
1223
6.64k
        quicly_free(conn->quic);
1224
6.64k
    }
1225
6.64k
    h2o_timer_unlink(&conn->_timeout);
1226
6.64k
}
1227
1228
void h2o_quic_setup(h2o_quic_conn_t *conn, quicly_conn_t *quic)
1229
6.64k
{
1230
    /* Setup relation between `h2o_quic_conn_t` and `quicly_conn_t`. At this point, `conn` will not have `quic` associated, though
1231
     * the back pointer might have alreday been set up (see how we call `quicly_accept`). */
1232
6.64k
    assert(conn->quic == NULL);
1233
6.64k
    void **backptr = quicly_get_data(quic);
1234
6.64k
    if (*backptr == NULL) {
1235
6.64k
        *backptr = conn;
1236
6.64k
    } else {
1237
0
        assert(*backptr == conn);
1238
0
    }
1239
6.64k
    conn->quic = quic;
1240
1241
    /* register to the idmap */
1242
6.64k
    int r;
1243
6.64k
    khiter_t iter = kh_put_h2o_quic_idmap(conn->ctx->conns_by_id, quicly_get_master_id(conn->quic)->master_id, &r);
1244
6.64k
    assert(iter != kh_end(conn->ctx->conns_by_id));
1245
6.64k
    kh_val(conn->ctx->conns_by_id, iter) = conn;
1246
6.64k
}
1247
1248
void h2o_http3_init_conn(h2o_http3_conn_t *conn, h2o_quic_ctx_t *ctx, const h2o_http3_conn_callbacks_t *callbacks,
1249
                         const h2o_http3_qpack_context_t *qpack_ctx, size_t max_frame_payload_size)
1250
6.64k
{
1251
6.64k
    h2o_quic_init_conn(&conn->super, ctx, &callbacks->super);
1252
6.64k
    memset((char *)conn + sizeof(conn->super), 0, sizeof(*conn) - sizeof(conn->super));
1253
6.64k
    conn->qpack.ctx = qpack_ctx;
1254
6.64k
    conn->max_frame_payload_size = max_frame_payload_size;
1255
6.64k
}
1256
1257
void h2o_http3_dispose_conn(h2o_http3_conn_t *conn)
1258
6.64k
{
1259
6.64k
    if (conn->qpack.dec != NULL)
1260
6.64k
        h2o_qpack_destroy_decoder(conn->qpack.dec);
1261
6.64k
    if (conn->qpack.enc != NULL)
1262
0
        h2o_qpack_destroy_encoder(conn->qpack.enc);
1263
6.64k
    h2o_quic_dispose_conn(&conn->super);
1264
6.64k
}
1265
1266
static size_t build_firstflight(h2o_http3_conn_t *conn, uint8_t *bytebuf, size_t capacity)
1267
6.64k
{
1268
6.64k
    ptls_buffer_t buf;
1269
6.64k
    int ret = 0;
1270
1271
6.64k
    ptls_buffer_init(&buf, bytebuf, capacity);
1272
1273
    /* push stream type */
1274
6.64k
    ptls_buffer_push_quicint(&buf, H2O_HTTP3_STREAM_TYPE_CONTROL);
1275
1276
    /* push SETTINGS frame */
1277
6.64k
    ptls_buffer_push_quicint(&buf, H2O_HTTP3_FRAME_TYPE_SETTINGS);
1278
6.64k
    ptls_buffer_push_block(&buf, -1, {
1279
6.64k
        quicly_context_t *qctx = quicly_get_context(conn->super.quic);
1280
6.64k
        if (qctx->transport_params.max_datagram_frame_size != 0) {
1281
            // advertise that we are prepared to receive both RFC and draft-03 datagram formats
1282
6.64k
            ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_H3_DATAGRAM);
1283
6.64k
            ptls_buffer_push_quicint(&buf, 1);
1284
6.64k
            ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_H3_DATAGRAM_DRAFT03);
1285
6.64k
            ptls_buffer_push_quicint(&buf, 1);
1286
6.64k
        };
1287
6.64k
        ptls_buffer_push_quicint(&buf, H2O_HTTP3_SETTINGS_ENABLE_CONNECT_PROTOCOL);
1288
6.64k
        ptls_buffer_push_quicint(&buf, 1);
1289
6.64k
    });
1290
1291
6.64k
    assert(!buf.is_allocated);
1292
6.64k
    return buf.off;
1293
1294
0
Exit:
1295
0
    h2o_fatal("unreachable");
1296
6.64k
}
1297
1298
quicly_error_t h2o_http3_setup(h2o_http3_conn_t *conn, quicly_conn_t *quic)
1299
6.64k
{
1300
6.64k
    quicly_error_t ret;
1301
1302
6.64k
    h2o_quic_setup(&conn->super, quic);
1303
6.64k
    conn->state = H2O_HTTP3_CONN_STATE_OPEN;
1304
1305
    /* setup h3 objects, only when the connection state has been created */
1306
6.64k
    if (quicly_get_state(quic) > QUICLY_STATE_CONNECTED)
1307
0
        goto Exit;
1308
1309
    /* create decoder with the table size set to zero; see SETTINGS sent below. */
1310
6.64k
    conn->qpack.dec = h2o_qpack_create_decoder(0, 100 /* FIXME */);
1311
1312
6.64k
    { /* open control streams, send SETTINGS */
1313
6.64k
        uint8_t firstflight[32];
1314
6.64k
        size_t firstflight_len = build_firstflight(conn, firstflight, sizeof(firstflight));
1315
6.64k
        if ((ret = open_egress_unistream(conn, &conn->_control_streams.egress.control,
1316
6.64k
                                         h2o_iovec_init(firstflight, firstflight_len))) != 0)
1317
0
            return ret;
1318
6.64k
    }
1319
1320
6.64k
    { /* open QPACK encoder & decoder streams */
1321
6.64k
        static const uint8_t encoder_first_flight[] = {H2O_HTTP3_STREAM_TYPE_QPACK_ENCODER};
1322
6.64k
        static const uint8_t decoder_first_flight[] = {H2O_HTTP3_STREAM_TYPE_QPACK_DECODER};
1323
6.64k
        if ((ret = open_egress_unistream(conn, &conn->_control_streams.egress.qpack_encoder,
1324
6.64k
                                         h2o_iovec_init(encoder_first_flight, sizeof(encoder_first_flight)))) != 0 ||
1325
6.64k
            (ret = open_egress_unistream(conn, &conn->_control_streams.egress.qpack_decoder,
1326
6.64k
                                         h2o_iovec_init(decoder_first_flight, sizeof(decoder_first_flight)))) != 0)
1327
0
            return ret;
1328
6.64k
    }
1329
1330
6.64k
Exit:
1331
6.64k
    h2o_quic_schedule_timer(&conn->super);
1332
6.64k
    return 0;
1333
6.64k
}
1334
1335
quicly_error_t h2o_quic_send(h2o_quic_conn_t *conn)
1336
25.2k
{
1337
25.2k
    quicly_address_t dest, src;
1338
25.2k
    struct iovec datagrams[10];
1339
25.2k
    size_t num_datagrams = PTLS_ELEMENTSOF(datagrams);
1340
25.2k
    uint8_t datagram_buf[1500 * PTLS_ELEMENTSOF(datagrams)];
1341
1342
25.2k
    quicly_error_t ret = quicly_send(conn->quic, &dest, &src, datagrams, &num_datagrams, datagram_buf, sizeof(datagram_buf));
1343
25.2k
    switch (ret) {
1344
18.6k
    case 0:
1345
18.6k
        if (num_datagrams != 0 &&
1346
0
            !h2o_quic_send_datagrams(conn->ctx, &dest, &src, datagrams, num_datagrams, quicly_send_get_ecn_bits(conn->quic))) {
1347
            /* FIXME close the connection immediately */
1348
0
            break;
1349
0
        }
1350
18.6k
        break;
1351
18.6k
    case QUICLY_ERROR_STATE_EXHAUSTION:
1352
6.64k
    case QUICLY_ERROR_FREE_CONNECTION:
1353
6.64k
        conn->callbacks->destroy_connection(conn);
1354
6.64k
        return 0;
1355
0
    default:
1356
0
        h2o_fatal("quicly_send returned %" PRId64, ret);
1357
25.2k
    }
1358
1359
18.6k
    h2o_quic_schedule_timer(conn);
1360
1361
18.6k
    return 1;
1362
25.2k
}
1363
1364
void h2o_http3_update_recvbuf(h2o_buffer_t **buf, size_t off, const void *src, size_t len)
1365
6.64k
{
1366
6.64k
    size_t new_size = off + len;
1367
1368
6.64k
    if ((*buf)->size < new_size) {
1369
6.64k
        h2o_buffer_reserve(buf, new_size - (*buf)->size);
1370
6.64k
        (*buf)->size = new_size;
1371
6.64k
    }
1372
6.64k
    memcpy((*buf)->bytes + off, src, len);
1373
6.64k
}
1374
1375
void h2o_quic_schedule_timer(h2o_quic_conn_t *conn)
1376
33.6k
{
1377
33.6k
    int64_t timeout = quicly_get_first_timeout(conn->quic);
1378
33.6k
    if (h2o_timer_is_linked(&conn->_timeout)) {
1379
#if !H2O_USE_LIBUV /* optimization to skip registering a timer specifying the same time */
1380
15.0k
        if (timeout == conn->_timeout.expire_at)
1381
14.9k
            return;
1382
91
#endif
1383
91
        h2o_timer_unlink(&conn->_timeout);
1384
91
    }
1385
18.7k
    uint64_t now = h2o_now(conn->ctx->loop), delay = now < timeout ? timeout - now : 0;
1386
18.7k
    h2o_timer_link(conn->ctx->loop, delay, &conn->_timeout);
1387
18.7k
}
1388
1389
int h2o_http3_handle_settings_frame(h2o_http3_conn_t *conn, const uint8_t *payload, size_t length, const char **err_desc)
1390
0
{
1391
0
    const uint8_t *src = payload, *src_end = src + length;
1392
0
    uint32_t header_table_size = 0;
1393
0
    uint64_t blocked_streams = 0;
1394
1395
0
    assert(!h2o_http3_has_received_settings(conn));
1396
1397
0
    while (src != src_end) {
1398
0
        uint64_t id;
1399
0
        uint64_t value;
1400
0
        if ((id = quicly_decodev(&src, src_end)) == UINT64_MAX)
1401
0
            goto Malformed;
1402
0
        if ((value = quicly_decodev(&src, src_end)) == UINT64_MAX)
1403
0
            goto Malformed;
1404
0
        switch (id) {
1405
0
        case H2O_HTTP3_SETTINGS_MAX_FIELD_SECTION_SIZE:
1406
0
            conn->peer_settings.max_field_section_size = value;
1407
0
            break;
1408
0
        case H2O_HTTP3_SETTINGS_QPACK_MAX_TABLE_CAPACITY:
1409
0
            header_table_size =
1410
0
                value < conn->qpack.ctx->encoder_table_capacity ? (uint32_t)value : conn->qpack.ctx->encoder_table_capacity;
1411
0
            break;
1412
0
        case H2O_HTTP3_SETTINGS_QPACK_BLOCKED_STREAMS:
1413
0
            blocked_streams = value;
1414
0
            break;
1415
0
        case H2O_HTTP3_SETTINGS_H3_DATAGRAM:
1416
0
        case H2O_HTTP3_SETTINGS_H3_DATAGRAM_DRAFT03:
1417
0
            switch (value) {
1418
0
            case 0:
1419
0
                break;
1420
0
            case 1: {
1421
0
                const quicly_transport_parameters_t *remote_tp = quicly_get_remote_transport_parameters(conn->super.quic);
1422
0
                if (remote_tp->max_datagram_frame_size == 0)
1423
0
                    goto Malformed;
1424
0
                conn->peer_settings.h3_datagram = 1;
1425
0
            } break;
1426
0
            default:
1427
0
                goto Malformed;
1428
0
            }
1429
0
            break;
1430
0
        default:
1431
0
            break;
1432
0
        }
1433
0
    }
1434
1435
0
    conn->qpack.enc = h2o_qpack_create_encoder(header_table_size, blocked_streams);
1436
0
    return 0;
1437
0
Malformed:
1438
0
    *err_desc = "malformed SETTINGS frame";
1439
0
    return H2O_HTTP3_ERROR_FRAME;
1440
0
}
1441
1442
void h2o_http3_send_qpack_stream_cancel(h2o_http3_conn_t *conn, quicly_stream_id_t stream_id)
1443
0
{
1444
0
    struct st_h2o_http3_egress_unistream_t *stream = conn->_control_streams.egress.qpack_decoder;
1445
1446
    /* allocate and write */
1447
0
    h2o_iovec_t buf = h2o_buffer_reserve(&stream->sendbuf, stream->sendbuf->size + H2O_HPACK_ENCODE_INT_MAX_LENGTH);
1448
0
    assert(buf.base != NULL);
1449
0
    stream->sendbuf->size += h2o_qpack_decoder_send_stream_cancel(conn->qpack.dec, (uint8_t *)buf.base, stream_id);
1450
1451
    /* notify the transport */
1452
0
    H2O_HTTP3_CHECK_SUCCESS(quicly_stream_sync_sendbuf(stream->quic, 1) == 0);
1453
0
}
1454
1455
void h2o_http3_send_qpack_header_ack(h2o_http3_conn_t *conn, const void *bytes, size_t len)
1456
0
{
1457
0
    struct st_h2o_http3_egress_unistream_t *stream = conn->_control_streams.egress.qpack_encoder;
1458
1459
0
    assert(stream != NULL);
1460
0
    h2o_buffer_append(&stream->sendbuf, bytes, len);
1461
0
    H2O_HTTP3_CHECK_SUCCESS(quicly_stream_sync_sendbuf(stream->quic, 1));
1462
0
}
1463
1464
void h2o_http3_send_shutdown_goaway_frame(h2o_http3_conn_t *conn)
1465
0
{
1466
    /* There is a moment where the transport-level close has been initiated while st_h2o_http3_server_conn_t remains.
1467
     * Check QUIC connection state to skip sending GOAWAY in such a case. */
1468
0
    if (conn->state < H2O_HTTP3_CONN_STATE_HALF_CLOSED && quicly_get_state(conn->super.quic) == QUICLY_STATE_CONNECTED) {
1469
        /* advertise the maximum stream ID to indicate that we will no longer accept new requests.
1470
         * HTTP/3 draft section 5.2.8 --
1471
         * "An endpoint that is attempting to gracefully shut down a connection can send a GOAWAY frame with a value set to the
1472
         * maximum possible value (2^62-4 for servers, 2^62-1 for clients). This ensures that the peer stops creating new
1473
         * requests or pushes." */
1474
0
        h2o_http3_send_goaway_frame(conn, (UINT64_C(1) << 62) - 4);
1475
0
    }
1476
0
}
1477
1478
void h2o_http3_send_goaway_frame(h2o_http3_conn_t *conn, uint64_t stream_or_push_id)
1479
0
{
1480
0
    size_t cap = h2o_http3_goaway_frame_capacity(stream_or_push_id);
1481
0
    h2o_iovec_t alloced = h2o_buffer_reserve(&conn->_control_streams.egress.control->sendbuf, cap);
1482
0
    h2o_http3_encode_goaway_frame((uint8_t *)alloced.base, stream_or_push_id);
1483
0
    conn->_control_streams.egress.control->sendbuf->size += cap;
1484
0
    quicly_stream_sync_sendbuf(conn->_control_streams.egress.control->quic, 1);
1485
0
}
1486
1487
void h2o_http3_send_h3_datagrams(h2o_http3_conn_t *conn, uint64_t flow_id, h2o_iovec_t *datagrams, size_t num_datagrams)
1488
0
{
1489
0
    for (size_t i = 0; i < num_datagrams; ++i) {
1490
0
        h2o_iovec_t *src = datagrams + i;
1491
0
        uint8_t buf[quicly_encodev_capacity(flow_id) + src->len], *p = buf;
1492
0
        p = quicly_encodev(p, flow_id);
1493
0
        memcpy(p, src->base, src->len);
1494
0
        p += src->len;
1495
0
        ptls_iovec_t payload = ptls_iovec_init(buf, p - buf);
1496
0
        quicly_send_datagram_frames(conn->super.quic, &payload, 1);
1497
0
    }
1498
1499
0
    h2o_quic_schedule_timer(&conn->super);
1500
0
}
1501
1502
uint64_t h2o_http3_decode_h3_datagram(h2o_iovec_t *payload, const void *_src, size_t len)
1503
0
{
1504
0
    const uint8_t *src = _src, *end = src + len;
1505
0
    uint64_t flow_id;
1506
1507
0
    if ((flow_id = ptls_decode_quicint(&src, end)) != UINT64_MAX)
1508
0
        *payload = h2o_iovec_init(src, end - src);
1509
0
    return flow_id;
1510
0
}