Coverage Report

Created: 2025-07-12 06:34

/src/h2o/lib/common/socket.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2015 DeNA Co., Ltd., Kazuho Oku, Justin Zhu
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 * of this software and associated documentation files (the "Software"), to
6
 * deal in the Software without restriction, including without limitation the
7
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
 * sell copies of the Software, and to permit persons to whom the Software is
9
 * furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
 * IN THE SOFTWARE.
21
 */
22
#include <errno.h>
23
#include <fcntl.h>
24
#include <inttypes.h>
25
#include <limits.h>
26
#include <netdb.h>
27
#include <netinet/in.h>
28
#include <netinet/tcp.h>
29
#include <string.h>
30
#include <sys/syscall.h>
31
#include <sys/un.h>
32
#include <unistd.h>
33
#include <openssl/err.h>
34
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
35
#include <sys/ioctl.h>
36
#endif
37
#include "picotls.h"
38
#if H2O_USE_FUSION
39
#include "picotls/fusion.h"
40
#endif
41
#include "quicly.h"
42
#include "h2o/socket.h"
43
#include "h2o/multithread.h"
44
#include "../probes_.h"
45
46
#if defined(__APPLE__) && defined(__clang__)
47
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
48
#endif
49
50
#ifndef IOV_MAX
51
#define IOV_MAX UIO_MAXIOV
52
#endif
53
54
/* the kernel headers bundled with Ubuntu 14.04 do not define this constant in netinet/tcp.h */
55
#if defined(__linux__) && !defined(TCP_NOTSENT_LOWAT)
56
#define TCP_NOTSENT_LOWAT 25
57
#endif
58
59
#define OPENSSL_HOSTNAME_VALIDATION_LINKAGE static
60
#pragma GCC diagnostic push
61
#pragma GCC diagnostic ignored "-Wpragmas"
62
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
63
#include "../../deps/ssl-conservatory/openssl/openssl_hostname_validation.c"
64
#pragma GCC diagnostic pop
65
66
#define SOCKET_PROBE(label, sock, ...) H2O_PROBE(SOCKET_##label, sock, __VA_ARGS__)
67
68
/**
 * Per-socket TLS state: handles of the TLS stacks (`ossl` for OpenSSL, `ptls` for picotls), handshake bookkeeping, the buffer of
 * received ciphertext, the buffer of TLS records waiting to be written, and flags used while an asynchronous operation is running.
 */
struct st_h2o_socket_ssl_t {
69
    SSL_CTX *ssl_ctx;
70
    SSL *ossl;
71
    ptls_t *ptls;
72
    enum {
73
        H2O_SOCKET_SSL_OFFLOAD_NONE,
74
        H2O_SOCKET_SSL_OFFLOAD_ON,
75
        H2O_SOCKET_SSL_OFFLOAD_TBD,
76
    } offload;
77
    int *did_write_in_read; /* used for detecting and closing the connection upon renegotiation (FIXME implement renegotiation) */
78
    size_t record_overhead;
79
    struct {
80
        uint64_t send_finished_iv; /* UINT64_MAX if not available */
81
        /* type and length taken from the two most recent 5-byte record headers seen by `read_bio`; [0] is the newest */
        struct {
82
            uint8_t type;
83
            uint16_t length;
84
        } last_received[2];
85
    } tls12_record_layer;
86
    struct {
87
        h2o_socket_cb cb;
88
        union {
89
            struct {
90
                struct {
91
                    enum {
92
                        ASYNC_RESUMPTION_STATE_COMPLETE = 0, /* just pass thru */
93
                        ASYNC_RESUMPTION_STATE_RECORD,       /* record first input, restore SSL state if it changes to REQUEST_SENT
94
                                                              */
95
                        ASYNC_RESUMPTION_STATE_REQUEST_SENT  /* async request has been sent, and is waiting for response */
96
                    } state;
97
                    SSL_SESSION *session_data;
98
                } async_resumption;
99
            } server;
100
            /* `server_name` and `session_cache_key.base` are freed by `destroy_ssl` when the OpenSSL handle is a client */
            struct {
101
                char *server_name;
102
                h2o_cache_t *session_cache;
103
                h2o_iovec_t session_cache_key;
104
                h2o_cache_hashcode_t session_cache_key_hash;
105
            } client;
106
        };
107
    } handshake;
108
    struct {
109
        /* ciphertext that has been received but not yet decrypted; consumed by `read_bio` and `decode_ssl_input` */
        h2o_buffer_t *encrypted;
110
    } input;
111
    /**
112
     * Pending TLS data to be sent.
113
     */
114
    struct {
115
        /**
116
         * This buffer is initialized when and only when pending data is stored. Otherwise, all the members are zero-cleared; see
117
         * `has_pending_ssl_data`.
118
         * To reduce the cost of repeated memory allocation, expansion, and release, this buffer points to a chunk of memory being
119
         * allocated from `h2o_socket_ssl_buffer_allocator` when initialized. Upon disposal, the memory chunk being used by this
120
         * buffer is returned to that memory pool, unless the chunk has been expanded. It is designed as such because sometimes it
121
         * is hard to limit the amount of TLS records being generated at once (who knows how large the server's handshake messages
122
         * will be, or when it has to send a KeyUpdate message?). But for most of the case, handshake messages will be smaller than
123
         * the default size (H2O_SOCKET_DEFAULT_SSL_BUFFER_SIZE), and application traffic will not cause expansion (see
124
         * `generate_tls_records`). Therefore, the memory chunk will be recycled.
125
         */
126
        ptls_buffer_t buf;
127
        size_t pending_off;
128
        /* when set, `dispose_ssl_output_buffer` does not release the memory of `buf` */
        unsigned zerocopy_owned : 1;
129
        /* records which of the two recycling allocators `buf` was obtained from (see `init_ssl_output_buffer`) */
        unsigned allocated_for_zerocopy : 1;
130
    } output;
131
    struct {
132
        /* while set, `h2o_socket_close` only records the request in `sock_is_closed` instead of closing the socket */
        unsigned inflight : 1;
133
        unsigned sock_is_closed : 1;
134
        ptls_buffer_t ptls_wbuf;
135
    } async;
136
};
137
138
/**
 * Bundles an OpenSSL `SSL_CTX` with a protocol list.
 * NOTE(review): the field names suggest the list is used for ALPN / NPN negotiation; the users of this struct are not visible in
 * this part of the file, so confirm before relying on that.
 */
struct st_h2o_ssl_context_t {
139
    SSL_CTX *ctx;
140
    const h2o_iovec_t *protocols;
141
    h2o_iovec_t _npn_list_of_protocols;
142
};
143
144
/**
145
 * Holds the list of buffers to be retained until notified by the kernel.
146
 */
147
/* Container of buffer pointers; manipulated by the `zerocopy_buffers_*` functions declared below. */
struct st_h2o_socket_zerocopy_buffers_t {
148
    /* array of retained buffer pointers */
    void **bufs;
149
    /* NOTE(review): presumably `first` / `last` delimit the used range of `bufs` and `capacity` is its allocated length; confirm
     * against `zerocopy_buffers_push` and `zerocopy_buffers_release`, which are defined elsewhere */
    size_t first, last, capacity;
150
    /* NOTE(review): presumably the counter value associated with `bufs[first]`; confirm against `zerocopy_buffers_release` */
    uint64_t first_counter;
151
};
152
153
/* backend functions */
154
static void init_write_buf(h2o_socket_t *sock, h2o_iovec_t *bufs, size_t bufcnt, size_t first_buf_written);
155
static void dispose_write_buf(h2o_socket_t *sock);
156
static void dispose_ssl_output_buffer(struct st_h2o_socket_ssl_t *ssl);
157
static int has_pending_ssl_bytes(struct st_h2o_socket_ssl_t *ssl);
158
static size_t generate_tls_records(h2o_socket_t *sock, h2o_iovec_t **bufs, size_t *bufcnt, size_t first_buf_written);
159
static void do_dispose_socket(h2o_socket_t *sock);
160
static void report_early_write_error(h2o_socket_t *sock);
161
static void do_write(h2o_socket_t *sock, h2o_iovec_t *bufs, size_t bufcnt);
162
static void do_read_start(h2o_socket_t *sock);
163
static void do_read_stop(h2o_socket_t *sock);
164
static int do_export(h2o_socket_t *_sock, h2o_socket_export_t *info);
165
static h2o_socket_t *do_import(h2o_loop_t *loop, h2o_socket_export_t *info);
166
static socklen_t get_peername_uncached(h2o_socket_t *sock, struct sockaddr *sa);
167
static socklen_t get_sockname_uncached(h2o_socket_t *sock, struct sockaddr *sa);
168
static int zerocopy_buffers_is_empty(struct st_h2o_socket_zerocopy_buffers_t *buffers);
169
static void zerocopy_buffers_dispose(struct st_h2o_socket_zerocopy_buffers_t *buffers);
170
static void zerocopy_buffers_push(struct st_h2o_socket_zerocopy_buffers_t *buffers, void *p);
171
static void *zerocopy_buffers_release(struct st_h2o_socket_zerocopy_buffers_t *buffers, uint64_t counter);
172
173
/* internal functions called from the backend */
174
static const char *decode_ssl_input(h2o_socket_t *sock);
175
static size_t flatten_sendvec(h2o_socket_t *sock, h2o_sendvec_t *sendvec);
176
static void on_write_complete(h2o_socket_t *sock, const char *err);
177
178
/* mmap settings referenced by `h2o_socket_buffer_prototype`: a size threshold and a temporary-file name template */
h2o_buffer_mmap_settings_t h2o_socket_buffer_mmap_settings = {
179
    32 * 1024 * 1024, /* 32MB, should better be greater than max frame size of HTTP2 for performance reasons */
180
    "/tmp/h2o.b.XXXXXX"};
181
182
/* buffer prototype applied to socket input buffers (see `h2o_socket_export` and `h2o_socket_import`) */
h2o_buffer_prototype_t h2o_socket_buffer_prototype = {
183
    {H2O_SOCKET_INITIAL_INPUT_BUFFER_SIZE}, /* minimum initial capacity; actual initial size is ~8KB, see h2o_buffer_reserve */
184
    &h2o_socket_buffer_mmap_settings};
185
186
/**
 * Configuration shared by the recycling allocators of TLS output buffers: the chunk size and the required alignment.
 * The alignment is only requested when fusion support is compiled in. The test uses `#if` (not `#ifdef`) so that it agrees with
 * the conditional inclusion of "picotls/fusion.h" near the top of this file, which is what provides
 * PTLS_X86_CACHE_LINE_ALIGN_BITS; with `#ifdef`, building with H2O_USE_FUSION defined as 0 would reference the constant without
 * the header having been included.
 */
h2o_mem_recycle_conf_t h2o_socket_ssl_buffer_conf = {.memsize = H2O_SOCKET_DEFAULT_SSL_BUFFER_SIZE,
                                                     .align_bits =
#if H2O_USE_FUSION
                                                         PTLS_X86_CACHE_LINE_ALIGN_BITS
#else
                                                         0
#endif
};
194
/* per-thread recycling allocator for TLS output buffers and flattened write buffers (see `init_ssl_output_buffer` and
 * `dispose_write_buf`) */
__thread h2o_mem_recycle_t h2o_socket_ssl_buffer_allocator = {&h2o_socket_ssl_buffer_conf};
195
/* per-thread recycling allocator used instead of the above when a TLS output buffer is allocated for zerocopy; shares the same
 * configuration */
__thread h2o_mem_recycle_t h2o_socket_zerocopy_buffer_allocator = {&h2o_socket_ssl_buffer_conf};
196
/* per-thread counter; NOTE(review): presumably the number of zerocopy buffers not yet released by the kernel - it is not updated
 * in the part of the file visible here */
__thread size_t h2o_socket_num_zerocopy_buffers_inflight;
197
198
/* flag, off by default; NOTE(review): presumably enables the use of kernel TLS offload - confirm against its readers */
int h2o_socket_use_ktls = 0;
199
200
/* Error messages of the socket layer. Functions in this file report failures by returning one of these arrays (e.g.
 * `decode_ssl_input` returns `h2o_socket_error_out_of_memory` or `h2o_socket_error_ssl_decode`). */
const char h2o_socket_error_out_of_memory[] = "out of memory";
201
const char h2o_socket_error_io[] = "I/O error";
202
const char h2o_socket_error_closed[] = "socket closed by peer";
203
const char h2o_socket_error_conn_fail[] = "connection failure";
204
const char h2o_socket_error_conn_refused[] = "connection refused";
205
const char h2o_socket_error_conn_timed_out[] = "connection timed out";
206
const char h2o_socket_error_network_unreachable[] = "network unreachable";
207
const char h2o_socket_error_host_unreachable[] = "host unreachable";
208
const char h2o_socket_error_socket_fail[] = "socket creation failed";
209
const char h2o_socket_error_ssl_no_cert[] = "no certificate";
210
const char h2o_socket_error_ssl_cert_invalid[] = "invalid certificate";
211
const char h2o_socket_error_ssl_cert_name_mismatch[] = "certificate name mismatch";
212
const char h2o_socket_error_ssl_decode[] = "SSL decode error";
213
const char h2o_socket_error_ssl_handshake[] = "ssl handshake failure";
214
215
/* File-local hooks related to TLS session resumption; both are NULL until assigned (the code that assigns and calls them is not
 * in the part of the file visible here). */
static void (*resumption_get_async)(h2o_socket_t *sock, h2o_iovec_t session_id);
216
static void (*resumption_new)(h2o_socket_t *sock, h2o_iovec_t session_id, h2o_iovec_t session_data);
217
218
#if H2O_USE_LIBUV
219
#include "socket/uv-binding.c.h"
220
#else
221
#include "socket/evloop.c.h"
222
#endif
223
224
/**
 * BIO read callback. Copies up to `len` bytes of the ciphertext buffered in `sock->ssl->input.encrypted` into `out` and removes
 * them from that buffer.
 *
 * @return number of bytes copied; 0 if `len` is zero; -1 (with the retry-read flag set on the BIO) if nothing is buffered
 */
static int read_bio(BIO *b, char *out, int len)
{
    h2o_socket_t *sock = BIO_get_data(b);

    if (len == 0)
        return 0;

    struct st_h2o_socket_ssl_t *ssl = sock->ssl;
    h2o_buffer_t *encrypted = ssl->input.encrypted;

    /* nothing to offer; ask the caller to retry later */
    if (encrypted->size == 0) {
        BIO_set_retry_read(b);
        return -1;
    }

    /* a 5-byte read is treated as a read of a record header: remember the previous header, then store type and length of this one */
    if (len == 5 && encrypted->size >= 5) {
        ssl->tls12_record_layer.last_received[1] = ssl->tls12_record_layer.last_received[0];
        ssl->tls12_record_layer.last_received[0].type = encrypted->bytes[0];
        ssl->tls12_record_layer.last_received[0].length = ((encrypted->bytes[3] & 0xff) << 8) | (encrypted->bytes[4] & 0xff);
    }

    /* hand over at most what is buffered */
    if (encrypted->size < len)
        len = (int)encrypted->size;
    memcpy(out, encrypted->bytes, len);
    h2o_buffer_consume(&ssl->input.encrypted, len);

    return len;
}
251
252
/**
 * Sets up `sock->_write_buf` as a copy of the vector `bufs[0..bufcnt)`, skipping the first `first_buf_written` bytes of the first
 * element.
 */
static void init_write_buf(h2o_socket_t *sock, h2o_iovec_t *bufs, size_t bufcnt, size_t first_buf_written)
{
    /* Pick the storage: heap when the vector (plus the one extra slot being reserved at the end so that sendvec can be flattened
     * there for encryption) does not fit into smallbufs, smallbufs otherwise. */
    if (bufcnt >= PTLS_ELEMENTSOF(sock->_write_buf.smallbufs)) {
        sock->_write_buf.bufs = h2o_mem_alloc(sizeof(sock->_write_buf.bufs[0]) * (bufcnt + 1));
        sock->_write_buf.alloced_ptr = sock->_write_buf.bufs;
    } else {
        sock->_write_buf.bufs = sock->_write_buf.smallbufs;
    }

    /* Copy the elements; only the first one is adjusted for the bytes that have already been written. */
    for (size_t i = 0; i < bufcnt; ++i) {
        if (i == 0) {
            sock->_write_buf.bufs[0].base = bufs[0].base + first_buf_written;
            sock->_write_buf.bufs[0].len = bufs[0].len - first_buf_written;
        } else {
            sock->_write_buf.bufs[i] = bufs[i];
        }
    }
    sock->_write_buf.cnt = bufcnt;
}
272
273
/**
 * Releases the resources held by `sock->_write_buf`: the heap-allocated vector (if `bufs` does not point into `smallbufs`) and the
 * flattened buffer (if any).
 */
static void dispose_write_buf(h2o_socket_t *sock)
{
    /* `bufs` is considered to be using smallbufs when it points anywhere from its first element up to one past its last element */
    int points_into_smallbufs =
        sock->_write_buf.smallbufs <= sock->_write_buf.bufs &&
        sock->_write_buf.bufs <= sock->_write_buf.smallbufs + PTLS_ELEMENTSOF(sock->_write_buf.smallbufs);

    if (!points_into_smallbufs) {
        free(sock->_write_buf.alloced_ptr);
        sock->_write_buf.bufs = sock->_write_buf.smallbufs;
    }

    /* the flattened buffer goes back to the recycling allocator */
    if (sock->_write_buf.flattened != NULL) {
        h2o_mem_free_recycle(&h2o_socket_ssl_buffer_allocator, sock->_write_buf.flattened);
        sock->_write_buf.flattened = NULL;
    }
}
289
290
static void init_ssl_output_buffer(struct st_h2o_socket_ssl_t *ssl, int zerocopy)
291
0
{
292
0
    h2o_mem_recycle_t *allocator = zerocopy ? &h2o_socket_zerocopy_buffer_allocator : &h2o_socket_ssl_buffer_allocator;
293
0
    ptls_buffer_init(&ssl->output.buf, h2o_mem_alloc_recycle(allocator), allocator->conf->memsize);
294
0
    ssl->output.buf.is_allocated = 1; /* set to true, so that the allocated memory is freed when the buffer is expanded */
295
0
    ssl->output.buf.align_bits = allocator->conf->align_bits;
296
0
    ssl->output.pending_off = 0;
297
0
    ssl->output.zerocopy_owned = 0;
298
0
    ssl->output.allocated_for_zerocopy = zerocopy;
299
0
}
300
301
static void dispose_ssl_output_buffer(struct st_h2o_socket_ssl_t *ssl)
302
0
{
303
    /* The destruction logic that we have here are different from `ptls_buffer_dispose` in following two aspects:
304
     * - returns the allocated memory to the pool if possible
305
     * - does not zero-clear the memory (there's no need to, because the content is something to be sent in clear) */
306
307
0
    assert(ssl->output.buf.is_allocated);
308
309
0
    if (!ssl->output.zerocopy_owned) {
310
0
        h2o_mem_recycle_t *allocator =
311
0
            ssl->output.allocated_for_zerocopy ? &h2o_socket_zerocopy_buffer_allocator : &h2o_socket_ssl_buffer_allocator;
312
0
        if (ssl->output.buf.capacity == allocator->conf->memsize) {
313
0
            h2o_mem_free_recycle(allocator, ssl->output.buf.base);
314
0
        } else {
315
0
            free(ssl->output.buf.base);
316
0
        }
317
0
    }
318
0
    ssl->output.buf = (ptls_buffer_t){};
319
0
    ssl->output.pending_off = 0;
320
0
    ssl->output.zerocopy_owned = 0;
321
0
}
322
323
static int has_pending_ssl_bytes(struct st_h2o_socket_ssl_t *ssl)
324
65.9k
{
325
    /* for convenience, this function can be invoked for non-TLS connections too, in which case ssl will be NULL */
326
65.9k
    if (ssl == NULL)
327
65.9k
        return 0;
328
329
    /* the contract is that `dispose_ssl_output_buffer` is called immediately when all the data are written out */
330
0
    return ssl->output.buf.base != NULL;
331
65.9k
}
332
333
/**
 * Appends `len` bytes starting at `in` to the pending TLS output of `sock`, initializing the output buffer if it is not in use.
 * Does nothing when `len` is zero. Aborts via `h2o_fatal` if the buffer cannot be grown.
 */
static void write_ssl_bytes(h2o_socket_t *sock, const void *in, size_t len)
{
    if (len == 0)
        return;

    struct st_h2o_socket_ssl_t *ssl = sock->ssl;

    if (!has_pending_ssl_bytes(ssl))
        init_ssl_output_buffer(ssl, sock->_zerocopy != NULL);
    if (ptls_buffer_reserve(&ssl->output.buf, len) != 0)
        h2o_fatal("no memory; tried to allocate %zu bytes", len);

    memcpy(ssl->output.buf.base + ssl->output.buf.off, in, len);
    ssl->output.buf.off += len;
}
344
345
/**
 * BIO write callback. Queues the bytes emitted by OpenSSL as pending TLS output of the socket.
 *
 * @return `len` on success; -1 if the write happened while `did_write_in_read` is armed (in which case the flag it points to is
 *         set instead of queueing anything)
 */
static int write_bio(BIO *b, const char *in, int len)
{
    h2o_socket_t *sock = BIO_get_data(b);
    struct st_h2o_socket_ssl_t *ssl = sock->ssl;

    /* FIXME no support for SSL renegotiation (yet) */
    if (ssl->did_write_in_read != NULL) {
        *ssl->did_write_in_read = 1;
        return -1;
    }

    /* Record bytes where the explicit IV will exist within a TLS 1.2 Finished message. When migrating the connection to picotls,
     * Finished is going to be the last and the only encrypted record being sent by OpenSSL. We record that explicit IV and picotls
     * starts with that explicit IV incremented by 1. If the output does not end with such a record, the IV is marked as
     * unavailable. */
    uint64_t finished_iv = UINT64_MAX;
    if (len >= 45 && memcmp(in + len - 45, H2O_STRLIT("\x16\x03\x03\x00\x28")) == 0) {
        const uint8_t *iv_pos = (const uint8_t *)in + len - 40;
        finished_iv = quicly_decode64(&iv_pos);
    }
    ssl->tls12_record_layer.send_finished_iv = finished_iv;

    write_ssl_bytes(sock, in, len);
    return len;
}
368
369
/**
 * BIO puts callback; writes the NUL-terminated string `str` (without the terminator) through `write_bio` and returns its result.
 */
static int puts_bio(BIO *b, const char *str)
{
    size_t length = strlen(str);
    return write_bio(b, str, (int)length);
}
373
374
/**
 * BIO ctrl callback. Supports getting / setting the shutdown flag of the BIO and accepts flush requests as a no-op.
 *
 * @return the shutdown flag for BIO_CTRL_GET_CLOSE, 1 for BIO_CTRL_SET_CLOSE and BIO_CTRL_FLUSH, 0 for any other command
 */
static long ctrl_bio(BIO *b, int cmd, long num, void *ptr)
{
    if (cmd == BIO_CTRL_GET_CLOSE)
        return BIO_get_shutdown(b);

    if (cmd == BIO_CTRL_SET_CLOSE) {
        BIO_set_shutdown(b, (int)num);
        return 1;
    }

    /* flush always succeeds; everything else is unsupported */
    return cmd == BIO_CTRL_FLUSH ? 1 : 0;
}
388
389
/**
 * Creates a BIO that is backed by the callbacks of this file (`read_bio`, `write_bio`, `puts_bio`, `ctrl_bio`), binds it to `sock`,
 * and installs it as both the read and the write BIO of `sock->ssl->ossl`.
 *
 * The BIO method table is built once per process. Allocation failures, of either the method table or the BIO, are fatal.
 */
static void setup_bio(h2o_socket_t *sock)
{
    static BIO_METHOD *volatile bio_methods = NULL;
    H2O_MULTITHREAD_ONCE({
        /* build the table in a local and publish it only after it is fully set up; `BIO_meth_new` may return NULL */
        BIO_METHOD *methods = BIO_meth_new(BIO_TYPE_FD, "h2o_socket");
        if (methods == NULL)
            h2o_fatal("no memory");
        BIO_meth_set_write(methods, write_bio);
        BIO_meth_set_read(methods, read_bio);
        BIO_meth_set_puts(methods, puts_bio);
        BIO_meth_set_ctrl(methods, ctrl_bio);
        bio_methods = methods;
    });

    BIO *bio = BIO_new(bio_methods);
    if (bio == NULL)
        h2o_fatal("no memory");
    BIO_set_data(bio, sock);
    BIO_set_init(bio, 1);
    SSL_set_bio(sock->ssl->ossl, bio, bio);
}
407
408
/**
 * Decrypts the ciphertext buffered in `sock->ssl->input.encrypted` and appends the resulting plaintext to `sock->input`. Must only
 * be called after the handshake callback has been cleared.
 *
 * @return NULL on success (including the case where more input is needed), or an error string: `h2o_socket_error_out_of_memory`,
 *         `h2o_socket_error_ssl_decode`, or a message stating that renegotiation is unsupported
 */
static const char *decode_ssl_input(h2o_socket_t *sock)
{
    assert(sock->ssl != NULL);
    assert(sock->ssl->handshake.cb == NULL);

    /* picotls */
    if (sock->ssl->ptls != NULL) {
        if (sock->ssl->input.encrypted->size != 0) {
            const char *src = sock->ssl->input.encrypted->bytes, *src_end = src + sock->ssl->input.encrypted->size;
            h2o_iovec_t reserved;
            ptls_buffer_t rbuf;
            int ret;
            /* let picotls decrypt directly into `sock->input`, for which room as large as the ciphertext is reserved */
            if ((reserved = h2o_buffer_try_reserve(&sock->input, sock->ssl->input.encrypted->size)).base == NULL)
                return h2o_socket_error_out_of_memory;
            ptls_buffer_init(&rbuf, reserved.base, reserved.len);
            do {
                size_t consumed = src_end - src;
                if ((ret = ptls_receive(sock->ssl->ptls, &rbuf, src, &consumed)) != 0)
                    break;
                src += consumed;
            } while (src != src_end);
            /* drop what has been consumed by picotls */
            h2o_buffer_consume(&sock->ssl->input.encrypted, sock->ssl->input.encrypted->size - (src_end - src));
            if (rbuf.is_allocated) {
                /* picotls moved the output to memory of its own; copy it back into `sock->input` and release that memory. The
                 * memory has to be released on the failure path too, or it leaks. */
                if ((reserved = h2o_buffer_try_reserve(&sock->input, rbuf.off)).base == NULL) {
                    ptls_buffer_dispose(&rbuf);
                    return h2o_socket_error_out_of_memory;
                }
                memcpy(reserved.base, rbuf.base, rbuf.off);
                sock->input->size += rbuf.off;
                ptls_buffer_dispose(&rbuf);
            } else {
                sock->input->size += rbuf.off;
            }
            if (!(ret == 0 || ret == PTLS_ERROR_IN_PROGRESS))
                return h2o_socket_error_ssl_decode;
        }
        return NULL;
    }

    /* OpenSSL; loop while there is ciphertext left or OpenSSL holds processed bytes */
    while (sock->ssl->input.encrypted->size != 0 || SSL_pending(sock->ssl->ossl)) {
        int rlen;
        h2o_iovec_t buf = h2o_buffer_try_reserve(&sock->input, 4096);
        if (buf.base == NULL)
            return h2o_socket_error_out_of_memory;
        { /* call SSL_read (while detecting SSL renegotiation and reporting it as error) */
            int did_write_in_read = 0;
            sock->ssl->did_write_in_read = &did_write_in_read;
            ERR_clear_error();
            rlen = SSL_read(sock->ssl->ossl, buf.base, (int)buf.len);
            sock->ssl->did_write_in_read = NULL;
            if (did_write_in_read)
                return "ssl renegotiation not supported";
        }
        if (rlen == -1) {
            if (SSL_get_error(sock->ssl->ossl, rlen) != SSL_ERROR_WANT_READ) {
                return h2o_socket_error_ssl_decode;
            }
            break;
        } else if (rlen == 0) {
            break;
        } else {
            sock->input->size += rlen;
        }
    }

    return NULL;
}
472
473
/**
 * Starts writing the pending TLS bytes of `sock`. `cb` is registered as the write callback before an empty write is issued; the
 * callback has to be in place first because the write is what eventually triggers it.
 */
static void flush_pending_ssl(h2o_socket_t *sock, h2o_socket_cb cb)
{
    sock->_cb.write = cb;
    do_write(sock, NULL, 0);
}
478
479
static void destroy_ssl(struct st_h2o_socket_ssl_t *ssl)
480
0
{
481
0
    assert(!ssl->async.inflight);
482
0
    assert(ssl->async.ptls_wbuf.base == NULL);
483
484
0
    if (ssl->ptls != NULL) {
485
0
        ptls_free(ssl->ptls);
486
0
        ssl->ptls = NULL;
487
0
    }
488
0
    if (ssl->ossl != NULL) {
489
0
        if (!SSL_is_server(ssl->ossl)) {
490
0
            free(ssl->handshake.client.server_name);
491
0
            free(ssl->handshake.client.session_cache_key.base);
492
0
        }
493
0
        SSL_free(ssl->ossl);
494
0
        ssl->ossl = NULL;
495
0
    }
496
0
    h2o_buffer_dispose(&ssl->input.encrypted);
497
0
    if (has_pending_ssl_bytes(ssl))
498
0
        dispose_ssl_output_buffer(ssl);
499
0
    free(ssl);
500
0
}
501
502
/**
 * Destroys `sock`: releases the TLS state, the input buffer and the cached peer / local addresses, hands the socket to the backend
 * for disposal, and finally invokes the on-close callback if one is registered. `err` is not used.
 */
static void dispose_socket(h2o_socket_t *sock, const char *err)
{
    if (sock->ssl != NULL) {
        destroy_ssl(sock->ssl);
        sock->ssl = NULL;
    }
    h2o_buffer_dispose(&sock->input);

    /* cached addresses (freeing NULL is a no-op) */
    free(sock->_peername);
    sock->_peername = NULL;
    free(sock->_sockname);
    sock->_sockname = NULL;

    /* the callback is copied out before the socket is handed to `do_dispose_socket`, and `sock` is not touched afterwards */
    void (*close_cb)(void *data) = sock->on_close.cb;
    void *close_cb_data = sock->on_close.data;

    do_dispose_socket(sock);

    if (close_cb != NULL)
        close_cb(close_cb_data);
}
529
530
/**
 * Closes a TLS socket, sending a Close Notify alert first when that is possible.
 *
 * The socket is disposed immediately if `err` is non-NULL, if a write is in progress, if TLS is offloaded, or if generating the
 * alert fails. Otherwise, if the alert produced bytes to be sent, reading is stopped and the socket is disposed once those bytes
 * have been written (`dispose_socket` is registered as the write callback).
 */
static void shutdown_ssl(h2o_socket_t *sock, const char *err)
{
    if (err != NULL)
        goto Close;

    if (sock->_cb.write != NULL) {
        /* note: libuv calls the write callback after the socket is closed by uv_close (with status set to 0 if the write succeeded)
         */
        sock->_cb.write = NULL;
        goto Close;
    }

    /* at the moment, we do not send Close Notify Alert when kTLS is used (TODO) */
    if (sock->ssl->offload == H2O_SOCKET_SSL_OFFLOAD_ON)
        goto Close;

    /* send Close Notify if necessary, depending on each TLS stack being used */
    if (sock->ssl->ptls != NULL) {
        ptls_buffer_t wbuf;
        uint8_t wbuf_small[32];
        ptls_buffer_init(&wbuf, wbuf_small, sizeof(wbuf_small));
        if (ptls_send_alert(sock->ssl->ptls, &wbuf, PTLS_ALERT_LEVEL_WARNING, PTLS_ALERT_CLOSE_NOTIFY) != 0) {
            /* release the buffer on failure too; it might have been moved to the heap while the alert was being built */
            ptls_buffer_dispose(&wbuf);
            goto Close;
        }
        write_ssl_bytes(sock, wbuf.base, wbuf.off);
        ptls_buffer_dispose(&wbuf);
    } else if (sock->ssl->ossl != NULL) {
        ERR_clear_error();
        if (SSL_shutdown(sock->ssl->ossl) == -1)
            goto Close;
    } else {
        goto Close;
    }

    /* if the alert is pending, dispose the socket after it has been flushed */
    if (has_pending_ssl_bytes(sock->ssl)) {
        h2o_socket_read_stop(sock);
        flush_pending_ssl(sock, dispose_socket);
        return;
    }

Close:
    dispose_socket(sock, err);
}
572
573
/**
 * Discards an exported socket that is not going to be imported: destroys the TLS state (if any), releases the input buffer, and
 * closes the file descriptor. `info->fd` is set to -1 afterwards.
 */
void h2o_socket_dispose_export(h2o_socket_export_t *info)
{
    assert(info->fd != -1);

    struct st_h2o_socket_ssl_t *ssl = info->ssl;
    if (ssl != NULL) {
        destroy_ssl(ssl);
        info->ssl = NULL;
    }
    h2o_buffer_dispose(&info->input);

    close(info->fd);
    info->fd = -1;
}
584
585
/**
 * Detaches the state of `sock` (backend handle, TLS state, input buffer) into `info` and closes `sock`. The socket must not be
 * using zerocopy, must not be writing, and must not have an async TLS operation inflight.
 *
 * @return 0 on success, -1 if the backend failed to export the socket (in which case `sock` is left as-is)
 */
int h2o_socket_export(h2o_socket_t *sock, h2o_socket_export_t *info)
{
    /* zero-initialized prototype that the exported buffers are switched to */
    static h2o_buffer_prototype_t nonpooling_prototype;

    assert(sock->_zerocopy == NULL);
    assert(!h2o_socket_is_writing(sock));
    assert(sock->ssl == NULL || !sock->ssl->async.inflight);

    if (do_export(sock, info) == -1)
        return -1;

    /* move the TLS state */
    info->ssl = sock->ssl;
    if (info->ssl != NULL) {
        sock->ssl = NULL;
        h2o_buffer_set_prototype(&info->ssl->input.encrypted, &nonpooling_prototype);
    }

    /* move the input buffer, giving the socket a fresh one */
    info->input = sock->input;
    h2o_buffer_set_prototype(&info->input, &nonpooling_prototype);
    h2o_buffer_init(&sock->input, &h2o_socket_buffer_prototype);

    h2o_socket_close(sock);

    return 0;
}
608
609
/**
 * Creates a socket on `loop` out of an exported state. The TLS state and the input buffer of `info` are taken over by the returned
 * socket, and `info->fd` is set to -1.
 */
h2o_socket_t *h2o_socket_import(h2o_loop_t *loop, h2o_socket_export_t *info)
{
    assert(info->fd != -1);

    h2o_socket_t *sock = do_import(loop, info);
    info->fd = -1; /* just in case */

    /* take over the TLS state; the BIO has to be rebuilt so that it refers to the new socket */
    sock->ssl = info->ssl;
    if (sock->ssl != NULL) {
        setup_bio(sock);
        h2o_buffer_set_prototype(&sock->ssl->input.encrypted, &h2o_socket_buffer_prototype);
    }

    /* take over the input buffer */
    sock->input = info->input;
    h2o_buffer_set_prototype(&sock->input, &h2o_socket_buffer_prototype);

    return sock;
}
625
626
/**
 * Closes the socket. Non-TLS sockets are disposed right away. For TLS sockets, the close is only recorded (in
 * `async.sock_is_closed`) while an async operation is inflight; otherwise the TLS shutdown sequence is started.
 */
void h2o_socket_close(h2o_socket_t *sock)
{
    if (sock->ssl == NULL) {
        dispose_socket(sock, NULL);
        return;
    }

    if (sock->ssl->async.inflight) {
        sock->ssl->async.sock_is_closed = 1;
        return;
    }

    shutdown_ssl(sock, NULL);
}
638
639
/**
 * Converts a suggested TLS record size to a payload size by subtracting the per-record overhead of the TLS connection. The input
 * is returned unchanged for non-TLS sockets, or if the overhead is not smaller than the input.
 */
static uint16_t calc_suggested_tls_payload_size(h2o_socket_t *sock, uint16_t suggested_tls_record_size)
{
    if (sock->ssl == NULL)
        return suggested_tls_record_size;

    size_t overhead = sock->ssl->record_overhead;
    if (overhead >= suggested_tls_record_size)
        return suggested_tls_record_size;

    return (uint16_t)(suggested_tls_record_size - overhead);
}
646
647
/**
 * Turns off latency-optimized writes for `sock`: the not-sent low-water mark is reset through `adjust_notsent_lowat` if it had
 * been minimized, the state becomes DISABLED, and the suggested sizes become unlimited (SIZE_MAX). `adjust_notsent_lowat` is
 * called only when the mark had been minimized; its result is ignored.
 */
static void disable_latency_optimized_write(h2o_socket_t *sock, int (*adjust_notsent_lowat)(h2o_socket_t *, unsigned))
{
    int lowat_was_minimized = sock->_latency_optimization.notsent_is_minimized;

    if (lowat_was_minimized) {
        adjust_notsent_lowat(sock, 0);
        sock->_latency_optimization.notsent_is_minimized = 0;
    }

    sock->_latency_optimization.state = H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DISABLED;
    sock->_latency_optimization.suggested_tls_payload_size = SIZE_MAX;
    sock->_latency_optimization.suggested_write_size = SIZE_MAX;
}
657
658
static inline void prepare_for_latency_optimized_write(h2o_socket_t *sock,
659
                                                       const h2o_socket_latency_optimization_conditions_t *conditions, uint32_t rtt,
660
                                                       uint32_t mss, uint32_t cwnd_size, uint32_t cwnd_avail, uint64_t loop_time,
661
                                                       int (*adjust_notsent_lowat)(h2o_socket_t *, unsigned))
662
0
{
663
    /* check RTT */
664
0
    if (rtt < conditions->min_rtt * (uint64_t)1000)
665
0
        goto Disable;
666
0
    if (rtt * conditions->max_additional_delay < loop_time * 1000 * 100)
667
0
        goto Disable;
668
669
    /* latency-optimization is enabled */
670
0
    sock->_latency_optimization.state = H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DETERMINED;
671
672
    /* no need to:
673
     *   1) adjust the write size if single_write_size << cwnd_size
674
     *   2) align TLS record boundary to TCP packet boundary if packet loss-rate is low and BW isn't small (implied by cwnd size)
675
     */
676
0
    if (mss * cwnd_size < conditions->max_cwnd) {
677
0
        if (!sock->_latency_optimization.notsent_is_minimized) {
678
0
            if (adjust_notsent_lowat(sock, 1 /* cannot be set to zero on Linux */) != 0)
679
0
                goto Disable;
680
0
            sock->_latency_optimization.notsent_is_minimized = 1;
681
0
        }
682
0
        sock->_latency_optimization.suggested_tls_payload_size = calc_suggested_tls_payload_size(sock, mss);
683
0
        sock->_latency_optimization.suggested_write_size =
684
0
            cwnd_avail * (size_t)sock->_latency_optimization.suggested_tls_payload_size;
685
0
    } else {
686
0
        if (sock->_latency_optimization.notsent_is_minimized) {
687
0
            if (adjust_notsent_lowat(sock, 0) != 0)
688
0
                goto Disable;
689
0
            sock->_latency_optimization.notsent_is_minimized = 0;
690
0
        }
691
0
        sock->_latency_optimization.suggested_tls_payload_size = SIZE_MAX;
692
0
        sock->_latency_optimization.suggested_write_size = SIZE_MAX;
693
0
    }
694
0
    return;
695
696
0
Disable:
697
0
    disable_latency_optimized_write(sock, adjust_notsent_lowat);
698
0
}
699
700
/**
701
 * Obtains RTT, MSS, size of CWND (in the number of packets).
702
 * Also writes to cwnd_avail minimum number of packets (of MSS size) sufficient to shut up poll-for-write under the precondition
703
 * that TCP_NOTSENT_LOWAT is set to 1.
704
 */
705
static int obtain_tcp_info(int fd, uint32_t *rtt, uint32_t *mss, uint32_t *cwnd_size, uint32_t *cwnd_avail)
706
3.06k
{
707
3.06k
#define CALC_CWND_PAIR_FROM_BYTE_UNITS(cwnd_bytes, inflight_bytes)                                                                 \
708
3.06k
    do {                                                                                                                           \
709
3.06k
        *cwnd_size = (cwnd_bytes + *mss / 2) / *mss;                                                                               \
710
3.06k
        *cwnd_avail = cwnd_bytes > inflight_bytes ? (cwnd_bytes - inflight_bytes) / *mss + 2 : 2;                                  \
711
3.06k
    } while (0)
712
713
3.06k
#if defined(__linux__) && defined(TCP_INFO)
714
715
3.06k
    struct tcp_info tcpi;
716
3.06k
    socklen_t tcpisz = sizeof(tcpi);
717
3.06k
    if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &tcpi, &tcpisz) != 0)
718
3.06k
        return -1;
719
0
    *rtt = tcpi.tcpi_rtt;
720
0
    *mss = tcpi.tcpi_snd_mss;
721
0
    *cwnd_size = tcpi.tcpi_snd_cwnd;
722
0
    *cwnd_avail = tcpi.tcpi_snd_cwnd > tcpi.tcpi_unacked ? tcpi.tcpi_snd_cwnd - tcpi.tcpi_unacked + 2 : 2;
723
0
    return 0;
724
725
#elif defined(__APPLE__) && defined(TCP_CONNECTION_INFO)
726
727
    struct tcp_connection_info tcpi;
728
    socklen_t tcpisz = sizeof(tcpi);
729
    if (getsockopt(fd, IPPROTO_TCP, TCP_CONNECTION_INFO, &tcpi, &tcpisz) != 0 || tcpi.tcpi_maxseg == 0)
730
        return -1;
731
    *rtt = tcpi.tcpi_srtt * 1000;
732
    *mss = tcpi.tcpi_maxseg;
733
    CALC_CWND_PAIR_FROM_BYTE_UNITS(tcpi.tcpi_snd_cwnd, tcpi.tcpi_snd_sbbytes);
734
    return 0;
735
736
#else
737
738
    /* For other operating systems that do not have TCP_NOTSENT_LOWAT, it is meaningless to return information. Return -1 to disable
739
     * the low latency optimization. */
740
    return -1;
741
742
#endif
743
744
3.06k
#undef CALC_CWND_PAIR_FROM_BYTE_UNITS
745
3.06k
}
746
747
#ifdef TCP_NOTSENT_LOWAT
748
static int adjust_notsent_lowat(h2o_socket_t *sock, unsigned notsent_lowat)
749
0
{
750
0
    return setsockopt(h2o_socket_get_fd(sock), IPPROTO_TCP, TCP_NOTSENT_LOWAT, &notsent_lowat, sizeof(notsent_lowat));
751
0
}
752
#else
753
#define adjust_notsent_lowat NULL
754
#endif
755
756
size_t h2o_socket_do_prepare_for_latency_optimized_write(h2o_socket_t *sock,
757
                                                         const h2o_socket_latency_optimization_conditions_t *conditions)
758
3.06k
{
759
3.06k
    uint32_t rtt = 0, mss = 0, cwnd_size = 0, cwnd_avail = 0;
760
3.06k
    uint64_t loop_time = UINT64_MAX;
761
3.06k
    int can_prepare = 1;
762
763
#if !defined(TCP_NOTSENT_LOWAT)
764
    /* the feature cannot be setup unless TCP_NOTSENT_LOWAT is available */
765
    can_prepare = 0;
766
#endif
767
768
#if H2O_USE_LIBUV
769
    /* poll-then-write is impossible with libuv */
770
    can_prepare = 0;
771
#else
772
3.06k
    if (can_prepare)
773
3.06k
        loop_time = h2o_evloop_get_execution_time_millisec(h2o_socket_get_loop(sock));
774
3.06k
#endif
775
776
    /* obtain TCP states */
777
3.06k
    if (can_prepare && obtain_tcp_info(h2o_socket_get_fd(sock), &rtt, &mss, &cwnd_size, &cwnd_avail) != 0)
778
3.06k
        can_prepare = 0;
779
780
    /* determine suggested_write_size, suggested_tls_record_size and adjust TCP_NOTSENT_LOWAT based on the obtained information */
781
3.06k
    if (can_prepare) {
782
0
        prepare_for_latency_optimized_write(sock, conditions, rtt, mss, cwnd_size, cwnd_avail, loop_time, adjust_notsent_lowat);
783
3.06k
    } else {
784
3.06k
        disable_latency_optimized_write(sock, adjust_notsent_lowat);
785
3.06k
    }
786
787
3.06k
    return sock->_latency_optimization.suggested_write_size;
788
789
3.06k
#undef CALC_CWND_PAIR_FROM_BYTE_UNITS
790
3.06k
}
791
792
/**
 * Returns the number of bytes to put into the next TLS write: the smaller of `bufsize` and the current upper bound on the record
 * payload.
 *
 * The upper bound comes from the latency optimizer when it is in action. Otherwise (state being TBD or DISABLED) it depends on
 * the number of bytes that have already been written: a size derived from 1400 for the first 64KB, unlimited afterwards.
 * As a side effect, a socket in the DETERMINED state is moved to NEEDS_UPDATE.
 */
static size_t calc_tls_write_size(h2o_socket_t *sock, size_t bufsize)
{
    size_t limit;

    if (sock->_latency_optimization.state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_TBD ||
        sock->_latency_optimization.state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DISABLED) {
        /* optimizer is not in action */
        if (sock->bytes_written < 64 * 1024) {
            limit = calc_suggested_tls_payload_size(sock, 1400);
        } else {
            limit = SIZE_MAX;
        }
    } else {
        /* the suggestion is being consumed now; mark a freshly determined one as needing an update */
        if (sock->_latency_optimization.state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DETERMINED)
            sock->_latency_optimization.state = H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_NEEDS_UPDATE;
        limit = sock->_latency_optimization.suggested_tls_payload_size;
    }

    if (limit < bufsize)
        return limit;
    return bufsize;
}
813
814
/**
815
 * Given a vector, generate at least one TLS record if there's enough space in the buffer, and return the size of application data
816
 * being encrypted. Otherwise, returns zero.
817
 */
818
static size_t generate_tls_records_from_one_vec(h2o_socket_t *sock, const void *input, size_t inlen)
819
0
{
820
0
    static const size_t MAX_RECORD_PAYLOAD_SIZE = 16 * 1024, LARGE_RECORD_OVERHEAD = 5 + 32;
821
822
0
    size_t tls_write_size = calc_tls_write_size(sock, inlen);
823
0
    size_t space_left = sock->ssl->output.buf.capacity - sock->ssl->output.buf.off;
824
825
0
    if (tls_write_size < inlen) {
826
        /* Writing small TLS records, one by one. Bail out if we might fail to do so. */
827
0
        if (space_left < tls_write_size + LARGE_RECORD_OVERHEAD)
828
0
            return 0;
829
0
    } else {
830
        /* Writing full-sized records. Adjust tls_write_size to a multiple of full-sized TLS records, or bail out if we cannot
831
         * write one. */
832
0
        size_t rec_capacity = space_left / (MAX_RECORD_PAYLOAD_SIZE + LARGE_RECORD_OVERHEAD);
833
0
        if (rec_capacity == 0)
834
0
            return 0;
835
0
        tls_write_size = MAX_RECORD_PAYLOAD_SIZE * rec_capacity;
836
0
        if (tls_write_size > inlen)
837
0
            tls_write_size = inlen;
838
0
    }
839
840
    /* Generate TLS record(s). */
841
0
    if (sock->ssl->ptls != NULL) {
842
0
        int ret = ptls_send(sock->ssl->ptls, &sock->ssl->output.buf, input, tls_write_size);
843
0
        assert(ret == 0);
844
0
    } else {
845
0
        int ret = SSL_write(sock->ssl->ossl, input, (int)tls_write_size);
846
        /* The error happens if SSL_write is called after SSL_read returns a fatal error (e.g. due to corrupt TCP packet being
847
         * received). We might be converting more and more TLS records on this side as read errors occur. */
848
0
        if (ret <= 0)
849
0
            return SIZE_MAX;
850
0
        assert(ret == tls_write_size);
851
0
    }
852
853
0
    SOCKET_PROBE(WRITE_TLS_RECORD, sock, tls_write_size, sock->ssl->output.buf.off);
854
0
    H2O_LOG_SOCK(write_tls_record, sock, {
855
0
        PTLS_LOG_ELEMENT_UNSIGNED(write_size, tls_write_size);
856
0
        PTLS_LOG_ELEMENT_UNSIGNED(bytes_buffered, sock->ssl->output.buf.off);
857
0
    });
858
0
    return tls_write_size;
859
0
}
860
861
/**
862
 * Generate as many TLS records as possible, given a list of vectors. Upon return, `*bufs` and `*bufcnt` will be updated to point
863
 * the buffers that still have pending data, and the number of bytes being already written within `(*buf)[0]` will be returned.
864
 */
865
static size_t generate_tls_records(h2o_socket_t *sock, h2o_iovec_t **bufs, size_t *bufcnt, size_t first_buf_written)
866
0
{
867
0
    assert(!has_pending_ssl_bytes(sock->ssl) && "we are filling encrypted bytes from the front, with no existing buffer, always");
868
869
0
    while (*bufcnt != 0) {
870
0
        if ((*bufs)->len == 0) {
871
0
            ++*bufs;
872
0
            --*bufcnt;
873
0
            continue;
874
0
        }
875
0
        if (!has_pending_ssl_bytes(sock->ssl))
876
0
            init_ssl_output_buffer(sock->ssl, sock->_zerocopy != NULL);
877
0
        size_t bytes_newly_written =
878
0
            generate_tls_records_from_one_vec(sock, (*bufs)->base + first_buf_written, (*bufs)->len - first_buf_written);
879
0
        if (bytes_newly_written == SIZE_MAX) {
880
0
            return SIZE_MAX;
881
0
        } else if (bytes_newly_written == 0) {
882
0
            break;
883
0
        }
884
0
        first_buf_written += bytes_newly_written;
885
0
        if ((*bufs)->len == first_buf_written) {
886
0
            first_buf_written = 0;
887
0
            ++*bufs;
888
0
            --*bufcnt;
889
0
        }
890
0
    }
891
892
0
    return first_buf_written;
893
0
}
894
895
/**
 * Reads the entire contents of `sendvec` into a buffer obtained from `h2o_socket_ssl_buffer_allocator`, which is retained as
 * `sock->_write_buf.flattened`.
 *
 * @return number of bytes being read, or SIZE_MAX on failure (in which case the buffer is released and the pointer reset to NULL)
 */
size_t flatten_sendvec(h2o_socket_t *sock, h2o_sendvec_t *sendvec)
{
    assert(h2o_socket_ssl_buffer_allocator.conf->memsize >= H2O_PULL_SENDVEC_MAX_SIZE);

    sock->_write_buf.flattened = h2o_mem_alloc_recycle(&h2o_socket_ssl_buffer_allocator);

    /* capture the length before invoking the read callback */
    size_t len = sendvec->len;

    if (sendvec->callbacks->read_(sendvec, sock->_write_buf.flattened, len))
        return len;

    /* failed */
    h2o_mem_free_recycle(&h2o_socket_ssl_buffer_allocator, sock->_write_buf.flattened);
    sock->_write_buf.flattened = NULL;
    return SIZE_MAX;
}
909
910
/**
 * Starts writing the given buffers to the socket.
 *
 * `cb` is registered as the write callback (no other write may be pending), the length of every buffer is added to
 * `sock->bytes_written`, and the buffers are handed to `do_write`.
 */
void h2o_socket_write(h2o_socket_t *sock, h2o_iovec_t *bufs, size_t bufcnt, h2o_socket_cb cb)
{
    SOCKET_PROBE(WRITE, sock, bufs, bufcnt, cb);
    H2O_LOG_SOCK(write, sock, {
        size_t num_bytes = 0;
        for (size_t i = 0; i < bufcnt; ++i)
            num_bytes += bufs[i].len;
        PTLS_LOG_ELEMENT_UNSIGNED(num_bytes, num_bytes);
        PTLS_LOG_ELEMENT_UNSIGNED(bufcnt, bufcnt);
        PTLS_LOG_ELEMENT_PTR(cb, cb);
    });

    /* only one write can be in flight */
    assert(sock->_cb.write == NULL);
    sock->_cb.write = cb;

    /* account for the bytes, optionally dumping them */
    size_t idx = 0;
    while (idx < bufcnt) {
        sock->bytes_written += bufs[idx].len;
#if H2O_SOCKET_DUMP_WRITE
        h2o_error_printf("writing %zu bytes to fd:%d\n", bufs[idx].len, h2o_socket_get_fd(sock));
        h2o_dump_memory(stderr, bufs[idx].base, bufs[idx].len);
#endif
        ++idx;
    }

    do_write(sock, bufs, bufcnt);
}
935
936
/**
 * Starts writing the given send vectors to the socket.
 *
 * Vectors that are raw (i.e., their read callback is `h2o_sendvec_read_raw`) or empty are written as-is. At most one vector that
 * needs to be pulled is permitted, and its size must not exceed H2O_PULL_SENDVEC_MAX_SIZE. Such a vector is either sent directly
 * by its own send callback (only if it is the last vector and the backend agrees), or loaded onto memory by `flatten_sendvec`
 * before being written.
 *
 * `cb` is registered as the write callback; no other write may be pending. If loading the pull vector fails, the error is reported
 * through `report_early_write_error`.
 */
void h2o_socket_sendvec(h2o_socket_t *sock, h2o_sendvec_t *vecs, size_t cnt, h2o_socket_cb cb)
{
    assert(sock->_cb.write == NULL);
    assert(sock->_write_buf.flattened == NULL);

    sock->_cb.write = cb;

    if (cnt == 0) {
        do_write(sock, NULL, 0);
        return;
    }

    h2o_iovec_t bufs[cnt];
    size_t pull_index = SIZE_MAX;

    /* copy vectors to bufs, while looking for one to flatten */
    for (size_t i = 0; i < cnt; ++i) {
        sock->bytes_written += vecs[i].len;
        if (vecs[i].callbacks->read_ == h2o_sendvec_read_raw || vecs[i].len == 0) {
            bufs[i] = h2o_iovec_init(vecs[i].raw, vecs[i].len);
        } else {
            assert(pull_index == SIZE_MAX || !"h2o_socket_sendvec can only handle one pull vector at a time");
            assert(vecs[i].len <= H2O_PULL_SENDVEC_MAX_SIZE); /* at the moment, this is our size limit */
            pull_index = i;
        }
    }

    if (pull_index != SIZE_MAX) {
        /* If the pull vector has a send callback, and if we have the necessary conditions to utilize it, Let it write directly to
         * the socket. Note that it is the send callback that has to be tested; `callbacks` itself is never NULL at this point, as
         * it has been dereferenced in the loop above. */
#if !H2O_USE_LIBUV
        if (pull_index == cnt - 1 && vecs[pull_index].callbacks->send_ != NULL &&
            do_write_with_sendvec(sock, bufs, cnt - 1, vecs + pull_index))
            return;
#endif
        /* Load the vector onto memory now. */
        size_t pulllen = flatten_sendvec(sock, &vecs[pull_index]);
        if (pulllen == SIZE_MAX) {
            report_early_write_error(sock);
            return;
        }
        bufs[pull_index] = h2o_iovec_init(sock->_write_buf.flattened, pulllen);
    }

    do_write(sock, bufs, cnt);
}
980
981
/**
 * Called when a write completes (`err` is passed through to the callback). Disposes the TLS output buffer if there is one, then
 * invokes the write callback.
 */
void on_write_complete(h2o_socket_t *sock, const char *err)
{
    if (has_pending_ssl_bytes(sock->ssl))
        dispose_ssl_output_buffer(sock->ssl);

    /* unregister the callback before invoking it, so that a new write can be started from within the callback */
    h2o_socket_cb write_cb = sock->_cb.write;
    sock->_cb.write = NULL;
    write_cb(sock, err);
}
992
993
/**
 * Registers `cb` as the read callback of the socket, then asks the backend to start reading.
 */
void h2o_socket_read_start(h2o_socket_t *sock, h2o_socket_cb cb)
{
    /* the callback has to be in place before the backend is started */
    sock->_cb.read = cb;

    do_read_start(sock);
}
998
999
/**
 * Unregisters the read callback of the socket, then asks the backend to stop reading.
 */
void h2o_socket_read_stop(h2o_socket_t *sock)
{
    sock->_cb.read = NULL;

    do_read_stop(sock);
}
1004
1005
/**
 * Replaces the cached peer address of the socket with a copy of `sa` (being `len` bytes long). The previous cache, if any, is
 * freed.
 */
void h2o_socket_setpeername(h2o_socket_t *sock, struct sockaddr *sa, socklen_t len)
{
    struct st_h2o_socket_addr_t *entry;

    free(sock->_peername);

    /* the address is stored inline, right after the header */
    entry = h2o_mem_alloc(offsetof(struct st_h2o_socket_addr_t, addr) + len);
    entry->len = len;
    memcpy(&entry->addr, sa, len);
    sock->_peername = entry;
}
1012
1013
/**
 * Copies the peer address of the socket to `sa` and returns its length. The address is obtained from the backend only when it has
 * not been cached yet; the result of that call is cached for later use.
 */
socklen_t h2o_socket_getpeername(h2o_socket_t *sock, struct sockaddr *sa)
{
    if (sock->_peername == NULL) {
        /* call, copy to cache, and return */
        socklen_t fetched = get_peername_uncached(sock, sa);
        h2o_socket_setpeername(sock, sa, fetched);
        return fetched;
    }

    /* return cached */
    memcpy(sa, &sock->_peername->addr, sock->_peername->len);
    return sock->_peername->len;
}
1025
1026
/**
 * Copies the local address of the socket to `sa` and returns its length. The address is obtained from the backend only when it has
 * not been cached yet; the result of that call is cached for later use.
 */
socklen_t h2o_socket_getsockname(h2o_socket_t *sock, struct sockaddr *sa)
{
    if (sock->_sockname == NULL) {
        /* call, copy to cache, and return */
        socklen_t fetched = get_sockname_uncached(sock, sa);
        sock->_sockname = h2o_mem_alloc(offsetof(struct st_h2o_socket_addr_t, addr) + fetched);
        sock->_sockname->len = fetched;
        memcpy(&sock->_sockname->addr, sa, fetched);
        return fetched;
    }

    /* return cached */
    memcpy(sa, &sock->_sockname->addr, sock->_sockname->len);
    return sock->_sockname->len;
}
1040
1041
/**
 * Returns the picotls object bound to the socket, or NULL if the socket has no TLS state.
 */
ptls_t *h2o_socket_get_ptls(h2o_socket_t *sock)
{
    if (sock->ssl == NULL)
        return NULL;
    return sock->ssl->ptls;
}
1045
1046
/**
 * Returns the name of the TLS protocol version in use, or NULL if the socket has no TLS state.
 *
 * When picotls is the backend, the result is "TLSv1.2", "TLSv1.3", or "TLSv?" for any other version. When OpenSSL is the backend,
 * the string returned by SSL_get_version is used.
 */
const char *h2o_socket_get_ssl_protocol_version(h2o_socket_t *sock)
{
    if (sock->ssl == NULL)
        return NULL;

    if (sock->ssl->ptls != NULL) {
        switch (ptls_get_protocol_version(sock->ssl->ptls)) {
        case PTLS_PROTOCOL_VERSION_TLS12:
            return "TLSv1.2";
        case PTLS_PROTOCOL_VERSION_TLS13:
            return "TLSv1.3";
        default:
            return "TLSv?";
        }
    }

    if (sock->ssl->ossl == NULL)
        return NULL;
    return SSL_get_version(sock->ssl->ossl);
}
1064
1065
/**
 * Tells if the TLS session has been resumed. Returns the result of `ptls_is_psk_handshake` (picotls) or `SSL_session_reused`
 * (OpenSSL), or -1 if the socket has no TLS state.
 */
int h2o_socket_get_ssl_session_reused(h2o_socket_t *sock)
{
    if (sock->ssl == NULL)
        return -1;

    /* picotls takes precedence over openssl */
    if (sock->ssl->ptls != NULL)
        return ptls_is_psk_handshake(sock->ssl->ptls);
    if (sock->ssl->ossl != NULL)
        return (int)SSL_session_reused(sock->ssl->ossl);

    return -1;
}
1075
1076
/**
 * Returns the name of the cipher suite in use, or NULL if the socket has no TLS state or if the cipher is unknown.
 */
const char *h2o_socket_get_ssl_cipher(h2o_socket_t *sock)
{
    if (sock->ssl == NULL)
        return NULL;

    if (sock->ssl->ptls != NULL) {
        /* picotls is in charge; openssl is not consulted even if the cipher is yet to be known */
        ptls_cipher_suite_t *suite = ptls_get_cipher(sock->ssl->ptls);
        return suite != NULL ? suite->name : NULL;
    }

    if (sock->ssl->ossl != NULL)
        return SSL_get_cipher_name(sock->ssl->ossl);

    return NULL;
}
1089
1090
/**
 * Returns the key size of the cipher in use, in bits. Zero is returned if the socket has no TLS state or if the cipher is unknown.
 */
int h2o_socket_get_ssl_cipher_bits(h2o_socket_t *sock)
{
    if (sock->ssl == NULL)
        return 0;

    if (sock->ssl->ptls != NULL) {
        ptls_cipher_suite_t *suite = ptls_get_cipher(sock->ssl->ptls);
        if (suite != NULL)
            return (int)suite->aead->key_size * 8; /* bytes to bits */
        return 0;
    }

    if (sock->ssl->ossl != NULL)
        return SSL_get_cipher_bits(sock->ssl->ossl, NULL);

    return 0;
}
1104
1105
/**
 * Returns the TLS session ID as a vector pointing to the bytes returned by SSL_SESSION_get_id, or a NULL vector if unavailable.
 *
 * The ID is available only for OpenSSL-backed sockets, once the server-side async resumption state has reached COMPLETE. For
 * picotls, retrieval is yet to be implemented.
 */
h2o_iovec_t h2o_socket_get_ssl_session_id(h2o_socket_t *sock)
{
    if (sock->ssl == NULL)
        return h2o_iovec_init(NULL, 0);

    if (sock->ssl->ptls != NULL) {
        /* FIXME */
        return h2o_iovec_init(NULL, 0);
    }

    if (sock->ssl->ossl != NULL) {
        SSL_SESSION *session;
        if (sock->ssl->handshake.server.async_resumption.state == ASYNC_RESUMPTION_STATE_COMPLETE &&
            (session = SSL_get_session(sock->ssl->ossl)) != NULL) {
            unsigned id_len;
            const unsigned char *id = SSL_SESSION_get_id(session, &id_len);
            return h2o_iovec_init(id, id_len);
        }
    }

    return h2o_iovec_init(NULL, 0);
}
1123
1124
const char *h2o_socket_get_ssl_server_name(const h2o_socket_t *sock)
1125
0
{
1126
0
    if (sock->ssl != NULL) {
1127
0
        if (sock->ssl->ptls != NULL) {
1128
0
            return ptls_get_server_name(sock->ssl->ptls);
1129
0
        } else if (sock->ssl->ossl != NULL) {
1130
0
            return SSL_get_servername(sock->ssl->ossl, TLSEXT_NAMETYPE_host_name);
1131
0
        }
1132
0
    }
1133
0
    return NULL;
1134
0
}
1135
1136
/**
 * Tells if TLS offload can be used on the socket. Always zero for non-TLS sockets and when libuv is the backend; otherwise the
 * decision is delegated to `can_tls_offload`.
 */
int h2o_socket_can_tls_offload(h2o_socket_t *sock)
{
    /* nothing to offload unless TLS is in use */
    if (sock->ssl == NULL)
        return 0;

#if H2O_USE_LIBUV
    return 0;
#else
    return can_tls_offload(sock);
#endif
}
1147
1148
/**
 * Returns the value of the TCP_CONGESTION socket option as a string, or a NULL vector if the option is unavailable, the socket
 * has no file descriptor, or getsockopt fails.
 *
 * The string is allocated from `pool` if one is given, otherwise by `h2o_mem_alloc`.
 */
h2o_iovec_t h2o_socket_log_tcp_congestion_controller(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
#if defined(TCP_CONGESTION)
#define CC_BUFSIZE 32
    int fd = h2o_socket_get_fd(sock);

    if (fd >= 0) {
        socklen_t namelen = CC_BUFSIZE;
        char *name;

        if (pool != NULL) {
            name = h2o_mem_alloc_pool(pool, *name, namelen);
        } else {
            name = h2o_mem_alloc(namelen);
        }

        if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, &namelen) == 0) {
            /* Upon return, linux sets `namelen` to some value greater than the size of the string. Therefore, we apply strlen
             * after making sure that the result does not overrun the buffer. */
            name[CC_BUFSIZE - 1] = '\0';
            return h2o_iovec_init(name, strlen(name));
        }

        /* only the buffer that is not owned by a pool has to be released */
        if (pool == NULL)
            free(name);
    }
#undef CC_BUFSIZE
#endif
    return h2o_iovec_init(NULL, 0);
}
1169
1170
/**
 * Returns `tcpi_delivery_rate` of the socket, as a decimal string. A NULL vector is returned if the platform is not linux, the
 * socket has no file descriptor, getsockopt fails, or the kernel did not supply the field.
 *
 * The string is allocated from `pool` if one is given, otherwise by `h2o_mem_alloc`.
 */
h2o_iovec_t h2o_socket_log_tcp_delivery_rate(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
#if defined(__linux__) && defined(TCP_INFO)
    int fd;
    if ((fd = h2o_socket_get_fd(sock)) >= 0) {
        /* A copy of `struct tcp_info` found in linux/tcp.h, up to `tcpi_delivery_rate`. Rest of the codebase uses netinet/tcp.h,
         * which does not provide access to `tcpi_delivery_rate`. */
        struct {
            uint8_t tcpi_state;
            uint8_t tcpi_ca_state;
            uint8_t tcpi_retransmits;
            uint8_t tcpi_probes;
            uint8_t tcpi_backoff;
            uint8_t tcpi_options;
            uint8_t tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
            uint8_t tcpi_delivery_rate_app_limited : 1;

            uint32_t tcpi_rto;
            uint32_t tcpi_ato;
            uint32_t tcpi_snd_mss;
            uint32_t tcpi_rcv_mss;

            uint32_t tcpi_unacked;
            uint32_t tcpi_sacked;
            uint32_t tcpi_lost;
            uint32_t tcpi_retrans;
            uint32_t tcpi_fackets;

            /* Times. */
            uint32_t tcpi_last_data_sent;
            uint32_t tcpi_last_ack_sent; /* Not remembered, sorry. */
            uint32_t tcpi_last_data_recv;
            uint32_t tcpi_last_ack_recv;

            /* Metrics. */
            uint32_t tcpi_pmtu;
            uint32_t tcpi_rcv_ssthresh;
            uint32_t tcpi_rtt;
            uint32_t tcpi_rttvar;
            uint32_t tcpi_snd_ssthresh;
            uint32_t tcpi_snd_cwnd;
            uint32_t tcpi_advmss;
            uint32_t tcpi_reordering;

            uint32_t tcpi_rcv_rtt;
            uint32_t tcpi_rcv_space;

            uint32_t tcpi_total_retrans;

            uint64_t tcpi_pacing_rate;
            uint64_t tcpi_max_pacing_rate;
            uint64_t tcpi_bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
            uint64_t tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
            uint32_t tcpi_segs_out;       /* RFC4898 tcpEStatsPerfSegsOut */
            uint32_t tcpi_segs_in;        /* RFC4898 tcpEStatsPerfSegsIn */

            uint32_t tcpi_notsent_bytes;
            uint32_t tcpi_min_rtt;
            uint32_t tcpi_data_segs_in;  /* RFC4898 tcpEStatsDataSegsIn */
            uint32_t tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */

            uint64_t tcpi_delivery_rate;
        } tcpi;
        socklen_t tcpisz = sizeof(tcpi);
        /* The kernel reports through `tcpisz` how many bytes it has actually written. As `tcpi_delivery_rate` is the last member,
         * a kernel that does not know the field returns a shorter length, leaving the field uninitialized. Do not log the value
         * in such case. */
        if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &tcpi, &tcpisz) == 0 && tcpisz >= sizeof(tcpi)) {
            char *buf = (char *)(pool != NULL ? h2o_mem_alloc_pool(pool, char, sizeof(H2O_UINT64_LONGEST_STR))
                                              : h2o_mem_alloc(sizeof(H2O_UINT64_LONGEST_STR)));
            size_t len = sprintf(buf, "%" PRIu64, (uint64_t)tcpi.tcpi_delivery_rate);
            return h2o_iovec_init(buf, len);
        }
    }
#endif
    return h2o_iovec_init(NULL, 0);
}
1244
1245
/**
 * Returns the TLS session ID encoded by `h2o_base64_encode`, or a NULL vector if the ID is unavailable.
 *
 * The string is allocated from `pool` if one is given, otherwise by `h2o_mem_alloc`.
 */
h2o_iovec_t h2o_socket_log_ssl_session_id(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    h2o_iovec_t rawid = h2o_socket_get_ssl_session_id(sock);
    h2o_iovec_t encoded;

    if (rawid.base == NULL)
        return h2o_iovec_init(NULL, 0);

    if (pool != NULL) {
        encoded.base = h2o_mem_alloc_pool(pool, char, h2o_base64_encode_capacity(rawid.len));
    } else {
        encoded.base = h2o_mem_alloc(h2o_base64_encode_capacity(rawid.len));
    }
    encoded.len = h2o_base64_encode(encoded.base, rawid.base, rawid.len, 1);

    return encoded;
}
1257
1258
/**
 * Returns the key size of the cipher in use (in bits) as a decimal string, or a NULL vector if the size is reported as zero.
 *
 * The string is allocated from `pool` if one is given, otherwise by `h2o_mem_alloc`.
 */
h2o_iovec_t h2o_socket_log_ssl_cipher_bits(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    int bits = h2o_socket_get_ssl_cipher_bits(sock);

    if (bits == 0)
        return h2o_iovec_init(NULL, 0);

    char *text;
    if (pool != NULL) {
        text = (char *)h2o_mem_alloc_pool(pool, char, sizeof(H2O_INT16_LONGEST_STR));
    } else {
        text = (char *)h2o_mem_alloc(sizeof(H2O_INT16_LONGEST_STR));
    }
    /* the value is formatted as int16_t, as the buffer is sized for that type */
    size_t textlen = sprintf(text, "%" PRId16, (int16_t)bits);

    return h2o_iovec_init(text, textlen);
}
1270
1271
/**
 * Returns the ECH config ID as a decimal string, or a NULL vector unless the socket is a picotls-backed one for which
 * `ptls_is_ech_handshake` returns true.
 *
 * The string is allocated from `pool` if one is given, otherwise by `h2o_mem_alloc`.
 */
h2o_iovec_t h2o_socket_log_ssl_ech_config_id(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    uint8_t config_id;

    if (sock->ssl == NULL || sock->ssl->ptls == NULL || !ptls_is_ech_handshake(sock->ssl->ptls, &config_id, NULL, NULL))
        return h2o_iovec_init(NULL, 0);

    char *text;
    if (pool != NULL) {
        text = (char *)h2o_mem_alloc_pool(pool, char, sizeof(H2O_UINT8_LONGEST_STR));
    } else {
        text = (char *)h2o_mem_alloc(sizeof(H2O_UINT8_LONGEST_STR));
    }
    size_t textlen = sprintf(text, "%" PRIu8, config_id);

    return h2o_iovec_init(text, textlen);
}
1284
1285
/**
 * Returns the name of the key exchange of the HPKE KEM being used for ECH, or a NULL vector unless the socket is a picotls-backed
 * one for which `ptls_is_ech_handshake` returns true. The returned vector points to the name owned by the KEM; `pool` is unused.
 */
h2o_iovec_t h2o_socket_log_ssl_ech_kem(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    ptls_hpke_kem_t *kem;

    if (sock->ssl == NULL || sock->ssl->ptls == NULL || !ptls_is_ech_handshake(sock->ssl->ptls, NULL, &kem, NULL))
        return h2o_iovec_init(NULL, 0);

    return h2o_iovec_init(kem->keyex->name, strlen(kem->keyex->name));
}
1295
1296
/**
 * Returns the name of the HPKE cipher suite being used for ECH, or a NULL vector unless the socket is a picotls-backed one for
 * which `ptls_is_ech_handshake` returns true. The returned vector points to the name owned by the cipher suite; `pool` is unused.
 */
h2o_iovec_t h2o_socket_log_ssl_ech_cipher(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    ptls_hpke_cipher_suite_t *suite;

    if (sock->ssl == NULL || sock->ssl->ptls == NULL || !ptls_is_ech_handshake(sock->ssl->ptls, NULL, NULL, &suite))
        return h2o_iovec_init(NULL, 0);

    return h2o_iovec_init(suite->name, strlen(suite->name));
}
1306
1307
/**
 * Returns the key size of the HPKE cipher suite being used for ECH (in bits) as a decimal string, or a NULL vector unless the
 * socket is a picotls-backed one for which `ptls_is_ech_handshake` returns true.
 *
 * The string is allocated from `pool` if one is given, otherwise by `h2o_mem_alloc`.
 */
h2o_iovec_t h2o_socket_log_ssl_ech_cipher_bits(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    ptls_hpke_cipher_suite_t *suite;

    if (sock->ssl == NULL || sock->ssl->ptls == NULL || !ptls_is_ech_handshake(sock->ssl->ptls, NULL, NULL, &suite))
        return h2o_iovec_init(NULL, 0);

    uint16_t bits = (uint16_t)(suite->aead->key_size * 8); /* bytes to bits */
    char *text;
    if (pool != NULL) {
        text = (char *)h2o_mem_alloc_pool(pool, char, sizeof(H2O_UINT16_LONGEST_STR));
    } else {
        text = (char *)h2o_mem_alloc(sizeof(H2O_UINT16_LONGEST_STR));
    }
    size_t textlen = sprintf(text, "%" PRIu16, bits);

    return h2o_iovec_init(text, textlen);
}
1321
1322
/**
 * Returns the name of the TLS backend in use: "picotls" or "openssl". A NULL vector is returned if the socket has no TLS state,
 * or if neither backend is attached. `pool` is unused, as the returned vectors point to string literals.
 */
h2o_iovec_t h2o_socket_log_ssl_backend(h2o_socket_t *sock, h2o_mem_pool_t *pool)
{
    /* like the other accessors, tolerate sockets that are not using TLS */
    if (sock->ssl == NULL)
        return h2o_iovec_init(NULL, 0);

    if (sock->ssl->ptls != NULL)
        return h2o_iovec_init(H2O_STRLIT("picotls"));
    if (sock->ssl->ossl != NULL)
        return h2o_iovec_init(H2O_STRLIT("openssl"));
    return h2o_iovec_init(NULL, 0);
}
1330
1331
/**
 * Compares two socket addresses, returning zero if they are considered equal, or a negative / positive value otherwise.
 *
 * Address families are compared first. Then, the sun_path is compared for AF_UNIX, the address (and the port, if `check_port` is
 * non-zero) for AF_INET, and the address, the port (if `check_port` is non-zero), and the scope ID for AF_INET6. Any other address
 * family triggers an assertion failure.
 */
int h2o_socket_compare_address(struct sockaddr *x, struct sockaddr *y, int check_port)
{
    if (x->sa_family != y->sa_family)
        return x->sa_family < y->sa_family ? -1 : 1;

    if (x->sa_family == AF_UNIX) {
        struct sockaddr_un *xun = (void *)x, *yun = (void *)y;
        int diff = strcmp(xun->sun_path, yun->sun_path);
        if (diff != 0)
            return diff;
    } else if (x->sa_family == AF_INET) {
        struct sockaddr_in *xin = (void *)x, *yin = (void *)y;
        /* compare in host byte order, so that the ordering is numerical */
        uint32_t xaddr = ntohl(xin->sin_addr.s_addr), yaddr = ntohl(yin->sin_addr.s_addr);
        if (xaddr != yaddr)
            return xaddr < yaddr ? -1 : 1;
        if (check_port) {
            uint16_t xport = ntohs(xin->sin_port), yport = ntohs(yin->sin_port);
            if (xport != yport)
                return xport < yport ? -1 : 1;
        }
    } else if (x->sa_family == AF_INET6) {
        struct sockaddr_in6 *xin6 = (void *)x, *yin6 = (void *)y;
        int diff = memcmp(xin6->sin6_addr.s6_addr, yin6->sin6_addr.s6_addr, sizeof(xin6->sin6_addr.s6_addr));
        if (diff != 0)
            return diff;
        if (check_port) {
            uint16_t xport = ntohs(xin6->sin6_port), yport = ntohs(yin6->sin6_port);
            if (xport != yport)
                return xport < yport ? -1 : 1;
        }
        if (xin6->sin6_scope_id != yin6->sin6_scope_id)
            return xin6->sin6_scope_id < yin6->sin6_scope_id ? -1 : 1;
    } else {
        assert(!"unknown sa_family");
    }

    return 0;
}
1366
1367
/**
 * Writes the numeric representation of the host address of `sa` to `buf`, as a NUL-terminated string.
 *
 * @param sa     the address
 * @param salen  size of the address, passed to getnameinfo
 * @param buf    output buffer; must be capable of holding NI_MAXHOST bytes, as that is the size being passed to getnameinfo
 * @return length of the string being written, or SIZE_MAX if getnameinfo fails
 */
size_t h2o_socket_getnumerichost(const struct sockaddr *sa, socklen_t salen, char *buf)
{
    if (sa->sa_family == AF_INET) {
        /* fast path for IPv4 addresses */
        const struct sockaddr_in *sin = (const void *)sa;
        /* convert from network byte order, so that the most significant octet is the first one of the dotted quad */
        uint32_t addr = ntohl(sin->sin_addr.s_addr);
        return snprintf(buf, NI_MAXHOST, "%u.%u.%u.%u", (unsigned)(addr >> 24), (unsigned)((addr >> 16) & 255),
                        (unsigned)((addr >> 8) & 255), (unsigned)(addr & 255));
    }

    if (getnameinfo(sa, salen, buf, NI_MAXHOST, NULL, 0, NI_NUMERICHOST) != 0)
        return SIZE_MAX;
    return strlen(buf);
}
1381
1382
/**
 * Returns the port number of an AF_INET or AF_INET6 address in host byte order, or -1 for any other address family.
 */
int32_t h2o_socket_getport(const struct sockaddr *sa)
{
    switch (sa->sa_family) {
    case AF_INET:
        /* the port is stored in network byte order */
        return ntohs(((const struct sockaddr_in *)sa)->sin_port);
    case AF_INET6:
        return ntohs(((const struct sockaddr_in6 *)sa)->sin6_port);
    default:
        return -1;
    }
}
1393
1394
const char *h2o_socket_get_error_string(int errnum, const char *default_err)
1395
0
{
1396
0
    switch (errnum) {
1397
0
    case ECONNREFUSED:
1398
0
        return h2o_socket_error_conn_refused;
1399
0
    case ETIMEDOUT:
1400
0
        return h2o_socket_error_conn_timed_out;
1401
0
    case ENETUNREACH:
1402
0
        return h2o_socket_error_network_unreachable;
1403
0
    case EHOSTUNREACH:
1404
0
        return h2o_socket_error_host_unreachable;
1405
0
    default:
1406
0
        return default_err;
1407
0
    }
1408
0
}
1409
1410
/**
 * Creates the OpenSSL object of the socket from `sock->ssl->ssl_ctx`, binds the socket to the object as its app data, and sets up
 * the BIO. With BoringSSL, the object is explicitly put into the accept or connect state depending on `is_server`; with others,
 * the role of the object is asserted to match `is_server`.
 */
static void create_ossl(h2o_socket_t *sock, int is_server)
{
    SSL *ossl = SSL_new(sock->ssl->ssl_ctx);

    sock->ssl->ossl = ossl;
#ifdef OPENSSL_IS_BORINGSSL
    if (!is_server) {
        SSL_set_connect_state(ossl);
    } else {
        SSL_set_accept_state(ossl);
    }
#else
    assert(SSL_is_server(ossl) == !!is_server);
#endif
    /* set app data to be used in h2o_socket_ssl_new_session_cb */
    SSL_set_app_data(ossl, sock);
    setup_bio(sock);
}
1426
1427
/**
 * Session lookup callback that drives the asynchronous resumption.
 *
 * In the RECORD state, the lookup of the session identified by `data` / `len` is started by `resumption_get_async`, the state
 * moves to REQUEST_SENT, and NULL is returned. (When H2O_USE_OPENSSL_CLIENT_HELLO_CB is set, reaching here in the RECORD state is
 * a fatal error.) In the COMPLETE state, the session data that has been stored is returned, with `*copy` set to 1. Any other state
 * is unexpected.
 */
static SSL_SESSION *on_async_resumption_get(SSL *ssl,
#if !defined(LIBRESSL_VERSION_NUMBER) ? OPENSSL_VERSION_NUMBER >= 0x1010000fL : LIBRESSL_VERSION_NUMBER > 0x2070000f
                                            const
#endif
                                            unsigned char *data,
                                            int len, int *copy)
{
    h2o_socket_t *sock = BIO_get_data(SSL_get_rbio(ssl));

    if (sock->ssl->handshake.server.async_resumption.state == ASYNC_RESUMPTION_STATE_RECORD) {
#if H2O_USE_OPENSSL_CLIENT_HELLO_CB
        h2o_fatal("on_async_resumption_client_hello should have captured this state");
#endif
        sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_REQUEST_SENT;
        resumption_get_async(sock, h2o_iovec_init(data, len));
        return NULL;
    }

    if (sock->ssl->handshake.server.async_resumption.state == ASYNC_RESUMPTION_STATE_COMPLETE) {
        *copy = 1;
        return sock->ssl->handshake.server.async_resumption.session_data;
    }

    assert(!"FIXME");
    return NULL;
}
1452
1453
#if H2O_USE_OPENSSL_CLIENT_HELLO_CB
/**
 * Client hello callback that drives the asynchronous resumption.
 *
 * If the state is RECORD and the client hello carries a non-empty session ID, the lookup of the session is started by
 * `resumption_get_async`, the state moves to REQUEST_SENT, and SSL_CLIENT_HELLO_RETRY is returned. Otherwise, the callback returns
 * SSL_CLIENT_HELLO_SUCCESS without doing anything.
 */
static int on_async_resumption_client_hello(SSL *ssl, int *al, void *arg)
{
    h2o_socket_t *sock = BIO_get_data(SSL_get_rbio(ssl));
    const unsigned char *sess_id;
    size_t sess_id_len;

    if (sock->ssl->handshake.server.async_resumption.state != ASYNC_RESUMPTION_STATE_RECORD)
        return SSL_CLIENT_HELLO_SUCCESS;

    sess_id_len = SSL_client_hello_get0_session_id(ssl, &sess_id);
    if (sess_id_len == 0)
        return SSL_CLIENT_HELLO_SUCCESS;

    sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_REQUEST_SENT;
    resumption_get_async(sock, h2o_iovec_init(sess_id, sess_id_len));
    return SSL_CLIENT_HELLO_RETRY;
}
#endif
1470
1471
/**
 * New-session callback. The socket is found through the app data of `s`.
 *
 * The session is stored to the client-side session cache of the socket, if `s` is not a server, the socket has a session cache,
 * and (where `SSL_SESSION_is_resumable` is available) the session is resumable.
 *
 * @return 1 if the session has been stored to the cache (the reference is retained), 0 otherwise (the reference is dropped)
 */
int h2o_socket_ssl_new_session_cb(SSL *s, SSL_SESSION *sess)
{
    h2o_socket_t *sock = (h2o_socket_t *)SSL_get_app_data(s);
    assert(sock != NULL);
    assert(sock->ssl != NULL);

    if (SSL_is_server(s) || sock->ssl->handshake.client.session_cache == NULL)
        return 0; /* drop ref count */
#if !defined(LIBRESSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x1010100fL
    if (!SSL_SESSION_is_resumable(sess))
        return 0; /* drop ref count */
#endif

    h2o_cache_set(sock->ssl->handshake.client.session_cache, h2o_now(h2o_socket_get_loop(sock)),
                  sock->ssl->handshake.client.session_cache_key, sock->ssl->handshake.client.session_cache_key_hash,
                  h2o_iovec_init(sess, 1));
    return 1; /* retain ref count */
}
1490
1491
/**
 * New-session callback used for asynchronous resumption. Serializes the session with `i2d_SSL_SESSION` into a stack buffer and
 * hands the session ID together with the serialized form to the registered `resumption_new` callback.
 *
 * Always returns 0. If the session cannot be serialized (the length query returns zero or a negative value, or the second pass
 * produces a different length), nothing is passed to the callback.
 */
static int on_async_resumption_new(SSL *ssl, SSL_SESSION *session)
{
    h2o_socket_t *sock = BIO_get_data(SSL_get_rbio(ssl));

    h2o_iovec_t data;
    const unsigned char *id;
    unsigned id_len;
    unsigned char *p;
    int encoded_len;

    /* determine the size first; a non-positive value must not be converted to size_t and fed to alloca */
    if ((encoded_len = i2d_SSL_SESSION(session, NULL)) <= 0)
        return 0;

    /* build data */
    data.len = (size_t)encoded_len;
    data.base = alloca(data.len);
    p = (void *)data.base;
    if (i2d_SSL_SESSION(session, &p) != encoded_len)
        return 0;

    id = SSL_SESSION_get_id(session, &id_len);
    resumption_new(sock, h2o_iovec_init(id, id_len), data);
    return 0;
}
1510
1511
/**
1512
 * transfer traffic secret to picotls and discard OpenSSL state, if possible
1513
 */
1514
static void switch_to_picotls(h2o_socket_t *sock, uint16_t csid)
1515
0
{
1516
#if defined(LIBRESSL_VERSION_NUMBER) || OPENSSL_VERSION_NUMBER < 0x1010000fL
1517
    /* Libressl and openssl 1.0.2 does not have SSL_SESSION_get_master_key, or the functions to obtain hello random. Also, they lack
1518
     * the keylog callback that can be used as an alternative. */
1519
    return;
1520
#else
1521
1522
    /* TODO When using boringssl (the only fork of OpenSSL that supports TLS 1.2 False Start), we should probably refuse to switch
1523
     * to picotls when `SSL_in_false_start` returns true, as `SSL_handshake` might signal completion before receiving Finished.
1524
     * This is a issue specific to client-side connections; it does not matter for h2o accepting TLS 1.2 connections. */
1525
1526
    /* skip protocols other than TLS 1.2 */
1527
0
    if (SSL_version(sock->ssl->ossl) != TLS1_2_VERSION)
1528
0
        return;
1529
1530
0
    ptls_context_t *ptls_ctx = h2o_socket_ssl_get_picotls_context(sock->ssl->ssl_ctx);
1531
0
    if (ptls_ctx == NULL)
1532
0
        return;
1533
1534
    /* find the corresponding zerocopy cipher suite, or bail out */
1535
0
    ptls_cipher_suite_t *cs = ptls_find_cipher_suite(ptls_ctx->tls12_cipher_suites, csid);
1536
0
    if (cs == NULL)
1537
0
        return;
1538
1539
    /* The precondition for calling `ptls_build_tl12_export_params` is that we have sent and received only one encrypted record
1540
     * (i.e., next sequence number is 1). Bail out if that expectation is not met (which is very unlikely in practice). At the same
1541
     * time, obtain explicit nonce that has been used, if the underlying AEAD uses one. */
1542
0
    if (!(sock->ssl->tls12_record_layer.last_received[1].type == 20 /* TLS 1.2 ChangeCipherSpec */ &&
1543
0
          sock->ssl->tls12_record_layer.last_received[0].type == 22 /* TLS 1.2 Handshake record */ &&
1544
0
          sock->ssl->tls12_record_layer.last_received[0].length == cs->aead->tls12.record_iv_size + 16 + cs->aead->tag_size))
1545
0
        return;
1546
0
    if (cs->aead->tls12.record_iv_size != 0 && sock->ssl->tls12_record_layer.send_finished_iv == UINT64_MAX)
1547
0
        return;
1548
1549
0
    uint8_t master_secret[PTLS_TLS12_MASTER_SECRET_SIZE], hello_randoms[PTLS_HELLO_RANDOM_SIZE * 2], params_smallbuf[128];
1550
0
    ptls_buffer_t params;
1551
0
    int ret;
1552
1553
0
    ptls_buffer_init(&params, params_smallbuf, sizeof(params_smallbuf));
1554
1555
    /* extract the necessary bits */
1556
0
    if (SSL_SESSION_get_master_key(SSL_get_session(sock->ssl->ossl), master_secret, sizeof(master_secret)) != sizeof(master_secret))
1557
0
        goto Exit;
1558
0
    if (SSL_get_server_random(sock->ssl->ossl, hello_randoms, PTLS_HELLO_RANDOM_SIZE) != PTLS_HELLO_RANDOM_SIZE)
1559
0
        goto Exit;
1560
0
    if (SSL_get_client_random(sock->ssl->ossl, hello_randoms + PTLS_HELLO_RANDOM_SIZE, PTLS_HELLO_RANDOM_SIZE) !=
1561
0
        PTLS_HELLO_RANDOM_SIZE)
1562
0
        goto Exit;
1563
1564
    /* try to create ptls context */
1565
0
    h2o_iovec_t negotiated_protocol = h2o_socket_ssl_get_selected_protocol(sock);
1566
0
    if (ptls_build_tls12_export_params(ptls_ctx, &params, SSL_is_server(sock->ssl->ossl), SSL_session_reused(sock->ssl->ossl), cs,
1567
0
                                       master_secret, hello_randoms, sock->ssl->tls12_record_layer.send_finished_iv + 1,
1568
0
                                       h2o_socket_get_ssl_server_name(sock),
1569
0
                                       ptls_iovec_init(negotiated_protocol.base, negotiated_protocol.len)) != 0)
1570
0
        goto Exit;
1571
0
    ptls_log_conn_state_override = &sock->_log_state;
1572
0
    if ((ret = ptls_import(ptls_ctx, &sock->ssl->ptls, ptls_iovec_init(params.base, params.off))) != 0)
1573
0
        h2o_fatal("failed to import TLS params built using the same context:%d", ret);
1574
0
    ptls_log_conn_state_override = NULL;
1575
1576
0
    if (sock->ssl->ptls != NULL) {
1577
0
        SSL_set_shutdown(sock->ssl->ossl, SSL_SENT_SHUTDOWN); /* close the session so that it can be resumed */
1578
0
        SSL_free(sock->ssl->ossl);
1579
0
        sock->ssl->ossl = NULL;
1580
0
    }
1581
1582
0
Exit:
1583
0
    ptls_clear_memory(master_secret, sizeof(master_secret));
1584
0
    ptls_buffer_dispose(&params);
1585
0
#endif
1586
0
}
1587
1588
static void on_handshake_complete(h2o_socket_t *sock, const char *err)
1589
0
{
1590
0
    assert(sock->ssl->handshake.cb != NULL);
1591
1592
0
    assert(!sock->ssl->async.inflight);
1593
0
    if (sock->ssl->async.sock_is_closed) {
1594
0
        shutdown_ssl(sock, NULL);
1595
0
        return;
1596
0
    }
1597
0
    if (err == NULL) {
1598
        /* Post-handshake setup: set record_overhead, zerocopy, switch to picotls */
1599
0
        if (sock->ssl->ptls == NULL) {
1600
0
            const SSL_CIPHER *cipher = SSL_get_current_cipher(sock->ssl->ossl);
1601
0
            uint32_t cipher_id = SSL_CIPHER_get_id(cipher);
1602
0
            switch (cipher_id) {
1603
0
            case TLS1_CK_RSA_WITH_AES_128_GCM_SHA256:
1604
0
            case TLS1_CK_DHE_RSA_WITH_AES_128_GCM_SHA256:
1605
0
            case TLS1_CK_ECDHE_RSA_WITH_AES_128_GCM_SHA256:
1606
0
            case TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256:
1607
0
                sock->ssl->record_overhead = 5 /* header */ + 8 /* iv (RFC 5288 3) */ + 16 /* tag (RFC 5116 5.1) */;
1608
0
                break;
1609
0
            case TLS1_CK_RSA_WITH_AES_256_GCM_SHA384:
1610
0
            case TLS1_CK_DHE_RSA_WITH_AES_256_GCM_SHA384:
1611
0
            case TLS1_CK_ECDHE_RSA_WITH_AES_256_GCM_SHA384:
1612
0
            case TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384:
1613
0
                sock->ssl->record_overhead = 5 /* header */ + 8 /* iv (RFC 5288 3) */ + 16 /* tag (RFC 5116 5.1) */;
1614
0
                break;
1615
0
#if defined(TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305)
1616
0
            case TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305:
1617
0
            case TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305:
1618
0
            case TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
1619
0
                sock->ssl->record_overhead = 5 /* header */ + 16 /* tag */;
1620
0
                break;
1621
0
#endif
1622
0
            default:
1623
0
                sock->ssl->record_overhead = 32; /* sufficiently large number that can hold most payloads */
1624
0
                break;
1625
0
            }
1626
0
            switch_to_picotls(sock, cipher_id & 0xffff /* obtain IANA cipher-suite ID in a way compatible w. OpenSSL 1.1.0 */);
1627
0
        }
1628
0
        if (sock->ssl->ptls != NULL) {
1629
0
            sock->ssl->record_overhead = ptls_get_record_overhead(sock->ssl->ptls);
1630
0
#if H2O_USE_MSG_ZEROCOPY
1631
0
            assert(sock->_zerocopy == NULL);
1632
0
            ptls_cipher_suite_t *cipher = ptls_get_cipher(sock->ssl->ptls);
1633
0
            if (cipher->aead->non_temporal) {
1634
0
                unsigned one = 1;
1635
0
                if (setsockopt(h2o_socket_get_fd(sock), SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) == 0) {
1636
0
                    sock->_zerocopy = h2o_mem_alloc(sizeof(*sock->_zerocopy));
1637
0
                    *sock->_zerocopy = (struct st_h2o_socket_zerocopy_buffers_t){};
1638
0
                }
1639
0
            }
1640
0
#endif
1641
0
        } else {
1642
0
            assert(sock->ssl->ossl != NULL);
1643
0
        }
1644
0
    }
1645
1646
0
    h2o_socket_cb handshake_cb = sock->ssl->handshake.cb;
1647
0
    sock->_cb.write = NULL;
1648
0
    sock->ssl->handshake.cb = NULL;
1649
0
    if (err == NULL)
1650
0
        err = decode_ssl_input(sock);
1651
0
    handshake_cb(sock, err);
1652
0
}
1653
1654
const char *get_handshake_error(struct st_h2o_socket_ssl_t *ssl)
1655
0
{
1656
0
    const char *err = h2o_socket_error_ssl_handshake;
1657
0
    if (ssl->ossl != NULL) {
1658
0
        long verify_result = SSL_get_verify_result(ssl->ossl);
1659
0
        if (verify_result != X509_V_OK) {
1660
0
            err = X509_verify_cert_error_string(verify_result);
1661
0
            assert(err != NULL);
1662
0
        }
1663
0
    }
1664
0
    return err;
1665
0
}
1666
1667
/**
 * Completion callback for a failed handshake. The `err` argument is ignored; the reported error is derived from the SSL state.
 */
static void on_handshake_fail_complete(h2o_socket_t *sock, const char *err)
{
    const char *handshake_err = get_handshake_error(sock->ssl);
    on_handshake_complete(sock, handshake_err);
}
1671
1672
static void proceed_handshake(h2o_socket_t *sock, const char *err);
1673
1674
#if H2O_CAN_OSSL_ASYNC
1675
1676
/**
 * Registers a duplicate of `async_fd` to the event loop and starts reading on it, so that `cb` is called on notification. `data` is
 * stored in the `data` member of the socket created for the duplicate. Aborts via `h2o_fatal` if `dup` fails.
 */
void h2o_socket_start_async_handshake(h2o_loop_t *loop, int async_fd, void *data, h2o_socket_cb cb)
{
    /* work on a duplicate, as h2o socket handling will close the descriptor it is given */
    int watch_fd = dup(async_fd);
    if (watch_fd == -1) {
        char errbuf[256];
        h2o_fatal("dup failed:%s", h2o_strerror_r(errno, errbuf, sizeof(errbuf)));
    }

    /* add the fd to the event loop in order to retry when the openssl engine is ready */
    h2o_socket_t *async_sock;
#if H2O_USE_LIBUV
    async_sock = h2o_uv__poll_create(loop, watch_fd, (uv_close_cb)free);
#else
    async_sock = h2o_evloop_socket_create(loop, watch_fd, H2O_SOCKET_FLAG_DONT_READ);
#endif
    async_sock->data = data;
    h2o_socket_read_start(async_sock, cb);
}
1693
1694
/**
 * Handles a notification on a socket created by `h2o_socket_start_async_handshake`: stops reading, disposes of the notification
 * socket, and returns the `data` pointer that was attached to it. Aborts via `h2o_fatal` if `err` is not NULL.
 */
void *h2o_socket_async_handshake_on_notify(h2o_socket_t *async_sock, const char *err)
{
    if (err != NULL)
        h2o_fatal("error on internal notification fd:%s", err);

    /* Do we need to handle spurious events for eventfds / pipes used for intra-process communication? If so, maybe we should call
     * select (2) here to assert that the socket is actually readable, and return NULL if it is not. */

    /* fetch the attached pointer before the socket goes away */
    void *attached = async_sock->data;
    h2o_socket_read_stop(async_sock);
    dispose_socket(async_sock, NULL);
    return attached;
}
1709
1710
/**
 * Read callback of the notification socket: retrieves the TLS socket attached to it, clears the inflight flag, and resumes the
 * handshake.
 */
static void on_async_proceed_handshake(h2o_socket_t *async_sock, const char *err)
{
    h2o_socket_t *tls_sock = h2o_socket_async_handshake_on_notify(async_sock, err);

    /* the notification must correspond to an operation that is in flight */
    assert(tls_sock->ssl->async.inflight);
    tls_sock->ssl->async.inflight = 0;

    proceed_handshake(tls_sock, NULL);
}
1719
1720
#endif
1721
1722
static void on_async_job_complete(void *_sock)
1723
0
{
1724
0
    h2o_socket_t *sock = _sock;
1725
1726
0
    assert(sock->ssl->async.inflight);
1727
0
    sock->ssl->async.inflight = 0;
1728
1729
0
    proceed_handshake(sock, NULL);
1730
0
}
1731
1732
/**
 * Suspends the handshake until an asynchronous operation completes: marks the operation as in flight, stops reading from the
 * socket, and arranges for the handshake to be resumed on completion. For picotls, the contents of `*ptls_wbuf` are moved into the
 * socket and `*ptls_wbuf` is reset; for OpenSSL, `ptls_wbuf` is expected to be NULL.
 */
static void do_proceed_handshake_async(h2o_socket_t *sock, ptls_buffer_t *ptls_wbuf)
{
    assert(!sock->ssl->async.inflight);
    sock->ssl->async.inflight = 1;
    h2o_socket_read_stop(sock);

    if (sock->ssl->ptls == NULL) {
        /* the handshake is being run by OpenSSL (or a fork of it) */
#if H2O_CAN_OSSL_ASYNC
        assert(ptls_wbuf == NULL);
        int async_fd;
        size_t numfds;
        SSL_get_all_async_fds(sock->ssl->ossl, NULL, &numfds);
        assert(numfds == 1);
        SSL_get_all_async_fds(sock->ssl->ossl, &async_fd, &numfds);
        h2o_socket_start_async_handshake(h2o_socket_get_loop(sock), async_fd, sock, on_async_proceed_handshake);
#elif defined(OPENSSL_IS_BORINGSSL)
        ptls_async_job_t *job = SSL_get_ex_data(sock->ssl->ossl, h2o_socket_boringssl_get_async_job_index());
        assert(job != NULL);
        assert(job->set_completion_callback != NULL);
        job->set_completion_callback(job, on_async_job_complete, sock);
#else
        h2o_fatal("how can OpenSSL ask async when the async API is unavailable");
#endif
        return;
    }

    /* picotls: retain wbuf, wait for notification */
    sock->ssl->async.ptls_wbuf = *ptls_wbuf;
    *ptls_wbuf = (ptls_buffer_t){NULL};
    ptls_async_job_t *job = ptls_get_async_job(sock->ssl->ptls);
    if (job->set_completion_callback != NULL) {
        /* completion is notified via a callback */
        job->set_completion_callback(job, on_async_job_complete, sock);
        return;
    }

    /* otherwise, completion is notified via a file descriptor */
#if H2O_CAN_OSSL_ASYNC
    assert(job->get_fd != NULL);
    int async_fd = job->get_fd(job);
    h2o_socket_start_async_handshake(h2o_socket_get_loop(sock), async_fd, sock, on_async_proceed_handshake);
#else
    h2o_fatal("callback-based approach must have been chosen as the only option when OpenSSL async API is unavailable");
#endif
}
1774
1775
/**
 * Runs one step of the handshake using picotls: feeds the buffered encrypted input to `ptls_handshake`, then sends any output and
 * decides what to do next based on the return value.
 */
static void proceed_handshake_picotls(h2o_socket_t *sock)
{
    ptls_buffer_t sendbuf;

    /* reuse the output retained across an async operation if there is one, otherwise start with an empty buffer */
    if (sock->ssl->async.ptls_wbuf.base == NULL) {
        ptls_buffer_init(&sendbuf, "", 0);
    } else {
        sendbuf = sock->ssl->async.ptls_wbuf;
        sock->ssl->async.ptls_wbuf = (ptls_buffer_t){NULL};
    }

    size_t inlen = sock->ssl->input.encrypted->size;
    int ret = ptls_handshake(sock->ssl->ptls, &sendbuf, sock->ssl->input.encrypted->bytes, &inlen, NULL);
    h2o_buffer_consume(&sock->ssl->input.encrypted, inlen);

    if (ret == PTLS_ERROR_ASYNC_OPERATION) {
        do_proceed_handshake_async(sock, &sendbuf);
        return;
    }

    /* determine the next action */
    h2o_socket_cb next_cb;
    if (ret == 0)
        next_cb = on_handshake_complete;
    else if (ret == PTLS_ERROR_IN_PROGRESS)
        next_cb = proceed_handshake;
    else
        next_cb = on_handshake_fail_complete;

    /* When something is to be sent, send it and then take the next action. If there's nothing to be sent and the handshake is still
     * in progress, wait for more bytes to arrive; otherwise, take the action immediately. */
    if (sendbuf.off != 0) {
        h2o_socket_read_stop(sock);
        write_ssl_bytes(sock, sendbuf.base, sendbuf.off);
        flush_pending_ssl(sock, next_cb);
    } else if (ret == PTLS_ERROR_IN_PROGRESS) {
        h2o_socket_read_start(sock, next_cb);
    } else {
        next_cb(sock, NULL);
    }

    ptls_buffer_dispose(&sendbuf);
}
1823
1824
/**
 * Client-side check run once OpenSSL reports handshake success: validates the peer certificate against the expected server name.
 * Returns NULL if the name matches, or the error string to report.
 */
static const char *check_peer_cert_name(h2o_socket_t *sock)
{
    X509 *cert = SSL_get_peer_certificate(sock->ssl->ossl);
    if (cert == NULL)
        return h2o_socket_error_ssl_no_cert;

    const char *err;
    switch (validate_hostname(sock->ssl->handshake.client.server_name, cert)) {
    case MatchFound:
        err = NULL; /* ok */
        break;
    case MatchNotFound:
        err = h2o_socket_error_ssl_cert_name_mismatch;
        break;
    default:
        err = h2o_socket_error_ssl_cert_invalid;
        break;
    }
    X509_free(cert);
    return err;
}

/**
 * Runs the handshake using OpenSSL (`SSL_accept` on the server side, `SSL_connect` on the client side) and takes the next action:
 * sending pending output, waiting for more input, suspending for an async operation or an async session lookup, or completing the
 * handshake.
 */
static void proceed_handshake_openssl(h2o_socket_t *sock)
{
    h2o_iovec_t saved_input = {NULL};
    const char *err = NULL;
    int ret = 0;

    assert(sock->ssl->ossl != NULL);

    if (SSL_is_server(sock->ssl->ossl) && sock->ssl->handshake.server.async_resumption.state == ASYNC_RESUMPTION_STATE_RECORD) {
        if (sock->ssl->input.encrypted->size > 1024) {
            /* input is too large to retain; give up on async resumption */
            sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_COMPLETE;
        } else {
            /* retain a copy of input if performing async resumption */
            char *copy = alloca(sock->ssl->input.encrypted->size);
            saved_input = h2o_iovec_init(copy, sock->ssl->input.encrypted->size);
            memcpy(saved_input.base, sock->ssl->input.encrypted->bytes, saved_input.len);
        }
    }

Redo:
    ERR_clear_error();
    if (!SSL_is_server(sock->ssl->ossl)) {
        ret = SSL_connect(sock->ssl->ossl);
    } else {
        ret = SSL_accept(sock->ssl->ossl);
        switch (sock->ssl->handshake.server.async_resumption.state) {
        case ASYNC_RESUMPTION_STATE_COMPLETE:
            break;
        case ASYNC_RESUMPTION_STATE_RECORD:
            /* async resumption has not been triggered; proceed the state to complete */
            sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_COMPLETE;
            break;
        case ASYNC_RESUMPTION_STATE_REQUEST_SENT: {
            /* sent async request, reset the ssl state, and wait for async response */
            assert(ret < 0);
#if H2O_CAN_OSSL_ASYNC
            assert(SSL_get_error(sock->ssl->ossl, ret) != SSL_ERROR_WANT_ASYNC &&
                   "async operation should start only after resumption state is obtained and OpenSSL decides not to resume");
#endif
            SSL_free(sock->ssl->ossl);
            create_ossl(sock, 1);
            if (has_pending_ssl_bytes(sock->ssl))
                dispose_ssl_output_buffer(sock->ssl);
            /* restore the input to what was retained, so that the handshake can be rerun from scratch */
            h2o_buffer_consume(&sock->ssl->input.encrypted, sock->ssl->input.encrypted->size);
            h2o_buffer_reserve(&sock->ssl->input.encrypted, saved_input.len);
            memcpy(sock->ssl->input.encrypted->bytes, saved_input.base, saved_input.len);
            sock->ssl->input.encrypted->size = saved_input.len;
            h2o_socket_read_stop(sock);
            return;
        }
        default:
            h2o_fatal("unexpected async resumption state");
            break;
        }
    }

    /* handshake failed either in strict mTLS mode or others */
    if (ret == 0 || (ret < 0 && SSL_get_error(sock->ssl->ossl, ret) != SSL_ERROR_WANT_READ)) {
        int is_async = 0;
#if H2O_CAN_OSSL_ASYNC
        is_async = SSL_get_error(sock->ssl->ossl, ret) == SSL_ERROR_WANT_ASYNC;
#elif defined(OPENSSL_IS_BORINGSSL)
        is_async = SSL_get_error(sock->ssl->ossl, ret) == SSL_ERROR_WANT_PRIVATE_KEY_OPERATION;
#endif
        if (is_async) {
            do_proceed_handshake_async(sock, NULL);
            return;
        }

        /* OpenSSL 1.1.0 emits an alert immediately, we send it now. 1.0.2 emits the error when SSL_shutdown is called in
         * shutdown_ssl. */
        if (has_pending_ssl_bytes(sock->ssl)) {
            h2o_socket_read_stop(sock);
            flush_pending_ssl(sock, on_handshake_fail_complete);
            return;
        }
        err = get_handshake_error(sock->ssl);
        goto Complete;
    }

    /* output is pending; send it, then either complete or continue the handshake */
    if (has_pending_ssl_bytes(sock->ssl)) {
        h2o_socket_read_stop(sock);
        flush_pending_ssl(sock, ret == 1 ? on_handshake_complete : proceed_handshake);
        return;
    }

    if (ret == 1) {
        if (!SSL_is_server(sock->ssl->ossl))
            err = check_peer_cert_name(sock);
        goto Complete;
    }

    /* handshake is in progress; process the remaining input if any, otherwise wait for more */
    if (sock->ssl->input.encrypted->size != 0)
        goto Redo;
    h2o_socket_read_start(sock, proceed_handshake);
    return;

Complete:
    h2o_socket_read_stop(sock);
    on_handshake_complete(sock, err);
}
1940
1941
/**
1942
 * Called when it is still uncertain which of the two TLS stacks (picotls or OpenSSL) should handle the handshake.
1943
 * The function first tries picotls without consuming the socket input buffer. Then, if picotls returns PTLS_ALERT_PROTOCOL_VERSION
1944
 * indicating that the client is using TLS 1.2 or below, switches to using OpenSSL.
1945
 */
1946
static void proceed_handshake_undetermined(h2o_socket_t *sock)
1947
0
{
1948
0
    assert(sock->ssl->ossl == NULL && sock->ssl->ptls == NULL);
1949
1950
0
    ptls_context_t *ptls_ctx = h2o_socket_ssl_get_picotls_context(sock->ssl->ssl_ctx);
1951
0
    assert(ptls_ctx != NULL);
1952
1953
0
    size_t consumed = sock->ssl->input.encrypted->size;
1954
0
    ptls_buffer_t wbuf;
1955
0
    ptls_buffer_init(&wbuf, "", 0);
1956
1957
0
    ptls_log_conn_state_override = &sock->_log_state;
1958
0
    ptls_t *ptls = ptls_new(ptls_ctx, 1);
1959
0
    ptls_log_conn_state_override = NULL;
1960
0
    if (ptls == NULL)
1961
0
        h2o_fatal("no memory");
1962
0
    *ptls_get_data_ptr(ptls) = sock;
1963
0
    int ret = ptls_handshake(ptls, &wbuf, sock->ssl->input.encrypted->bytes, &consumed, NULL);
1964
1965
0
    if (ret == PTLS_ERROR_IN_PROGRESS && wbuf.off == 0) {
1966
        /* we aren't sure if the picotls can process the handshake, retain handshake transcript and replay on next occasion */
1967
0
        ptls_free(ptls);
1968
0
    } else if (ret == PTLS_ALERT_PROTOCOL_VERSION) {
1969
        /* the client cannot use tls1.3, fallback to openssl */
1970
0
        ptls_free(ptls);
1971
0
        create_ossl(sock, 1);
1972
0
        proceed_handshake_openssl(sock);
1973
0
    } else {
1974
        /* picotls is responsible for handling the handshake */
1975
0
        sock->ssl->ptls = ptls;
1976
0
        sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_COMPLETE;
1977
0
        h2o_buffer_consume(&sock->ssl->input.encrypted, consumed);
1978
0
        if (ret == PTLS_ERROR_ASYNC_OPERATION) {
1979
0
            do_proceed_handshake_async(sock, &wbuf);
1980
0
            return;
1981
0
        }
1982
        /* stop reading, send response */
1983
0
        h2o_socket_read_stop(sock);
1984
0
        write_ssl_bytes(sock, wbuf.base, wbuf.off);
1985
0
        h2o_socket_cb cb;
1986
0
        switch (ret) {
1987
0
        case 0:
1988
0
            cb = on_handshake_complete;
1989
0
            break;
1990
0
        case PTLS_ERROR_IN_PROGRESS:
1991
0
            cb = proceed_handshake;
1992
0
            break;
1993
0
        default:
1994
0
            assert(ret != PTLS_ERROR_STATELESS_RETRY && "stateless retry is never turned on by us for TCP");
1995
0
            cb = on_handshake_fail_complete;
1996
0
            break;
1997
0
        }
1998
0
        flush_pending_ssl(sock, cb);
1999
0
    }
2000
0
    ptls_buffer_dispose(&wbuf);
2001
0
}
2002
2003
/**
 * Socket callback that drives the handshake. Reports `err` through `on_handshake_complete` if it is set; otherwise dispatches to
 * the picotls, OpenSSL, or undetermined handler depending on which TLS objects exist on the socket.
 */
static void proceed_handshake(h2o_socket_t *sock, const char *err)
{
    assert(!sock->ssl->async.inflight && "while async operation is inflight, the socket should be neither reading nor writing");

    sock->_cb.write = NULL;

    if (err != NULL) {
        h2o_socket_read_stop(sock);
        on_handshake_complete(sock, err);
        return;
    }

    /* picotls is already in charge */
    if (sock->ssl->ptls != NULL) {
        proceed_handshake_picotls(sock);
        return;
    }

    if (sock->ssl->ossl == NULL) {
        /* no stack chosen yet; when a picotls context is available, let picotls have the first look */
        if (h2o_socket_ssl_get_picotls_context(sock->ssl->ssl_ctx) != NULL) {
            proceed_handshake_undetermined(sock);
            return;
        }
        create_ossl(sock, 1);
    }
    proceed_handshake_openssl(sock);
}
2026
2027
void h2o_socket_ssl_handshake(h2o_socket_t *sock, SSL_CTX *ssl_ctx, const char *server_name, h2o_iovec_t alpn_protos,
2028
                              h2o_socket_cb handshake_cb)
2029
0
{
2030
0
    sock->ssl = h2o_mem_alloc(sizeof(*sock->ssl));
2031
0
    *sock->ssl = (struct st_h2o_socket_ssl_t){
2032
0
        .ssl_ctx = ssl_ctx, .handshake = {.cb = handshake_cb}, .tls12_record_layer = {.send_finished_iv = UINT64_MAX}};
2033
#if H2O_USE_KTLS
2034
    /* Set offload state to TBD if kTLS is enabled. Otherwise, remains H2O_SOCKET_SSL_OFFLOAD_OFF. */
2035
    if (h2o_socket_use_ktls)
2036
        sock->ssl->offload = H2O_SOCKET_SSL_OFFLOAD_TBD;
2037
#endif
2038
2039
    /* setup the buffers; sock->input should be empty, sock->ssl->input.encrypted should contain the initial input, if any */
2040
0
    h2o_buffer_init(&sock->ssl->input.encrypted, &h2o_socket_buffer_prototype);
2041
0
    if (sock->input->size != 0) {
2042
0
        h2o_buffer_t *tmp = sock->input;
2043
0
        sock->input = sock->ssl->input.encrypted;
2044
0
        sock->ssl->input.encrypted = tmp;
2045
0
    }
2046
2047
0
    if (server_name == NULL) {
2048
        /* is server */
2049
0
        if (SSL_CTX_sess_get_get_cb(sock->ssl->ssl_ctx) != NULL)
2050
0
            sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_RECORD;
2051
0
        if (sock->ssl->input.encrypted->size != 0)
2052
0
            proceed_handshake(sock, 0);
2053
0
        else
2054
0
            h2o_socket_read_start(sock, proceed_handshake);
2055
0
    } else {
2056
0
        create_ossl(sock, 0);
2057
0
        if (alpn_protos.base != NULL)
2058
0
            SSL_set_alpn_protos(sock->ssl->ossl, (const unsigned char *)alpn_protos.base, (unsigned)alpn_protos.len);
2059
0
        h2o_cache_t *session_cache = h2o_socket_ssl_get_session_cache(sock->ssl->ssl_ctx);
2060
0
        if (session_cache != NULL) {
2061
0
            struct sockaddr_storage sa;
2062
0
            int32_t port;
2063
0
            if (h2o_socket_getpeername(sock, (struct sockaddr *)&sa) != 0 &&
2064
0
                (port = h2o_socket_getport((struct sockaddr *)&sa)) != -1) {
2065
                /* session cache is available */
2066
0
                h2o_iovec_t session_cache_key;
2067
0
                session_cache_key.base = h2o_mem_alloc(strlen(server_name) + sizeof(":" H2O_UINT16_LONGEST_STR));
2068
0
                session_cache_key.len = sprintf(session_cache_key.base, "%s:%" PRIu16, server_name, (uint16_t)port);
2069
0
                sock->ssl->handshake.client.session_cache = session_cache;
2070
0
                sock->ssl->handshake.client.session_cache_key = session_cache_key;
2071
0
                sock->ssl->handshake.client.session_cache_key_hash =
2072
0
                    h2o_cache_calchash(session_cache_key.base, session_cache_key.len);
2073
2074
                /* fetch from session cache */
2075
0
                h2o_cache_ref_t *cacheref = h2o_cache_fetch(session_cache, h2o_now(h2o_socket_get_loop(sock)),
2076
0
                                                            sock->ssl->handshake.client.session_cache_key,
2077
0
                                                            sock->ssl->handshake.client.session_cache_key_hash);
2078
0
                if (cacheref != NULL) {
2079
0
                    SSL_set_session(sock->ssl->ossl, (SSL_SESSION *)cacheref->value.base);
2080
0
                    h2o_cache_release(session_cache, cacheref);
2081
0
                }
2082
0
            }
2083
0
        }
2084
0
        sock->ssl->handshake.client.server_name = h2o_strdup(NULL, server_name, SIZE_MAX).base;
2085
0
        SSL_set_tlsext_host_name(sock->ssl->ossl, sock->ssl->handshake.client.server_name);
2086
0
        proceed_handshake(sock, 0);
2087
0
    }
2088
0
}
2089
2090
/**
 * Resumes a server-side handshake that was suspended for an asynchronous session lookup. A non-empty `session_data` is decoded
 * with `d2i_SSL_SESSION` and stored on the socket so that it is available while the handshake runs; it is released afterwards.
 */
void h2o_socket_ssl_resume_server_handshake(h2o_socket_t *sock, h2o_iovec_t session_data)
{
    if (session_data.len != 0) {
        const unsigned char *cursor = (void *)session_data.base;
        sock->ssl->handshake.server.async_resumption.session_data = d2i_SSL_SESSION(NULL, &cursor, (long)session_data.len);
        /* FIXME warn on failure */
    }

    /* the lookup is done, whether or not a session was found */
    sock->ssl->handshake.server.async_resumption.state = ASYNC_RESUMPTION_STATE_COMPLETE;
    proceed_handshake(sock, 0);

    /* NOTE(review): proceed_handshake may end up invoking the handshake callback; confirm that the callback cannot release
     * sock->ssl before the cleanup below runs. */
    SSL_SESSION *decoded = sock->ssl->handshake.server.async_resumption.session_data;
    if (decoded != NULL) {
        SSL_SESSION_free(decoded);
        sock->ssl->handshake.server.async_resumption.session_data = NULL;
    }
}
2106
2107
void h2o_socket_ssl_async_resumption_init(h2o_socket_ssl_resumption_get_async_cb get_async_cb,
2108
                                          h2o_socket_ssl_resumption_new_cb new_cb)
2109
0
{
2110
0
    resumption_get_async = get_async_cb;
2111
0
    resumption_new = new_cb;
2112
0
}
2113
2114
/**
 * Installs the callbacks for asynchronous session resumption on `ctx`.
 *
 * Asynchronous resumption is a feature of libh2o that allows the use of an external session store. The traditional API provided
 * by OpenSSL (`SSL_CTX_sess_set_get_cb`) assumes that the session store lookup is a blocking operation. In an event-loop-based
 * design, however, we cannot block while a request is sent to a remote session store and the response is awaited.
 *
 * The strategy used here is to run the handshake twice for each TCP connection. When the session lookup is triggered for the
 * first time, an asynchronous lookup is initiated and the TLS handshake state is discarded immediately, while the ClientHello
 * (the input from TCP to the SSL handshake state machine) is retained. Once the asynchronous lookup completes, the TLS handshake
 * is rerun from scratch, and this time the result of the lookup is supplied when the session callback is called.
 *
 * With OpenSSL 1.1.1 and above, `SSL_CTX_set_client_hello_cb` is used to capture the session ID, because that callback makes it
 * possible to stop the handshake state machine from preparing the full handshake response. With the old
 * `SSL_CTX_sess_set_get_cb` callback, OpenSSL cannot be stopped from doing that (including the very CPU-intensive private key
 * operation), even though we discard everything except the session ID.
 *
 * If necessary, it is the responsibility of the caller to disable the internal cache.
 */
void h2o_socket_ssl_async_resumption_setup_ctx(SSL_CTX *ctx)
{
    SSL_CTX_sess_set_new_cb(ctx, on_async_resumption_new);
    SSL_CTX_sess_set_get_cb(ctx, on_async_resumption_get);
#if H2O_USE_OPENSSL_CLIENT_HELLO_CB
    SSL_CTX_set_client_hello_cb(ctx, on_async_resumption_client_hello, NULL);
#endif
}
2139
2140
static int get_ptls_index(void)
2141
0
{
2142
0
    static volatile int index;
2143
0
    H2O_MULTITHREAD_ONCE({ index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, NULL); });
2144
0
    return index;
2145
0
}
2146
2147
/**
 * Returns the picotls context stored in the ex-data of `ossl`, as returned by `SSL_CTX_get_ex_data`.
 */
ptls_context_t *h2o_socket_ssl_get_picotls_context(SSL_CTX *ossl)
{
    int slot = get_ptls_index();
    return SSL_CTX_get_ex_data(ossl, slot);
}
2151
2152
/**
 * Stores `ptls` in the ex-data of `ossl`, from where `h2o_socket_ssl_get_picotls_context` reads it.
 */
void h2o_socket_ssl_set_picotls_context(SSL_CTX *ossl, ptls_context_t *ptls)
{
    int slot = get_ptls_index();
    SSL_CTX_set_ex_data(ossl, slot, ptls);
}
2156
2157
/* ex_data free callback registered by get_ssl_session_cache_index: destroys the session cache stored in the slot, if any.
 * Only `ptr` (the stored value) is used; the remaining arguments are ignored. */
static void on_dispose_ssl_ctx_session_cache(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
{
    h2o_cache_t *cache = ptr;

    /* nothing was ever stored in this slot */
    if (cache == NULL)
        return;

    h2o_cache_destroy(cache);
}
2163
2164
static int get_ssl_session_cache_index(void)
2165
0
{
2166
0
    static volatile int index;
2167
0
    H2O_MULTITHREAD_ONCE({ index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, on_dispose_ssl_ctx_session_cache); });
2168
0
    return index;
2169
0
}
2170
2171
/* Returns the session cache stored in the ex_data of `ctx` (the value SSL_CTX_get_ex_data yields for the session-cache
 * index). */
h2o_cache_t *h2o_socket_ssl_get_session_cache(SSL_CTX *ctx)
{
    void *stored = SSL_CTX_get_ex_data(ctx, get_ssl_session_cache_index());

    return (h2o_cache_t *)stored;
}
2175
2176
/* Stores `cache` in the ex_data of `ctx` under the session-cache index. The result of SSL_CTX_set_ex_data is not
 * inspected. */
void h2o_socket_ssl_set_session_cache(SSL_CTX *ctx, h2o_cache_t *cache)
{
    int slot = get_ssl_session_cache_index();

    SSL_CTX_set_ex_data(ctx, slot, cache);
}
2180
2181
/* Releases a cache entry: `value.base` is treated as an SSL_SESSION pointer and handed to SSL_SESSION_free. */
void h2o_socket_ssl_destroy_session_cache_entry(h2o_iovec_t value)
{
    SSL_SESSION_free((SSL_SESSION *)value.base);
}
2186
2187
/* Returns the application protocol negotiated on `sock`.
 *  - no TLS on the socket: an iovec of (NULL, 0)
 *  - picotls in use: the string returned by ptls_get_negotiated_protocol, or (NULL, 0) if it returned NULL
 *  - otherwise: the ALPN selection from OpenSSL, then (if still empty) the NPN selection, subject to H2O_USE_ALPN /
 *    H2O_USE_NPN being enabled; (NULL, 0) when nothing was selected
 */
h2o_iovec_t h2o_socket_ssl_get_selected_protocol(h2o_socket_t *sock)
{
    const unsigned char *proto_bytes = NULL;
    unsigned proto_len = 0;

    if (sock->ssl == NULL)
        return h2o_iovec_init(NULL, 0);

    if (sock->ssl->ptls != NULL) {
        const char *negotiated = ptls_get_negotiated_protocol(sock->ssl->ptls);
        if (negotiated == NULL)
            return h2o_iovec_init(NULL, 0);
        return h2o_iovec_init(negotiated, strlen(negotiated));
    }

#if H2O_USE_ALPN
    if (proto_len == 0)
        SSL_get0_alpn_selected(sock->ssl->ossl, &proto_bytes, &proto_len);
#endif
#if H2O_USE_NPN
    /* fall back to NPN only when ALPN yielded nothing */
    if (proto_len == 0)
        SSL_get0_next_proto_negotiated(sock->ssl->ossl, &proto_bytes, &proto_len);
#endif

    return h2o_iovec_init(proto_bytes, proto_len);
}
2211
2212
/* Returns 1 if `sock` uses picotls and its handshake has not completed yet, 0 otherwise. The socket must have TLS state
 * (asserted). */
int h2o_socket_ssl_is_early_data(h2o_socket_t *sock)
{
    assert(sock->ssl != NULL);

    return sock->ssl->ptls != NULL && !ptls_handshake_is_complete(sock->ssl->ptls);
}
2220
2221
/* ALPN selection callback. `_protocols` is an array of h2o_iovec_t terminated by an entry with len == 0, walked in array
 * order; `_in` / `inlen` is the peer's list encoded as a sequence of <1-byte length><bytes> records. The first entry of
 * `_protocols` that also appears in the peer's list is selected.
 * Returns SSL_TLSEXT_ERR_OK with `*out` / `*outlen` pointing at the chosen entry, or SSL_TLSEXT_ERR_NOACK when nothing
 * matches or a record overruns the input. */
static int on_alpn_select(SSL *ssl, const unsigned char **out, unsigned char *outlen, const unsigned char *_in, unsigned int inlen,
                          void *_protocols)
{
    const h2o_iovec_t *protocols = _protocols;
    const unsigned char *const list_end = _in + inlen;

    for (const h2o_iovec_t *wanted = protocols; wanted->len != 0; ++wanted) {
        const unsigned char *cursor = _in;
        while (cursor != list_end) {
            size_t cand_len = *cursor++;
            /* broken request: the record claims more bytes than remain */
            if (list_end - cursor < cand_len)
                return SSL_TLSEXT_ERR_NOACK;
            if (cand_len == wanted->len && memcmp(cursor, wanted->base, cand_len) == 0) {
                *out = (const unsigned char *)wanted->base;
                *outlen = (unsigned char)wanted->len;
                return SSL_TLSEXT_ERR_OK;
            }
            cursor += cand_len;
        }
    }

    /* not found */
    return SSL_TLSEXT_ERR_NOACK;
}
2249
2250
#if H2O_USE_ALPN
2251
2252
/* Installs on_alpn_select as the ALPN selection callback of `ctx`, passing `protocols` as its argument. The array pointer
 * is stored as-is (not copied here). */
void h2o_ssl_register_alpn_protocols(SSL_CTX *ctx, const h2o_iovec_t *protocols)
{
    void *cb_arg = (void *)protocols;

    SSL_CTX_set_alpn_select_cb(ctx, on_alpn_select, cb_arg);
}
2256
2257
#endif
2258
2259
#if H2O_USE_NPN
2260
2261
/* NPN advertise callback: exposes the NUL-terminated buffer passed as `protocols`, reporting its strlen as the length.
 * Always returns SSL_TLSEXT_ERR_OK. */
static int on_npn_advertise(SSL *ssl, const unsigned char **out, unsigned *outlen, void *protocols)
{
    const char *advertised = protocols;

    *out = (const unsigned char *)advertised;
    *outlen = (unsigned)strlen(advertised);

    return SSL_TLSEXT_ERR_OK;
}
2267
2268
/* Installs on_npn_advertise as the NPN advertise callback of `ctx`, passing `protocols` as its argument. The string pointer
 * is stored as-is (not copied here). */
void h2o_ssl_register_npn_protocols(SSL_CTX *ctx, const char *protocols)
{
    void *cb_arg = (void *)protocols;

    SSL_CTX_set_next_protos_advertised_cb(ctx, on_npn_advertise, cb_arg);
}
2272
2273
#endif
2274
2275
int h2o_socket_set_df_bit(int fd, int domain)
2276
0
{
2277
0
#define SETSOCKOPT(ip, optname, _optvar)                                                                                           \
2278
0
    do {                                                                                                                           \
2279
0
        int optvar = _optvar;                                                                                                      \
2280
0
        if (setsockopt(fd, ip, optname, &optvar, sizeof(optvar)) != 0) {                                                           \
2281
0
            perror("failed to set the DF bit through setsockopt(" H2O_TO_STR(ip) ", " H2O_TO_STR(optname) ")");                    \
2282
0
            return 0;                                                                                                              \
2283
0
        }                                                                                                                          \
2284
0
        return 1;                                                                                                                  \
2285
0
    } while (0)
2286
2287
0
    switch (domain) {
2288
0
    case AF_INET:
2289
0
#if defined(IP_PMTUDISC_DO)
2290
0
        SETSOCKOPT(IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DO);
2291
#elif defined(IP_DONTFRAG)
2292
        SETSOCKOPT(IPPROTO_IP, IP_DONTFRAG, 1);
2293
#endif
2294
0
        break;
2295
0
    case AF_INET6:
2296
0
#if defined(IPV6_PMTUDISC_DO)
2297
0
        SETSOCKOPT(IPPROTO_IPV6, IPV6_MTU_DISCOVER, IPV6_PMTUDISC_DO);
2298
#elif defined(IPV6_DONTFRAG)
2299
        SETSOCKOPT(IPPROTO_IPV6, IPV6_DONTFRAG, 1);
2300
#endif
2301
0
        break;
2302
0
    default:
2303
0
        break;
2304
0
    }
2305
2306
0
    return 1;
2307
2308
0
#undef SETSOCKOPT
2309
0
}
2310
2311
/* Ends the measurement currently in progress on `counter` (cur.start_at must be non-zero; asserted) and folds the elapsed
 * time into the sliding window: the oldest slot is replaced, the running sum adjusted, and `average` recomputed as
 * sum / number-of-slots. If `now` is not later than the start time, the elapsed time is taken as zero. */
void h2o_sliding_counter_stop(h2o_sliding_counter_t *counter, uint64_t now)
{
    const size_t num_slots = sizeof(counter->prev.slots) / sizeof(counter->prev.slots[0]);

    assert(counter->cur.start_at != 0);

    /* time spent in the current measurement; then mark the counter as not running */
    uint64_t elapsed = now > counter->cur.start_at ? now - counter->cur.start_at : 0;
    counter->cur.start_at = 0;

    /* swap the new sample into the ring, keeping the sum in step */
    counter->prev.sum += elapsed;
    counter->prev.sum -= counter->prev.slots[counter->prev.index];
    counter->prev.slots[counter->prev.index] = elapsed;
    if (++counter->prev.index >= num_slots)
        counter->prev.index = 0;

    counter->average = counter->prev.sum / num_slots;
}
2334
2335
/* Initializes `vec` as a send vector referring to the `len` bytes at `base`, with h2o_sendvec_read_raw as its read
 * callback. The bytes are referenced, not copied. */
void h2o_sendvec_init_raw(h2o_sendvec_t *vec, const void *base, size_t len)
{
    static const h2o_sendvec_callbacks_t raw_callbacks = {h2o_sendvec_read_raw};

    vec->len = len;
    vec->raw = (char *)base;
    vec->callbacks = &raw_callbacks;
}
2342
2343
/* Copies the next `len` bytes of `src` into `dst` and advances `src` past them. `len` must not exceed the remaining
 * length (asserted). Always returns 1. */
int h2o_sendvec_read_raw(h2o_sendvec_t *src, void *dst, size_t len)
{
    assert(src->len >= len);

    memcpy(dst, src->raw, len);

    /* consume what was just copied */
    src->len -= len;
    src->raw += len;

    return 1;
}
2351
2352
int zerocopy_buffers_is_empty(struct st_h2o_socket_zerocopy_buffers_t *buffers)
2353
0
{
2354
0
    return buffers->first == buffers->last;
2355
0
}
2356
2357
void zerocopy_buffers_dispose(struct st_h2o_socket_zerocopy_buffers_t *buffers)
2358
0
{
2359
0
    assert(zerocopy_buffers_is_empty(buffers));
2360
0
    if (buffers->bufs != NULL)
2361
0
        free(buffers->bufs);
2362
0
}
2363
2364
void zerocopy_buffers_push(struct st_h2o_socket_zerocopy_buffers_t *buffers, void *p)
2365
0
{
2366
0
    if (buffers->last >= buffers->capacity) {
2367
0
        assert(buffers->last == buffers->capacity);
2368
0
        size_t new_capacity = (buffers->last - buffers->first) * 2;
2369
0
        if (new_capacity < 16)
2370
0
            new_capacity = 16;
2371
0
        if (new_capacity <= buffers->capacity) {
2372
0
            memmove(buffers->bufs, buffers->bufs + buffers->first, sizeof(buffers->bufs[0]) * (buffers->last - buffers->first));
2373
0
        } else {
2374
0
            void **newbufs = h2o_mem_alloc(sizeof(newbufs[0]) * new_capacity);
2375
0
            h2o_memcpy(newbufs, buffers->bufs + buffers->first, sizeof(newbufs[0]) * (buffers->last - buffers->first));
2376
0
            free(buffers->bufs);
2377
0
            buffers->bufs = newbufs;
2378
0
            buffers->capacity = new_capacity;
2379
0
        }
2380
0
        buffers->last -= buffers->first;
2381
0
        buffers->first = 0;
2382
0
    }
2383
0
    buffers->bufs[buffers->last++] = p;
2384
0
}
2385
2386
void *zerocopy_buffers_release(struct st_h2o_socket_zerocopy_buffers_t *buffers, uint64_t counter)
2387
0
{
2388
0
    assert(buffers->first_counter <= counter);
2389
2390
0
    size_t free_slot = buffers->first + (counter - buffers->first_counter);
2391
0
    assert(free_slot < buffers->last);
2392
2393
    /* Determine the address represented by given counter. */
2394
0
    void *free_ptr = buffers->bufs[free_slot];
2395
0
    assert(free_ptr != NULL);
2396
2397
    /* Search for adjacent entries that refer to the same address. If found, the address cannot be freed yet; hence set the return
2398
     * value to NULL. Rationale: when sendmsg returns partial write, one memory block would be registered multiple times in a
2399
     * consecutive manner. Such memory block can be freed only when the last entry is being released. */
2400
0
    for (size_t i = free_slot + 1; i < buffers->last; ++i) {
2401
0
        if (buffers->bufs[i] != NULL) {
2402
0
            if (buffers->bufs[i] == free_ptr)
2403
0
                free_ptr = NULL;
2404
0
            break;
2405
0
        }
2406
0
    }
2407
0
    if (free_ptr != NULL && free_slot > buffers->first) {
2408
0
        size_t i = free_slot - 1;
2409
0
        do {
2410
0
            if (buffers->bufs[i] != NULL) {
2411
0
                if (buffers->bufs[i] == free_ptr)
2412
0
                    free_ptr = NULL;
2413
0
                break;
2414
0
            }
2415
0
        } while (i-- > buffers->first);
2416
0
    }
2417
2418
0
    if (buffers->first_counter == counter) {
2419
        /* Release is in-order. Move `first` and `first_counter` to the next valid entry. */
2420
0
        ++buffers->first;
2421
0
        ++buffers->first_counter;
2422
0
        while (buffers->first != buffers->last) {
2423
0
            if (buffers->bufs[buffers->first] != NULL)
2424
0
                break;
2425
0
            ++buffers->first;
2426
0
            ++buffers->first_counter;
2427
0
        }
2428
0
        if (buffers->first == buffers->last) {
2429
0
            buffers->first = 0;
2430
0
            buffers->last = 0;
2431
0
        }
2432
0
    } else {
2433
        /* Out-of-order: just clear the slot. */
2434
0
        buffers->bufs[free_slot] = NULL;
2435
0
    }
2436
2437
0
    return free_ptr;
2438
0
}
2439
2440
void h2o_socket_clear_recycle(int full)
2441
0
{
2442
0
    h2o_mem_clear_recycle(&h2o_socket_ssl_buffer_allocator, full);
2443
0
    h2o_mem_clear_recycle(&h2o_socket_zerocopy_buffer_allocator, full);
2444
0
}
2445
2446
int h2o_socket_recycle_is_empty(void)
2447
0
{
2448
0
    return h2o_mem_recycle_is_empty(&h2o_socket_ssl_buffer_allocator) &&
2449
0
           h2o_mem_recycle_is_empty(&h2o_socket_zerocopy_buffer_allocator);
2450
0
}
2451
2452
#ifdef OPENSSL_IS_BORINGSSL
2453
2454
int h2o_socket_boringssl_get_async_job_index(void)
2455
{
2456
    static volatile int index;
2457
    H2O_MULTITHREAD_ONCE({ index = SSL_get_ex_new_index(0, 0, NULL, NULL, NULL); });
2458
    return index;
2459
}
2460
2461
/* Returns 1 when `ssl` is a server-side connection whose async resumption state is ASYNC_RESUMPTION_STATE_REQUEST_SENT,
 * 0 otherwise. The owning h2o socket is taken from the data pointer of the read BIO of `ssl`. */
int h2o_socket_boringssl_async_resumption_in_flight(SSL *ssl)
{
    h2o_socket_t *sock = BIO_get_data(SSL_get_rbio(ssl));

    if (!SSL_is_server(ssl))
        return 0;

    return sock->ssl->handshake.server.async_resumption.state == ASYNC_RESUMPTION_STATE_REQUEST_SENT;
}
2466
2467
#endif