/src/picotls/lib/quiclb-impl.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2025 Fastly, Kazuho Oku |
3 | | * |
4 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | | * of this software and associated documentation files (the "Software"), to |
6 | | * deal in the Software without restriction, including without limitation the |
7 | | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
8 | | * sell copies of the Software, and to permit persons to whom the Software is |
9 | | * furnished to do so, subject to the following conditions: |
10 | | * |
11 | | * The above copyright notice and this permission notice shall be included in |
12 | | * all copies or substantial portions of the Software. |
13 | | * |
14 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
19 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
20 | | * IN THE SOFTWARE. |
21 | | */ |
22 | | #ifndef picotls_quiclb_h |
23 | | #define picotls_quiclb_h |
24 | | |
25 | | #if defined(__x86_64__) || defined(_M_X64) |
26 | | #include <emmintrin.h> |
27 | | #define PICOTLS_QUICLB_HAVE_SSE2 1 |
28 | | #endif |
29 | | |
30 | | union picotls_quiclb_block { |
31 | | uint8_t bytes[PTLS_AES_BLOCK_SIZE]; |
32 | | uint64_t u64[PTLS_AES_BLOCK_SIZE / sizeof(uint64_t)]; |
33 | | #if PICOTLS_QUICLB_HAVE_SSE2 |
34 | | __m128i m128; |
35 | | #endif |
36 | | }; |
37 | | |
38 | | /** |
39 | | * encrypts one block of AES, assuming the context is `ptls_cipher_context_t` backed by ptls_foo_aes128ecb |
40 | | */ |
41 | | static inline void picotls_quiclb_cipher_aes(void *aesecb, union picotls_quiclb_block *block) |
42 | 0 | { |
43 | 0 | ptls_cipher_encrypt(aesecb, block->bytes, block->bytes, PTLS_AES_BLOCK_SIZE); |
44 | 0 | } |
45 | | |
46 | | /** |
47 | | * calculates X ^ AES(mask_and_expand(Y)) |
48 | | */ |
49 | | static inline void picotls_quiclb_one_round(void (*aesecb_func)(void *aesecb, union picotls_quiclb_block *), void *aesecb_ctx, |
50 | | union picotls_quiclb_block *dest, const union picotls_quiclb_block *x, |
51 | | const union picotls_quiclb_block *y, const union picotls_quiclb_block *mask, |
52 | | const union picotls_quiclb_block *len_pass) |
53 | 0 | { |
54 | 0 | #if PICOTLS_QUICLB_HAVE_SSE2 |
55 | 0 | dest->m128 = _mm_or_si128(_mm_and_si128(y->m128, mask->m128), len_pass->m128); |
56 | | #else |
57 | | for (size_t i = 0; i < PTLS_ELEMENTSOF(dest->u64); ++i) |
58 | | dest->u64[i] = (y->u64[i] & mask->u64[i]) | len_pass->u64[i]; |
59 | | #endif |
60 | |
|
61 | 0 | aesecb_func(aesecb_ctx, dest); |
62 | |
|
63 | 0 | #if PICOTLS_QUICLB_HAVE_SSE2 |
64 | 0 | dest->m128 = _mm_xor_si128(dest->m128, x->m128); |
65 | | #else |
66 | | for (size_t i = 0; i < PTLS_ELEMENTSOF(dest->u64); ++i) |
67 | | dest->u64[i] ^= x->u64[i]; |
68 | | #endif |
69 | 0 | } |
70 | | |
71 | | static inline void picotls_quiclb_split_input(union picotls_quiclb_block *l, union picotls_quiclb_block *r, const uint8_t *input, |
72 | | size_t len) |
73 | 0 | { |
74 | 0 | size_t i; |
75 | 0 | for (i = 0; i < (len + 1) / 2; ++i) |
76 | 0 | l->bytes[i] = input[i]; |
77 | 0 | for (; i < PTLS_ELEMENTSOF(l->bytes); ++i) |
78 | 0 | l->bytes[i] = 0; |
79 | 0 | for (i = 0; i < (len + 1) / 2; ++i) |
80 | 0 | r->bytes[i] = input[i + len / 2]; |
81 | 0 | for (; i < PTLS_ELEMENTSOF(r->bytes); ++i) |
82 | 0 | r->bytes[i] = 0; |
83 | 0 | } |
84 | | |
85 | | static inline void picotls_quiclb_merge_output(uint8_t *output, size_t len, const union picotls_quiclb_block *l, |
86 | | const union picotls_quiclb_block *r) |
87 | 0 | { |
88 | 0 | uint8_t *outp = output; |
89 | |
|
90 | 0 | for (size_t i = 0; i < len / 2; ++i) |
91 | 0 | *outp++ = l->bytes[i]; |
92 | |
|
93 | 0 | if (len % 2 == 0) { |
94 | 0 | for (size_t i = 0; i < len / 2; ++i) |
95 | 0 | *outp++ = r->bytes[i]; |
96 | 0 | } else { |
97 | 0 | *outp++ = (l->bytes[len / 2] & 0xf0) | (r->bytes[0] & 0x0f); |
98 | 0 | for (size_t i = 0; i < len / 2; ++i) |
99 | 0 | *outp++ = r->bytes[i + 1]; |
100 | 0 | } |
101 | 0 | } |
102 | | |
103 | | static inline void picotls_quiclb_do_init(ptls_cipher_context_t *ctx, const void *iv) |
104 | 0 | { |
105 | | /* no-op */ |
106 | 0 | } |
107 | | |
108 | | static inline void picotls_quiclb_transform(void (*aesecb_func)(void *aesecb, union picotls_quiclb_block *), void *aesecb_ctx, |
109 | | void *output, const void *input, size_t len, int encrypt) |
110 | 0 | { |
111 | 0 | static const struct quiclb_mask_t { |
112 | 0 | union picotls_quiclb_block l, r; |
113 | 0 | } masks[] = { |
114 | 0 | {{{0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff}}}, /* 7 (MIN_LEN) */ |
115 | 0 | {{{0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff}}}, /* 8 */ |
116 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff}}}, /* 9 */ |
117 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 10 */ |
118 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 11 */ |
119 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 12 */ |
120 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 13 */ |
121 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 14 */ |
122 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 15 */ |
123 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 16 */ |
124 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, |
125 | 0 | {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 17 */ |
126 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, |
127 | 0 | {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 18 */ |
128 | 0 | {{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, |
129 | 0 | {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}} /* 19 */ |
130 | 0 | }; |
131 | |
|
132 | 0 | assert(PTLS_QUICLB_MIN_BLOCK_SIZE <= len && len <= PTLS_QUICLB_MAX_BLOCK_SIZE); |
133 | 0 | PTLS_BUILD_ASSERT(PTLS_QUICLB_MAX_BLOCK_SIZE == PTLS_QUICLB_MIN_BLOCK_SIZE + PTLS_ELEMENTSOF(masks) - 1); |
134 | |
|
135 | 0 | const struct quiclb_mask_t *mask = &masks[len - PTLS_QUICLB_MIN_BLOCK_SIZE]; |
136 | 0 | union picotls_quiclb_block l0, r0, r1, l1, r2, l2, len_pass = {{0}}; |
137 | 0 | len_pass.bytes[14] = (uint8_t)len; |
138 | |
|
139 | 0 | #define ROUND(rnd, dest, x, y, mask_side) \ |
140 | 0 | do { \ |
141 | 0 | len_pass.bytes[15] = (rnd); \ |
142 | 0 | picotls_quiclb_one_round(aesecb_func, aesecb_ctx, &dest, &x, &y, &mask->mask_side, &len_pass); \ |
143 | 0 | } while (0) |
144 | |
|
145 | 0 | if (encrypt) { |
146 | 0 | picotls_quiclb_split_input(&l0, &r0, input, len); |
147 | 0 | ROUND(1, r1, r0, l0, l); |
148 | 0 | ROUND(2, l1, l0, r1, r); |
149 | 0 | ROUND(3, r2, r1, l1, l); |
150 | 0 | ROUND(4, l2, l1, r2, r); |
151 | 0 | picotls_quiclb_merge_output(output, len, &l2, &r2); |
152 | 0 | } else { |
153 | 0 | picotls_quiclb_split_input(&l2, &r2, input, len); |
154 | 0 | ROUND(4, l1, l2, r2, r); |
155 | 0 | ROUND(3, r1, r2, l1, l); |
156 | 0 | ROUND(2, l0, l1, r1, r); |
157 | 0 | ROUND(1, r0, r1, l0, l); |
158 | 0 | picotls_quiclb_merge_output(output, len, &l0, &r0); |
159 | 0 | } |
160 | |
|
161 | 0 | #undef ROUND |
162 | 0 | } |
163 | | |
164 | | #endif |