/src/serenity/Userland/Libraries/LibCrypto/Authentication/Poly1305.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2022, stelar7 <dudedbz@gmail.com> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/ByteReader.h> |
8 | | #include <AK/Endian.h> |
9 | | #include <LibCrypto/Authentication/Poly1305.h> |
10 | | |
11 | | namespace Crypto::Authentication { |
12 | | |
13 | | Poly1305::Poly1305(ReadonlyBytes key) |
14 | 77 | { |
15 | 385 | for (size_t i = 0; i < 16; i += 4) { |
16 | 308 | m_state.r[i / 4] = AK::convert_between_host_and_little_endian(ByteReader::load32(key.offset(i))); |
17 | 308 | } |
18 | | |
19 | | // r[3], r[7], r[11], and r[15] are required to have their top four bits clear (be smaller than 16) |
20 | | // r[4], r[8], and r[12] are required to have their bottom two bits clear (be divisible by 4) |
21 | 77 | m_state.r[0] &= 0x0FFFFFFF; |
22 | 77 | m_state.r[1] &= 0x0FFFFFFC; |
23 | 77 | m_state.r[2] &= 0x0FFFFFFC; |
24 | 77 | m_state.r[3] &= 0x0FFFFFFC; |
25 | | |
26 | 385 | for (size_t i = 16; i < 32; i += 4) { |
27 | 308 | m_state.s[(i - 16) / 4] = AK::convert_between_host_and_little_endian(ByteReader::load32(key.offset(i))); |
28 | 308 | } |
29 | 77 | } |
30 | | |
31 | | void Poly1305::update(ReadonlyBytes message) |
32 | 77 | { |
33 | 77 | size_t offset = 0; |
34 | 655k | while (offset < message.size()) { |
35 | 655k | u32 n = min(message.size() - offset, 16 - m_state.block_count); |
36 | 655k | memcpy(m_state.blocks + m_state.block_count, message.offset_pointer(offset), n); |
37 | 655k | m_state.block_count += n; |
38 | 655k | offset += n; |
39 | | |
40 | 655k | if (m_state.block_count == 16) { |
41 | 655k | process_block(); |
42 | 655k | m_state.block_count = 0; |
43 | 655k | } |
44 | 655k | } |
45 | 77 | } |
46 | | |
47 | | void Poly1305::process_block() |
48 | 655k | { |
49 | 655k | u32 a[5]; |
50 | 655k | u8 n = m_state.block_count; |
51 | | |
52 | | // Add one bit beyond the number of octets. For a 16-byte block, |
53 | | // this is equivalent to adding 2^128 to the number. For the shorter |
54 | | // block, it can be 2^120, 2^112, or any power of two that is evenly |
55 | | // divisible by 8, all the way down to 2^8. |
56 | 655k | m_state.blocks[n++] = 0x01; |
57 | | |
58 | | // If the block is not 17 bytes long (the last block), pad it with zeros. |
59 | | // This is meaningless if you are treating the blocks as numbers. |
60 | 656k | while (n < 17) { |
61 | 295 | m_state.blocks[n++] = 0x00; |
62 | 295 | } |
63 | | |
64 | | // Read the block as a little-endian number. |
65 | 3.27M | for (size_t i = 0; i < 16; i += 4) { |
66 | 2.62M | a[i / 4] = AK::convert_between_host_and_little_endian(ByteReader::load32(m_state.blocks + i)); |
67 | 2.62M | } |
68 | 655k | a[4] = m_state.blocks[16]; |
69 | | |
70 | | // Add this number to the accumulator. |
71 | 655k | m_state.a[0] += a[0]; |
72 | 655k | m_state.a[1] += a[1]; |
73 | 655k | m_state.a[2] += a[2]; |
74 | 655k | m_state.a[3] += a[3]; |
75 | 655k | m_state.a[4] += a[4]; |
76 | | |
77 | | // Carry |
78 | 655k | m_state.a[1] += m_state.a[0] >> 32; |
79 | 655k | m_state.a[2] += m_state.a[1] >> 32; |
80 | 655k | m_state.a[3] += m_state.a[2] >> 32; |
81 | 655k | m_state.a[4] += m_state.a[3] >> 32; |
82 | | |
83 | | // Only consider the least significant bits |
84 | 655k | a[0] = m_state.a[0] & 0xFFFFFFFF; |
85 | 655k | a[1] = m_state.a[1] & 0xFFFFFFFF; |
86 | 655k | a[2] = m_state.a[2] & 0xFFFFFFFF; |
87 | 655k | a[3] = m_state.a[3] & 0xFFFFFFFF; |
88 | 655k | a[4] = m_state.a[4] & 0xFFFFFFFF; |
89 | | |
90 | | // Multiply by r |
91 | 655k | m_state.a[0] = (u64)a[0] * m_state.r[0]; |
92 | 655k | m_state.a[1] = (u64)a[0] * m_state.r[1] + (u64)a[1] * m_state.r[0]; |
93 | 655k | m_state.a[2] = (u64)a[0] * m_state.r[2] + (u64)a[1] * m_state.r[1] + (u64)a[2] * m_state.r[0]; |
94 | 655k | m_state.a[3] = (u64)a[0] * m_state.r[3] + (u64)a[1] * m_state.r[2] + (u64)a[2] * m_state.r[1] + (u64)a[3] * m_state.r[0]; |
95 | 655k | m_state.a[4] = (u64)a[1] * m_state.r[3] + (u64)a[2] * m_state.r[2] + (u64)a[3] * m_state.r[1] + (u64)a[4] * m_state.r[0]; |
96 | 655k | m_state.a[5] = (u64)a[2] * m_state.r[3] + (u64)a[3] * m_state.r[2] + (u64)a[4] * m_state.r[1]; |
97 | 655k | m_state.a[6] = (u64)a[3] * m_state.r[3] + (u64)a[4] * m_state.r[2]; |
98 | 655k | m_state.a[7] = (u64)a[4] * m_state.r[3]; |
99 | | |
100 | | // Carry |
101 | 655k | m_state.a[1] += m_state.a[0] >> 32; |
102 | 655k | m_state.a[2] += m_state.a[1] >> 32; |
103 | 655k | m_state.a[3] += m_state.a[2] >> 32; |
104 | 655k | m_state.a[4] += m_state.a[3] >> 32; |
105 | 655k | m_state.a[5] += m_state.a[4] >> 32; |
106 | 655k | m_state.a[6] += m_state.a[5] >> 32; |
107 | 655k | m_state.a[7] += m_state.a[6] >> 32; |
108 | | |
109 | | // Save the high part of the accumulator |
110 | 655k | a[0] = m_state.a[4] & 0xFFFFFFFC; |
111 | 655k | a[1] = m_state.a[5] & 0xFFFFFFFF; |
112 | 655k | a[2] = m_state.a[6] & 0xFFFFFFFF; |
113 | 655k | a[3] = m_state.a[7] & 0xFFFFFFFF; |
114 | | |
115 | | // Only consider the least significant bits |
116 | 655k | m_state.a[0] &= 0xFFFFFFFF; |
117 | 655k | m_state.a[1] &= 0xFFFFFFFF; |
118 | 655k | m_state.a[2] &= 0xFFFFFFFF; |
119 | 655k | m_state.a[3] &= 0xFFFFFFFF; |
120 | 655k | m_state.a[4] &= 0x00000003; |
121 | | |
122 | | // Fast modular reduction (first pass) |
123 | 655k | m_state.a[0] += a[0]; |
124 | 655k | m_state.a[0] += (a[0] >> 2) | (a[1] << 30); |
125 | 655k | m_state.a[1] += a[1]; |
126 | 655k | m_state.a[1] += (a[1] >> 2) | (a[2] << 30); |
127 | 655k | m_state.a[2] += a[2]; |
128 | 655k | m_state.a[2] += (a[2] >> 2) | (a[3] << 30); |
129 | 655k | m_state.a[3] += a[3]; |
130 | 655k | m_state.a[3] += (a[3] >> 2); |
131 | | |
132 | | // Carry |
133 | 655k | m_state.a[1] += m_state.a[0] >> 32; |
134 | 655k | m_state.a[2] += m_state.a[1] >> 32; |
135 | 655k | m_state.a[3] += m_state.a[2] >> 32; |
136 | 655k | m_state.a[4] += m_state.a[3] >> 32; |
137 | | |
138 | | // Save the high part of the accumulator |
139 | 655k | a[0] = m_state.a[4] & 0xFFFFFFFC; |
140 | | |
141 | | // Only consider the least significant bits |
142 | 655k | m_state.a[0] &= 0xFFFFFFFF; |
143 | 655k | m_state.a[1] &= 0xFFFFFFFF; |
144 | 655k | m_state.a[2] &= 0xFFFFFFFF; |
145 | 655k | m_state.a[3] &= 0xFFFFFFFF; |
146 | 655k | m_state.a[4] &= 0x00000003; |
147 | | |
148 | | // Fast modular reduction (second pass) |
149 | 655k | m_state.a[0] += a[0]; |
150 | 655k | m_state.a[0] += a[0] >> 2; |
151 | | |
152 | | // Carry |
153 | 655k | m_state.a[1] += m_state.a[0] >> 32; |
154 | 655k | m_state.a[2] += m_state.a[1] >> 32; |
155 | 655k | m_state.a[3] += m_state.a[2] >> 32; |
156 | 655k | m_state.a[4] += m_state.a[3] >> 32; |
157 | | |
158 | | // Only consider the least significant bits |
159 | 655k | m_state.a[0] &= 0xFFFFFFFF; |
160 | 655k | m_state.a[1] &= 0xFFFFFFFF; |
161 | 655k | m_state.a[2] &= 0xFFFFFFFF; |
162 | 655k | m_state.a[3] &= 0xFFFFFFFF; |
163 | 655k | m_state.a[4] &= 0x00000003; |
164 | 655k | } |
165 | | |
166 | | ErrorOr<ByteBuffer> Poly1305::digest() |
167 | 77 | { |
168 | 77 | if (m_state.block_count != 0) |
169 | 62 | process_block(); |
170 | | |
171 | 77 | u32 b[4]; |
172 | | |
173 | | // Save the accumulator |
174 | 77 | b[0] = m_state.a[0] & 0xFFFFFFFF; |
175 | 77 | b[1] = m_state.a[1] & 0xFFFFFFFF; |
176 | 77 | b[2] = m_state.a[2] & 0xFFFFFFFF; |
177 | 77 | b[3] = m_state.a[3] & 0xFFFFFFFF; |
178 | | |
179 | | // Compute a + 5 |
180 | 77 | m_state.a[0] += 5; |
181 | | |
182 | | // Carry |
183 | 77 | m_state.a[1] += m_state.a[0] >> 32; |
184 | 77 | m_state.a[2] += m_state.a[1] >> 32; |
185 | 77 | m_state.a[3] += m_state.a[2] >> 32; |
186 | 77 | m_state.a[4] += m_state.a[3] >> 32; |
187 | | |
188 | | // Select mask based on (a + 5) >= 2^130 |
189 | 77 | u32 mask = ((m_state.a[4] & 0x04) >> 2) - 1; |
190 | | |
191 | | // Select based on mask |
192 | 77 | m_state.a[0] = (m_state.a[0] & ~mask) | (b[0] & mask); |
193 | 77 | m_state.a[1] = (m_state.a[1] & ~mask) | (b[1] & mask); |
194 | 77 | m_state.a[2] = (m_state.a[2] & ~mask) | (b[2] & mask); |
195 | 77 | m_state.a[3] = (m_state.a[3] & ~mask) | (b[3] & mask); |
196 | | |
197 | | // Finally, the value of the secret key "s" is added to the accumulator, |
198 | | // and the 128 least significant bits are serialized in little-endian |
199 | | // order to form the tag. |
200 | 77 | m_state.a[0] += m_state.s[0]; |
201 | 77 | m_state.a[1] += m_state.s[1]; |
202 | 77 | m_state.a[2] += m_state.s[2]; |
203 | 77 | m_state.a[3] += m_state.s[3]; |
204 | | |
205 | | // Carry |
206 | 77 | m_state.a[1] += m_state.a[0] >> 32; |
207 | 77 | m_state.a[2] += m_state.a[1] >> 32; |
208 | 77 | m_state.a[3] += m_state.a[2] >> 32; |
209 | 77 | m_state.a[4] += m_state.a[3] >> 32; |
210 | | |
211 | | // Only consider the least significant bits |
212 | 77 | b[0] = m_state.a[0] & 0xFFFFFFFF; |
213 | 77 | b[1] = m_state.a[1] & 0xFFFFFFFF; |
214 | 77 | b[2] = m_state.a[2] & 0xFFFFFFFF; |
215 | 77 | b[3] = m_state.a[3] & 0xFFFFFFFF; |
216 | | |
217 | 77 | ByteBuffer output = TRY(ByteBuffer::create_uninitialized(16)); |
218 | | |
219 | 385 | for (auto i = 0; i < 4; i++) { |
220 | 308 | ByteReader::store(output.offset_pointer(i * 4), AK::convert_between_host_and_little_endian(b[i])); |
221 | 308 | } |
222 | | |
223 | 77 | return output; |
224 | 77 | } |
225 | | |
226 | | } |