/src/openssh/smult_curve25519_ref.c
Line | Count | Source |
1 | | /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */ |
2 | | /* |
3 | | version 20081011 |
4 | | Matthew Dempsky |
5 | | Public domain. |
6 | | Derived from public domain code by D. J. Bernstein. |
7 | | */ |
8 | | |
9 | | int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *); |
10 | | |
11 | | static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) |
12 | 3.89M | { |
13 | 3.89M | unsigned int j; |
14 | 3.89M | unsigned int u; |
15 | 3.89M | u = 0; |
16 | 124M | for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } |
17 | 3.89M | u += a[31] + b[31]; out[31] = u; |
18 | 3.89M | } |
19 | | |
20 | | static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) |
21 | 3.89M | { |
22 | 3.89M | unsigned int j; |
23 | 3.89M | unsigned int u; |
24 | 3.89M | u = 218; |
25 | 124M | for (j = 0;j < 31;++j) { |
26 | 120M | u += a[j] + 65280 - b[j]; |
27 | 120M | out[j] = u & 255; |
28 | 120M | u >>= 8; |
29 | 120M | } |
30 | 3.89M | u += a[31] - b[31]; |
31 | 3.89M | out[31] = u; |
32 | 3.89M | } |
33 | | |
34 | | static void squeeze(unsigned int a[32]) |
35 | 9.77M | { |
36 | 9.77M | unsigned int j; |
37 | 9.77M | unsigned int u; |
38 | 9.77M | u = 0; |
39 | 312M | for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } |
40 | 9.77M | u += a[31]; a[31] = u & 127; |
41 | 9.77M | u = 19 * (u >> 7); |
42 | 312M | for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } |
43 | 9.77M | u += a[31]; a[31] = u; |
44 | 9.77M | } |
45 | | |
46 | | static const unsigned int minusp[32] = { |
47 | | 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 |
48 | | } ; |
49 | | |
50 | | static void freeze(unsigned int a[32]) |
51 | 3.81k | { |
52 | 3.81k | unsigned int aorig[32]; |
53 | 3.81k | unsigned int j; |
54 | 3.81k | unsigned int negative; |
55 | | |
56 | 125k | for (j = 0;j < 32;++j) aorig[j] = a[j]; |
57 | 3.81k | add(a,a,minusp); |
58 | 3.81k | negative = -((a[31] >> 7) & 1); |
59 | 125k | for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]); |
60 | 3.81k | } |
61 | | |
62 | | static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) |
63 | 4.91M | { |
64 | 4.91M | unsigned int i; |
65 | 4.91M | unsigned int j; |
66 | 4.91M | unsigned int u; |
67 | | |
68 | 162M | for (i = 0;i < 32;++i) { |
69 | 157M | u = 0; |
70 | 2.75G | for (j = 0;j <= i;++j) u += a[j] * b[i - j]; |
71 | 2.59G | for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; |
72 | 157M | out[i] = u; |
73 | 157M | } |
74 | 4.91M | squeeze(out); |
75 | 4.91M | } |
76 | | |
77 | | static void mult121665(unsigned int out[32],const unsigned int a[32]) |
78 | 973k | { |
79 | 973k | unsigned int j; |
80 | 973k | unsigned int u; |
81 | | |
82 | 973k | u = 0; |
83 | 31.1M | for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; } |
84 | 973k | u += 121665 * a[31]; out[31] = u & 127; |
85 | 973k | u = 19 * (u >> 7); |
86 | 31.1M | for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } |
87 | 973k | u += out[j]; out[j] = u; |
88 | 973k | } |
89 | | |
90 | | static void square(unsigned int out[32],const unsigned int a[32]) |
91 | 4.86M | { |
92 | 4.86M | unsigned int i; |
93 | 4.86M | unsigned int j; |
94 | 4.86M | unsigned int u; |
95 | | |
96 | 160M | for (i = 0;i < 32;++i) { |
97 | 155M | u = 0; |
98 | 1.40G | for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; |
99 | 1.32G | for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; |
100 | 155M | u *= 2; |
101 | 155M | if ((i & 1) == 0) { |
102 | 77.8M | u += a[i / 2] * a[i / 2]; |
103 | 77.8M | u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; |
104 | 77.8M | } |
105 | 155M | out[i] = u; |
106 | 155M | } |
107 | 4.86M | squeeze(out); |
108 | 4.86M | } |
109 | | |
110 | | static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b) |
111 | 1.94M | { |
112 | 1.94M | unsigned int j; |
113 | 1.94M | unsigned int t; |
114 | 1.94M | unsigned int bminus1; |
115 | | |
116 | 1.94M | bminus1 = b - 1; |
117 | 126M | for (j = 0;j < 64;++j) { |
118 | 124M | t = bminus1 & (r[j] ^ s[j]); |
119 | 124M | p[j] = s[j] ^ t; |
120 | 124M | q[j] = r[j] ^ t; |
121 | 124M | } |
122 | 1.94M | } |
123 | | |
124 | | static void mainloop(unsigned int work[64],const unsigned char e[32]) |
125 | 3.81k | { |
126 | 3.81k | unsigned int xzm1[64]; |
127 | 3.81k | unsigned int xzm[64]; |
128 | 3.81k | unsigned int xzmb[64]; |
129 | 3.81k | unsigned int xzm1b[64]; |
130 | 3.81k | unsigned int xznb[64]; |
131 | 3.81k | unsigned int xzn1b[64]; |
132 | 3.81k | unsigned int a0[64]; |
133 | 3.81k | unsigned int a1[64]; |
134 | 3.81k | unsigned int b0[64]; |
135 | 3.81k | unsigned int b1[64]; |
136 | 3.81k | unsigned int c1[64]; |
137 | 3.81k | unsigned int r[32]; |
138 | 3.81k | unsigned int s[32]; |
139 | 3.81k | unsigned int t[32]; |
140 | 3.81k | unsigned int u[32]; |
141 | 3.81k | unsigned int j; |
142 | 3.81k | unsigned int b; |
143 | 3.81k | int pos; |
144 | | |
145 | 125k | for (j = 0;j < 32;++j) xzm1[j] = work[j]; |
146 | 3.81k | xzm1[32] = 1; |
147 | 122k | for (j = 33;j < 64;++j) xzm1[j] = 0; |
148 | | |
149 | 3.81k | xzm[0] = 1; |
150 | 244k | for (j = 1;j < 64;++j) xzm[j] = 0; |
151 | | |
152 | 977k | for (pos = 254;pos >= 0;--pos) { |
153 | 973k | b = e[pos / 8] >> (pos & 7); |
154 | 973k | b &= 1; |
155 | 973k | select(xzmb,xzm1b,xzm,xzm1,b); |
156 | 973k | add(a0,xzmb,xzmb + 32); |
157 | 973k | sub(a0 + 32,xzmb,xzmb + 32); |
158 | 973k | add(a1,xzm1b,xzm1b + 32); |
159 | 973k | sub(a1 + 32,xzm1b,xzm1b + 32); |
160 | 973k | square(b0,a0); |
161 | 973k | square(b0 + 32,a0 + 32); |
162 | 973k | mult(b1,a1,a0 + 32); |
163 | 973k | mult(b1 + 32,a1 + 32,a0); |
164 | 973k | add(c1,b1,b1 + 32); |
165 | 973k | sub(c1 + 32,b1,b1 + 32); |
166 | 973k | square(r,c1 + 32); |
167 | 973k | sub(s,b0,b0 + 32); |
168 | 973k | mult121665(t,s); |
169 | 973k | add(u,t,b0); |
170 | 973k | mult(xznb,b0,b0 + 32); |
171 | 973k | mult(xznb + 32,s,u); |
172 | 973k | square(xzn1b,c1); |
173 | 973k | mult(xzn1b + 32,r,work); |
174 | 973k | select(xzm,xzm1,xznb,xzn1b,b); |
175 | 973k | } |
176 | | |
177 | 248k | for (j = 0;j < 64;++j) work[j] = xzm[j]; |
178 | 3.81k | } |
179 | | |
180 | | static void recip(unsigned int out[32],const unsigned int z[32]) |
181 | 3.81k | { |
182 | 3.81k | unsigned int z2[32]; |
183 | 3.81k | unsigned int z9[32]; |
184 | 3.81k | unsigned int z11[32]; |
185 | 3.81k | unsigned int z2_5_0[32]; |
186 | 3.81k | unsigned int z2_10_0[32]; |
187 | 3.81k | unsigned int z2_20_0[32]; |
188 | 3.81k | unsigned int z2_50_0[32]; |
189 | 3.81k | unsigned int z2_100_0[32]; |
190 | 3.81k | unsigned int t0[32]; |
191 | 3.81k | unsigned int t1[32]; |
192 | 3.81k | int i; |
193 | | |
194 | | /* 2 */ square(z2,z); |
195 | 3.81k | /* 4 */ square(t1,z2); |
196 | 3.81k | /* 8 */ square(t0,t1); |
197 | 3.81k | /* 9 */ mult(z9,t0,z); |
198 | 3.81k | /* 11 */ mult(z11,z9,z2); |
199 | 3.81k | /* 22 */ square(t0,z11); |
200 | 3.81k | /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); |
201 | | |
202 | | /* 2^6 - 2^1 */ square(t0,z2_5_0); |
203 | 3.81k | /* 2^7 - 2^2 */ square(t1,t0); |
204 | 3.81k | /* 2^8 - 2^3 */ square(t0,t1); |
205 | 3.81k | /* 2^9 - 2^4 */ square(t1,t0); |
206 | 3.81k | /* 2^10 - 2^5 */ square(t0,t1); |
207 | 3.81k | /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); |
208 | | |
209 | | /* 2^11 - 2^1 */ square(t0,z2_10_0); |
210 | 3.81k | /* 2^12 - 2^2 */ square(t1,t0); |
211 | 19.0k | /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } |
212 | 3.81k | /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); |
213 | | |
214 | | /* 2^21 - 2^1 */ square(t0,z2_20_0); |
215 | 3.81k | /* 2^22 - 2^2 */ square(t1,t0); |
216 | 38.1k | /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } |
217 | 3.81k | /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); |
218 | | |
219 | | /* 2^41 - 2^1 */ square(t1,t0); |
220 | 3.81k | /* 2^42 - 2^2 */ square(t0,t1); |
221 | 19.0k | /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } |
222 | 3.81k | /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); |
223 | | |
224 | | /* 2^51 - 2^1 */ square(t0,z2_50_0); |
225 | 3.81k | /* 2^52 - 2^2 */ square(t1,t0); |
226 | 95.4k | /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } |
227 | 3.81k | /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); |
228 | | |
229 | | /* 2^101 - 2^1 */ square(t1,z2_100_0); |
230 | 3.81k | /* 2^102 - 2^2 */ square(t0,t1); |
231 | 190k | /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } |
232 | 3.81k | /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); |
233 | | |
234 | | /* 2^201 - 2^1 */ square(t0,t1); |
235 | 3.81k | /* 2^202 - 2^2 */ square(t1,t0); |
236 | 95.4k | /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } |
237 | 3.81k | /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); |
238 | | |
239 | | /* 2^251 - 2^1 */ square(t1,t0); |
240 | 3.81k | /* 2^252 - 2^2 */ square(t0,t1); |
241 | 3.81k | /* 2^253 - 2^3 */ square(t1,t0); |
242 | 3.81k | /* 2^254 - 2^4 */ square(t0,t1); |
243 | 3.81k | /* 2^255 - 2^5 */ square(t1,t0); |
244 | 3.81k | /* 2^255 - 21 */ mult(out,t1,z11); |
245 | 3.81k | } |
246 | | |
247 | | int crypto_scalarmult_curve25519(unsigned char *q, |
248 | | const unsigned char *n, |
249 | | const unsigned char *p) |
250 | 3.81k | { |
251 | 3.81k | unsigned int work[96]; |
252 | 3.81k | unsigned char e[32]; |
253 | 3.81k | unsigned int i; |
254 | 125k | for (i = 0;i < 32;++i) e[i] = n[i]; |
255 | 3.81k | e[0] &= 248; |
256 | 3.81k | e[31] &= 127; |
257 | 3.81k | e[31] |= 64; |
258 | 125k | for (i = 0;i < 32;++i) work[i] = p[i]; |
259 | 3.81k | mainloop(work,e); |
260 | 3.81k | recip(work + 32,work + 32); |
261 | 3.81k | mult(work + 64,work,work + 32); |
262 | 3.81k | freeze(work + 64); |
263 | 125k | for (i = 0;i < 32;++i) q[i] = work[64 + i]; |
264 | 3.81k | return 0; |
265 | 3.81k | } |