/src/relic/src/fpx/relic_fp16_mul.c
Line | Count | Source |
1 | | /* |
2 | | * RELIC is an Efficient LIbrary for Cryptography |
3 | | * Copyright (c) 2023 RELIC Authors |
4 | | * |
5 | | * This file is part of RELIC. RELIC is legal property of its developers, |
6 | | * whose names are not listed here. Please refer to the COPYRIGHT file |
7 | | * for contact information. |
8 | | * |
9 | | * RELIC is free software; you can redistribute it and/or modify it under the |
10 | | * terms of the version 2.1 (or later) of the GNU Lesser General Public License |
11 | | * as published by the Free Software Foundation; or version 2.0 of the Apache |
12 | | * License as published by the Apache Software Foundation. See the LICENSE files |
13 | | * for more details. |
14 | | * |
15 | | * RELIC is distributed in the hope that it will be useful, but WITHOUT ANY |
16 | | * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
17 | | * A PARTICULAR PURPOSE. See the LICENSE files for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU Lesser General Public or the |
20 | | * Apache License along with RELIC. If not, see <https://www.gnu.org/licenses/> |
21 | | * or <https://www.apache.org/licenses/>. |
22 | | */ |
23 | | |
24 | | /** |
25 | | * @file |
26 | | * |
27 | | * Implementation of multiplication in a sextadecic extension of a prime field. |
28 | | * |
29 | | * @ingroup fpx |
30 | | */ |
31 | | |
32 | | #include "relic_core.h" |
33 | | #include "relic_fp_low.h" |
34 | | #include "relic_fpx_low.h" |
35 | | |
36 | | /*============================================================================*/ |
37 | | /* Public definitions */ |
38 | | /*============================================================================*/ |
39 | | |
40 | | #if FPX_RDC == BASIC || !defined(STRIP) |
41 | | |
42 | 0 | void fp16_mul_basic(fp16_t c, const fp16_t a, const fp16_t b) { |
43 | 0 | fp8_t t0, t1, t4; |
44 | |
|
45 | 0 | fp8_null(t0); |
46 | 0 | fp8_null(t1); |
47 | 0 | fp8_null(t4); |
48 | |
|
49 | 0 | RLC_TRY { |
50 | 0 | fp8_new(t0); |
51 | 0 | fp8_new(t1); |
52 | 0 | fp8_new(t4); |
53 | | |
54 | | /* Karatsuba algorithm. */ |
55 | | |
56 | | /* t0 = a_0 * b_0. */ |
57 | 0 | fp8_mul(t0, a[0], b[0]); |
58 | | /* t1 = a_1 * b_1. */ |
59 | 0 | fp8_mul(t1, a[1], b[1]); |
60 | | /* t4 = b_0 + b_1. */ |
61 | 0 | fp8_add(t4, b[0], b[1]); |
62 | | |
63 | | /* c_1 = a_0 + a_1. */ |
64 | 0 | fp8_add(c[1], a[0], a[1]); |
65 | | |
66 | | /* c_1 = (a_0 + a_1) * (b_0 + b_1) */ |
67 | 0 | fp8_mul(c[1], c[1], t4); |
68 | 0 | fp8_sub(c[1], c[1], t0); |
69 | 0 | fp8_sub(c[1], c[1], t1); |
70 | | |
71 | | /* c_0 = a_0b_0 + v * a_1b_1. */ |
72 | 0 | fp8_mul_art(t4, t1); |
73 | 0 | fp8_add(c[0], t0, t4); |
74 | 0 | } RLC_CATCH_ANY { |
75 | 0 | RLC_THROW(ERR_CAUGHT); |
76 | 0 | } RLC_FINALLY { |
77 | 0 | fp8_free(t0); |
78 | 0 | fp8_free(t1); |
79 | 0 | fp8_free(t4); |
80 | 0 | } |
81 | 0 | } |
82 | | |
/* Sparse ("dxs") multiplication c = a * b, basic method. The fast path
 * applies when the low fp4 coordinate of b_1 is zero (b_1 = b_11 * s), as
 * produced by pairing line evaluations; otherwise a partially-sparse product
 * is computed, where only the first coordinate of b_0 is used. */
void fp16_mul_dxs_basic(fp16_t c, const fp16_t a, const fp16_t b) {
	fp8_t t0, t1, t4;

	fp8_null(t0);
	fp8_null(t1);
	fp8_null(t4);

	RLC_TRY {
		fp8_new(t0);
		fp8_new(t1);
		fp8_new(t4);

		/* Karatsuba algorithm. */

		if (fp4_is_zero(b[1][0])) {
			/* t0 = a_0 * b_0. */
			fp8_mul(t0, a[0], b[0]);

			/* t1 = a_1 * b_1 with b_1 = b_11 * s, so that
			 * a_1 * b_1 = (a_11 * b_11) * s^2 + (a_10 * b_11) * s. */
			fp4_mul(t1[0], a[1][1], b[1][1]);
			fp4_add(t1[1], a[1][0], a[1][1]);
			fp4_mul(t1[1], t1[1], b[1][1]);
			/* t1[1] = (a_10 + a_11) * b_11 - a_11 * b_11 = a_10 * b_11. */
			fp4_sub(t1[1], t1[1], t1[0]);
			/* t1[0] = a_11 * b_11 * s^2, reduced by the quartic non-residue. */
			fp4_mul_art(t1[0], t1[0]);
		} else {
#if EP_ADD == BASIC
			/* t0 = a_0 * b_0, using only the first fp coordinate of b_0
			 * (assumes b_0 is sparse in this configuration — confirm against
			 * the pairing code that builds b). */
			for (int i = 0; i < 2; i++) {
				for (int j = 0; j < 2; j++) {
					for (int k = 0; k < 2; k++) {
						fp_mul(t0[i][j][k], a[0][i][j][k], b[0][0][0][0]);
					}
				}
			}
#else
			/* t0 = a_0 * b_0, using only the first fp4 coordinate of b_0. */
			for (int i = 0; i < 2; i++) {
				fp4_mul(t0[i], a[0][i], b[0][0]);
			}
#endif
			/* t1 = a_1 * b_1. */
			fp8_mul(t1, a[1], b[1]);
		}
		/* t4 = b_0 + b_1. */
		fp8_add(t4, b[0], b[1]);

		/* c_1 = a_0 + a_1. */
		fp8_add(c[1], a[0], a[1]);

		/* c_1 = (a_0 + a_1) * (b_0 + b_1) - a_0*b_0 - a_1*b_1. */
		fp8_mul(c[1], c[1], t4);
		fp8_sub(c[1], c[1], t0);
		fp8_sub(c[1], c[1], t1);

		/* c_0 = a_0b_0 + v * a_1b_1. */
		fp8_mul_art(t4, t1);
		fp8_add(c[0], t0, t4);
	} RLC_CATCH_ANY {
		RLC_THROW(ERR_CAUGHT);
	} RLC_FINALLY {
		fp8_free(t0);
		fp8_free(t1);
		fp8_free(t4);
	}
}
148 | | |
149 | | #endif |
150 | | |
151 | | #if PP_EXT == LAZYR || !defined(STRIP) |
152 | | |
153 | 0 | void fp16_mul_unr(dv16_t c, const fp16_t a, const fp16_t b) { |
154 | 0 | fp8_t t0, t1; |
155 | 0 | dv8_t u0, u1, u2, u3; |
156 | |
|
157 | 0 | fp8_null(t0); |
158 | 0 | fp8_null(t1); |
159 | 0 | dv8_null(u0); |
160 | 0 | dv8_null(u1); |
161 | 0 | dv8_null(u2); |
162 | 0 | dv8_null(u3); |
163 | |
|
164 | 0 | RLC_TRY { |
165 | 0 | fp8_new(t0); |
166 | 0 | fp8_new(t1); |
167 | 0 | dv8_new(u0); |
168 | 0 | dv8_new(u1); |
169 | 0 | dv8_new(u2); |
170 | 0 | dv8_new(u3); |
171 | | |
172 | | /* Karatsuba algorithm. */ |
173 | | |
174 | | /* u0 = a_0 * b_0. */ |
175 | 0 | fp8_mul_unr(u0, a[0], b[0]); |
176 | | /* u1 = a_1 * b_1. */ |
177 | 0 | fp8_mul_unr(u1, a[1], b[1]); |
178 | | /* t1 = a_0 + a_1. */ |
179 | 0 | fp8_add(t0, a[0], a[1]); |
180 | | /* t0 = b_0 + b_1. */ |
181 | 0 | fp8_add(t1, b[0], b[1]); |
182 | | /* u2 = (a_0 + a_1) * (b_0 + b_1) */ |
183 | 0 | fp8_mul_unr(u2, t0, t1); |
184 | | /* c_1 = u2 - a_0b_0 - a_1b_1. */ |
185 | 0 | for (int i = 0; i < 2; i++) { |
186 | 0 | for (int j = 0; j < 2; j++) { |
187 | 0 | fp2_subc_low(c[1][i][j], u2[i][j], u0[i][j]); |
188 | 0 | fp2_subc_low(c[1][i][j], c[1][i][j], u1[i][j]); |
189 | 0 | } |
190 | 0 | } |
191 | | /* c_0 = a_0b_0 + v * a_1b_1. */ |
192 | 0 | fp2_nord_low(u2[0][0], u1[1][1]); |
193 | 0 | dv_copy(u2[0][1][0], u1[1][0][0], 2 * RLC_FP_DIGS); |
194 | 0 | dv_copy(u2[0][1][1], u1[1][0][1], 2 * RLC_FP_DIGS); |
195 | 0 | dv_copy(u2[1][0][0], u1[0][0][0], 2 * RLC_FP_DIGS); |
196 | 0 | dv_copy(u2[1][0][1], u1[0][0][1], 2 * RLC_FP_DIGS); |
197 | 0 | dv_copy(u2[1][1][0], u1[0][1][0], 2 * RLC_FP_DIGS); |
198 | 0 | dv_copy(u2[1][1][1], u1[0][1][1], 2 * RLC_FP_DIGS); |
199 | 0 | for (int i = 0; i < 2; i++) { |
200 | 0 | for (int j = 0; j < 2; j++) { |
201 | 0 | fp2_addc_low(c[0][i][j], u0[i][j], u2[i][j]); |
202 | 0 | } |
203 | 0 | } |
204 | 0 | } RLC_CATCH_ANY { |
205 | 0 | RLC_THROW(ERR_CAUGHT); |
206 | 0 | } RLC_FINALLY { |
207 | 0 | fp8_free(t0); |
208 | 0 | fp8_free(t1); |
209 | 0 | dv8_free(u0); |
210 | 0 | dv8_free(u1); |
211 | 0 | dv8_free(u2); |
212 | 0 | dv8_free(u3); |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | 0 | void fp16_mul_lazyr(fp16_t c, const fp16_t a, const fp16_t b) { |
217 | 0 | dv16_t t; |
218 | |
|
219 | 0 | dv16_null(t); |
220 | |
|
221 | 0 | RLC_TRY { |
222 | 0 | dv16_new(t); |
223 | 0 | fp16_mul_unr(t, a, b); |
224 | 0 | for (int i = 0; i < 2; i++) { |
225 | 0 | for (int j = 0; j < 2; j++) { |
226 | 0 | for (int k = 0; k < 2; k++) { |
227 | 0 | fp2_rdcn_low(c[i][j][k], t[i][j][k]); |
228 | 0 | } |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } RLC_CATCH_ANY { |
232 | 0 | RLC_THROW(ERR_CAUGHT); |
233 | 0 | } RLC_FINALLY { |
234 | 0 | dv16_free(t); |
235 | 0 | } |
236 | 0 | } |
237 | | |
238 | 0 | void fp16_mul_dxs_lazyr(fp16_t c, const fp16_t a, const fp16_t b) { |
239 | 0 | fp8_t t0, t1; |
240 | 0 | dv8_t u0, u1, u2, u3; |
241 | 0 | dv16_t t; |
242 | |
|
243 | 0 | fp8_null(t0); |
244 | 0 | fp8_null(t1); |
245 | 0 | dv8_null(u0); |
246 | 0 | dv8_null(u1); |
247 | 0 | dv8_null(u2); |
248 | 0 | dv8_null(u3); |
249 | 0 | dv16_null(t); |
250 | |
|
251 | 0 | RLC_TRY { |
252 | 0 | fp8_new(t0); |
253 | 0 | fp8_new(t1); |
254 | 0 | dv8_new(u0); |
255 | 0 | dv8_new(u1); |
256 | 0 | dv8_new(u2); |
257 | 0 | dv8_new(u3); |
258 | 0 | dv16_new(t); |
259 | | |
260 | | /* Karatsuba algorithm. */ |
261 | |
|
262 | 0 | if (fp4_is_zero(b[1][0])) { |
263 | | /* u0 = a_0 * b_0. */ |
264 | 0 | fp8_mul_unr(u0, a[0], b[0]); |
265 | | |
266 | | /* u1 = a_1 * b_1. */ |
267 | 0 | fp4_mul_unr(u2[0], a[1][1], b[1][1]); |
268 | 0 | fp4_add(t1[0], a[1][0], a[1][1]); |
269 | 0 | fp4_mul_unr(u2[1], t1[0], b[1][1]); |
270 | 0 | fp2_subc_low(u1[1][0], u2[1][0], u2[0][0]); |
271 | 0 | fp2_subc_low(u1[1][1], u2[1][1], u2[0][1]); |
272 | 0 | fp2_nord_low(u1[0][0], u2[0][1]); |
273 | 0 | dv_copy(u1[0][1][0], u2[0][0][0], 2 * RLC_FP_DIGS); |
274 | 0 | dv_copy(u1[0][1][1], u2[0][0][1], 2 * RLC_FP_DIGS); |
275 | 0 | } else { |
276 | | #if EP_ADD == BASIC |
277 | | /* u0 = a_0 * b_0. */ |
278 | | for (int i = 0; i < 2; i++) { |
279 | | for (int j = 0; j < 2; j++) { |
280 | | for (int k = 0; k < 2; k++) { |
281 | | fp_muln_low(u0[i][j][k], a[0][i][j][k], b[0][0][0][0]); |
282 | | } |
283 | | } |
284 | | } |
285 | | #else |
286 | | /* u0 = a_0 * b_0. */ |
287 | 0 | for (int i = 0; i < 2; i++) { |
288 | 0 | fp4_mul_unr(u0[i], a[0][i], b[0][0]); |
289 | 0 | } |
290 | 0 | #endif |
291 | | /* u1 = a_1 * b_1. */ |
292 | 0 | fp8_mul_unr(u1, a[1], b[1]); |
293 | 0 | } |
294 | | /* t1 = a_0 + a_1. */ |
295 | 0 | fp8_add(t0, a[0], a[1]); |
296 | | /* t0 = b_0 + b_1. */ |
297 | 0 | fp8_add(t1, b[0], b[1]); |
298 | | /* u2 = (a_0 + a_1) * (b_0 + b_1) */ |
299 | 0 | fp8_mul_unr(u2, t0, t1); |
300 | | |
301 | | /* c_1 = u2 - a_0b_0 - a_1b_1. */ |
302 | 0 | for (int i = 0; i < 2; i++) { |
303 | 0 | for (int j = 0; j < 2; j++) { |
304 | 0 | fp2_subc_low(t[1][i][j], u2[i][j], u0[i][j]); |
305 | 0 | fp2_subc_low(t[1][i][j], t[1][i][j], u1[i][j]); |
306 | 0 | } |
307 | 0 | } |
308 | | /* c_0 = a_0b_0 + v * a_1b_1. */ |
309 | 0 | fp2_nord_low(u2[0][0], u1[1][1]); |
310 | 0 | dv_copy(u2[0][1][0], u1[1][0][0], 2 * RLC_FP_DIGS); |
311 | 0 | dv_copy(u2[0][1][1], u1[1][0][1], 2 * RLC_FP_DIGS); |
312 | 0 | dv_copy(u2[1][0][0], u1[0][0][0], 2 * RLC_FP_DIGS); |
313 | 0 | dv_copy(u2[1][0][1], u1[0][0][1], 2 * RLC_FP_DIGS); |
314 | 0 | dv_copy(u2[1][1][0], u1[0][1][0], 2 * RLC_FP_DIGS); |
315 | 0 | dv_copy(u2[1][1][1], u1[0][1][1], 2 * RLC_FP_DIGS); |
316 | 0 | for (int i = 0; i < 2; i++) { |
317 | 0 | for (int j = 0; j < 2; j++) { |
318 | 0 | fp2_addc_low(t[0][i][j], u0[i][j], u2[i][j]); |
319 | 0 | } |
320 | 0 | } |
321 | 0 | for (int i = 0; i < 2; i++) { |
322 | 0 | for (int j = 0; j < 2; j++) { |
323 | 0 | for (int k = 0; k < 2; k++) { |
324 | 0 | fp2_rdcn_low(c[i][j][k], t[i][j][k]); |
325 | 0 | } |
326 | 0 | } |
327 | 0 | } |
328 | 0 | } RLC_CATCH_ANY { |
329 | 0 | RLC_THROW(ERR_CAUGHT); |
330 | 0 | } RLC_FINALLY { |
331 | 0 | fp8_free(t0); |
332 | 0 | fp8_free(t1); |
333 | 0 | dv8_free(u0); |
334 | 0 | dv8_free(u1); |
335 | 0 | dv8_free(u2); |
336 | 0 | dv8_free(u3); |
337 | 0 | dv16_free(t); |
338 | 0 | } |
339 | 0 | } |
340 | | |
341 | | #endif |
342 | | |
343 | 0 | void fp16_mul_art(fp16_t c, const fp16_t a) { |
344 | 0 | fp8_t t0; |
345 | |
|
346 | 0 | fp8_null(t0); |
347 | |
|
348 | 0 | RLC_TRY { |
349 | 0 | fp8_new(t0); |
350 | | |
351 | | /* (a_0 + a_1 * v) * v = a_0 * v + a_1 * v^4 */ |
352 | 0 | fp8_copy(t0, a[0]); |
353 | 0 | fp8_mul_art(c[0], a[1]); |
354 | 0 | fp8_copy(c[1], t0); |
355 | 0 | } RLC_CATCH_ANY { |
356 | 0 | RLC_THROW(ERR_CAUGHT); |
357 | 0 | } RLC_FINALLY { |
358 | 0 | fp8_free(t0); |
359 | 0 | } |
360 | 0 | } |