Coverage Report

Created: 2026-04-01 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/relic/src/fpx/relic_fp16_mul.c
Line
Count
Source
1
/*
2
 * RELIC is an Efficient LIbrary for Cryptography
3
 * Copyright (c) 2023 RELIC Authors
4
 *
5
 * This file is part of RELIC. RELIC is legal property of its developers,
6
 * whose names are not listed here. Please refer to the COPYRIGHT file
7
 * for contact information.
8
 *
9
 * RELIC is free software; you can redistribute it and/or modify it under the
10
 * terms of the version 4.1 (or later) of the GNU Lesser General Public License
11
 * as published by the Free Software Foundation; or version 4.0 of the Apache
12
 * License as published by the Apache Software Foundation. See the LICENSE files
13
 * for more details.
14
 *
15
 * RELIC is distributed in the hope that it will be useful, but WITHOUT ANY
16
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17
 * A PARTICULAR PURPOSE. See the LICENSE files for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public or the
20
 * Apache License along with RELIC. If not, see <https://www.gnu.org/licenses/>
21
 * or <https://www.apache.org/licenses/>.
22
 */
23
24
/**
25
 * @file
26
 *
27
 * Implementation of multiplication in an sextadecic extension of a prime field.
28
 *
29
 * @ingroup fpx
30
 */
31
32
#include "relic_core.h"
33
#include "relic_fp_low.h"
34
#include "relic_fpx_low.h"
35
36
/*============================================================================*/
37
/* Public definitions                                                         */
38
/*============================================================================*/
39
40
#if FPX_RDC == BASIC || !defined(STRIP)
41
42
0
void fp16_mul_basic(fp16_t c, const fp16_t a, const fp16_t b) {
43
0
  fp8_t t0, t1, t4;
44
45
0
  fp8_null(t0);
46
0
  fp8_null(t1);
47
0
  fp8_null(t4);
48
49
0
  RLC_TRY {
50
0
    fp8_new(t0);
51
0
    fp8_new(t1);
52
0
    fp8_new(t4);
53
54
    /* Karatsuba algorithm. */
55
56
    /* t0 = a_0 * b_0. */
57
0
    fp8_mul(t0, a[0], b[0]);
58
    /* t1 = a_1 * b_1. */
59
0
    fp8_mul(t1, a[1], b[1]);
60
    /* t4 = b_0 + b_1. */
61
0
    fp8_add(t4, b[0], b[1]);
62
63
    /* c_1 = a_0 + a_1. */
64
0
    fp8_add(c[1], a[0], a[1]);
65
66
    /* c_1 = (a_0 + a_1) * (b_0 + b_1) */
67
0
    fp8_mul(c[1], c[1], t4);
68
0
    fp8_sub(c[1], c[1], t0);
69
0
    fp8_sub(c[1], c[1], t1);
70
71
    /* c_0 = a_0b_0 + v * a_1b_1. */
72
0
    fp8_mul_art(t4, t1);
73
0
    fp8_add(c[0], t0, t4);
74
0
  } RLC_CATCH_ANY {
75
0
    RLC_THROW(ERR_CAUGHT);
76
0
  } RLC_FINALLY {
77
0
    fp8_free(t0);
78
0
    fp8_free(t1);
79
0
    fp8_free(t4);
80
0
  }
81
0
}
82
83
0
void fp16_mul_dxs_basic(fp16_t c, const fp16_t a, const fp16_t b) {
84
0
  fp8_t t0, t1, t4;
85
86
0
  fp8_null(t0);
87
0
  fp8_null(t1);
88
0
  fp8_null(t4);
89
90
0
  RLC_TRY {
91
0
    fp8_new(t0);
92
0
    fp8_new(t1);
93
0
    fp8_new(t4);
94
95
    /* Karatsuba algorithm. */
96
97
0
    if (fp4_is_zero(b[1][0])) {
98
      /* t0 = a_0 * b_0. */
99
0
      fp8_mul(t0, a[0], b[0]);
100
101
      /* t1 = a_1 * b_1. */
102
0
      fp4_mul(t1[0], a[1][1], b[1][1]);
103
0
      fp4_add(t1[1], a[1][0], a[1][1]);
104
0
      fp4_mul(t1[1], t1[1], b[1][1]);
105
0
      fp4_sub(t1[1], t1[1], t1[0]);
106
0
      fp4_mul_art(t1[0], t1[0]);
107
0
    } else {
108
#if EP_ADD == BASIC
109
      /* t0 = a_0 * b_0. */
110
      for (int i = 0; i < 2; i++) {
111
        for (int j = 0; j < 2; j++) {
112
          for (int k = 0; k < 2; k++) {
113
            fp_mul(t0[i][j][k], a[0][i][j][k], b[0][0][0][0]);
114
          }
115
        }
116
      }
117
#else
118
      /* t0 = a_0 * b_0. */
119
0
      for (int i = 0; i < 2; i++) {
120
0
        fp4_mul(t0[i], a[0][i], b[0][0]);
121
0
      }
122
0
#endif
123
      /* t1 = a_1 * b_1. */
124
0
      fp8_mul(t1, a[1], b[1]);
125
0
    }
126
    /* t4 = b_0 + b_1. */
127
0
    fp8_add(t4, b[0], b[1]);
128
129
    /* c_1 = a_0 + a_1. */
130
0
    fp8_add(c[1], a[0], a[1]);
131
132
    /* c_1 = (a_0 + a_1) * (b_0 + b_1) */
133
0
    fp8_mul(c[1], c[1], t4);
134
0
    fp8_sub(c[1], c[1], t0);
135
0
    fp8_sub(c[1], c[1], t1);
136
137
    /* c_0 = a_0b_0 + v * a_1b_1. */
138
0
    fp8_mul_art(t4, t1);
139
0
    fp8_add(c[0], t0, t4);
140
0
  } RLC_CATCH_ANY {
141
0
    RLC_THROW(ERR_CAUGHT);
142
0
  } RLC_FINALLY {
143
0
    fp8_free(t0);
144
0
    fp8_free(t1);
145
0
    fp8_free(t4);
146
0
  }
147
0
}
148
149
#endif
150
151
#if PP_EXT == LAZYR || !defined(STRIP)
152
153
0
void fp16_mul_unr(dv16_t c, const fp16_t a, const fp16_t b) {
154
0
  fp8_t t0, t1;
155
0
  dv8_t u0, u1, u2, u3;
156
157
0
  fp8_null(t0);
158
0
  fp8_null(t1);
159
0
  dv8_null(u0);
160
0
  dv8_null(u1);
161
0
  dv8_null(u2);
162
0
  dv8_null(u3);
163
164
0
  RLC_TRY {
165
0
    fp8_new(t0);
166
0
    fp8_new(t1);
167
0
    dv8_new(u0);
168
0
    dv8_new(u1);
169
0
    dv8_new(u2);
170
0
    dv8_new(u3);
171
172
    /* Karatsuba algorithm. */
173
174
    /* u0 = a_0 * b_0. */
175
0
    fp8_mul_unr(u0, a[0], b[0]);
176
    /* u1 = a_1 * b_1. */
177
0
    fp8_mul_unr(u1, a[1], b[1]);
178
    /* t1 = a_0 + a_1. */
179
0
    fp8_add(t0, a[0], a[1]);
180
    /* t0 = b_0 + b_1. */
181
0
    fp8_add(t1, b[0], b[1]);
182
    /* u2 = (a_0 + a_1) * (b_0 + b_1) */
183
0
    fp8_mul_unr(u2, t0, t1);
184
    /* c_1 = u2 - a_0b_0 - a_1b_1. */
185
0
    for (int i = 0; i < 2; i++) {
186
0
      for (int j = 0; j < 2; j++) {
187
0
        fp2_subc_low(c[1][i][j], u2[i][j], u0[i][j]);
188
0
        fp2_subc_low(c[1][i][j], c[1][i][j], u1[i][j]);
189
0
      }
190
0
    }
191
    /* c_0 = a_0b_0 + v * a_1b_1. */
192
0
    fp2_nord_low(u2[0][0], u1[1][1]);
193
0
    dv_copy(u2[0][1][0], u1[1][0][0], 2 * RLC_FP_DIGS);
194
0
    dv_copy(u2[0][1][1], u1[1][0][1], 2 * RLC_FP_DIGS);
195
0
    dv_copy(u2[1][0][0], u1[0][0][0], 2 * RLC_FP_DIGS);
196
0
    dv_copy(u2[1][0][1], u1[0][0][1], 2 * RLC_FP_DIGS);
197
0
    dv_copy(u2[1][1][0], u1[0][1][0], 2 * RLC_FP_DIGS);
198
0
    dv_copy(u2[1][1][1], u1[0][1][1], 2 * RLC_FP_DIGS);
199
0
    for (int i = 0; i < 2; i++) {
200
0
      for (int j = 0; j < 2; j++) {
201
0
        fp2_addc_low(c[0][i][j], u0[i][j], u2[i][j]);
202
0
      }
203
0
    }
204
0
  } RLC_CATCH_ANY {
205
0
    RLC_THROW(ERR_CAUGHT);
206
0
  } RLC_FINALLY {
207
0
    fp8_free(t0);
208
0
    fp8_free(t1);
209
0
    dv8_free(u0);
210
0
    dv8_free(u1);
211
0
    dv8_free(u2);
212
0
    dv8_free(u3);
213
0
  }
214
0
}
215
216
0
void fp16_mul_lazyr(fp16_t c, const fp16_t a, const fp16_t b) {
217
0
  dv16_t t;
218
219
0
  dv16_null(t);
220
221
0
  RLC_TRY {
222
0
    dv16_new(t);
223
0
    fp16_mul_unr(t, a, b);
224
0
    for (int i = 0; i < 2; i++) {
225
0
      for (int j = 0; j < 2; j++) {
226
0
        for (int k = 0; k < 2; k++) {
227
0
          fp2_rdcn_low(c[i][j][k], t[i][j][k]);
228
0
        }
229
0
      }
230
0
    }
231
0
  } RLC_CATCH_ANY {
232
0
    RLC_THROW(ERR_CAUGHT);
233
0
  } RLC_FINALLY {
234
0
    dv16_free(t);
235
0
  }
236
0
}
237
238
0
void fp16_mul_dxs_lazyr(fp16_t c, const fp16_t a, const fp16_t b) {
239
0
  fp8_t t0, t1;
240
0
  dv8_t u0, u1, u2, u3;
241
0
  dv16_t t;
242
243
0
  fp8_null(t0);
244
0
  fp8_null(t1);
245
0
  dv8_null(u0);
246
0
  dv8_null(u1);
247
0
  dv8_null(u2);
248
0
  dv8_null(u3);
249
0
  dv16_null(t);
250
251
0
  RLC_TRY {
252
0
    fp8_new(t0);
253
0
    fp8_new(t1);
254
0
    dv8_new(u0);
255
0
    dv8_new(u1);
256
0
    dv8_new(u2);
257
0
    dv8_new(u3);
258
0
    dv16_new(t);
259
260
    /* Karatsuba algorithm. */
261
262
0
    if (fp4_is_zero(b[1][0])) {
263
      /* u0 = a_0 * b_0. */
264
0
      fp8_mul_unr(u0, a[0], b[0]);
265
266
      /* u1 = a_1 * b_1. */
267
0
      fp4_mul_unr(u2[0], a[1][1], b[1][1]);
268
0
      fp4_add(t1[0], a[1][0], a[1][1]);
269
0
      fp4_mul_unr(u2[1], t1[0], b[1][1]);
270
0
      fp2_subc_low(u1[1][0], u2[1][0], u2[0][0]);
271
0
      fp2_subc_low(u1[1][1], u2[1][1], u2[0][1]);
272
0
      fp2_nord_low(u1[0][0], u2[0][1]);
273
0
      dv_copy(u1[0][1][0], u2[0][0][0], 2 * RLC_FP_DIGS);
274
0
      dv_copy(u1[0][1][1], u2[0][0][1], 2 * RLC_FP_DIGS);
275
0
    } else {
276
#if EP_ADD == BASIC
277
      /* u0 = a_0 * b_0. */
278
      for (int i = 0; i < 2; i++) {
279
        for (int j = 0; j < 2; j++) {
280
          for (int k = 0; k < 2; k++) {
281
            fp_muln_low(u0[i][j][k], a[0][i][j][k], b[0][0][0][0]);
282
          }
283
        }
284
      }
285
#else
286
      /* u0 = a_0 * b_0. */
287
0
      for (int i = 0; i < 2; i++) {
288
0
        fp4_mul_unr(u0[i], a[0][i], b[0][0]);
289
0
      }
290
0
#endif
291
      /* u1 = a_1 * b_1. */
292
0
      fp8_mul_unr(u1, a[1], b[1]);
293
0
    }
294
    /* t1 = a_0 + a_1. */
295
0
    fp8_add(t0, a[0], a[1]);
296
    /* t0 = b_0 + b_1. */
297
0
    fp8_add(t1, b[0], b[1]);
298
    /* u2 = (a_0 + a_1) * (b_0 + b_1) */
299
0
    fp8_mul_unr(u2, t0, t1);
300
301
    /* c_1 = u2 - a_0b_0 - a_1b_1. */
302
0
    for (int i = 0; i < 2; i++) {
303
0
      for (int j = 0; j < 2; j++) {
304
0
        fp2_subc_low(t[1][i][j], u2[i][j], u0[i][j]);
305
0
        fp2_subc_low(t[1][i][j], t[1][i][j], u1[i][j]);
306
0
      }
307
0
    }
308
    /* c_0 = a_0b_0 + v * a_1b_1. */
309
0
    fp2_nord_low(u2[0][0], u1[1][1]);
310
0
    dv_copy(u2[0][1][0], u1[1][0][0], 2 * RLC_FP_DIGS);
311
0
    dv_copy(u2[0][1][1], u1[1][0][1], 2 * RLC_FP_DIGS);
312
0
    dv_copy(u2[1][0][0], u1[0][0][0], 2 * RLC_FP_DIGS);
313
0
    dv_copy(u2[1][0][1], u1[0][0][1], 2 * RLC_FP_DIGS);
314
0
    dv_copy(u2[1][1][0], u1[0][1][0], 2 * RLC_FP_DIGS);
315
0
    dv_copy(u2[1][1][1], u1[0][1][1], 2 * RLC_FP_DIGS);
316
0
    for (int i = 0; i < 2; i++) {
317
0
      for (int j = 0; j < 2; j++) {
318
0
        fp2_addc_low(t[0][i][j], u0[i][j], u2[i][j]);
319
0
      }
320
0
    }
321
0
    for (int i = 0; i < 2; i++) {
322
0
      for (int j = 0; j < 2; j++) {
323
0
        for (int k = 0; k < 2; k++) {
324
0
          fp2_rdcn_low(c[i][j][k], t[i][j][k]);
325
0
        }
326
0
      }
327
0
    }
328
0
  } RLC_CATCH_ANY {
329
0
    RLC_THROW(ERR_CAUGHT);
330
0
  } RLC_FINALLY {
331
0
    fp8_free(t0);
332
0
    fp8_free(t1);
333
0
    dv8_free(u0);
334
0
    dv8_free(u1);
335
0
    dv8_free(u2);
336
0
    dv8_free(u3);
337
0
    dv16_free(t);
338
0
  }
339
0
}
340
341
#endif
342
343
0
void fp16_mul_art(fp16_t c, const fp16_t a) {
344
0
  fp8_t t0;
345
346
0
  fp8_null(t0);
347
348
0
  RLC_TRY {
349
0
    fp8_new(t0);
350
351
    /* (a_0 + a_1 * v) * v = a_0 * v + a_1 * v^4 */
352
0
    fp8_copy(t0, a[0]);
353
0
    fp8_mul_art(c[0], a[1]);
354
0
    fp8_copy(c[1], t0);
355
0
  } RLC_CATCH_ANY {
356
0
    RLC_THROW(ERR_CAUGHT);
357
0
  } RLC_FINALLY {
358
0
    fp8_free(t0);
359
0
  }
360
0
}