/src/relic/src/fpx/relic_fp16_mul.c
Line | Count | Source |
1 | | /* |
2 | | * RELIC is an Efficient LIbrary for Cryptography |
3 | | * Copyright (c) 2023 RELIC Authors |
4 | | * |
5 | | * This file is part of RELIC. RELIC is legal property of its developers, |
6 | | * whose names are not listed here. Please refer to the COPYRIGHT file |
7 | | * for contact information. |
8 | | * |
9 | | * RELIC is free software; you can redistribute it and/or modify it under the |
10 | | * terms of the version 2.1 (or later) of the GNU Lesser General Public License |
11 | | * as published by the Free Software Foundation; or version 2.0 of the Apache |
12 | | * License as published by the Apache Software Foundation. See the LICENSE files |
13 | | * for more details. |
14 | | * |
15 | | * RELIC is distributed in the hope that it will be useful, but WITHOUT ANY |
16 | | * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
17 | | * A PARTICULAR PURPOSE. See the LICENSE files for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU Lesser General Public or the |
20 | | * Apache License along with RELIC. If not, see <https://www.gnu.org/licenses/> |
21 | | * or <https://www.apache.org/licenses/>. |
22 | | */ |
23 | | |
24 | | /** |
25 | | * @file |
26 | | * |
27 | | * Implementation of multiplication in a sextadecic extension of a prime field. |
28 | | * |
29 | | * @ingroup fpx |
30 | | */ |
31 | | |
32 | | #include "relic_core.h" |
33 | | #include "relic_fp_low.h" |
34 | | #include "relic_fpx_low.h" |
35 | | |
36 | | /*============================================================================*/ |
37 | | /* Public definitions */ |
38 | | /*============================================================================*/ |
39 | | |
40 | | #if FPX_RDC == BASIC || !defined(STRIP) |
41 | | |
42 | 0 | void fp16_mul_basic(fp16_t c, const fp16_t a, const fp16_t b) { |
43 | 0 | fp8_t t0, t1, t4; |
44 | |
|
45 | 0 | fp8_null(t0); |
46 | 0 | fp8_null(t1); |
47 | 0 | fp8_null(t4); |
48 | |
|
49 | 0 | RLC_TRY { |
50 | 0 | fp8_new(t0); |
51 | 0 | fp8_new(t1); |
52 | 0 | fp8_new(t4); |
53 | | |
54 | | /* Karatsuba algorithm. */ |
55 | | |
56 | | /* t0 = a_0 * b_0. */ |
57 | 0 | fp8_mul(t0, a[0], b[0]); |
58 | | /* t1 = a_1 * b_1. */ |
59 | 0 | fp8_mul(t1, a[1], b[1]); |
60 | | /* t4 = b_0 + b_1. */ |
61 | 0 | fp8_add(t4, b[0], b[1]); |
62 | | |
63 | | /* c_1 = a_0 + a_1. */ |
64 | 0 | fp8_add(c[1], a[0], a[1]); |
65 | | |
66 | | /* c_1 = (a_0 + a_1) * (b_0 + b_1) */ |
67 | 0 | fp8_mul(c[1], c[1], t4); |
68 | 0 | fp8_sub(c[1], c[1], t0); |
69 | 0 | fp8_sub(c[1], c[1], t1); |
70 | | |
71 | | /* c_0 = a_0b_0 + v * a_1b_1. */ |
72 | 0 | fp8_mul_art(t4, t1); |
73 | 0 | fp8_add(c[0], t0, t4); |
74 | 0 | } RLC_CATCH_ANY { |
75 | 0 | RLC_THROW(ERR_CAUGHT); |
76 | 0 | } RLC_FINALLY { |
77 | 0 | fp8_free(t0); |
78 | 0 | fp8_free(t1); |
79 | 0 | fp8_free(t4); |
80 | 0 | } |
81 | 0 | } |
82 | | |
/* Sparse ("dxs") multiplication c = a * b, basic method. The fast path
 * applies when the low fp4 coordinate of b_1 is zero (b_1 = b_11 * s), as
 * produced by pairing line evaluations; otherwise a partially-sparse product
 * is computed, where only the first coordinate of b_0 is used. */
void fp16_mul_dxs_basic(fp16_t c, const fp16_t a, const fp16_t b) {
	fp8_t t0, t1, t4;

	fp8_null(t0);
	fp8_null(t1);
	fp8_null(t4);

	RLC_TRY {
		fp8_new(t0);
		fp8_new(t1);
		fp8_new(t4);

		/* Karatsuba algorithm. */

		if (fp4_is_zero(b[1][0])) {
			/* t0 = a_0 * b_0. */
			fp8_mul(t0, a[0], b[0]);

			/* t1 = a_1 * b_1 with b_1 = b_11 * s, so that
			 * a_1 * b_1 = (a_11 * b_11) * s^2 + (a_10 * b_11) * s. */
			fp4_mul(t1[0], a[1][1], b[1][1]);
			fp4_add(t1[1], a[1][0], a[1][1]);
			fp4_mul(t1[1], t1[1], b[1][1]);
			/* t1[1] = (a_10 + a_11) * b_11 - a_11 * b_11 = a_10 * b_11. */
			fp4_sub(t1[1], t1[1], t1[0]);
			/* t1[0] = a_11 * b_11 * s^2, reduced by the quartic non-residue. */
			fp4_mul_art(t1[0], t1[0]);
		} else {
#if EP_ADD == BASIC
			/* t0 = a_0 * b_0, using only the first fp coordinate of b_0
			 * (assumes b_0 is sparse in this configuration — confirm against
			 * the pairing code that builds b). */
			for (int i = 0; i < 2; i++) {
				for (int j = 0; j < 2; j++) {
					for (int k = 0; k < 2; k++) {
						fp_mul(t0[i][j][k], a[0][i][j][k], b[0][0][0][0]);
					}
				}
			}
#else
			/* t0 = a_0 * b_0, using only the first fp4 coordinate of b_0. */
			for (int i = 0; i < 2; i++) {
				fp4_mul(t0[i], a[0][i], b[0][0]);
			}
#endif
			/* t1 = a_1 * b_1. */
			fp8_mul(t1, a[1], b[1]);
		}
		/* t4 = b_0 + b_1. */
		fp8_add(t4, b[0], b[1]);

		/* c_1 = a_0 + a_1. */
		fp8_add(c[1], a[0], a[1]);

		/* c_1 = (a_0 + a_1) * (b_0 + b_1) - a_0*b_0 - a_1*b_1. */
		fp8_mul(c[1], c[1], t4);
		fp8_sub(c[1], c[1], t0);
		fp8_sub(c[1], c[1], t1);

		/* c_0 = a_0b_0 + v * a_1b_1. */
		fp8_mul_art(t4, t1);
		fp8_add(c[0], t0, t4);
	} RLC_CATCH_ANY {
		RLC_THROW(ERR_CAUGHT);
	} RLC_FINALLY {
		fp8_free(t0);
		fp8_free(t1);
		fp8_free(t4);
	}
}
148 | | |
149 | | #endif |
150 | | |
151 | | #if PP_EXT == LAZYR || !defined(STRIP) |
152 | | |
153 | 0 | void fp16_mul_unr(dv16_t c, const fp16_t a, const fp16_t b) { |
154 | 0 | fp8_t t0, t1; |
155 | 0 | dv8_t u0, u1, u2, u3; |
156 | |
|
157 | 0 | fp8_null(t0); |
158 | 0 | fp8_null(t1); |
159 | 0 | dv8_null(u0); |
160 | 0 | dv8_null(u1); |
161 | 0 | dv8_null(u2); |
162 | 0 | dv8_null(u3); |
163 | |
|
164 | 0 | RLC_TRY { |
165 | 0 | fp8_new(t0); |
166 | 0 | fp8_new(t1); |
167 | 0 | dv8_new(u0); |
168 | 0 | dv8_new(u1); |
169 | 0 | dv8_new(u2); |
170 | 0 | dv8_new(u3); |
171 | | |
172 | | /* Karatsuba algorithm. */ |
173 | | |
174 | | /* u0 = a_0 * b_0. */ |
175 | 0 | fp8_mul_unr(u0, a[0], b[0]); |
176 | | /* u1 = a_1 * b_1. */ |
177 | 0 | fp8_mul_unr(u1, a[1], b[1]); |
178 | | /* t1 = a_0 + a_1. */ |
179 | 0 | fp8_add(t0, a[0], a[1]); |
180 | | /* t0 = b_0 + b_1. */ |
181 | 0 | fp8_add(t1, b[0], b[1]); |
182 | | /* u2 = (a_0 + a_1) * (b_0 + b_1) */ |
183 | 0 | fp8_mul_unr(u2, t0, t1); |
184 | | /* c_1 = u2 - a_0b_0 - a_1b_1. */ |
185 | 0 | for (int i = 0; i < 2; i++) { |
186 | 0 | for (int j = 0; j < 2; j++) { |
187 | 0 | fp2_subc_low(c[1][i][j], u2[i][j], u0[i][j]); |
188 | 0 | fp2_subc_low(c[1][i][j], c[1][i][j], u1[i][j]); |
189 | 0 | } |
190 | 0 | } |
191 | | /* c_0 = a_0b_0 + v * a_1b_1. */ |
192 | 0 | fp2_nord_low(u2[0][0], u1[1][1]); |
193 | 0 | dv_copy(u2[0][1][0], u1[1][0][0], 2 * RLC_FP_DIGS); |
194 | 0 | dv_copy(u2[0][1][1], u1[1][0][1], 2 * RLC_FP_DIGS); |
195 | 0 | dv_copy(u2[1][0][0], u1[0][0][0], 2 * RLC_FP_DIGS); |
196 | 0 | dv_copy(u2[1][0][1], u1[0][0][1], 2 * RLC_FP_DIGS); |
197 | 0 | dv_copy(u2[1][1][0], u1[0][1][0], 2 * RLC_FP_DIGS); |
198 | 0 | dv_copy(u2[1][1][1], u1[0][1][1], 2 * RLC_FP_DIGS); |
199 | 0 | for (int i = 0; i < 2; i++) { |
200 | 0 | for (int j = 0; j < 2; j++) { |
201 | 0 | fp2_addc_low(c[0][i][j], u0[i][j], u2[i][j]); |
202 | 0 | } |
203 | 0 | } |
204 | 0 | } RLC_CATCH_ANY { |
205 | 0 | RLC_THROW(ERR_CAUGHT); |
206 | 0 | } RLC_FINALLY { |
207 | 0 | fp8_free(t0); |
208 | 0 | fp8_free(t1); |
209 | 0 | dv8_free(u0); |
210 | 0 | dv8_free(u1); |
211 | 0 | dv8_free(u2); |
212 | 0 | dv8_free(u3); |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | 0 | void fp16_mul_lazyr(fp16_t c, const fp16_t a, const fp16_t b) { |
217 | 0 | dv16_t t; |
218 | |
|
219 | 0 | dv16_null(t); |
220 | |
|
221 | 0 | RLC_TRY { |
222 | 0 | dv16_new(t); |
223 | 0 | fp16_mul_unr(t, a, b); |
224 | 0 | for (int i = 0; i < 2; i++) { |
225 | 0 | for (int j = 0; j < 2; j++) { |
226 | 0 | for (int k = 0; k < 2; k++) { |
227 | 0 | fp2_rdcn_low(c[i][j][k], t[i][j][k]); |
228 | 0 | } |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } RLC_CATCH_ANY { |
232 | 0 | RLC_THROW(ERR_CAUGHT); |
233 | 0 | } RLC_FINALLY { |
234 | 0 | dv16_free(t); |
235 | 0 | } |
236 | 0 | } |
237 | | |
238 | 0 | void fp16_mul_dxs_lazyr(fp16_t c, const fp16_t a, const fp16_t b) { |
239 | 0 | fp8_t t0, t1; |
240 | 0 | dv8_t u0, u1, u2, u3; |
241 | 0 | dv16_t t; |
242 | |
|
243 | 0 | fp8_null(t0); |
244 | 0 | fp8_null(t1); |
245 | 0 | dv8_null(u0); |
246 | 0 | dv8_null(u1); |
247 | 0 | dv8_null(u2); |
248 | 0 | dv8_null(u3); |
249 | 0 | dv16_null(t); |
250 | |
|
251 | 0 | RLC_TRY { |
252 | 0 | fp8_new(t0); |
253 | 0 | fp8_new(t1); |
254 | 0 | dv8_new(u0); |
255 | 0 | dv8_new(u1); |
256 | 0 | dv8_new(u2); |
257 | 0 | dv8_new(u3); |
258 | 0 | dv16_new(t); |
259 | | |
260 | | /* Karatsuba algorithm. */ |
261 | |
|
262 | 0 | if (fp4_is_zero(b[1][0])) { |
263 | | /* u0 = a_0 * b_0. */ |
264 | 0 | fp8_mul_unr(u0, a[0], b[0]); |
265 | | |
266 | | /* u1 = a_1 * b_1. */ |
267 | 0 | fp4_mul_unr(u2[0], a[1][1], b[1][1]); |
268 | 0 | fp4_add(t1[0], a[1][0], a[1][1]); |
269 | 0 | fp4_mul_unr(u2[1], t1[0], b[1][1]); |
270 | 0 | fp2_subc_low(u1[1][0], u2[1][0], u2[0][0]); |
271 | 0 | fp2_subc_low(u1[1][1], u2[1][1], u2[0][1]); |
272 | 0 | fp2_nord_low(u1[0][0], u2[0][1]); |
273 | 0 | dv_copy(u1[0][1][0], u2[0][0][0], 2 * RLC_FP_DIGS); |
274 | 0 | dv_copy(u1[0][1][1], u2[0][0][1], 2 * RLC_FP_DIGS); |
275 | 0 | } else { |
276 | | #if EP_ADD == BASIC |
277 | | /* u0 = a_0 * b_0. */ |
278 | | for (int i = 0; i < 2; i++) { |
279 | | for (int j = 0; j < 2; j++) { |
280 | | for (int k = 0; k < 2; k++) { |
281 | | fp_muln_low(u0[i][j][k], a[0][i][j][k], b[0][0][0][0]); |
282 | | } |
283 | | } |
284 | | } |
285 | | #else |
286 | | /* u0 = a_0 * b_0. */ |
287 | 0 | for (int i = 0; i < 2; i++) { |
288 | 0 | fp4_mul_unr(u0[i], a[0][i], b[0][0]); |
289 | 0 | } |
290 | 0 | #endif |
291 | | /* u1 = a_1 * b_1. */ |
292 | 0 | fp8_mul_unr(u1, a[1], b[1]); |
293 | 0 | } |
294 | | /* t1 = a_0 + a_1. */ |
295 | 0 | fp8_add(t0, a[0], a[1]); |
296 | | /* t0 = b_0 + b_1. */ |
297 | 0 | fp8_add(t1, b[0], b[1]); |
298 | | /* u2 = (a_0 + a_1) * (b_0 + b_1) */ |
299 | 0 | fp8_mul_unr(u2, t0, t1); |
300 | | |
301 | | /* c_1 = u2 - a_0b_0 - a_1b_1. */ |
302 | 0 | for (int i = 0; i < 2; i++) { |
303 | 0 | for (int j = 0; j < 2; j++) { |
304 | 0 | fp2_subc_low(t[1][i][j], u2[i][j], u0[i][j]); |
305 | 0 | fp2_subc_low(t[1][i][j], t[1][i][j], u1[i][j]); |
306 | 0 | } |
307 | 0 | } |
308 | | /* c_0 = a_0b_0 + v * a_1b_1. */ |
309 | 0 | fp2_nord_low(u2[0][0], u1[1][1]); |
310 | 0 | dv_copy(u2[0][1][0], u1[1][0][0], 2 * RLC_FP_DIGS); |
311 | 0 | dv_copy(u2[0][1][1], u1[1][0][1], 2 * RLC_FP_DIGS); |
312 | 0 | dv_copy(u2[1][0][0], u1[0][0][0], 2 * RLC_FP_DIGS); |
313 | 0 | dv_copy(u2[1][0][1], u1[0][0][1], 2 * RLC_FP_DIGS); |
314 | 0 | dv_copy(u2[1][1][0], u1[0][1][0], 2 * RLC_FP_DIGS); |
315 | 0 | dv_copy(u2[1][1][1], u1[0][1][1], 2 * RLC_FP_DIGS); |
316 | 0 | for (int i = 0; i < 2; i++) { |
317 | 0 | for (int j = 0; j < 2; j++) { |
318 | 0 | fp2_addc_low(t[0][i][j], u0[i][j], u2[i][j]); |
319 | 0 | } |
320 | 0 | } |
321 | 0 | for (int i = 0; i < 2; i++) { |
322 | 0 | for (int j = 0; j < 2; j++) { |
323 | 0 | for (int k = 0; k < 2; k++) { |
324 | 0 | fp2_rdcn_low(c[i][j][k], t[i][j][k]); |
325 | 0 | } |
326 | 0 | } |
327 | 0 | } |
328 | 0 | } RLC_CATCH_ANY { |
329 | 0 | RLC_THROW(ERR_CAUGHT); |
330 | 0 | } RLC_FINALLY { |
331 | 0 | fp8_free(t0); |
332 | 0 | fp8_free(t1); |
333 | 0 | dv8_free(u0); |
334 | 0 | dv8_free(u1); |
335 | 0 | dv8_free(u2); |
336 | 0 | dv8_free(u3); |
337 | 0 | dv16_free(t); |
338 | 0 | } |
339 | 0 | } |
340 | | |
341 | | #endif |
342 | | |
343 | 0 | void fp16_mul_art(fp16_t c, const fp16_t a) { |
344 | 0 | fp8_t t0; |
345 | |
|
346 | 0 | fp8_null(t0); |
347 | |
|
348 | 0 | RLC_TRY { |
349 | 0 | fp8_new(t0); |
350 | | |
351 | | /* (a_0 + a_1 * v) * v = a_0 * v + a_1 * v^4 */ |
352 | 0 | fp8_copy(t0, a[0]); |
353 | 0 | fp8_mul_art(c[0], a[1]); |
354 | 0 | fp8_copy(c[1], t0); |
355 | 0 | } RLC_CATCH_ANY { |
356 | 0 | RLC_THROW(ERR_CAUGHT); |
357 | 0 | } RLC_FINALLY { |
358 | 0 | fp8_free(t0); |
359 | 0 | } |
360 | 0 | } |