Line | Count | Source |
1 | | /* |
2 | | * Copyright Supranational LLC |
3 | | * Licensed under the Apache License, Version 2.0, see LICENSE for details. |
4 | | * SPDX-License-Identifier: Apache-2.0 |
5 | | */ |
6 | | |
7 | | #include "vect.h" |
8 | | |
9 | | #ifdef __BLST_NO_ASM__ |
10 | | # include "no_asm.h" |
11 | | #endif |
12 | | |
/*
 * Following are some reference C implementations to assist new
 * assembly modules development, as starting-point stand-ins and for
 * cross-checking. In order to "polyfill" a specific subroutine, redefine
 * it on the compiler command line, e.g. -Dmul_mont_384x=_mul_mont_384x.
 */
19 | | |
#ifdef lshift_mod_384
/*
 * Reference stand-in: performs |n| successive doublings through
 * add_mod_384(). The first pass reads |a|, every later pass reads the
 * running result in |ret|; provided add_mod_384() is modular addition,
 * the outcome is |a| * 2^|n| modulo |mod|.
 *
 * Note: when |n| is 0 the loop body never executes and |ret| is left
 * unwritten.
 */
inline void lshift_mod_384(vec384 ret, const vec384 a, size_t n,
                           const vec384 mod)
{
    size_t i;

    for (i = 0; i < n; i++) {
        add_mod_384(ret, a, a, mod);
        a = ret;    /* subsequent doublings work on the running result */
    }
}
#endif
28 | | |
#ifdef mul_by_8_mod_384
/*
 * Reference stand-in: multiplication by 8 is delegated to
 * lshift_mod_384() with a shift count of 3, since 8 == 2^3.
 */
inline void mul_by_8_mod_384(vec384 ret, const vec384 a, const vec384 mod)
{
    const size_t log2_of_8 = 3;

    lshift_mod_384(ret, a, log2_of_8, mod);
}
#endif
33 | | |
#ifdef mul_by_3_mod_384
/*
 * Reference stand-in: triples |a| with two add_mod_384() calls,
 * first a + a into a local, then that local plus a into |ret|.
 */
inline void mul_by_3_mod_384(vec384 ret, const vec384 a, const vec384 mod)
{
    vec384 doubled;

    add_mod_384(doubled, a, a, mod);    /* doubled = a + a       */
    add_mod_384(ret, doubled, a, mod);  /* ret     = doubled + a */
}
#endif
43 | | |
#ifdef mul_by_3_mod_384x
/*
 * Reference stand-in: applies mul_by_3_mod_384() to each of the two
 * components of |a| in turn, component 0 first.
 */
inline void mul_by_3_mod_384x(vec384x ret, const vec384x a, const vec384 mod)
{
    size_t i;

    for (i = 0; i < 2; i++)
        mul_by_3_mod_384(ret[i], a[i], mod);
}
#endif
51 | | |
#ifdef mul_by_8_mod_384x
/*
 * Reference stand-in: applies mul_by_8_mod_384() to each of the two
 * components of |a| in turn, component 0 first.
 */
inline void mul_by_8_mod_384x(vec384x ret, const vec384x a, const vec384 mod)
{
    size_t i;

    for (i = 0; i < 2; i++)
        mul_by_8_mod_384(ret[i], a[i], mod);
}
#endif
59 | | |
#ifdef mul_by_1_plus_i_mod_384x
/*
 * Reference stand-in for multiplication by (1 + i):
 *
 *   (1 + i)*(a[0] + a[1]*i) = (a[0] - a[1]) + (a[0] + a[1])*i
 *
 * The sum is parked in a local and copied out last, so that both
 * components of |a| are read before ret[1] is written.
 */
inline void mul_by_1_plus_i_mod_384x(vec384x ret, const vec384x a,
                                     const vec384 mod)
{
    vec384 sum;

    add_mod_384(sum, a[0], a[1], mod);      /* future ret[1]        */
    sub_mod_384(ret[0], a[0], a[1], mod);   /* ret[0] = a[0] - a[1] */
    vec_copy(ret[1], sum, sizeof(sum));     /* ret[1] = a[0] + a[1] */
}
#endif
71 | | |
#ifdef add_mod_384x
/*
 * Reference stand-in: component-wise add_mod_384() over the two
 * components of |a| and |b|, component 0 first.
 */
inline void add_mod_384x(vec384x ret, const vec384x a, const vec384x b,
                         const vec384 mod)
{
    size_t i;

    for (i = 0; i < 2; i++)
        add_mod_384(ret[i], a[i], b[i], mod);
}
#endif
80 | | |
#ifdef sub_mod_384x
/*
 * Reference stand-in: component-wise sub_mod_384() over the two
 * components of |a| and |b|, component 0 first.
 */
inline void sub_mod_384x(vec384x ret, const vec384x a, const vec384x b,
                         const vec384 mod)
{
    size_t i;

    for (i = 0; i < 2; i++)
        sub_mod_384(ret[i], a[i], b[i], mod);
}
#endif
89 | | |
#ifdef lshift_mod_384x
/*
 * Reference stand-in: applies lshift_mod_384() with the same shift
 * count |n| to each of the two components of |a|, component 0 first.
 */
inline void lshift_mod_384x(vec384x ret, const vec384x a, size_t n,
                            const vec384 mod)
{
    size_t i;

    for (i = 0; i < 2; i++)
        lshift_mod_384(ret[i], a[i], n, mod);
}
#endif
98 | | |
#if defined(mul_mont_384x) && !(defined(__ADX__) && !defined(__BLST_PORTABLE__))
/*
 * Reference stand-in for multiplying |a| = a[0] + a[1]*i by
 * |b| = b[0] + b[1]*i with three double-width multiplications
 * instead of four:
 *
 *   ret[0] = a[0]*b[0] - a[1]*b[1]
 *   ret[1] = (a[0] + a[1])*(b[0] + b[1]) - a[0]*b[0] - a[1]*b[1]
 *
 * All subtractions are carried out on the double-width products with
 * sub_mod_384x384(), and each component goes through redc_mont_384()
 * exactly once at the end. |ret| is written only by those two final
 * calls.
 */
void mul_mont_384x(vec384x ret, const vec384x a, const vec384x b,
                   const vec384 mod, limb_t n0)
{
    vec384 sum_a, sum_b;
    vec768 prod_lo, prod_hi, cross;

    /* operand sums feeding the cross term */
    add_mod_384(sum_a, a[0], a[1], mod);
    add_mod_384(sum_b, b[0], b[1], mod);

    /* the three double-width products */
    mul_384(prod_lo, a[0], b[0]);
    mul_384(prod_hi, a[1], b[1]);
    mul_384(cross, sum_a, sum_b);

    /* cross = a[0]*b[1] + a[1]*b[0] */
    sub_mod_384x384(cross, cross, prod_lo, mod);
    sub_mod_384x384(cross, cross, prod_hi, mod);

    /* prod_lo = a[0]*b[0] - a[1]*b[1] */
    sub_mod_384x384(prod_lo, prod_lo, prod_hi, mod);

    redc_mont_384(ret[0], prod_lo, mod, n0);
    redc_mont_384(ret[1], cross, mod, n0);
}
#endif
121 | | |
#if defined(sqr_mont_384x) && !(defined(__ADX__) && !defined(__BLST_PORTABLE__))
/*
 * Reference stand-in for squaring |a| = a[0] + a[1]*i:
 *
 *   ret[0] = (a[0] + a[1])*(a[0] - a[1])
 *   ret[1] = 2*a[0]*a[1]
 *
 * The sum and the difference are captured in locals before any part
 * of |ret| is written; ret[1] is produced first, ret[0] last.
 */
void sqr_mont_384x(vec384x ret, const vec384x a, const vec384 mod, limb_t n0)
{
    vec384 sum, diff;

    add_mod_384(sum, a[0], a[1], mod);
    sub_mod_384(diff, a[0], a[1], mod);

    /* ret[1] = a[0]*a[1], then doubled in place */
    mul_mont_384(ret[1], a[0], a[1], mod, n0);
    add_mod_384(ret[1], ret[1], ret[1], mod);

    /* ret[0] = sum*diff */
    mul_mont_384(ret[0], sum, diff, mod, n0);
}
#endif
136 | | |
/*
 * Helpers called by div_by_zz() and div_by_z(); only their prototypes
 * appear in this part of the file.
 * NOTE(review): judging by the names and the call sites, div_3_limbs()
 * produces a one-limb quotient estimate from the top dividend limbs and
 * the two top divisor limbs, and quot_rem_128()/quot_rem_64() apply
 * that estimate to |quot_rem| in place - confirm against the actual
 * implementations. Both callers discard the quot_rem_*() return value.
 */
limb_t div_3_limbs(const limb_t dividend_top[2], limb_t d_lo, limb_t d_hi);
limb_t quot_rem_128(limb_t *quot_rem, const limb_t *divisor, limb_t quotient);
limb_t quot_rem_64(limb_t *quot_rem, const limb_t *divisor, limb_t quotient);
140 | | |
141 | | /* |
142 | | * Divide 255-bit |val| by z^2 yielding 128-bit quotient and remainder in place. |
143 | | */ |
144 | | static void div_by_zz(limb_t val[]) |
145 | 233 | { |
146 | 233 | static const limb_t zz[] = { TO_LIMB_T(0x0000000100000000), |
147 | 233 | TO_LIMB_T(0xac45a4010001a402) }; |
148 | 233 | size_t loop, zz_len = sizeof(zz)/sizeof(zz[0]); |
149 | 233 | limb_t d_lo, d_hi; |
150 | | |
151 | 233 | d_lo = zz[zz_len - 2]; |
152 | 233 | d_hi = zz[zz_len - 1]; |
153 | 699 | for (loop = zz_len, zz_len--; loop--;) { |
154 | 466 | limb_t q = div_3_limbs(val + loop + zz_len, d_lo, d_hi); |
155 | 466 | (void)quot_rem_128(val + loop, zz, q); |
156 | 466 | } |
157 | | /* remainder is in low half of val[], quotient is in high */ |
158 | 233 | } |
159 | | |
160 | | /* |
161 | | * Divide 128-bit |val| by z yielding 64-bit quotient and remainder in place. |
162 | | */ |
163 | | static void div_by_z(limb_t val[]) |
164 | 322 | { |
165 | 322 | static const limb_t z[] = { TO_LIMB_T(0xd201000000010000) }; |
166 | 322 | size_t loop, z_len = sizeof(z)/sizeof(z[0]); |
167 | 322 | limb_t d_lo, d_hi; |
168 | | |
169 | 322 | d_lo = (sizeof(z) == sizeof(limb_t)) ? 0 : z[z_len - 2]; |
170 | 322 | d_hi = z[z_len - 1]; |
171 | 644 | for (loop = z_len, z_len--; loop--;) { |
172 | 322 | limb_t q = div_3_limbs(val + loop + z_len, d_lo, d_hi); |
173 | 322 | (void)quot_rem_64(val + loop, z, q); |
174 | 322 | } |
175 | | /* remainder is in low half of val[], quotient is in high */ |
176 | 322 | } |