/src/mcl/include/mcl/op.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | #pragma once |
2 | | /** |
3 | | @file |
4 | | @brief definition of Op |
5 | | @author MITSUNARI Shigeo(@herumi) |
6 | | @license modified new BSD license |
7 | | http://opensource.org/licenses/BSD-3-Clause |
8 | | */ |
9 | | #include <mcl/gmp_util.hpp> |
10 | | #include <mcl/array.hpp> |
11 | | #include <mcl/invmod_fwd.hpp> |
12 | | #ifndef MCL_STANDALONE |
13 | | #include <stdio.h> |
14 | | #endif |
15 | | |
16 | | #if defined(__EMSCRIPTEN__) || defined(__wasm__) |
17 | | #define MCL_DONT_USE_XBYAK |
18 | | #endif |
19 | | #if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) && !defined(MCL_STATIC_CODE) |
20 | | #define MCL_USE_XBYAK |
21 | | #endif |
22 | | #if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE) |
23 | | #define MCL_X64_ASM |
24 | | #define MCL_XBYAK_DIRECT_CALL |
25 | | #endif |
26 | | |
27 | | #define MCL_MAX_HASH_BIT_SIZE 512 |
28 | | |
29 | | |
30 | | namespace mcl { |
31 | | |
32 | | static const int version = 0x186; /* 0xABC = A.BC */ |
33 | | |
34 | | /* |
35 | | specifies available string format mode for X::setIoMode() |
36 | | // for Fp, Fp2, Fp6, Fp12 |
37 | | default(0) : IoDec |
38 | | printable string(zero terminated, variable size) |
39 | | IoBin(2) | IoDec(10) | IoHex(16) | IoBinPrefix | IoHexPrefix |
40 | | |
41 | | byte string(not zero terminated, fixed size) |
42 | | IoArray | IoArrayRaw |
43 | | IoArray = IoSerialize |
44 | | |
45 | | // for Ec |
46 | | affine(0) | IoEcCompY | IoComp |
47 | | default : affine |
48 | | |
49 | | affine and IoEcCompY are available with ioMode for Fp |
50 | | IoSerialize ignores ioMode for Fp |
51 | | |
52 | | IoAuto |
53 | | dec or hex according to ios_base::fmtflags |
54 | | IoBin |
55 | | binary number([01]+) |
56 | | IoDec |
57 | | decimal number |
58 | | IoHex |
59 | | hexadecimal number([0-9a-fA-F]+) |
60 | | IoBinPrefix |
61 | | 0b + <binary number> |
62 | | IoHexPrefix |
63 | | 0x + <hexadecimal number> |
64 | | IoArray |
65 | | array of Unit(fixed size = Fp::getByteSize()) |
66 | | IoArrayRaw |
67 | | array of Unit(fixed size = Fp::getByteSize()) without Montgomery conversion |
68 | | |
69 | | // for Ec::setIoMode() |
70 | | IoEcAffine(default) |
71 | | "0" ; infinity |
72 | | "1 <x> <y>" ; affine coordinate |
73 | | |
74 | | IoEcProj |
75 | | "4 <x> <y> <z>" ; projective or jacobi coordinate |
76 | | |
77 | | IoEcCompY |
78 | | 1-bit y representation of elliptic curve |
79 | | "2 <x>" ; compressed for even y |
80 | | "3 <x>" ; compressed for odd y |
81 | | |
82 | | IoSerialize |
83 | | if isMSBserialize(): // p is not full bit |
84 | | size = Fp::getByteSize() |
85 | | use MSB of array of x for 1-bit y for prime p where (p % 8 != 0) |
86 | | [0] ; infinity |
87 | | <x> ; for even y |
88 | | <x>|1 ; for odd y ; |1 means set MSB of x |
89 | | else: |
90 | | size = Fp::getByteSize() + 1 |
91 | | [0] ; infinity |
92 | | 2 <x> ; for even y |
93 | | 3 <x> ; for odd y |
94 | | */ |
95 | | enum IoMode { |
96 | | IoAuto = 0, // dec or hex according to ios_base::fmtflags |
97 | | IoBin = 2, // binary number without prefix |
98 | | IoDec = 10, // decimal number without prefix |
99 | | IoHex = 16, // hexadecimal number without prefix |
100 | | IoArray = 32, // array of Unit(fixed size) |
101 | | IoArrayRaw = 64, // raw array of Unit without Montgomery conversion |
102 | | IoPrefix = 128, // prepend '0b'(bin) or '0x'(hex) |
103 | | IoBinPrefix = IoBin | IoPrefix, |
104 | | IoHexPrefix = IoHex | IoPrefix, |
105 | | IoEcAffine = 0, // affine coordinate |
106 | | IoEcCompY = 256, // 1-bit y representation of elliptic curve |
107 | | IoSerialize = 512, // use MSB for 1-bit y |
108 | | IoFixedSizeByteSeq = IoSerialize, // obsolete |
109 | | IoEcProj = 1024, // projective or jacobi coordinate |
110 | | IoSerializeHexStr = 2048, // printable hex string |
111 | | IoEcAffineSerialize = 4096, // serialize [x:y] |
112 | | IoBigEndian = 8192 // serialize as big endian (default little endian) |
113 | | }; |
114 | | |
115 | | namespace fp { |
116 | | |
117 | | inline bool isIoSerializeMode(int ioMode) |
118 | 174k | { |
119 | 174k | return ioMode & (IoArray | IoArrayRaw | IoSerialize | IoEcAffineSerialize | IoSerializeHexStr); |
120 | 174k | } |
121 | | |
122 | | const size_t maxMulVecN = 32; // inner loop of mulVec |
123 | | |
124 | | #ifndef MCL_MAX_MUL_VEC_NGLV |
125 | | #define MCL_MAX_MUL_VEC_NGLV 16 |
126 | | #endif |
127 | | const size_t maxMulVecNGLV = MCL_MAX_MUL_VEC_NGLV; // inner loop of mulVec with GLV |
128 | | |
129 | | struct FpGenerator; |
130 | | struct Op; |
131 | | |
132 | | typedef void (*void1u)(Unit*); |
133 | | typedef void (*void2u)(Unit*, const Unit*); |
134 | | typedef void (*void2uI)(Unit*, const Unit*, Unit); |
135 | | typedef void (*void2uIu)(Unit*, const Unit*, Unit, const Unit*); |
136 | | typedef void (*void2uOp)(Unit*, const Unit*, const Op&); |
137 | | typedef void (*void3u)(Unit*, const Unit*, const Unit*); |
138 | | typedef void (*void4u)(Unit*, const Unit*, const Unit*, const Unit*); |
139 | | typedef int (*int2u)(Unit*, const Unit*); |
140 | | |
141 | | typedef Unit (*u1uII)(Unit*, Unit, Unit); |
142 | | typedef Unit (*u3u)(Unit*, const Unit*, const Unit*); |
143 | | typedef Unit (*u2uI)(Unit*, const Unit *, Unit); |
144 | | |
145 | | /* |
146 | | disable -Wcast-function-type |
147 | | the number of arguments of some JIT functions is smaller than that of T |
148 | | */ |
149 | | template<class T, class S> |
150 | | T func_ptr_cast(S func) |
151 | 8 | { |
152 | 8 | return reinterpret_cast<T>(reinterpret_cast<void*>(func)); |
153 | 8 | } |
154 | | struct Block { |
155 | | const Unit *p; // pointer to original FpT.v_ |
156 | | size_t n; |
157 | | Unit v_[maxUnitSize]; |
158 | | }; |
159 | | |
160 | | enum Mode { |
161 | | FP_AUTO, |
162 | | FP_GMP, |
163 | | FP_GMP_MONT, |
164 | | FP_LLVM, |
165 | | FP_LLVM_MONT, |
166 | | FP_XBYAK |
167 | | }; |
168 | | |
169 | | enum PrimeMode { |
170 | | PM_GENERIC = 0, |
171 | | PM_NIST_P192, |
172 | | PM_SECP256K1, |
173 | | PM_NIST_P521 |
174 | | }; |
175 | | |
176 | | struct Op { |
177 | | /* |
178 | | don't change the layout of rp and p |
179 | | asm code assumes &rp + 1 == p |
180 | | */ |
181 | | Unit rp; |
182 | | Unit p[maxUnitSize]; |
183 | | mpz_class mp; |
184 | | uint32_t pmod4; |
185 | | mcl::SquareRoot sq; |
186 | | CYBOZU_ALIGN(8) char im[sizeof(mcl::inv::InvModT<maxUnitSize>)]; |
187 | | mcl::Modp modp; |
188 | | mcl::SmallModp smallModp; |
189 | | Unit half[maxUnitSize]; // (p + 1) / 2 |
190 | | Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery) |
191 | | /* |
192 | | for Montgomery |
193 | | one = 1 |
194 | | R = (1 << (N * sizeof(Unit) * 8)) % p |
195 | | R2 = (R * R) % p |
196 | | R3 = (R * R * R) % p |
197 | | */ |
198 | | Unit one[maxUnitSize]; |
199 | | Unit R2[maxUnitSize]; |
200 | | Unit R3[maxUnitSize]; |
201 | | #ifdef MCL_USE_XBYAK |
202 | | FpGenerator *fg; |
203 | | #endif |
204 | | #ifdef MCL_X64_ASM |
205 | | mcl::Array<Unit> invTbl; |
206 | | #endif |
207 | | void3u fp_addA_; |
208 | | void3u fp_subA_; |
209 | | void2u fp_negA_; |
210 | | void3u fp_mulA_; |
211 | | void2u fp_sqrA_; |
212 | | void2u fp_mul2A_; |
213 | | void2u fp_mul9A_; |
214 | | void3u fp2_addA_; |
215 | | void3u fp2_subA_; |
216 | | void2u fp2_negA_; |
217 | | void3u fp2_mulA_; |
218 | | void2u fp2_sqrA_; |
219 | | void2u fp2_mul2A_; |
220 | | void3u fpDbl_addA_; |
221 | | void3u fpDbl_subA_; |
222 | | void2u fpDbl_modA_; |
223 | | void3u fp2Dbl_mulPreA_; |
224 | | void2u fp2Dbl_sqrPreA_; |
225 | | void2u fp2Dbl_mul_xiA_; |
226 | | size_t maxN; |
227 | | size_t N; |
228 | | size_t bitSize; |
229 | | bool (*fp_isZero)(const Unit*); |
230 | | void1u fp_clear; |
231 | | void2u fp_copy; |
232 | | void2u fp_shr1; |
233 | | void3u fp_neg; |
234 | | void4u fp_add; |
235 | | void4u fp_sub; |
236 | | void4u fp_mul; |
237 | | void3u fp_sqr; |
238 | | void3u fp_mul2; |
239 | | void2uOp fp_invOp; |
240 | | void2uIu fp_mulUnit; // fp_mulUnitPre |
241 | | |
242 | | void3u fpDbl_mulPre; |
243 | | void2u fpDbl_sqrPre; |
244 | | int2u fp_preInv; |
245 | | void2uI fp_mulUnitPre; // z[N + 1] = x[N] * y |
246 | | |
247 | | void4u fpDbl_add; |
248 | | void4u fpDbl_sub; |
249 | | void3u fpDbl_mod; |
250 | | |
251 | | u3u fp_addPre; // without modulo p |
252 | | u3u fp_subPre; // without modulo p |
253 | | u3u fpDbl_addPre; |
254 | | u3u fpDbl_subPre; |
255 | | /* |
256 | | for Fp2 = F[u] / (u^2 + 1) |
257 | | x = a + bu |
258 | | */ |
259 | | int xi_a; // xi = xi_a + u |
260 | | void2u fp2_mul_xiA_; |
261 | | uint32_t (*hash)(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize); |
262 | | |
263 | | PrimeMode primeMode; |
264 | | bool isFullBit; // true if bitSize % unitSize == 0 |
265 | | bool isLtQuad; // true if (bitSize % unitSize) <= unitSize - 2 |
266 | | bool isMont; // true if use Montgomery |
267 | | bool isFastMod; // true if modulo is fast |
268 | | |
269 | | Op() |
270 | 8 | { |
271 | 8 | clear(); |
272 | 8 | } |
273 | | ~Op() |
274 | 0 | { |
275 | 0 | #ifdef MCL_USE_XBYAK |
276 | 0 | destroyFpGenerator(fg); |
277 | 0 | #endif |
278 | 0 | } |
279 | | void clear() |
280 | 16 | { |
281 | 16 | rp = 0; |
282 | 16 | memset(p, 0, sizeof(p)); |
283 | 16 | mp = 0; |
284 | 16 | pmod4 = 0; |
285 | 16 | sq.clear(); |
286 | | // fg is not set |
287 | 16 | memset(half, 0, sizeof(half)); |
288 | 16 | memset(oneRep, 0, sizeof(oneRep)); |
289 | 16 | memset(one, 0, sizeof(one)); |
290 | 16 | memset(R2, 0, sizeof(R2)); |
291 | 16 | memset(R3, 0, sizeof(R3)); |
292 | 16 | #ifdef MCL_X64_ASM |
293 | 16 | invTbl.clear(); |
294 | 16 | #endif |
295 | 16 | fp_addA_ = 0; |
296 | 16 | fp_subA_ = 0; |
297 | 16 | fp_negA_ = 0; |
298 | 16 | fp_mulA_ = 0; |
299 | 16 | fp_sqrA_ = 0; |
300 | 16 | fp_mul2A_ = 0; |
301 | 16 | fp_mul9A_ = 0; |
302 | 16 | fp2_addA_ = 0; |
303 | 16 | fp2_subA_ = 0; |
304 | 16 | fp2_negA_ = 0; |
305 | 16 | fp2_mulA_ = 0; |
306 | 16 | fp2_sqrA_ = 0; |
307 | 16 | fp2_mul2A_ = 0; |
308 | 16 | fpDbl_addA_ = 0; |
309 | 16 | fpDbl_subA_ = 0; |
310 | 16 | fpDbl_modA_ = 0; |
311 | 16 | fp2Dbl_mulPreA_ = 0; |
312 | 16 | fp2Dbl_sqrPreA_ = 0; |
313 | 16 | fp2Dbl_mul_xiA_ = 0; |
314 | 16 | maxN = 0; |
315 | 16 | N = 0; |
316 | 16 | bitSize = 0; |
317 | 16 | fp_isZero = 0; |
318 | 16 | fp_clear = 0; |
319 | 16 | fp_copy = 0; |
320 | 16 | fp_shr1 = 0; |
321 | 16 | fp_neg = 0; |
322 | 16 | fp_add = 0; |
323 | 16 | fp_sub = 0; |
324 | 16 | fp_mul = 0; |
325 | 16 | fp_sqr = 0; |
326 | 16 | fp_mul2 = 0; |
327 | 16 | fp_invOp = 0; |
328 | 16 | fp_mulUnit = 0; |
329 | | |
330 | 16 | fpDbl_mulPre = 0; |
331 | 16 | fpDbl_sqrPre = 0; |
332 | 16 | fp_preInv = 0; |
333 | 16 | fp_mulUnitPre = 0; |
334 | | |
335 | 16 | fpDbl_add = 0; |
336 | 16 | fpDbl_sub = 0; |
337 | 16 | fpDbl_mod = 0; |
338 | | |
339 | 16 | fp_addPre = 0; |
340 | 16 | fp_subPre = 0; |
341 | 16 | fpDbl_addPre = 0; |
342 | 16 | fpDbl_subPre = 0; |
343 | | |
344 | 16 | xi_a = 0; |
345 | 16 | fp2_mul_xiA_ = 0; |
346 | 16 | hash = 0; |
347 | | |
348 | 16 | primeMode = PM_GENERIC; |
349 | 16 | isFullBit = false; |
350 | 16 | isLtQuad = false; |
351 | 16 | isMont = false; |
352 | 16 | isFastMod = false; |
353 | 16 | } |
354 | | void fromMont(Unit* y, const Unit *x) const |
355 | 90.3k | { |
356 | | /* |
357 | | M(x, y) = xyR^-1 |
358 | | y = M(x, 1) = xR^-1 |
359 | | */ |
360 | 90.3k | fp_mul(y, x, one, p); |
361 | 90.3k | } |
362 | | void toMont(Unit* y, const Unit *x) const |
363 | 104k | { |
364 | | /* |
365 | | y = M(x, R2) = xR^2 R^-1 = xR |
366 | | */ |
367 | 104k | fp_mul(y, x, R2, p); |
368 | 104k | } |
369 | | bool init(const mpz_class& p, size_t maxBitSize, int xi_a, Mode mode, size_t mclMaxBitSize = MCL_MAX_BIT_SIZE); |
370 | | #ifdef MCL_USE_XBYAK |
371 | | static FpGenerator* createFpGenerator(); |
372 | | static void destroyFpGenerator(FpGenerator *fg); |
373 | | #endif |
374 | | private: |
375 | | Op(const Op&); |
376 | | void operator=(const Op&); |
377 | | }; |
378 | | |
379 | | inline const char* getIoSeparator(int ioMode) |
380 | 29.1k | { |
381 | 29.1k | return (ioMode & (IoArray | IoArrayRaw | IoSerialize | IoSerializeHexStr | IoEcAffineSerialize)) ? "" : " "; |
382 | 29.1k | } |
383 | | |
384 | | inline void dump(const void *buf, size_t n) |
385 | 0 | { |
386 | 0 | #ifdef MCL_STANDALONE |
387 | 0 | (void)buf; |
388 | 0 | (void)n; |
389 | 0 | #else |
390 | 0 | const uint8_t *s = (const uint8_t *)buf; |
391 | 0 | for (size_t i = 0; i < n; i++) { |
392 | 0 | printf("%02x ", s[i]); |
393 | 0 | } |
394 | 0 | printf("\n"); |
395 | 0 | #endif |
396 | 0 | } |
397 | | |
398 | | #ifndef CYBOZU_DONT_USE_STRING |
399 | | int detectIoMode(int ioMode, const std::ios_base& ios); |
400 | | |
401 | | inline void dump(const std::string& s) |
402 | 0 | { |
403 | 0 | dump(s.c_str(), s.size()); |
404 | 0 | } |
405 | | #endif |
406 | | |
407 | | } } // mcl::fp |