/src/boringssl/crypto/fipsmodule/bn/rsaz_exp.c.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. |
3 | | * Copyright (c) 2012, Intel Corporation. All Rights Reserved. |
4 | | * |
5 | | * Licensed under the OpenSSL license (the "License"). You may not use |
6 | | * this file except in compliance with the License. You can obtain a copy |
7 | | * in the file LICENSE in the source distribution or at |
8 | | * https://www.openssl.org/source/license.html |
9 | | * |
10 | | * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) |
11 | | * (1) Intel Corporation, Israel Development Center, Haifa, Israel |
12 | | * (2) University of Haifa, Israel |
13 | | */ |
14 | | |
15 | | #include "rsaz_exp.h" |
16 | | |
17 | | #if defined(RSAZ_ENABLED) |
18 | | |
19 | | #include <openssl/mem.h> |
20 | | |
21 | | #include <assert.h> |
22 | | |
23 | | #include "internal.h" |
24 | | #include "../../internal.h" |
25 | | |
26 | | |
27 | | // rsaz_one is 1 in RSAZ's representation. |
28 | | alignas(64) static const BN_ULONG rsaz_one[40] = { |
29 | | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
30 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
31 | | // rsaz_two80 is 2^80 in RSAZ's representation. Note RSAZ uses base 2^29, so this is |
32 | | // 2^(29*2 + 22) = 2^80, not 2^(64*2 + 22). |
33 | | alignas(64) static const BN_ULONG rsaz_two80[40] = { |
34 | | 0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
35 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
36 | | |
37 | | void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], |
38 | | const BN_ULONG base_norm[16], |
39 | | const BN_ULONG exponent[16], |
40 | | const BN_ULONG m_norm[16], const BN_ULONG RR[16], |
41 | | BN_ULONG k0, |
42 | 0 | BN_ULONG storage[MOD_EXP_CTIME_STORAGE_LEN]) { |
43 | 0 | static_assert(MOD_EXP_CTIME_ALIGN % 64 == 0, |
44 | 0 | "MOD_EXP_CTIME_ALIGN is too small"); |
45 | 0 | assert((uintptr_t)storage % 64 == 0); |
46 | | |
47 | 0 | BN_ULONG *a_inv, *m, *result, *table_s = storage + 40 * 3, *R2 = table_s; |
48 | | // Note |R2| aliases |table_s|. |
49 | 0 | if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) { |
50 | 0 | result = storage; |
51 | 0 | a_inv = storage + 40; |
52 | 0 | m = storage + 40 * 2; // should not cross page |
53 | 0 | } else { |
54 | 0 | m = storage; // should not cross page |
55 | 0 | result = storage + 40; |
56 | 0 | a_inv = storage + 40 * 2; |
57 | 0 | } |
58 | |
|
59 | 0 | rsaz_1024_norm2red_avx2(m, m_norm); |
60 | 0 | rsaz_1024_norm2red_avx2(a_inv, base_norm); |
61 | 0 | rsaz_1024_norm2red_avx2(R2, RR); |
62 | | |
63 | | // Convert |R2| from the usual radix, giving R = 2^1024, to RSAZ's radix, |
64 | | // giving R = 2^(36*29) = 2^1044. |
65 | 0 | rsaz_1024_mul_avx2(R2, R2, R2, m, k0); |
66 | | // R2 = 2^2048 * 2^2048 / 2^1044 = 2^3052 |
67 | 0 | rsaz_1024_mul_avx2(R2, R2, rsaz_two80, m, k0); |
68 | | // R2 = 2^3052 * 2^80 / 2^1044 = 2^2088 = (2^1044)^2 |
69 | | |
70 | | // table[0] = 1 |
71 | | // table[1] = a_inv^1 |
72 | 0 | rsaz_1024_mul_avx2(result, R2, rsaz_one, m, k0); |
73 | 0 | rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); |
74 | 0 | rsaz_1024_scatter5_avx2(table_s, result, 0); |
75 | 0 | rsaz_1024_scatter5_avx2(table_s, a_inv, 1); |
76 | | // table[2] = a_inv^2 |
77 | 0 | rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); |
78 | 0 | rsaz_1024_scatter5_avx2(table_s, result, 2); |
79 | | // table[4] = a_inv^4 |
80 | 0 | rsaz_1024_sqr_avx2(result, result, m, k0, 1); |
81 | 0 | rsaz_1024_scatter5_avx2(table_s, result, 4); |
82 | | // table[8] = a_inv^8 |
83 | 0 | rsaz_1024_sqr_avx2(result, result, m, k0, 1); |
84 | 0 | rsaz_1024_scatter5_avx2(table_s, result, 8); |
85 | | // table[16] = a_inv^16 |
86 | 0 | rsaz_1024_sqr_avx2(result, result, m, k0, 1); |
87 | 0 | rsaz_1024_scatter5_avx2(table_s, result, 16); |
88 | 0 | for (int i = 3; i < 32; i += 2) { |
89 | | // table[i] = table[i-1] * a_inv = a_inv^i |
90 | 0 | rsaz_1024_gather5_avx2(result, table_s, i - 1); |
91 | 0 | rsaz_1024_mul_avx2(result, result, a_inv, m, k0); |
92 | 0 | rsaz_1024_scatter5_avx2(table_s, result, i); |
93 | 0 | for (int j = 2 * i; j < 32; j *= 2) { |
94 | | // table[j] = table[j/2]^2 = a_inv^j |
95 | 0 | rsaz_1024_sqr_avx2(result, result, m, k0, 1); |
96 | 0 | rsaz_1024_scatter5_avx2(table_s, result, j); |
97 | 0 | } |
98 | 0 | } |
99 | | |
100 | | // Load the first window. |
101 | 0 | const uint8_t *p_str = (const uint8_t *)exponent; |
102 | 0 | int wvalue = p_str[127] >> 3; |
103 | 0 | rsaz_1024_gather5_avx2(result, table_s, wvalue); |
104 | |
|
105 | 0 | int index = 1014; |
106 | 0 | while (index > -1) { // Loop for the remaining 127 windows. |
107 | 0 | rsaz_1024_sqr_avx2(result, result, m, k0, 5); |
108 | |
|
109 | 0 | uint16_t wvalue_16; |
110 | 0 | memcpy(&wvalue_16, &p_str[index / 8], sizeof(wvalue_16)); |
111 | 0 | wvalue = wvalue_16; |
112 | 0 | wvalue = (wvalue >> (index % 8)) & 31; |
113 | 0 | index -= 5; |
114 | |
|
115 | 0 | rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|. |
116 | 0 | rsaz_1024_mul_avx2(result, result, a_inv, m, k0); |
117 | 0 | } |
118 | | |
119 | | // Square four times. |
120 | 0 | rsaz_1024_sqr_avx2(result, result, m, k0, 4); |
121 | |
|
122 | 0 | wvalue = p_str[0] & 15; |
123 | |
|
124 | 0 | rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|. |
125 | 0 | rsaz_1024_mul_avx2(result, result, a_inv, m, k0); |
126 | | |
127 | | // Convert from Montgomery. |
128 | 0 | rsaz_1024_mul_avx2(result, result, rsaz_one, m, k0); |
129 | |
|
130 | 0 | rsaz_1024_red2norm_avx2(result_norm, result); |
131 | 0 | BN_ULONG scratch[16]; |
132 | 0 | bn_reduce_once_in_place(result_norm, /*carry=*/0, m_norm, scratch, 16); |
133 | |
|
134 | 0 | OPENSSL_cleanse(storage, MOD_EXP_CTIME_STORAGE_LEN * sizeof(BN_ULONG)); |
135 | 0 | } |
136 | | |
137 | | #endif // RSAZ_ENABLED |