/src/SymCrypt/lib/scsTools.c
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // scsTools.c Support tools for writing side-channel safe code |
3 | | // |
4 | | // Copyright (c) Microsoft Corporation. Licensed under the MIT license. |
5 | | // |
6 | | |
7 | | #include "precomp.h" |
8 | | |
9 | | // |
10 | | // This code needs to process data in words, and we'd like to use 32-bit words on 32-bit |
11 | | // architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT & friends. |
12 | | // |
13 | | |
14 | | // Buffer limits for SymCryptScsRotateBuffer |
15 | | #define MIN_BUFFER_SIZE (32) |
16 | | |
17 | | // |
18 | | // Masking functions |
19 | | // Masking functions can be more efficient if the inputs are restricted to values that can |
20 | | // be represented in the signed data types. |
21 | | // This is why we have some functions that take 31-bit inputs. |
22 | | // |
23 | | |
24 | | // 31-bit inputs |
25 | | |
26 | | UINT32 |
27 | | SYMCRYPT_CALL |
28 | | SymCryptMask32IsNonzeroU31( UINT32 v ) |
29 | 0 | { |
30 | 0 | SYMCRYPT_ASSERT( v < (1UL<<31) ); |
31 | 0 | return (-(INT32) v) >> 31; |
32 | 0 | } |
33 | | |
34 | | UINT32 |
35 | | SYMCRYPT_CALL |
36 | | SymCryptMask32IsZeroU31( UINT32 v ) |
37 | 0 | { |
38 | 0 | return ~SymCryptMask32IsNonzeroU31( v ); |
39 | 0 | } |
40 | | |
41 | | UINT32 |
42 | | SYMCRYPT_CALL |
43 | | SymCryptMask32NeqU31( UINT32 a, UINT32 b ) |
44 | 0 | { |
45 | 0 | SYMCRYPT_ASSERT( a < (1UL<<31) ); |
46 | 0 | SYMCRYPT_ASSERT( b < (1UL<<31) ); |
47 | |
|
48 | 0 | return SymCryptMask32IsNonzeroU31( a ^ b ); |
49 | 0 | } |
50 | | |
51 | | UINT32 |
52 | | SYMCRYPT_CALL |
53 | | SymCryptMask32LtU31( UINT32 a, UINT32 b ) |
54 | 0 | { |
55 | 0 | SYMCRYPT_ASSERT( a < (1UL<<31) ); |
56 | 0 | SYMCRYPT_ASSERT( b < (1UL<<31) ); |
57 | | |
58 | | // Casting to INT32 is defined as a and b are < 2^31 |
59 | 0 | return ((INT32) a - (INT32) b) >> 31; |
60 | 0 | } |
61 | | |
62 | | |
63 | | // 32-bit inputs |
64 | | |
65 | | UINT32 |
66 | | SYMCRYPT_CALL |
67 | | SymCryptMask32EqU32( UINT32 a, UINT32 b ) |
68 | 0 | { |
69 | 0 | return ~(UINT32) ( (-(INT64)(a^b)) >> 32); |
70 | 0 | } |
71 | | |
72 | | |
73 | | // Other helper functions |
74 | | SIZE_T |
75 | | SYMCRYPT_CALL |
76 | | SymCryptRoundUpPow2Sizet( SIZE_T v ) |
77 | 0 | { |
78 | 0 | SIZE_T res; |
79 | |
|
80 | 0 | SYMCRYPT_ASSERT( v <= (SIZE_T_MAX / 2) + 1); |
81 | | // If v is very large, then the result res might overflow. |
82 | | // As SIZE_T is an unsigned type, the overflow is defined to |
83 | | // be modulo 2^n for some n, and therefore we'll get res==0 |
84 | | // which will terminate the loop. |
85 | |
|
86 | 0 | res = 1; |
87 | 0 | while( res < v ) |
88 | 0 | { |
89 | 0 | res += res; |
90 | | |
91 | | // Catch any overflows; should never happen but break to avoid infinite loop |
92 | 0 | if( res == 0 ) |
93 | 0 | { |
94 | 0 | break; |
95 | 0 | } |
96 | 0 | } |
97 | |
|
98 | 0 | return res; |
99 | 0 | } |
100 | | |
101 | | |
102 | | // |
103 | | // Copy data |
104 | | // |
105 | | |
106 | | VOID |
107 | | SYMCRYPT_CALL |
108 | | SymCryptScsCopy( |
109 | | _In_reads_( cbDst ) PCBYTE pbSrc, |
110 | | SIZE_T cbSrc, |
111 | | _Out_writes_( cbDst ) PBYTE pbDst, |
112 | | SIZE_T cbDst ) |
113 | | // Copy cbSrc bytes of pbSrc into pbDst without revealing cbSrc |
114 | | // through side channels. |
115 | | // - pbSrc/cbSrc: buffer to copy data from |
116 | | // - pbDst/cbDst: buffer that receives the data |
117 | | // Equivalent to: |
118 | | // n = min( cbSrc, cbDst ) |
119 | | // pbDst[ 0.. n-1 ] = pbSrc[ 0 .. n - 1 ] |
120 | | // cbSrc is protected from side-channels; cbDst is public. |
121 | | // Note that pbSrc must be cbDst bytes long, not cbSrc bytes. |
122 | 0 | { |
123 | 0 | UINT32 i; |
124 | |
|
125 | 0 | SYMCRYPT_ASSERT( cbSrc <= (1UL << 31) && cbDst <= (1UL << 31) ); |
126 | | |
127 | | // Loop over the destination buffer and update each byte with the source data (if appropriate) |
128 | | // We round-robin loop over the source buffer |
129 | 0 | for( i = 0; i < cbDst; i++ ) |
130 | 0 | { |
131 | 0 | pbDst[ i ] ^= (pbSrc[ i ] ^ pbDst[ i ]) & SymCryptMask32LtU31( i, (UINT32) cbSrc ); |
132 | 0 | } |
133 | 0 | } |
134 | | |
135 | | |
136 | | // |
137 | | // Buffer rotation |
138 | | // To recover a message from an encoding with variable data position we have to do a copy from a |
139 | | // variable memory location. But our memory access pattern cannot depend on the secret location. |
140 | | // This code rotates a given buffer by a variable # bytes without revealing the shift amount. |
141 | | // |
142 | | // For efficiency we do this using NATIVE_UINT values so that we get the best performance on each platform. |
143 | | // |
144 | | // The first step is to rotate the array between 0 and NATIVE_BYTES-1 bytes to get the proper word alignment. |
145 | | // After that we only have to rotate the words. |
146 | | // We do this using a sequence of swaps. |
147 | | // Notation: |
148 | | // W[i] array of words, 0 <= i < n where n is the # words, a power of 2. |
149 | | // s Rotation amount (to the left). The value in W[s] at the start should appear in W[0] at the end. |
150 | | // |
151 |       | // We use masked swaps as they seem to be more efficient than masked multiplexers.
152 | | // We can split this problem down recursively |
153 | | // |
154 | | // Function Rotate( W, n, s) |
155 | | // - Rotate W[0..n/2-1] by s mod n/2 |
156 | | // - Rotate W[n/2..n-1] by s mod n/2 |
157 | | // for i in 0..n/2-1: |
158 | | // swap W[i] and W[i+n/2] if (i+s) % n >= n/2 |
159 | | // |
160 | | // After the two half-sized rotates, each word is in the right position modulo n/2, so all that needs to be |
161 |       | // done is to possibly swap (W[i],W[i+n/2]) pairs.
162 | | // Let W' be the array after the half-sized rotates. We have |
163 | | // W'[i] = W[ (i + s) % (n/2) ] for i in 0..n/2 |
164 | | // In the final array W'' we should have W''[i] = W[ (i+s)%n ] |
165 | | // So W''[i] = W'[i] when (i+s) % n = (i+s) % n/2 which is equivalent to (i+s)%n < n/2. |
166 | | // |
167 | | // We turn this into a non-recursive algorithm. |
168 | | // First we do rotations on 2 words, |
169 | | // then the fixups to make it 4-word rotations, |
170 | | // then on to 8-words, etc. |
171 | | // At each level we compute the masks for the swaps once, and re-use them for each copy |
172 | | // As a further optimization, we merge the 1st and 2nd pass into one to reduce the # read/writes |
173 | | // |
174 | | // We avoid using / and % throughout to avoid any time-dependent instructions. |
175 | | // |
176 | | |
VOID
SYMCRYPT_CALL
SymCryptScsRotateBuffer(
    _Inout_updates_( cbBuffer ) PBYTE pbBuffer,
    SIZE_T cbBuffer,
    SIZE_T lshift )
// Rotate pbBuffer left by lshift bytes without revealing lshift through
// side channels; the memory access pattern depends only on cbBuffer.
// - pbBuffer/cbBuffer: buffer to rotate. cbBuffer is public and must be a
//   power of 2 that is >= MIN_BUFFER_SIZE.
// - lshift: secret rotation amount, must be < cbBuffer. Per the algorithm
//   description above, the value at offset lshift ends up at offset 0.
// NOTE(review): the word-level phases access the buffer as NATIVE_UINTs;
// presumably callers guarantee suitable alignment of pbBuffer -- confirm.
{
    NATIVE_UINT * pBuf;
    UINT32 n;               // number of NATIVE_UINT words in the buffer
    UINT32 a;               // sub-word bit shift: 8 * (lshift % NATIVE_BYTES)
    UINT32 b;               // complementary bit shift (0 when a == 0)
    UINT32 i;
    UINT32 j;
    UINT32 blockSize;       // current rotated-block size, in words
    UINT32 blockSizeLog;    // log2( blockSize )
    UINT32 blockSizeLimit;

    NATIVE_UINT V;
    NATIVE_UINT T;
    NATIVE_UINT A;
    NATIVE_UINT B;
    NATIVE_UINT C;
    NATIVE_UINT D;
    NATIVE_UINT M;
    NATIVE_UINT M0;
    NATIVE_UINT M1;

    NATIVE_UINT Mask[ 16 ]; // Size must be a power of 2

    SYMCRYPT_ASSERT( (cbBuffer & (cbBuffer - 1)) == 0 && cbBuffer >= MIN_BUFFER_SIZE );
    SYMCRYPT_ASSERT( lshift < cbBuffer );

    pBuf = (NATIVE_UINT *) pbBuffer;
    n = (UINT32)cbBuffer / NATIVE_BYTES;

    // First a rotate left by lshift % NATIVE_BYTES
    // This is more complex because shifting by NATIVE_BITS is not a defined operation, and behavior is different
    // on different CPUs.

    // Compute the shift amounts & mask
    // M = 0 if lshift % NATIVE_BYTES == 0, -1 otherwise
    a = 8 * (lshift & (NATIVE_BYTES-1)); // Core shift
    M = (-(NATIVE_INT)a) >> (NATIVE_BITS - 1); // mask
    b = (NATIVE_BITS - a) & (UINT32) M; // complementary shift, or 0 if it would be equal to NATIVE_BITS

    // Walk the words from the top down: each word gets its own bits shifted
    // down by a, merged with bits from the (cyclically) next word. M zeroes
    // the second term when a == 0, where the b shift would otherwise be a
    // full-width (undefined) shift.
    i = n;
    V = pBuf[0];
    do{
        // Loop invariant: i > 0 && V = pBuf[i mod n] from before any changes;
        i--;
        T = pBuf[i];
        pBuf[i] = T >> a | ((V << b) & M);
        V = T;
    } while( i > 0 );

    // Now that the rotation is word-aligned, we can start our word rotation
    lshift >>= NATIVE_BYTES_LOG2; // convert to # words to rotate.

    // We know we have at least 4 words, so we start with a pass to do 4-word rotations.
    // This merges the first two levels of the recursive scheme described above
    // to reduce the number of reads/writes.
    SYMCRYPT_ASSERT( n >= 4 );

    M = -(NATIVE_INT)(lshift & 1);
    M0 = -(NATIVE_INT)( ((lshift + 0) >> 1) & 1 ); // s + 0 mod 4 >= 2
    M1 = -(NATIVE_INT)( ((lshift + 1) >> 1) & 1 ); // s + 1 mod 4 >= 2

    for( i=0; i<n; i+=4 )
    {
        A = pBuf[i];
        B = pBuf[i+1];
        C = pBuf[i+2];
        D = pBuf[i+3];

        // Level 1: masked swap within each 2-word pair (rotate by lshift mod 2)
        T = (A ^ B) & M;
        A ^= T;
        B ^= T;

        T = (C ^ D) & M;
        C ^= T;
        D ^= T;

        // Level 2: fix up to a 4-word rotation; swap (A,C) when
        // (0 + lshift) mod 4 >= 2, and (B,D) when (1 + lshift) mod 4 >= 2
        T = (A ^ C) & M0;
        A ^= T;
        C ^= T;

        T = (B ^ D) & M1;
        B ^= T;
        D ^= T;

        pBuf[i ] = A;
        pBuf[i+1] = B;
        pBuf[i+2] = C;
        pBuf[i+3] = D;
    }

    // Do the swaps using the mask array
    blockSize = 4; // size of rotated blocks
    blockSizeLog = 2;

    //
    // Using the mask array is beneficial as long as the array is used twice or more
    // Each swap loop processes 2 * blockSize of data, so the block size should never
    // be larger than n/4
    blockSizeLimit = SYMCRYPT_MIN( SYMCRYPT_ARRAY_SIZE( Mask ), n/4 );
    while( blockSize <= blockSizeLimit )
    {
        // Compute the masks for this level.
        // Mask[i] = -1 iff words i and i+blockSize must swap, i.e. when
        // (i + lshift) mod (2*blockSize) >= blockSize; bit blockSizeLog of
        // (i + lshift) tests exactly that condition.
        for( i=0; i<blockSize; i++ )
        {
            Mask[i] =-(NATIVE_INT)( ((i + lshift) >> blockSizeLog) & 1);
        }

        // Now swap the elements of pairs of blocks according to the masks
        for( i=0; i < n; i += 2 * blockSize )
        {
            for( j=0; j < blockSize; j++ )
            {
                A = pBuf[ i + j ];
                B = pBuf[ i + j + blockSize ];
                T = (A ^ B) & Mask[j];
                A ^= T;
                B ^= T;
                pBuf[ i + j ] = A;
                pBuf[ i + j + blockSize ] = B;
            }
        }
        blockSize *= 2;
        blockSizeLog += 1;
    }

    // Do the rest without using a mask array, either because we are only
    // going to use each mask value once, or because we don't have a large-enough
    // array
    while( blockSize < n )
    {
        // Now swap the elements of pairs of blocks according to the masks
        for( i=0; i < n; i += 2 * blockSize )
        {
            for( j=0; j < blockSize; j++ )
            {
                // Same swap condition as above, computed on the fly
                M = -(NATIVE_INT)( ((j + lshift) >> blockSizeLog) & 1);
                A = pBuf[ i + j ];
                B = pBuf[ i + j + blockSize ];
                T = (A ^ B) & M;
                A ^= T;
                B ^= T;
                pBuf[ i + j ] = A;
                pBuf[ i + j + blockSize ] = B;
            }
        }
        blockSize *= 2;
        blockSizeLog += 1;
    }

}
331 | | |
332 | | |
333 | | // |
334 | | // Map values in a side-channel safe way, typically used for mapping error codes. |
335 | | // |
336 | | // (pcMap, nMap) point to an array of nMap entries of type SYMCRYPT_UINT32_MAP; |
337 | | // each entry specifies a single mapping. If u32Input matches the |
338 | | // 'from' field, the return value will be the 'to' field value. |
339 | | // If u32Input is not equal to any 'from' field values, the return value is u32Default. |
340 | | // Both u32Input and the return value are treated as secrets w.r.t. side channels. |
341 | | // |
342 | | // If multiple map entries have the same 'from' field value, then the return value |
343 | | // is one of the several 'to' field values; which one is not defined. |
344 | | // |
345 | | // This function is particularly useful when mapping error codes in situations where |
346 | | // the actual error cannot be revealed through side channels. |
347 | | // |
348 | | |
349 | | UINT32 |
350 | | SYMCRYPT_CALL |
351 | | SymCryptMapUint32( |
352 | | UINT32 u32Input, |
353 | | UINT32 u32Default, |
354 | | _In_reads_(nMap) PCSYMCRYPT_UINT32_MAP pcMap, |
355 | | SIZE_T nMap) |
356 | 0 | { |
357 | 0 | UINT32 mask; |
358 | 0 | UINT32 u32Output = u32Default; |
359 | |
|
360 | 0 | for (SIZE_T i = 0; i < nMap; ++i) |
361 | 0 | { |
362 | 0 | mask = SymCryptMask32EqU32(u32Input, pcMap[i].from); |
363 | 0 | u32Output ^= (u32Output ^ pcMap[i].to) & mask; |
364 | 0 | } |
365 | |
|
366 | 0 | return u32Output; |
367 | 0 | } |