Coverage Report

Created: 2024-11-21 07:03

/src/SymCrypt/lib/scsTools.c
(Coverage data: every executable line below reports a hit count of 0.)
//
// scsTools.c   Support tools for writing side-channel safe code
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//

#include "precomp.h"

//
// This code needs to process data in words, and we'd like to use 32-bit words on 32-bit
// architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT & friends.
//

// Buffer limits for SymCryptScsRotateBuffer
#define MIN_BUFFER_SIZE (32)

//
// Masking functions
// Masking functions can be more efficient if the inputs are restricted to values that can
// be represented in the signed data types.
// This is why we have some functions that take 31-bit inputs.
//

// 31-bit inputs

UINT32
SYMCRYPT_CALL
SymCryptMask32IsNonzeroU31( UINT32 v )
{
    SYMCRYPT_ASSERT( v < (1UL<<31) );
    return (-(INT32) v) >> 31;
}

UINT32
SYMCRYPT_CALL
SymCryptMask32IsZeroU31( UINT32 v )
{
    return ~SymCryptMask32IsNonzeroU31( v );
}

UINT32
SYMCRYPT_CALL
SymCryptMask32NeqU31( UINT32 a, UINT32 b )
{
    SYMCRYPT_ASSERT( a < (1UL<<31) );
    SYMCRYPT_ASSERT( b < (1UL<<31) );

    return SymCryptMask32IsNonzeroU31( a ^ b );
}

UINT32
SYMCRYPT_CALL
SymCryptMask32LtU31( UINT32 a, UINT32 b )
{
    SYMCRYPT_ASSERT( a < (1UL<<31) );
    SYMCRYPT_ASSERT( b < (1UL<<31) );

    // Casting to INT32 is defined as a and b are < 2^31
    return ((INT32) a - (INT32) b) >> 31;
}


// 32-bit inputs

UINT32
SYMCRYPT_CALL
SymCryptMask32EqU32( UINT32 a, UINT32 b )
{
    return ~(UINT32) ( (-(INT64)(a^b)) >> 32);
}
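
// Usage sketch (illustrative, not part of this file): the masks above allow
// branch-free selection between two values, so the choice never shows up in
// the instruction trace. All names in this helper are hypothetical.
static UINT32
ScsSelectU32Example( UINT32 a, UINT32 b, UINT32 valIfEqual, UINT32 valIfNotEqual )
{
    UINT32 mask = SymCryptMask32EqU32( a, b );      // all ones iff a == b, else 0

    // (x ^ y) & mask is x ^ y when mask is all ones and 0 when mask is 0,
    // so the XOR below yields valIfEqual or valIfNotEqual without a branch.
    return valIfNotEqual ^ ((valIfEqual ^ valIfNotEqual) & mask);
}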

// Other helper functions
SIZE_T
SYMCRYPT_CALL
SymCryptRoundUpPow2Sizet( SIZE_T v )
{
    SIZE_T res;

    SYMCRYPT_ASSERT( v <= (SIZE_T_MAX / 2) + 1);
    // If v is very large, the doubling of res below could overflow.
    // As SIZE_T is an unsigned type, overflow is defined to be modulo 2^n
    // for some n, so we'd get res == 0; the check inside the loop catches
    // that case and terminates the loop.

    res = 1;
    while( res < v )
    {
        res += res;

        // Catch any overflows; should never happen but break to avoid an infinite loop
        if( res == 0 )
        {
            break;
        }
    }

    return res;
}
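
// Branchless alternative sketch (illustrative, not the library's code): round
// up to a power of 2 by smearing the highest set bit of v-1 downward and then
// adding 1. Assumes v >= 1 and v <= (SIZE_T_MAX / 2) + 1, like the assert above.
static SIZE_T
RoundUpPow2BitSmearExample( SIZE_T v )
{
    SIZE_T x = v - 1;

    // After this loop, every bit at or below the highest set bit of x is set
    for( UINT32 shift = 1; shift < 8 * sizeof( SIZE_T ); shift *= 2 )
    {
        x |= x >> shift;
    }
    return x + 1;
}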


//
// Copy data
//

VOID
SYMCRYPT_CALL
SymCryptScsCopy(
    _In_reads_( cbDst )     PCBYTE  pbSrc,
                            SIZE_T  cbSrc,
    _Out_writes_( cbDst )   PBYTE   pbDst,
                            SIZE_T  cbDst )
// Copy cbSrc bytes of pbSrc into pbDst without revealing cbSrc
// through side channels.
// - pbSrc/cbSrc: buffer to copy data from
// - pbDst/cbDst: buffer that receives the data
// Equivalent to:
//      n = min( cbSrc, cbDst )
//      pbDst[ 0 .. n-1 ] = pbSrc[ 0 .. n-1 ]
// cbSrc is protected from side-channels; cbDst is public.
// Note that pbSrc must be cbDst bytes long, not cbSrc bytes.
{
    UINT32 i;

    SYMCRYPT_ASSERT( cbSrc < (1UL << 31) && cbDst <= (1UL << 31) );

    // Loop over the whole destination buffer and update each byte with the
    // source data where appropriate. Every byte of both buffers is accessed,
    // so the memory access pattern does not depend on the secret cbSrc.
    for( i = 0; i < cbDst; i++ )
    {
        pbDst[ i ] ^= (pbSrc[ i ] ^ pbDst[ i ]) & SymCryptMask32LtU31( i, (UINT32) cbSrc );
    }
}
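
// Usage sketch (hypothetical caller, not part of this file): copy a message of
// secret length out of a fixed-size plaintext buffer. The destination size is
// public; cbMsg stays secret because every byte of both buffers is touched.
static VOID
ScsCopyUsageExample(
    _In_reads_( 256 )   PCBYTE  pbPlaintext,    // must be at least cbDst = 256 bytes
                        SIZE_T  cbMsg,          // secret, <= 256
    _Out_writes_( 256 ) PBYTE   pbMessage )
{
    SymCryptScsCopy( pbPlaintext, cbMsg, pbMessage, 256 );
}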


//
// Buffer rotation
// To recover a message from an encoding with variable data position we have to do a copy from a
// variable memory location. But our memory access pattern cannot depend on the secret location.
// This code rotates a given buffer by a variable # of bytes without revealing the shift amount.
//
// For efficiency we do this using NATIVE_UINT values so that we get the best performance on each platform.
//
// The first step is to rotate the array between 0 and NATIVE_BYTES-1 bytes to get the proper word alignment.
// After that we only have to rotate the words.
// We do this using a sequence of swaps.
// Notation:
//  W[i]    array of words, 0 <= i < n where n is the # of words, a power of 2.
//  s       Rotation amount (to the left). The value in W[s] at the start should appear in W[0] at the end.
//
// We use masked swaps as they seem to be more efficient than masked multiplexers.
// We can break this problem down recursively:
//
// Function Rotate( W, n, s )
//  - Rotate W[0..n/2-1] by s mod n/2
//  - Rotate W[n/2..n-1] by s mod n/2
//  - for i in 0..n/2-1:
//        swap W[i] and W[i+n/2] if (i+s) % n >= n/2
//
// After the two half-sized rotates, each word is in the right position modulo n/2, so all that
// remains is to conditionally swap the (W[i], W[i+n/2]) pairs.
// Let W' be the array after the half-sized rotates. We have
//      W'[i] = W[ (i + s) % (n/2) ]    for i in 0..n/2-1, and similarly for the upper half.
// In the final array W'' we should have W''[i] = W[ (i+s) % n ].
// So W''[i] = W'[i] exactly when (i+s) % n == (i+s) % (n/2), which is equivalent to
// (i+s) % n < n/2; in all other cases the pair has to be swapped.
//
// We turn this into a non-recursive algorithm.
// First we do rotations on 2 words,
// then the fixups to make it 4-word rotations,
// then on to 8 words, etc.
// At each level we compute the masks for the swaps once, and re-use them for each copy.
// As a further optimization, we merge the 1st and 2nd pass into one to reduce the # of reads/writes.
//
// We avoid using / and % throughout to avoid any time-dependent instructions.
//
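
// Reference sketch (for exposition only, not part of this file): the recursive
// form of the algorithm described above, using the same masked-swap idea. The
// production code below is the bottom-up, non-recursive version of this.
// nLog = log2( n ); n is a power of 2 and 0 <= s < n.
static VOID
RotateRefExample( NATIVE_UINT *W, UINT32 n, UINT32 s, UINT32 nLog )
{
    if( n == 1 )
    {
        return;
    }

    RotateRefExample( W,         n / 2, s & (n/2 - 1), nLog - 1 );
    RotateRefExample( W + n / 2, n / 2, s & (n/2 - 1), nLog - 1 );

    for( UINT32 i = 0; i < n / 2; i++ )
    {
        // Swap iff (i + s) % n >= n/2, i.e. iff bit (nLog - 1) of i + s is set
        NATIVE_UINT M = (NATIVE_UINT)( -(NATIVE_INT)( ((i + s) >> (nLog - 1)) & 1 ) );
        NATIVE_UINT T = (W[i] ^ W[i + n/2]) & M;
        W[i]       ^= T;
        W[i + n/2] ^= T;
    }
}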

VOID
SYMCRYPT_CALL
SymCryptScsRotateBuffer(
    _Inout_updates_( cbBuffer ) PBYTE   pbBuffer,
                                SIZE_T  cbBuffer,
                                SIZE_T  lshift )
{
    NATIVE_UINT * pBuf;
    UINT32 n;
    UINT32 a;
    UINT32 b;
    UINT32 i;
    UINT32 j;
    UINT32 blockSize;
    UINT32 blockSizeLog;
    UINT32 blockSizeLimit;

    NATIVE_UINT V;
    NATIVE_UINT T;
    NATIVE_UINT A;
    NATIVE_UINT B;
    NATIVE_UINT C;
    NATIVE_UINT D;
    NATIVE_UINT M;
    NATIVE_UINT M0;
    NATIVE_UINT M1;

    NATIVE_UINT Mask[ 16 ];      // Size must be a power of 2

    SYMCRYPT_ASSERT( (cbBuffer & (cbBuffer - 1)) == 0 && cbBuffer >= MIN_BUFFER_SIZE );
    SYMCRYPT_ASSERT( lshift < cbBuffer );

    pBuf = (NATIVE_UINT *) pbBuffer;
    n = (UINT32)cbBuffer / NATIVE_BYTES;

    // First a rotate left by lshift % NATIVE_BYTES bytes.
    // This is the more complex step: shifting a word by NATIVE_BITS is not a defined
    // operation, and the behavior differs between CPUs.

    // Compute the shift amounts & mask
    // M = 0 if lshift % NATIVE_BYTES == 0, -1 otherwise
    a = 8 * (lshift & (NATIVE_BYTES-1));            // Core shift
    M = (-(NATIVE_INT)a) >> (NATIVE_BITS - 1);      // mask
    b = (NATIVE_BITS - a) & (UINT32) M;             // complementary shift, or 0 if it would be equal to NATIVE_BITS

    i = n;
    V = pBuf[0];
    do{
        // Loop invariant: i > 0 && V = pBuf[ i % n ] from before any changes
        i--;
        T = pBuf[i];
        pBuf[i] = T >> a | ((V << b) & M);
        V = T;
    } while( i > 0 );

    // Now that the rotation is word-aligned, we can start our word rotation
    lshift >>= NATIVE_BYTES_LOG2;           // convert to # of words to rotate

    // We know we have at least 4 words, so we start with a pass that does 4-word rotations
    SYMCRYPT_ASSERT( n >= 4 );

    M  = -(NATIVE_INT)(lshift & 1);
    M0 = -(NATIVE_INT)( ((lshift + 0) >> 1) & 1 );      // (s + 0) mod 4 >= 2
    M1 = -(NATIVE_INT)( ((lshift + 1) >> 1) & 1 );      // (s + 1) mod 4 >= 2

    for( i=0; i<n; i+=4 )
    {
        A = pBuf[i];
        B = pBuf[i+1];
        C = pBuf[i+2];
        D = pBuf[i+3];

        T = (A ^ B) & M;
        A ^= T;
        B ^= T;

        T = (C ^ D) & M;
        C ^= T;
        D ^= T;

        T = (A ^ C) & M0;
        A ^= T;
        C ^= T;

        T = (B ^ D) & M1;
        B ^= T;
        D ^= T;

        pBuf[i  ] = A;
        pBuf[i+1] = B;
        pBuf[i+2] = C;
        pBuf[i+3] = D;
    }

    // Do the swaps using the mask array
    blockSize = 4;  // size of rotated blocks
    blockSizeLog = 2;

    //
    // Using the mask array is beneficial as long as the array is used twice or more.
    // Each swap loop processes 2 * blockSize of data, so the block size should never
    // be larger than n/4.
    //
    blockSizeLimit = SYMCRYPT_MIN( SYMCRYPT_ARRAY_SIZE( Mask ), n/4 );
    while( blockSize <= blockSizeLimit )
    {
        // Compute the masks for this level
        for( i=0; i<blockSize; i++ )
        {
            Mask[i] = -(NATIVE_INT)( ((i + lshift) >> blockSizeLog) & 1 );
        }

        // Now swap the elements of pairs of blocks according to the masks
        for( i=0; i < n; i += 2 * blockSize )
        {
            for( j=0; j < blockSize; j++ )
            {
                A = pBuf[ i + j ];
                B = pBuf[ i + j + blockSize ];
                T = (A ^ B) & Mask[j];
                A ^= T;
                B ^= T;
                pBuf[ i + j ] = A;
                pBuf[ i + j + blockSize ] = B;
            }
        }
        blockSize *= 2;
        blockSizeLog += 1;
    }

    // Do the rest without using the mask array, either because we are only
    // going to use each mask value once, or because we don't have a large-enough
    // array.
    while( blockSize < n )
    {
        // Swap the elements of pairs of blocks according to the masks
        for( i=0; i < n; i += 2 * blockSize )
        {
            for( j=0; j < blockSize; j++ )
            {
                M = -(NATIVE_INT)( ((j + lshift) >> blockSizeLog) & 1 );
                A = pBuf[ i + j ];
                B = pBuf[ i + j + blockSize ];
                T = (A ^ B) & M;
                A ^= T;
                B ^= T;
                pBuf[ i + j ] = A;
                pBuf[ i + j + blockSize ] = B;
            }
        }
        blockSize *= 2;
        blockSizeLog += 1;
    }
}
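
// Usage sketch (hypothetical caller, not part of this file): extract a message
// that starts at a secret offset inside a power-of-2 sized buffer. Rotating
// left by the offset moves the message to position 0; neither call's memory
// access pattern depends on the secret values.
static VOID
ExtractAtSecretOffsetExample(
    _Inout_updates_( 128 )  PBYTE   pbBuffer,   // 128 is a power of 2, >= MIN_BUFFER_SIZE
                            SIZE_T  offset,     // secret, < 128
                            SIZE_T  cbMsg,      // secret, <= 64
    _Out_writes_( 64 )      PBYTE   pbMsg )
{
    SymCryptScsRotateBuffer( pbBuffer, 128, offset );
    SymCryptScsCopy( pbBuffer, cbMsg, pbMsg, 64 );
}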


//
// Map values in a side-channel safe way, typically used for mapping error codes.
//
// (pcMap, nMap) point to an array of nMap entries of type SYMCRYPT_UINT32_MAP;
// each entry specifies a single mapping. If u32Input matches the 'from' field
// of an entry, the return value will be that entry's 'to' field value.
// If u32Input is not equal to any 'from' field value, the return value is u32Default.
// Both u32Input and the return value are treated as secrets w.r.t. side channels.
//
// If multiple map entries have the same 'from' field value, then the return value
// is one of the several 'to' field values; which one is not defined.
//
// This function is particularly useful when mapping error codes in situations where
// the actual error cannot be revealed through side channels.
//

UINT32
SYMCRYPT_CALL
SymCryptMapUint32(
                        UINT32                  u32Input,
                        UINT32                  u32Default,
    _In_reads_(nMap)    PCSYMCRYPT_UINT32_MAP   pcMap,
                        SIZE_T                  nMap)
{
    UINT32 mask;
    UINT32 u32Output    = u32Default;

    for (SIZE_T i = 0; i < nMap; ++i)
    {
        mask = SymCryptMask32EqU32(u32Input, pcMap[i].from);
        u32Output ^= (u32Output ^ pcMap[i].to) & mask;
    }

    return u32Output;
}
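
// Usage sketch (hypothetical mapping, not part of this file): collapse all
// internal failures into one public error code so that side channels cannot
// reveal which internal check failed. The map contents are illustrative.
static const SYMCRYPT_UINT32_MAP g_exampleErrorMap[] =
{
    { (UINT32) SYMCRYPT_NO_ERROR, (UINT32) SYMCRYPT_NO_ERROR },     // success passes through
};

static SYMCRYPT_ERROR
MapInternalErrorExample( SYMCRYPT_ERROR internalError )
{
    // Any input other than SYMCRYPT_NO_ERROR falls through to the default
    return (SYMCRYPT_ERROR) SymCryptMapUint32(
        (UINT32) internalError,
        (UINT32) SYMCRYPT_INVALID_BLOB,
        g_exampleErrorMap,
        SYMCRYPT_ARRAY_SIZE( g_exampleErrorMap ) );
}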