Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/xts.cpp
Line
Count
Source (jump to first uncovered line)
1
// xts.cpp - written and placed in the public domain by Jeffrey Walton
2
3
// Aarch32, Aarch64, Altivec and X86_64 include SIMD as part of the
4
// base architecture. We can use the SIMD code below without an
5
// architecture option. No runtime tests are required. Unfortunately,
6
// we can't use it on Altivec because an architecture switch is required.
7
// The updated XorBuffer gains 0.3 to 1.5 cpb on the architectures for
8
// 16-byte block sizes.
9
10
#include "pch.h"
11
12
#include "xts.h"
13
#include "misc.h"
14
#include "modes.h"
15
#include "cpu.h"
16
17
#if defined(CRYPTOPP_DEBUG)
18
# include "aes.h"
19
# include "threefish.h"
20
#endif
21
22
// 0.3 to 0.4 cpb profit
23
#if defined(__SSE2__) || defined(_M_X64)
24
# include <emmintrin.h>
25
#endif
26
27
#if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
28
# if (CRYPTOPP_ARM_NEON_HEADER) || (CRYPTOPP_ARM_ASIMD_AVAILABLE)
29
#  include <arm_neon.h>
30
# endif
31
#endif
32
33
#if defined(__ALTIVEC__)
34
# include "ppc_simd.h"
35
#endif
36
37
ANONYMOUS_NAMESPACE_BEGIN
38
39
using namespace CryptoPP;
40
41
#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
42
43
using CryptoPP::AES;
44
using CryptoPP::XTS_Mode;
45
using CryptoPP::Threefish512;
46
47
void Modes_TestInstantiations()
48
{
49
    XTS_Mode<AES>::Encryption m0;
50
    XTS_Mode<AES>::Decryption m1;
51
    XTS_Mode<AES>::Encryption m2;
52
    XTS_Mode<AES>::Decryption m3;
53
54
#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
55
    XTS_Mode<Threefish512>::Encryption m4;
56
    XTS_Mode<Threefish512>::Decryption m5;
57
#endif
58
}
59
#endif  // CRYPTOPP_DEBUG
60
61
// XORs count bytes of input with mask and stores the result in output.
//   output - destination buffer
//   input  - first operand
//   mask   - second operand
//   count  - number of bytes; asserted to be a non-zero multiple of 16
// When CRYPTOPP_DISABLE_ASM is defined, or no SIMD target below is
// recognized, the work is handed to xorbuf. Otherwise 16 bytes are
// combined per loop iteration using the platform's vector operations.
inline void XorBuffer(byte *output, const byte *input, const byte *mask, size_t count)
{
    CRYPTOPP_ASSERT(count >= 16 && (count % 16 == 0));

#if !defined(CRYPTOPP_DISABLE_ASM) && (defined(__SSE2__) || defined(_M_X64))

    // SSE2: unaligned loads and store, one 128-bit lane per pass
    for (size_t pos = 0; pos < count; pos += 16)
    {
        const __m128i lhs = _mm_loadu_si128(CONST_M128_CAST(input+pos));
        const __m128i rhs = _mm_loadu_si128(CONST_M128_CAST(mask+pos));
        _mm_storeu_si128(M128_CAST(output+pos), _mm_xor_si128(lhs, rhs));
    }

#elif !defined(CRYPTOPP_DISABLE_ASM) && (defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64))

    // NEON / ASIMD
    for (size_t pos = 0; pos < count; pos += 16)
    {
        const uint8x16_t lhs = vld1q_u8(input+pos);
        const uint8x16_t rhs = vld1q_u8(mask+pos);
        vst1q_u8(output+pos, veorq_u8(lhs, rhs));
    }

#elif !defined(CRYPTOPP_DISABLE_ASM) && defined(__ALTIVEC__)

    // Altivec, using the library's vector helpers
    for (size_t pos = 0; pos < count; pos += 16)
        VecStore(VecXor(VecLoad(input+pos), VecLoad(mask+pos)), output+pos);

#else

    // Portable path
    xorbuf(output, input, mask, count);

#endif
}
87
88
// In-place form: XORs count bytes of mask into buf by calling the
// four-argument XorBuffer with buf as both source and destination.
inline void XorBuffer(byte *buf, const byte *mask, size_t count)
{
    byte *const dest = buf;
    const byte *const src = buf;
    XorBuffer(dest, src, mask, count);
}
92
93
// Borrowed from CMAC, but little-endian representation
94
// Doubles a field element: the len-byte value at in, read as a
// little-endian integer, is shifted left by one bit and written to out.
// If a bit is shifted out of the top, the low bytes of out are XORed with
// the reduction constant for that block size. Borrowed from CMAC, but
// using a little-endian representation.
//   out - destination, len bytes
//   in  - source, len bytes; may be the same buffer as out
//   len - block size in bytes; asserted to be 16, or a power of 2 in
//         [16,128] when CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS is enabled
inline void GF_Double(byte *out, const byte* in, unsigned int len)
{
    // Pick the widest word the platform offers for the shift
#if defined(CRYPTOPP_WORD128_AVAILABLE)
    typedef word128 Limb;
    enum { LIMB_BYTES = 16, LIMB_TOP_BIT = 127 };
#elif defined(_M_X64) || defined(_M_ARM64) || defined(_LP64) || defined(__LP64__)
    typedef word64 Limb;
    enum { LIMB_BYTES = 8, LIMB_TOP_BIT = 63 };
#else
    typedef word32 Limb;
    enum { LIMB_BYTES = 4, LIMB_TOP_BIT = 31 };
#endif

    // Shift word by word from the least significant end, carrying the
    // top bit of each word into the bottom of the next one.
    Limb carry = 0;
    const size_t limbCount = len/LIMB_BYTES;
    for (size_t n = 0, at = 0; n < limbCount; ++n, at += LIMB_BYTES)
    {
        const Limb value = GetWord<Limb>(false, LITTLE_ENDIAN_ORDER, in+at);
        const Limb shifted = (value << 1) + carry;
        PutWord<Limb>(false, LITTLE_ENDIAN_ORDER, out+at, shifted);
        carry = (value >> LIMB_TOP_BIT);
    }

#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS

    CRYPTOPP_ASSERT(IsPowerOf2(len));
    CRYPTOPP_ASSERT(len >= 16);
    CRYPTOPP_ASSERT(len <= 128);

    // Nothing to reduce unless a bit fell off the top
    if (!carry)
        return;

    switch (len)
    {
    case 16:
        out[0] ^= 0x87;
        break;

    case 32:
        // https://crypto.stackexchange.com/q/9815/10496
        // Polynomial x^256 + x^10 + x^5 + x^2 + 1
        out[1] ^= 4;
        out[0] ^= 0x25;
        break;

    case 64:
        // https://crypto.stackexchange.com/q/9815/10496
        // Polynomial x^512 + x^8 + x^5 + x^2 + 1
        out[1] ^= 1;
        out[0] ^= 0x25;
        break;

    case 128:
        // https://crypto.stackexchange.com/q/9815/10496
        // Polynomial x^1024 + x^19 + x^6 + x + 1
        out[2] ^= 8;
        out[0] ^= 0x43;
        break;

    default:
        CRYPTOPP_ASSERT(0);
    }

#else

    CRYPTOPP_ASSERT(len == 16);

    if (carry)
        out[0] ^= 0x87;

#endif  // CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
}
185
186
// In-place form: doubles the len-byte field element stored at inout by
// calling the three-argument GF_Double with the same buffer as source
// and destination.
inline void GF_Double(byte *inout, unsigned int len)
{
    byte *const dest = inout;
    const byte *const src = inout;
    GF_Double(dest, src, len);
}
190
191
ANONYMOUS_NAMESPACE_END
192
193
NAMESPACE_BEGIN(CryptoPP)
194
195
void XTS_ModeBase::ThrowIfInvalidBlockSize(size_t length)
196
0
{
197
#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
198
    CRYPTOPP_ASSERT(length >= 16 && length <= 128 && IsPowerOf2(length));
199
    if (length < 16 || length > 128 || !IsPowerOf2(length))
200
        throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not valid");
201
#else
202
0
    CRYPTOPP_ASSERT(length == 16);
203
0
    if (length != 16)
204
0
        throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
205
0
#endif
206
0
}
207
208
void XTS_ModeBase::ThrowIfInvalidKeyLength(size_t length)
209
2
{
210
2
    CRYPTOPP_ASSERT(length % 2 == 0);
211
2
    if (!GetBlockCipher().IsValidKeyLength((length+1)/2))
212
2
        throw InvalidKeyLength(AlgorithmName(), length);
213
2
}
214
215
void XTS_ModeBase::SetKey(const byte *key, size_t length, const NameValuePairs &params)
216
2
{
217
2
    ThrowIfInvalidKeyLength(length);
218
2
    ThrowIfInvalidBlockSize(BlockSize());
219
220
2
    const size_t klen = length/2;
221
2
    AccessBlockCipher().SetKey(key+0, klen, params);
222
2
    AccessTweakCipher().SetKey(key+klen, klen, params);
223
224
2
    ResizeBuffers();
225
226
2
    size_t ivLength;
227
2
    const byte *iv = GetIVAndThrowIfInvalid(params, ivLength);
228
2
    Resynchronize(iv, (int)ivLength);
229
2
}
230
231
void XTS_ModeBase::Resynchronize(const byte *iv, int ivLength)
232
0
{
233
0
    BlockOrientedCipherModeBase::Resynchronize(iv, ivLength);
234
0
    std::memcpy(m_xregister, m_register, ivLength);
235
0
    GetTweakCipher().ProcessBlock(m_xregister);
236
0
}
237
238
// Restarts the mode using a 64-bit sector number as the IV.
//   sector - sector number, written into the first 8 bytes of the IV
//   order  - byte order used to write the sector number
// The remaining IV bytes are zero. The IV is handed to the base class,
// then copied into m_xregister and run through the tweak cipher to form
// the initial tweak.
void XTS_ModeBase::Resynchronize(word64 sector, ByteOrder order)
{
    // One tweak-cipher block: sector number followed by zero padding
    SecByteBlock sectorIv(GetTweakCipher().BlockSize());
    const size_t ivBytes = sectorIv.size();

    PutWord<word64>(false, order, sectorIv, sector);
    std::memset(sectorIv+8, 0x00, ivBytes-8);

    BlockOrientedCipherModeBase::Resynchronize(sectorIv, static_cast<int>(ivBytes));

    std::memcpy(m_xregister, sectorIv, ivBytes);
    GetTweakCipher().ProcessBlock(m_xregister);
}
248
249
void XTS_ModeBase::ResizeBuffers()
250
6
{
251
6
    BlockOrientedCipherModeBase::ResizeBuffers();
252
6
    m_xworkspace.New(GetBlockCipher().BlockSize()*ParallelBlocks);
253
6
    m_xregister.New(GetBlockCipher().BlockSize()*ParallelBlocks);
254
6
}
255
256
// ProcessData runs either 12-4-1 blocks, 8-2-1 or 4-1 blocks. Which is
257
// selected depends on ParallelBlocks in the header file. 12-4-1 or 8-2-1
258
// can be used on Aarch64 and PowerPC. Intel should use 4-1 due to lack
259
// of registers. The unneeded code paths should be removed by optimizer.
260
// The extra gyrations save us 1.8 cpb on Aarch64 and 2.1 cpb on PowerPC.
261
// Processes whole blocks of a data unit.
//   outString - destination buffer
//   inString  - source buffer
//   length    - byte count; asserted to be a multiple of the block size
// The data is consumed in runs of ParallelBlocks blocks, then (depending
// on ParallelBlocks) runs of 4 or 2 blocks, then single blocks. On entry
// to and exit from every run, the first block of m_xregister holds the
// next tweak to use.
void XTS_ModeBase::ProcessData(byte *outString, const byte *inString, size_t length)
{
    // data unit is multiple of 16 bytes
    CRYPTOPP_ASSERT(length % BlockSize() == 0);

    enum { finalSlot = ParallelBlocks-1 };
    const unsigned int blockSize = GetBlockCipher().BlockSize();
    const size_t wideRun = blockSize*ParallelBlocks;

    // Tweak slots filled for a full-width run: always 4, extended to 8
    // or 12 for the larger ParallelBlocks settings.
    const unsigned int wideSlots = (ParallelBlocks > 8) ? 12 : ((ParallelBlocks > 4) ? 8 : 4);

    // Full-width runs
    for ( ; length >= wideRun; length -= wideRun)
    {
        // Slot 0 already holds the next tweak; derive the rest in order
        for (unsigned int slot = 1; slot < wideSlots; ++slot)
            GF_Double(m_xregister+slot*blockSize, m_xregister+(slot-1)*blockSize, blockSize);

        // merge the tweaks into the input blocks
        XorBuffer(m_xworkspace, inString, m_xregister, wideRun);

        // process the blocks, merging the tweaks into the output blocks
        GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
            outString, wideRun, BlockTransformation::BT_AllowParallel);

        // Leave the next tweak in slot 0
        GF_Double(m_xregister+0, m_xregister+finalSlot*blockSize, blockSize);

        inString += wideRun;
        outString += wideRun;
    }

    // Runs of 4 blocks, only when ParallelBlocks is 12
    const size_t quadRun = blockSize*4;
    for ( ; ParallelBlocks == 12 && length >= quadRun; length -= quadRun)
    {
        for (unsigned int slot = 1; slot < 4; ++slot)
            GF_Double(m_xregister+slot*blockSize, m_xregister+(slot-1)*blockSize, blockSize);

        // merge the tweaks into the input blocks
        XorBuffer(m_xworkspace, inString, m_xregister, quadRun);

        // process the blocks, merging the tweaks into the output blocks
        GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
            outString, quadRun, BlockTransformation::BT_AllowParallel);

        // Leave the next tweak in slot 0
        GF_Double(m_xregister+0, m_xregister+3*blockSize, blockSize);

        inString += quadRun;
        outString += quadRun;
    }

    // Runs of 2 blocks, only when ParallelBlocks is 8
    const size_t pairRun = blockSize*2;
    for ( ; ParallelBlocks == 8 && length >= pairRun; length -= pairRun)
    {
        GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);

        // merge the tweaks into the input blocks
        XorBuffer(m_xworkspace, inString, m_xregister, pairRun);

        // process the blocks, merging the tweaks into the output blocks
        GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
            outString, pairRun, BlockTransformation::BT_AllowParallel);

        // Leave the next tweak in slot 0
        GF_Double(m_xregister+0, m_xregister+1*blockSize, blockSize);

        inString += pairRun;
        outString += pairRun;
    }

    // Whatever remains, one block at a time
    for ( ; length; length -= blockSize)
    {
        // merge the tweak into the input block
        XorBuffer(m_xworkspace, inString, m_xregister, blockSize);

        // process one block
        GetBlockCipher().ProcessBlock(m_xworkspace);

        // merge the tweak into the output block
        XorBuffer(outString, m_xworkspace, m_xregister, blockSize);

        // Multiply T by alpha
        GF_Double(m_xregister, blockSize);

        inString += blockSize;
        outString += blockSize;
    }
}
372
373
// Processes the final portion of a message.
//   outString - destination buffer
//   outLength - size of the destination buffer in bytes
//   inString  - source buffer
//   inLength  - number of source bytes; must be at least one block
// Returns whatever the direction-specific handler returns. Throws
// InvalidArgument when fewer than BlockSize() bytes are supplied; debug
// builds assert first.
size_t XTS_ModeBase::ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
{
    // need at least a full block
    const bool longEnough = (inLength >= BlockSize());
    CRYPTOPP_ASSERT(longEnough);

    if (!longEnough)
    {
        throw InvalidArgument("XTS: message is too short for ciphertext stealing");
    }

    // Dispatch on direction
    return IsForwardTransformation() ?
        ProcessLastPlainBlock(outString, outLength, inString, inLength) :
        ProcessLastCipherBlock(outString, outLength, inString, inLength);
}
386
387
// Forward-direction handler for the end of a message, using ciphertext
// stealing when the input is not a whole number of blocks.
//   outString - destination buffer
//   outLength - size of the destination buffer; asserted >= inLength
//   inString  - source buffer
//   inLength  - number of source bytes (the caller guarantees at least
//               one block)
// Returns the number of bytes written, which equals the original inLength.
size_t XTS_ModeBase::ProcessLastPlainBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
{
    // ensure output buffer is large enough
    CRYPTOPP_ASSERT(outLength >= inLength);

    const unsigned int blockSize = GetBlockCipher().BlockSize();
    const size_t blocks = inLength / blockSize;
    const size_t tail = inLength % blockSize;
    outLength = inLength;

    if (tail == 0)
    {
        // Allow ProcessData to handle all the full blocks
        ProcessData(outString, inString, inLength);
        return inLength;
    }
    else if (blocks > 1)
    {
        // Allow ProcessData to handle full blocks except one. Only the
        // leading whole blocks (head bytes) may be passed on: ProcessData
        // requires a multiple of the block size, and the last full block
        // plus the tail are handled below.
        const size_t head = (blocks-1)*blockSize;
        ProcessData(outString, inString, head);

        outString += head;
        inString  += head; inLength -= head;
    }

    ///// handle the full block /////

    // merge the tweak into the input block
    XorBuffer(m_xworkspace, inString, m_xregister, blockSize);

    // encrypt one block
    GetBlockCipher().ProcessBlock(m_xworkspace);

    // merge the tweak into the output block
    XorBuffer(outString, m_xworkspace, m_xregister, blockSize);

    // Multiply T by alpha
    GF_Double(m_xregister, blockSize);

    ///// handle final partial block /////

    inString += blockSize;
    outString += blockSize;
    const size_t len = inLength-blockSize;  // bytes in the partial block

    // copy in the final plaintext bytes
    std::memcpy(m_xworkspace, inString, len);
    // and copy out the final ciphertext bytes, taken from the start of
    // the block just encrypted
    std::memcpy(outString, outString-blockSize, len);
    // "steal" the rest of that ciphertext block to complete the block
    std::memcpy(m_xworkspace+len, outString-blockSize+len, blockSize-len);

    // merge the tweak into the input block
    XorBuffer(m_xworkspace, m_xregister, blockSize);

    // encrypt one block
    GetBlockCipher().ProcessBlock(m_xworkspace);

    // merge the tweak into the previous output block
    XorBuffer(outString-blockSize, m_xworkspace, m_xregister, blockSize);

    return outLength;
}
451
452
// Reverse-direction handler for the end of a message, undoing ciphertext
// stealing when the input is not a whole number of blocks.
//   outString - destination buffer
//   outLength - size of the destination buffer; asserted >= inLength
//   inString  - source buffer
//   inLength  - number of source bytes (the caller guarantees at least
//               one block)
// Returns the number of bytes written, which equals the original inLength.
size_t XTS_ModeBase::ProcessLastCipherBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
{
    // ensure output buffer is large enough
    CRYPTOPP_ASSERT(outLength >= inLength);

    const unsigned int blockSize = GetBlockCipher().BlockSize();
    const size_t blocks = inLength / blockSize;
    const size_t tail = inLength % blockSize;
    outLength = inLength;

    if (tail == 0)
    {
        // Allow ProcessData to handle all the full blocks
        ProcessData(outString, inString, inLength);
        return inLength;
    }
    else if (blocks > 1)
    {
        // Allow ProcessData to handle full blocks except one. Only the
        // leading whole blocks (head bytes) may be passed on: ProcessData
        // requires a multiple of the block size, and the last full block
        // plus the tail are handled below.
        const size_t head = (blocks-1)*blockSize;
        ProcessData(outString, inString, head);

        outString += head;
        inString  += head; inLength -= head;
    }

    // The last two tweaks. The full block is processed with the later
    // tweak (poly2) first, then the reassembled block with the earlier
    // one (poly1). Local pointers are used so no names leak out of the
    // function.
    byte *const poly1 = m_xregister;
    byte *const poly2 = poly1+blockSize;
    GF_Double(poly2, poly1, blockSize);

    ///// handle final partial block /////

    inString += blockSize;
    outString += blockSize;
    const size_t len = inLength-blockSize;  // bytes in the partial block

    // merge the later tweak into the full input block
    XorBuffer(m_xworkspace, inString-blockSize, poly2, blockSize);

    // process one block
    GetBlockCipher().ProcessBlock(m_xworkspace);

    // merge the later tweak into the result
    XorBuffer(m_xworkspace, poly2, blockSize);

    // move the partial input block to the front of the previous block
    std::memcpy(outString-blockSize, inString, len);
    // copy out the final recovered bytes
    std::memcpy(outString, m_xworkspace, len);
    // return the "stolen" bytes to complete the previous block
    std::memcpy(outString-blockSize+len, m_xworkspace+len, blockSize-len);

    ///// handle the full previous block /////

    inString -= blockSize;
    outString -= blockSize;

    // merge the earlier tweak into the reassembled block
    XorBuffer(m_xworkspace, outString, poly1, blockSize);

    // process one block
    GetBlockCipher().ProcessBlock(m_xworkspace);

    // merge the earlier tweak into the output block
    XorBuffer(outString, m_xworkspace, poly1, blockSize);

    return outLength;
}
520
521
NAMESPACE_END