Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/kalyna.cpp
Line
Count
Source (jump to first uncovered line)
1
// kalyna.cpp - written and placed in the public domain by Jeffrey Walton
2
//              This code relied upon three sources. First was Oliynykov, Gorbenko, Kazymyrov, Ruzhentsev,
3
//              Kuznetsov, Gorbenko, Dyrda, Dolgov, Pushkaryov, Mordvinov and Kaidalov's "A New Encryption
4
//              Standard of Ukraine: The Kalyna Block Cipher" (http://eprint.iacr.org/2015/650.pdf). Second
5
//              was Roman Oliynykov and Oleksandr Kazymyrov's GitHub with the reference implementation
6
//              (http://github.com/Roman-Oliynykov/Kalyna-reference). The third and most utilized resource
7
//              was Keru Kuro's public domain implementation of Kalyna in CppCrypto
8
//              (http://sourceforge.net/projects/cppcrypto/). Kuro has an outstanding implementation that
9
//              performed better than the reference implementation and our initial attempts. The only downside
10
//              was the missing big endian port.
11
12
#include "pch.h"
13
#include "config.h"
14
15
#include "kalyna.h"
16
#include "argnames.h"
17
#include "misc.h"
18
#include "cpu.h"
19
20
NAMESPACE_BEGIN(CryptoPP)
21
NAMESPACE_BEGIN(KalynaTab)
22
23
// T can be shared between Kupyna and Kalyna; IT, S and IS are Kalyna specific
24
extern const word64 T[8][256];  // Columns
25
extern const word64 IT[8][256]; // Inverse
26
extern const byte S[4][256];    // Substitution
27
extern const byte IS[4][256];   // Inverse
28
29
NAMESPACE_END
30
NAMESPACE_END
31
32
ANONYMOUS_NAMESPACE_BEGIN
33
34
// The typedef here is to sidestep problems with byte in the global namespace
35
typedef unsigned char byte;
36
37
using CryptoPP::word64;
38
using CryptoPP::KalynaTab::T;
39
using CryptoPP::KalynaTab::S;
40
using CryptoPP::KalynaTab::IT;
41
using CryptoPP::KalynaTab::IS;
42
43
template <unsigned int NB>
44
inline void MakeOddKey(const word64 evenkey[NB], word64 oddkey[NB])
45
49
{
46
#if (CRYPTOPP_BIG_ENDIAN)
47
    if (NB == 2)
48
    {
49
        oddkey[0] = (evenkey[1] << 8) | (evenkey[0] >> 56);
50
        oddkey[1] = (evenkey[0] << 8) | (evenkey[1] >> 56);
51
    }
52
    else if (NB == 4)
53
    {
54
        oddkey[0] = (evenkey[2] << 40) | (evenkey[1] >> 24);
55
        oddkey[1] = (evenkey[3] << 40) | (evenkey[2] >> 24);
56
        oddkey[2] = (evenkey[0] << 40) | (evenkey[3] >> 24);
57
        oddkey[3] = (evenkey[1] << 40) | (evenkey[0] >> 24);
58
    }
59
    else if (NB == 8)
60
    {
61
        oddkey[0] = (evenkey[3] << 40) | (evenkey[2] >> 24);
62
        oddkey[1] = (evenkey[4] << 40) | (evenkey[3] >> 24);
63
        oddkey[2] = (evenkey[5] << 40) | (evenkey[4] >> 24);
64
        oddkey[3] = (evenkey[6] << 40) | (evenkey[5] >> 24);
65
66
        oddkey[4] = (evenkey[7] << 40) | (evenkey[6] >> 24);
67
        oddkey[5] = (evenkey[0] << 40) | (evenkey[7] >> 24);
68
        oddkey[6] = (evenkey[1] << 40) | (evenkey[0] >> 24);
69
        oddkey[7] = (evenkey[2] << 40) | (evenkey[1] >> 24);
70
    }
71
    else
72
    {
73
        CRYPTOPP_ASSERT(0);
74
    }
75
#else
76
49
    static const unsigned int U = (NB == 2) ? 16 : (NB == 4) ? 32 : (NB == 8) ? 64 : -1;
77
49
    static const unsigned int V = (NB == 2) ?  7 : (NB == 4) ? 11 : (NB == 8) ? 19 : -1;
78
79
49
    const byte* even = reinterpret_cast<const byte*>(evenkey);
80
49
    byte* odd = reinterpret_cast<byte*>(oddkey);
81
82
49
    std::memcpy(odd, even + V, U - V);
83
49
    std::memcpy(odd + U - V, even, V);
84
49
#endif
85
49
}
kalyna.cpp:void (anonymous namespace)::MakeOddKey<2u>(unsigned long const*, unsigned long*)
Line
Count
Source
45
15
{
46
#if (CRYPTOPP_BIG_ENDIAN)
47
    if (NB == 2)
48
    {
49
        oddkey[0] = (evenkey[1] << 8) | (evenkey[0] >> 56);
50
        oddkey[1] = (evenkey[0] << 8) | (evenkey[1] >> 56);
51
    }
52
    else if (NB == 4)
53
    {
54
        oddkey[0] = (evenkey[2] << 40) | (evenkey[1] >> 24);
55
        oddkey[1] = (evenkey[3] << 40) | (evenkey[2] >> 24);
56
        oddkey[2] = (evenkey[0] << 40) | (evenkey[3] >> 24);
57
        oddkey[3] = (evenkey[1] << 40) | (evenkey[0] >> 24);
58
    }
59
    else if (NB == 8)
60
    {
61
        oddkey[0] = (evenkey[3] << 40) | (evenkey[2] >> 24);
62
        oddkey[1] = (evenkey[4] << 40) | (evenkey[3] >> 24);
63
        oddkey[2] = (evenkey[5] << 40) | (evenkey[4] >> 24);
64
        oddkey[3] = (evenkey[6] << 40) | (evenkey[5] >> 24);
65
66
        oddkey[4] = (evenkey[7] << 40) | (evenkey[6] >> 24);
67
        oddkey[5] = (evenkey[0] << 40) | (evenkey[7] >> 24);
68
        oddkey[6] = (evenkey[1] << 40) | (evenkey[0] >> 24);
69
        oddkey[7] = (evenkey[2] << 40) | (evenkey[1] >> 24);
70
    }
71
    else
72
    {
73
        CRYPTOPP_ASSERT(0);
74
    }
75
#else
76
15
    static const unsigned int U = (NB == 2) ? 16 : (NB == 4) ? 32 : (NB == 8) ? 64 : -1;
77
15
    static const unsigned int V = (NB == 2) ?  7 : (NB == 4) ? 11 : (NB == 8) ? 19 : -1;
78
79
15
    const byte* even = reinterpret_cast<const byte*>(evenkey);
80
15
    byte* odd = reinterpret_cast<byte*>(oddkey);
81
82
15
    std::memcpy(odd, even + V, U - V);
83
15
    std::memcpy(odd + U - V, even, V);
84
15
#endif
85
15
}
kalyna.cpp:void (anonymous namespace)::MakeOddKey<4u>(unsigned long const*, unsigned long*)
Line
Count
Source
45
7
{
46
#if (CRYPTOPP_BIG_ENDIAN)
47
    if (NB == 2)
48
    {
49
        oddkey[0] = (evenkey[1] << 8) | (evenkey[0] >> 56);
50
        oddkey[1] = (evenkey[0] << 8) | (evenkey[1] >> 56);
51
    }
52
    else if (NB == 4)
53
    {
54
        oddkey[0] = (evenkey[2] << 40) | (evenkey[1] >> 24);
55
        oddkey[1] = (evenkey[3] << 40) | (evenkey[2] >> 24);
56
        oddkey[2] = (evenkey[0] << 40) | (evenkey[3] >> 24);
57
        oddkey[3] = (evenkey[1] << 40) | (evenkey[0] >> 24);
58
    }
59
    else if (NB == 8)
60
    {
61
        oddkey[0] = (evenkey[3] << 40) | (evenkey[2] >> 24);
62
        oddkey[1] = (evenkey[4] << 40) | (evenkey[3] >> 24);
63
        oddkey[2] = (evenkey[5] << 40) | (evenkey[4] >> 24);
64
        oddkey[3] = (evenkey[6] << 40) | (evenkey[5] >> 24);
65
66
        oddkey[4] = (evenkey[7] << 40) | (evenkey[6] >> 24);
67
        oddkey[5] = (evenkey[0] << 40) | (evenkey[7] >> 24);
68
        oddkey[6] = (evenkey[1] << 40) | (evenkey[0] >> 24);
69
        oddkey[7] = (evenkey[2] << 40) | (evenkey[1] >> 24);
70
    }
71
    else
72
    {
73
        CRYPTOPP_ASSERT(0);
74
    }
75
#else
76
7
    static const unsigned int U = (NB == 2) ? 16 : (NB == 4) ? 32 : (NB == 8) ? 64 : -1;
77
7
    static const unsigned int V = (NB == 2) ?  7 : (NB == 4) ? 11 : (NB == 8) ? 19 : -1;
78
79
7
    const byte* even = reinterpret_cast<const byte*>(evenkey);
80
7
    byte* odd = reinterpret_cast<byte*>(oddkey);
81
82
7
    std::memcpy(odd, even + V, U - V);
83
7
    std::memcpy(odd + U - V, even, V);
84
7
#endif
85
7
}
kalyna.cpp:void (anonymous namespace)::MakeOddKey<8u>(unsigned long const*, unsigned long*)
Line
Count
Source
45
27
{
46
#if (CRYPTOPP_BIG_ENDIAN)
47
    if (NB == 2)
48
    {
49
        oddkey[0] = (evenkey[1] << 8) | (evenkey[0] >> 56);
50
        oddkey[1] = (evenkey[0] << 8) | (evenkey[1] >> 56);
51
    }
52
    else if (NB == 4)
53
    {
54
        oddkey[0] = (evenkey[2] << 40) | (evenkey[1] >> 24);
55
        oddkey[1] = (evenkey[3] << 40) | (evenkey[2] >> 24);
56
        oddkey[2] = (evenkey[0] << 40) | (evenkey[3] >> 24);
57
        oddkey[3] = (evenkey[1] << 40) | (evenkey[0] >> 24);
58
    }
59
    else if (NB == 8)
60
    {
61
        oddkey[0] = (evenkey[3] << 40) | (evenkey[2] >> 24);
62
        oddkey[1] = (evenkey[4] << 40) | (evenkey[3] >> 24);
63
        oddkey[2] = (evenkey[5] << 40) | (evenkey[4] >> 24);
64
        oddkey[3] = (evenkey[6] << 40) | (evenkey[5] >> 24);
65
66
        oddkey[4] = (evenkey[7] << 40) | (evenkey[6] >> 24);
67
        oddkey[5] = (evenkey[0] << 40) | (evenkey[7] >> 24);
68
        oddkey[6] = (evenkey[1] << 40) | (evenkey[0] >> 24);
69
        oddkey[7] = (evenkey[2] << 40) | (evenkey[1] >> 24);
70
    }
71
    else
72
    {
73
        CRYPTOPP_ASSERT(0);
74
    }
75
#else
76
27
    static const unsigned int U = (NB == 2) ? 16 : (NB == 4) ? 32 : (NB == 8) ? 64 : -1;
77
27
    static const unsigned int V = (NB == 2) ?  7 : (NB == 4) ? 11 : (NB == 8) ? 19 : -1;
78
79
27
    const byte* even = reinterpret_cast<const byte*>(evenkey);
80
27
    byte* odd = reinterpret_cast<byte*>(oddkey);
81
82
27
    std::memcpy(odd, even + V, U - V);
83
27
    std::memcpy(odd + U - V, even, V);
84
27
#endif
85
27
}
86
87
template <unsigned int NB>
88
inline void SwapBlocks(word64 k[NB])
89
34
{
90
34
    const word64 t = k[0];
91
34
    k[0] = k[1];
92
93
34
    if (NB > 2)
94
34
    {
95
34
        k[1] = k[2];
96
34
        k[2] = k[3];
97
34
    }
98
99
34
    if (NB > 4)
100
27
    {
101
27
        k[3] = k[4];
102
27
        k[4] = k[5];
103
27
        k[5] = k[6];
104
27
        k[6] = k[7];
105
27
    }
106
107
34
    k[NB - 1] = t;
108
34
}
kalyna.cpp:void (anonymous namespace)::SwapBlocks<4u>(unsigned long*)
Line
Count
Source
89
7
{
90
7
    const word64 t = k[0];
91
7
    k[0] = k[1];
92
93
7
    if (NB > 2)
94
7
    {
95
7
        k[1] = k[2];
96
7
        k[2] = k[3];
97
7
    }
98
99
7
    if (NB > 4)
100
0
    {
101
0
        k[3] = k[4];
102
0
        k[4] = k[5];
103
0
        k[5] = k[6];
104
0
        k[6] = k[7];
105
0
    }
106
107
7
    k[NB - 1] = t;
108
7
}
kalyna.cpp:void (anonymous namespace)::SwapBlocks<8u>(unsigned long*)
Line
Count
Source
89
27
{
90
27
    const word64 t = k[0];
91
27
    k[0] = k[1];
92
93
27
    if (NB > 2)
94
27
    {
95
27
        k[1] = k[2];
96
27
        k[2] = k[3];
97
27
    }
98
99
27
    if (NB > 4)
100
27
    {
101
27
        k[3] = k[4];
102
27
        k[4] = k[5];
103
27
        k[5] = k[6];
104
27
        k[6] = k[7];
105
27
    }
106
107
27
    k[NB - 1] = t;
108
27
}
109
110
template <unsigned int NB>
111
inline void AddKey(const word64 x[NB], word64 y[NB], const word64 k[NB])
112
63
{
113
63
    y[0] = x[0] + k[0];
114
63
    y[1] = x[1] + k[1];
115
116
63
    if (NB > 2)
117
42
    {
118
42
        y[2] = x[2] + k[2];
119
42
        y[3] = x[3] + k[3];
120
42
    }
121
122
63
    if (NB > 4)
123
33
    {
124
33
        y[4] = x[4] + k[4];
125
33
        y[5] = x[5] + k[5];
126
33
        y[6] = x[6] + k[6];
127
33
        y[7] = x[7] + k[7];
128
33
    }
129
63
}
kalyna.cpp:void (anonymous namespace)::AddKey<2u>(unsigned long const*, unsigned long*, unsigned long const*)
Line
Count
Source
112
21
{
113
21
    y[0] = x[0] + k[0];
114
21
    y[1] = x[1] + k[1];
115
116
21
    if (NB > 2)
117
0
    {
118
0
        y[2] = x[2] + k[2];
119
0
        y[3] = x[3] + k[3];
120
0
    }
121
122
21
    if (NB > 4)
123
0
    {
124
0
        y[4] = x[4] + k[4];
125
0
        y[5] = x[5] + k[5];
126
0
        y[6] = x[6] + k[6];
127
0
        y[7] = x[7] + k[7];
128
0
    }
129
21
}
kalyna.cpp:void (anonymous namespace)::AddKey<4u>(unsigned long const*, unsigned long*, unsigned long const*)
Line
Count
Source
112
9
{
113
9
    y[0] = x[0] + k[0];
114
9
    y[1] = x[1] + k[1];
115
116
9
    if (NB > 2)
117
9
    {
118
9
        y[2] = x[2] + k[2];
119
9
        y[3] = x[3] + k[3];
120
9
    }
121
122
9
    if (NB > 4)
123
0
    {
124
0
        y[4] = x[4] + k[4];
125
0
        y[5] = x[5] + k[5];
126
0
        y[6] = x[6] + k[6];
127
0
        y[7] = x[7] + k[7];
128
0
    }
129
9
}
kalyna.cpp:void (anonymous namespace)::AddKey<8u>(unsigned long const*, unsigned long*, unsigned long const*)
Line
Count
Source
112
33
{
113
33
    y[0] = x[0] + k[0];
114
33
    y[1] = x[1] + k[1];
115
116
33
    if (NB > 2)
117
33
    {
118
33
        y[2] = x[2] + k[2];
119
33
        y[3] = x[3] + k[3];
120
33
    }
121
122
33
    if (NB > 4)
123
33
    {
124
33
        y[4] = x[4] + k[4];
125
33
        y[5] = x[5] + k[5];
126
33
        y[6] = x[6] + k[6];
127
33
        y[7] = x[7] + k[7];
128
33
    }
129
33
}
130
131
template <unsigned int NB>
132
inline void SubKey(const word64 x[NB], word64 y[NB], const word64 k[NB])
133
14
{
134
14
    y[0] = x[0] - k[0];
135
14
    y[1] = x[1] - k[1];
136
137
14
    if (NB > 2)
138
13
    {
139
13
        y[2] = x[2] - k[2];
140
13
        y[3] = x[3] - k[3];
141
13
    }
142
143
14
    if (NB > 4)
144
13
    {
145
13
        y[4] = x[4] - k[4];
146
13
        y[5] = x[5] - k[5];
147
13
        y[6] = x[6] - k[6];
148
13
        y[7] = x[7] - k[7];
149
13
    }
150
14
}
kalyna.cpp:void (anonymous namespace)::SubKey<2u>(unsigned long const*, unsigned long*, unsigned long const*)
Line
Count
Source
133
1
{
134
1
    y[0] = x[0] - k[0];
135
1
    y[1] = x[1] - k[1];
136
137
1
    if (NB > 2)
138
0
    {
139
0
        y[2] = x[2] - k[2];
140
0
        y[3] = x[3] - k[3];
141
0
    }
142
143
1
    if (NB > 4)
144
0
    {
145
0
        y[4] = x[4] - k[4];
146
0
        y[5] = x[5] - k[5];
147
0
        y[6] = x[6] - k[6];
148
0
        y[7] = x[7] - k[7];
149
0
    }
150
1
}
Unexecuted instantiation: kalyna.cpp:void (anonymous namespace)::SubKey<4u>(unsigned long const*, unsigned long*, unsigned long const*)
kalyna.cpp:void (anonymous namespace)::SubKey<8u>(unsigned long const*, unsigned long*, unsigned long const*)
Line
Count
Source
133
13
{
134
13
    y[0] = x[0] - k[0];
135
13
    y[1] = x[1] - k[1];
136
137
13
    if (NB > 2)
138
13
    {
139
13
        y[2] = x[2] - k[2];
140
13
        y[3] = x[3] - k[3];
141
13
    }
142
143
13
    if (NB > 4)
144
13
    {
145
13
        y[4] = x[4] - k[4];
146
13
        y[5] = x[5] - k[5];
147
13
        y[6] = x[6] - k[6];
148
13
        y[7] = x[7] - k[7];
149
13
    }
150
13
}
151
152
template <unsigned int NB>
153
static inline void AddConstant(word64 src[NB], word64 dst[NB], word64 constant)
154
56
{
155
56
    dst[0] = src[0] + constant;
156
56
    dst[1] = src[1] + constant;
157
158
56
    if (NB > 2)
159
38
    {
160
38
        dst[2] = src[2] + constant;
161
38
        dst[3] = src[3] + constant;
162
38
    }
163
164
56
    if (NB > 4)
165
30
    {
166
30
        dst[4] = src[4] + constant;
167
30
        dst[5] = src[5] + constant;
168
30
        dst[6] = src[6] + constant;
169
30
        dst[7] = src[7] + constant;
170
30
    }
171
56
}
kalyna.cpp:void (anonymous namespace)::AddConstant<2u>(unsigned long*, unsigned long*, unsigned long)
Line
Count
Source
154
18
{
155
18
    dst[0] = src[0] + constant;
156
18
    dst[1] = src[1] + constant;
157
158
18
    if (NB > 2)
159
0
    {
160
0
        dst[2] = src[2] + constant;
161
0
        dst[3] = src[3] + constant;
162
0
    }
163
164
18
    if (NB > 4)
165
0
    {
166
0
        dst[4] = src[4] + constant;
167
0
        dst[5] = src[5] + constant;
168
0
        dst[6] = src[6] + constant;
169
0
        dst[7] = src[7] + constant;
170
0
    }
171
18
}
kalyna.cpp:void (anonymous namespace)::AddConstant<4u>(unsigned long*, unsigned long*, unsigned long)
Line
Count
Source
154
8
{
155
8
    dst[0] = src[0] + constant;
156
8
    dst[1] = src[1] + constant;
157
158
8
    if (NB > 2)
159
8
    {
160
8
        dst[2] = src[2] + constant;
161
8
        dst[3] = src[3] + constant;
162
8
    }
163
164
8
    if (NB > 4)
165
0
    {
166
0
        dst[4] = src[4] + constant;
167
0
        dst[5] = src[5] + constant;
168
0
        dst[6] = src[6] + constant;
169
0
        dst[7] = src[7] + constant;
170
0
    }
171
8
}
kalyna.cpp:void (anonymous namespace)::AddConstant<8u>(unsigned long*, unsigned long*, unsigned long)
Line
Count
Source
154
30
{
155
30
    dst[0] = src[0] + constant;
156
30
    dst[1] = src[1] + constant;
157
158
30
    if (NB > 2)
159
30
    {
160
30
        dst[2] = src[2] + constant;
161
30
        dst[3] = src[3] + constant;
162
30
    }
163
164
30
    if (NB > 4)
165
30
    {
166
30
        dst[4] = src[4] + constant;
167
30
        dst[5] = src[5] + constant;
168
30
        dst[6] = src[6] + constant;
169
30
        dst[7] = src[7] + constant;
170
30
    }
171
30
}
172
173
inline void G0128(const word64 x[2], word64 y[2])
174
3
{
175
3
    y[0] = T[0][(byte)x[0]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
176
3
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[1] >> 56)];
177
3
    y[1] = T[0][(byte)x[1]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
178
3
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[0] >> 56)];
179
3
}
180
181
inline void G0256(const word64 x[4], word64 y[4])
182
1
{
183
1
    y[0] = T[0][(byte)x[0]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[3] >> 16)] ^ T[3][(byte)(x[3] >> 24)] ^
184
1
        T[4][(byte)(x[2] >> 32)] ^ T[5][(byte)(x[2] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[1] >> 56)];
185
1
    y[1] = T[0][(byte)x[1]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
186
1
        T[4][(byte)(x[3] >> 32)] ^ T[5][(byte)(x[3] >> 40)] ^ T[6][(byte)(x[2] >> 48)] ^ T[7][(byte)(x[2] >> 56)];
187
1
    y[2] = T[0][(byte)x[2]] ^ T[1][(byte)(x[2] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
188
1
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[3] >> 48)] ^ T[7][(byte)(x[3] >> 56)];
189
1
    y[3] = T[0][(byte)x[3]] ^ T[1][(byte)(x[3] >> 8)] ^ T[2][(byte)(x[2] >> 16)] ^ T[3][(byte)(x[2] >> 24)] ^
190
1
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[0] >> 56)];
191
1
}
192
193
inline void G0512(const word64 x[8], word64 y[8])
194
3
{
195
3
    y[0] = T[0][(byte)x[0]] ^ T[1][(byte)(x[7] >> 8)] ^ T[2][(byte)(x[6] >> 16)] ^ T[3][(byte)(x[5] >> 24)] ^
196
3
        T[4][(byte)(x[4] >> 32)] ^ T[5][(byte)(x[3] >> 40)] ^ T[6][(byte)(x[2] >> 48)] ^ T[7][(byte)(x[1] >> 56)];
197
3
    y[1] = T[0][(byte)x[1]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[7] >> 16)] ^ T[3][(byte)(x[6] >> 24)] ^
198
3
        T[4][(byte)(x[5] >> 32)] ^ T[5][(byte)(x[4] >> 40)] ^ T[6][(byte)(x[3] >> 48)] ^ T[7][(byte)(x[2] >> 56)];
199
3
    y[2] = T[0][(byte)x[2]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[7] >> 24)] ^
200
3
        T[4][(byte)(x[6] >> 32)] ^ T[5][(byte)(x[5] >> 40)] ^ T[6][(byte)(x[4] >> 48)] ^ T[7][(byte)(x[3] >> 56)];
201
3
    y[3] = T[0][(byte)x[3]] ^ T[1][(byte)(x[2] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
202
3
        T[4][(byte)(x[7] >> 32)] ^ T[5][(byte)(x[6] >> 40)] ^ T[6][(byte)(x[5] >> 48)] ^ T[7][(byte)(x[4] >> 56)];
203
3
    y[4] = T[0][(byte)x[4]] ^ T[1][(byte)(x[3] >> 8)] ^ T[2][(byte)(x[2] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
204
3
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[7] >> 40)] ^ T[6][(byte)(x[6] >> 48)] ^ T[7][(byte)(x[5] >> 56)];
205
3
    y[5] = T[0][(byte)x[5]] ^ T[1][(byte)(x[4] >> 8)] ^ T[2][(byte)(x[3] >> 16)] ^ T[3][(byte)(x[2] >> 24)] ^
206
3
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[7] >> 48)] ^ T[7][(byte)(x[6] >> 56)];
207
3
    y[6] = T[0][(byte)x[6]] ^ T[1][(byte)(x[5] >> 8)] ^ T[2][(byte)(x[4] >> 16)] ^ T[3][(byte)(x[3] >> 24)] ^
208
3
        T[4][(byte)(x[2] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[7] >> 56)];
209
3
    y[7] = T[0][(byte)x[7]] ^ T[1][(byte)(x[6] >> 8)] ^ T[2][(byte)(x[5] >> 16)] ^ T[3][(byte)(x[4] >> 24)] ^
210
3
        T[4][(byte)(x[3] >> 32)] ^ T[5][(byte)(x[2] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[0] >> 56)];
211
3
}
212
213
inline void GL128(const word64 x[2], word64 y[2], const word64 k[2])
214
21
{
215
21
    y[0] = k[0] + (T[0][(byte)x[0]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
216
21
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[1] >> 56)]);
217
21
    y[1] = k[1] + (T[0][(byte)x[1]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
218
21
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[0] >> 56)]);
219
21
}
220
221
inline void GL256(const word64 x[4], word64 y[4], const word64 k[4])
222
9
{
223
9
    y[0] = k[0] + (T[0][(byte)x[0]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[3] >> 16)] ^ T[3][(byte)(x[3] >> 24)] ^
224
9
        T[4][(byte)(x[2] >> 32)] ^ T[5][(byte)(x[2] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[1] >> 56)]);
225
9
    y[1] = k[1] + (T[0][(byte)x[1]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
226
9
        T[4][(byte)(x[3] >> 32)] ^ T[5][(byte)(x[3] >> 40)] ^ T[6][(byte)(x[2] >> 48)] ^ T[7][(byte)(x[2] >> 56)]);
227
9
    y[2] = k[2] + (T[0][(byte)x[2]] ^ T[1][(byte)(x[2] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
228
9
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[3] >> 48)] ^ T[7][(byte)(x[3] >> 56)]);
229
9
    y[3] = k[3] + (T[0][(byte)x[3]] ^ T[1][(byte)(x[3] >> 8)] ^ T[2][(byte)(x[2] >> 16)] ^ T[3][(byte)(x[2] >> 24)] ^
230
9
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[0] >> 56)]);
231
9
}
232
233
inline void GL512(const word64 x[8], word64 y[8], const word64 k[8])
234
33
{
235
33
    y[0] = k[0] + (T[0][(byte)x[0]] ^ T[1][(byte)(x[7] >> 8)] ^ T[2][(byte)(x[6] >> 16)] ^ T[3][(byte)(x[5] >> 24)] ^
236
33
        T[4][(byte)(x[4] >> 32)] ^ T[5][(byte)(x[3] >> 40)] ^ T[6][(byte)(x[2] >> 48)] ^ T[7][(byte)(x[1] >> 56)]);
237
33
    y[1] = k[1] + (T[0][(byte)x[1]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[7] >> 16)] ^ T[3][(byte)(x[6] >> 24)] ^
238
33
        T[4][(byte)(x[5] >> 32)] ^ T[5][(byte)(x[4] >> 40)] ^ T[6][(byte)(x[3] >> 48)] ^ T[7][(byte)(x[2] >> 56)]);
239
33
    y[2] = k[2] + (T[0][(byte)x[2]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[7] >> 24)] ^
240
33
        T[4][(byte)(x[6] >> 32)] ^ T[5][(byte)(x[5] >> 40)] ^ T[6][(byte)(x[4] >> 48)] ^ T[7][(byte)(x[3] >> 56)]);
241
33
    y[3] = k[3] + (T[0][(byte)x[3]] ^ T[1][(byte)(x[2] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
242
33
        T[4][(byte)(x[7] >> 32)] ^ T[5][(byte)(x[6] >> 40)] ^ T[6][(byte)(x[5] >> 48)] ^ T[7][(byte)(x[4] >> 56)]);
243
33
    y[4] = k[4] + (T[0][(byte)x[4]] ^ T[1][(byte)(x[3] >> 8)] ^ T[2][(byte)(x[2] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
244
33
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[7] >> 40)] ^ T[6][(byte)(x[6] >> 48)] ^ T[7][(byte)(x[5] >> 56)]);
245
33
    y[5] = k[5] + (T[0][(byte)x[5]] ^ T[1][(byte)(x[4] >> 8)] ^ T[2][(byte)(x[3] >> 16)] ^ T[3][(byte)(x[2] >> 24)] ^
246
33
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[7] >> 48)] ^ T[7][(byte)(x[6] >> 56)]);
247
33
    y[6] = k[6] + (T[0][(byte)x[6]] ^ T[1][(byte)(x[5] >> 8)] ^ T[2][(byte)(x[4] >> 16)] ^ T[3][(byte)(x[3] >> 24)] ^
248
33
        T[4][(byte)(x[2] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[7] >> 56)]);
249
33
    y[7] = k[7] + (T[0][(byte)x[7]] ^ T[1][(byte)(x[6] >> 8)] ^ T[2][(byte)(x[5] >> 16)] ^ T[3][(byte)(x[4] >> 24)] ^
250
33
        T[4][(byte)(x[3] >> 32)] ^ T[5][(byte)(x[2] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[0] >> 56)]);
251
33
}
252
253
// In-place, per-word transform of a 2-word state.
// Each word is replaced by the XOR of eight lookups IT[col][S[col % 4][b]],
// where b is byte `col` of that same word. Words do not influence each other.
inline void IMC128(word64 x[2])
{
    for (unsigned int i = 0; i < 2; ++i)
    {
        const word64 w = x[i];
        word64 acc = 0;
        for (unsigned int col = 0; col < 8; ++col)
            acc ^= IT[col][S[col % 4][static_cast<byte>(w >> (8 * col))]];
        x[i] = acc;
    }
}
260
261
// In-place, per-word transform of a 4-word state.
// Each word is replaced by the XOR of eight lookups IT[col][S[col % 4][b]],
// where b is byte `col` of that same word. Words do not influence each other.
inline void IMC256(word64 x[4])
{
    for (unsigned int i = 0; i < 4; ++i)
    {
        const word64 w = x[i];
        word64 acc = 0;
        for (unsigned int col = 0; col < 8; ++col)
            acc ^= IT[col][S[col % 4][static_cast<byte>(w >> (8 * col))]];
        x[i] = acc;
    }
}
272
273
// In-place, per-word transform of an 8-word state.
// Each word is replaced by the XOR of eight lookups IT[col][S[col % 4][b]],
// where b is byte `col` of that same word. Words do not influence each other.
inline void IMC512(word64 x[8])
{
    for (unsigned int i = 0; i < 8; ++i)
    {
        const word64 w = x[i];
        word64 acc = 0;
        for (unsigned int col = 0; col < 8; ++col)
            acc ^= IT[col][S[col % 4][static_cast<byte>(w >> (8 * col))]];
        x[i] = acc;
    }
}
292
293
inline void IG128(const word64 x[2], word64 y[2], const word64 k[2])
294
9
{
295
9
    y[0] = k[0] ^ IT[0][(byte)x[0]] ^ IT[1][(byte)(x[0] >> 8)] ^ IT[2][(byte)(x[0] >> 16)] ^ IT[3][(byte)(x[0] >> 24)] ^
296
9
        IT[4][(byte)(x[1] >> 32)] ^ IT[5][(byte)(x[1] >> 40)] ^ IT[6][(byte)(x[1] >> 48)] ^ IT[7][(byte)(x[1] >> 56)];
297
9
    y[1] = k[1] ^ IT[0][(byte)x[1]] ^ IT[1][(byte)(x[1] >> 8)] ^ IT[2][(byte)(x[1] >> 16)] ^ IT[3][(byte)(x[1] >> 24)] ^
298
9
        IT[4][(byte)(x[0] >> 32)] ^ IT[5][(byte)(x[0] >> 40)] ^ IT[6][(byte)(x[0] >> 48)] ^ IT[7][(byte)(x[0] >> 56)];
299
9
}
300
301
inline void IG256(const word64 x[4], word64 y[4], const word64 k[4])
302
0
{
303
0
    y[0] = k[0] ^ IT[0][(byte)x[0]] ^ IT[1][(byte)(x[0] >> 8)] ^ IT[2][(byte)(x[1] >> 16)] ^ IT[3][(byte)(x[1] >> 24)] ^
304
0
        IT[4][(byte)(x[2] >> 32)] ^ IT[5][(byte)(x[2] >> 40)] ^ IT[6][(byte)(x[3] >> 48)] ^ IT[7][(byte)(x[3] >> 56)];
305
0
    y[1] = k[1] ^ IT[0][(byte)x[1]] ^ IT[1][(byte)(x[1] >> 8)] ^ IT[2][(byte)(x[2] >> 16)] ^ IT[3][(byte)(x[2] >> 24)] ^
306
0
        IT[4][(byte)(x[3] >> 32)] ^ IT[5][(byte)(x[3] >> 40)] ^ IT[6][(byte)(x[0] >> 48)] ^ IT[7][(byte)(x[0] >> 56)];
307
0
    y[2] = k[2] ^ IT[0][(byte)x[2]] ^ IT[1][(byte)(x[2] >> 8)] ^ IT[2][(byte)(x[3] >> 16)] ^ IT[3][(byte)(x[3] >> 24)] ^
308
0
        IT[4][(byte)(x[0] >> 32)] ^ IT[5][(byte)(x[0] >> 40)] ^ IT[6][(byte)(x[1] >> 48)] ^ IT[7][(byte)(x[1] >> 56)];
309
0
    y[3] = k[3] ^ IT[0][(byte)x[3]] ^ IT[1][(byte)(x[3] >> 8)] ^ IT[2][(byte)(x[0] >> 16)] ^ IT[3][(byte)(x[0] >> 24)] ^
310
0
        IT[4][(byte)(x[1] >> 32)] ^ IT[5][(byte)(x[1] >> 40)] ^ IT[6][(byte)(x[2] >> 48)] ^ IT[7][(byte)(x[2] >> 56)];
311
0
}
312
313
inline void IG512(const word64 x[8], word64 y[8], const word64 k[8])
314
221
{
315
221
    y[0] = k[0] ^ IT[0][(byte)x[0]] ^ IT[1][(byte)(x[1] >> 8)] ^ IT[2][(byte)(x[2] >> 16)] ^ IT[3][(byte)(x[3] >> 24)] ^
316
221
        IT[4][(byte)(x[4] >> 32)] ^ IT[5][(byte)(x[5] >> 40)] ^ IT[6][(byte)(x[6] >> 48)] ^ IT[7][(byte)(x[7] >> 56)];
317
221
    y[1] = k[1] ^ IT[0][(byte)x[1]] ^ IT[1][(byte)(x[2] >> 8)] ^ IT[2][(byte)(x[3] >> 16)] ^ IT[3][(byte)(x[4] >> 24)] ^
318
221
        IT[4][(byte)(x[5] >> 32)] ^ IT[5][(byte)(x[6] >> 40)] ^ IT[6][(byte)(x[7] >> 48)] ^ IT[7][(byte)(x[0] >> 56)];
319
221
    y[2] = k[2] ^ IT[0][(byte)x[2]] ^ IT[1][(byte)(x[3] >> 8)] ^ IT[2][(byte)(x[4] >> 16)] ^ IT[3][(byte)(x[5] >> 24)] ^
320
221
        IT[4][(byte)(x[6] >> 32)] ^ IT[5][(byte)(x[7] >> 40)] ^ IT[6][(byte)(x[0] >> 48)] ^ IT[7][(byte)(x[1] >> 56)];
321
221
    y[3] = k[3] ^ IT[0][(byte)x[3]] ^ IT[1][(byte)(x[4] >> 8)] ^ IT[2][(byte)(x[5] >> 16)] ^ IT[3][(byte)(x[6] >> 24)] ^
322
221
        IT[4][(byte)(x[7] >> 32)] ^ IT[5][(byte)(x[0] >> 40)] ^ IT[6][(byte)(x[1] >> 48)] ^ IT[7][(byte)(x[2] >> 56)];
323
221
    y[4] = k[4] ^ IT[0][(byte)x[4]] ^ IT[1][(byte)(x[5] >> 8)] ^ IT[2][(byte)(x[6] >> 16)] ^ IT[3][(byte)(x[7] >> 24)] ^
324
221
        IT[4][(byte)(x[0] >> 32)] ^ IT[5][(byte)(x[1] >> 40)] ^ IT[6][(byte)(x[2] >> 48)] ^ IT[7][(byte)(x[3] >> 56)];
325
221
    y[5] = k[5] ^ IT[0][(byte)x[5]] ^ IT[1][(byte)(x[6] >> 8)] ^ IT[2][(byte)(x[7] >> 16)] ^ IT[3][(byte)(x[0] >> 24)] ^
326
221
        IT[4][(byte)(x[1] >> 32)] ^ IT[5][(byte)(x[2] >> 40)] ^ IT[6][(byte)(x[3] >> 48)] ^ IT[7][(byte)(x[4] >> 56)];
327
221
    y[6] = k[6] ^ IT[0][(byte)x[6]] ^ IT[1][(byte)(x[7] >> 8)] ^ IT[2][(byte)(x[0] >> 16)] ^ IT[3][(byte)(x[1] >> 24)] ^
328
221
        IT[4][(byte)(x[2] >> 32)] ^ IT[5][(byte)(x[3] >> 40)] ^ IT[6][(byte)(x[4] >> 48)] ^ IT[7][(byte)(x[5] >> 56)];
329
221
    y[7] = k[7] ^ IT[0][(byte)x[7]] ^ IT[1][(byte)(x[0] >> 8)] ^ IT[2][(byte)(x[1] >> 16)] ^ IT[3][(byte)(x[2] >> 24)] ^
330
221
        IT[4][(byte)(x[3] >> 32)] ^ IT[5][(byte)(x[4] >> 40)] ^ IT[6][(byte)(x[5] >> 48)] ^ IT[7][(byte)(x[6] >> 56)];
331
221
}
332
333
inline void IGL128(const word64 x[2], word64 y[2], const word64 k[2])
334
1
{
335
1
    y[0] = (word64(IS[0][(byte)x[0]]) ^ word64(IS[1][(byte)(x[0] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[0] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[0] >> 24)]) << 24 ^
336
1
        word64(IS[0][(byte)(x[1] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[1] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[1] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[1] >> 56)]) << 56) - k[0];
337
1
    y[1] = (word64(IS[0][(byte)x[1]]) ^ word64(IS[1][(byte)(x[1] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[1] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[1] >> 24)]) << 24 ^
338
1
        word64(IS[0][(byte)(x[0] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[0] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[0] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[0] >> 56)]) << 56) - k[1];
339
1
}
340
341
inline void IGL256(const word64 x[4], word64 y[4], const word64 k[4])
342
0
{
343
0
    y[0] = (word64(IS[0][(byte)x[0]]) ^ word64(IS[1][(byte)(x[0] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[1] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[1] >> 24)]) << 24 ^
344
0
        word64(IS[0][(byte)(x[2] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[2] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[3] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[3] >> 56)]) << 56) - k[0];
345
0
    y[1] = (word64(IS[0][(byte)x[1]]) ^ word64(IS[1][(byte)(x[1] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[2] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[2] >> 24)]) << 24 ^
346
0
        word64(IS[0][(byte)(x[3] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[3] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[0] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[0] >> 56)]) << 56) - k[1];
347
0
    y[2] = (word64(IS[0][(byte)x[2]]) ^ word64(IS[1][(byte)(x[2] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[3] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[3] >> 24)]) << 24 ^
348
0
        word64(IS[0][(byte)(x[0] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[0] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[1] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[1] >> 56)]) << 56) - k[2];
349
0
    y[3] = (word64(IS[0][(byte)x[3]]) ^ word64(IS[1][(byte)(x[3] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[0] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[0] >> 24)]) << 24 ^
350
0
        word64(IS[0][(byte)(x[1] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[1] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[2] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[2] >> 56)]) << 56) - k[3];
351
0
}
352
353
inline void IGL512(const word64 x[8], word64 y[8], const word64 k[8])
354
13
{
355
13
    y[0] = (word64(IS[0][(byte)x[0]]) ^ word64(IS[1][(byte)(x[1] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[2] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[3] >> 24)]) << 24 ^
356
13
        word64(IS[0][(byte)(x[4] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[5] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[6] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[7] >> 56)]) << 56) - k[0];
357
13
    y[1] = (word64(IS[0][(byte)x[1]]) ^ word64(IS[1][(byte)(x[2] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[3] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[4] >> 24)]) << 24 ^
358
13
        word64(IS[0][(byte)(x[5] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[6] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[7] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[0] >> 56)]) << 56) - k[1];
359
13
    y[2] = (word64(IS[0][(byte)x[2]]) ^ word64(IS[1][(byte)(x[3] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[4] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[5] >> 24)]) << 24 ^
360
13
        word64(IS[0][(byte)(x[6] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[7] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[0] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[1] >> 56)]) << 56) - k[2];
361
13
    y[3] = (word64(IS[0][(byte)x[3]]) ^ word64(IS[1][(byte)(x[4] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[5] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[6] >> 24)]) << 24 ^
362
13
        word64(IS[0][(byte)(x[7] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[0] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[1] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[2] >> 56)]) << 56) - k[3];
363
13
    y[4] = (word64(IS[0][(byte)x[4]]) ^ word64(IS[1][(byte)(x[5] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[6] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[7] >> 24)]) << 24 ^
364
13
        word64(IS[0][(byte)(x[0] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[1] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[2] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[3] >> 56)]) << 56) - k[4];
365
13
    y[5] = (word64(IS[0][(byte)x[5]]) ^ word64(IS[1][(byte)(x[6] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[7] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[0] >> 24)]) << 24 ^
366
13
        word64(IS[0][(byte)(x[1] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[2] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[3] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[4] >> 56)]) << 56) - k[5];
367
13
    y[6] = (word64(IS[0][(byte)x[6]]) ^ word64(IS[1][(byte)(x[7] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[0] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[1] >> 24)]) << 24 ^
368
13
        word64(IS[0][(byte)(x[2] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[3] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[4] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[5] >> 56)]) << 56) - k[6];
369
13
    y[7] = (word64(IS[0][(byte)x[7]]) ^ word64(IS[1][(byte)(x[0] >> 8)]) << 8 ^ word64(IS[2][(byte)(x[1] >> 16)]) << 16 ^ word64(IS[3][(byte)(x[2] >> 24)]) << 24 ^
370
13
        word64(IS[0][(byte)(x[3] >> 32)]) << 32 ^ word64(IS[1][(byte)(x[4] >> 40)]) << 40 ^ word64(IS[2][(byte)(x[5] >> 48)]) << 48 ^ word64(IS[3][(byte)(x[6] >> 56)]) << 56) - k[7];
371
13
}
372
373
inline void G128(const word64 x[2], word64 y[2], const word64 k[2])
374
21
{
375
21
    y[0] = k[0] ^ T[0][(byte)x[0]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
376
21
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[1] >> 56)];
377
21
    y[1] = k[1] ^ T[0][(byte)x[1]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
378
21
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[0] >> 56)];
379
21
}
380
381
inline void G256(const word64 x[4], word64 y[4], const word64 k[4])
382
9
{
383
9
    y[0] = k[0] ^ T[0][(byte)x[0]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[3] >> 16)] ^ T[3][(byte)(x[3] >> 24)] ^
384
9
        T[4][(byte)(x[2] >> 32)] ^ T[5][(byte)(x[2] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[1] >> 56)];
385
9
    y[1] = k[1] ^ T[0][(byte)x[1]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
386
9
        T[4][(byte)(x[3] >> 32)] ^ T[5][(byte)(x[3] >> 40)] ^ T[6][(byte)(x[2] >> 48)] ^ T[7][(byte)(x[2] >> 56)];
387
9
    y[2] = k[2] ^ T[0][(byte)x[2]] ^ T[1][(byte)(x[2] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
388
9
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[3] >> 48)] ^ T[7][(byte)(x[3] >> 56)];
389
9
    y[3] = k[3] ^ T[0][(byte)x[3]] ^ T[1][(byte)(x[3] >> 8)] ^ T[2][(byte)(x[2] >> 16)] ^ T[3][(byte)(x[2] >> 24)] ^
390
9
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[0] >> 56)];
391
9
}
392
393
inline void G512(const word64 x[8], word64 y[8], const word64 k[8])
394
33
{
395
33
    y[0] = k[0] ^ T[0][(byte)x[0]] ^ T[1][(byte)(x[7] >> 8)] ^ T[2][(byte)(x[6] >> 16)] ^ T[3][(byte)(x[5] >> 24)] ^
396
33
        T[4][(byte)(x[4] >> 32)] ^ T[5][(byte)(x[3] >> 40)] ^ T[6][(byte)(x[2] >> 48)] ^ T[7][(byte)(x[1] >> 56)];
397
33
    y[1] = k[1] ^ T[0][(byte)x[1]] ^ T[1][(byte)(x[0] >> 8)] ^ T[2][(byte)(x[7] >> 16)] ^ T[3][(byte)(x[6] >> 24)] ^
398
33
        T[4][(byte)(x[5] >> 32)] ^ T[5][(byte)(x[4] >> 40)] ^ T[6][(byte)(x[3] >> 48)] ^ T[7][(byte)(x[2] >> 56)];
399
33
    y[2] = k[2] ^ T[0][(byte)x[2]] ^ T[1][(byte)(x[1] >> 8)] ^ T[2][(byte)(x[0] >> 16)] ^ T[3][(byte)(x[7] >> 24)] ^
400
33
        T[4][(byte)(x[6] >> 32)] ^ T[5][(byte)(x[5] >> 40)] ^ T[6][(byte)(x[4] >> 48)] ^ T[7][(byte)(x[3] >> 56)];
401
33
    y[3] = k[3] ^ T[0][(byte)x[3]] ^ T[1][(byte)(x[2] >> 8)] ^ T[2][(byte)(x[1] >> 16)] ^ T[3][(byte)(x[0] >> 24)] ^
402
33
        T[4][(byte)(x[7] >> 32)] ^ T[5][(byte)(x[6] >> 40)] ^ T[6][(byte)(x[5] >> 48)] ^ T[7][(byte)(x[4] >> 56)];
403
33
    y[4] = k[4] ^ T[0][(byte)x[4]] ^ T[1][(byte)(x[3] >> 8)] ^ T[2][(byte)(x[2] >> 16)] ^ T[3][(byte)(x[1] >> 24)] ^
404
33
        T[4][(byte)(x[0] >> 32)] ^ T[5][(byte)(x[7] >> 40)] ^ T[6][(byte)(x[6] >> 48)] ^ T[7][(byte)(x[5] >> 56)];
405
33
    y[5] = k[5] ^ T[0][(byte)x[5]] ^ T[1][(byte)(x[4] >> 8)] ^ T[2][(byte)(x[3] >> 16)] ^ T[3][(byte)(x[2] >> 24)] ^
406
33
        T[4][(byte)(x[1] >> 32)] ^ T[5][(byte)(x[0] >> 40)] ^ T[6][(byte)(x[7] >> 48)] ^ T[7][(byte)(x[6] >> 56)];
407
33
    y[6] = k[6] ^ T[0][(byte)x[6]] ^ T[1][(byte)(x[5] >> 8)] ^ T[2][(byte)(x[4] >> 16)] ^ T[3][(byte)(x[3] >> 24)] ^
408
33
        T[4][(byte)(x[2] >> 32)] ^ T[5][(byte)(x[1] >> 40)] ^ T[6][(byte)(x[0] >> 48)] ^ T[7][(byte)(x[7] >> 56)];
409
33
    y[7] = k[7] ^ T[0][(byte)x[7]] ^ T[1][(byte)(x[6] >> 8)] ^ T[2][(byte)(x[5] >> 16)] ^ T[3][(byte)(x[4] >> 24)] ^
410
33
        T[4][(byte)(x[3] >> 32)] ^ T[5][(byte)(x[2] >> 40)] ^ T[6][(byte)(x[1] >> 48)] ^ T[7][(byte)(x[0] >> 56)];
411
33
}
412
413
ANONYMOUS_NAMESPACE_END
414
415
NAMESPACE_BEGIN(CryptoPP)
416
417
// *********************** UncheckedSetKey specializations *********************** //
418
419
void Kalyna128::Base::SetKey_22(const word64 key[2])
420
3
{
421
3
    word64 *ks = m_wspace+0, *ksc = m_wspace+2, *t1 = m_wspace+4;
422
3
    word64 *t2 = m_wspace+6, *k = m_wspace+8, *kswapped = m_wspace+10;
423
424
3
    std::memset(t1, 0, 2*8);
425
3
    t1[0] = (128 + 128 + 64) / 64;
426
427
3
    AddKey<2>(t1, t2, key);
428
3
    G128(t2, t1, key);
429
3
    GL128(t1, t2, key);
430
3
    G0128(t2, ks);
431
432
3
    word64 constant = W64LIT(0x0001000100010001);
433
434
    // round 0
435
3
    std::memcpy(k, key, 16);
436
3
    kswapped[1] = k[0];
437
3
    kswapped[0] = k[1];
438
439
3
    AddConstant<2>(ks, ksc, constant);
440
3
    AddKey<2>(k, t2, ksc);
441
3
    G128(t2, t1, ksc);
442
3
    GL128(t1, &m_rkeys[0], ksc);
443
3
    MakeOddKey<2>(&m_rkeys[0], &m_rkeys[2]);
444
445
    // round 2
446
3
    constant <<= 1;
447
3
    AddConstant<2>(ks, ksc, constant);
448
3
    AddKey<2>(kswapped, t2, ksc);
449
3
    G128(t2, t1, ksc);
450
3
    GL128(t1, &m_rkeys[4], ksc);
451
3
    MakeOddKey<2>(&m_rkeys[4], &m_rkeys[6]);
452
453
    // round 4
454
3
    constant <<= 1;
455
3
    AddConstant<2>(ks, ksc, constant);
456
3
    AddKey<2>(k, t2, ksc);
457
3
    G128(t2, t1, ksc);
458
3
    GL128(t1, &m_rkeys[8], ksc);
459
3
    MakeOddKey<2>(&m_rkeys[8], &m_rkeys[10]);
460
461
    // round 6
462
3
    constant <<= 1;
463
3
    AddConstant<2>(ks, ksc, constant);
464
3
    AddKey<2>(kswapped, t2, ksc);
465
3
    G128(t2, t1, ksc);
466
3
    GL128(t1, &m_rkeys[12], ksc);
467
3
    MakeOddKey<2>(&m_rkeys[12], &m_rkeys[14]);
468
469
    // round 8
470
3
    constant <<= 1;
471
3
    AddConstant<2>(ks, ksc, constant);
472
3
    AddKey<2>(k, t2, ksc);
473
3
    G128(t2, t1, ksc);
474
3
    GL128(t1, &m_rkeys[16], ksc);
475
3
    MakeOddKey<2>(&m_rkeys[16], &m_rkeys[18]);
476
477
    // round 10
478
3
    constant <<= 1;
479
3
    AddConstant<2>(ks, ksc, constant);
480
3
    AddKey<2>(kswapped, t2, ksc);
481
3
    G128(t2, t1, ksc);
482
3
    GL128(t1, &m_rkeys[20], ksc);
483
484
3
    if (!IsForwardTransformation())
485
1
    {
486
1
        IMC128(&m_rkeys[18]); IMC128(&m_rkeys[16]);
487
1
        IMC128(&m_rkeys[14]); IMC128(&m_rkeys[12]);
488
1
        IMC128(&m_rkeys[10]); IMC128(&m_rkeys[ 8]);
489
1
        IMC128(&m_rkeys[ 6]); IMC128(&m_rkeys[ 4]);
490
1
        IMC128(&m_rkeys[ 2]);
491
1
    }
492
3
}
493
494
void Kalyna128::Base::SetKey_24(const word64 key[4])
495
0
{
496
0
    word64 *ks = m_wspace+0, *ksc = m_wspace+2, *t1 = m_wspace+4, *t2 = m_wspace+6;
497
0
    word64 *k = m_wspace+8, *ka = m_wspace+12, *ko = m_wspace+14;
498
499
0
    std::memset(t1, 0, 2*8);
500
0
    t1[0] = (128 + 256 + 64) / 64;
501
0
    std::memcpy(ka, key, 16);
502
0
    std::memcpy(ko, key + 2, 16);
503
504
0
    AddKey<2>(t1, t2, ka);
505
0
    G128(t2, t1, ko);
506
0
    GL128(t1, t2, ka);
507
0
    G0128(t2, ks);
508
509
0
    word64 constant = W64LIT(0x0001000100010001);
510
511
    // round 0
512
0
    std::memcpy(k, key, 256 / 8);
513
0
    AddConstant<2>(ks, ksc, constant);
514
0
    AddKey<2>(k, t2, ksc);
515
0
    G128(t2, t1, ksc);
516
0
    GL128(t1, &m_rkeys[0], ksc);
517
0
    MakeOddKey<2>(&m_rkeys[0], &m_rkeys[2]);
518
519
    // round 2
520
0
    constant <<= 1;
521
0
    AddConstant<2>(ks, ksc, constant);
522
0
    AddKey<2>(k + 2, t2, ksc);
523
0
    G128(t2, t1, ksc);
524
0
    GL128(t1, &m_rkeys[4], ksc);
525
0
    MakeOddKey<2>(&m_rkeys[4], &m_rkeys[6]);
526
527
    // round 4
528
0
    SwapBlocks<4>(k);
529
0
    constant <<= 1;
530
0
    AddConstant<2>(ks, ksc, constant);
531
0
    AddKey<2>(k, t2, ksc);
532
0
    G128(t2, t1, ksc);
533
0
    GL128(t1, &m_rkeys[8], ksc);
534
0
    MakeOddKey<2>(&m_rkeys[8], &m_rkeys[10]);
535
536
    // round 6
537
0
    constant <<= 1;
538
0
    AddConstant<2>(ks, ksc, constant);
539
0
    AddKey<2>(k + 2, t2, ksc);
540
0
    G128(t2, t1, ksc);
541
0
    GL128(t1, &m_rkeys[12], ksc);
542
0
    MakeOddKey<2>(&m_rkeys[12], &m_rkeys[14]);
543
544
    // round 8
545
0
    SwapBlocks<4>(k);
546
0
    constant <<= 1;
547
0
    AddConstant<2>(ks, ksc, constant);
548
0
    AddKey<2>(k, t2, ksc);
549
0
    G128(t2, t1, ksc);
550
0
    GL128(t1, &m_rkeys[16], ksc);
551
0
    MakeOddKey<2>(&m_rkeys[16], &m_rkeys[18]);
552
553
    // round 10
554
0
    constant <<= 1;
555
0
    AddConstant<2>(ks, ksc, constant);
556
0
    AddKey<2>(k + 2, t2, ksc);
557
0
    G128(t2, t1, ksc);
558
0
    GL128(t1, &m_rkeys[20], ksc);
559
0
    MakeOddKey<2>(&m_rkeys[20], &m_rkeys[22]);
560
561
    // round 12
562
0
    SwapBlocks<4>(k);
563
0
    constant <<= 1;
564
0
    AddConstant<2>(ks, ksc, constant);
565
0
    AddKey<2>(k, t2, ksc);
566
0
    G128(t2, t1, ksc);
567
0
    GL128(t1, &m_rkeys[24], ksc);
568
0
    MakeOddKey<2>(&m_rkeys[24], &m_rkeys[26]);
569
570
    // round 14
571
0
    constant <<= 1;
572
0
    AddConstant<2>(ks, ksc, constant);
573
0
    AddKey<2>(k + 2, t2, ksc);
574
0
    G128(t2, t1, ksc);
575
0
    GL128(t1, &m_rkeys[28], ksc);
576
577
0
    if (!IsForwardTransformation())
578
0
    {
579
0
        IMC128(&m_rkeys[26]);
580
0
        IMC128(&m_rkeys[24]);
581
0
        IMC128(&m_rkeys[22]);
582
0
        IMC128(&m_rkeys[20]);
583
0
        IMC128(&m_rkeys[18]);
584
0
        IMC128(&m_rkeys[16]);
585
0
        IMC128(&m_rkeys[14]);
586
0
        IMC128(&m_rkeys[12]);
587
0
        IMC128(&m_rkeys[10]);
588
0
        IMC128(&m_rkeys[8]);
589
0
        IMC128(&m_rkeys[6]);
590
0
        IMC128(&m_rkeys[4]);
591
0
        IMC128(&m_rkeys[2]);
592
0
    }
593
0
}
594
595
void Kalyna256::Base::SetKey_44(const word64 key[4])
596
1
{
597
1
    word64 *ks = m_wspace+0, *ksc = m_wspace+4, *t1 = m_wspace+8;
598
1
    word64 *t2 = m_wspace+12, *k = m_wspace+16;
599
600
1
    std::memset(t1, 0, 32);
601
1
    t1[0] = (256 + 256 + 64) / 64;
602
603
1
    AddKey<4>(t1, t2, key);
604
1
    G256(t2, t1, key);
605
1
    GL256(t1, t2, key);
606
1
    G0256(t2, ks);
607
608
1
    word64 constant = W64LIT(0x0001000100010001);
609
610
    // round 0
611
1
    std::memcpy(k, key, 32);
612
1
    AddConstant<4>(ks, ksc, constant);
613
1
    AddKey<4>(k, t2, ksc);
614
1
    G256(t2, t1, ksc);
615
1
    GL256(t1, &m_rkeys[0], ksc);
616
1
    MakeOddKey<4>(&m_rkeys[0], &m_rkeys[4]);
617
618
    // round 2
619
1
    SwapBlocks<4>(k);
620
1
    constant <<= 1;
621
1
    AddConstant<4>(ks, ksc, constant);
622
1
    AddKey<4>(k, t2, ksc);
623
1
    G256(t2, t1, ksc);
624
1
    GL256(t1, &m_rkeys[8], ksc);
625
1
    MakeOddKey<4>(&m_rkeys[8], &m_rkeys[12]);
626
627
    // round 4
628
1
    SwapBlocks<4>(k);
629
1
    constant <<= 1;
630
1
    AddConstant<4>(ks, ksc, constant);
631
1
    AddKey<4>(k, t2, ksc);
632
1
    G256(t2, t1, ksc);
633
1
    GL256(t1, &m_rkeys[16], ksc);
634
1
    MakeOddKey<4>(&m_rkeys[16], &m_rkeys[20]);
635
636
    // round 6
637
1
    SwapBlocks<4>(k);
638
1
    constant <<= 1;
639
1
    AddConstant<4>(ks, ksc, constant);
640
1
    AddKey<4>(k, t2, ksc);
641
1
    G256(t2, t1, ksc);
642
1
    GL256(t1, &m_rkeys[24], ksc);
643
1
    MakeOddKey<4>(&m_rkeys[24], &m_rkeys[28]);
644
645
    // round 8
646
1
    SwapBlocks<4>(k);
647
1
    constant <<= 1;
648
1
    AddConstant<4>(ks, ksc, constant);
649
1
    AddKey<4>(k, t2, ksc);
650
1
    G256(t2, t1, ksc);
651
1
    GL256(t1, &m_rkeys[32], ksc);
652
1
    MakeOddKey<4>(&m_rkeys[32], &m_rkeys[36]);
653
654
    // round 10
655
1
    SwapBlocks<4>(k);
656
1
    constant <<= 1;
657
1
    AddConstant<4>(ks, ksc, constant);
658
1
    AddKey<4>(k, t2, ksc);
659
1
    G256(t2, t1, ksc);
660
1
    GL256(t1, &m_rkeys[40], ksc);
661
1
    MakeOddKey<4>(&m_rkeys[40], &m_rkeys[44]);
662
663
    // round 12
664
1
    SwapBlocks<4>(k);
665
1
    constant <<= 1;
666
1
    AddConstant<4>(ks, ksc, constant);
667
1
    AddKey<4>(k, t2, ksc);
668
1
    G256(t2, t1, ksc);
669
1
    GL256(t1, &m_rkeys[48], ksc);
670
1
    MakeOddKey<4>(&m_rkeys[48], &m_rkeys[52]);
671
672
    // round 14
673
1
    SwapBlocks<4>(k);
674
1
    constant <<= 1;
675
1
    AddConstant<4>(ks, ksc, constant);
676
1
    AddKey<4>(k, t2, ksc);
677
1
    G256(t2, t1, ksc);
678
1
    GL256(t1, &m_rkeys[56], ksc);
679
680
1
    if (!IsForwardTransformation())
681
0
    {
682
0
        IMC256(&m_rkeys[52]);
683
0
        IMC256(&m_rkeys[48]);
684
0
        IMC256(&m_rkeys[44]);
685
0
        IMC256(&m_rkeys[40]);
686
0
        IMC256(&m_rkeys[36]);
687
0
        IMC256(&m_rkeys[32]);
688
0
        IMC256(&m_rkeys[28]);
689
0
        IMC256(&m_rkeys[24]);
690
0
        IMC256(&m_rkeys[20]);
691
0
        IMC256(&m_rkeys[16]);
692
0
        IMC256(&m_rkeys[12]);
693
0
        IMC256(&m_rkeys[8]);
694
0
        IMC256(&m_rkeys[4]);
695
0
    }
696
1
}
697
698
void Kalyna256::Base::SetKey_48(const word64 key[8])
699
0
{
700
0
    word64 *ks = m_wspace+0, *ksc = m_wspace+4, *t1 = m_wspace+8, *t2 = m_wspace+12;
701
0
    word64 *k = m_wspace+16, *ka = m_wspace+24, *ko = m_wspace+28;
702
703
0
    std::memset(t1, 0, 4*8);
704
0
    t1[0] = (512 + 256 + 64) / 64;
705
0
    std::memcpy(ka, key, 32);
706
0
    std::memcpy(ko, key+4, 32);
707
708
0
    AddKey<4>(t1, t2, ka);
709
0
    G256(t2, t1, ko);
710
0
    GL256(t1, t2, ka);
711
0
    G0256(t2, ks);
712
713
0
    word64 constant = W64LIT(0x0001000100010001);
714
715
    // round 0
716
0
    std::memcpy(k, key, 512 / 8);
717
0
    AddConstant<4>(ks, ksc, constant);
718
0
    AddKey<4>(k, t2, ksc);
719
0
    G256(t2, t1, ksc);
720
0
    GL256(t1, &m_rkeys[0], ksc);
721
0
    MakeOddKey<4>(&m_rkeys[0], &m_rkeys[4]);
722
723
    // round 2
724
0
    constant <<= 1;
725
0
    AddConstant<4>(ks, ksc, constant);
726
0
    AddKey<4>(k+4, t2, ksc);
727
0
    G256(t2, t1, ksc);
728
0
    GL256(t1, &m_rkeys[8], ksc);
729
0
    MakeOddKey<4>(&m_rkeys[8], &m_rkeys[12]);
730
731
    // round 4
732
0
    SwapBlocks<8>(k);
733
0
    constant <<= 1;
734
0
    AddConstant<4>(ks, ksc, constant);
735
0
    AddKey<4>(k, t2, ksc);
736
0
    G256(t2, t1, ksc);
737
0
    GL256(t1, &m_rkeys[16], ksc);
738
0
    MakeOddKey<4>(&m_rkeys[16], &m_rkeys[20]);
739
740
    // round 6
741
0
    constant <<= 1;
742
0
    AddConstant<4>(ks, ksc, constant);
743
0
    AddKey<4>(k+4, t2, ksc);
744
0
    G256(t2, t1, ksc);
745
0
    GL256(t1, &m_rkeys[24], ksc);
746
0
    MakeOddKey<4>(&m_rkeys[24], &m_rkeys[28]);
747
748
    // round 8
749
0
    SwapBlocks<8>(k);
750
0
    constant <<= 1;
751
0
    AddConstant<4>(ks, ksc, constant);
752
0
    AddKey<4>(k, t2, ksc);
753
0
    G256(t2, t1, ksc);
754
0
    GL256(t1, &m_rkeys[32], ksc);
755
0
    MakeOddKey<4>(&m_rkeys[32], &m_rkeys[36]);
756
757
    // round 10
758
0
    constant <<= 1;
759
0
    AddConstant<4>(ks, ksc, constant);
760
0
    AddKey<4>(k+4, t2, ksc);
761
0
    G256(t2, t1, ksc);
762
0
    GL256(t1, &m_rkeys[40], ksc);
763
0
    MakeOddKey<4>(&m_rkeys[40], &m_rkeys[44]);
764
765
    // round 12
766
0
    SwapBlocks<8>(k);
767
0
    constant <<= 1;
768
0
    AddConstant<4>(ks, ksc, constant);
769
0
    AddKey<4>(k, t2, ksc);
770
0
    G256(t2, t1, ksc);
771
0
    GL256(t1, &m_rkeys[48], ksc);
772
0
    MakeOddKey<4>(&m_rkeys[48], &m_rkeys[52]);
773
774
    // round 14
775
0
    constant <<= 1;
776
0
    AddConstant<4>(ks, ksc, constant);
777
0
    AddKey<4>(k+4, t2, ksc);
778
0
    G256(t2, t1, ksc);
779
0
    GL256(t1, &m_rkeys[56], ksc);
780
0
    MakeOddKey<4>(&m_rkeys[56], &m_rkeys[60]);
781
782
    // round 16
783
0
    SwapBlocks<8>(k);
784
0
    constant <<= 1;
785
0
    AddConstant<4>(ks, ksc, constant);
786
0
    AddKey<4>(k, t2, ksc);
787
0
    G256(t2, t1, ksc);
788
0
    GL256(t1, &m_rkeys[64], ksc);
789
0
    MakeOddKey<4>(&m_rkeys[64], &m_rkeys[68]);
790
791
    // round 18
792
0
    constant <<= 1;
793
0
    AddConstant<4>(ks, ksc, constant);
794
0
    AddKey<4>(k+4, t2, ksc);
795
0
    G256(t2, t1, ksc);
796
0
    GL256(t1, &m_rkeys[72], ksc);
797
798
0
    if (!IsForwardTransformation())
799
0
    {
800
0
        IMC256(&m_rkeys[68]);
801
0
        IMC256(&m_rkeys[64]);
802
0
        IMC256(&m_rkeys[60]);
803
0
        IMC256(&m_rkeys[56]);
804
0
        IMC256(&m_rkeys[52]);
805
0
        IMC256(&m_rkeys[48]);
806
0
        IMC256(&m_rkeys[44]);
807
0
        IMC256(&m_rkeys[40]);
808
0
        IMC256(&m_rkeys[36]);
809
0
        IMC256(&m_rkeys[32]);
810
0
        IMC256(&m_rkeys[28]);
811
0
        IMC256(&m_rkeys[24]);
812
0
        IMC256(&m_rkeys[20]);
813
0
        IMC256(&m_rkeys[16]);
814
0
        IMC256(&m_rkeys[12]);
815
0
        IMC256(&m_rkeys[8]);
816
0
        IMC256(&m_rkeys[4]);
817
0
    }
818
0
}
819
820
void Kalyna512::Base::SetKey_88(const word64 key[8])
821
3
{
822
3
    word64 *ks = m_wspace+0, *ksc = m_wspace+8, *t1 = m_wspace+16;
823
3
    word64 *t2 = m_wspace+24, *k = m_wspace+32;
824
825
3
    std::memset(t1, 0, 8*8);
826
3
    t1[0] = (512 + 512 + 64) / 64;
827
828
3
    AddKey<8>(t1, t2, key);
829
3
    G512(t2, t1, key);
830
3
    GL512(t1, t2, key);
831
3
    G0512(t2, ks);
832
833
3
    word64 constant = W64LIT(0x0001000100010001);
834
835
    // round 0
836
3
    std::memcpy(k, key, 512 / 8);
837
3
    AddConstant<8>(ks, ksc, constant);
838
3
    AddKey<8>(k, t2, ksc);
839
3
    G512(t2, t1, ksc);
840
3
    GL512(t1, &m_rkeys[0], ksc);
841
3
    MakeOddKey<8>(&m_rkeys[0], &m_rkeys[8]);
842
843
    // round 2
844
3
    SwapBlocks<8>(k);
845
3
    constant <<= 1;
846
3
    AddConstant<8>(ks, ksc, constant);
847
3
    AddKey<8>(k, t2, ksc);
848
3
    G512(t2, t1, ksc);
849
3
    GL512(t1, &m_rkeys[16], ksc);
850
3
    MakeOddKey<8>(&m_rkeys[16], &m_rkeys[24]);
851
852
    // round 4
853
3
    SwapBlocks<8>(k);
854
3
    constant <<= 1;
855
3
    AddConstant<8>(ks, ksc, constant);
856
3
    AddKey<8>(k, t2, ksc);
857
3
    G512(t2, t1, ksc);
858
3
    GL512(t1, &m_rkeys[32], ksc);
859
3
    MakeOddKey<8>(&m_rkeys[32], &m_rkeys[40]);
860
861
    // round 6
862
3
    SwapBlocks<8>(k);
863
3
    constant <<= 1;
864
3
    AddConstant<8>(ks, ksc, constant);
865
3
    AddKey<8>(k, t2, ksc);
866
3
    G512(t2, t1, ksc);
867
3
    GL512(t1, &m_rkeys[48], ksc);
868
3
    MakeOddKey<8>(&m_rkeys[48], &m_rkeys[56]);
869
870
    // round 8
871
3
    SwapBlocks<8>(k);
872
3
    constant <<= 1;
873
3
    AddConstant<8>(ks, ksc, constant);
874
3
    AddKey<8>(k, t2, ksc);
875
3
    G512(t2, t1, ksc);
876
3
    GL512(t1, &m_rkeys[64], ksc);
877
3
    MakeOddKey<8>(&m_rkeys[64], &m_rkeys[72]);
878
879
    // round 10
880
3
    SwapBlocks<8>(k);
881
3
    constant <<= 1;
882
3
    AddConstant<8>(ks, ksc, constant);
883
3
    AddKey<8>(k, t2, ksc);
884
3
    G512(t2, t1, ksc);
885
3
    GL512(t1, &m_rkeys[80], ksc);
886
3
    MakeOddKey<8>(&m_rkeys[80], &m_rkeys[88]);
887
888
    // round 12
889
3
    SwapBlocks<8>(k);
890
3
    constant <<= 1;
891
3
    AddConstant<8>(ks, ksc, constant);
892
3
    AddKey<8>(k, t2, ksc);
893
3
    G512(t2, t1, ksc);
894
3
    GL512(t1, &m_rkeys[96], ksc);
895
3
    MakeOddKey<8>(&m_rkeys[96], &m_rkeys[104]);
896
897
    // round 14
898
3
    SwapBlocks<8>(k);
899
3
    constant <<= 1;
900
3
    AddConstant<8>(ks, ksc, constant);
901
3
    AddKey<8>(k, t2, ksc);
902
3
    G512(t2, t1, ksc);
903
3
    GL512(t1, &m_rkeys[112], ksc);
904
3
    MakeOddKey<8>(&m_rkeys[112], &m_rkeys[120]);
905
906
    // round 16
907
3
    SwapBlocks<8>(k);
908
3
    constant <<= 1;
909
3
    AddConstant<8>(ks, ksc, constant);
910
3
    AddKey<8>(k, t2, ksc);
911
3
    G512(t2, t1, ksc);
912
3
    GL512(t1, &m_rkeys[128], ksc);
913
3
    MakeOddKey<8>(&m_rkeys[128], &m_rkeys[136]);
914
915
    // round 18
916
3
    SwapBlocks<8>(k);
917
3
    constant <<= 1;
918
3
    AddConstant<8>(ks, ksc, constant);
919
3
    AddKey<8>(k, t2, ksc);
920
3
    G512(t2, t1, ksc);
921
3
    GL512(t1, &m_rkeys[144], ksc);
922
923
3
    if (!IsForwardTransformation())
924
2
    {
925
2
        IMC512(&m_rkeys[136]); IMC512(&m_rkeys[128]); IMC512(&m_rkeys[120]); IMC512(&m_rkeys[112]);
926
2
        IMC512(&m_rkeys[104]); IMC512(&m_rkeys[ 96]); IMC512(&m_rkeys[ 88]); IMC512(&m_rkeys[ 80]);
927
2
        IMC512(&m_rkeys[ 72]); IMC512(&m_rkeys[ 64]); IMC512(&m_rkeys[ 56]); IMC512(&m_rkeys[ 48]);
928
2
        IMC512(&m_rkeys[ 40]); IMC512(&m_rkeys[ 32]); IMC512(&m_rkeys[ 24]); IMC512(&m_rkeys[ 16]);
929
2
        IMC512(&m_rkeys[  8]);
930
2
    }
931
3
}
932
933
// *********************** ProcessAndXorBlock specializations *********************** //
934
935
void Kalyna128::Base::ProcessBlock_22(const byte* inBlock, const byte* xorBlock, byte* outBlock) const
936
1
{
937
1
    word64 *t1 = m_wspace+0, *t2 = m_wspace+2, *msg = m_wspace+4;
938
939
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
940
1
    typedef GetBlock<word64, LittleEndian, false> InBlock;
941
1
    InBlock iblk(inBlock);
942
1
    iblk(msg[0])(msg[1]);
943
944
1
    if (IsForwardTransformation())
945
0
    {
946
0
        AddKey<2>(msg, t1, m_rkeys);
947
0
        G128(t1, t2, &m_rkeys[2]);   // 1
948
0
        G128(t2, t1, &m_rkeys[4]);   // 2
949
0
        G128(t1, t2, &m_rkeys[6]);   // 3
950
0
        G128(t2, t1, &m_rkeys[8]);   // 4
951
0
        G128(t1, t2, &m_rkeys[10]);  // 5
952
0
        G128(t2, t1, &m_rkeys[12]);  // 6
953
0
        G128(t1, t2, &m_rkeys[14]);  // 7
954
0
        G128(t2, t1, &m_rkeys[16]);  // 8
955
0
        G128(t1, t2, &m_rkeys[18]);  // 9
956
0
        GL128(t2, t1, &m_rkeys[20]); // 10
957
0
    }
958
1
    else
959
1
    {
960
1
        SubKey<2>(msg, t1, &m_rkeys[20]);
961
1
        IMC128(t1);
962
1
        IG128(t1, t2, &m_rkeys[18]);
963
1
        IG128(t2, t1, &m_rkeys[16]);
964
1
        IG128(t1, t2, &m_rkeys[14]);
965
1
        IG128(t2, t1, &m_rkeys[12]);
966
1
        IG128(t1, t2, &m_rkeys[10]);
967
1
        IG128(t2, t1, &m_rkeys[8]);
968
1
        IG128(t1, t2, &m_rkeys[6]);
969
1
        IG128(t2, t1, &m_rkeys[4]);
970
1
        IG128(t1, t2, &m_rkeys[2]);
971
1
        IGL128(t2, t1, &m_rkeys[0]);
972
1
    }
973
974
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
975
1
    typedef PutBlock<word64, LittleEndian, false> OutBlock;
976
1
    OutBlock oblk(xorBlock, outBlock);
977
1
    oblk(t1[0])(t1[1]);
978
1
}
979
980
void Kalyna128::Base::ProcessBlock_24(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
981
0
{
982
0
    word64 *t1 = m_wspace+0, *t2 = m_wspace+2, *msg = m_wspace+4;
983
984
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
985
0
    typedef GetBlock<word64, LittleEndian, false> InBlock;
986
0
    InBlock iblk(inBlock);
987
0
    iblk(msg[0])(msg[1]);
988
989
0
    if (IsForwardTransformation())
990
0
    {
991
0
        AddKey<2>(msg, t1, m_rkeys);
992
0
        G128(t1, t2, &m_rkeys[ 2]); // 1
993
0
        G128(t2, t1, &m_rkeys[ 4]); // 2
994
0
        G128(t1, t2, &m_rkeys[ 6]); // 3
995
0
        G128(t2, t1, &m_rkeys[ 8]); // 4
996
0
        G128(t1, t2, &m_rkeys[10]); // 5
997
0
        G128(t2, t1, &m_rkeys[12]); // 6
998
0
        G128(t1, t2, &m_rkeys[14]); // 7
999
0
        G128(t2, t1, &m_rkeys[16]); // 8
1000
0
        G128(t1, t2, &m_rkeys[18]); // 9
1001
0
        G128(t2, t1, &m_rkeys[20]); // 10
1002
0
        G128(t1, t2, &m_rkeys[22]); // 11
1003
0
        G128(t2, t1, &m_rkeys[24]); // 12
1004
0
        G128(t1, t2, &m_rkeys[26]); // 13
1005
0
        GL128(t2, t1, &m_rkeys[28]); // 14
1006
0
    }
1007
0
    else
1008
0
    {
1009
0
        SubKey<2>(msg, t1, &m_rkeys[28]);
1010
0
        IMC128(t1);
1011
0
        IG128(t1, t2, &m_rkeys[26]);
1012
0
        IG128(t2, t1, &m_rkeys[24]);
1013
0
        IG128(t1, t2, &m_rkeys[22]);
1014
0
        IG128(t2, t1, &m_rkeys[20]);
1015
0
        IG128(t1, t2, &m_rkeys[18]);
1016
0
        IG128(t2, t1, &m_rkeys[16]);
1017
0
        IG128(t1, t2, &m_rkeys[14]);
1018
0
        IG128(t2, t1, &m_rkeys[12]);
1019
0
        IG128(t1, t2, &m_rkeys[10]);
1020
0
        IG128(t2, t1, &m_rkeys[8]);
1021
0
        IG128(t1, t2, &m_rkeys[6]);
1022
0
        IG128(t2, t1, &m_rkeys[4]);
1023
0
        IG128(t1, t2, &m_rkeys[2]);
1024
0
        IGL128(t2, t1, &m_rkeys[0]);
1025
0
    }
1026
1027
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1028
0
    typedef PutBlock<word64, LittleEndian, false> OutBlock;
1029
0
    OutBlock oblk(xorBlock, outBlock);
1030
0
    oblk(t1[0])(t1[1]);
1031
0
}
1032
1033
void Kalyna256::Base::ProcessBlock_44(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
1034
0
{
1035
0
    word64 *t1 = m_wspace+0, *t2 = m_wspace+4, *msg = m_wspace+8;
1036
1037
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1038
0
    typedef GetBlock<word64, LittleEndian, false> InBlock;
1039
0
    InBlock iblk(inBlock);
1040
0
    iblk(msg[0])(msg[1])(msg[2])(msg[3]);
1041
1042
0
    if (IsForwardTransformation())
1043
0
    {
1044
0
        AddKey<4>(msg, t1, m_rkeys);
1045
0
        G256(t1, t2, &m_rkeys[4]); // 1
1046
0
        G256(t2, t1, &m_rkeys[8]); // 2
1047
0
        G256(t1, t2, &m_rkeys[12]); // 3
1048
0
        G256(t2, t1, &m_rkeys[16]); // 4
1049
0
        G256(t1, t2, &m_rkeys[20]); // 5
1050
0
        G256(t2, t1, &m_rkeys[24]); // 6
1051
0
        G256(t1, t2, &m_rkeys[28]); // 7
1052
0
        G256(t2, t1, &m_rkeys[32]); // 8
1053
0
        G256(t1, t2, &m_rkeys[36]); // 9
1054
0
        G256(t2, t1, &m_rkeys[40]); // 10
1055
0
        G256(t1, t2, &m_rkeys[44]); // 11
1056
0
        G256(t2, t1, &m_rkeys[48]); // 12
1057
0
        G256(t1, t2, &m_rkeys[52]); // 13
1058
0
        GL256(t2, t1, &m_rkeys[56]); // 14
1059
0
    }
1060
0
    else
1061
0
    {
1062
0
        SubKey<4>(msg, t1, &m_rkeys[56]);
1063
0
        IMC256(t1);
1064
0
        IG256(t1, t2, &m_rkeys[52]);
1065
0
        IG256(t2, t1, &m_rkeys[48]);
1066
0
        IG256(t1, t2, &m_rkeys[44]);
1067
0
        IG256(t2, t1, &m_rkeys[40]);
1068
0
        IG256(t1, t2, &m_rkeys[36]);
1069
0
        IG256(t2, t1, &m_rkeys[32]);
1070
0
        IG256(t1, t2, &m_rkeys[28]);
1071
0
        IG256(t2, t1, &m_rkeys[24]);
1072
0
        IG256(t1, t2, &m_rkeys[20]);
1073
0
        IG256(t2, t1, &m_rkeys[16]);
1074
0
        IG256(t1, t2, &m_rkeys[12]);
1075
0
        IG256(t2, t1, &m_rkeys[8]);
1076
0
        IG256(t1, t2, &m_rkeys[4]);
1077
0
        IGL256(t2, t1, &m_rkeys[0]);
1078
0
    }
1079
1080
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1081
0
    typedef PutBlock<word64, LittleEndian, false> OutBlock;
1082
0
    OutBlock oblk(xorBlock, outBlock);
1083
0
    oblk(t1[0])(t1[1])(t1[2])(t1[3]);
1084
0
}
1085
1086
void Kalyna256::Base::ProcessBlock_48(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
1087
0
{
1088
0
    word64 *t1 = m_wspace+0, *t2 = m_wspace+4, *msg = m_wspace+8;
1089
1090
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1091
0
    typedef GetBlock<word64, LittleEndian, false> InBlock;
1092
0
    InBlock iblk(inBlock);
1093
0
    iblk(msg[0])(msg[1])(msg[2])(msg[3]);
1094
1095
0
    if (IsForwardTransformation())
1096
0
    {
1097
0
        AddKey<4>(msg, t1, m_rkeys);
1098
0
        G256(t1, t2, &m_rkeys[4]); // 1
1099
0
        G256(t2, t1, &m_rkeys[8]); // 2
1100
0
        G256(t1, t2, &m_rkeys[12]); // 3
1101
0
        G256(t2, t1, &m_rkeys[16]); // 4
1102
0
        G256(t1, t2, &m_rkeys[20]); // 5
1103
0
        G256(t2, t1, &m_rkeys[24]); // 6
1104
0
        G256(t1, t2, &m_rkeys[28]); // 7
1105
0
        G256(t2, t1, &m_rkeys[32]); // 8
1106
0
        G256(t1, t2, &m_rkeys[36]); // 9
1107
0
        G256(t2, t1, &m_rkeys[40]); // 10
1108
0
        G256(t1, t2, &m_rkeys[44]); // 11
1109
0
        G256(t2, t1, &m_rkeys[48]); // 12
1110
0
        G256(t1, t2, &m_rkeys[52]); // 13
1111
0
        G256(t2, t1, &m_rkeys[56]); // 14
1112
0
        G256(t1, t2, &m_rkeys[60]); // 15
1113
0
        G256(t2, t1, &m_rkeys[64]); // 16
1114
0
        G256(t1, t2, &m_rkeys[68]); // 17
1115
0
        GL256(t2, t1, &m_rkeys[72]); // 18
1116
0
    }
1117
0
    else
1118
0
    {
1119
0
        SubKey<4>(msg, t1, &m_rkeys[72]);
1120
0
        IMC256(t1);
1121
0
        IG256(t1, t2, &m_rkeys[68]);
1122
0
        IG256(t2, t1, &m_rkeys[64]);
1123
0
        IG256(t1, t2, &m_rkeys[60]);
1124
0
        IG256(t2, t1, &m_rkeys[56]);
1125
0
        IG256(t1, t2, &m_rkeys[52]);
1126
0
        IG256(t2, t1, &m_rkeys[48]);
1127
0
        IG256(t1, t2, &m_rkeys[44]);
1128
0
        IG256(t2, t1, &m_rkeys[40]);
1129
0
        IG256(t1, t2, &m_rkeys[36]);
1130
0
        IG256(t2, t1, &m_rkeys[32]);
1131
0
        IG256(t1, t2, &m_rkeys[28]);
1132
0
        IG256(t2, t1, &m_rkeys[24]);
1133
0
        IG256(t1, t2, &m_rkeys[20]);
1134
0
        IG256(t2, t1, &m_rkeys[16]);
1135
0
        IG256(t1, t2, &m_rkeys[12]);
1136
0
        IG256(t2, t1, &m_rkeys[8]);
1137
0
        IG256(t1, t2, &m_rkeys[4]);
1138
0
        IGL256(t2, t1, &m_rkeys[0]);
1139
0
    }
1140
1141
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1142
0
    typedef PutBlock<word64, LittleEndian, false> OutBlock;
1143
0
    OutBlock oblk(xorBlock, outBlock);
1144
0
    oblk(t1[0])(t1[1])(t1[2])(t1[3]);
1145
0
}
1146
1147
void Kalyna512::Base::ProcessBlock_88(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
1148
13
{
1149
13
    word64 *t1 = m_wspace+0, *t2 = m_wspace+8, *msg = m_wspace+16;
1150
1151
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1152
13
    typedef GetBlock<word64, LittleEndian, false> InBlock;
1153
13
    InBlock iblk(inBlock);
1154
13
    iblk(msg[0])(msg[1])(msg[2])(msg[3])(msg[4])(msg[5])(msg[6])(msg[7]);
1155
1156
13
    if (IsForwardTransformation())
1157
0
    {
1158
0
        AddKey<8>(msg, t1, m_rkeys);
1159
0
        G512(t1, t2, &m_rkeys[8]); // 1
1160
0
        G512(t2, t1, &m_rkeys[16]); // 2
1161
0
        G512(t1, t2, &m_rkeys[24]); // 3
1162
0
        G512(t2, t1, &m_rkeys[32]); // 4
1163
0
        G512(t1, t2, &m_rkeys[40]); // 5
1164
0
        G512(t2, t1, &m_rkeys[48]); // 6
1165
0
        G512(t1, t2, &m_rkeys[56]); // 7
1166
0
        G512(t2, t1, &m_rkeys[64]); // 8
1167
0
        G512(t1, t2, &m_rkeys[72]); // 9
1168
0
        G512(t2, t1, &m_rkeys[80]); // 10
1169
0
        G512(t1, t2, &m_rkeys[88]); // 11
1170
0
        G512(t2, t1, &m_rkeys[96]); // 12
1171
0
        G512(t1, t2, &m_rkeys[104]); // 13
1172
0
        G512(t2, t1, &m_rkeys[112]); // 14
1173
0
        G512(t1, t2, &m_rkeys[120]); // 15
1174
0
        G512(t2, t1, &m_rkeys[128]); // 16
1175
0
        G512(t1, t2, &m_rkeys[136]); // 17
1176
0
        GL512(t2, t1, &m_rkeys[144]); // 18
1177
0
    }
1178
13
    else
1179
13
    {
1180
13
        SubKey<8>(msg, t1, &m_rkeys[144]);
1181
13
        IMC512(t1);
1182
13
        IG512(t1, t2, &m_rkeys[136]);
1183
13
        IG512(t2, t1, &m_rkeys[128]);
1184
13
        IG512(t1, t2, &m_rkeys[120]);
1185
13
        IG512(t2, t1, &m_rkeys[112]);
1186
13
        IG512(t1, t2, &m_rkeys[104]);
1187
13
        IG512(t2, t1, &m_rkeys[96]);
1188
13
        IG512(t1, t2, &m_rkeys[88]);
1189
13
        IG512(t2, t1, &m_rkeys[80]);
1190
13
        IG512(t1, t2, &m_rkeys[72]);
1191
13
        IG512(t2, t1, &m_rkeys[64]);
1192
13
        IG512(t1, t2, &m_rkeys[56]);
1193
13
        IG512(t2, t1, &m_rkeys[48]);
1194
13
        IG512(t1, t2, &m_rkeys[40]);
1195
13
        IG512(t2, t1, &m_rkeys[32]);
1196
13
        IG512(t1, t2, &m_rkeys[24]);
1197
13
        IG512(t2, t1, &m_rkeys[16]);
1198
13
        IG512(t1, t2, &m_rkeys[8]);
1199
13
        IGL512(t2, t1, &m_rkeys[0]);
1200
13
    }
1201
1202
    // Reverse bytes on BigEndian; Align pointer on LittleEndian
1203
13
    typedef PutBlock<word64, LittleEndian, false> OutBlock;
1204
13
    OutBlock oblk(xorBlock, outBlock);
1205
13
    oblk(t1[0])(t1[1])(t1[2])(t1[3])(t1[4])(t1[5])(t1[6])(t1[7]);
1206
13
}
1207
1208
// *********************** Library routines *********************** //
1209
1210
void Kalyna128::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params)
1211
3
{
1212
3
    CRYPTOPP_UNUSED(params);
1213
3
    m_nb = static_cast<unsigned int>(16U / sizeof(word64));
1214
3
    m_nk = static_cast<unsigned int>(keylen / sizeof(word64));
1215
1216
3
    switch (keylen)
1217
3
    {
1218
3
    case 16:  // 128
1219
3
        m_kl = 16;
1220
3
        m_mkey.New(2);
1221
3
        m_rkeys.New(11*2);
1222
3
        m_wspace.New(2*6);
1223
1224
3
        GetUserKey(LITTLE_ENDIAN_ORDER, m_mkey.begin(), 2, key, 16);
1225
3
        SetKey_22(m_mkey.begin());
1226
3
        break;
1227
0
    case 32:  // 256
1228
0
        m_kl = 32;
1229
0
        m_mkey.New(4);
1230
0
        m_rkeys.New(15*2);
1231
0
        m_wspace.New(6*2+4);
1232
1233
0
        GetUserKey(LITTLE_ENDIAN_ORDER, m_mkey.begin(), 4, key, 32);
1234
0
        SetKey_24(m_mkey.begin());
1235
0
        break;
1236
0
    default:
1237
0
        CRYPTOPP_ASSERT(0);
1238
3
    }
1239
3
}
1240
1241
void Kalyna128::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
1242
1
{
1243
    // Timing attack countermeasure. see comments in Rijndael for more details
1244
1
    const int cacheLineSize = GetCacheLineSize();
1245
1
    volatile word64 _u = 0;
1246
1
    word64 u = _u;
1247
1248
1
    const byte* p = reinterpret_cast<const byte*>(KalynaTab::S);
1249
5
    for (unsigned int i=0; i<256; i+=cacheLineSize)
1250
4
        u ^= *reinterpret_cast<const word64*>(p+i);
1251
1
    m_wspace[0] = u;
1252
1253
1
    switch ((m_nb << 8) | m_nk)
1254
1
    {
1255
1
    case (2 << 8) | 2:
1256
1
        ProcessBlock_22(inBlock, xorBlock, outBlock);
1257
1
        break;
1258
0
    case (2 << 8) | 4:
1259
0
        ProcessBlock_24(inBlock, xorBlock, outBlock);
1260
0
        break;
1261
0
    default:
1262
0
        CRYPTOPP_ASSERT(0);
1263
1
    }
1264
1
}
1265
1266
void Kalyna256::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params)
1267
1
{
1268
1
    CRYPTOPP_UNUSED(params);
1269
1
    m_nb = static_cast<unsigned int>(32U / sizeof(word64));
1270
1
    m_nk = static_cast<unsigned int>(keylen / sizeof(word64));
1271
1272
1
    switch (keylen)
1273
1
    {
1274
1
    case 32:  // 256
1275
1
        m_kl = 32;
1276
1
        m_mkey.New(4);
1277
1
        m_rkeys.New(15*4);
1278
1
        m_wspace.New(5*4);
1279
1280
1
        GetUserKey(LITTLE_ENDIAN_ORDER, m_mkey.begin(), 4, key, 32);
1281
1
        SetKey_44(m_mkey.begin());
1282
1
        break;
1283
0
    case 64:  // 512
1284
0
        m_kl = 64;
1285
0
        m_mkey.New(8);
1286
0
        m_rkeys.New(19*4);
1287
0
        m_wspace.New(6*4+8);
1288
1289
0
        GetUserKey(LITTLE_ENDIAN_ORDER, m_mkey.begin(), 8, key, 64);
1290
0
        SetKey_48(m_mkey.begin());
1291
0
        break;
1292
0
    default:
1293
0
        CRYPTOPP_ASSERT(0);
1294
1
    }
1295
1
}
1296
1297
void Kalyna256::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
1298
0
{
1299
    // Timing attack countermeasure. see comments in Rijndael for more details
1300
0
    const int cacheLineSize = GetCacheLineSize();
1301
0
    volatile word64 _u = 0;
1302
0
    word64 u = _u;
1303
1304
0
    const byte* p = reinterpret_cast<const byte*>(KalynaTab::S);
1305
0
    for (unsigned int i=0; i<256; i+=cacheLineSize)
1306
0
        u ^= *reinterpret_cast<const word64*>(p+i);
1307
0
    m_wspace[0] = u;
1308
1309
0
    switch ((m_nb << 8) | m_nk)
1310
0
    {
1311
0
    case (4 << 8) | 4:
1312
0
        ProcessBlock_44(inBlock, xorBlock, outBlock);
1313
0
        break;
1314
0
    case (4 << 8) | 8:
1315
0
        ProcessBlock_48(inBlock, xorBlock, outBlock);
1316
0
        break;
1317
0
    default:
1318
0
        CRYPTOPP_ASSERT(0);
1319
0
    }
1320
0
}
1321
1322
void Kalyna512::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params)
1323
3
{
1324
3
    CRYPTOPP_UNUSED(params);
1325
3
    m_nb = static_cast<unsigned int>(64U / sizeof(word64));
1326
3
    m_nk = static_cast<unsigned int>(keylen / sizeof(word64));
1327
1328
3
    switch (keylen)
1329
3
    {
1330
3
    case 64:  // 512
1331
3
        m_kl = 64;
1332
3
        m_nb = static_cast<unsigned int>(64U / sizeof(word64));
1333
3
        m_nk = static_cast<unsigned int>(keylen / sizeof(word64));
1334
1335
3
        m_mkey.New(8);
1336
3
        m_rkeys.New(19*8);
1337
3
        m_wspace.New(5*8);
1338
1339
3
        GetUserKey(LITTLE_ENDIAN_ORDER, m_mkey.begin(), 8, key, 64);
1340
3
        SetKey_88(m_mkey.begin());
1341
3
        break;
1342
0
    default:
1343
0
        CRYPTOPP_ASSERT(0);
1344
3
    }
1345
3
}
1346
1347
void Kalyna512::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
1348
13
{
1349
    // Timing attack countermeasure. see comments in Rijndael for more details
1350
13
    const int cacheLineSize = GetCacheLineSize();
1351
13
    volatile word64 _u = 0;
1352
13
    word64 u = _u;
1353
1354
13
    const byte* p = reinterpret_cast<const byte*>(KalynaTab::S);
1355
65
    for (unsigned int i=0; i<256; i+=cacheLineSize)
1356
52
        u ^= *reinterpret_cast<const word64*>(p+i);
1357
13
    m_wspace[0] = u;
1358
1359
13
    ProcessBlock_88(inBlock, xorBlock, outBlock);
1360
13
}
1361
1362
NAMESPACE_END