Coverage Report

Created: 2024-11-21 07:03

/src/openssl/crypto/whrlpool/wp_block.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/**
11
 * The Whirlpool hashing function.
12
 *
13
 * See
14
 *      P.S.L.M. Barreto, V. Rijmen,
15
 *      ``The Whirlpool hashing function,''
16
 *      NESSIE submission, 2000 (tweaked version, 2001),
17
 *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
18
 *
19
 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
20
 * Vincent Rijmen. Lookup "reference implementations" on
21
 * <http://planeta.terra.com.br/informatica/paulobarreto/>
22
 *
23
 * =============================================================================
24
 *
25
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
26
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
29
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
 *
37
 */
38
39
/*
40
 * Whirlpool low level APIs are deprecated for public use, but still ok for
41
 * internal use.
42
 */
43
#include "internal/deprecated.h"
44
45
#include "internal/cryptlib.h"
46
#include "wp_local.h"
47
#include <string.h>
48
49
/* Byte type used for the byte-wise views of the state and lookup table. */
typedef unsigned char u8;
/*
 * 64-bit unsigned type used for the quadword views.
 *
 * Windows toolchains that are not MinGW spell the type __int64.  MinGW
 * also defines _WIN32 but is GCC-based; it is excluded through its
 * predefined macro __MINGW32__ (note the trailing underscores) so that it
 * takes the generic "unsigned long long" branch below.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#elif defined(__arch64__)
typedef unsigned long u64;
#else
typedef unsigned long long u64;
#endif
57
58
0
/* Iteration count of the per-block round loop; Cx also reserves this many u64 round-constant slots after its lookup entries. */
#define ROUNDS  10
59
60
#define STRICT_ALIGNMENT
61
#if !defined(PEDANTIC) && (defined(__i386) || defined(__i386__) || \
62
                           defined(__x86_64) || defined(__x86_64__) || \
63
                           defined(_M_IX86) || defined(_M_AMD64) || \
64
                           defined(_M_X64))
65
/*
66
 * Well, formally there are a couple of other architectures that permit
67
 * unaligned loads, specifically those not crossing cache lines: IA-64 and
68
 * PowerPC...
69
 */
70
# undef STRICT_ALIGNMENT
71
#endif
72
73
#ifndef STRICT_ALIGNMENT
74
# ifdef __GNUC__
75
typedef u64 u64_a1 __attribute((__aligned__(1)));
76
# else
77
typedef u64 u64_a1;
78
# endif
79
#endif
80
81
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
82
typedef u64 u64_aX __attribute((__aligned__(1)));
83
#else
84
typedef u64 u64_aX;
85
#endif
86
87
#undef SMALL_REGISTER_BANK
88
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
89
# define SMALL_REGISTER_BANK
90
# if defined(WHIRLPOOL_ASM)
91
#  ifndef OPENSSL_SMALL_FOOTPRINT
92
/*
93
 * it appears that for older non-MMX
94
 * CPUs this is actually faster!
95
 */
96
#   define OPENSSL_SMALL_FOOTPRINT
97
#  endif
98
#  define GO_FOR_MMX(ctx,inp,num)     do {                    \
99
        void whirlpool_block_mmx(void *,const void *,size_t);   \
100
        if (!(OPENSSL_ia32cap_P[0] & (1<<23)))  break;          \
101
        whirlpool_block_mmx(ctx->H.c,inp,num);  return;         \
102
                                        } while (0)
103
# endif
104
#endif
105
106
#undef ROTATE
107
#ifndef PEDANTIC
108
# if defined(_MSC_VER)
109
#  if defined(_WIN64)            /* applies to both IA-64 and AMD64 */
110
#   include <stdlib.h>
111
#   pragma intrinsic(_rotl64)
112
#   define ROTATE(a,n) _rotl64((a),n)
113
#  endif
114
# elif defined(__GNUC__) && __GNUC__>=2
115
#  if defined(__x86_64) || defined(__x86_64__)
116
#   if defined(L_ENDIAN)
117
#    define ROTATE(a,n)       ({ u64 ret; asm ("rolq %1,%0"   \
118
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
119
#   elif defined(B_ENDIAN)
120
       /*
121
        * Most will argue that x86_64 is always little-endian. Well, yes, but
122
        * then we have stratus.com who has modified gcc to "emulate"
123
        * big-endian on x86. Is there evidence that they [or somebody else]
124
        * won't do same for x86_64? Naturally no. And this line is waiting
125
        * ready for that brave soul:-)
126
        */
127
#    define ROTATE(a,n)       ({ u64 ret; asm ("rorq %1,%0"   \
128
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
129
#   endif
130
#  elif defined(__ia64) || defined(__ia64__)
131
#   if defined(L_ENDIAN)
132
#    define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
133
                                   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
134
#   elif defined(B_ENDIAN)
135
#    define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
136
                                   : "=r"(ret) : "r"(a),"M"(n)); ret; })
137
#   endif
138
#  endif
139
# endif
140
#endif
141
142
#if defined(OPENSSL_SMALL_FOOTPRINT)
143
# if !defined(ROTATE)
144
#  if defined(L_ENDIAN)         /* little-endians have to rotate left */
145
/* Rotate 64-bit i left by n bits (n in 1..63); n is parenthesized so expression arguments expand correctly. */
#   define ROTATE(i,n)       ((i)<<(n) ^ (i)>>(64-(n)))
146
#  elif defined(B_ENDIAN)       /* big-endians have to rotate right */
147
/* Rotate 64-bit i right by n bits (n in 1..63); n is parenthesized so expression arguments expand correctly. */
#   define ROTATE(i,n)       ((i)>>(n) ^ (i)<<(64-(n)))
148
#  endif
149
# endif
150
# if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
151
#  define STRICT_ALIGNMENT      /* ensure smallest table size */
152
# endif
153
#endif
154
155
/*
156
 * Table size depends on STRICT_ALIGNMENT and whether or not endian-
157
 * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
158
 * defined, which is normally the case on x86[_64] CPUs, the table is
159
 * 4KB large unconditionally. Otherwise if ROTATE is defined, the
160
 * table is 2KB large, and otherwise - 16KB. 2KB table requires a
161
 * whole bunch of additional rotations, but I'm willing to "trade,"
162
 * because 16KB table certainly trashes L1 cache. I wish all CPUs
163
 * could handle unaligned load as 4KB table doesn't trash the cache,
164
 * nor does it require additional rotations.
165
 */
166
/*
167
 * Note that every Cn macro expands as two loads: one byte load and
168
 * one quadword load. One can argue that so many single-byte loads
169
 * are excessive, as one could load a quadword and "milk" it for
170
 * eight 8-bit values instead. Well, yes, but in order to do so *and*
171
 * avoid excessive loads you have to accommodate a handful of 64-bit
172
 * values in the register bank and issue a bunch of shifts and masks.
173
 * It's a tradeoff: loads vs. shifts and masks in a big register bank[!].
174
 * On most CPUs eight single-byte loads are faster, and I leave the other
175
 * ones to rely on a smart compiler to fold byte loads if beneficial.
176
 * Hand-coded assembler would be another alternative:-)
177
 */
178
#ifdef STRICT_ALIGNMENT
179
# if defined(ROTATE)
180
#  define N   1
181
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7
182
#  define C0(K,i)     (Cx.q[K.c[(i)*8+0]])
183
#  define C1(K,i)     ROTATE(Cx.q[K.c[(i)*8+1]],8)
184
#  define C2(K,i)     ROTATE(Cx.q[K.c[(i)*8+2]],16)
185
#  define C3(K,i)     ROTATE(Cx.q[K.c[(i)*8+3]],24)
186
#  define C4(K,i)     ROTATE(Cx.q[K.c[(i)*8+4]],32)
187
#  define C5(K,i)     ROTATE(Cx.q[K.c[(i)*8+5]],40)
188
#  define C6(K,i)     ROTATE(Cx.q[K.c[(i)*8+6]],48)
189
#  define C7(K,i)     ROTATE(Cx.q[K.c[(i)*8+7]],56)
190
# else
191
#  define N   8
192
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
193
                                        c7,c0,c1,c2,c3,c4,c5,c6, \
194
                                        c6,c7,c0,c1,c2,c3,c4,c5, \
195
                                        c5,c6,c7,c0,c1,c2,c3,c4, \
196
                                        c4,c5,c6,c7,c0,c1,c2,c3, \
197
                                        c3,c4,c5,c6,c7,c0,c1,c2, \
198
                                        c2,c3,c4,c5,c6,c7,c0,c1, \
199
                                        c1,c2,c3,c4,c5,c6,c7,c0
200
#  define C0(K,i)     (Cx.q[0+8*K.c[(i)*8+0]])
201
#  define C1(K,i)     (Cx.q[1+8*K.c[(i)*8+1]])
202
#  define C2(K,i)     (Cx.q[2+8*K.c[(i)*8+2]])
203
#  define C3(K,i)     (Cx.q[3+8*K.c[(i)*8+3]])
204
#  define C4(K,i)     (Cx.q[4+8*K.c[(i)*8+4]])
205
#  define C5(K,i)     (Cx.q[5+8*K.c[(i)*8+5]])
206
#  define C6(K,i)     (Cx.q[6+8*K.c[(i)*8+6]])
207
#  define C7(K,i)     (Cx.q[7+8*K.c[(i)*8+7]])
208
# endif
209
#else
210
0
# define N     2
211
# define LL(c0,c1,c2,c3,c4,c5,c6,c7)   c0,c1,c2,c3,c4,c5,c6,c7, \
212
                                        c0,c1,c2,c3,c4,c5,c6,c7
213
0
# define C0(K,i)       (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
214
0
# define C1(K,i)       (((u64_a1*)(Cx.c+7))[2*K.c[(i)*8+1]])
215
0
# define C2(K,i)       (((u64_a1*)(Cx.c+6))[2*K.c[(i)*8+2]])
216
0
# define C3(K,i)       (((u64_a1*)(Cx.c+5))[2*K.c[(i)*8+3]])
217
0
# define C4(K,i)       (((u64_a1*)(Cx.c+4))[2*K.c[(i)*8+4]])
218
0
# define C5(K,i)       (((u64_a1*)(Cx.c+3))[2*K.c[(i)*8+5]])
219
0
# define C6(K,i)       (((u64_a1*)(Cx.c+2))[2*K.c[(i)*8+6]])
220
0
# define C7(K,i)       (((u64_a1*)(Cx.c+1))[2*K.c[(i)*8+7]])
221
#endif
222
223
static const
224
    union {
225
    u8 c[(256 * N + ROUNDS) * sizeof(u64)];
226
    u64 q[(256 * N + ROUNDS)];
227
} Cx = {
228
        {
229
            /* Note endian-neutral representation:-) */
230
            LL(0x18, 0x18, 0x60, 0x18, 0xc0, 0x78, 0x30, 0xd8),
231
            LL(0x23, 0x23, 0x8c, 0x23, 0x05, 0xaf, 0x46, 0x26),
232
            LL(0xc6, 0xc6, 0x3f, 0xc6, 0x7e, 0xf9, 0x91, 0xb8),
233
            LL(0xe8, 0xe8, 0x87, 0xe8, 0x13, 0x6f, 0xcd, 0xfb),
234
            LL(0x87, 0x87, 0x26, 0x87, 0x4c, 0xa1, 0x13, 0xcb),
235
            LL(0xb8, 0xb8, 0xda, 0xb8, 0xa9, 0x62, 0x6d, 0x11),
236
            LL(0x01, 0x01, 0x04, 0x01, 0x08, 0x05, 0x02, 0x09),
237
            LL(0x4f, 0x4f, 0x21, 0x4f, 0x42, 0x6e, 0x9e, 0x0d),
238
            LL(0x36, 0x36, 0xd8, 0x36, 0xad, 0xee, 0x6c, 0x9b),
239
            LL(0xa6, 0xa6, 0xa2, 0xa6, 0x59, 0x04, 0x51, 0xff),
240
            LL(0xd2, 0xd2, 0x6f, 0xd2, 0xde, 0xbd, 0xb9, 0x0c),
241
            LL(0xf5, 0xf5, 0xf3, 0xf5, 0xfb, 0x06, 0xf7, 0x0e),
242
            LL(0x79, 0x79, 0xf9, 0x79, 0xef, 0x80, 0xf2, 0x96),
243
            LL(0x6f, 0x6f, 0xa1, 0x6f, 0x5f, 0xce, 0xde, 0x30),
244
            LL(0x91, 0x91, 0x7e, 0x91, 0xfc, 0xef, 0x3f, 0x6d),
245
            LL(0x52, 0x52, 0x55, 0x52, 0xaa, 0x07, 0xa4, 0xf8),
246
            LL(0x60, 0x60, 0x9d, 0x60, 0x27, 0xfd, 0xc0, 0x47),
247
            LL(0xbc, 0xbc, 0xca, 0xbc, 0x89, 0x76, 0x65, 0x35),
248
            LL(0x9b, 0x9b, 0x56, 0x9b, 0xac, 0xcd, 0x2b, 0x37),
249
            LL(0x8e, 0x8e, 0x02, 0x8e, 0x04, 0x8c, 0x01, 0x8a),
250
            LL(0xa3, 0xa3, 0xb6, 0xa3, 0x71, 0x15, 0x5b, 0xd2),
251
            LL(0x0c, 0x0c, 0x30, 0x0c, 0x60, 0x3c, 0x18, 0x6c),
252
            LL(0x7b, 0x7b, 0xf1, 0x7b, 0xff, 0x8a, 0xf6, 0x84),
253
            LL(0x35, 0x35, 0xd4, 0x35, 0xb5, 0xe1, 0x6a, 0x80),
254
            LL(0x1d, 0x1d, 0x74, 0x1d, 0xe8, 0x69, 0x3a, 0xf5),
255
            LL(0xe0, 0xe0, 0xa7, 0xe0, 0x53, 0x47, 0xdd, 0xb3),
256
            LL(0xd7, 0xd7, 0x7b, 0xd7, 0xf6, 0xac, 0xb3, 0x21),
257
            LL(0xc2, 0xc2, 0x2f, 0xc2, 0x5e, 0xed, 0x99, 0x9c),
258
            LL(0x2e, 0x2e, 0xb8, 0x2e, 0x6d, 0x96, 0x5c, 0x43),
259
            LL(0x4b, 0x4b, 0x31, 0x4b, 0x62, 0x7a, 0x96, 0x29),
260
            LL(0xfe, 0xfe, 0xdf, 0xfe, 0xa3, 0x21, 0xe1, 0x5d),
261
            LL(0x57, 0x57, 0x41, 0x57, 0x82, 0x16, 0xae, 0xd5),
262
            LL(0x15, 0x15, 0x54, 0x15, 0xa8, 0x41, 0x2a, 0xbd),
263
            LL(0x77, 0x77, 0xc1, 0x77, 0x9f, 0xb6, 0xee, 0xe8),
264
            LL(0x37, 0x37, 0xdc, 0x37, 0xa5, 0xeb, 0x6e, 0x92),
265
            LL(0xe5, 0xe5, 0xb3, 0xe5, 0x7b, 0x56, 0xd7, 0x9e),
266
            LL(0x9f, 0x9f, 0x46, 0x9f, 0x8c, 0xd9, 0x23, 0x13),
267
            LL(0xf0, 0xf0, 0xe7, 0xf0, 0xd3, 0x17, 0xfd, 0x23),
268
            LL(0x4a, 0x4a, 0x35, 0x4a, 0x6a, 0x7f, 0x94, 0x20),
269
            LL(0xda, 0xda, 0x4f, 0xda, 0x9e, 0x95, 0xa9, 0x44),
270
            LL(0x58, 0x58, 0x7d, 0x58, 0xfa, 0x25, 0xb0, 0xa2),
271
            LL(0xc9, 0xc9, 0x03, 0xc9, 0x06, 0xca, 0x8f, 0xcf),
272
            LL(0x29, 0x29, 0xa4, 0x29, 0x55, 0x8d, 0x52, 0x7c),
273
            LL(0x0a, 0x0a, 0x28, 0x0a, 0x50, 0x22, 0x14, 0x5a),
274
            LL(0xb1, 0xb1, 0xfe, 0xb1, 0xe1, 0x4f, 0x7f, 0x50),
275
            LL(0xa0, 0xa0, 0xba, 0xa0, 0x69, 0x1a, 0x5d, 0xc9),
276
            LL(0x6b, 0x6b, 0xb1, 0x6b, 0x7f, 0xda, 0xd6, 0x14),
277
            LL(0x85, 0x85, 0x2e, 0x85, 0x5c, 0xab, 0x17, 0xd9),
278
            LL(0xbd, 0xbd, 0xce, 0xbd, 0x81, 0x73, 0x67, 0x3c),
279
            LL(0x5d, 0x5d, 0x69, 0x5d, 0xd2, 0x34, 0xba, 0x8f),
280
            LL(0x10, 0x10, 0x40, 0x10, 0x80, 0x50, 0x20, 0x90),
281
            LL(0xf4, 0xf4, 0xf7, 0xf4, 0xf3, 0x03, 0xf5, 0x07),
282
            LL(0xcb, 0xcb, 0x0b, 0xcb, 0x16, 0xc0, 0x8b, 0xdd),
283
            LL(0x3e, 0x3e, 0xf8, 0x3e, 0xed, 0xc6, 0x7c, 0xd3),
284
            LL(0x05, 0x05, 0x14, 0x05, 0x28, 0x11, 0x0a, 0x2d),
285
            LL(0x67, 0x67, 0x81, 0x67, 0x1f, 0xe6, 0xce, 0x78),
286
            LL(0xe4, 0xe4, 0xb7, 0xe4, 0x73, 0x53, 0xd5, 0x97),
287
            LL(0x27, 0x27, 0x9c, 0x27, 0x25, 0xbb, 0x4e, 0x02),
288
            LL(0x41, 0x41, 0x19, 0x41, 0x32, 0x58, 0x82, 0x73),
289
            LL(0x8b, 0x8b, 0x16, 0x8b, 0x2c, 0x9d, 0x0b, 0xa7),
290
            LL(0xa7, 0xa7, 0xa6, 0xa7, 0x51, 0x01, 0x53, 0xf6),
291
            LL(0x7d, 0x7d, 0xe9, 0x7d, 0xcf, 0x94, 0xfa, 0xb2),
292
            LL(0x95, 0x95, 0x6e, 0x95, 0xdc, 0xfb, 0x37, 0x49),
293
            LL(0xd8, 0xd8, 0x47, 0xd8, 0x8e, 0x9f, 0xad, 0x56),
294
            LL(0xfb, 0xfb, 0xcb, 0xfb, 0x8b, 0x30, 0xeb, 0x70),
295
            LL(0xee, 0xee, 0x9f, 0xee, 0x23, 0x71, 0xc1, 0xcd),
296
            LL(0x7c, 0x7c, 0xed, 0x7c, 0xc7, 0x91, 0xf8, 0xbb),
297
            LL(0x66, 0x66, 0x85, 0x66, 0x17, 0xe3, 0xcc, 0x71),
298
            LL(0xdd, 0xdd, 0x53, 0xdd, 0xa6, 0x8e, 0xa7, 0x7b),
299
            LL(0x17, 0x17, 0x5c, 0x17, 0xb8, 0x4b, 0x2e, 0xaf),
300
            LL(0x47, 0x47, 0x01, 0x47, 0x02, 0x46, 0x8e, 0x45),
301
            LL(0x9e, 0x9e, 0x42, 0x9e, 0x84, 0xdc, 0x21, 0x1a),
302
            LL(0xca, 0xca, 0x0f, 0xca, 0x1e, 0xc5, 0x89, 0xd4),
303
            LL(0x2d, 0x2d, 0xb4, 0x2d, 0x75, 0x99, 0x5a, 0x58),
304
            LL(0xbf, 0xbf, 0xc6, 0xbf, 0x91, 0x79, 0x63, 0x2e),
305
            LL(0x07, 0x07, 0x1c, 0x07, 0x38, 0x1b, 0x0e, 0x3f),
306
            LL(0xad, 0xad, 0x8e, 0xad, 0x01, 0x23, 0x47, 0xac),
307
            LL(0x5a, 0x5a, 0x75, 0x5a, 0xea, 0x2f, 0xb4, 0xb0),
308
            LL(0x83, 0x83, 0x36, 0x83, 0x6c, 0xb5, 0x1b, 0xef),
309
            LL(0x33, 0x33, 0xcc, 0x33, 0x85, 0xff, 0x66, 0xb6),
310
            LL(0x63, 0x63, 0x91, 0x63, 0x3f, 0xf2, 0xc6, 0x5c),
311
            LL(0x02, 0x02, 0x08, 0x02, 0x10, 0x0a, 0x04, 0x12),
312
            LL(0xaa, 0xaa, 0x92, 0xaa, 0x39, 0x38, 0x49, 0x93),
313
            LL(0x71, 0x71, 0xd9, 0x71, 0xaf, 0xa8, 0xe2, 0xde),
314
            LL(0xc8, 0xc8, 0x07, 0xc8, 0x0e, 0xcf, 0x8d, 0xc6),
315
            LL(0x19, 0x19, 0x64, 0x19, 0xc8, 0x7d, 0x32, 0xd1),
316
            LL(0x49, 0x49, 0x39, 0x49, 0x72, 0x70, 0x92, 0x3b),
317
            LL(0xd9, 0xd9, 0x43, 0xd9, 0x86, 0x9a, 0xaf, 0x5f),
318
            LL(0xf2, 0xf2, 0xef, 0xf2, 0xc3, 0x1d, 0xf9, 0x31),
319
            LL(0xe3, 0xe3, 0xab, 0xe3, 0x4b, 0x48, 0xdb, 0xa8),
320
            LL(0x5b, 0x5b, 0x71, 0x5b, 0xe2, 0x2a, 0xb6, 0xb9),
321
            LL(0x88, 0x88, 0x1a, 0x88, 0x34, 0x92, 0x0d, 0xbc),
322
            LL(0x9a, 0x9a, 0x52, 0x9a, 0xa4, 0xc8, 0x29, 0x3e),
323
            LL(0x26, 0x26, 0x98, 0x26, 0x2d, 0xbe, 0x4c, 0x0b),
324
            LL(0x32, 0x32, 0xc8, 0x32, 0x8d, 0xfa, 0x64, 0xbf),
325
            LL(0xb0, 0xb0, 0xfa, 0xb0, 0xe9, 0x4a, 0x7d, 0x59),
326
            LL(0xe9, 0xe9, 0x83, 0xe9, 0x1b, 0x6a, 0xcf, 0xf2),
327
            LL(0x0f, 0x0f, 0x3c, 0x0f, 0x78, 0x33, 0x1e, 0x77),
328
            LL(0xd5, 0xd5, 0x73, 0xd5, 0xe6, 0xa6, 0xb7, 0x33),
329
            LL(0x80, 0x80, 0x3a, 0x80, 0x74, 0xba, 0x1d, 0xf4),
330
            LL(0xbe, 0xbe, 0xc2, 0xbe, 0x99, 0x7c, 0x61, 0x27),
331
            LL(0xcd, 0xcd, 0x13, 0xcd, 0x26, 0xde, 0x87, 0xeb),
332
            LL(0x34, 0x34, 0xd0, 0x34, 0xbd, 0xe4, 0x68, 0x89),
333
            LL(0x48, 0x48, 0x3d, 0x48, 0x7a, 0x75, 0x90, 0x32),
334
            LL(0xff, 0xff, 0xdb, 0xff, 0xab, 0x24, 0xe3, 0x54),
335
            LL(0x7a, 0x7a, 0xf5, 0x7a, 0xf7, 0x8f, 0xf4, 0x8d),
336
            LL(0x90, 0x90, 0x7a, 0x90, 0xf4, 0xea, 0x3d, 0x64),
337
            LL(0x5f, 0x5f, 0x61, 0x5f, 0xc2, 0x3e, 0xbe, 0x9d),
338
            LL(0x20, 0x20, 0x80, 0x20, 0x1d, 0xa0, 0x40, 0x3d),
339
            LL(0x68, 0x68, 0xbd, 0x68, 0x67, 0xd5, 0xd0, 0x0f),
340
            LL(0x1a, 0x1a, 0x68, 0x1a, 0xd0, 0x72, 0x34, 0xca),
341
            LL(0xae, 0xae, 0x82, 0xae, 0x19, 0x2c, 0x41, 0xb7),
342
            LL(0xb4, 0xb4, 0xea, 0xb4, 0xc9, 0x5e, 0x75, 0x7d),
343
            LL(0x54, 0x54, 0x4d, 0x54, 0x9a, 0x19, 0xa8, 0xce),
344
            LL(0x93, 0x93, 0x76, 0x93, 0xec, 0xe5, 0x3b, 0x7f),
345
            LL(0x22, 0x22, 0x88, 0x22, 0x0d, 0xaa, 0x44, 0x2f),
346
            LL(0x64, 0x64, 0x8d, 0x64, 0x07, 0xe9, 0xc8, 0x63),
347
            LL(0xf1, 0xf1, 0xe3, 0xf1, 0xdb, 0x12, 0xff, 0x2a),
348
            LL(0x73, 0x73, 0xd1, 0x73, 0xbf, 0xa2, 0xe6, 0xcc),
349
            LL(0x12, 0x12, 0x48, 0x12, 0x90, 0x5a, 0x24, 0x82),
350
            LL(0x40, 0x40, 0x1d, 0x40, 0x3a, 0x5d, 0x80, 0x7a),
351
            LL(0x08, 0x08, 0x20, 0x08, 0x40, 0x28, 0x10, 0x48),
352
            LL(0xc3, 0xc3, 0x2b, 0xc3, 0x56, 0xe8, 0x9b, 0x95),
353
            LL(0xec, 0xec, 0x97, 0xec, 0x33, 0x7b, 0xc5, 0xdf),
354
            LL(0xdb, 0xdb, 0x4b, 0xdb, 0x96, 0x90, 0xab, 0x4d),
355
            LL(0xa1, 0xa1, 0xbe, 0xa1, 0x61, 0x1f, 0x5f, 0xc0),
356
            LL(0x8d, 0x8d, 0x0e, 0x8d, 0x1c, 0x83, 0x07, 0x91),
357
            LL(0x3d, 0x3d, 0xf4, 0x3d, 0xf5, 0xc9, 0x7a, 0xc8),
358
            LL(0x97, 0x97, 0x66, 0x97, 0xcc, 0xf1, 0x33, 0x5b),
359
            LL(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
360
            LL(0xcf, 0xcf, 0x1b, 0xcf, 0x36, 0xd4, 0x83, 0xf9),
361
            LL(0x2b, 0x2b, 0xac, 0x2b, 0x45, 0x87, 0x56, 0x6e),
362
            LL(0x76, 0x76, 0xc5, 0x76, 0x97, 0xb3, 0xec, 0xe1),
363
            LL(0x82, 0x82, 0x32, 0x82, 0x64, 0xb0, 0x19, 0xe6),
364
            LL(0xd6, 0xd6, 0x7f, 0xd6, 0xfe, 0xa9, 0xb1, 0x28),
365
            LL(0x1b, 0x1b, 0x6c, 0x1b, 0xd8, 0x77, 0x36, 0xc3),
366
            LL(0xb5, 0xb5, 0xee, 0xb5, 0xc1, 0x5b, 0x77, 0x74),
367
            LL(0xaf, 0xaf, 0x86, 0xaf, 0x11, 0x29, 0x43, 0xbe),
368
            LL(0x6a, 0x6a, 0xb5, 0x6a, 0x77, 0xdf, 0xd4, 0x1d),
369
            LL(0x50, 0x50, 0x5d, 0x50, 0xba, 0x0d, 0xa0, 0xea),
370
            LL(0x45, 0x45, 0x09, 0x45, 0x12, 0x4c, 0x8a, 0x57),
371
            LL(0xf3, 0xf3, 0xeb, 0xf3, 0xcb, 0x18, 0xfb, 0x38),
372
            LL(0x30, 0x30, 0xc0, 0x30, 0x9d, 0xf0, 0x60, 0xad),
373
            LL(0xef, 0xef, 0x9b, 0xef, 0x2b, 0x74, 0xc3, 0xc4),
374
            LL(0x3f, 0x3f, 0xfc, 0x3f, 0xe5, 0xc3, 0x7e, 0xda),
375
            LL(0x55, 0x55, 0x49, 0x55, 0x92, 0x1c, 0xaa, 0xc7),
376
            LL(0xa2, 0xa2, 0xb2, 0xa2, 0x79, 0x10, 0x59, 0xdb),
377
            LL(0xea, 0xea, 0x8f, 0xea, 0x03, 0x65, 0xc9, 0xe9),
378
            LL(0x65, 0x65, 0x89, 0x65, 0x0f, 0xec, 0xca, 0x6a),
379
            LL(0xba, 0xba, 0xd2, 0xba, 0xb9, 0x68, 0x69, 0x03),
380
            LL(0x2f, 0x2f, 0xbc, 0x2f, 0x65, 0x93, 0x5e, 0x4a),
381
            LL(0xc0, 0xc0, 0x27, 0xc0, 0x4e, 0xe7, 0x9d, 0x8e),
382
            LL(0xde, 0xde, 0x5f, 0xde, 0xbe, 0x81, 0xa1, 0x60),
383
            LL(0x1c, 0x1c, 0x70, 0x1c, 0xe0, 0x6c, 0x38, 0xfc),
384
            LL(0xfd, 0xfd, 0xd3, 0xfd, 0xbb, 0x2e, 0xe7, 0x46),
385
            LL(0x4d, 0x4d, 0x29, 0x4d, 0x52, 0x64, 0x9a, 0x1f),
386
            LL(0x92, 0x92, 0x72, 0x92, 0xe4, 0xe0, 0x39, 0x76),
387
            LL(0x75, 0x75, 0xc9, 0x75, 0x8f, 0xbc, 0xea, 0xfa),
388
            LL(0x06, 0x06, 0x18, 0x06, 0x30, 0x1e, 0x0c, 0x36),
389
            LL(0x8a, 0x8a, 0x12, 0x8a, 0x24, 0x98, 0x09, 0xae),
390
            LL(0xb2, 0xb2, 0xf2, 0xb2, 0xf9, 0x40, 0x79, 0x4b),
391
            LL(0xe6, 0xe6, 0xbf, 0xe6, 0x63, 0x59, 0xd1, 0x85),
392
            LL(0x0e, 0x0e, 0x38, 0x0e, 0x70, 0x36, 0x1c, 0x7e),
393
            LL(0x1f, 0x1f, 0x7c, 0x1f, 0xf8, 0x63, 0x3e, 0xe7),
394
            LL(0x62, 0x62, 0x95, 0x62, 0x37, 0xf7, 0xc4, 0x55),
395
            LL(0xd4, 0xd4, 0x77, 0xd4, 0xee, 0xa3, 0xb5, 0x3a),
396
            LL(0xa8, 0xa8, 0x9a, 0xa8, 0x29, 0x32, 0x4d, 0x81),
397
            LL(0x96, 0x96, 0x62, 0x96, 0xc4, 0xf4, 0x31, 0x52),
398
            LL(0xf9, 0xf9, 0xc3, 0xf9, 0x9b, 0x3a, 0xef, 0x62),
399
            LL(0xc5, 0xc5, 0x33, 0xc5, 0x66, 0xf6, 0x97, 0xa3),
400
            LL(0x25, 0x25, 0x94, 0x25, 0x35, 0xb1, 0x4a, 0x10),
401
            LL(0x59, 0x59, 0x79, 0x59, 0xf2, 0x20, 0xb2, 0xab),
402
            LL(0x84, 0x84, 0x2a, 0x84, 0x54, 0xae, 0x15, 0xd0),
403
            LL(0x72, 0x72, 0xd5, 0x72, 0xb7, 0xa7, 0xe4, 0xc5),
404
            LL(0x39, 0x39, 0xe4, 0x39, 0xd5, 0xdd, 0x72, 0xec),
405
            LL(0x4c, 0x4c, 0x2d, 0x4c, 0x5a, 0x61, 0x98, 0x16),
406
            LL(0x5e, 0x5e, 0x65, 0x5e, 0xca, 0x3b, 0xbc, 0x94),
407
            LL(0x78, 0x78, 0xfd, 0x78, 0xe7, 0x85, 0xf0, 0x9f),
408
            LL(0x38, 0x38, 0xe0, 0x38, 0xdd, 0xd8, 0x70, 0xe5),
409
            LL(0x8c, 0x8c, 0x0a, 0x8c, 0x14, 0x86, 0x05, 0x98),
410
            LL(0xd1, 0xd1, 0x63, 0xd1, 0xc6, 0xb2, 0xbf, 0x17),
411
            LL(0xa5, 0xa5, 0xae, 0xa5, 0x41, 0x0b, 0x57, 0xe4),
412
            LL(0xe2, 0xe2, 0xaf, 0xe2, 0x43, 0x4d, 0xd9, 0xa1),
413
            LL(0x61, 0x61, 0x99, 0x61, 0x2f, 0xf8, 0xc2, 0x4e),
414
            LL(0xb3, 0xb3, 0xf6, 0xb3, 0xf1, 0x45, 0x7b, 0x42),
415
            LL(0x21, 0x21, 0x84, 0x21, 0x15, 0xa5, 0x42, 0x34),
416
            LL(0x9c, 0x9c, 0x4a, 0x9c, 0x94, 0xd6, 0x25, 0x08),
417
            LL(0x1e, 0x1e, 0x78, 0x1e, 0xf0, 0x66, 0x3c, 0xee),
418
            LL(0x43, 0x43, 0x11, 0x43, 0x22, 0x52, 0x86, 0x61),
419
            LL(0xc7, 0xc7, 0x3b, 0xc7, 0x76, 0xfc, 0x93, 0xb1),
420
            LL(0xfc, 0xfc, 0xd7, 0xfc, 0xb3, 0x2b, 0xe5, 0x4f),
421
            LL(0x04, 0x04, 0x10, 0x04, 0x20, 0x14, 0x08, 0x24),
422
            LL(0x51, 0x51, 0x59, 0x51, 0xb2, 0x08, 0xa2, 0xe3),
423
            LL(0x99, 0x99, 0x5e, 0x99, 0xbc, 0xc7, 0x2f, 0x25),
424
            LL(0x6d, 0x6d, 0xa9, 0x6d, 0x4f, 0xc4, 0xda, 0x22),
425
            LL(0x0d, 0x0d, 0x34, 0x0d, 0x68, 0x39, 0x1a, 0x65),
426
            LL(0xfa, 0xfa, 0xcf, 0xfa, 0x83, 0x35, 0xe9, 0x79),
427
            LL(0xdf, 0xdf, 0x5b, 0xdf, 0xb6, 0x84, 0xa3, 0x69),
428
            LL(0x7e, 0x7e, 0xe5, 0x7e, 0xd7, 0x9b, 0xfc, 0xa9),
429
            LL(0x24, 0x24, 0x90, 0x24, 0x3d, 0xb4, 0x48, 0x19),
430
            LL(0x3b, 0x3b, 0xec, 0x3b, 0xc5, 0xd7, 0x76, 0xfe),
431
            LL(0xab, 0xab, 0x96, 0xab, 0x31, 0x3d, 0x4b, 0x9a),
432
            LL(0xce, 0xce, 0x1f, 0xce, 0x3e, 0xd1, 0x81, 0xf0),
433
            LL(0x11, 0x11, 0x44, 0x11, 0x88, 0x55, 0x22, 0x99),
434
            LL(0x8f, 0x8f, 0x06, 0x8f, 0x0c, 0x89, 0x03, 0x83),
435
            LL(0x4e, 0x4e, 0x25, 0x4e, 0x4a, 0x6b, 0x9c, 0x04),
436
            LL(0xb7, 0xb7, 0xe6, 0xb7, 0xd1, 0x51, 0x73, 0x66),
437
            LL(0xeb, 0xeb, 0x8b, 0xeb, 0x0b, 0x60, 0xcb, 0xe0),
438
            LL(0x3c, 0x3c, 0xf0, 0x3c, 0xfd, 0xcc, 0x78, 0xc1),
439
            LL(0x81, 0x81, 0x3e, 0x81, 0x7c, 0xbf, 0x1f, 0xfd),
440
            LL(0x94, 0x94, 0x6a, 0x94, 0xd4, 0xfe, 0x35, 0x40),
441
            LL(0xf7, 0xf7, 0xfb, 0xf7, 0xeb, 0x0c, 0xf3, 0x1c),
442
            LL(0xb9, 0xb9, 0xde, 0xb9, 0xa1, 0x67, 0x6f, 0x18),
443
            LL(0x13, 0x13, 0x4c, 0x13, 0x98, 0x5f, 0x26, 0x8b),
444
            LL(0x2c, 0x2c, 0xb0, 0x2c, 0x7d, 0x9c, 0x58, 0x51),
445
            LL(0xd3, 0xd3, 0x6b, 0xd3, 0xd6, 0xb8, 0xbb, 0x05),
446
            LL(0xe7, 0xe7, 0xbb, 0xe7, 0x6b, 0x5c, 0xd3, 0x8c),
447
            LL(0x6e, 0x6e, 0xa5, 0x6e, 0x57, 0xcb, 0xdc, 0x39),
448
            LL(0xc4, 0xc4, 0x37, 0xc4, 0x6e, 0xf3, 0x95, 0xaa),
449
            LL(0x03, 0x03, 0x0c, 0x03, 0x18, 0x0f, 0x06, 0x1b),
450
            LL(0x56, 0x56, 0x45, 0x56, 0x8a, 0x13, 0xac, 0xdc),
451
            LL(0x44, 0x44, 0x0d, 0x44, 0x1a, 0x49, 0x88, 0x5e),
452
            LL(0x7f, 0x7f, 0xe1, 0x7f, 0xdf, 0x9e, 0xfe, 0xa0),
453
            LL(0xa9, 0xa9, 0x9e, 0xa9, 0x21, 0x37, 0x4f, 0x88),
454
            LL(0x2a, 0x2a, 0xa8, 0x2a, 0x4d, 0x82, 0x54, 0x67),
455
            LL(0xbb, 0xbb, 0xd6, 0xbb, 0xb1, 0x6d, 0x6b, 0x0a),
456
            LL(0xc1, 0xc1, 0x23, 0xc1, 0x46, 0xe2, 0x9f, 0x87),
457
            LL(0x53, 0x53, 0x51, 0x53, 0xa2, 0x02, 0xa6, 0xf1),
458
            LL(0xdc, 0xdc, 0x57, 0xdc, 0xae, 0x8b, 0xa5, 0x72),
459
            LL(0x0b, 0x0b, 0x2c, 0x0b, 0x58, 0x27, 0x16, 0x53),
460
            LL(0x9d, 0x9d, 0x4e, 0x9d, 0x9c, 0xd3, 0x27, 0x01),
461
            LL(0x6c, 0x6c, 0xad, 0x6c, 0x47, 0xc1, 0xd8, 0x2b),
462
            LL(0x31, 0x31, 0xc4, 0x31, 0x95, 0xf5, 0x62, 0xa4),
463
            LL(0x74, 0x74, 0xcd, 0x74, 0x87, 0xb9, 0xe8, 0xf3),
464
            LL(0xf6, 0xf6, 0xff, 0xf6, 0xe3, 0x09, 0xf1, 0x15),
465
            LL(0x46, 0x46, 0x05, 0x46, 0x0a, 0x43, 0x8c, 0x4c),
466
            LL(0xac, 0xac, 0x8a, 0xac, 0x09, 0x26, 0x45, 0xa5),
467
            LL(0x89, 0x89, 0x1e, 0x89, 0x3c, 0x97, 0x0f, 0xb5),
468
            LL(0x14, 0x14, 0x50, 0x14, 0xa0, 0x44, 0x28, 0xb4),
469
            LL(0xe1, 0xe1, 0xa3, 0xe1, 0x5b, 0x42, 0xdf, 0xba),
470
            LL(0x16, 0x16, 0x58, 0x16, 0xb0, 0x4e, 0x2c, 0xa6),
471
            LL(0x3a, 0x3a, 0xe8, 0x3a, 0xcd, 0xd2, 0x74, 0xf7),
472
            LL(0x69, 0x69, 0xb9, 0x69, 0x6f, 0xd0, 0xd2, 0x06),
473
            LL(0x09, 0x09, 0x24, 0x09, 0x48, 0x2d, 0x12, 0x41),
474
            LL(0x70, 0x70, 0xdd, 0x70, 0xa7, 0xad, 0xe0, 0xd7),
475
            LL(0xb6, 0xb6, 0xe2, 0xb6, 0xd9, 0x54, 0x71, 0x6f),
476
            LL(0xd0, 0xd0, 0x67, 0xd0, 0xce, 0xb7, 0xbd, 0x1e),
477
            LL(0xed, 0xed, 0x93, 0xed, 0x3b, 0x7e, 0xc7, 0xd6),
478
            LL(0xcc, 0xcc, 0x17, 0xcc, 0x2e, 0xdb, 0x85, 0xe2),
479
            LL(0x42, 0x42, 0x15, 0x42, 0x2a, 0x57, 0x84, 0x68),
480
            LL(0x98, 0x98, 0x5a, 0x98, 0xb4, 0xc2, 0x2d, 0x2c),
481
            LL(0xa4, 0xa4, 0xaa, 0xa4, 0x49, 0x0e, 0x55, 0xed),
482
            LL(0x28, 0x28, 0xa0, 0x28, 0x5d, 0x88, 0x50, 0x75),
483
            LL(0x5c, 0x5c, 0x6d, 0x5c, 0xda, 0x31, 0xb8, 0x86),
484
            LL(0xf8, 0xf8, 0xc7, 0xf8, 0x93, 0x3f, 0xed, 0x6b),
485
            LL(0x86, 0x86, 0x22, 0x86, 0x44, 0xa4, 0x11, 0xc2),
486
0
#define RC      (&(Cx.q[256*N]))
487
            0x18, 0x23, 0xc6, 0xe8, 0x87, 0xb8, 0x01, 0x4f,
488
            /* rc[ROUNDS] */
489
            0x36, 0xa6, 0xd2, 0xf5, 0x79, 0x6f, 0x91, 0x52, 0x60, 0xbc, 0x9b,
490
            0x8e, 0xa3, 0x0c, 0x7b, 0x35, 0x1d, 0xe0, 0xd7, 0xc2, 0x2e, 0x4b,
491
            0xfe, 0x57, 0x15, 0x77, 0x37, 0xe5, 0x9f, 0xf0, 0x4a, 0xda, 0x58,
492
            0xc9, 0x29, 0x0a, 0xb1, 0xa0, 0x6b, 0x85, 0xbd, 0x5d, 0x10, 0xf4,
493
            0xcb, 0x3e, 0x05, 0x67, 0xe4, 0x27, 0x41, 0x8b, 0xa7, 0x7d, 0x95,
494
            0xd8, 0xfb, 0xee, 0x7c, 0x66, 0xdd, 0x17, 0x47, 0x9e, 0xca, 0x2d,
495
            0xbf, 0x07, 0xad, 0x5a, 0x83, 0x33
496
        }
497
    };
498
499
/*
 * Absorb |n| consecutive 64-byte blocks starting at |inp| into the chaining
 * value held in ctx->H.
 *
 * For every block the code below:
 *   1. loads K with the current chaining value H and S with H XOR block;
 *   2. runs ROUNDS rounds, each of which first derives the next K from
 *      table lookups on K (with RC[r] folded into word 0) and then derives
 *      the next S from table lookups on S XORed with that new K;
 *   3. folds the result back with H ^= S ^ block.
 *
 * In every round, output word i is the XOR over j = 0..7 of Cj(X, (i - j) & 7).
 * The three build variants (OPENSSL_SMALL_FOOTPRINT, SMALL_REGISTER_BANK and
 * the default) compute exactly this and differ only in how far the loops are
 * unrolled and in which order the terms are accumulated.
 *
 * C0..C7, RC, ROUNDS, u64_aX and GO_FOR_MMX are defined elsewhere in this
 * file.
 *
 * ctx  hash context; only ctx->H is read and updated here.
 * inp  input data, at least 64 * n readable bytes.
 * n    number of 64-byte blocks; a count of zero is a no-op.
 */
void whirlpool_block(WHIRLPOOL_CTX *ctx, const void *inp, size_t n)
{
    int r;
    const u8 *p = inp;
    union {
        u64 q[8];
        u8 c[64];
    } S, K, *H = (void *)ctx->H.q;

    /*
     * The block loop is a do/while that tests --n, so entering it with
     * n == 0 would consume one block and then wrap the counter around to
     * SIZE_MAX.  Reject the empty request up front, before any alternative
     * implementation hooked in through GO_FOR_MMX gets to see it.
     */
    if (n == 0)
        return;

#ifdef GO_FOR_MMX
    /* NOTE(review): presumably may hand off to an asm routine and return. */
    GO_FOR_MMX(ctx, inp, n);
#endif
    do {
#ifdef OPENSSL_SMALL_FOOTPRINT
        u64 L[8];
        int i;

        /* K = H, S = H ^ block, one byte at a time */
        for (i = 0; i < 64; i++)
            S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
        for (r = 0; r < ROUNDS; r++) {
            /* next K; the round constant only enters word 0 */
            for (i = 0; i < 8; i++) {
                L[i] = i ? 0 : RC[r];
                L[i] ^= C0(K, i) ^ C1(K, (i - 1) & 7) ^
                    C2(K, (i - 2) & 7) ^ C3(K, (i - 3) & 7) ^
                    C4(K, (i - 4) & 7) ^ C5(K, (i - 5) & 7) ^
                    C6(K, (i - 6) & 7) ^ C7(K, (i - 7) & 7);
            }
            memcpy(K.q, L, 64);
            /* next S; L still holds the new K, so it is XORed in here */
            for (i = 0; i < 8; i++) {
                L[i] ^= C0(S, i) ^ C1(S, (i - 1) & 7) ^
                    C2(S, (i - 2) & 7) ^ C3(S, (i - 3) & 7) ^
                    C4(S, (i - 4) & 7) ^ C5(S, (i - 5) & 7) ^
                    C6(S, (i - 6) & 7) ^ C7(S, (i - 7) & 7);
            }
            memcpy(S.q, L, 64);
        }
        /* feed-forward: H ^= S ^ block */
        for (i = 0; i < 64; i++)
            H->c[i] ^= S.c[i] ^ p[i];
#else
        u64 L0, L1, L2, L3, L4, L5, L6, L7;

# ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7) {
            /* input is not 8-byte aligned: copy it first, then XOR in H */
            memcpy(S.c, p, 64);
            S.q[0] ^= (K.q[0] = H->q[0]);
            S.q[1] ^= (K.q[1] = H->q[1]);
            S.q[2] ^= (K.q[2] = H->q[2]);
            S.q[3] ^= (K.q[3] = H->q[3]);
            S.q[4] ^= (K.q[4] = H->q[4]);
            S.q[5] ^= (K.q[5] = H->q[5]);
            S.q[6] ^= (K.q[6] = H->q[6]);
            S.q[7] ^= (K.q[7] = H->q[7]);
        } else
# endif
        {
            /*
             * Read the input as eight 64-bit words.
             * NOTE(review): u64_aX is presumably a u64 variant that permits
             * this access on unaligned input - confirm at its definition.
             */
            const u64_aX *pa = (const u64_aX *)p;
            S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
            S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
            S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
            S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
            S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
            S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
            S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
            S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
        }

        for (r = 0; r < ROUNDS; r++) {
# ifdef SMALL_REGISTER_BANK
            /* next K, one complete output word per statement */
            L0 = C0(K, 0) ^ C1(K, 7) ^ C2(K, 6) ^ C3(K, 5) ^
                C4(K, 4) ^ C5(K, 3) ^ C6(K, 2) ^ C7(K, 1) ^ RC[r];
            L1 = C0(K, 1) ^ C1(K, 0) ^ C2(K, 7) ^ C3(K, 6) ^
                C4(K, 5) ^ C5(K, 4) ^ C6(K, 3) ^ C7(K, 2);
            L2 = C0(K, 2) ^ C1(K, 1) ^ C2(K, 0) ^ C3(K, 7) ^
                C4(K, 6) ^ C5(K, 5) ^ C6(K, 4) ^ C7(K, 3);
            L3 = C0(K, 3) ^ C1(K, 2) ^ C2(K, 1) ^ C3(K, 0) ^
                C4(K, 7) ^ C5(K, 6) ^ C6(K, 5) ^ C7(K, 4);
            L4 = C0(K, 4) ^ C1(K, 3) ^ C2(K, 2) ^ C3(K, 1) ^
                C4(K, 0) ^ C5(K, 7) ^ C6(K, 6) ^ C7(K, 5);
            L5 = C0(K, 5) ^ C1(K, 4) ^ C2(K, 3) ^ C3(K, 2) ^
                C4(K, 1) ^ C5(K, 0) ^ C6(K, 7) ^ C7(K, 6);
            L6 = C0(K, 6) ^ C1(K, 5) ^ C2(K, 4) ^ C3(K, 3) ^
                C4(K, 2) ^ C5(K, 1) ^ C6(K, 0) ^ C7(K, 7);
            L7 = C0(K, 7) ^ C1(K, 6) ^ C2(K, 5) ^ C3(K, 4) ^
                C4(K, 3) ^ C5(K, 2) ^ C6(K, 1) ^ C7(K, 0);

            K.q[0] = L0;
            K.q[1] = L1;
            K.q[2] = L2;
            K.q[3] = L3;
            K.q[4] = L4;
            K.q[5] = L5;
            K.q[6] = L6;
            K.q[7] = L7;

            /* next S; L0..L7 still hold the new K, so it is XORed in here */
            L0 ^= C0(S, 0) ^ C1(S, 7) ^ C2(S, 6) ^ C3(S, 5) ^
                C4(S, 4) ^ C5(S, 3) ^ C6(S, 2) ^ C7(S, 1);
            L1 ^= C0(S, 1) ^ C1(S, 0) ^ C2(S, 7) ^ C3(S, 6) ^
                C4(S, 5) ^ C5(S, 4) ^ C6(S, 3) ^ C7(S, 2);
            L2 ^= C0(S, 2) ^ C1(S, 1) ^ C2(S, 0) ^ C3(S, 7) ^
                C4(S, 6) ^ C5(S, 5) ^ C6(S, 4) ^ C7(S, 3);
            L3 ^= C0(S, 3) ^ C1(S, 2) ^ C2(S, 1) ^ C3(S, 0) ^
                C4(S, 7) ^ C5(S, 6) ^ C6(S, 5) ^ C7(S, 4);
            L4 ^= C0(S, 4) ^ C1(S, 3) ^ C2(S, 2) ^ C3(S, 1) ^
                C4(S, 0) ^ C5(S, 7) ^ C6(S, 6) ^ C7(S, 5);
            L5 ^= C0(S, 5) ^ C1(S, 4) ^ C2(S, 3) ^ C3(S, 2) ^
                C4(S, 1) ^ C5(S, 0) ^ C6(S, 7) ^ C7(S, 6);
            L6 ^= C0(S, 6) ^ C1(S, 5) ^ C2(S, 4) ^ C3(S, 3) ^
                C4(S, 2) ^ C5(S, 1) ^ C6(S, 0) ^ C7(S, 7);
            L7 ^= C0(S, 7) ^ C1(S, 6) ^ C2(S, 5) ^ C3(S, 4) ^
                C4(S, 3) ^ C5(S, 2) ^ C6(S, 1) ^ C7(S, 0);

            S.q[0] = L0;
            S.q[1] = L1;
            S.q[2] = L2;
            S.q[3] = L3;
            S.q[4] = L4;
            S.q[5] = L5;
            S.q[6] = L6;
            S.q[7] = L7;
# else
            /*
             * Same computation, accumulated by source row instead of by
             * output word: row k contributes Cj(K, k) to L[(k + j) & 7].
             */

            /* next K: row 0, plus the round constant into word 0 */
            L0 = C0(K, 0);
            L1 = C1(K, 0);
            L2 = C2(K, 0);
            L3 = C3(K, 0);
            L4 = C4(K, 0);
            L5 = C5(K, 0);
            L6 = C6(K, 0);
            L7 = C7(K, 0);
            L0 ^= RC[r];

            /* K row 1 */
            L1 ^= C0(K, 1);
            L2 ^= C1(K, 1);
            L3 ^= C2(K, 1);
            L4 ^= C3(K, 1);
            L5 ^= C4(K, 1);
            L6 ^= C5(K, 1);
            L7 ^= C6(K, 1);
            L0 ^= C7(K, 1);

            /* K row 2 */
            L2 ^= C0(K, 2);
            L3 ^= C1(K, 2);
            L4 ^= C2(K, 2);
            L5 ^= C3(K, 2);
            L6 ^= C4(K, 2);
            L7 ^= C5(K, 2);
            L0 ^= C6(K, 2);
            L1 ^= C7(K, 2);

            /* K row 3 */
            L3 ^= C0(K, 3);
            L4 ^= C1(K, 3);
            L5 ^= C2(K, 3);
            L6 ^= C3(K, 3);
            L7 ^= C4(K, 3);
            L0 ^= C5(K, 3);
            L1 ^= C6(K, 3);
            L2 ^= C7(K, 3);

            /* K row 4 */
            L4 ^= C0(K, 4);
            L5 ^= C1(K, 4);
            L6 ^= C2(K, 4);
            L7 ^= C3(K, 4);
            L0 ^= C4(K, 4);
            L1 ^= C5(K, 4);
            L2 ^= C6(K, 4);
            L3 ^= C7(K, 4);

            /* K row 5 */
            L5 ^= C0(K, 5);
            L6 ^= C1(K, 5);
            L7 ^= C2(K, 5);
            L0 ^= C3(K, 5);
            L1 ^= C4(K, 5);
            L2 ^= C5(K, 5);
            L3 ^= C6(K, 5);
            L4 ^= C7(K, 5);

            /* K row 6 */
            L6 ^= C0(K, 6);
            L7 ^= C1(K, 6);
            L0 ^= C2(K, 6);
            L1 ^= C3(K, 6);
            L2 ^= C4(K, 6);
            L3 ^= C5(K, 6);
            L4 ^= C6(K, 6);
            L5 ^= C7(K, 6);

            /* K row 7 */
            L7 ^= C0(K, 7);
            L0 ^= C1(K, 7);
            L1 ^= C2(K, 7);
            L2 ^= C3(K, 7);
            L3 ^= C4(K, 7);
            L4 ^= C5(K, 7);
            L5 ^= C6(K, 7);
            L6 ^= C7(K, 7);

            K.q[0] = L0;
            K.q[1] = L1;
            K.q[2] = L2;
            K.q[3] = L3;
            K.q[4] = L4;
            K.q[5] = L5;
            K.q[6] = L6;
            K.q[7] = L7;

            /*
             * next S: L0..L7 still hold the new K, so every row of S is
             * XORed on top of it, row 0 included.
             */
            L0 ^= C0(S, 0);
            L1 ^= C1(S, 0);
            L2 ^= C2(S, 0);
            L3 ^= C3(S, 0);
            L4 ^= C4(S, 0);
            L5 ^= C5(S, 0);
            L6 ^= C6(S, 0);
            L7 ^= C7(S, 0);

            /* S row 1 */
            L1 ^= C0(S, 1);
            L2 ^= C1(S, 1);
            L3 ^= C2(S, 1);
            L4 ^= C3(S, 1);
            L5 ^= C4(S, 1);
            L6 ^= C5(S, 1);
            L7 ^= C6(S, 1);
            L0 ^= C7(S, 1);

            /* S row 2 */
            L2 ^= C0(S, 2);
            L3 ^= C1(S, 2);
            L4 ^= C2(S, 2);
            L5 ^= C3(S, 2);
            L6 ^= C4(S, 2);
            L7 ^= C5(S, 2);
            L0 ^= C6(S, 2);
            L1 ^= C7(S, 2);

            /* S row 3 */
            L3 ^= C0(S, 3);
            L4 ^= C1(S, 3);
            L5 ^= C2(S, 3);
            L6 ^= C3(S, 3);
            L7 ^= C4(S, 3);
            L0 ^= C5(S, 3);
            L1 ^= C6(S, 3);
            L2 ^= C7(S, 3);

            /* S row 4 */
            L4 ^= C0(S, 4);
            L5 ^= C1(S, 4);
            L6 ^= C2(S, 4);
            L7 ^= C3(S, 4);
            L0 ^= C4(S, 4);
            L1 ^= C5(S, 4);
            L2 ^= C6(S, 4);
            L3 ^= C7(S, 4);

            /* S row 5 */
            L5 ^= C0(S, 5);
            L6 ^= C1(S, 5);
            L7 ^= C2(S, 5);
            L0 ^= C3(S, 5);
            L1 ^= C4(S, 5);
            L2 ^= C5(S, 5);
            L3 ^= C6(S, 5);
            L4 ^= C7(S, 5);

            /* S row 6 */
            L6 ^= C0(S, 6);
            L7 ^= C1(S, 6);
            L0 ^= C2(S, 6);
            L1 ^= C3(S, 6);
            L2 ^= C4(S, 6);
            L3 ^= C5(S, 6);
            L4 ^= C6(S, 6);
            L5 ^= C7(S, 6);

            /* S row 7 */
            L7 ^= C0(S, 7);
            L0 ^= C1(S, 7);
            L1 ^= C2(S, 7);
            L2 ^= C3(S, 7);
            L3 ^= C4(S, 7);
            L4 ^= C5(S, 7);
            L5 ^= C6(S, 7);
            L6 ^= C7(S, 7);

            S.q[0] = L0;
            S.q[1] = L1;
            S.q[2] = L2;
            S.q[3] = L3;
            S.q[4] = L4;
            S.q[5] = L5;
            S.q[6] = L6;
            S.q[7] = L7;
# endif
        }

# ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7) {
            int i;
            /* unaligned input: feed-forward one byte at a time */
            for (i = 0; i < 64; i++)
                H->c[i] ^= S.c[i] ^ p[i];
        } else
# endif
        {
            /* feed-forward: H ^= S ^ block, one 64-bit word at a time */
            const u64_aX *pa = (const u64_aX *)p;
            H->q[0] ^= S.q[0] ^ pa[0];
            H->q[1] ^= S.q[1] ^ pa[1];
            H->q[2] ^= S.q[2] ^ pa[2];
            H->q[3] ^= S.q[3] ^ pa[3];
            H->q[4] ^= S.q[4] ^ pa[4];
            H->q[5] ^= S.q[5] ^ pa[5];
            H->q[6] ^= S.q[6] ^ pa[6];
            H->q[7] ^= S.q[7] ^ pa[7];
        }
#endif
        p += 64;            /* advance to the next 64-byte block */
    } while (--n);
}