Coverage Report

Created: 2025-12-31 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl30/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order and one - on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, ../md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm at the time of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(__s390__) || defined(__s390x__) || defined(__aarch64__) || defined(SHA512_ASM)
63
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
64
#endif
65
66
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
67
#define U64(C) C##UI64
68
#elif defined(__arch64__)
69
#define U64(C) C##UL
70
#else
71
485M
#define U64(C) C##ULL
72
#endif
73
74
/*
 * Put |c| into the sha512_224 starting state: load the eight initial
 * chaining words, zero the 128-bit length counter (Nh:Nl) and the count of
 * buffered bytes, and select a SHA224_DIGEST_LENGTH-byte digest.
 *
 * Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA224_DIGEST_LENGTH;

    return 1;
}
91
92
/*
 * Put |c| into the sha512_256 starting state: load the eight initial
 * chaining words, zero the 128-bit length counter (Nh:Nl) and the count of
 * buffered bytes, and select a SHA256_DIGEST_LENGTH-byte digest.
 *
 * Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA256_DIGEST_LENGTH;

    return 1;
}
109
110
/*
 * Put |c| into the SHA384 starting state: load the eight initial chaining
 * words, zero the 128-bit length counter (Nh:Nl) and the count of buffered
 * bytes, and select a SHA384_DIGEST_LENGTH-byte digest.
 *
 * Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA384_DIGEST_LENGTH;

    return 1;
}
127
128
/*
 * Put |c| into the SHA512 starting state: load the eight initial chaining
 * words, zero the 128-bit length counter (Nh:Nl) and the count of buffered
 * bytes, and select a SHA512_DIGEST_LENGTH-byte digest.
 *
 * Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA512_DIGEST_LENGTH;

    return 1;
}
145
146
#ifndef SHA512_ASM
147
static
148
#endif
149
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
150
151
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
152
41.0M
{
153
41.0M
    unsigned char *p = (unsigned char *)c->u.p;
154
41.0M
    size_t n = c->num;
155
156
41.0M
    p[n] = 0x80; /* There always is a room for one */
157
41.0M
    n++;
158
41.0M
    if (n > (sizeof(c->u) - 16)) {
159
242k
        memset(p + n, 0, sizeof(c->u) - n);
160
242k
        n = 0;
161
242k
        sha512_block_data_order(c, p, 1);
162
242k
    }
163
164
41.0M
    memset(p + n, 0, sizeof(c->u) - 16 - n);
165
#ifdef B_ENDIAN
166
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
167
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
168
#else
169
41.0M
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
170
41.0M
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
171
41.0M
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
172
41.0M
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
173
41.0M
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
174
41.0M
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
175
41.0M
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
176
41.0M
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
177
41.0M
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
178
41.0M
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
179
41.0M
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
180
41.0M
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
181
41.0M
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
182
41.0M
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
183
41.0M
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
184
41.0M
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
185
41.0M
#endif
186
187
41.0M
    sha512_block_data_order(c, p, 1);
188
189
41.0M
    if (md == 0)
190
0
        return 0;
191
192
41.0M
    switch (c->md_len) {
193
    /* Let compiler decide if it's appropriate to unroll... */
194
332k
    case SHA224_DIGEST_LENGTH:
195
1.33M
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
196
998k
            SHA_LONG64 t = c->h[n];
197
198
998k
            *(md++) = (unsigned char)(t >> 56);
199
998k
            *(md++) = (unsigned char)(t >> 48);
200
998k
            *(md++) = (unsigned char)(t >> 40);
201
998k
            *(md++) = (unsigned char)(t >> 32);
202
998k
            *(md++) = (unsigned char)(t >> 24);
203
998k
            *(md++) = (unsigned char)(t >> 16);
204
998k
            *(md++) = (unsigned char)(t >> 8);
205
998k
            *(md++) = (unsigned char)(t);
206
998k
        }
207
        /*
208
         * For 224 bits, there are four bytes left over that have to be
209
         * processed separately.
210
         */
211
332k
        {
212
332k
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
213
214
332k
            *(md++) = (unsigned char)(t >> 56);
215
332k
            *(md++) = (unsigned char)(t >> 48);
216
332k
            *(md++) = (unsigned char)(t >> 40);
217
332k
            *(md++) = (unsigned char)(t >> 32);
218
332k
        }
219
332k
        break;
220
123k
    case SHA256_DIGEST_LENGTH:
221
615k
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
222
492k
            SHA_LONG64 t = c->h[n];
223
224
492k
            *(md++) = (unsigned char)(t >> 56);
225
492k
            *(md++) = (unsigned char)(t >> 48);
226
492k
            *(md++) = (unsigned char)(t >> 40);
227
492k
            *(md++) = (unsigned char)(t >> 32);
228
492k
            *(md++) = (unsigned char)(t >> 24);
229
492k
            *(md++) = (unsigned char)(t >> 16);
230
492k
            *(md++) = (unsigned char)(t >> 8);
231
492k
            *(md++) = (unsigned char)(t);
232
492k
        }
233
123k
        break;
234
2.21M
    case SHA384_DIGEST_LENGTH:
235
15.5M
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
236
13.3M
            SHA_LONG64 t = c->h[n];
237
238
13.3M
            *(md++) = (unsigned char)(t >> 56);
239
13.3M
            *(md++) = (unsigned char)(t >> 48);
240
13.3M
            *(md++) = (unsigned char)(t >> 40);
241
13.3M
            *(md++) = (unsigned char)(t >> 32);
242
13.3M
            *(md++) = (unsigned char)(t >> 24);
243
13.3M
            *(md++) = (unsigned char)(t >> 16);
244
13.3M
            *(md++) = (unsigned char)(t >> 8);
245
13.3M
            *(md++) = (unsigned char)(t);
246
13.3M
        }
247
2.21M
        break;
248
38.4M
    case SHA512_DIGEST_LENGTH:
249
345M
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
250
307M
            SHA_LONG64 t = c->h[n];
251
252
307M
            *(md++) = (unsigned char)(t >> 56);
253
307M
            *(md++) = (unsigned char)(t >> 48);
254
307M
            *(md++) = (unsigned char)(t >> 40);
255
307M
            *(md++) = (unsigned char)(t >> 32);
256
307M
            *(md++) = (unsigned char)(t >> 24);
257
307M
            *(md++) = (unsigned char)(t >> 16);
258
307M
            *(md++) = (unsigned char)(t >> 8);
259
307M
            *(md++) = (unsigned char)(t);
260
307M
        }
261
38.4M
        break;
262
    /* ... as well as make sure md_len is not abused. */
263
0
    default:
264
0
        return 0;
265
41.0M
    }
266
267
41.0M
    return 1;
268
41.0M
}
269
270
/*
 * SHA384 finalisation entry point.  Forwards |md| and |c| unchanged to
 * SHA512_Final() and returns its result; the number of digest bytes written
 * is decided there from c->md_len.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
271
2.21M
{
272
2.21M
    return SHA512_Final(md, c);
273
2.21M
}
274
275
/*
 * Absorb |len| bytes from |_data| into the hash state |c|.
 *
 * The 128-bit message bit count (Nh:Nl) is advanced by len * 8.  Bytes are
 * first used to top up a partially filled block buffer (c->u), then whole
 * blocks are compressed, and whatever is left over is kept in the buffer
 * with its length recorded in c->num.
 *
 * Always returns 1; a zero |len| is a no-op.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    SHA_LONG64 bits_lo;

    if (len == 0)
        return 1;

    /* Add len * 8 to the bit counter, carrying from Nl into Nh. */
    bits_lo = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
    if (bits_lo < c->Nl)
        c->Nh++;
    /* The top three bits of a 64-bit len fall off the shift above. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64)len) >> 61);
    c->Nl = bits_lo;

    if (c->num != 0) {
        size_t room = sizeof(c->u) - c->num;

        if (len < room) {
            /* Still not a full block: just stash the bytes. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        /* Complete the pending block and compress it. */
        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce every block through the buffer. */
            while (len >= sizeof(c->u)) {
                memcpy(buf, src, sizeof(c->u));
                sha512_block_data_order(c, buf, 1);
                len -= sizeof(c->u);
                src += sizeof(c->u);
            }
        } else
#endif
        {
            size_t blocks = len / sizeof(c->u);

            sha512_block_data_order(c, src, blocks);
            src += blocks * sizeof(c->u);
            len %= sizeof(c->u);
        }
    }

    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
322
323
/*
 * SHA384 update entry point.  Forwards |c|, |data| and |len| unchanged to
 * SHA512_Update() and returns its result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
324
4.73M
{
325
4.73M
    return SHA512_Update(c, data, len);
326
4.73M
}
327
328
/*
 * Run the compression function over exactly one block read from |data|,
 * updating the chaining state in |c|.  The length counters and c->num are
 * not touched.
 *
 * On builds where the block function cannot take unaligned input, a
 * misaligned |data| is first copied into c->u.p, overwriting whatever was
 * buffered there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
336
337
#ifndef SHA512_ASM
338
/*
 * Per-round additive constants: entry i is added into T/T1 in round i of
 * every C implementation of sha512_block_data_order() in this file (80
 * rounds, hence 80 entries).  Only compiled when SHA512_ASM is not defined.
 *
 * NOTE(review): the values are expected to be the SHA-384/SHA-512 constants
 * K[0..79] from FIPS 180-4 -- verify against the standard before editing.
 */
static const SHA_LONG64 K512[80] = {
339
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
340
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
341
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
342
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
343
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
344
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
345
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
346
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
347
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
348
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
349
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
350
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
351
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
352
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
353
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
354
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
355
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
356
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
357
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
358
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
359
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
360
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
361
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
362
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
363
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
364
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
365
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
366
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
367
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
368
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
369
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
370
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
371
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
372
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
373
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
374
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
375
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
376
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
377
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
378
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
379
};
380
381
/*
 * Platform-specific fast paths for two primitives used by the C block
 * functions:
 *   ROTR(a, n) - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - read the 8 bytes stored at &x as a big-endian 64-bit value.
 * Each is defined here only for the compiler/CPU combinations listed; any
 * that is left undefined gets a portable definition further down.  Defining
 * PEDANTIC skips this whole section.
 */
#ifndef PEDANTIC
382
/* GCC-compatible compilers with inline assembly enabled. */
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
383
#if defined(__x86_64) || defined(__x86_64__)
384
/* x86-64: rotate with a single rorq; n must satisfy the "J" constraint. */
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
385
                                asm ("rorq %1,%0"       \
386
                                : "=r"(ret)             \
387
                                : "J"(n),"0"(a)         \
388
                                : "cc"); ret; })
389
#if !defined(B_ENDIAN)
390
/* x86-64: load the 64-bit word at &x directly, then byte-swap with bswapq. */
#define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
391
                                asm ("bswapq    %0"             \
392
                                : "=r"(ret)                     \
393
                                : "0"(ret)); ret; })
394
#endif
395
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
396
#if defined(I386_ONLY)
397
/*
 * 32-bit x86, I386_ONLY: byte-reverse each 32-bit half with xchgb/roll
 * instead of bswapl; p[0] supplies the high half, p[1] the low half.
 */
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
398
                          unsigned int hi=p[0],lo=p[1];          \
399
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
400
                                    "roll $16,%%eax; roll $16,%%edx; "\
401
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
402
                                : "=a"(lo),"=d"(hi)             \
403
                                : "0"(lo),"1"(hi) : "cc");      \
404
                                ((SHA_LONG64)hi)<<32|lo; })
405
#else
406
/* 32-bit x86: byte-reverse each 32-bit half with bswapl, then join them. */
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
407
                          unsigned int hi=p[0],lo=p[1];         \
408
                                asm ("bswapl %0; bswapl %1;"    \
409
                                : "=r"(lo),"=r"(hi)             \
410
                                : "0"(lo),"1"(hi));             \
411
                                ((SHA_LONG64)hi)<<32|lo; })
412
#endif
413
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
414
/* 64-bit PowerPC: rotate with rotrdi; no PULL64 is defined on this branch. */
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
415
                                asm ("rotrdi %0,%1,%2"  \
416
                                : "=r"(ret)             \
417
                                : "r"(a),"K"(n)); ret; })
418
#elif defined(__aarch64__)
419
/* AArch64: rotate with ror. */
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
420
                                asm ("ror %0,%1,%2"     \
421
                                : "=r"(ret)             \
422
                                : "r"(a),"I"(n)); ret; })
423
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
424
/* Little-endian AArch64: load the word at &x and byte-reverse it with rev. */
#define PULL64(x) ({ SHA_LONG64 ret;                     \
425
                                asm ("rev       %0,%1"          \
426
                                : "=r"(ret)                     \
427
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
428
#endif
429
#endif
430
#elif defined(_MSC_VER)
431
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
432
#pragma intrinsic(_rotr64)
433
#define ROTR(a, n) _rotr64((a), n)
434
#endif
435
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
436
#if defined(I386_ONLY)
437
/*
 * MSVC, 32-bit x86, I386_ONLY: same xchg/rol byte reversal as above.  The
 * function has no return statement; the result is whatever the asm leaves
 * in edx:eax.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
438
{
439
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm xchg dh, dl _asm xchg ah, al _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al
440
}
441
#else
442
/* MSVC, 32-bit x86: load both halves and byte-reverse each with bswap. */
static SHA_LONG64 __fastcall __pull64be(const void *x) {
443
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm bswap edx _asm bswap eax
444
}
445
#endif
446
#define PULL64(x) __pull64be(&(x))
447
#endif
448
#endif
449
#endif
450
/*
 * Portable definitions, used when no platform-specific PULL64/ROTR has been
 * defined earlier in this file.  Every macro argument is parenthesised so
 * that expression arguments (for example ROTR(v, a + b)) expand correctly.
 */
#ifndef PULL64
/*
 * B(x, j): byte j of the object x, read through its address, shifted to the
 * position it occupies in a big-endian 64-bit value (byte 0 is the most
 * significant).
 */
#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&(x))) + (j)))) << ((7 - (j)) * 8))
/* PULL64(x): the eight bytes starting at &x, as a big-endian 64-bit value. */
#define PULL64(x) (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
#endif
#ifndef ROTR
/*
 * ROTR(x, s): rotate the 64-bit unsigned value x right by s bits.  s must
 * be in the range 1..63 (s == 0 would shift left by 64).
 */
#define ROTR(x, s) (((x) >> (s)) | ((x) << (64 - (s))))
#endif

/* Mixing functions used by the rounds (Sigma0/Sigma1, Ch, Maj) ... */
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
/* ... and by the message schedule expansion (sigma0/sigma1). */
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
/* Ch: bitwise "x ? y : z".  Maj: bitwise majority of x, y and z. */
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
463
464
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
465
/*
466
 * This code should give better results on 32-bit CPU with less than
467
 * ~24 registers, both size and performance wise...
468
 */
469
470
/*
 * Compression function, variant selected for 32-bit x86 builds.
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words starting at
 * |in| and folds each one into ctx->h[0..7].
 *
 * Only A and E are kept in scalar variables.  The other six working
 * variables and the message schedule live in X[], addressed through the
 * sliding pointer F, which moves down by one element per round:
 *   F[1..3]  previous values of A (the textbook b, c, d)
 *   F[5..7]  previous values of E (the textbook f, g, h)
 *   F[8+k]   the schedule word stored k rounds ago
 * Because F moves instead of the data, "shifting" the working variables is
 * just the three stores F[0] = A, F[4] = E, F[8] = T.  X needs 9 + 80
 * entries: F starts at X + 80, touches F[0..8], and ends at X after 80
 * rounds.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
471
    size_t num)
472
{
473
    const SHA_LONG64 *W = in;
474
    SHA_LONG64 A, E, T;
475
    SHA_LONG64 X[9 + 80], *F;
476
    int i;
477
478
    while (num--) {
479
480
        /* Load the chaining state into A, E and the window at the top of X. */
        F = X + 80;
481
        A = ctx->h[0];
482
        F[1] = ctx->h[1];
483
        F[2] = ctx->h[2];
484
        F[3] = ctx->h[3];
485
        E = ctx->h[4];
486
        F[5] = ctx->h[5];
487
        F[6] = ctx->h[6];
488
        F[7] = ctx->h[7];
489
490
        /* Rounds 0..15: the schedule word is the input word itself. */
        for (i = 0; i < 16; i++, F--) {
491
#ifdef B_ENDIAN
492
            T = W[i];
493
#else
494
            T = PULL64(W[i]);
495
#endif
496
            F[0] = A;
497
            F[4] = E;
498
            F[8] = T; /* overwrites the slot that held last round's F[7] */
499
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
500
            E = F[3] + T;
501
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
502
        }
503
504
        /*
         * Rounds 16..79: build the schedule word from the words stored 15,
         * 2, 16 and 7 rounds ago (F[8 + 16 - 1], F[8 + 16 - 14], F[8 + 16]
         * and F[8 + 16 - 9] respectively, seen through the current F).
         */
        for (; i < 80; i++, F--) {
505
            T = sigma0(F[8 + 16 - 1]);
506
            T += sigma1(F[8 + 16 - 14]);
507
            T += F[8 + 16] + F[8 + 16 - 9];
508
509
            F[0] = A;
510
            F[4] = E;
511
            F[8] = T;
512
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
513
            E = F[3] + T;
514
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
515
        }
516
517
        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += A;
518
        ctx->h[1] += F[1];
519
        ctx->h[2] += F[2];
520
        ctx->h[3] += F[3];
521
        ctx->h[4] += E;
522
        ctx->h[5] += F[5];
523
        ctx->h[6] += F[6];
524
        ctx->h[7] += F[7];
525
526
        /* Advance to the next input block. */
        W += SHA_LBLOCK;
527
    }
528
}
529
530
#elif defined(OPENSSL_SMALL_FOOTPRINT)
531
532
/*
 * Compression function, compact variant (OPENSSL_SMALL_FOOTPRINT).
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words starting at
 * |in| and folds each one into ctx->h[0..7].  The message schedule is kept
 * in a 16-entry ring indexed by (round & 0x0f), so only the last 16 words
 * are ever stored.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *block = in;
    SHA_LONG64 sched[16];

    for (; num != 0; num--, block += SHA_LBLOCK) {
        SHA_LONG64 a = ctx->h[0];
        SHA_LONG64 b = ctx->h[1];
        SHA_LONG64 c = ctx->h[2];
        SHA_LONG64 d = ctx->h[3];
        SHA_LONG64 e = ctx->h[4];
        SHA_LONG64 f = ctx->h[5];
        SHA_LONG64 g = ctx->h[6];
        SHA_LONG64 h = ctx->h[7];
        int round;

        for (round = 0; round < 80; round++) {
            SHA_LONG64 w, t1, t2;

            if (round < 16) {
                /* First 16 rounds consume the input words directly. */
#ifdef B_ENDIAN
                w = block[round];
#else
                w = PULL64(block[round]);
#endif
            } else {
                /* Later rounds expand the schedule inside the ring. */
                SHA_LONG64 s0 = sched[(round + 1) & 0x0f];
                SHA_LONG64 s1 = sched[(round + 14) & 0x0f];

                s0 = sigma0(s0);
                s1 = sigma1(s1);
                w = sched[round & 0xf] + (s0 + s1 + sched[(round + 9) & 0xf]);
            }
            sched[round & 0xf] = w;

            t1 = w + (h + Sigma1(e) + Ch(e, f, g) + K512[round]);
            t2 = Sigma0(a) + Maj(a, b, c);

            /* Shift the eight working variables down by one position. */
            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
600
601
#else
602
/*
 * One round of the compression function for the unrolled implementation.
 *
 * Expects the caller to have put the round's schedule word in the local T1.
 * Instead of shifting all eight working variables, only d and h are
 * rewritten; the caller rotates the argument list by one position on every
 * invocation so that the same macro body acts on the right variables.
 * i is the round number and indexes K512.
 */
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
602
    do {                                             \
603
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
604
        h = Sigma0(a) + Maj(a, b, c);                \
605
        d += T1;                                     \
606
        h += T1;                                     \
607
    } while (0)
608
609
610
/*
 * One round with schedule expansion.  X is the 16-entry schedule ring, j the
 * position within the current group of 16 rounds and i the group's first
 * round number.  The new schedule word replaces X[j & 0x0f] and is left in
 * T1, then ROUND_00_15 runs as round i + j.  Uses the caller's locals s0,
 * s1 and T1 as scratch.
 */
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
611
    do {                                                   \
612
        s0 = X[(j + 1) & 0x0f];                            \
613
        s0 = sigma0(s0);                                   \
614
        s1 = X[(j + 14) & 0x0f];                           \
615
        s1 = sigma1(s1);                                   \
616
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
617
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
618
    } while (0)
619
620
/*
 * Fetch input word k of the current block in big-endian order, storing it
 * both in the schedule ring and in T1 (where ROUND_00_15 expects it).
 * Relies on the locals msg, sched and T1 of sha512_block_data_order().
 */
#ifdef B_ENDIAN
#define SHA512_LOAD_WORD(k) (T1 = sched[(k)] = msg[(k)])
#else
#define SHA512_LOAD_WORD(k) (T1 = sched[(k)] = PULL64(msg[(k)]))
#endif

/*
 * Compression function, fully unrolled variant.
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words starting at
 * |in| and folds each one into ctx->h[0..7].  The working variables are
 * never moved: each successive round is invoked with the argument list
 * rotated by one position, which comes back to the starting order after
 * eight rounds.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *msg = in;
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 s0, s1, T1; /* scratch used by the ROUND_* macros */
    SHA_LONG64 sched[16];
    int i;

    for (; num != 0; num--, msg += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15 take their schedule word straight from the input. */
        SHA512_LOAD_WORD(0);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        SHA512_LOAD_WORD(1);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        SHA512_LOAD_WORD(2);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        SHA512_LOAD_WORD(3);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        SHA512_LOAD_WORD(4);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        SHA512_LOAD_WORD(5);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        SHA512_LOAD_WORD(6);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        SHA512_LOAD_WORD(7);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        SHA512_LOAD_WORD(8);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        SHA512_LOAD_WORD(9);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        SHA512_LOAD_WORD(10);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        SHA512_LOAD_WORD(11);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        SHA512_LOAD_WORD(12);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        SHA512_LOAD_WORD(13);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        SHA512_LOAD_WORD(14);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        SHA512_LOAD_WORD(15);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);

        /* Rounds 16..79, sixteen per iteration, expanding the schedule. */
        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, sched);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, sched);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, sched);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, sched);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, sched);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, sched);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, sched);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, sched);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, sched);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, sched);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, sched);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, sched);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, sched);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, sched);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, sched);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, sched);
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}

#undef SHA512_LOAD_WORD
738
739
#endif
740
741
#endif /* SHA512_ASM */