Coverage Report

Created: 2026-01-09 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order, and one operating on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, crypto/md32_common.h;
29
 *
30
 * None of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm at the time of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
/*
 * On the targets listed here, and in every SHA512_ASM build, the block
 * function is handed caller data directly.  When this macro is left
 * undefined, SHA512_Update() and SHA512_Transform() first check the input
 * address against sizeof(c->u.d[0]) and copy misaligned input into the
 * context buffer before processing it.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(__s390__) || defined(__s390x__) || defined(__aarch64__) || defined(SHA512_ASM)
63
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
64
#endif
65
66
/*
 * U64(C) pastes a 64-bit unsigned literal suffix onto the constant C:
 * UI64 for Windows compilers other than MinGW, UL when __arch64__ is
 * defined, and ULL everywhere else.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
67
#define U64(C) C##UI64
68
#elif defined(__arch64__)
69
#define U64(C) C##UL
70
#else
71
59.0k
#define U64(C) C##ULL
72
#endif
73
74
/*
 * Reset |c| for a SHA-512/224 computation: load the eight 64-bit chaining
 * words, clear the bit counter (Nl/Nh) and the buffered-byte count (num),
 * and select a SHA224_DIGEST_LENGTH-byte output.  Always returns 1.
 *
 * NOTE(review): the constants look like the SHA-512/224 initial hash value
 * from FIPS 180-4; confirm against the standard before touching them.
 */
int sha512_224_init(SHA512_CTX *c)
75
2
{
76
2
    c->h[0] = U64(0x8c3d37c819544da2);
77
2
    c->h[1] = U64(0x73e1996689dcd4d6);
78
2
    c->h[2] = U64(0x1dfab7ae32ff9c82);
79
2
    c->h[3] = U64(0x679dd514582f9fcf);
80
2
    c->h[4] = U64(0x0f6d2b697bd44da8);
81
2
    c->h[5] = U64(0x77e36f7304c48942);
82
2
    c->h[6] = U64(0x3f9d85a86a1d36c8);
83
2
    c->h[7] = U64(0x1112e6ad91d692a1);
84
85
2
    c->Nl = 0;
86
2
    c->Nh = 0;
87
2
    c->num = 0;
88
2
    /* md_len selects the output branch taken later in SHA512_Final(). */
    c->md_len = SHA224_DIGEST_LENGTH;
89
2
    return 1;
90
2
}
91
92
/*
 * Reset |c| for a SHA-512/256 computation: load the eight 64-bit chaining
 * words, clear the bit counter (Nl/Nh) and the buffered-byte count (num),
 * and select a SHA256_DIGEST_LENGTH-byte output.  Always returns 1.
 *
 * NOTE(review): the constants look like the SHA-512/256 initial hash value
 * from FIPS 180-4; confirm against the standard before touching them.
 */
int sha512_256_init(SHA512_CTX *c)
93
2
{
94
2
    c->h[0] = U64(0x22312194fc2bf72c);
95
2
    c->h[1] = U64(0x9f555fa3c84c64c2);
96
2
    c->h[2] = U64(0x2393b86b6f53b151);
97
2
    c->h[3] = U64(0x963877195940eabd);
98
2
    c->h[4] = U64(0x96283ee2a88effe3);
99
2
    c->h[5] = U64(0xbe5e1e2553863992);
100
2
    c->h[6] = U64(0x2b0199fc2c85b8aa);
101
2
    c->h[7] = U64(0x0eb72ddc81c52ca2);
102
103
2
    c->Nl = 0;
104
2
    c->Nh = 0;
105
2
    c->num = 0;
106
2
    /* md_len selects the output branch taken later in SHA512_Final(). */
    c->md_len = SHA256_DIGEST_LENGTH;
107
2
    return 1;
108
2
}
109
110
/*
 * Reset |c| for a SHA-384 computation: load the eight 64-bit chaining
 * words, clear the bit counter (Nl/Nh) and the buffered-byte count (num),
 * and select a SHA384_DIGEST_LENGTH-byte output.  Always returns 1.
 *
 * NOTE(review): the constants look like the SHA-384 initial hash value from
 * FIPS 180-4; confirm against the standard before touching them.
 */
int SHA384_Init(SHA512_CTX *c)
111
26
{
112
26
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
113
26
    c->h[1] = U64(0x629a292a367cd507);
114
26
    c->h[2] = U64(0x9159015a3070dd17);
115
26
    c->h[3] = U64(0x152fecd8f70e5939);
116
26
    c->h[4] = U64(0x67332667ffc00b31);
117
26
    c->h[5] = U64(0x8eb44a8768581511);
118
26
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
119
26
    c->h[7] = U64(0x47b5481dbefa4fa4);
120
121
26
    c->Nl = 0;
122
26
    c->Nh = 0;
123
26
    c->num = 0;
124
26
    /* md_len selects the output branch taken later in SHA512_Final(). */
    c->md_len = SHA384_DIGEST_LENGTH;
125
26
    return 1;
126
26
}
127
128
/*
 * Reset |c| for a SHA-512 computation: load the eight 64-bit chaining
 * words, clear the bit counter (Nl/Nh) and the buffered-byte count (num),
 * and select a SHA512_DIGEST_LENGTH-byte output.  Always returns 1.
 *
 * NOTE(review): the constants look like the SHA-512 initial hash value from
 * FIPS 180-4; confirm against the standard before touching them.
 */
int SHA512_Init(SHA512_CTX *c)
129
6.53k
{
130
6.53k
    c->h[0] = U64(0x6a09e667f3bcc908);
131
6.53k
    c->h[1] = U64(0xbb67ae8584caa73b);
132
6.53k
    c->h[2] = U64(0x3c6ef372fe94f82b);
133
6.53k
    c->h[3] = U64(0xa54ff53a5f1d36f1);
134
6.53k
    c->h[4] = U64(0x510e527fade682d1);
135
6.53k
    c->h[5] = U64(0x9b05688c2b3e6c1f);
136
6.53k
    c->h[6] = U64(0x1f83d9abfb41bd6b);
137
6.53k
    c->h[7] = U64(0x5be0cd19137e2179);
138
139
6.53k
    c->Nl = 0;
140
6.53k
    c->Nh = 0;
141
6.53k
    c->num = 0;
142
6.53k
    /* md_len selects the output branch taken later in SHA512_Final(). */
    c->md_len = SHA512_DIGEST_LENGTH;
143
6.53k
    return 1;
144
6.53k
}
145
146
/*
 * Forward declaration of the block function used by the routines below.
 * Without SHA512_ASM the declaration gets the "static" prefix (the C
 * implementation lives further down in this file).  With SHA512_ASM it is
 * declared with external linkage, and INCLUDE_C_SHA512 additionally
 * declares the C variant sha512_block_data_order_c().
 * |num| is a count of whole blocks, not bytes (callers pass 1 or
 * len / sizeof(c->u)).
 */
#ifndef SHA512_ASM
147
static
148
#else
149
#ifdef INCLUDE_C_SHA512
150
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
151
#endif
152
#endif
153
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
154
155
/*
 * Finish the hash in |c| and write the digest to |md|.
 *
 * Pads the buffered data with 0x80 and zeros, appends the 128-bit bit count
 * (Nh then Nl, most significant byte first), runs the block function, and
 * serialises the leading c->md_len bytes of the chaining words big-endian.
 *
 * Returns 1 on success.  Returns 0 when |md| is NULL or c->md_len is not one
 * of the four supported digest lengths; in both cases the final block has
 * already been processed, so |c| is modified even on failure.  The context
 * is not cleared or re-initialised here.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
156
3.28k
{
157
3.28k
    unsigned char *p = (unsigned char *)c->u.p;
158
3.28k
    size_t n = c->num;
159
160
3.28k
    p[n] = 0x80; /* There is always room for one more byte */
161
3.28k
    n++;
162
3.28k
    /* No room left for the 16-byte length field: flush a padding-only block. */
    if (n > (sizeof(c->u) - 16)) {
163
43
        memset(p + n, 0, sizeof(c->u) - n);
164
43
        n = 0;
165
43
        sha512_block_data_order(c, p, 1);
166
43
    }
167
168
3.28k
    /* Zero everything between the padding byte and the length field. */
    memset(p + n, 0, sizeof(c->u) - 16 - n);
169
#ifdef B_ENDIAN
170
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
171
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
172
#else
173
3.28k
    /* Store Nh:Nl byte by byte so the last 16 bytes are big-endian. */
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
174
3.28k
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
175
3.28k
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
176
3.28k
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
177
3.28k
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
178
3.28k
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
179
3.28k
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
180
3.28k
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
181
3.28k
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
182
3.28k
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
183
3.28k
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
184
3.28k
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
185
3.28k
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
186
3.28k
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
187
3.28k
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
188
3.28k
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
189
3.28k
#endif
190
191
3.28k
    sha512_block_data_order(c, p, 1);
192
193
3.28k
    /* The NULL check comes after the final block has been hashed. */
    if (md == 0)
194
0
        return 0;
195
196
3.28k
    /* Each case emits c->h[] words most significant byte first. */
    switch (c->md_len) {
197
    /* Let compiler decide if it's appropriate to unroll... */
198
1
    case SHA224_DIGEST_LENGTH:
199
4
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
200
3
            SHA_LONG64 t = c->h[n];
201
202
3
            *(md++) = (unsigned char)(t >> 56);
203
3
            *(md++) = (unsigned char)(t >> 48);
204
3
            *(md++) = (unsigned char)(t >> 40);
205
3
            *(md++) = (unsigned char)(t >> 32);
206
3
            *(md++) = (unsigned char)(t >> 24);
207
3
            *(md++) = (unsigned char)(t >> 16);
208
3
            *(md++) = (unsigned char)(t >> 8);
209
3
            *(md++) = (unsigned char)(t);
210
3
        }
211
        /*
212
         * For 224 bits, there are four bytes left over that have to be
213
         * processed separately.
214
         */
215
1
        {
216
1
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
217
218
1
            *(md++) = (unsigned char)(t >> 56);
219
1
            *(md++) = (unsigned char)(t >> 48);
220
1
            *(md++) = (unsigned char)(t >> 40);
221
1
            *(md++) = (unsigned char)(t >> 32);
222
1
        }
223
1
        break;
224
1
    case SHA256_DIGEST_LENGTH:
225
5
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
226
4
            SHA_LONG64 t = c->h[n];
227
228
4
            *(md++) = (unsigned char)(t >> 56);
229
4
            *(md++) = (unsigned char)(t >> 48);
230
4
            *(md++) = (unsigned char)(t >> 40);
231
4
            *(md++) = (unsigned char)(t >> 32);
232
4
            *(md++) = (unsigned char)(t >> 24);
233
4
            *(md++) = (unsigned char)(t >> 16);
234
4
            *(md++) = (unsigned char)(t >> 8);
235
4
            *(md++) = (unsigned char)(t);
236
4
        }
237
1
        break;
238
13
    case SHA384_DIGEST_LENGTH:
239
91
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
240
78
            SHA_LONG64 t = c->h[n];
241
242
78
            *(md++) = (unsigned char)(t >> 56);
243
78
            *(md++) = (unsigned char)(t >> 48);
244
78
            *(md++) = (unsigned char)(t >> 40);
245
78
            *(md++) = (unsigned char)(t >> 32);
246
78
            *(md++) = (unsigned char)(t >> 24);
247
78
            *(md++) = (unsigned char)(t >> 16);
248
78
            *(md++) = (unsigned char)(t >> 8);
249
78
            *(md++) = (unsigned char)(t);
250
78
        }
251
13
        break;
252
3.26k
    case SHA512_DIGEST_LENGTH:
253
29.3k
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
254
26.1k
            SHA_LONG64 t = c->h[n];
255
256
26.1k
            *(md++) = (unsigned char)(t >> 56);
257
26.1k
            *(md++) = (unsigned char)(t >> 48);
258
26.1k
            *(md++) = (unsigned char)(t >> 40);
259
26.1k
            *(md++) = (unsigned char)(t >> 32);
260
26.1k
            *(md++) = (unsigned char)(t >> 24);
261
26.1k
            *(md++) = (unsigned char)(t >> 16);
262
26.1k
            *(md++) = (unsigned char)(t >> 8);
263
26.1k
            *(md++) = (unsigned char)(t);
264
26.1k
        }
265
3.26k
        break;
266
    /* ... as well as make sure md_len is not abused. */
267
0
    default:
268
0
        return 0;
269
3.28k
    }
270
271
3.28k
    return 1;
272
3.28k
}
273
274
/*
 * SHA-384 finalisation.  Forwards to SHA512_Final(), which picks the output
 * length from c->md_len; returns whatever SHA512_Final() returns.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
275
13
{
276
13
    return SHA512_Final(md, c);
277
13
}
278
279
/*
 * Absorb |len| bytes from |_data| into the hash state |c|.
 *
 * Updates the 128-bit bit counter Nh:Nl, tops up and flushes any partially
 * filled block held in c->u, hashes all whole blocks straight from the
 * caller's buffer, and stashes the trailing bytes in c->u for the next call
 * (c->num records how many).  A zero |len| leaves |c| untouched.
 * Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
280
6.56k
{
281
6.56k
    SHA_LONG64 l;
282
6.56k
    unsigned char *p = c->u.p;
283
6.56k
    const unsigned char *data = (const unsigned char *)_data;
284
285
6.56k
    if (len == 0)
286
0
        return 1;
287
288
6.56k
    /* Add len * 8 bits to Nl; a wrapped sum means a carry into Nh. */
    l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
289
6.56k
    if (l < c->Nl)
290
0
        c->Nh++;
291
6.56k
    /* With a size_t of 8+ bytes, the top 3 bits of len shifted out above. */
    if (sizeof(len) >= 8)
292
6.56k
        c->Nh += (((SHA_LONG64)len) >> 61);
293
6.56k
    c->Nl = l;
294
295
6.56k
    /* A previous call left c->num bytes buffered: complete that block first. */
    if (c->num != 0) {
296
0
        size_t n = sizeof(c->u) - c->num;
297
298
0
        if (len < n) {
299
0
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
300
0
            return 1;
301
0
        } else {
302
0
            memcpy(p + c->num, data, n), c->num = 0;
303
0
            len -= n, data += n;
304
0
            sha512_block_data_order(c, p, 1);
305
0
        }
306
0
    }
307
308
6.56k
    if (len >= sizeof(c->u)) {
309
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
310
        /* Misaligned input: bounce each block through the context buffer. */
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
311
            while (len >= sizeof(c->u))
312
                memcpy(p, data, sizeof(c->u)),
313
                    sha512_block_data_order(c, p, 1),
314
                    len -= sizeof(c->u), data += sizeof(c->u);
315
        else
316
#endif
317
6.56k
            /* Hash all whole blocks in place, then point data at the tail. */
            sha512_block_data_order(c, data, len / sizeof(c->u)),
318
6.56k
                data += len, len %= sizeof(c->u), data -= len;
319
6.56k
    }
320
321
6.56k
    /* Keep the remaining partial block for the next Update/Final call. */
    if (len != 0)
322
87
        memcpy(p, data, len), c->num = (int)len;
323
324
6.56k
    return 1;
325
6.56k
}
326
327
/*
 * SHA-384 data absorption.  Forwards unchanged to SHA512_Update() and
 * returns its result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
328
26
{
329
26
    return SHA512_Update(c, data, len);
330
26
}
331
332
/*
 * Run the block function over exactly one block read from |data|, updating
 * the chaining words in |c|.  The bit counter and c->num are not touched.
 *
 * On builds without SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA, misaligned input
 * is first copied into c->u.p, overwriting whatever was buffered there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
333
0
{
334
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
335
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
336
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
337
#endif
338
0
    sha512_block_data_order(c, data, 1);
339
0
}
340
341
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
342
/*
 * Per-round additive constants: entry i is added in round i of the
 * 80-round compression loop (see the K512[i] uses in the block functions).
 *
 * NOTE(review): values look like the SHA-384/512 constants from FIPS 180-4;
 * confirm against the standard before editing.
 */
static const SHA_LONG64 K512[80] = {
343
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
344
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
345
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
346
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
347
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
348
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
349
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
350
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
351
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
352
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
353
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
354
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
355
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
356
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
357
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
358
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
359
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
360
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
361
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
362
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
363
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
364
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
365
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
366
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
367
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
368
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
369
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
370
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
371
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
372
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
373
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
374
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
375
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
376
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
377
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
378
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
379
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
380
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
381
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
382
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
383
};
384
385
/*
 * Optional compiler- and architecture-specific definitions of ROTR, PULL64,
 * Sigma0/Sigma1, sigma0/sigma1, Ch and Maj.  The whole section is skipped
 * when PEDANTIC is defined.  Any macro left undefined here is supplied by
 * the portable "#ifndef ..." fallbacks that follow this section.
 */
#ifndef PEDANTIC
386
/* GCC-compatible compilers with inline assembly enabled. */
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
387
#if defined(__x86_64) || defined(__x86_64__)
388
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
389
                                asm ("rorq %1,%0"       \
390
                                : "=r"(ret)             \
391
                                : "J"(n),"0"(a)         \
392
                                : "cc"); ret; })
393
#if !defined(B_ENDIAN)
394
#define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
395
                                asm ("bswapq    %0"             \
396
                                : "=r"(ret)                     \
397
                                : "0"(ret)); ret; })
398
#endif
399
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
400
/* I386_ONLY builds byte-swap with xchgb/roll instead of bswapl. */
#if defined(I386_ONLY)
401
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
402
                          unsigned int hi=p[0],lo=p[1];          \
403
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
404
                                    "roll $16,%%eax; roll $16,%%edx; "\
405
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
406
                                : "=a"(lo),"=d"(hi)             \
407
                                : "0"(lo),"1"(hi) : "cc");      \
408
                                ((SHA_LONG64)hi)<<32|lo; })
409
#else
410
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
411
                          unsigned int hi=p[0],lo=p[1];         \
412
                                asm ("bswapl %0; bswapl %1;"    \
413
                                : "=r"(lo),"=r"(hi)             \
414
                                : "0"(lo),"1"(hi));             \
415
                                ((SHA_LONG64)hi)<<32|lo; })
416
#endif
417
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
418
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
419
                                asm ("rotrdi %0,%1,%2"  \
420
                                : "=r"(ret)             \
421
                                : "r"(a),"K"(n)); ret; })
422
#elif defined(__aarch64__)
423
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
424
                                asm ("ror %0,%1,%2"     \
425
                                : "=r"(ret)             \
426
                                : "r"(a),"I"(n)); ret; })
427
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
428
#define PULL64(x) ({ SHA_LONG64 ret;                     \
429
                                asm ("rev       %0,%1"          \
430
                                : "=r"(ret)                     \
431
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
432
#endif
433
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
434
#define PULL64(x) ({ SHA_LONG64 ret;                                        \
435
                        unsigned int *r = (unsigned int *)(&(ret));             \
436
                        const unsigned int *p = (const unsigned int *)(&(x));   \
437
                        asm ("rev8 %0, %1"                                      \
438
                        : "=r"(r[0])                                            \
439
                        : "r" (p[1]));                                          \
440
                        asm ("rev8 %0, %1"                                      \
441
                        : "=r"(r[1])                                            \
442
                        : "r" (p[0])); ret; })
443
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
444
#define PULL64(x) ({ SHA_LONG64 ret;    \
445
                        asm ("rev8 %0, %1"  \
446
                        : "=r"(ret)         \
447
                        : "r"(x)); ret; })
448
#endif
449
/* RISC-V with __riscv_zknh: Sigma/sigma via the sha512sum and sha512sig instructions. */
#if defined(__riscv_zknh) && __riscv_xlen == 32
450
#define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
451
                        const unsigned int *p = (const unsigned int *)(&(x));           \
452
                        asm ("sha512sum0r %0, %1, %2"                                   \
453
                        : "=r"(r[0])                                                    \
454
                        : "r" (p[0]), "r" (p[1]));                                      \
455
                        asm ("sha512sum0r %0, %2, %1"                                   \
456
                        : "=r"(r[1])                                                    \
457
                        : "r" (p[0]), "r" (p[1])); ret; })
458
#define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
459
                        const unsigned int *p = (const unsigned int *)(&(x));           \
460
                        asm ("sha512sum1r %0, %1, %2"                                   \
461
                        : "=r"(r[0])                                                    \
462
                        : "r" (p[0]), "r" (p[1]));                                      \
463
                        asm ("sha512sum1r %0, %2, %1"                                   \
464
                        : "=r"(r[1])                                                    \
465
                        : "r" (p[0]), "r" (p[1])); ret; })
466
#define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
467
                        const unsigned int *p = (const unsigned int *)(&(x));           \
468
                        asm ("sha512sig0l %0, %1, %2"                                   \
469
                        : "=r"(r[0])                                                    \
470
                        : "r" (p[0]), "r" (p[1]));                                      \
471
                        asm ("sha512sig0h %0, %2, %1"                                   \
472
                        : "=r"(r[1])                                                    \
473
                        : "r" (p[0]), "r" (p[1])); ret; })
474
#define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
475
                        const unsigned int *p = (const unsigned int *)(&(x));           \
476
                        asm ("sha512sig1l %0, %1, %2"                                   \
477
                        : "=r"(r[0])                                                    \
478
                        : "r" (p[0]), "r" (p[1]));                                      \
479
                        asm ("sha512sig1h %0, %2, %1"                                   \
480
                        : "=r"(r[1])                                                    \
481
                        : "r" (p[0]), "r" (p[1])); ret; })
482
#elif defined(__riscv_zknh) && __riscv_xlen == 64
483
#define Sigma0(x) ({ SHA_LONG64 ret;            \
484
                        asm ("sha512sum0 %0, %1"    \
485
                        : "=r"(ret)                 \
486
                        : "r"(x)); ret; })
487
#define Sigma1(x) ({ SHA_LONG64 ret;            \
488
                        asm ("sha512sum1 %0, %1"    \
489
                        : "=r"(ret)                 \
490
                        : "r"(x)); ret; })
491
#define sigma0(x) ({ SHA_LONG64 ret;            \
492
                        asm ("sha512sig0 %0, %1"    \
493
                        : "=r"(ret)                 \
494
                        : "r"(x)); ret; })
495
#define sigma1(x) ({ SHA_LONG64 ret;            \
496
                        asm ("sha512sig1 %0, %1"    \
497
                        : "=r"(ret)                 \
498
                        : "r"(x)); ret; })
499
#endif
500
/*
 * RISC-V with __riscv_zbt or __riscv_zpn: Ch and Maj via a raw ".insn r4"
 * encoding.  NOTE(review): presumably the cmix instruction -- confirm
 * against the ISA manual.
 */
#if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
501
#define Ch(x, y, z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
502
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
503
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
504
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
505
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
506
                        : "=r"(r[0])                                                    \
507
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
508
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
509
                        : "=r"(r[1])                                                    \
510
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
511
#define Maj(x, y, z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
512
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
513
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
514
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
515
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
516
                        : "=r"(r[0])                                                    \
517
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
518
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
519
                        : "=r"(r[1])                                                    \
520
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
521
#elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
522
#define Ch(x, y, z) ({  SHA_LONG64 ret;                           \
523
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
524
                        : "=r"(ret)                                 \
525
                        : "r"(x), "r"(y), "r"(z)); ret; })
526
#define Maj(x, y, z) ({ SHA_LONG64 ret;                           \
527
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
528
                        : "=r"(ret)                                 \
529
                        : "r"(x^z), "r"(y), "r"(x)); ret; })
530
#endif
531
/* Microsoft compiler: _rotr64 intrinsic on Win64, inline-asm byte swap on 32-bit x86. */
#elif defined(_MSC_VER)
532
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
533
#pragma intrinsic(_rotr64)
534
#define ROTR(a, n) _rotr64((a), n)
535
#endif
536
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
537
#if defined(I386_ONLY)
538
static SHA_LONG64 __fastcall __pull64be(const void *x)
539
{
540
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm xchg dh, dl _asm xchg ah, al _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al
541
}
542
#else
543
static SHA_LONG64 __fastcall __pull64be(const void *x) {
544
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm bswap edx _asm bswap eax
545
}
546
#endif
547
#define PULL64(x) __pull64be(&(x))
548
#endif
549
#endif
550
#endif
551
/*
 * Portable C fallbacks, each used only when the section above did not
 * already define the macro.
 *
 * B(x, j) reads byte j of the object x and shifts it left by (7 - j) * 8
 * bits; PULL64(x) ORs bytes 0..7 together, i.e. it loads a 64-bit value
 * with byte 0 as the most significant byte regardless of host byte order.
 *
 * NOTE(review): B uses "&x" and "j", and ROTR uses "s", without
 * parentheses, so arguments must be simple lvalues / constants as they are
 * at every call site visible in this file.
 */
#ifndef PULL64
552
443M
#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x)) + j))) << ((7 - j) * 8))
553
55.4M
#define PULL64(x) (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
554
#endif
555
/* 64-bit rotate right by s bits; s must be in 1..63 to keep both shifts defined. */
#ifndef ROTR
556
2.55G
#define ROTR(x, s) (((x) >> s) | (x) << (64 - s))
557
#endif
558
/* Mixing functions applied to working variables a and e in each round. */
#ifndef Sigma0
559
277M
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
560
#endif
561
#ifndef Sigma1
562
277M
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
563
#endif
564
/* Mixing functions used when extending the message schedule (rounds 16..79). */
#ifndef sigma0
565
221M
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
566
#endif
567
#ifndef sigma1
568
221M
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
569
#endif
570
/* Ch: for each bit, pick y where x is 1 and z where x is 0. */
#ifndef Ch
571
277M
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
572
#endif
573
/* Maj: for each bit, the majority value of x, y and z. */
#ifndef Maj
574
277M
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
575
#endif
576
577
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
578
/*
579
 * This code should give better results on 32-bit CPU with less than
580
 * ~24 registers, both size and performance wise...
581
 */
582
583
/*
 * Block function variant selected for 32-bit x86 builds.
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words from |in|
 * and folds each into ctx->h[].
 *
 * Only A and E are kept in locals.  The other six working variables and
 * the message schedule live in X[], addressed through the sliding pointer
 * F: each round stores A at F[0], E at F[4] and the round's message word at
 * F[8], then decrements F.  After the decrement, last round's A and E are
 * seen at F[1] and F[5], so the usual variable rotation happens without
 * any copying.  The word written i rounds ago at F[8] is later found at
 * F[8 + i], which is how rounds 16..79 reach earlier schedule words.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
584
    size_t num)
585
{
586
    const SHA_LONG64 *W = in;
587
    SHA_LONG64 A, E, T;
588
    SHA_LONG64 X[9 + 80], *F;
589
    int i;
590
591
    while (num--) {
592
593
        /* F starts at the top of X and moves down one slot per round. */
        F = X + 80;
594
        A = ctx->h[0];
595
        F[1] = ctx->h[1];
596
        F[2] = ctx->h[2];
597
        F[3] = ctx->h[3];
598
        E = ctx->h[4];
599
        F[5] = ctx->h[5];
600
        F[6] = ctx->h[6];
601
        F[7] = ctx->h[7];
602
603
        /* Rounds 0..15: message words come straight from the input block. */
        for (i = 0; i < 16; i++, F--) {
604
#ifdef B_ENDIAN
605
            T = W[i];
606
#else
607
            T = PULL64(W[i]);
608
#endif
609
            F[0] = A;
610
            F[4] = E;
611
            F[8] = T;
612
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
613
            E = F[3] + T;
614
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
615
        }
616
617
        /*
         * Rounds 16..79: build the next schedule word from the words stored
         * 15, 2, 16 and 7 rounds earlier.
         */
        for (; i < 80; i++, F--) {
618
            T = sigma0(F[8 + 16 - 1]);
619
            T += sigma1(F[8 + 16 - 14]);
620
            T += F[8 + 16] + F[8 + 16 - 9];
621
622
            F[0] = A;
623
            F[4] = E;
624
            F[8] = T;
625
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
626
            E = F[3] + T;
627
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
628
        }
629
630
        /* Add the working variables back into the chaining state. */
        ctx->h[0] += A;
631
        ctx->h[1] += F[1];
632
        ctx->h[2] += F[2];
633
        ctx->h[3] += F[3];
634
        ctx->h[4] += E;
635
        ctx->h[5] += F[5];
636
        ctx->h[6] += F[6];
637
        ctx->h[7] += F[7];
638
639
        W += SHA_LBLOCK;
640
    }
641
}
642
643
#elif defined(OPENSSL_SMALL_FOOTPRINT)
644
645
/*
 * Block function variant selected by OPENSSL_SMALL_FOOTPRINT: plain loops,
 * no unrolling.
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words from |in|
 * and folds each into ctx->h[].  The message schedule is kept in the
 * 16-entry ring X[], indexed modulo 16.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
646
    size_t num)
647
{
648
    const SHA_LONG64 *W = in;
649
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
650
    SHA_LONG64 X[16];
651
    int i;
652
653
    while (num--) {
654
655
        a = ctx->h[0];
656
        b = ctx->h[1];
657
        c = ctx->h[2];
658
        d = ctx->h[3];
659
        e = ctx->h[4];
660
        f = ctx->h[5];
661
        g = ctx->h[6];
662
        h = ctx->h[7];
663
664
        /* Rounds 0..15: load (and byte-swap if needed) the input words. */
        for (i = 0; i < 16; i++) {
665
#ifdef B_ENDIAN
666
            T1 = X[i] = W[i];
667
#else
668
            T1 = X[i] = PULL64(W[i]);
669
#endif
670
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
671
            T2 = Sigma0(a) + Maj(a, b, c);
672
            h = g;
673
            g = f;
674
            f = e;
675
            e = d + T1;
676
            d = c;
677
            c = b;
678
            b = a;
679
            a = T1 + T2;
680
        }
681
682
        /*
         * Rounds 16..79: X[i & 0xf] still holds the word from 16 rounds
         * ago; (i + 1), (i + 14) and (i + 9) modulo 16 address the words
         * from 15, 2 and 7 rounds ago.
         */
        for (; i < 80; i++) {
683
            s0 = X[(i + 1) & 0x0f];
684
            s0 = sigma0(s0);
685
            s1 = X[(i + 14) & 0x0f];
686
            s1 = sigma1(s1);
687
688
            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
689
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
690
            T2 = Sigma0(a) + Maj(a, b, c);
691
            h = g;
692
            g = f;
693
            f = e;
694
            e = d + T1;
695
            d = c;
696
            c = b;
697
            b = a;
698
            a = T1 + T2;
699
        }
700
701
        /* Add the working variables back into the chaining state. */
        ctx->h[0] += a;
702
        ctx->h[1] += b;
703
        ctx->h[2] += c;
704
        ctx->h[3] += d;
705
        ctx->h[4] += e;
706
        ctx->h[5] += f;
707
        ctx->h[6] += g;
708
        ctx->h[7] += h;
709
710
        W += SHA_LBLOCK;
711
    }
712
}
713
714
#else
715
/*
 * One compression round for the unrolled block function.
 *
 * Expects T1 (a variable in the expanding scope) to already hold the
 * round's message word.  Adds h, Sigma1(e), Ch(e, f, g) and K512[i] to T1,
 * then overwrites h with Sigma0(a) + Maj(a, b, c) + T1 and adds T1 to d.
 * Only d and h are modified; callers rotate the argument order from one
 * invocation to the next instead of shuffling the eight variables.
 */
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
716
277M
    do {                                             \
717
277M
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
718
277M
        h = Sigma0(a) + Maj(a, b, c);                \
719
277M
        d += T1;                                     \
720
277M
        h += T1;                                     \
721
277M
    } while (0)
722
723
/*
 * One compression round that first extends the message schedule.
 *
 * X is a 16-entry ring indexed modulo 16.  The macro updates X[j & 0x0f]
 * in place by adding sigma0 of entry (j + 1), sigma1 of entry (j + 14) and
 * entry (j + 9), leaves the new word in T1, and then runs ROUND_00_15 with
 * round constant index i + j.  Uses s0, s1 and T1 from the expanding scope.
 */
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
724
221M
    do {                                                   \
725
221M
        s0 = X[(j + 1) & 0x0f];                            \
726
221M
        s0 = sigma0(s0);                                   \
727
221M
        s1 = X[(j + 14) & 0x0f];                           \
728
221M
        s1 = sigma1(s1);                                   \
729
221M
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
730
221M
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
731
221M
    } while (0)
732
733
#ifdef INCLUDE_C_SHA512
734
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
735
#else
736
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
737
    size_t num)
738
#endif
739
9.88k
{
740
9.88k
    const SHA_LONG64 *W = in;
741
9.88k
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
742
9.88k
    SHA_LONG64 X[16];
743
9.88k
    int i;
744
745
3.47M
    while (num--) {
746
747
3.46M
        a = ctx->h[0];
748
3.46M
        b = ctx->h[1];
749
3.46M
        c = ctx->h[2];
750
3.46M
        d = ctx->h[3];
751
3.46M
        e = ctx->h[4];
752
3.46M
        f = ctx->h[5];
753
3.46M
        g = ctx->h[6];
754
3.46M
        h = ctx->h[7];
755
756
#ifdef B_ENDIAN
757
        T1 = X[0] = W[0];
758
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
759
        T1 = X[1] = W[1];
760
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
761
        T1 = X[2] = W[2];
762
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
763
        T1 = X[3] = W[3];
764
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
765
        T1 = X[4] = W[4];
766
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
767
        T1 = X[5] = W[5];
768
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
769
        T1 = X[6] = W[6];
770
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
771
        T1 = X[7] = W[7];
772
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
773
        T1 = X[8] = W[8];
774
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
775
        T1 = X[9] = W[9];
776
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
777
        T1 = X[10] = W[10];
778
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
779
        T1 = X[11] = W[11];
780
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
781
        T1 = X[12] = W[12];
782
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
783
        T1 = X[13] = W[13];
784
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
785
        T1 = X[14] = W[14];
786
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
787
        T1 = X[15] = W[15];
788
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
789
#else
790
3.46M
        T1 = X[0] = PULL64(W[0]);
791
3.46M
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
792
3.46M
        T1 = X[1] = PULL64(W[1]);
793
3.46M
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
794
3.46M
        T1 = X[2] = PULL64(W[2]);
795
3.46M
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
796
3.46M
        T1 = X[3] = PULL64(W[3]);
797
3.46M
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
798
3.46M
        T1 = X[4] = PULL64(W[4]);
799
3.46M
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
800
3.46M
        T1 = X[5] = PULL64(W[5]);
801
3.46M
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
802
3.46M
        T1 = X[6] = PULL64(W[6]);
803
3.46M
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
804
3.46M
        T1 = X[7] = PULL64(W[7]);
805
3.46M
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
806
3.46M
        T1 = X[8] = PULL64(W[8]);
807
3.46M
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
808
3.46M
        T1 = X[9] = PULL64(W[9]);
809
3.46M
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
810
3.46M
        T1 = X[10] = PULL64(W[10]);
811
3.46M
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
812
3.46M
        T1 = X[11] = PULL64(W[11]);
813
3.46M
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
814
3.46M
        T1 = X[12] = PULL64(W[12]);
815
3.46M
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
816
3.46M
        T1 = X[13] = PULL64(W[13]);
817
3.46M
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
818
3.46M
        T1 = X[14] = PULL64(W[14]);
819
3.46M
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
820
3.46M
        T1 = X[15] = PULL64(W[15]);
821
3.46M
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
822
3.46M
#endif
823
824
17.3M
        for (i = 16; i < 80; i += 16) {
825
13.8M
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
826
13.8M
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
827
13.8M
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
828
13.8M
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
829
13.8M
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
830
13.8M
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
831
13.8M
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
832
13.8M
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
833
13.8M
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
834
13.8M
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
835
13.8M
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
836
13.8M
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
837
13.8M
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
838
13.8M
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
839
13.8M
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
840
13.8M
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
841
13.8M
        }
842
843
3.46M
        ctx->h[0] += a;
844
3.46M
        ctx->h[1] += b;
845
3.46M
        ctx->h[2] += c;
846
3.46M
        ctx->h[3] += d;
847
3.46M
        ctx->h[4] += e;
848
3.46M
        ctx->h[5] += f;
849
3.46M
        ctx->h[6] += g;
850
3.46M
        ctx->h[7] += h;
851
852
3.46M
        W += SHA_LBLOCK;
853
3.46M
    }
854
9.88k
}
855
856
#endif
857
858
#endif /* SHA512_ASM */