Coverage Report

Created: 2026-02-22 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
#include <openssl/byteorder.h>
19
/*-
20
 * IMPLEMENTATION NOTES.
21
 *
22
 * As you might have noticed, 32-bit hash algorithms:
23
 *
24
 * - permit SHA_LONG to be wider than 32-bit
25
 * - optimized versions implement two transform functions: one operating
26
 *   on [aligned] data in host byte order, and one operating on data in input
27
 *   stream byte order;
28
 * - share common byte-order neutral collector and padding function
29
 *   implementations, crypto/md32_common.h;
30
 *
31
 * Neither of the above applies to this SHA-512 implementation. Reasons
32
 * [in reverse order] are:
33
 *
34
 * - it's the only 64-bit hash algorithm at the time of this writing,
35
 *   there is no need for common collector/padding implementation [yet];
36
 * - by supporting only one transform function [which operates on
37
 *   *aligned* data in input stream byte order, big-endian in this case]
38
 *   we minimize burden of maintenance in two ways: a) collector/padding
39
 *   function is simpler; b) only one transform function to stare at;
40
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
41
 *   apply a number of optimizations to mitigate potential performance
42
 *   penalties caused by previous design decision;
43
 *
44
 * Caveat lector.
45
 *
46
 * Implementation relies on the fact that "long long" is 64-bit on
47
 * both 32- and 64-bit platforms. If some compiler vendor comes up
48
 * with 128-bit long long, adjustment to sha.h would be required.
49
 * As this implementation relies on 64-bit integer type, it's totally
50
 * inappropriate for platforms which don't support it, most notably
51
 * 16-bit platforms.
52
 */
53
#include <stdlib.h>
54
#include <string.h>
55
56
#include <openssl/crypto.h>
57
#include <openssl/sha.h>
58
#include <openssl/opensslv.h>
59
60
#include "internal/cryptlib.h"
61
#include "crypto/sha.h"
62
63
/*
 * Defined for targets (and for assembly back ends) where input is handed to
 * sha512_block_data_order() as-is.  When it is NOT defined, the update and
 * transform routines first copy data that is not aligned to a 64-bit word
 * into the context buffer.
 */
#if defined(SHA512_ASM) \
    || defined(__i386) || defined(__i386__) || defined(_M_IX86) \
    || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) \
    || defined(__s390__) || defined(__s390x__) \
    || defined(__aarch64__)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
66
67
/*
 * U64(C) appends the suffix that makes the integer literal C an unsigned
 * 64-bit constant: UI64 for Windows compilers other than MinGW, UL when
 * __arch64__ is defined, ULL everywhere else.
 */
#if !(defined(_WIN32) || defined(_WIN64)) || defined(__MINGW32__)
#if defined(__arch64__)
#define U64(C) C##UL
#else
#define U64(C) C##ULL
#endif
#else
#define U64(C) C##UI64
#endif
74
75
/*
 * Forward declaration of the shared update routine defined further down in
 * this file.  The context is passed as an untyped pointer and is cast to
 * SHA512_CTX * inside the function body.
 */
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len);
76
77
/*
 * Reset |c| for a SHA-512/224 computation: load the eight initial chaining
 * words, clear both halves of the bit counter and the buffered-byte count,
 * and record the digest length.  Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t word;

    for (word = 0; word < sizeof(initial_state) / sizeof(initial_state[0]); word++)
        c->h[word] = initial_state[word];

    c->md_len = SHA224_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
94
95
/*
 * Reset |c| for a SHA-512/256 computation: load the eight initial chaining
 * words, clear both halves of the bit counter and the buffered-byte count,
 * and record the digest length.  Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t word;

    for (word = 0; word < sizeof(initial_state) / sizeof(initial_state[0]); word++)
        c->h[word] = initial_state[word];

    c->md_len = SHA256_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
112
113
/*
 * Reset |c| for a SHA-384 computation: load the eight initial chaining
 * words, clear both halves of the bit counter and the buffered-byte count,
 * and record the digest length.  Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t word;

    for (word = 0; word < sizeof(initial_state) / sizeof(initial_state[0]); word++)
        c->h[word] = initial_state[word];

    c->md_len = SHA384_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
130
131
/*
 * Reset |c| for a SHA-512 computation: load the eight initial chaining
 * words, clear both halves of the bit counter and the buffered-byte count,
 * and record the digest length.  Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t word;

    for (word = 0; word < sizeof(initial_state) / sizeof(initial_state[0]); word++)
        c->h[word] = initial_state[word];

    c->md_len = SHA512_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
148
149
#ifndef SHA512_ASM
150
static
151
#else
152
#ifdef INCLUDE_C_SHA512
153
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
154
#endif
155
#endif
156
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
157
158
#define OUTPUT_RESULT(md, len)      \
159
27.9k
    for (n = 0; n < (len / 8); n++) \
160
24.8k
    md = OPENSSL_store_u64_be(md, (uint64_t)c->h[n])
161
162
int SHA512_Final(unsigned char *out, SHA512_CTX *c)
163
3.11k
{
164
3.11k
    unsigned char *p = (unsigned char *)c->u.p;
165
3.11k
    size_t n = c->num;
166
167
3.11k
    p[n] = 0x80; /* There always is a room for one */
168
3.11k
    n++;
169
3.11k
    if (n > (sizeof(c->u) - 16)) {
170
36
        memset(p + n, 0, sizeof(c->u) - n);
171
36
        n = 0;
172
36
        sha512_block_data_order(c, p, 1);
173
36
    }
174
175
3.11k
    memset(p + n, 0, sizeof(c->u) - 16 - n);
176
#ifdef B_ENDIAN
177
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
178
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
179
#else
180
3.11k
    uint8_t *cu = p + sizeof(c->u) - 16;
181
182
3.11k
    cu = OPENSSL_store_u64_be(cu, (uint64_t)c->Nh);
183
3.11k
    cu = OPENSSL_store_u64_be(cu, (uint64_t)c->Nl);
184
3.11k
#endif
185
186
3.11k
    sha512_block_data_order(c, p, 1);
187
188
3.11k
    if (out == NULL)
189
0
        return 0;
190
191
    /* Let compiler decide if it's appropriate to unroll... */
192
3.11k
    switch (c->md_len) {
193
0
    case SHA256_192_DIGEST_LENGTH:
194
0
        OUTPUT_RESULT(out, SHA256_192_DIGEST_LENGTH);
195
0
        break;
196
1
    case SHA256_DIGEST_LENGTH:
197
1
        OUTPUT_RESULT(out, SHA256_DIGEST_LENGTH);
198
1
        break;
199
15
    case SHA384_DIGEST_LENGTH:
200
15
        OUTPUT_RESULT(out, SHA384_DIGEST_LENGTH);
201
15
        break;
202
3.09k
    case SHA512_DIGEST_LENGTH:
203
3.09k
        OUTPUT_RESULT(out, SHA512_DIGEST_LENGTH);
204
3.09k
        break;
205
2
    case SHA224_DIGEST_LENGTH: {
206
2
        OUTPUT_RESULT(out, SHA224_DIGEST_LENGTH);
207
        /*
208
         * For 224 bits, there are four bytes left over that have to be
209
         * processed separately.
210
         */
211
2
        {
212
2
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
213
214
2
            *(out++) = (unsigned char)(t >> 56);
215
2
            *(out++) = (unsigned char)(t >> 48);
216
2
            *(out++) = (unsigned char)(t >> 40);
217
2
            *(out++) = (unsigned char)(t >> 32);
218
2
        }
219
2
        break;
220
0
    }
221
    /* ... as well as make sure md_len is not abused. */
222
0
    default:
223
0
        return 0;
224
3.11k
    }
225
226
3.11k
    return 1;
227
3.11k
}
228
229
/*
 * SHA-384 finalisation.  Forwards to SHA512_Final(), which selects the
 * output length from c->md_len, and returns its result unchanged.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    const int status = SHA512_Final(md, c);

    return status;
}
233
234
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len)
235
6.22k
{
236
6.22k
    SHA512_CTX *c = (SHA512_CTX *)cp;
237
6.22k
    SHA_LONG64 l;
238
6.22k
    unsigned char *p = c->u.p;
239
240
6.22k
    if (len == 0)
241
0
        return 1;
242
243
6.22k
    l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
244
6.22k
    if (l < c->Nl)
245
0
        c->Nh++;
246
6.22k
    if (sizeof(len) >= 8)
247
6.22k
        c->Nh += (((SHA_LONG64)len) >> 61);
248
6.22k
    c->Nl = l;
249
250
6.22k
    if (c->num != 0) {
251
0
        size_t n = sizeof(c->u) - c->num;
252
253
0
        if (len < n) {
254
0
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
255
0
            return 1;
256
0
        } else {
257
0
            memcpy(p + c->num, data, n), c->num = 0;
258
0
            len -= n, data += n;
259
0
            sha512_block_data_order(c, p, 1);
260
0
        }
261
0
    }
262
263
6.22k
    if (len >= sizeof(c->u)) {
264
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
265
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
266
            while (len >= sizeof(c->u))
267
                memcpy(p, data, sizeof(c->u)),
268
                    sha512_block_data_order(c, p, 1),
269
                    len -= sizeof(c->u), data += sizeof(c->u);
270
        else
271
#endif
272
6.22k
            sha512_block_data_order(c, data, len / sizeof(c->u)),
273
6.22k
                data += len, len %= sizeof(c->u), data -= len;
274
6.22k
    }
275
276
6.22k
    if (len != 0)
277
79
        memcpy(p, data, len), c->num = (int)len;
278
279
6.22k
    return 1;
280
6.22k
}
281
282
/*
 * Typed SHA-512 front end for SHA512_Update_thunk(): hashes |len| bytes at
 * |_data| into |c| and returns the thunk's result.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *bytes = _data;

    return SHA512_Update_thunk(c, bytes, len);
}
286
287
/*
 * Typed SHA-384 front end for SHA512_Update_thunk(): hashes |len| bytes at
 * |data| into |c| and returns the thunk's result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    const unsigned char *bytes = data;

    return SHA512_Update_thunk(c, bytes, len);
}
291
292
/*
 * Run the block function over exactly one block at |data|, updating the
 * chaining state in |c|.  On builds where the block function is not marked
 * as coping with unaligned input, data that is not aligned to a 64-bit word
 * is first copied into the context buffer and hashed from there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
300
301
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
302
/*
 * Table of the 80 64-bit round constants, read as K512[i] for round i by
 * the block functions in this file.  The values are the SHA-384/SHA-512
 * constants K0..K79 listed in FIPS 180-4.
 */
static const SHA_LONG64 K512[80] = {
303
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
304
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
305
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
306
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
307
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
308
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
309
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
310
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
311
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
312
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
313
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
314
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
315
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
316
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
317
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
318
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
319
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
320
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
321
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
322
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
323
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
324
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
325
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
326
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
327
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
328
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
329
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
330
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
331
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
332
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
333
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
334
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
335
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
336
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
337
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
338
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
339
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
340
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
341
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
342
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
343
};
344
345
/*
 * Optional compiler- and CPU-specific replacements for the primitives used
 * by the block functions: ROTR, PULL64, Sigma0/Sigma1, sigma0/sigma1, Ch
 * and Maj.  Anything left undefined by this section is supplied by the
 * portable #ifndef-guarded definitions that follow it.  Defining PEDANTIC
 * skips the whole section.
 *
 * Most of the macros here take the address of their argument, so they must
 * be given lvalues.
 */
#ifndef PEDANTIC
346
/* GNU-style statement expressions with extended inline assembly. */
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
347
/* x86-64: rotate with rorq; PULL64 loads a 64-bit word and bswapq's it. */
#if defined(__x86_64) || defined(__x86_64__)
348
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
349
                                asm ("rorq %1,%0"       \
350
                                : "=r"(ret)             \
351
                                : "J"(n),"0"(a)         \
352
                                : "cc"); ret; })
353
#if !defined(B_ENDIAN)
354
#define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
355
                                asm ("bswapq    %0"             \
356
                                : "=r"(ret)                     \
357
                                : "0"(ret)); ret; })
358
#endif
359
/*
 * 32-bit x86: PULL64 byte-swaps the two 32-bit halves separately and joins
 * them with the first half in the upper 32 bits.  The I386_ONLY form uses
 * xchgb/roll sequences in place of bswapl (presumably for CPUs that lack
 * bswap -- confirm).
 */
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
360
#if defined(I386_ONLY)
361
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
362
                          unsigned int hi=p[0],lo=p[1];          \
363
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
364
                                    "roll $16,%%eax; roll $16,%%edx; "\
365
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
366
                                : "=a"(lo),"=d"(hi)             \
367
                                : "0"(lo),"1"(hi) : "cc");      \
368
                                ((SHA_LONG64)hi)<<32|lo; })
369
#else
370
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
371
                          unsigned int hi=p[0],lo=p[1];         \
372
                                asm ("bswapl %0; bswapl %1;"    \
373
                                : "=r"(lo),"=r"(hi)             \
374
                                : "0"(lo),"1"(hi));             \
375
                                ((SHA_LONG64)hi)<<32|lo; })
376
#endif
377
/* 64-bit PowerPC: rotate with rotrdi. */
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
378
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
379
                                asm ("rotrdi %0,%1,%2"  \
380
                                : "=r"(ret)             \
381
                                : "r"(a),"K"(n)); ret; })
382
/* AArch64: rotate with ror; on little-endian builds PULL64 uses rev. */
#elif defined(__aarch64__)
383
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
384
                                asm ("ror %0,%1,%2"     \
385
                                : "=r"(ret)             \
386
                                : "r"(a),"I"(n)); ret; })
387
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
388
#define PULL64(x) ({ SHA_LONG64 ret;                     \
389
                                asm ("rev       %0,%1"          \
390
                                : "=r"(ret)                     \
391
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
392
#endif
393
/*
 * RISC-V with Zbb/Zbkb: PULL64 uses rev8.  The 32-bit form reverses each
 * half and also swaps the halves (r[0] from p[1], r[1] from p[0]).
 */
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
394
#define PULL64(x) ({ SHA_LONG64 ret;                                        \
395
                        unsigned int *r = (unsigned int *)(&(ret));             \
396
                        const unsigned int *p = (const unsigned int *)(&(x));   \
397
                        asm ("rev8 %0, %1"                                      \
398
                        : "=r"(r[0])                                            \
399
                        : "r" (p[1]));                                          \
400
                        asm ("rev8 %0, %1"                                      \
401
                        : "=r"(r[1])                                            \
402
                        : "r" (p[0])); ret; })
403
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
404
#define PULL64(x) ({ SHA_LONG64 ret;    \
405
                        asm ("rev8 %0, %1"  \
406
                        : "=r"(ret)         \
407
                        : "r"(x)); ret; })
408
#endif
409
/*
 * RISC-V with Zknh: the four sigma functions map onto the sha512sum and
 * sha512sig instructions.  On 32-bit targets each 64-bit result is built
 * from two instructions, one per 32-bit half.
 */
#if defined(__riscv_zknh) && __riscv_xlen == 32
410
#define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
411
                        const unsigned int *p = (const unsigned int *)(&(x));           \
412
                        asm ("sha512sum0r %0, %1, %2"                                   \
413
                        : "=r"(r[0])                                                    \
414
                        : "r" (p[0]), "r" (p[1]));                                      \
415
                        asm ("sha512sum0r %0, %2, %1"                                   \
416
                        : "=r"(r[1])                                                    \
417
                        : "r" (p[0]), "r" (p[1])); ret; })
418
#define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
419
                        const unsigned int *p = (const unsigned int *)(&(x));           \
420
                        asm ("sha512sum1r %0, %1, %2"                                   \
421
                        : "=r"(r[0])                                                    \
422
                        : "r" (p[0]), "r" (p[1]));                                      \
423
                        asm ("sha512sum1r %0, %2, %1"                                   \
424
                        : "=r"(r[1])                                                    \
425
                        : "r" (p[0]), "r" (p[1])); ret; })
426
#define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
427
                        const unsigned int *p = (const unsigned int *)(&(x));           \
428
                        asm ("sha512sig0l %0, %1, %2"                                   \
429
                        : "=r"(r[0])                                                    \
430
                        : "r" (p[0]), "r" (p[1]));                                      \
431
                        asm ("sha512sig0h %0, %2, %1"                                   \
432
                        : "=r"(r[1])                                                    \
433
                        : "r" (p[0]), "r" (p[1])); ret; })
434
#define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
435
                        const unsigned int *p = (const unsigned int *)(&(x));           \
436
                        asm ("sha512sig1l %0, %1, %2"                                   \
437
                        : "=r"(r[0])                                                    \
438
                        : "r" (p[0]), "r" (p[1]));                                      \
439
                        asm ("sha512sig1h %0, %2, %1"                                   \
440
                        : "=r"(r[1])                                                    \
441
                        : "r" (p[0]), "r" (p[1])); ret; })
442
#elif defined(__riscv_zknh) && __riscv_xlen == 64
443
#define Sigma0(x) ({ SHA_LONG64 ret;            \
444
                        asm ("sha512sum0 %0, %1"    \
445
                        : "=r"(ret)                 \
446
                        : "r"(x)); ret; })
447
#define Sigma1(x) ({ SHA_LONG64 ret;            \
448
                        asm ("sha512sum1 %0, %1"    \
449
                        : "=r"(ret)                 \
450
                        : "r"(x)); ret; })
451
#define sigma0(x) ({ SHA_LONG64 ret;            \
452
                        asm ("sha512sig0 %0, %1"    \
453
                        : "=r"(ret)                 \
454
                        : "r"(x)); ret; })
455
#define sigma1(x) ({ SHA_LONG64 ret;            \
456
                        asm ("sha512sig1 %0, %1"    \
457
                        : "=r"(ret)                 \
458
                        : "r"(x)); ret; })
459
#endif
460
/*
 * RISC-V with Zbt/Zpn: Ch and Maj are each a single four-operand
 * instruction emitted through a raw ".insn r4" encoding (presumably a
 * conditional-mix/bit-select operation -- confirm against the ISA
 * extension text).  Maj passes x ^ z as the selector operand.
 */
#if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
461
#define Ch(x, y, z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
462
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
463
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
464
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
465
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
466
                        : "=r"(r[0])                                                    \
467
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
468
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
469
                        : "=r"(r[1])                                                    \
470
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
471
#define Maj(x, y, z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
472
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
473
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
474
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
475
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
476
                        : "=r"(r[0])                                                    \
477
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
478
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
479
                        : "=r"(r[1])                                                    \
480
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
481
#elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
482
#define Ch(x, y, z) ({  SHA_LONG64 ret;                           \
483
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
484
                        : "=r"(ret)                                 \
485
                        : "r"(x), "r"(y), "r"(z)); ret; })
486
#define Maj(x, y, z) ({ SHA_LONG64 ret;                           \
487
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
488
                        : "=r"(ret)                                 \
489
                        : "r"(x^z), "r"(y), "r"(x)); ret; })
490
#endif
491
/*
 * Microsoft compiler: _rotr64 intrinsic on 64-bit Windows.  On 32-bit x86
 * PULL64 goes through the __fastcall helper __pull64be, whose body is pure
 * inline assembly with no return statement: it loads through ECX and
 * leaves the byte-swapped halves in EDX and EAX.
 */
#elif defined(_MSC_VER)
492
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
493
#pragma intrinsic(_rotr64)
494
#define ROTR(a, n) _rotr64((a), n)
495
#endif
496
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
497
#if defined(I386_ONLY)
498
static SHA_LONG64 __fastcall __pull64be(const void *x)
499
{
500
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm xchg dh, dl _asm xchg ah, al _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al
501
}
502
#else
503
static SHA_LONG64 __fastcall __pull64be(const void *x) {
504
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm bswap edx _asm bswap eax
505
}
506
#endif
507
#define PULL64(x) __pull64be(&(x))
508
#endif
509
#endif
510
#endif
511
/*
 * Portable definitions for every primitive that was not supplied by a
 * platform-specific variant earlier in the file.
 */

#ifndef PULL64
/* B(x, j): byte j of the object x, shifted to its big-endian position. */
#define B(x, j) \
    (((SHA_LONG64)(*(((const unsigned char *)(&(x))) + (j)))) << ((7 - (j)) * 8))
/* PULL64(x): the 64-bit big-endian value stored in the object x. */
#define PULL64(x) \
    (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
#endif

#ifndef ROTR
/* Rotate the 64-bit value x right by s bits (0 < s < 64). */
#define ROTR(x, s) (((x) >> (s)) | ((x) << (64 - (s))))
#endif

#ifndef Sigma0
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#endif

#ifndef Sigma1
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#endif

#ifndef sigma0
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
#endif

#ifndef sigma1
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
#endif

#ifndef Ch
/* Bitwise choose: each result bit comes from y where x is 1, else from z. */
#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#endif

#ifndef Maj
/* Bitwise majority of x, y and z. */
#define Maj(x, y, z) (((x) & (y)) | ((z) & ((x) | (y))))
#endif
536
537
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
538
/*
539
 * This code should give better results on 32-bit CPU with less than
540
 * ~24 registers, both size and performance wise...
541
 */
542
543
/*
 * Block function, 32-bit x86 variant: folds |num| consecutive blocks at
 * |in| into the chaining state ctx->h[0..7].
 *
 * Only A, E and the scratch value T live in named variables.  Everything
 * else sits in the array X and is reached through the pointer F, which
 * starts at X + 80 and moves down one slot per round.  Each round stores
 * its incoming A, E and message word at F[0], F[4] and F[8]; because F
 * then decrements, in later rounds F[1..3] are the three previous A values,
 * F[5..7] the three previous E values, and F[8 + k] the message-schedule
 * word from k rounds earlier.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
544
    size_t num)
545
{
546
    const SHA_LONG64 *W = in;
547
    SHA_LONG64 A, E, T;
548
    SHA_LONG64 X[9 + 80], *F;
549
    int i;
550
551
    while (num--) {
552
553
        /* Seed the window with the current chaining value. */
        F = X + 80;
554
        A = ctx->h[0];
555
        F[1] = ctx->h[1];
556
        F[2] = ctx->h[2];
557
        F[3] = ctx->h[3];
558
        E = ctx->h[4];
559
        F[5] = ctx->h[5];
560
        F[6] = ctx->h[6];
561
        F[7] = ctx->h[7];
562
563
        /* Rounds 0..15: message words come straight from the input. */
        for (i = 0; i < 16; i++, F--) {
564
#ifdef B_ENDIAN
565
            T = W[i];
566
#else
567
            T = PULL64(W[i]);
568
#endif
569
            F[0] = A;
570
            F[4] = E;
571
            F[8] = T;
572
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
573
            E = F[3] + T;
574
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
575
        }
576
577
        /*
         * Rounds 16..79: the new word is built from the words of 15, 2, 16
         * and 7 rounds ago (F has not yet been written for this round, so
         * the offsets are relative to the previous round's F[8] slot).
         */
        for (; i < 80; i++, F--) {
578
            T = sigma0(F[8 + 16 - 1]);
579
            T += sigma1(F[8 + 16 - 14]);
580
            T += F[8 + 16] + F[8 + 16 - 9];
581
582
            F[0] = A;
583
            F[4] = E;
584
            F[8] = T;
585
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
586
            E = F[3] + T;
587
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
588
        }
589
590
        /* Add the working values back into the chaining state. */
        ctx->h[0] += A;
591
        ctx->h[1] += F[1];
592
        ctx->h[2] += F[2];
593
        ctx->h[3] += F[3];
594
        ctx->h[4] += E;
595
        ctx->h[5] += F[5];
596
        ctx->h[6] += F[6];
597
        ctx->h[7] += F[7];
598
599
        /* Advance to the next input block. */
        W += SHA_LBLOCK;
600
    }
601
}
602
603
#elif defined(OPENSSL_SMALL_FOOTPRINT)
604
605
/*
 * Block function, small-footprint variant: folds |num| consecutive blocks
 * at |in| into the chaining state ctx->h[0..7] using one rolled loop of 80
 * rounds and a 16-word circular message schedule.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    for (; num != 0; num--, W += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (i = 0; i < 80; i++) {
            if (i < 16) {
                /* First 16 rounds: take the word from the input block. */
#ifdef B_ENDIAN
                T1 = X[i] = W[i];
#else
                T1 = X[i] = PULL64(W[i]);
#endif
            } else {
                /*
                 * Later rounds: extend the schedule in place.  The sigma
                 * macros may take their argument's address, so each input
                 * is staged in a variable first.
                 */
                s0 = X[(i + 1) & 0x0f];
                s0 = sigma0(s0);
                s1 = X[(i + 14) & 0x0f];
                s1 = sigma1(s1);

                T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            }

            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);

            /* Shift the eight working variables down by one. */
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
673
674
#else
675
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
676
250M
    do {                                             \
677
250M
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
678
250M
        h = Sigma0(a) + Maj(a, b, c);                \
679
250M
        d += T1;                                     \
680
250M
        h += T1;                                     \
681
250M
    } while (0)
682
683
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
684
200M
    do {                                                   \
685
200M
        s0 = X[(j + 1) & 0x0f];                            \
686
200M
        s0 = sigma0(s0);                                   \
687
200M
        s1 = X[(j + 14) & 0x0f];                           \
688
200M
        s1 = sigma1(s1);                                   \
689
200M
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
690
200M
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
691
200M
    } while (0)
692
693
#ifdef INCLUDE_C_SHA512
694
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
695
#else
696
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
697
    size_t num)
698
#endif
699
9.36k
{
700
9.36k
    const SHA_LONG64 *W = in;
701
9.36k
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
702
9.36k
    SHA_LONG64 X[16];
703
9.36k
    int i;
704
705
3.14M
    while (num--) {
706
707
3.13M
        a = ctx->h[0];
708
3.13M
        b = ctx->h[1];
709
3.13M
        c = ctx->h[2];
710
3.13M
        d = ctx->h[3];
711
3.13M
        e = ctx->h[4];
712
3.13M
        f = ctx->h[5];
713
3.13M
        g = ctx->h[6];
714
3.13M
        h = ctx->h[7];
715
716
#ifdef B_ENDIAN
717
        T1 = X[0] = W[0];
718
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
719
        T1 = X[1] = W[1];
720
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
721
        T1 = X[2] = W[2];
722
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
723
        T1 = X[3] = W[3];
724
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
725
        T1 = X[4] = W[4];
726
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
727
        T1 = X[5] = W[5];
728
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
729
        T1 = X[6] = W[6];
730
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
731
        T1 = X[7] = W[7];
732
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
733
        T1 = X[8] = W[8];
734
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
735
        T1 = X[9] = W[9];
736
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
737
        T1 = X[10] = W[10];
738
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
739
        T1 = X[11] = W[11];
740
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
741
        T1 = X[12] = W[12];
742
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
743
        T1 = X[13] = W[13];
744
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
745
        T1 = X[14] = W[14];
746
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
747
        T1 = X[15] = W[15];
748
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
749
#else
750
3.13M
        T1 = X[0] = PULL64(W[0]);
751
3.13M
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
752
3.13M
        T1 = X[1] = PULL64(W[1]);
753
3.13M
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
754
3.13M
        T1 = X[2] = PULL64(W[2]);
755
3.13M
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
756
3.13M
        T1 = X[3] = PULL64(W[3]);
757
3.13M
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
758
3.13M
        T1 = X[4] = PULL64(W[4]);
759
3.13M
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
760
3.13M
        T1 = X[5] = PULL64(W[5]);
761
3.13M
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
762
3.13M
        T1 = X[6] = PULL64(W[6]);
763
3.13M
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
764
3.13M
        T1 = X[7] = PULL64(W[7]);
765
3.13M
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
766
3.13M
        T1 = X[8] = PULL64(W[8]);
767
3.13M
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
768
3.13M
        T1 = X[9] = PULL64(W[9]);
769
3.13M
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
770
3.13M
        T1 = X[10] = PULL64(W[10]);
771
3.13M
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
772
3.13M
        T1 = X[11] = PULL64(W[11]);
773
3.13M
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
774
3.13M
        T1 = X[12] = PULL64(W[12]);
775
3.13M
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
776
3.13M
        T1 = X[13] = PULL64(W[13]);
777
3.13M
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
778
3.13M
        T1 = X[14] = PULL64(W[14]);
779
3.13M
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
780
3.13M
        T1 = X[15] = PULL64(W[15]);
781
3.13M
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
782
3.13M
#endif
783
784
15.6M
        for (i = 16; i < 80; i += 16) {
785
12.5M
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
786
12.5M
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
787
12.5M
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
788
12.5M
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
789
12.5M
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
790
12.5M
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
791
12.5M
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
792
12.5M
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
793
12.5M
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
794
12.5M
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
795
12.5M
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
796
12.5M
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
797
12.5M
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
798
12.5M
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
799
12.5M
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
800
12.5M
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
801
12.5M
        }
802
803
3.13M
        ctx->h[0] += a;
804
3.13M
        ctx->h[1] += b;
805
3.13M
        ctx->h[2] += c;
806
3.13M
        ctx->h[3] += d;
807
3.13M
        ctx->h[4] += e;
808
3.13M
        ctx->h[5] += f;
809
3.13M
        ctx->h[6] += g;
810
3.13M
        ctx->h[7] += h;
811
812
3.13M
        W += SHA_LBLOCK;
813
3.13M
    }
814
9.36k
}
815
816
#endif
817
818
#endif /* SHA512_ASM */