Coverage Report

Created: 2026-03-03 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
#include <openssl/byteorder.h>
19
/*-
20
 * IMPLEMENTATION NOTES.
21
 *
22
 * As you might have noticed, 32-bit hash algorithms:
23
 *
24
 * - permit SHA_LONG to be wider than 32-bit
25
 * - optimized versions implement two transform functions: one operating
26
 *   on [aligned] data in host byte order, and one operating on data in input
27
 *   stream byte order;
28
 * - share common byte-order neutral collector and padding function
29
 *   implementations, crypto/md32_common.h;
30
 *
31
 * Neither of the above applies to this SHA-512 implementation. Reasons
32
 * [in reverse order] are:
33
 *
34
 * - it's the only 64-bit hash algorithm at the time of this writing,
35
 *   there is no need for common collector/padding implementation [yet];
36
 * - by supporting only one transform function [which operates on
37
 *   *aligned* data in input stream byte order, big-endian in this case]
38
 *   we minimize burden of maintenance in two ways: a) collector/padding
39
 *   function is simpler; b) only one transform function to stare at;
40
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
41
 *   apply a number of optimizations to mitigate potential performance
42
 *   penalties caused by previous design decision;
43
 *
44
 * Caveat lector.
45
 *
46
 * Implementation relies on the fact that "long long" is 64-bit on
47
 * both 32- and 64-bit platforms. If some compiler vendor comes up
48
 * with 128-bit long long, adjustment to sha.h would be required.
49
 * As this implementation relies on 64-bit integer type, it's totally
50
 * inappropriate for platforms which don't support it, most notably
51
 * 16-bit platforms.
52
 */
53
#include <stdlib.h>
54
#include <string.h>
55
56
#include <openssl/crypto.h>
57
#include <openssl/sha.h>
58
#include <openssl/opensslv.h>
59
60
#include "internal/cryptlib.h"
61
#include "crypto/sha.h"
62
63
/*
 * Targets on which the block routine is handed caller data directly, even
 * when it is not aligned to a 64-bit word.  Where this macro is absent the
 * update/transform code first copies misaligned input into the context
 * buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) \
    || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) \
    || defined(__s390__) || defined(__s390x__) \
    || defined(__aarch64__) \
    || defined(SHA512_ASM)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

/*
 * U64(C) pastes the toolchain's 64-bit unsigned literal suffix onto the
 * constant C.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
#define U64(C) C##UI64
#elif defined(__arch64__)
#define U64(C) C##UL
#else
#define U64(C) C##ULL
#endif
74
75
/*
 * Prototype for the update routine that takes the hash context as an
 * untyped pointer; the definition appears further down in this file.
 */
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len);
76
77
/*
 * Prepare |c| for a fresh hash whose output length is SHA224_DIGEST_LENGTH
 * (the 224-bit truncated variant, going by the function name): load the
 * eight initial chaining words and clear the bit counters and the count
 * of buffered bytes.
 *
 * Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < 8; i++)
        c->h[i] = initial_state[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA224_DIGEST_LENGTH;

    return 1;
}
94
95
/*
 * Prepare |c| for a fresh hash whose output length is SHA256_DIGEST_LENGTH
 * (the 256-bit truncated variant, going by the function name): load the
 * eight initial chaining words and clear the bit counters and the count
 * of buffered bytes.
 *
 * Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < 8; i++)
        c->h[i] = initial_state[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA256_DIGEST_LENGTH;

    return 1;
}
112
113
/*
 * Prepare |c| for a fresh SHA-384 computation: load the eight initial
 * chaining words, clear the bit counters and the count of buffered bytes,
 * and record SHA384_DIGEST_LENGTH as the output length.
 *
 * Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < 8; i++)
        c->h[i] = initial_state[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;

    return 1;
}
130
131
/*
 * Prepare |c| for a fresh SHA-512 computation: load the eight initial
 * chaining words, clear the bit counters and the count of buffered bytes,
 * and record SHA512_DIGEST_LENGTH as the output length.
 *
 * Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < 8; i++)
        c->h[i] = initial_state[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;

    return 1;
}
148
149
#ifndef SHA512_ASM
150
static
151
#else
152
#ifdef INCLUDE_C_SHA512
153
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
154
#endif
155
#endif
156
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
157
158
#define OUTPUT_RESULT(md, len)      \
159
29.7k
    for (n = 0; n < (len / 8); n++) \
160
26.4k
    md = OPENSSL_store_u64_be(md, (uint64_t)c->h[n])
161
162
int SHA512_Final(unsigned char *out, SHA512_CTX *c)
163
3.31k
{
164
3.31k
    unsigned char *p = (unsigned char *)c->u.p;
165
3.31k
    size_t n = c->num;
166
167
3.31k
    p[n] = 0x80; /* There always is a room for one */
168
3.31k
    n++;
169
3.31k
    if (n > (sizeof(c->u) - 16)) {
170
42
        memset(p + n, 0, sizeof(c->u) - n);
171
42
        n = 0;
172
42
        sha512_block_data_order(c, p, 1);
173
42
    }
174
175
3.31k
    memset(p + n, 0, sizeof(c->u) - 16 - n);
176
#ifdef B_ENDIAN
177
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
178
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
179
#else
180
3.31k
    uint8_t *cu = p + sizeof(c->u) - 16;
181
182
3.31k
    cu = OPENSSL_store_u64_be(cu, (uint64_t)c->Nh);
183
3.31k
    cu = OPENSSL_store_u64_be(cu, (uint64_t)c->Nl);
184
3.31k
#endif
185
186
3.31k
    sha512_block_data_order(c, p, 1);
187
188
3.31k
    if (out == NULL)
189
0
        return 0;
190
191
    /* Let compiler decide if it's appropriate to unroll... */
192
3.31k
    switch (c->md_len) {
193
0
    case SHA256_192_DIGEST_LENGTH:
194
0
        OUTPUT_RESULT(out, SHA256_192_DIGEST_LENGTH);
195
0
        break;
196
1
    case SHA256_DIGEST_LENGTH:
197
1
        OUTPUT_RESULT(out, SHA256_DIGEST_LENGTH);
198
1
        break;
199
14
    case SHA384_DIGEST_LENGTH:
200
14
        OUTPUT_RESULT(out, SHA384_DIGEST_LENGTH);
201
14
        break;
202
3.29k
    case SHA512_DIGEST_LENGTH:
203
3.29k
        OUTPUT_RESULT(out, SHA512_DIGEST_LENGTH);
204
3.29k
        break;
205
1
    case SHA224_DIGEST_LENGTH: {
206
1
        OUTPUT_RESULT(out, SHA224_DIGEST_LENGTH);
207
        /*
208
         * For 224 bits, there are four bytes left over that have to be
209
         * processed separately.
210
         */
211
1
        {
212
1
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
213
214
1
            *(out++) = (unsigned char)(t >> 56);
215
1
            *(out++) = (unsigned char)(t >> 48);
216
1
            *(out++) = (unsigned char)(t >> 40);
217
1
            *(out++) = (unsigned char)(t >> 32);
218
1
        }
219
1
        break;
220
0
    }
221
    /* ... as well as make sure md_len is not abused. */
222
0
    default:
223
0
        return 0;
224
3.31k
    }
225
226
3.31k
    return 1;
227
3.31k
}
228
229
/*
 * SHA-384 finalisation: forwards to SHA512_Final(), which picks the output
 * length from c->md_len.  Returns whatever SHA512_Final() returns.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    int ok = SHA512_Final(md, c);

    return ok;
}
233
234
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len)
235
6.63k
{
236
6.63k
    SHA512_CTX *c = (SHA512_CTX *)cp;
237
6.63k
    SHA_LONG64 l;
238
6.63k
    unsigned char *p = c->u.p;
239
240
6.63k
    if (len == 0)
241
0
        return 1;
242
243
6.63k
    l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
244
6.63k
    if (l < c->Nl)
245
0
        c->Nh++;
246
6.63k
    if (sizeof(len) >= 8)
247
6.63k
        c->Nh += (((SHA_LONG64)len) >> 61);
248
6.63k
    c->Nl = l;
249
250
6.63k
    if (c->num != 0) {
251
0
        size_t n = sizeof(c->u) - c->num;
252
253
0
        if (len < n) {
254
0
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
255
0
            return 1;
256
0
        } else {
257
0
            memcpy(p + c->num, data, n), c->num = 0;
258
0
            len -= n, data += n;
259
0
            sha512_block_data_order(c, p, 1);
260
0
        }
261
0
    }
262
263
6.63k
    if (len >= sizeof(c->u)) {
264
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
265
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
266
            while (len >= sizeof(c->u))
267
                memcpy(p, data, sizeof(c->u)),
268
                    sha512_block_data_order(c, p, 1),
269
                    len -= sizeof(c->u), data += sizeof(c->u);
270
        else
271
#endif
272
6.63k
            sha512_block_data_order(c, data, len / sizeof(c->u)),
273
6.63k
                data += len, len %= sizeof(c->u), data -= len;
274
6.63k
    }
275
276
6.63k
    if (len != 0)
277
79
        memcpy(p, data, len), c->num = (int)len;
278
279
6.63k
    return 1;
280
6.63k
}
281
282
/*
 * Typed front end for SHA512_Update_thunk(): absorbs |len| bytes starting
 * at |_data| into |c| and returns the thunk's result.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *bytes = (const unsigned char *)_data;

    return SHA512_Update_thunk((void *)c, bytes, len);
}
286
287
/*
 * SHA-384 update: forwards to SHA512_Update_thunk(), absorbing |len| bytes
 * starting at |data| into |c| and returning the thunk's result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    const unsigned char *bytes = (const unsigned char *)data;

    return SHA512_Update_thunk((void *)c, bytes, len);
}
291
292
/*
 * Run the compression function over exactly one block at |data|, updating
 * the chaining state in |c|.  On targets that cannot hand misaligned input
 * to the block routine, the block is first copied into the context buffer.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
300
301
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
302
static const SHA_LONG64 K512[80] = {
303
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
304
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
305
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
306
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
307
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
308
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
309
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
310
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
311
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
312
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
313
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
314
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
315
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
316
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
317
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
318
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
319
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
320
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
321
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
322
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
323
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
324
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
325
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
326
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
327
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
328
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
329
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
330
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
331
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
332
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
333
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
334
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
335
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
336
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
337
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
338
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
339
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
340
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
341
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
342
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
343
};
344
345
/*
 * Optional compiler/CPU specific versions of the primitive operations.
 * Any macro left undefined here gets a portable definition afterwards.
 * Defining PEDANTIC disables this whole section.
 */
#ifndef PEDANTIC
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate right with rorq. */
#define ROTR(a, n) \
    ({ \
        SHA_LONG64 ret; \
        asm ("rorq %1,%0" \
             : "=r"(ret) \
             : "J"(n),"0"(a) \
             : "cc"); \
        ret; \
    })
#if !defined(B_ENDIAN)
/* x86-64: load a 64-bit word and byte-swap it with bswapq. */
#define PULL64(x) \
    ({ \
        SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
        asm ("bswapq    %0" \
             : "=r"(ret) \
             : "0"(ret)); \
        ret; \
    })
#endif /* !B_ENDIAN */
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#if defined(I386_ONLY)
/* i386 without bswap: swap each 32-bit half using xchgb/roll. */
#define PULL64(x) \
    ({ \
        const unsigned int *p=(const unsigned int *)(&(x)); \
        unsigned int hi=p[0],lo=p[1]; \
        asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
            "roll $16,%%eax; roll $16,%%edx; " \
            "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
            : "=a"(lo),"=d"(hi) \
            : "0"(lo),"1"(hi) : "cc"); \
        ((SHA_LONG64)hi)<<32|lo; \
    })
#else
/* i386 with bswap: swap each 32-bit half using bswapl. */
#define PULL64(x) \
    ({ \
        const unsigned int *p=(const unsigned int *)(&(x)); \
        unsigned int hi=p[0],lo=p[1]; \
        asm ("bswapl %0; bswapl %1;" \
             : "=r"(lo),"=r"(hi) \
             : "0"(lo),"1"(hi)); \
        ((SHA_LONG64)hi)<<32|lo; \
    })
#endif /* I386_ONLY */
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate right with rotrdi. */
#define ROTR(a, n) \
    ({ \
        SHA_LONG64 ret; \
        asm ("rotrdi %0,%1,%2" \
             : "=r"(ret) \
             : "r"(a),"K"(n)); \
        ret; \
    })
#elif defined(__aarch64__)
/* AArch64: rotate right with ror. */
#define ROTR(a, n) \
    ({ \
        SHA_LONG64 ret; \
        asm ("ror %0,%1,%2" \
             : "=r"(ret) \
             : "r"(a),"I"(n)); \
        ret; \
    })
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* Little-endian AArch64: load a 64-bit word and byte-reverse it with rev. */
#define PULL64(x) \
    ({ \
        SHA_LONG64 ret; \
        asm ("rev       %0,%1" \
             : "=r"(ret) \
             : "r"(*((const SHA_LONG64 *)(&(x))))); \
        ret; \
    })
#endif
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
/* RV32 with rev8: reverse each 32-bit half and exchange the halves. */
#define PULL64(x) \
    ({ \
        SHA_LONG64 ret; \
        unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *p = (const unsigned int *)(&(x)); \
        asm ("rev8 %0, %1" \
             : "=r"(r[0]) \
             : "r" (p[1])); \
        asm ("rev8 %0, %1" \
             : "=r"(r[1]) \
             : "r" (p[0])); \
        ret; \
    })
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
/* RV64 with rev8: byte-reverse the whole 64-bit word. */
#define PULL64(x) \
    ({ \
        SHA_LONG64 ret; \
        asm ("rev8 %0, %1" \
             : "=r"(ret) \
             : "r"(x)); \
        ret; \
    })
#endif
#if defined(__riscv_zknh) && __riscv_xlen == 32
/*
 * RV32 with Zknh: each 64-bit sigma function is assembled from two 32-bit
 * halves.  The argument must be an lvalue because its address is taken.
 */
#define Sigma0(x) \
    ({ \
        SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *p = (const unsigned int *)(&(x)); \
        asm ("sha512sum0r %0, %1, %2" \
             : "=r"(r[0]) \
             : "r" (p[0]), "r" (p[1])); \
        asm ("sha512sum0r %0, %2, %1" \
             : "=r"(r[1]) \
             : "r" (p[0]), "r" (p[1])); \
        ret; \
    })
#define Sigma1(x) \
    ({ \
        SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *p = (const unsigned int *)(&(x)); \
        asm ("sha512sum1r %0, %1, %2" \
             : "=r"(r[0]) \
             : "r" (p[0]), "r" (p[1])); \
        asm ("sha512sum1r %0, %2, %1" \
             : "=r"(r[1]) \
             : "r" (p[0]), "r" (p[1])); \
        ret; \
    })
#define sigma0(x) \
    ({ \
        SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *p = (const unsigned int *)(&(x)); \
        asm ("sha512sig0l %0, %1, %2" \
             : "=r"(r[0]) \
             : "r" (p[0]), "r" (p[1])); \
        asm ("sha512sig0h %0, %2, %1" \
             : "=r"(r[1]) \
             : "r" (p[0]), "r" (p[1])); \
        ret; \
    })
#define sigma1(x) \
    ({ \
        SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *p = (const unsigned int *)(&(x)); \
        asm ("sha512sig1l %0, %1, %2" \
             : "=r"(r[0]) \
             : "r" (p[0]), "r" (p[1])); \
        asm ("sha512sig1h %0, %2, %1" \
             : "=r"(r[1]) \
             : "r" (p[0]), "r" (p[1])); \
        ret; \
    })
#elif defined(__riscv_zknh) && __riscv_xlen == 64
/* RV64 with Zknh: one instruction per sigma function. */
#define Sigma0(x) \
    ({ \
        SHA_LONG64 ret; \
        asm ("sha512sum0 %0, %1" \
             : "=r"(ret) \
             : "r"(x)); \
        ret; \
    })
#define Sigma1(x) \
    ({ \
        SHA_LONG64 ret; \
        asm ("sha512sum1 %0, %1" \
             : "=r"(ret) \
             : "r"(x)); \
        ret; \
    })
#define sigma0(x) \
    ({ \
        SHA_LONG64 ret; \
        asm ("sha512sig0 %0, %1" \
             : "=r"(ret) \
             : "r"(x)); \
        ret; \
    })
#define sigma1(x) \
    ({ \
        SHA_LONG64 ret; \
        asm ("sha512sig1 %0, %1" \
             : "=r"(ret) \
             : "r"(x)); \
        ret; \
    })
#endif
#if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
/*
 * RV32 with Zbt/Zpn: Ch and Maj computed per 32-bit half through a
 * hand-encoded four-operand instruction.  Arguments must be lvalues
 * because their addresses are taken.
 */
#define Ch(x, y, z) \
    ({ \
        SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *xp = (const unsigned int *)(&(x)); \
        const unsigned int *yp = (const unsigned int *)(&(y)); \
        const unsigned int *zp = (const unsigned int *)(&(z)); \
        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
             : "=r"(r[0]) \
             : "r"(xp[0]), "r"(yp[0]), "r"(zp[0])); \
        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
             : "=r"(r[1]) \
             : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); \
        ret; \
    })
#define Maj(x, y, z) \
    ({ \
        SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
        const unsigned int *xp = (const unsigned int *)(&(x)); \
        const unsigned int *yp = (const unsigned int *)(&(y)); \
        const unsigned int *zp = (const unsigned int *)(&(z)); \
        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
             : "=r"(r[0]) \
             : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0])); \
        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
             : "=r"(r[1]) \
             : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); \
        ret; \
    })
#elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
/* RV64 with Zbt/Zpn: Ch and Maj through the same hand-encoded instruction. */
#define Ch(x, y, z) \
    ({ \
        SHA_LONG64 ret; \
        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3" \
             : "=r"(ret) \
             : "r"(x), "r"(y), "r"(z)); \
        ret; \
    })
#define Maj(x, y, z) \
    ({ \
        SHA_LONG64 ret; \
        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3" \
             : "=r"(ret) \
             : "r"(x^z), "r"(y), "r"(x)); \
        ret; \
    })
#endif
#elif defined(_MSC_VER)
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
#pragma intrinsic(_rotr64)
#define ROTR(a, n) _rotr64((a), n)
#endif
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: byte-swapping 64-bit load.  The pointer arrives in ecx
 * (__fastcall); the asm leaves its result in edx:eax and the function has
 * no return statement of its own.
 */
#if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx, [ecx + 0]
    _asm mov eax, [ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx, [ecx + 0]
    _asm mov eax, [ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#endif
#define PULL64(x) __pull64be(&(x))
#endif
#endif /* __GNUC__ / _MSC_VER */
#endif /* !PEDANTIC */
511
/*
 * Portable definitions for every primitive that did not get a
 * platform-specific version earlier in the file.
 */

/* PULL64(x): read the eight bytes at &x as one big-endian 64-bit value. */
#ifndef PULL64
#define B(x, j) \
    (((SHA_LONG64)(*(((const unsigned char *)(&(x))) + (j)))) << ((7 - (j)) * 8))
#define PULL64(x) \
    (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) \
        | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
#endif

/* ROTR(x, s): rotate the 64-bit value x right by s bits. */
#ifndef ROTR
#define ROTR(x, s) (((x) >> (s)) | ((x) << (64 - (s))))
#endif

/* The four sigma functions: XOR of fixed rotations (and shifts) of x. */
#ifndef Sigma0
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#endif
#ifndef Sigma1
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#endif
#ifndef sigma0
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
#endif
#ifndef sigma1
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
#endif

/* Ch: each bit of x selects the matching bit of y (if set) or z (if clear). */
#ifndef Ch
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
#endif

/* Maj: bitwise majority vote of x, y and z. */
#ifndef Maj
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#endif
536
537
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
538
/*
539
 * This code should give better results on 32-bit CPU with less than
540
 * ~24 registers, both size and performance wise...
541
 */
542
543
/*
 * Compress |num| consecutive blocks starting at |in| into ctx->h.
 *
 * Only A and E are kept in named variables.  The remaining working values
 * and the message schedule live in |frame|, viewed through the pointer
 * |win| that moves down one slot per round: what was stored as win[0]
 * (A) and win[4] (E) in one round is read back as win[1] and win[5] in
 * the next, and the schedule word stored at win[8] is found k rounds
 * later at win[8 + k].
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *msg = in;
    SHA_LONG64 frame[9 + 80];
    SHA_LONG64 *win;
    SHA_LONG64 A, E, T;
    int t;

    for (; num != 0; num--) {
        win = frame + 80;

        A = ctx->h[0];
        win[1] = ctx->h[1];
        win[2] = ctx->h[2];
        win[3] = ctx->h[3];
        E = ctx->h[4];
        win[5] = ctx->h[5];
        win[6] = ctx->h[6];
        win[7] = ctx->h[7];

        /* Rounds 0..15 take their schedule word straight from the input. */
        for (t = 0; t < 16; t++) {
#ifdef B_ENDIAN
            T = msg[t];
#else
            T = PULL64(msg[t]);
#endif
            win[0] = A;
            win[4] = E;
            win[8] = T;
            T += win[7] + Sigma1(E) + Ch(E, win[5], win[6]) + K512[t];
            E = win[3] + T;
            A = T + Sigma0(A) + Maj(A, win[1], win[2]);
            win--;
        }

        /* Rounds 16..79 derive the schedule word from earlier ones. */
        while (t < 80) {
            T = sigma0(win[8 + 16 - 1]);
            T += sigma1(win[8 + 16 - 14]);
            T += win[8 + 16] + win[8 + 16 - 9];

            win[0] = A;
            win[4] = E;
            win[8] = T;
            T += win[7] + Sigma1(E) + Ch(E, win[5], win[6]) + K512[t];
            E = win[3] + T;
            A = T + Sigma0(A) + Maj(A, win[1], win[2]);
            win--;
            t++;
        }

        ctx->h[0] += A;
        ctx->h[1] += win[1];
        ctx->h[2] += win[2];
        ctx->h[3] += win[3];
        ctx->h[4] += E;
        ctx->h[5] += win[5];
        ctx->h[6] += win[6];
        ctx->h[7] += win[7];

        msg += SHA_LBLOCK;
    }
}
602
603
#elif defined(OPENSSL_SMALL_FOOTPRINT)
604
605
/*
 * Compact (looped, not unrolled) compression routine: folds |num|
 * consecutive blocks starting at |in| into ctx->h.  The message schedule
 * is held in the 16-entry array X, indexed modulo 16.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 X[16];
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 s0, s1, T1, T2;
    int t;

    for (; num != 0; num--) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: schedule words come directly from the input. */
        for (t = 0; t < 16; t++) {
#ifdef B_ENDIAN
            X[t] = W[t];
#else
            X[t] = PULL64(W[t]);
#endif
            T1 = X[t];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[t];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Rounds 16..79: extend the schedule in place, then mix. */
        for (; t < 80; t++) {
            s0 = X[(t + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(t + 14) & 0x0f];
            s1 = sigma1(s1);

            X[t & 0xf] += s0 + s1 + X[(t + 9) & 0xf];
            T1 = X[t & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[t];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
673
674
#else
675
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
676
266M
    do {                                             \
677
266M
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
678
266M
        h = Sigma0(a) + Maj(a, b, c);                \
679
266M
        d += T1;                                     \
680
266M
        h += T1;                                     \
681
266M
    } while (0)
682
683
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
684
213M
    do {                                                   \
685
213M
        s0 = X[(j + 1) & 0x0f];                            \
686
213M
        s0 = sigma0(s0);                                   \
687
213M
        s1 = X[(j + 14) & 0x0f];                           \
688
213M
        s1 = sigma1(s1);                                   \
689
213M
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
690
213M
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
691
213M
    } while (0)
692
693
#ifdef INCLUDE_C_SHA512
694
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
695
#else
696
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
697
    size_t num)
698
#endif
699
9.98k
{
700
9.98k
    const SHA_LONG64 *W = in;
701
9.98k
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
702
9.98k
    SHA_LONG64 X[16];
703
9.98k
    int i;
704
705
3.34M
    while (num--) {
706
707
3.33M
        a = ctx->h[0];
708
3.33M
        b = ctx->h[1];
709
3.33M
        c = ctx->h[2];
710
3.33M
        d = ctx->h[3];
711
3.33M
        e = ctx->h[4];
712
3.33M
        f = ctx->h[5];
713
3.33M
        g = ctx->h[6];
714
3.33M
        h = ctx->h[7];
715
716
#ifdef B_ENDIAN
717
        T1 = X[0] = W[0];
718
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
719
        T1 = X[1] = W[1];
720
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
721
        T1 = X[2] = W[2];
722
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
723
        T1 = X[3] = W[3];
724
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
725
        T1 = X[4] = W[4];
726
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
727
        T1 = X[5] = W[5];
728
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
729
        T1 = X[6] = W[6];
730
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
731
        T1 = X[7] = W[7];
732
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
733
        T1 = X[8] = W[8];
734
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
735
        T1 = X[9] = W[9];
736
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
737
        T1 = X[10] = W[10];
738
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
739
        T1 = X[11] = W[11];
740
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
741
        T1 = X[12] = W[12];
742
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
743
        T1 = X[13] = W[13];
744
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
745
        T1 = X[14] = W[14];
746
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
747
        T1 = X[15] = W[15];
748
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
749
#else
750
3.33M
        T1 = X[0] = PULL64(W[0]);
751
3.33M
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
752
3.33M
        T1 = X[1] = PULL64(W[1]);
753
3.33M
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
754
3.33M
        T1 = X[2] = PULL64(W[2]);
755
3.33M
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
756
3.33M
        T1 = X[3] = PULL64(W[3]);
757
3.33M
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
758
3.33M
        T1 = X[4] = PULL64(W[4]);
759
3.33M
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
760
3.33M
        T1 = X[5] = PULL64(W[5]);
761
3.33M
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
762
3.33M
        T1 = X[6] = PULL64(W[6]);
763
3.33M
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
764
3.33M
        T1 = X[7] = PULL64(W[7]);
765
3.33M
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
766
3.33M
        T1 = X[8] = PULL64(W[8]);
767
3.33M
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
768
3.33M
        T1 = X[9] = PULL64(W[9]);
769
3.33M
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
770
3.33M
        T1 = X[10] = PULL64(W[10]);
771
3.33M
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
772
3.33M
        T1 = X[11] = PULL64(W[11]);
773
3.33M
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
774
3.33M
        T1 = X[12] = PULL64(W[12]);
775
3.33M
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
776
3.33M
        T1 = X[13] = PULL64(W[13]);
777
3.33M
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
778
3.33M
        T1 = X[14] = PULL64(W[14]);
779
3.33M
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
780
3.33M
        T1 = X[15] = PULL64(W[15]);
781
3.33M
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
782
3.33M
#endif
783
784
16.6M
        for (i = 16; i < 80; i += 16) {
785
13.3M
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
786
13.3M
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
787
13.3M
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
788
13.3M
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
789
13.3M
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
790
13.3M
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
791
13.3M
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
792
13.3M
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
793
13.3M
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
794
13.3M
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
795
13.3M
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
796
13.3M
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
797
13.3M
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
798
13.3M
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
799
13.3M
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
800
13.3M
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
801
13.3M
        }
802
803
3.33M
        ctx->h[0] += a;
804
3.33M
        ctx->h[1] += b;
805
3.33M
        ctx->h[2] += c;
806
3.33M
        ctx->h[3] += d;
807
3.33M
        ctx->h[4] += e;
808
3.33M
        ctx->h[5] += f;
809
3.33M
        ctx->h[6] += g;
810
3.33M
        ctx->h[7] += h;
811
812
3.33M
        W += SHA_LBLOCK;
813
3.33M
    }
814
9.98k
}
815
816
#endif
817
818
#endif /* SHA512_ASM */