Coverage Report

Created: 2026-03-09 06:55

Legend: (L) marks the next uncovered line, (R) the next uncovered region, (B) the next uncovered branch.
/src/openssl/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
#include <openssl/byteorder.h>
19
/*-
20
 * IMPLEMENTATION NOTES.
21
 *
22
 * As you might have noticed, 32-bit hash algorithms:
23
 *
24
 * - permit SHA_LONG to be wider than 32-bit
25
 * - optimized versions implement two transform functions: one operating
26
 *   on [aligned] data in host byte order, and one operating on data in input
27
 *   stream byte order;
28
 * - share common byte-order neutral collector and padding function
29
 *   implementations, crypto/md32_common.h;
30
 *
31
 * Neither of the above applies to this SHA-512 implementation. Reasons
32
 * [in reverse order] are:
33
 *
34
 * - it's the only 64-bit hash algorithm for the moment of this writing,
35
 *   there is no need for common collector/padding implementation [yet];
36
 * - by supporting only one transform function [which operates on
37
 *   *aligned* data in input stream byte order, big-endian in this case]
38
 *   we minimize burden of maintenance in two ways: a) collector/padding
39
 *   function is simpler; b) only one transform function to stare at;
40
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
41
 *   apply a number of optimizations to mitigate potential performance
42
 *   penalties caused by previous design decision;
43
 *
44
 * Caveat lector.
45
 *
46
 * Implementation relies on the fact that "long long" is 64-bit on
47
 * both 32- and 64-bit platforms. If some compiler vendor comes up
48
 * with 128-bit long long, adjustment to sha.h would be required.
49
 * As this implementation relies on 64-bit integer type, it's totally
50
 * inappropriate for platforms which don't support it, most notably
51
 * 16-bit platforms.
52
 */
53
#include <stdlib.h>
54
#include <string.h>
55
56
#include <openssl/crypto.h>
57
#include <openssl/sha.h>
58
#include <openssl/opensslv.h>
59
60
#include "internal/cryptlib.h"
61
#include "crypto/sha.h"
62
63
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(__s390__) || defined(__s390x__) || defined(__aarch64__) || defined(SHA512_ASM)
64
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
65
#endif
66
67
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
68
#define U64(C) C##UI64
69
#elif defined(__arch64__)
70
#define U64(C) C##UL
71
#else
72
656
#define U64(C) C##ULL
73
#endif
74
75
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len);
76
77
int sha512_224_init(SHA512_CTX *c)
78
0
{
79
0
    c->h[0] = U64(0x8c3d37c819544da2);
80
0
    c->h[1] = U64(0x73e1996689dcd4d6);
81
0
    c->h[2] = U64(0x1dfab7ae32ff9c82);
82
0
    c->h[3] = U64(0x679dd514582f9fcf);
83
0
    c->h[4] = U64(0x0f6d2b697bd44da8);
84
0
    c->h[5] = U64(0x77e36f7304c48942);
85
0
    c->h[6] = U64(0x3f9d85a86a1d36c8);
86
0
    c->h[7] = U64(0x1112e6ad91d692a1);
87
88
0
    c->Nl = 0;
89
0
    c->Nh = 0;
90
0
    c->num = 0;
91
0
    c->md_len = SHA224_DIGEST_LENGTH;
92
0
    return 1;
93
0
}
94
95
int sha512_256_init(SHA512_CTX *c)
96
0
{
97
0
    c->h[0] = U64(0x22312194fc2bf72c);
98
0
    c->h[1] = U64(0x9f555fa3c84c64c2);
99
0
    c->h[2] = U64(0x2393b86b6f53b151);
100
0
    c->h[3] = U64(0x963877195940eabd);
101
0
    c->h[4] = U64(0x96283ee2a88effe3);
102
0
    c->h[5] = U64(0xbe5e1e2553863992);
103
0
    c->h[6] = U64(0x2b0199fc2c85b8aa);
104
0
    c->h[7] = U64(0x0eb72ddc81c52ca2);
105
106
0
    c->Nl = 0;
107
0
    c->Nh = 0;
108
0
    c->num = 0;
109
0
    c->md_len = SHA256_DIGEST_LENGTH;
110
0
    return 1;
111
0
}
112
113
int SHA384_Init(SHA512_CTX *c)
114
0
{
115
0
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
116
0
    c->h[1] = U64(0x629a292a367cd507);
117
0
    c->h[2] = U64(0x9159015a3070dd17);
118
0
    c->h[3] = U64(0x152fecd8f70e5939);
119
0
    c->h[4] = U64(0x67332667ffc00b31);
120
0
    c->h[5] = U64(0x8eb44a8768581511);
121
0
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
122
0
    c->h[7] = U64(0x47b5481dbefa4fa4);
123
124
0
    c->Nl = 0;
125
0
    c->Nh = 0;
126
0
    c->num = 0;
127
0
    c->md_len = SHA384_DIGEST_LENGTH;
128
0
    return 1;
129
0
}
130
131
int SHA512_Init(SHA512_CTX *c)
132
64
{
133
64
    c->h[0] = U64(0x6a09e667f3bcc908);
134
64
    c->h[1] = U64(0xbb67ae8584caa73b);
135
64
    c->h[2] = U64(0x3c6ef372fe94f82b);
136
64
    c->h[3] = U64(0xa54ff53a5f1d36f1);
137
64
    c->h[4] = U64(0x510e527fade682d1);
138
64
    c->h[5] = U64(0x9b05688c2b3e6c1f);
139
64
    c->h[6] = U64(0x1f83d9abfb41bd6b);
140
64
    c->h[7] = U64(0x5be0cd19137e2179);
141
142
64
    c->Nl = 0;
143
64
    c->Nh = 0;
144
64
    c->num = 0;
145
64
    c->md_len = SHA512_DIGEST_LENGTH;
146
64
    return 1;
147
64
}
148
149
#ifndef SHA512_ASM
150
static
151
#else
152
#ifdef INCLUDE_C_SHA512
153
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
154
#endif
155
#endif
156
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
157
158
#define OUTPUT_RESULT(md, len)      \
159
576
    for (n = 0; n < (len / 8); n++) \
160
512
    md = OPENSSL_store_u64_be(md, (uint64_t)c->h[n])
161
162
int SHA512_Final(unsigned char *out, SHA512_CTX *c)
163
64
{
164
64
    unsigned char *p = (unsigned char *)c->u.p;
165
64
    size_t n = c->num;
166
167
64
    p[n] = 0x80; /* There always is a room for one */
168
64
    n++;
169
64
    if (n > (sizeof(c->u) - 16)) {
170
0
        memset(p + n, 0, sizeof(c->u) - n);
171
0
        n = 0;
172
0
        sha512_block_data_order(c, p, 1);
173
0
    }
174
175
64
    memset(p + n, 0, sizeof(c->u) - 16 - n);
176
#ifdef B_ENDIAN
177
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
178
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
179
#else
180
64
    uint8_t *cu = p + sizeof(c->u) - 16;
181
182
64
    cu = OPENSSL_store_u64_be(cu, (uint64_t)c->Nh);
183
64
    cu = OPENSSL_store_u64_be(cu, (uint64_t)c->Nl);
184
64
#endif
185
186
64
    sha512_block_data_order(c, p, 1);
187
188
64
    if (out == NULL)
189
0
        return 0;
190
191
    /* Let compiler decide if it's appropriate to unroll... */
192
64
    switch (c->md_len) {
193
0
    case SHA256_192_DIGEST_LENGTH:
194
0
        OUTPUT_RESULT(out, SHA256_192_DIGEST_LENGTH);
195
0
        break;
196
0
    case SHA256_DIGEST_LENGTH:
197
0
        OUTPUT_RESULT(out, SHA256_DIGEST_LENGTH);
198
0
        break;
199
0
    case SHA384_DIGEST_LENGTH:
200
0
        OUTPUT_RESULT(out, SHA384_DIGEST_LENGTH);
201
0
        break;
202
64
    case SHA512_DIGEST_LENGTH:
203
64
        OUTPUT_RESULT(out, SHA512_DIGEST_LENGTH);
204
64
        break;
205
0
    case SHA224_DIGEST_LENGTH: {
206
0
        OUTPUT_RESULT(out, SHA224_DIGEST_LENGTH);
207
        /*
208
         * For 224 bits, there are four bytes left over that have to be
209
         * processed separately.
210
         */
211
0
        {
212
0
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
213
214
0
            *(out++) = (unsigned char)(t >> 56);
215
0
            *(out++) = (unsigned char)(t >> 48);
216
0
            *(out++) = (unsigned char)(t >> 40);
217
0
            *(out++) = (unsigned char)(t >> 32);
218
0
        }
219
0
        break;
220
0
    }
221
    /* ... as well as make sure md_len is not abused. */
222
0
    default:
223
0
        return 0;
224
64
    }
225
226
64
    return 1;
227
64
}
228
229
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
230
0
{
231
0
    return SHA512_Final(md, c);
232
0
}
233
234
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len)
235
144
{
236
144
    SHA512_CTX *c = (SHA512_CTX *)cp;
237
144
    SHA_LONG64 l;
238
144
    unsigned char *p = c->u.p;
239
240
144
    if (len == 0)
241
0
        return 1;
242
243
144
    l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
244
144
    if (l < c->Nl)
245
0
        c->Nh++;
246
144
    if (sizeof(len) >= 8)
247
144
        c->Nh += (((SHA_LONG64)len) >> 61);
248
144
    c->Nl = l;
249
250
144
    if (c->num != 0) {
251
80
        size_t n = sizeof(c->u) - c->num;
252
253
80
        if (len < n) {
254
80
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
255
80
            return 1;
256
80
        } else {
257
0
            memcpy(p + c->num, data, n), c->num = 0;
258
0
            len -= n, data += n;
259
0
            sha512_block_data_order(c, p, 1);
260
0
        }
261
80
    }
262
263
64
    if (len >= sizeof(c->u)) {
264
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
265
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
266
            while (len >= sizeof(c->u))
267
                memcpy(p, data, sizeof(c->u)),
268
                    sha512_block_data_order(c, p, 1),
269
                    len -= sizeof(c->u), data += sizeof(c->u);
270
        else
271
#endif
272
0
            sha512_block_data_order(c, data, len / sizeof(c->u)),
273
0
                data += len, len %= sizeof(c->u), data -= len;
274
0
    }
275
276
64
    if (len != 0)
277
64
        memcpy(p, data, len), c->num = (int)len;
278
279
64
    return 1;
280
144
}
281
282
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
283
128
{
284
128
    return SHA512_Update_thunk((void *)c, (const unsigned char *)_data, len);
285
128
}
286
287
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
288
0
{
289
0
    return SHA512_Update_thunk((void *)c, (const unsigned char *)data, len);
290
0
}
291
292
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
293
0
{
294
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
295
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
296
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
297
#endif
298
0
    sha512_block_data_order(c, data, 1);
299
0
}
300
301
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
302
static const SHA_LONG64 K512[80] = {
303
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
304
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
305
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
306
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
307
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
308
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
309
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
310
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
311
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
312
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
313
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
314
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
315
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
316
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
317
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
318
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
319
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
320
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
321
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
322
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
323
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
324
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
325
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
326
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
327
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
328
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
329
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
330
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
331
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
332
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
333
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
334
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
335
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
336
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
337
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
338
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
339
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
340
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
341
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
342
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
343
};
344
345
#ifndef PEDANTIC
346
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
347
#if defined(__x86_64) || defined(__x86_64__)
348
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
349
                                asm ("rorq %1,%0"       \
350
                                : "=r"(ret)             \
351
                                : "J"(n),"0"(a)         \
352
                                : "cc"); ret; })
353
#if !defined(B_ENDIAN)
354
#define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
355
                                asm ("bswapq    %0"             \
356
                                : "=r"(ret)                     \
357
                                : "0"(ret)); ret; })
358
#endif
359
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
360
#if defined(I386_ONLY)
361
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
362
                          unsigned int hi=p[0],lo=p[1];          \
363
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
364
                                    "roll $16,%%eax; roll $16,%%edx; "\
365
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
366
                                : "=a"(lo),"=d"(hi)             \
367
                                : "0"(lo),"1"(hi) : "cc");      \
368
                                ((SHA_LONG64)hi)<<32|lo; })
369
#else
370
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
371
                          unsigned int hi=p[0],lo=p[1];         \
372
                                asm ("bswapl %0; bswapl %1;"    \
373
                                : "=r"(lo),"=r"(hi)             \
374
                                : "0"(lo),"1"(hi));             \
375
                                ((SHA_LONG64)hi)<<32|lo; })
376
#endif
377
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
378
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
379
                                asm ("rotrdi %0,%1,%2"  \
380
                                : "=r"(ret)             \
381
                                : "r"(a),"K"(n)); ret; })
382
#elif defined(__aarch64__)
383
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
384
                                asm ("ror %0,%1,%2"     \
385
                                : "=r"(ret)             \
386
                                : "r"(a),"I"(n)); ret; })
387
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
388
#define PULL64(x) ({ SHA_LONG64 ret;                     \
389
                                asm ("rev       %0,%1"          \
390
                                : "=r"(ret)                     \
391
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
392
#endif
393
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
394
#define PULL64(x) ({ SHA_LONG64 ret;                                        \
395
                        unsigned int *r = (unsigned int *)(&(ret));             \
396
                        const unsigned int *p = (const unsigned int *)(&(x));   \
397
                        asm ("rev8 %0, %1"                                      \
398
                        : "=r"(r[0])                                            \
399
                        : "r" (p[1]));                                          \
400
                        asm ("rev8 %0, %1"                                      \
401
                        : "=r"(r[1])                                            \
402
                        : "r" (p[0])); ret; })
403
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
404
#define PULL64(x) ({ SHA_LONG64 ret;    \
405
                        asm ("rev8 %0, %1"  \
406
                        : "=r"(ret)         \
407
                        : "r"(x)); ret; })
408
#endif
409
#if defined(__riscv_zknh) && __riscv_xlen == 32
410
#define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
411
                        const unsigned int *p = (const unsigned int *)(&(x));           \
412
                        asm ("sha512sum0r %0, %1, %2"                                   \
413
                        : "=r"(r[0])                                                    \
414
                        : "r" (p[0]), "r" (p[1]));                                      \
415
                        asm ("sha512sum0r %0, %2, %1"                                   \
416
                        : "=r"(r[1])                                                    \
417
                        : "r" (p[0]), "r" (p[1])); ret; })
418
#define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
419
                        const unsigned int *p = (const unsigned int *)(&(x));           \
420
                        asm ("sha512sum1r %0, %1, %2"                                   \
421
                        : "=r"(r[0])                                                    \
422
                        : "r" (p[0]), "r" (p[1]));                                      \
423
                        asm ("sha512sum1r %0, %2, %1"                                   \
424
                        : "=r"(r[1])                                                    \
425
                        : "r" (p[0]), "r" (p[1])); ret; })
426
#define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
427
                        const unsigned int *p = (const unsigned int *)(&(x));           \
428
                        asm ("sha512sig0l %0, %1, %2"                                   \
429
                        : "=r"(r[0])                                                    \
430
                        : "r" (p[0]), "r" (p[1]));                                      \
431
                        asm ("sha512sig0h %0, %2, %1"                                   \
432
                        : "=r"(r[1])                                                    \
433
                        : "r" (p[0]), "r" (p[1])); ret; })
434
#define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
435
                        const unsigned int *p = (const unsigned int *)(&(x));           \
436
                        asm ("sha512sig1l %0, %1, %2"                                   \
437
                        : "=r"(r[0])                                                    \
438
                        : "r" (p[0]), "r" (p[1]));                                      \
439
                        asm ("sha512sig1h %0, %2, %1"                                   \
440
                        : "=r"(r[1])                                                    \
441
                        : "r" (p[0]), "r" (p[1])); ret; })
442
#elif defined(__riscv_zknh) && __riscv_xlen == 64
443
#define Sigma0(x) ({ SHA_LONG64 ret;            \
444
                        asm ("sha512sum0 %0, %1"    \
445
                        : "=r"(ret)                 \
446
                        : "r"(x)); ret; })
447
#define Sigma1(x) ({ SHA_LONG64 ret;            \
448
                        asm ("sha512sum1 %0, %1"    \
449
                        : "=r"(ret)                 \
450
                        : "r"(x)); ret; })
451
#define sigma0(x) ({ SHA_LONG64 ret;            \
452
                        asm ("sha512sig0 %0, %1"    \
453
                        : "=r"(ret)                 \
454
                        : "r"(x)); ret; })
455
#define sigma1(x) ({ SHA_LONG64 ret;            \
456
                        asm ("sha512sig1 %0, %1"    \
457
                        : "=r"(ret)                 \
458
                        : "r"(x)); ret; })
459
#endif
460
#if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
461
#define Ch(x, y, z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
462
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
463
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
464
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
465
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
466
                        : "=r"(r[0])                                                    \
467
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
468
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
469
                        : "=r"(r[1])                                                    \
470
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
471
#define Maj(x, y, z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
472
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
473
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
474
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
475
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
476
                        : "=r"(r[0])                                                    \
477
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
478
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
479
                        : "=r"(r[1])                                                    \
480
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
481
#elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
482
#define Ch(x, y, z) ({  SHA_LONG64 ret;                           \
483
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
484
                        : "=r"(ret)                                 \
485
                        : "r"(x), "r"(y), "r"(z)); ret; })
486
#define Maj(x, y, z) ({ SHA_LONG64 ret;                           \
487
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
488
                        : "=r"(ret)                                 \
489
                        : "r"(x^z), "r"(y), "r"(x)); ret; })
490
#endif
491
#elif defined(_MSC_VER)
492
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
493
#pragma intrinsic(_rotr64)
494
#define ROTR(a, n) _rotr64((a), n)
495
#endif
496
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
497
#if defined(I386_ONLY)
498
static SHA_LONG64 __fastcall __pull64be(const void *x)
499
{
500
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm xchg dh, dl _asm xchg ah, al _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al
501
}
502
#else
503
static SHA_LONG64 __fastcall __pull64be(const void *x) {
504
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm bswap edx _asm bswap eax
505
}
506
#endif
507
#define PULL64(x) __pull64be(&(x))
508
#endif
509
#endif
510
#endif
511
#ifndef PULL64
512
#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x)) + j))) << ((7 - j) * 8))
513
#define PULL64(x) (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
514
#endif
515
#ifndef ROTR
516
#define ROTR(x, s) (((x) >> s) | (x) << (64 - s))
517
#endif
518
#ifndef Sigma0
519
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
520
#endif
521
#ifndef Sigma1
522
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
523
#endif
524
#ifndef sigma0
525
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
526
#endif
527
#ifndef sigma1
528
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
529
#endif
530
#ifndef Ch
531
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
532
#endif
533
#ifndef Maj
534
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
535
#endif
536
537
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
538
/*
539
 * This code should give better results on 32-bit CPU with less than
540
 * ~24 registers, both size and performance wise...
541
 */
542
543
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
544
    size_t num)
545
{
546
    const SHA_LONG64 *W = in;
547
    SHA_LONG64 A, E, T;
548
    SHA_LONG64 X[9 + 80], *F;
549
    int i;
550
551
    while (num--) {
552
553
        F = X + 80;
554
        A = ctx->h[0];
555
        F[1] = ctx->h[1];
556
        F[2] = ctx->h[2];
557
        F[3] = ctx->h[3];
558
        E = ctx->h[4];
559
        F[5] = ctx->h[5];
560
        F[6] = ctx->h[6];
561
        F[7] = ctx->h[7];
562
563
        for (i = 0; i < 16; i++, F--) {
564
#ifdef B_ENDIAN
565
            T = W[i];
566
#else
567
            T = PULL64(W[i]);
568
#endif
569
            F[0] = A;
570
            F[4] = E;
571
            F[8] = T;
572
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
573
            E = F[3] + T;
574
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
575
        }
576
577
        for (; i < 80; i++, F--) {
578
            T = sigma0(F[8 + 16 - 1]);
579
            T += sigma1(F[8 + 16 - 14]);
580
            T += F[8 + 16] + F[8 + 16 - 9];
581
582
            F[0] = A;
583
            F[4] = E;
584
            F[8] = T;
585
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
586
            E = F[3] + T;
587
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
588
        }
589
590
        ctx->h[0] += A;
591
        ctx->h[1] += F[1];
592
        ctx->h[2] += F[2];
593
        ctx->h[3] += F[3];
594
        ctx->h[4] += E;
595
        ctx->h[5] += F[5];
596
        ctx->h[6] += F[6];
597
        ctx->h[7] += F[7];
598
599
        W += SHA_LBLOCK;
600
    }
601
}
602
603
#elif defined(OPENSSL_SMALL_FOOTPRINT)
604
605
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
606
    size_t num)
607
{
608
    const SHA_LONG64 *W = in;
609
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
610
    SHA_LONG64 X[16];
611
    int i;
612
613
    while (num--) {
614
615
        a = ctx->h[0];
616
        b = ctx->h[1];
617
        c = ctx->h[2];
618
        d = ctx->h[3];
619
        e = ctx->h[4];
620
        f = ctx->h[5];
621
        g = ctx->h[6];
622
        h = ctx->h[7];
623
624
        for (i = 0; i < 16; i++) {
625
#ifdef B_ENDIAN
626
            T1 = X[i] = W[i];
627
#else
628
            T1 = X[i] = PULL64(W[i]);
629
#endif
630
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
631
            T2 = Sigma0(a) + Maj(a, b, c);
632
            h = g;
633
            g = f;
634
            f = e;
635
            e = d + T1;
636
            d = c;
637
            c = b;
638
            b = a;
639
            a = T1 + T2;
640
        }
641
642
        for (; i < 80; i++) {
643
            s0 = X[(i + 1) & 0x0f];
644
            s0 = sigma0(s0);
645
            s1 = X[(i + 14) & 0x0f];
646
            s1 = sigma1(s1);
647
648
            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
649
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
650
            T2 = Sigma0(a) + Maj(a, b, c);
651
            h = g;
652
            g = f;
653
            f = e;
654
            e = d + T1;
655
            d = c;
656
            c = b;
657
            b = a;
658
            a = T1 + T2;
659
        }
660
661
        ctx->h[0] += a;
662
        ctx->h[1] += b;
663
        ctx->h[2] += c;
664
        ctx->h[3] += d;
665
        ctx->h[4] += e;
666
        ctx->h[5] += f;
667
        ctx->h[6] += g;
668
        ctx->h[7] += h;
669
670
        W += SHA_LBLOCK;
671
    }
672
}
673
674
#else
675
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
676
    do {                                             \
677
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
678
        h = Sigma0(a) + Maj(a, b, c);                \
679
        d += T1;                                     \
680
        h += T1;                                     \
681
    } while (0)
682
683
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
684
    do {                                                   \
685
        s0 = X[(j + 1) & 0x0f];                            \
686
        s0 = sigma0(s0);                                   \
687
        s1 = X[(j + 14) & 0x0f];                           \
688
        s1 = sigma1(s1);                                   \
689
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
690
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
691
    } while (0)
692
693
#ifdef INCLUDE_C_SHA512
694
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
695
#else
696
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
697
    size_t num)
698
#endif
699
{
700
    const SHA_LONG64 *W = in;
701
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
702
    SHA_LONG64 X[16];
703
    int i;
704
705
    while (num--) {
706
707
        a = ctx->h[0];
708
        b = ctx->h[1];
709
        c = ctx->h[2];
710
        d = ctx->h[3];
711
        e = ctx->h[4];
712
        f = ctx->h[5];
713
        g = ctx->h[6];
714
        h = ctx->h[7];
715
716
#ifdef B_ENDIAN
717
        T1 = X[0] = W[0];
718
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
719
        T1 = X[1] = W[1];
720
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
721
        T1 = X[2] = W[2];
722
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
723
        T1 = X[3] = W[3];
724
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
725
        T1 = X[4] = W[4];
726
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
727
        T1 = X[5] = W[5];
728
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
729
        T1 = X[6] = W[6];
730
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
731
        T1 = X[7] = W[7];
732
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
733
        T1 = X[8] = W[8];
734
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
735
        T1 = X[9] = W[9];
736
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
737
        T1 = X[10] = W[10];
738
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
739
        T1 = X[11] = W[11];
740
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
741
        T1 = X[12] = W[12];
742
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
743
        T1 = X[13] = W[13];
744
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
745
        T1 = X[14] = W[14];
746
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
747
        T1 = X[15] = W[15];
748
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
749
#else
750
        T1 = X[0] = PULL64(W[0]);
751
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
752
        T1 = X[1] = PULL64(W[1]);
753
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
754
        T1 = X[2] = PULL64(W[2]);
755
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
756
        T1 = X[3] = PULL64(W[3]);
757
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
758
        T1 = X[4] = PULL64(W[4]);
759
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
760
        T1 = X[5] = PULL64(W[5]);
761
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
762
        T1 = X[6] = PULL64(W[6]);
763
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
764
        T1 = X[7] = PULL64(W[7]);
765
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
766
        T1 = X[8] = PULL64(W[8]);
767
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
768
        T1 = X[9] = PULL64(W[9]);
769
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
770
        T1 = X[10] = PULL64(W[10]);
771
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
772
        T1 = X[11] = PULL64(W[11]);
773
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
774
        T1 = X[12] = PULL64(W[12]);
775
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
776
        T1 = X[13] = PULL64(W[13]);
777
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
778
        T1 = X[14] = PULL64(W[14]);
779
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
780
        T1 = X[15] = PULL64(W[15]);
781
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
782
#endif
783
784
        for (i = 16; i < 80; i += 16) {
785
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
786
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
787
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
788
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
789
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
790
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
791
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
792
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
793
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
794
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
795
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
796
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
797
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
798
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
799
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
800
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
801
        }
802
803
        ctx->h[0] += a;
804
        ctx->h[1] += b;
805
        ctx->h[2] += c;
806
        ctx->h[3] += d;
807
        ctx->h[4] += e;
808
        ctx->h[5] += f;
809
        ctx->h[6] += g;
810
        ctx->h[7] += h;
811
812
        W += SHA_LBLOCK;
813
    }
814
}
815
816
#endif
817
818
#endif /* SHA512_ASM */