Coverage Report

Created: 2026-02-11 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order, and one operating on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, crypto/md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm for the moment of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(__s390__) || defined(__s390x__) || defined(__aarch64__) || defined(SHA512_ASM)
63
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
64
#endif
65
66
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
67
#define U64(C) C##UI64
68
#elif defined(__arch64__)
69
#define U64(C) C##UL
70
#else
71
63.6k
#define U64(C) C##ULL
72
#endif
73
74
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len);
75
76
/*
 * sha512_224_init() - initialize |c| for SHA-512/224.
 *
 * Loads the SHA-512/224 initial hash values (FIPS 180-4, section 5.3.6)
 * and resets the bit counters and buffer index.  md_len records the
 * truncated 28-byte output size for the shared finalization code.
 * Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv512_224[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };

    memcpy(c->h, iv512_224, sizeof(iv512_224));

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA224_DIGEST_LENGTH;
    return 1;
}
93
94
/*
 * sha512_256_init() - initialize |c| for SHA-512/256.
 *
 * Loads the SHA-512/256 initial hash values (FIPS 180-4, section 5.3.6)
 * and resets the bit counters and buffer index.  md_len records the
 * truncated 32-byte output size for the shared finalization code.
 * Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv512_256[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };

    memcpy(c->h, iv512_256, sizeof(iv512_256));

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA256_DIGEST_LENGTH;
    return 1;
}
111
112
/*
 * SHA384_Init() - initialize |c| for SHA-384.
 *
 * Loads the SHA-384 initial hash values (FIPS 180-4, section 5.3.4) and
 * resets the bit counters and buffer index.  md_len selects the 48-byte
 * output in the shared SHA512_Final() path.  Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv384[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };

    memcpy(c->h, iv384, sizeof(iv384));

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;
    return 1;
}
129
130
/*
 * SHA512_Init() - initialize |c| for SHA-512.
 *
 * Loads the SHA-512 initial hash values (FIPS 180-4, section 5.3.5) and
 * resets the bit counters and buffer index.  md_len selects the full
 * 64-byte output in the shared SHA512_Final() path.  Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv512[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };

    memcpy(c->h, iv512, sizeof(iv512));

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;
    return 1;
}
147
148
#ifndef SHA512_ASM
149
static
150
#else
151
#ifdef INCLUDE_C_SHA512
152
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
153
#endif
154
#endif
155
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
156
157
/*
 * SHA512_Final() - finish a SHA-512 family digest and write it to |md|.
 *
 * Appends the 0x80 terminator, zero padding and the 128-bit big-endian
 * message bit count (Nh:Nl), processes the final block(s), then writes
 * the leading c->md_len bytes of the state words h[0..7] to |md| in
 * big-endian byte order.
 *
 * Returns 1 on success; 0 if |md| is NULL or c->md_len is not one of the
 * four supported digest lengths (224/256/384/512 bits).  Used by
 * SHA384_Final() and the SHA-512/t variants as well, which differ only
 * in md_len.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;

    p[n] = 0x80; /* There always is a room for one */
    n++;
    /*
     * Not enough room left in this block for the 16-byte length field:
     * zero-fill and process it, then continue padding in a fresh block.
     */
    if (n > (sizeof(c->u) - 16)) {
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    memset(p + n, 0, sizeof(c->u) - 16 - n);
#ifdef B_ENDIAN
    /* Big-endian host: the two 64-bit count words can be stored directly. */
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Serialize the 128-bit bit count big-endian, one byte at a time. */
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
#endif

    sha512_block_data_order(c, p, 1);

    if (md == 0)
        return 0;

    switch (c->md_len) {
    /* Let compiler decide if it's appropriate to unroll... */
    case SHA224_DIGEST_LENGTH:
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        /*
         * For 224 bits, there are four bytes left over that have to be
         * processed separately.
         */
        {
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
        }
        break;
    case SHA256_DIGEST_LENGTH:
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA384_DIGEST_LENGTH:
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA512_DIGEST_LENGTH:
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    /* ... as well as make sure md_len is not abused. */
    default:
        return 0;
    }

    return 1;
}
275
276
/*
 * SHA384_Final() - finish a SHA-384 digest.
 *
 * SHA-384 shares the SHA-512 finalization; c->md_len (set by
 * SHA384_Init()) selects the 48-byte output.  Returns the result of
 * SHA512_Final(): 1 on success, 0 on failure.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}
280
281
/*
 * SHA512_Update_thunk() - absorb |len| bytes from |data| into the context.
 *
 * |cp| is a SHA512_CTX passed as void * (the void * signature presumably
 * exists so the function can be used as a generic update callback — TODO
 * confirm against callers).  Maintains a 128-bit message bit counter in
 * Nh:Nl, buffers partial blocks in c->u.p, and hands complete 128-byte
 * blocks to sha512_block_data_order().  Always returns 1.
 */
int SHA512_Update_thunk(void *cp, const unsigned char *data, size_t len)
{
    SHA512_CTX *c = (SHA512_CTX *)cp;
    SHA_LONG64 l;
    unsigned char *p = c->u.p;

    if (len == 0)
        return 1;

    /*
     * Advance the bit count: len bytes = len << 3 bits.  A wrap of the
     * low word carries into Nh; when size_t is 64-bit the bits shifted
     * out by << 3 (len >> 61) also land in Nh.
     */
    l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64)len) >> 61);
    c->Nl = l;

    /* First top up any partially filled block left from a previous call. */
    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;

        if (len < n) {
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
            return 1;
        } else {
            memcpy(p + c->num, data, n), c->num = 0;
            len -= n, data += n;
            sha512_block_data_order(c, p, 1);
        }
    }

    /* Process as many whole 128-byte blocks as possible. */
    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        /*
         * The block routine requires aligned input here: bounce
         * misaligned data through the context buffer one block at a time.
         */
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
            while (len >= sizeof(c->u))
                memcpy(p, data, sizeof(c->u)),
                    sha512_block_data_order(c, p, 1),
                    len -= sizeof(c->u), data += sizeof(c->u);
        else
#endif
            sha512_block_data_order(c, data, len / sizeof(c->u)),
                data += len, len %= sizeof(c->u), data -= len;
    }

    /* Stash any trailing partial block for the next call. */
    if (len != 0)
        memcpy(p, data, len), c->num = (int)len;

    return 1;
}
328
329
/*
 * SHA512_Update() - public entry point feeding |len| bytes at |_data|
 * into the shared SHA-512 family collector.  Returns 1 on success.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *bytes = _data;

    return SHA512_Update_thunk(c, bytes, len);
}
333
334
/*
 * SHA384_Update() - public entry point for SHA-384; the update step is
 * identical to SHA-512's, so it delegates to the common collector.
 * Returns 1 on success.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    const unsigned char *bytes = data;

    return SHA512_Update_thunk(c, bytes, len);
}
338
339
/*
 * SHA512_Transform() - run the compression function on exactly one
 * 128-byte block at |data|; no padding or length bookkeeping is done.
 * When the block routine cannot handle unaligned input, misaligned data
 * is first copied into the context buffer.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
#endif
    sha512_block_data_order(c, data, 1);
}
347
348
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
349
/*
 * K512: the 80 SHA-512 round constants (FIPS 180-4, section 4.2.3),
 * consumed once per round by the C compression-function variants below.
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
391
392
#ifndef PEDANTIC
393
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
394
#if defined(__x86_64) || defined(__x86_64__)
395
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
396
                                asm ("rorq %1,%0"       \
397
                                : "=r"(ret)             \
398
                                : "J"(n),"0"(a)         \
399
                                : "cc"); ret; })
400
#if !defined(B_ENDIAN)
401
#define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
402
                                asm ("bswapq    %0"             \
403
                                : "=r"(ret)                     \
404
                                : "0"(ret)); ret; })
405
#endif
406
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
407
#if defined(I386_ONLY)
408
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
409
                          unsigned int hi=p[0],lo=p[1];          \
410
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
411
                                    "roll $16,%%eax; roll $16,%%edx; "\
412
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
413
                                : "=a"(lo),"=d"(hi)             \
414
                                : "0"(lo),"1"(hi) : "cc");      \
415
                                ((SHA_LONG64)hi)<<32|lo; })
416
#else
417
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
418
                          unsigned int hi=p[0],lo=p[1];         \
419
                                asm ("bswapl %0; bswapl %1;"    \
420
                                : "=r"(lo),"=r"(hi)             \
421
                                : "0"(lo),"1"(hi));             \
422
                                ((SHA_LONG64)hi)<<32|lo; })
423
#endif
424
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
425
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
426
                                asm ("rotrdi %0,%1,%2"  \
427
                                : "=r"(ret)             \
428
                                : "r"(a),"K"(n)); ret; })
429
#elif defined(__aarch64__)
430
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
431
                                asm ("ror %0,%1,%2"     \
432
                                : "=r"(ret)             \
433
                                : "r"(a),"I"(n)); ret; })
434
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
435
#define PULL64(x) ({ SHA_LONG64 ret;                     \
436
                                asm ("rev       %0,%1"          \
437
                                : "=r"(ret)                     \
438
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
439
#endif
440
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
441
#define PULL64(x) ({ SHA_LONG64 ret;                                        \
442
                        unsigned int *r = (unsigned int *)(&(ret));             \
443
                        const unsigned int *p = (const unsigned int *)(&(x));   \
444
                        asm ("rev8 %0, %1"                                      \
445
                        : "=r"(r[0])                                            \
446
                        : "r" (p[1]));                                          \
447
                        asm ("rev8 %0, %1"                                      \
448
                        : "=r"(r[1])                                            \
449
                        : "r" (p[0])); ret; })
450
#elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
451
#define PULL64(x) ({ SHA_LONG64 ret;    \
452
                        asm ("rev8 %0, %1"  \
453
                        : "=r"(ret)         \
454
                        : "r"(x)); ret; })
455
#endif
456
#if defined(__riscv_zknh) && __riscv_xlen == 32
457
#define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
458
                        const unsigned int *p = (const unsigned int *)(&(x));           \
459
                        asm ("sha512sum0r %0, %1, %2"                                   \
460
                        : "=r"(r[0])                                                    \
461
                        : "r" (p[0]), "r" (p[1]));                                      \
462
                        asm ("sha512sum0r %0, %2, %1"                                   \
463
                        : "=r"(r[1])                                                    \
464
                        : "r" (p[0]), "r" (p[1])); ret; })
465
#define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
466
                        const unsigned int *p = (const unsigned int *)(&(x));           \
467
                        asm ("sha512sum1r %0, %1, %2"                                   \
468
                        : "=r"(r[0])                                                    \
469
                        : "r" (p[0]), "r" (p[1]));                                      \
470
                        asm ("sha512sum1r %0, %2, %1"                                   \
471
                        : "=r"(r[1])                                                    \
472
                        : "r" (p[0]), "r" (p[1])); ret; })
473
#define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
474
                        const unsigned int *p = (const unsigned int *)(&(x));           \
475
                        asm ("sha512sig0l %0, %1, %2"                                   \
476
                        : "=r"(r[0])                                                    \
477
                        : "r" (p[0]), "r" (p[1]));                                      \
478
                        asm ("sha512sig0h %0, %2, %1"                                   \
479
                        : "=r"(r[1])                                                    \
480
                        : "r" (p[0]), "r" (p[1])); ret; })
481
#define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
482
                        const unsigned int *p = (const unsigned int *)(&(x));           \
483
                        asm ("sha512sig1l %0, %1, %2"                                   \
484
                        : "=r"(r[0])                                                    \
485
                        : "r" (p[0]), "r" (p[1]));                                      \
486
                        asm ("sha512sig1h %0, %2, %1"                                   \
487
                        : "=r"(r[1])                                                    \
488
                        : "r" (p[0]), "r" (p[1])); ret; })
489
#elif defined(__riscv_zknh) && __riscv_xlen == 64
490
#define Sigma0(x) ({ SHA_LONG64 ret;            \
491
                        asm ("sha512sum0 %0, %1"    \
492
                        : "=r"(ret)                 \
493
                        : "r"(x)); ret; })
494
#define Sigma1(x) ({ SHA_LONG64 ret;            \
495
                        asm ("sha512sum1 %0, %1"    \
496
                        : "=r"(ret)                 \
497
                        : "r"(x)); ret; })
498
#define sigma0(x) ({ SHA_LONG64 ret;            \
499
                        asm ("sha512sig0 %0, %1"    \
500
                        : "=r"(ret)                 \
501
                        : "r"(x)); ret; })
502
#define sigma1(x) ({ SHA_LONG64 ret;            \
503
                        asm ("sha512sig1 %0, %1"    \
504
                        : "=r"(ret)                 \
505
                        : "r"(x)); ret; })
506
#endif
507
#if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
508
#define Ch(x, y, z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
509
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
510
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
511
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
512
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
513
                        : "=r"(r[0])                                                    \
514
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
515
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
516
                        : "=r"(r[1])                                                    \
517
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
518
#define Maj(x, y, z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
519
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
520
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
521
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
522
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
523
                        : "=r"(r[0])                                                    \
524
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
525
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
526
                        : "=r"(r[1])                                                    \
527
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
528
#elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
529
#define Ch(x, y, z) ({  SHA_LONG64 ret;                           \
530
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
531
                        : "=r"(ret)                                 \
532
                        : "r"(x), "r"(y), "r"(z)); ret; })
533
#define Maj(x, y, z) ({ SHA_LONG64 ret;                           \
534
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
535
                        : "=r"(ret)                                 \
536
                        : "r"(x^z), "r"(y), "r"(x)); ret; })
537
#endif
538
#elif defined(_MSC_VER)
539
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
540
#pragma intrinsic(_rotr64)
541
#define ROTR(a, n) _rotr64((a), n)
542
#endif
543
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
544
#if defined(I386_ONLY)
545
static SHA_LONG64 __fastcall __pull64be(const void *x)
546
{
547
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm xchg dh, dl _asm xchg ah, al _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al
548
}
549
#else
550
static SHA_LONG64 __fastcall __pull64be(const void *x) {
551
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm bswap edx _asm bswap eax
552
}
553
#endif
554
#define PULL64(x) __pull64be(&(x))
555
#endif
556
#endif
557
#endif
558
#ifndef PULL64
/*
 * Portable fallback big-endian 64-bit load: B() extracts byte j of x and
 * shifts it into position; PULL64() ORs all eight bytes together.
 */
#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x)) + j))) << ((7 - j) * 8))
#define PULL64(x) (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
#endif
#ifndef ROTR
/* Portable 64-bit rotate right by s (expects 0 < s < 64). */
#define ROTR(x, s) (((x) >> s) | (x) << (64 - s))
#endif
#ifndef Sigma0
/* SHA-512 big-sigma and small-sigma functions (FIPS 180-4, section 4.1.3). */
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#endif
#ifndef Sigma1
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#endif
#ifndef sigma0
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
#endif
#ifndef sigma1
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
#endif
#ifndef Ch
/* Choice and majority bit-selection functions (FIPS 180-4, section 4.1.3). */
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
#endif
#ifndef Maj
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#endif
583
584
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
585
/*
586
 * This code should give better results on 32-bit CPU with less than
587
 * ~24 registers, both size and performance wise...
588
 */
589
590
/*
 * i386 variant of the SHA-512 compression function, processing |num|
 * consecutive 128-byte blocks from |in|.
 *
 * Instead of eight named working variables it slides a window pointer F
 * backwards through X each round, so the current working variables are
 * always F[0..7] (with a/A and e/E kept in registers) and the message
 * schedule words live at F[8] and beyond — the in-loop references
 * F[8 + 16 - 1], F[8 + 16 - 14], F[8 + 16 - 9] implement the W[t-2],
 * W[t-15], W[t-7] schedule taps without modular indexing.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;
    int i;

    while (num--) {

        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        /* Rounds 0..15: message words come straight from the input. */
        for (i = 0; i < 16; i++, F--) {
#ifdef B_ENDIAN
            T = W[i];
#else
            T = PULL64(W[i]);
#endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Rounds 16..79: expand the message schedule on the fly. */
        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Fold the block's result back into the chaining state. */
        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}
649
650
#elif defined(OPENSSL_SMALL_FOOTPRINT)
651
652
/*
 * Small-footprint variant of the SHA-512 compression function: processes
 * |num| consecutive 128-byte blocks from |in| with a straightforward
 * rotate-the-eight-variables round loop and a 16-word circular message
 * schedule buffer X (FIPS 180-4, section 6.4.2).
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: message words come straight from the input. */
        for (i = 0; i < 16; i++) {
#ifdef B_ENDIAN
            T1 = X[i] = W[i];
#else
            T1 = X[i] = PULL64(W[i]);
#endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Rounds 16..79: expand the schedule in the circular buffer X. */
        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Fold the block's result back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
720
721
#else
722
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
723
268M
    do {                                             \
724
268M
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
725
268M
        h = Sigma0(a) + Maj(a, b, c);                \
726
268M
        d += T1;                                     \
727
268M
        h += T1;                                     \
728
268M
    } while (0)
729
730
/*
 * One SHA-512 round for rounds 16..79.  First extends the message
 * schedule in place -- X[] is a 16-word circular buffer, hence every
 * index is reduced "& 0x0f" -- then performs the common round step via
 * ROUND_00_15.  (i + j) is the absolute round number used to pick the
 * round constant K512[i + j].
 */
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
    do {                                                   \
        s0 = X[(j + 1) & 0x0f];                            \
        s0 = sigma0(s0);                                   \
        s1 = X[(j + 14) & 0x0f];                           \
        s1 = sigma1(s1);                                   \
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
    } while (0)
/*
 * SHA-512 compression function, fully unrolled variant: consumes |num|
 * 128-byte (SHA_LBLOCK-word) blocks from |in| and folds each into the
 * chaining value ctx->h[0..7].  Rather than rotating the eight working
 * variables a..h after every round, each ROUND_* invocation receives
 * them already rotated by one position, so the rotation costs nothing
 * at run time.  Compiled under the name sha512_block_data_order_c when
 * an assembly implementation provides sha512_block_data_order itself.
 */
#ifdef INCLUDE_C_SHA512
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
#else
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
    size_t num)
#endif
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];           /* 16-word circular message schedule */
    int i;

    while (num--) {

        /* Load the current chaining value into the working variables. */
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /*
         * Rounds 0..15: feed the 16 input words directly.  On a
         * big-endian host the words are already in the order SHA-512
         * requires; otherwise PULL64() presumably performs the
         * big-endian load/byte-swap (defined elsewhere in this file).
         */
#ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#endif

        /*
         * Rounds 16..79, sixteen at a time: each ROUND_16_80 expands
         * one schedule word in place in X[] and performs one round.
         * After 16 rounds the rotated working variables are back in
         * their original positions, so the loop body repeats verbatim.
         */
        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        /* Fold this block's result back into the chaining value. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        /* Advance to the next 128-byte input block. */
        W += SHA_LBLOCK;
    }
}
862
863
#endif
864
865
#endif /* SHA512_ASM */