Coverage Report

Created: 2025-06-13 06:36

/src/openssl/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order, and one operating on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, crypto/md32_common.h;
29
 *
30
 * None of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm at the time of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
/*
 * Define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA for the targets listed
 * below and for any build with SHA512_ASM.  When it is defined,
 * SHA512_Update() and SHA512_Transform() hand the caller's pointer to the
 * block routine directly instead of first copying misaligned input into the
 * context buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
/*
 * U64(C) pastes an unsigned 64-bit literal suffix onto the constant C:
 * UI64 for Windows compilers other than MinGW, UL when __arch64__ is
 * defined, ULL everywhere else.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
74.0k
# define U64(C)     C##ULL
76
#endif
77
78
/*
 * Reset |c| for the variant whose digest length is SHA224_DIGEST_LENGTH:
 * the eight 64-bit starting values are loaded into c->h[0..7], the two
 * length counters and the buffered-byte count are cleared, and md_len is
 * recorded.  Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 start_values[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = start_values[word];

    c->md_len = SHA224_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
95
96
/*
 * Reset |c| for the variant whose digest length is SHA256_DIGEST_LENGTH:
 * the eight 64-bit starting values are loaded into c->h[0..7], the two
 * length counters and the buffered-byte count are cleared, and md_len is
 * recorded.  Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 start_values[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = start_values[word];

    c->md_len = SHA256_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
113
114
/*
 * Reset |c| for a digest of SHA384_DIGEST_LENGTH bytes: the eight 64-bit
 * starting values are loaded into c->h[0..7], the two length counters and
 * the buffered-byte count are cleared, and md_len is recorded.
 * Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 start_values[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = start_values[word];

    c->md_len = SHA384_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
131
132
/*
 * Reset |c| for a digest of SHA512_DIGEST_LENGTH bytes: the eight 64-bit
 * starting values are loaded into c->h[0..7], the two length counters and
 * the buffered-byte count are cleared, and md_len is recorded.
 * Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 start_values[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = start_values[word];

    c->md_len = SHA512_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
149
150
/*
 * Forward declaration of the block routine used by SHA512_Final(),
 * SHA512_Update() and SHA512_Transform().  Without SHA512_ASM the leading
 * "static" makes it file-local; with SHA512_ASM it is declared with external
 * linkage, and if INCLUDE_C_SHA512 is also defined the C variant
 * sha512_block_data_order_c() is declared as well.
 */
#ifndef SHA512_ASM
151
static
152
#else
153
# ifdef INCLUDE_C_SHA512
154
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
155
# endif
156
#endif
157
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
158
159
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * The buffered bytes are terminated with 0x80 and zero-filled; if fewer than
 * 16 bytes remain for the length field, the block is flushed and a fresh
 * all-zero block is started.  The 128-bit counter Nh:Nl is stored big-endian
 * in the last 16 bytes and the block is processed.  c->md_len big-endian
 * bytes of c->h[] are then copied to |md|.
 *
 * Returns 1 on success.  Returns 0 if |md| is NULL or c->md_len is not one of
 * the four supported digest lengths; in both cases the final block has
 * already been processed.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *block = (unsigned char *)c->u.p;
    const size_t block_len = sizeof(c->u);
    const size_t len_field = block_len - 16;
    size_t used = c->num;
    size_t full_words, tail_bytes, i, k;
    SHA_LONG64 t;

    block[used] = 0x80;         /* There always is a room for one */
    used++;
    if (used > len_field) {
        /* No room left for the length: flush and start an empty block. */
        memset(block + used, 0, block_len - used);
        used = 0;
        sha512_block_data_order(c, block, 1);
    }

    memset(block + used, 0, len_field - used);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Least significant byte of Nl goes last, Nh sits just before Nl. */
    for (i = 0; i < 8; i++) {
        block[block_len - 1 - i] = (unsigned char)(c->Nl >> (8 * i));
        block[block_len - 9 - i] = (unsigned char)(c->Nh >> (8 * i));
    }
#endif

    sha512_block_data_order(c, block, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    full_words = c->md_len / 8;
    tail_bytes = c->md_len % 8;

    for (i = 0; i < full_words; i++) {
        t = c->h[i];
        for (k = 0; k < 8; k++)
            *(md++) = (unsigned char)(t >> (56 - 8 * k));
    }

    /*
     * For 224 bits, there are four bytes left over that come from the top
     * half of the next state word.
     */
    if (tail_bytes != 0) {
        t = c->h[full_words];
        for (k = 0; k < tail_bytes; k++)
            *(md++) = (unsigned char)(t >> (56 - 8 * k));
    }

    return 1;
}
277
278
/*
 * Finalisation entry point for contexts set up by SHA384_Init(); the work is
 * delegated unchanged to SHA512_Final(), whose return value is passed back.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    int status = SHA512_Final(md, c);

    return status;
}
282
283
/*
 * Absorb |len| bytes starting at |_data| into the hash state |c|.
 *
 * The 128-bit bit counter Nh:Nl is advanced first.  Any bytes already
 * buffered in c->u are topped up and flushed, whole blocks are then fed to
 * sha512_block_data_order(), and whatever is left (less than one block) is
 * stored in c->u with its length in c->num.
 *
 * Always returns 1; a zero |len| is a no-op.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    const size_t blk = sizeof(c->u);
    SHA_LONG64 low_bits;

    if (len == 0)
        return 1;

    /* Add len*8 to Nl, carry into Nh, and add the bits shifted out of Nl. */
    low_bits = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (low_bits < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = low_bits;

    if (c->num != 0) {
        size_t room = blk - c->num;

        if (len < room) {
            /* Still not a full block: just append and leave. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= blk) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through the buffer. */
            while (len >= blk) {
                memcpy(buf, src, blk);
                sha512_block_data_order(c, buf, 1);
                len -= blk;
                src += blk;
            }
        } else
#endif
        {
            size_t leftover = len % blk;

            sha512_block_data_order(c, src, len / blk);
            src += len - leftover;
            len = leftover;
        }
    }

    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
330
331
/*
 * Update entry point for contexts set up by SHA384_Init(); the work is
 * delegated unchanged to SHA512_Update(), whose return value is passed back.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    int status = SHA512_Update(c, data, len);

    return status;
}
335
336
/*
 * Run the block routine over exactly one block at |data|, updating c->h.
 * When the block routine cannot take unaligned input and |data| is not
 * aligned to the size of c->u.d[0], the block is first copied into c->u.p
 * and processed from there (overwriting whatever was buffered).
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
344
345
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
346
/*
 * Table of 80 64-bit constants.  The C block routines in this file add
 * K512[i] into the working value on round i (i = 0..79).
 */
static const SHA_LONG64 K512[80] = {
347
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
348
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
349
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
350
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
351
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
352
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
353
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
354
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
355
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
356
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
357
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
358
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
359
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
360
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
361
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
362
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
363
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
364
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
365
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
366
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
367
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
368
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
369
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
370
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
371
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
372
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
373
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
374
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
375
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
376
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
377
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
378
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
379
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
380
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
381
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
382
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
383
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
384
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
385
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
386
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
387
};
388
389
/*
 * Compiler- and architecture-specific versions of the helper macros used by
 * the C block routines.  Anything not defined in this section (ROTR, PULL64,
 * Sigma0/Sigma1, sigma0/sigma1, Ch, Maj) is given a portable definition by
 * the "# ifndef ..." fallbacks that follow it.  The whole section is skipped
 * when PEDANTIC is defined.
 */
# ifndef PEDANTIC
390
/* GCC-compatible compilers with inline assembly enabled. */
#  if defined(__GNUC__) && __GNUC__>=2 && \
391
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
392
#   if defined(__x86_64) || defined(__x86_64__)
393
/* x86-64: rotate right by an immediate count with rorq. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
394
                                asm ("rorq %1,%0"       \
395
                                : "=r"(ret)             \
396
                                : "J"(n),"0"(a)         \
397
                                : "cc"); ret;           })
398
#    if !defined(B_ENDIAN)
399
/* x86-64: load 64 bits from x and reverse the byte order with bswapq. */
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
400
                                asm ("bswapq    %0"             \
401
                                : "=r"(ret)                     \
402
                                : "0"(ret)); ret;               })
403
#    endif
404
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
405
#    if defined(I386_ONLY)
406
/*
 * 32-bit x86 with I386_ONLY: the two 32-bit halves are byte-reversed with
 * xchgb/roll sequences instead of bswapl, then recombined with the word read
 * first (p[0]) as the high half.
 */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
407
                          unsigned int hi=p[0],lo=p[1];          \
408
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
409
                                    "roll $16,%%eax; roll $16,%%edx; "\
410
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
411
                                : "=a"(lo),"=d"(hi)             \
412
                                : "0"(lo),"1"(hi) : "cc");      \
413
                                ((SHA_LONG64)hi)<<32|lo;        })
414
#    else
415
/* 32-bit x86: byte-reverse each 32-bit half with bswapl and recombine. */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
416
                          unsigned int hi=p[0],lo=p[1];         \
417
                                asm ("bswapl %0; bswapl %1;"    \
418
                                : "=r"(lo),"=r"(hi)             \
419
                                : "0"(lo),"1"(hi));             \
420
                                ((SHA_LONG64)hi)<<32|lo;        })
421
#    endif
422
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
423
/* 64-bit PowerPC: rotate right by an immediate with rotrdi. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
424
                                asm ("rotrdi %0,%1,%2"  \
425
                                : "=r"(ret)             \
426
                                : "r"(a),"K"(n)); ret;  })
427
#   elif defined(__aarch64__)
428
/* AArch64: rotate right by an immediate with ror. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
429
                                asm ("ror %0,%1,%2"     \
430
                                : "=r"(ret)             \
431
                                : "r"(a),"I"(n)); ret;  })
432
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
433
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
434
/* Little-endian AArch64: load 64 bits from x and byte-reverse with rev. */
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
435
                                asm ("rev       %0,%1"          \
436
                                : "=r"(ret)                     \
437
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
438
#    endif
439
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
440
/*
 * RV32 with Zbkb or Zbb: rev8 is applied to each 32-bit half of x and the
 * halves are stored swapped (p[1] -> r[0], p[0] -> r[1]).
 */
#    define PULL64(x) ({ SHA_LONG64 ret;                                        \
441
                        unsigned int *r = (unsigned int *)(&(ret));             \
442
                        const unsigned int *p = (const unsigned int *)(&(x));   \
443
                        asm ("rev8 %0, %1"                                      \
444
                        : "=r"(r[0])                                            \
445
                        : "r" (p[1]));                                          \
446
                        asm ("rev8 %0, %1"                                      \
447
                        : "=r"(r[1])                                            \
448
                        : "r" (p[0])); ret;                                     })
449
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
450
/* RV64 with Zbkb or Zbb: a single rev8 on the 64-bit value. */
#    define PULL64(x) ({ SHA_LONG64 ret;    \
451
                        asm ("rev8 %0, %1"  \
452
                        : "=r"(ret)         \
453
                        : "r"(x)); ret;     })
454
#   endif
455
#   if defined(__riscv_zknh) && __riscv_xlen == 32
456
/*
 * RV32 with Zknh: each of the four sigma functions is built from two
 * sha512* instructions, one producing each 32-bit half of the result from
 * the two 32-bit halves of x.  These macros take the address of x, so the
 * argument must be an lvalue.
 */
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
457
                        const unsigned int *p = (const unsigned int *)(&(x));           \
458
                        asm ("sha512sum0r %0, %1, %2"                                   \
459
                        : "=r"(r[0])                                                    \
460
                        : "r" (p[0]), "r" (p[1]));                                      \
461
                        asm ("sha512sum0r %0, %2, %1"                                   \
462
                        : "=r"(r[1])                                                    \
463
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
464
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
465
                        const unsigned int *p = (const unsigned int *)(&(x));           \
466
                        asm ("sha512sum1r %0, %1, %2"                                   \
467
                        : "=r"(r[0])                                                    \
468
                        : "r" (p[0]), "r" (p[1]));                                      \
469
                        asm ("sha512sum1r %0, %2, %1"                                   \
470
                        : "=r"(r[1])                                                    \
471
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
472
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
473
                        const unsigned int *p = (const unsigned int *)(&(x));           \
474
                        asm ("sha512sig0l %0, %1, %2"                                   \
475
                        : "=r"(r[0])                                                    \
476
                        : "r" (p[0]), "r" (p[1]));                                      \
477
                        asm ("sha512sig0h %0, %2, %1"                                   \
478
                        : "=r"(r[1])                                                    \
479
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
480
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
481
                        const unsigned int *p = (const unsigned int *)(&(x));           \
482
                        asm ("sha512sig1l %0, %1, %2"                                   \
483
                        : "=r"(r[0])                                                    \
484
                        : "r" (p[0]), "r" (p[1]));                                      \
485
                        asm ("sha512sig1h %0, %2, %1"                                   \
486
                        : "=r"(r[1])                                                    \
487
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
488
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
489
/* RV64 with Zknh: one dedicated instruction per sigma function. */
#    define Sigma0(x) ({ SHA_LONG64 ret;            \
490
                        asm ("sha512sum0 %0, %1"    \
491
                        : "=r"(ret)                 \
492
                        : "r"(x)); ret;             })
493
#    define Sigma1(x) ({ SHA_LONG64 ret;            \
494
                        asm ("sha512sum1 %0, %1"    \
495
                        : "=r"(ret)                 \
496
                        : "r"(x)); ret;             })
497
#    define sigma0(x) ({ SHA_LONG64 ret;            \
498
                        asm ("sha512sig0 %0, %1"    \
499
                        : "=r"(ret)                 \
500
                        : "r"(x)); ret;             })
501
#    define sigma1(x) ({ SHA_LONG64 ret;            \
502
                        asm ("sha512sig1 %0, %1"    \
503
                        : "=r"(ret)                 \
504
                        : "r"(x)); ret;             })
505
#   endif
506
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
507
/*
 * RV32 with Zbt or Zpn: Ch and Maj are each computed with one hand-encoded
 * ".insn r4" instruction per 32-bit half.  Maj passes x^z as the first
 * operand.  NOTE(review): the encoding looks like a conditional-mix
 * (cmix-style) instruction -- confirm against the extension specification.
 */
#    define Ch(x,y,z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
508
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
509
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
510
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
511
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
512
                        : "=r"(r[0])                                                    \
513
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
514
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
515
                        : "=r"(r[1])                                                    \
516
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret;                     })
517
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
518
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
519
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
520
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
521
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
522
                        : "=r"(r[0])                                                    \
523
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
524
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
525
                        : "=r"(r[1])                                                    \
526
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret;               })
527
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
528
/*
 * RV64 with Zbt or Zpn: same instruction on full 64-bit operands.  Here Maj
 * passes x (not z) as the last operand, unlike the 32-bit variant above.
 */
#    define Ch(x,y,z) ({  SHA_LONG64 ret;                           \
529
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
530
                        : "=r"(ret)                                 \
531
                        : "r"(x), "r"(y), "r"(z)); ret;             })
532
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                           \
533
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
534
                        : "=r"(ret)                                 \
535
                        : "r"(x^z), "r"(y), "r"(x)); ret;           })
536
#   endif
537
/* Microsoft compiler. */
#  elif defined(_MSC_VER)
538
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
539
#    pragma intrinsic(_rotr64)
540
/* 64-bit Windows: rotate right through the _rotr64 intrinsic. */
#    define ROTR(a,n)    _rotr64((a),n)
541
#   endif
542
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
543
       !defined(OPENSSL_NO_INLINE_ASM)
544
#    if defined(I386_ONLY)
545
/*
 * 32-bit MSVC, I386_ONLY: load the two 32-bit words at [ecx] into edx and
 * eax and byte-reverse each with xchg/rol.  There is no return statement;
 * NOTE(review): presumably the result is returned in edx:eax and x arrives
 * in ecx under __fastcall -- confirm against the MSVC x86 calling convention.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
546
{
547
    _asm mov  edx,[ecx + 0]
548
    _asm mov  eax,[ecx + 4]
549
    _asm xchg dh, dl
550
    _asm xchg ah, al
551
    _asm rol  edx, 16
552
    _asm rol  eax, 16
553
    _asm xchg dh, dl
554
    _asm xchg ah, al
555
}
556
#    else
557
/*
 * 32-bit MSVC: same as the I386_ONLY version but byte-reverses each half
 * with bswap.  The same calling-convention assumption applies (to be
 * confirmed).
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
558
{
559
    _asm mov   edx,[ecx + 0]
560
    _asm mov   eax,[ecx + 4]
561
    _asm bswap edx
562
    _asm bswap eax
563
}
564
#    endif
565
#    define PULL64(x) __pull64be(&(x))
566
#   endif
567
#  endif
568
# endif
569
/*
 * Portable fallbacks for every helper that was not given a compiler- or
 * architecture-specific definition earlier in the file.  All macro parameters
 * are fully parenthesized so that expression arguments (e.g. ROTR(v, a+b))
 * expand correctly.
 */
# ifndef PULL64
/*
 * B(x,j): byte j (0..7, in memory order) of the object x, widened to
 * SHA_LONG64 and shifted so that byte 0 ends up most significant.
 * PULL64(x): the eight bytes of x assembled as a big-endian 64-bit value.
 * x must be an lvalue because its address is taken.
 */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/* Rotate the 64-bit value x right by s bits; s must be in the range 1..63. */
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
# ifndef Sigma0
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# endif
# ifndef Sigma1
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# endif
# ifndef sigma0
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# endif
# ifndef sigma1
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# endif
# ifndef Ch
/* Bitwise choose: where a bit of x is set take y, otherwise take z. */
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# endif
# ifndef Maj
/* Bitwise majority of x, y and z. */
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
# endif
595
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
596
/*
597
 * This code should give better results on 32-bit CPU with less than
598
 * ~24 registers, both size and performance wise...
599
 */
600
601
/*
 * Block routine for 32-bit x86 builds.  Processes |num| consecutive blocks
 * of SHA_LBLOCK 64-bit words starting at |in| and accumulates the result
 * into ctx->h[0..7].
 *
 * Only two state words (va, ve) live in locals.  The remaining six, together
 * with the message schedule, live in |frame| and are addressed through |win|,
 * which slides down by one element per round so that no values need to be
 * shuffled between rounds.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *block = in;
    SHA_LONG64 frame[9 + 80];
    SHA_LONG64 *win;
    SHA_LONG64 va, ve, acc;
    int rnd;

    for (; num != 0; num--, block += SHA_LBLOCK) {
        win = frame + 80;

        va = ctx->h[0];
        win[1] = ctx->h[1];
        win[2] = ctx->h[2];
        win[3] = ctx->h[3];
        ve = ctx->h[4];
        win[5] = ctx->h[5];
        win[6] = ctx->h[6];
        win[7] = ctx->h[7];

        for (rnd = 0; rnd < 80; rnd++, win--) {
            if (rnd < 16) {
                /* First 16 rounds take the message words directly. */
#  ifdef B_ENDIAN
                acc = block[rnd];
#  else
                acc = PULL64(block[rnd]);
#  endif
            } else {
                /* Later rounds derive the word from earlier ones. */
                acc = sigma0(win[8 + 16 - 1]);
                acc += sigma1(win[8 + 16 - 14]);
                acc += win[8 + 16] + win[8 + 16 - 9];
            }

            win[0] = va;
            win[4] = ve;
            win[8] = acc;
            acc += win[7] + Sigma1(ve) + Ch(ve, win[5], win[6]) + K512[rnd];
            ve = win[3] + acc;
            va = acc + Sigma0(va) + Maj(va, win[1], win[2]);
        }

        ctx->h[0] += va;
        ctx->h[1] += win[1];
        ctx->h[2] += win[2];
        ctx->h[3] += win[3];
        ctx->h[4] += ve;
        ctx->h[5] += win[5];
        ctx->h[6] += win[6];
        ctx->h[7] += win[7];
    }
}
660
661
# elif defined(OPENSSL_SMALL_FOOTPRINT)
662
663
/*
 * Compact block routine used when OPENSSL_SMALL_FOOTPRINT is defined.
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words starting at
 * |in| and accumulates the result into ctx->h[0..7].
 *
 * The eight working variables are kept in v[0..7] (a..h) and shifted down by
 * one position after every round; the message schedule is a 16-entry ring
 * buffer indexed modulo 16.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *block = in;
    SHA_LONG64 v[8];
    SHA_LONG64 sched[16];
    SHA_LONG64 s0, s1, T1, T2;
    int i, k;

    for (; num != 0; num--, block += SHA_LBLOCK) {
        for (k = 0; k < 8; k++)
            v[k] = ctx->h[k];

        for (i = 0; i < 80; i++) {
            if (i < 16) {
#  ifdef B_ENDIAN
                T1 = sched[i] = block[i];
#  else
                T1 = sched[i] = PULL64(block[i]);
#  endif
            } else {
                s0 = sched[(i + 1) & 0x0f];
                s0 = sigma0(s0);
                s1 = sched[(i + 14) & 0x0f];
                s1 = sigma1(s1);

                T1 = sched[i & 0xf] += s0 + s1 + sched[(i + 9) & 0xf];
            }

            T1 += v[7] + Sigma1(v[4]) + Ch(v[4], v[5], v[6]) + K512[i];
            T2 = Sigma0(v[0]) + Maj(v[0], v[1], v[2]);

            /* h=g, g=f, f=e, e=d, d=c, c=b, b=a */
            for (k = 7; k > 0; k--)
                v[k] = v[k - 1];
            v[4] += T1;         /* e = d + T1 */
            v[0] = T1 + T2;     /* a = T1 + T2 */
        }

        for (k = 0; k < 8; k++)
            ctx->h[k] += v[k];
    }
}
731
732
# else
733
307M
/*
 * ROUND_00_15(i,a..h): one round of the unrolled block routine.  On entry T1
 * must already hold the message word for round i.  The macro adds h,
 * Sigma1(e), Ch(e,f,g) and K512[i] into T1, overwrites h with
 * Sigma0(a) + Maj(a,b,c) + T1 and adds T1 into d.  Callers rotate the
 * argument order from round to round instead of moving values between
 * variables.
 */
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
734
307M
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
735
307M
        h = Sigma0(a) + Maj(a,b,c);                     \
736
307M
        d += T1;        h += T1;                        } while (0)
737
738
246M
/*
 * ROUND_16_80(i,j,a..h,X): derives the next message word in the 16-entry
 * ring buffer X (slot j & 0x0f) from slots j+1, j+9 and j+14, leaves it in
 * T1, then performs ROUND_00_15 with round index i+j.  Uses the caller's
 * s0, s1 and T1 variables.
 */
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
739
246M
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
740
246M
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
741
246M
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
742
246M
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
743
744
/*
 * Fully unrolled block transform: processes num consecutive input blocks
 * starting at in and accumulates the result into ctx->h[0..7].
 *
 * ctx   hash context; only ctx->h[] is read and written here.
 * in    input data, accessed directly as an array of SHA_LONG64 words
 *       (so it must be suitably aligned for that access).
 * num   number of blocks to process; 16 words are consumed per block and
 *       the input pointer then advances by SHA_LBLOCK words.
 *
 * When INCLUDE_C_SHA512 is defined the function has external linkage and
 * is named sha512_block_data_order_c; otherwise it is the file-local
 * sha512_block_data_order.
 *
 * Each block runs 80 rounds: rounds 0-15 are written out one by one and
 * rounds 16-79 are done as four passes of 16 rounds.  Rather than moving
 * values between a..h after every round, each macro invocation receives
 * the variables rotated by one position; after 8 rounds the rotation is
 * back at (a,b,c,d,e,f,g,h), which is why every group of 16 starts with
 * the natural order again.
 */
#ifdef INCLUDE_C_SHA512
745
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
746
#else
747
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
748
                                    size_t num)
749
#endif
750
12.3k
{
751
12.3k
    const SHA_LONG64 *W = in;
752
12.3k
    /* s0, s1 and T1 are scratch variables used by the ROUND_* macros. */
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
753
12.3k
    /* 16-word circular window over the message schedule. */
    SHA_LONG64 X[16];
754
12.3k
    int i;
755
756
3.86M
    /* One iteration per input block. */
    while (num--) {
757
758
3.84M
        /* Load the current chaining value into the working variables. */
        a = ctx->h[0];
759
3.84M
        b = ctx->h[1];
760
3.84M
        c = ctx->h[2];
761
3.84M
        d = ctx->h[3];
762
3.84M
        e = ctx->h[4];
763
3.84M
        f = ctx->h[5];
764
3.84M
        g = ctx->h[6];
765
3.84M
        h = ctx->h[7];
766
767
        /*
         * Rounds 0-15: each input word is stored in X[] and left in T1 for
         * ROUND_00_15.  With B_ENDIAN the words are used as they are;
         * otherwise they go through PULL64 (defined elsewhere in this file,
         * presumably a big-endian to host-order conversion - confirm
         * against its definition).
         */
#  ifdef B_ENDIAN
768
        T1 = X[0] = W[0];
769
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
770
        T1 = X[1] = W[1];
771
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
772
        T1 = X[2] = W[2];
773
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
774
        T1 = X[3] = W[3];
775
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
776
        T1 = X[4] = W[4];
777
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
778
        T1 = X[5] = W[5];
779
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
780
        T1 = X[6] = W[6];
781
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
782
        T1 = X[7] = W[7];
783
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
784
        T1 = X[8] = W[8];
785
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
786
        T1 = X[9] = W[9];
787
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
788
        T1 = X[10] = W[10];
789
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
790
        T1 = X[11] = W[11];
791
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
792
        T1 = X[12] = W[12];
793
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
794
        T1 = X[13] = W[13];
795
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
796
        T1 = X[14] = W[14];
797
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
798
        T1 = X[15] = W[15];
799
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
800
#  else
801
3.84M
        T1 = X[0] = PULL64(W[0]);
802
3.84M
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
803
3.84M
        T1 = X[1] = PULL64(W[1]);
804
3.84M
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
805
3.84M
        T1 = X[2] = PULL64(W[2]);
806
3.84M
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
807
3.84M
        T1 = X[3] = PULL64(W[3]);
808
3.84M
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
809
3.84M
        T1 = X[4] = PULL64(W[4]);
810
3.84M
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
811
3.84M
        T1 = X[5] = PULL64(W[5]);
812
3.84M
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
813
3.84M
        T1 = X[6] = PULL64(W[6]);
814
3.84M
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
815
3.84M
        T1 = X[7] = PULL64(W[7]);
816
3.84M
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
817
3.84M
        T1 = X[8] = PULL64(W[8]);
818
3.84M
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
819
3.84M
        T1 = X[9] = PULL64(W[9]);
820
3.84M
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
821
3.84M
        T1 = X[10] = PULL64(W[10]);
822
3.84M
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
823
3.84M
        T1 = X[11] = PULL64(W[11]);
824
3.84M
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
825
3.84M
        T1 = X[12] = PULL64(W[12]);
826
3.84M
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
827
3.84M
        T1 = X[13] = PULL64(W[13]);
828
3.84M
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
829
3.84M
        T1 = X[14] = PULL64(W[14]);
830
3.84M
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
831
3.84M
        T1 = X[15] = PULL64(W[15]);
832
3.84M
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
833
3.84M
#  endif
834
835
19.2M
        /*
         * Rounds 16-79: four passes (i = 16, 32, 48, 64) of 16 rounds.
         * ROUND_16_80 rewrites one slot of X[] per round, so X[] always
         * holds the 16 most recent schedule words.
         */
        for (i = 16; i < 80; i += 16) {
836
15.3M
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
837
15.3M
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
838
15.3M
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
839
15.3M
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
840
15.3M
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
841
15.3M
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
842
15.3M
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
843
15.3M
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
844
15.3M
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
845
15.3M
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
846
15.3M
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
847
15.3M
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
848
15.3M
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
849
15.3M
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
850
15.3M
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
851
15.3M
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
852
15.3M
        }
853
854
3.84M
        /* Add the working variables back into the chaining value. */
        ctx->h[0] += a;
855
3.84M
        ctx->h[1] += b;
856
3.84M
        ctx->h[2] += c;
857
3.84M
        ctx->h[3] += d;
858
3.84M
        ctx->h[4] += e;
859
3.84M
        ctx->h[5] += f;
860
3.84M
        ctx->h[6] += g;
861
3.84M
        ctx->h[7] += h;
862
863
3.84M
        /* Step to the next input block. */
        W += SHA_LBLOCK;
864
3.84M
    }
865
12.3k
}
866
867
# endif
868
869
#endif                         /* SHA512_ASM */