Coverage Report

Created: 2025-06-13 06:57

/src/openssl/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order, and one operating on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, crypto/md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm for the moment of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
0
# define U64(C)     C##ULL
76
#endif
77
78
int sha512_224_init(SHA512_CTX *c)
79
0
{
80
0
    c->h[0] = U64(0x8c3d37c819544da2);
81
0
    c->h[1] = U64(0x73e1996689dcd4d6);
82
0
    c->h[2] = U64(0x1dfab7ae32ff9c82);
83
0
    c->h[3] = U64(0x679dd514582f9fcf);
84
0
    c->h[4] = U64(0x0f6d2b697bd44da8);
85
0
    c->h[5] = U64(0x77e36f7304c48942);
86
0
    c->h[6] = U64(0x3f9d85a86a1d36c8);
87
0
    c->h[7] = U64(0x1112e6ad91d692a1);
88
89
0
    c->Nl = 0;
90
0
    c->Nh = 0;
91
0
    c->num = 0;
92
0
    c->md_len = SHA224_DIGEST_LENGTH;
93
0
    return 1;
94
0
}
95
96
int sha512_256_init(SHA512_CTX *c)
97
0
{
98
0
    c->h[0] = U64(0x22312194fc2bf72c);
99
0
    c->h[1] = U64(0x9f555fa3c84c64c2);
100
0
    c->h[2] = U64(0x2393b86b6f53b151);
101
0
    c->h[3] = U64(0x963877195940eabd);
102
0
    c->h[4] = U64(0x96283ee2a88effe3);
103
0
    c->h[5] = U64(0xbe5e1e2553863992);
104
0
    c->h[6] = U64(0x2b0199fc2c85b8aa);
105
0
    c->h[7] = U64(0x0eb72ddc81c52ca2);
106
107
0
    c->Nl = 0;
108
0
    c->Nh = 0;
109
0
    c->num = 0;
110
0
    c->md_len = SHA256_DIGEST_LENGTH;
111
0
    return 1;
112
0
}
113
114
int SHA384_Init(SHA512_CTX *c)
115
0
{
116
0
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
117
0
    c->h[1] = U64(0x629a292a367cd507);
118
0
    c->h[2] = U64(0x9159015a3070dd17);
119
0
    c->h[3] = U64(0x152fecd8f70e5939);
120
0
    c->h[4] = U64(0x67332667ffc00b31);
121
0
    c->h[5] = U64(0x8eb44a8768581511);
122
0
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
123
0
    c->h[7] = U64(0x47b5481dbefa4fa4);
124
125
0
    c->Nl = 0;
126
0
    c->Nh = 0;
127
0
    c->num = 0;
128
0
    c->md_len = SHA384_DIGEST_LENGTH;
129
0
    return 1;
130
0
}
131
132
int SHA512_Init(SHA512_CTX *c)
133
0
{
134
0
    c->h[0] = U64(0x6a09e667f3bcc908);
135
0
    c->h[1] = U64(0xbb67ae8584caa73b);
136
0
    c->h[2] = U64(0x3c6ef372fe94f82b);
137
0
    c->h[3] = U64(0xa54ff53a5f1d36f1);
138
0
    c->h[4] = U64(0x510e527fade682d1);
139
0
    c->h[5] = U64(0x9b05688c2b3e6c1f);
140
0
    c->h[6] = U64(0x1f83d9abfb41bd6b);
141
0
    c->h[7] = U64(0x5be0cd19137e2179);
142
143
0
    c->Nl = 0;
144
0
    c->Nh = 0;
145
0
    c->num = 0;
146
0
    c->md_len = SHA512_DIGEST_LENGTH;
147
0
    return 1;
148
0
}
149
150
#ifndef SHA512_ASM
151
static
152
#else
153
# ifdef INCLUDE_C_SHA512
154
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
155
# endif
156
#endif
157
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
158
159
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
160
0
{
161
0
    unsigned char *p = (unsigned char *)c->u.p;
162
0
    size_t n = c->num;
163
164
0
    p[n] = 0x80;                /* There always is a room for one */
165
0
    n++;
166
0
    if (n > (sizeof(c->u) - 16)) {
167
0
        memset(p + n, 0, sizeof(c->u) - n);
168
0
        n = 0;
169
0
        sha512_block_data_order(c, p, 1);
170
0
    }
171
172
0
    memset(p + n, 0, sizeof(c->u) - 16 - n);
173
#ifdef  B_ENDIAN
174
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
175
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
176
#else
177
0
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
178
0
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
179
0
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
180
0
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
181
0
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
182
0
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
183
0
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
184
0
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
185
0
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
186
0
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
187
0
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
188
0
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
189
0
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
190
0
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
191
0
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
192
0
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
193
0
#endif
194
195
0
    sha512_block_data_order(c, p, 1);
196
197
0
    if (md == 0)
198
0
        return 0;
199
200
0
    switch (c->md_len) {
201
    /* Let compiler decide if it's appropriate to unroll... */
202
0
    case SHA224_DIGEST_LENGTH:
203
0
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
204
0
            SHA_LONG64 t = c->h[n];
205
206
0
            *(md++) = (unsigned char)(t >> 56);
207
0
            *(md++) = (unsigned char)(t >> 48);
208
0
            *(md++) = (unsigned char)(t >> 40);
209
0
            *(md++) = (unsigned char)(t >> 32);
210
0
            *(md++) = (unsigned char)(t >> 24);
211
0
            *(md++) = (unsigned char)(t >> 16);
212
0
            *(md++) = (unsigned char)(t >> 8);
213
0
            *(md++) = (unsigned char)(t);
214
0
        }
215
        /*
216
         * For 224 bits, there are four bytes left over that have to be
217
         * processed separately.
218
         */
219
0
        {
220
0
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
221
222
0
            *(md++) = (unsigned char)(t >> 56);
223
0
            *(md++) = (unsigned char)(t >> 48);
224
0
            *(md++) = (unsigned char)(t >> 40);
225
0
            *(md++) = (unsigned char)(t >> 32);
226
0
        }
227
0
        break;
228
0
    case SHA256_DIGEST_LENGTH:
229
0
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
230
0
            SHA_LONG64 t = c->h[n];
231
232
0
            *(md++) = (unsigned char)(t >> 56);
233
0
            *(md++) = (unsigned char)(t >> 48);
234
0
            *(md++) = (unsigned char)(t >> 40);
235
0
            *(md++) = (unsigned char)(t >> 32);
236
0
            *(md++) = (unsigned char)(t >> 24);
237
0
            *(md++) = (unsigned char)(t >> 16);
238
0
            *(md++) = (unsigned char)(t >> 8);
239
0
            *(md++) = (unsigned char)(t);
240
0
        }
241
0
        break;
242
0
    case SHA384_DIGEST_LENGTH:
243
0
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
244
0
            SHA_LONG64 t = c->h[n];
245
246
0
            *(md++) = (unsigned char)(t >> 56);
247
0
            *(md++) = (unsigned char)(t >> 48);
248
0
            *(md++) = (unsigned char)(t >> 40);
249
0
            *(md++) = (unsigned char)(t >> 32);
250
0
            *(md++) = (unsigned char)(t >> 24);
251
0
            *(md++) = (unsigned char)(t >> 16);
252
0
            *(md++) = (unsigned char)(t >> 8);
253
0
            *(md++) = (unsigned char)(t);
254
0
        }
255
0
        break;
256
0
    case SHA512_DIGEST_LENGTH:
257
0
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
258
0
            SHA_LONG64 t = c->h[n];
259
260
0
            *(md++) = (unsigned char)(t >> 56);
261
0
            *(md++) = (unsigned char)(t >> 48);
262
0
            *(md++) = (unsigned char)(t >> 40);
263
0
            *(md++) = (unsigned char)(t >> 32);
264
0
            *(md++) = (unsigned char)(t >> 24);
265
0
            *(md++) = (unsigned char)(t >> 16);
266
0
            *(md++) = (unsigned char)(t >> 8);
267
0
            *(md++) = (unsigned char)(t);
268
0
        }
269
0
        break;
270
    /* ... as well as make sure md_len is not abused. */
271
0
    default:
272
0
        return 0;
273
0
    }
274
275
0
    return 1;
276
0
}
277
278
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
279
0
{
280
0
    return SHA512_Final(md, c);
281
0
}
282
283
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
284
0
{
285
0
    SHA_LONG64 l;
286
0
    unsigned char *p = c->u.p;
287
0
    const unsigned char *data = (const unsigned char *)_data;
288
289
0
    if (len == 0)
290
0
        return 1;
291
292
0
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
293
0
    if (l < c->Nl)
294
0
        c->Nh++;
295
0
    if (sizeof(len) >= 8)
296
0
        c->Nh += (((SHA_LONG64) len) >> 61);
297
0
    c->Nl = l;
298
299
0
    if (c->num != 0) {
300
0
        size_t n = sizeof(c->u) - c->num;
301
302
0
        if (len < n) {
303
0
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
304
0
            return 1;
305
0
        } else {
306
0
            memcpy(p + c->num, data, n), c->num = 0;
307
0
            len -= n, data += n;
308
0
            sha512_block_data_order(c, p, 1);
309
0
        }
310
0
    }
311
312
0
    if (len >= sizeof(c->u)) {
313
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
314
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
315
            while (len >= sizeof(c->u))
316
                memcpy(p, data, sizeof(c->u)),
317
                sha512_block_data_order(c, p, 1),
318
                len -= sizeof(c->u), data += sizeof(c->u);
319
        else
320
#endif
321
0
            sha512_block_data_order(c, data, len / sizeof(c->u)),
322
0
            data += len, len %= sizeof(c->u), data -= len;
323
0
    }
324
325
0
    if (len != 0)
326
0
        memcpy(p, data, len), c->num = (int)len;
327
328
0
    return 1;
329
0
}
330
331
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
332
0
{
333
0
    return SHA512_Update(c, data, len);
334
0
}
335
336
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
337
0
{
338
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
339
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
340
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
341
#endif
342
0
    sha512_block_data_order(c, data, 1);
343
0
}
344
345
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
346
static const SHA_LONG64 K512[80] = {
347
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
348
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
349
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
350
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
351
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
352
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
353
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
354
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
355
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
356
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
357
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
358
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
359
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
360
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
361
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
362
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
363
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
364
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
365
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
366
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
367
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
368
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
369
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
370
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
371
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
372
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
373
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
374
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
375
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
376
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
377
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
378
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
379
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
380
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
381
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
382
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
383
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
384
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
385
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
386
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
387
};
388
389
# ifndef PEDANTIC
390
#  if defined(__GNUC__) && __GNUC__>=2 && \
391
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
392
#   if defined(__x86_64) || defined(__x86_64__)
393
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
394
                                asm ("rorq %1,%0"       \
395
                                : "=r"(ret)             \
396
                                : "J"(n),"0"(a)         \
397
                                : "cc"); ret;           })
398
#    if !defined(B_ENDIAN)
399
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
400
                                asm ("bswapq    %0"             \
401
                                : "=r"(ret)                     \
402
                                : "0"(ret)); ret;               })
403
#    endif
404
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
405
#    if defined(I386_ONLY)
406
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
407
                          unsigned int hi=p[0],lo=p[1];          \
408
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
409
                                    "roll $16,%%eax; roll $16,%%edx; "\
410
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
411
                                : "=a"(lo),"=d"(hi)             \
412
                                : "0"(lo),"1"(hi) : "cc");      \
413
                                ((SHA_LONG64)hi)<<32|lo;        })
414
#    else
415
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
416
                          unsigned int hi=p[0],lo=p[1];         \
417
                                asm ("bswapl %0; bswapl %1;"    \
418
                                : "=r"(lo),"=r"(hi)             \
419
                                : "0"(lo),"1"(hi));             \
420
                                ((SHA_LONG64)hi)<<32|lo;        })
421
#    endif
422
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
423
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
424
                                asm ("rotrdi %0,%1,%2"  \
425
                                : "=r"(ret)             \
426
                                : "r"(a),"K"(n)); ret;  })
427
#   elif defined(__aarch64__)
428
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
429
                                asm ("ror %0,%1,%2"     \
430
                                : "=r"(ret)             \
431
                                : "r"(a),"I"(n)); ret;  })
432
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
433
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
434
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
435
                                asm ("rev       %0,%1"          \
436
                                : "=r"(ret)                     \
437
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
438
#    endif
439
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
440
#    define PULL64(x) ({ SHA_LONG64 ret;                                        \
441
                        unsigned int *r = (unsigned int *)(&(ret));             \
442
                        const unsigned int *p = (const unsigned int *)(&(x));   \
443
                        asm ("rev8 %0, %1"                                      \
444
                        : "=r"(r[0])                                            \
445
                        : "r" (p[1]));                                          \
446
                        asm ("rev8 %0, %1"                                      \
447
                        : "=r"(r[1])                                            \
448
                        : "r" (p[0])); ret;                                     })
449
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
450
#    define PULL64(x) ({ SHA_LONG64 ret;    \
451
                        asm ("rev8 %0, %1"  \
452
                        : "=r"(ret)         \
453
                        : "r"(x)); ret;     })
454
#   endif
455
#   if defined(__riscv_zknh) && __riscv_xlen == 32
456
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
457
                        const unsigned int *p = (const unsigned int *)(&(x));           \
458
                        asm ("sha512sum0r %0, %1, %2"                                   \
459
                        : "=r"(r[0])                                                    \
460
                        : "r" (p[0]), "r" (p[1]));                                      \
461
                        asm ("sha512sum0r %0, %2, %1"                                   \
462
                        : "=r"(r[1])                                                    \
463
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
464
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
465
                        const unsigned int *p = (const unsigned int *)(&(x));           \
466
                        asm ("sha512sum1r %0, %1, %2"                                   \
467
                        : "=r"(r[0])                                                    \
468
                        : "r" (p[0]), "r" (p[1]));                                      \
469
                        asm ("sha512sum1r %0, %2, %1"                                   \
470
                        : "=r"(r[1])                                                    \
471
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
472
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
473
                        const unsigned int *p = (const unsigned int *)(&(x));           \
474
                        asm ("sha512sig0l %0, %1, %2"                                   \
475
                        : "=r"(r[0])                                                    \
476
                        : "r" (p[0]), "r" (p[1]));                                      \
477
                        asm ("sha512sig0h %0, %2, %1"                                   \
478
                        : "=r"(r[1])                                                    \
479
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
480
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
481
                        const unsigned int *p = (const unsigned int *)(&(x));           \
482
                        asm ("sha512sig1l %0, %1, %2"                                   \
483
                        : "=r"(r[0])                                                    \
484
                        : "r" (p[0]), "r" (p[1]));                                      \
485
                        asm ("sha512sig1h %0, %2, %1"                                   \
486
                        : "=r"(r[1])                                                    \
487
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
488
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
489
#    define Sigma0(x) ({ SHA_LONG64 ret;            \
490
                        asm ("sha512sum0 %0, %1"    \
491
                        : "=r"(ret)                 \
492
                        : "r"(x)); ret;             })
493
#    define Sigma1(x) ({ SHA_LONG64 ret;            \
494
                        asm ("sha512sum1 %0, %1"    \
495
                        : "=r"(ret)                 \
496
                        : "r"(x)); ret;             })
497
#    define sigma0(x) ({ SHA_LONG64 ret;            \
498
                        asm ("sha512sig0 %0, %1"    \
499
                        : "=r"(ret)                 \
500
                        : "r"(x)); ret;             })
501
#    define sigma1(x) ({ SHA_LONG64 ret;            \
502
                        asm ("sha512sig1 %0, %1"    \
503
                        : "=r"(ret)                 \
504
                        : "r"(x)); ret;             })
505
#   endif
506
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
507
#    define Ch(x,y,z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
508
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
509
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
510
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
511
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
512
                        : "=r"(r[0])                                                    \
513
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
514
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
515
                        : "=r"(r[1])                                                    \
516
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret;                     })
517
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
518
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
519
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
520
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
521
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
522
                        : "=r"(r[0])                                                    \
523
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
524
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
525
                        : "=r"(r[1])                                                    \
526
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret;               })
527
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
528
#    define Ch(x,y,z) ({  SHA_LONG64 ret;                           \
529
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
530
                        : "=r"(ret)                                 \
531
                        : "r"(x), "r"(y), "r"(z)); ret;             })
532
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                           \
533
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
534
                        : "=r"(ret)                                 \
535
                        : "r"(x^z), "r"(y), "r"(x)); ret;           })
536
#   endif
537
#  elif defined(_MSC_VER)
538
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
539
#    pragma intrinsic(_rotr64)
540
#    define ROTR(a,n)    _rotr64((a),n)
541
#   endif
542
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
543
       !defined(OPENSSL_NO_INLINE_ASM)
544
#    if defined(I386_ONLY)
545
static SHA_LONG64 __fastcall __pull64be(const void *x)
546
{
547
    _asm mov  edx,[ecx + 0]
548
    _asm mov  eax,[ecx + 4]
549
    _asm xchg dh, dl
550
    _asm xchg ah, al
551
    _asm rol  edx, 16
552
    _asm rol  eax, 16
553
    _asm xchg dh, dl
554
    _asm xchg ah, al
555
}
556
#    else
557
static SHA_LONG64 __fastcall __pull64be(const void *x)
558
{
559
    _asm mov   edx,[ecx + 0]
560
    _asm mov   eax,[ecx + 4]
561
    _asm bswap edx
562
    _asm bswap eax
563
}
564
#    endif
565
#    define PULL64(x) __pull64be(&(x))
566
#   endif
567
#  endif
568
# endif
569
# ifndef PULL64
570
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
571
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
572
# endif
573
# ifndef ROTR
574
#  define ROTR(x,s)       (((x)>>s) | (x)<<(64-s))
575
# endif
576
# ifndef Sigma0
577
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
578
# endif
579
# ifndef Sigma1
580
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
581
# endif
582
# ifndef sigma0
583
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
584
# endif
585
# ifndef sigma1
586
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
587
# endif
588
# ifndef Ch
589
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
590
# endif
591
# ifndef Maj
592
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
593
# endif
594
595
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
596
/*
597
 * This code should give better results on 32-bit CPU with less than
598
 * ~24 registers, both size and performance wise...
599
 */
600
601
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
602
                                    size_t num)
603
{
604
    const SHA_LONG64 *W = in;
605
    SHA_LONG64 A, E, T;
606
    SHA_LONG64 X[9 + 80], *F;
607
    int i;
608
609
    while (num--) {
610
611
        F = X + 80;
612
        A = ctx->h[0];
613
        F[1] = ctx->h[1];
614
        F[2] = ctx->h[2];
615
        F[3] = ctx->h[3];
616
        E = ctx->h[4];
617
        F[5] = ctx->h[5];
618
        F[6] = ctx->h[6];
619
        F[7] = ctx->h[7];
620
621
        for (i = 0; i < 16; i++, F--) {
622
#  ifdef B_ENDIAN
623
            T = W[i];
624
#  else
625
            T = PULL64(W[i]);
626
#  endif
627
            F[0] = A;
628
            F[4] = E;
629
            F[8] = T;
630
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
631
            E = F[3] + T;
632
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
633
        }
634
635
        for (; i < 80; i++, F--) {
636
            T = sigma0(F[8 + 16 - 1]);
637
            T += sigma1(F[8 + 16 - 14]);
638
            T += F[8 + 16] + F[8 + 16 - 9];
639
640
            F[0] = A;
641
            F[4] = E;
642
            F[8] = T;
643
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
644
            E = F[3] + T;
645
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
646
        }
647
648
        ctx->h[0] += A;
649
        ctx->h[1] += F[1];
650
        ctx->h[2] += F[2];
651
        ctx->h[3] += F[3];
652
        ctx->h[4] += E;
653
        ctx->h[5] += F[5];
654
        ctx->h[6] += F[6];
655
        ctx->h[7] += F[7];
656
657
        W += SHA_LBLOCK;
658
    }
659
}
660
661
# elif defined(OPENSSL_SMALL_FOOTPRINT)
662
663
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
664
                                    size_t num)
665
{
666
    const SHA_LONG64 *W = in;
667
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
668
    SHA_LONG64 X[16];
669
    int i;
670
671
    while (num--) {
672
673
        a = ctx->h[0];
674
        b = ctx->h[1];
675
        c = ctx->h[2];
676
        d = ctx->h[3];
677
        e = ctx->h[4];
678
        f = ctx->h[5];
679
        g = ctx->h[6];
680
        h = ctx->h[7];
681
682
        for (i = 0; i < 16; i++) {
683
#  ifdef B_ENDIAN
684
            T1 = X[i] = W[i];
685
#  else
686
            T1 = X[i] = PULL64(W[i]);
687
#  endif
688
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
689
            T2 = Sigma0(a) + Maj(a, b, c);
690
            h = g;
691
            g = f;
692
            f = e;
693
            e = d + T1;
694
            d = c;
695
            c = b;
696
            b = a;
697
            a = T1 + T2;
698
        }
699
700
        for (; i < 80; i++) {
701
            s0 = X[(i + 1) & 0x0f];
702
            s0 = sigma0(s0);
703
            s1 = X[(i + 14) & 0x0f];
704
            s1 = sigma1(s1);
705
706
            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
707
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
708
            T2 = Sigma0(a) + Maj(a, b, c);
709
            h = g;
710
            g = f;
711
            f = e;
712
            e = d + T1;
713
            d = c;
714
            c = b;
715
            b = a;
716
            a = T1 + T2;
717
        }
718
719
        ctx->h[0] += a;
720
        ctx->h[1] += b;
721
        ctx->h[2] += c;
722
        ctx->h[3] += d;
723
        ctx->h[4] += e;
724
        ctx->h[5] += f;
725
        ctx->h[6] += g;
726
        ctx->h[7] += h;
727
728
        W += SHA_LBLOCK;
729
    }
730
}
731
732
# else
733
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
734
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
735
        h = Sigma0(a) + Maj(a,b,c);                     \
736
        d += T1;        h += T1;                        } while (0)
737
738
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
739
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
740
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
741
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
742
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
743
744
/*
 * Reference C implementation of the SHA-512 compression function
 * (fully unrolled variant; the rolled SHA512_SMALL_FOOTPRINT variant is
 * selected by the preprocessor branch above).  Processes |num| consecutive
 * 128-byte blocks from |in|, folding each into the chaining value
 * ctx->h[0..7].  Exported under a distinct name when an assembly
 * implementation also exists (INCLUDE_C_SHA512), otherwise it is the
 * static default.
 */
#ifdef INCLUDE_C_SHA512
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
#else
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
#endif
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];                /* 16-word circular message schedule */
    int i;

    while (num--) {

        /* Load the current chaining value into the working variables. */
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /*
         * Rounds 0..15: each round consumes one big-endian input word,
         * stashing it in X[] for the later schedule expansion.  On
         * big-endian machines the aligned input is already in host order;
         * elsewhere PULL64 performs a byte-order-neutral big-endian load.
         * The a..h arguments rotate by one position per round, so the
         * working variables never need to be shuffled explicitly.
         */
#  ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  endif

        /*
         * Rounds 16..79, unrolled 16 at a time: after 16 rounds the
         * argument rotation is back to its starting alignment, so the same
         * 16-call pattern repeats with i advancing by 16.  The schedule is
         * expanded in place in X[] by ROUND_16_80.
         */
        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        /* Add the working variables back into the chaining value. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        /* Advance to the next 128-byte (16-word) input block. */
        W += SHA_LBLOCK;
    }
}
867
# endif
868
869
#endif                         /* SHA512_ASM */