Coverage Report

Created: 2025-06-13 06:58

/src/openssl31/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order and one - on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, ../md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm for the moment of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
193M
# define U64(C)     C##ULL
76
#endif
77
78
int sha512_224_init(SHA512_CTX *c)
79
71.3k
{
80
71.3k
    c->h[0] = U64(0x8c3d37c819544da2);
81
71.3k
    c->h[1] = U64(0x73e1996689dcd4d6);
82
71.3k
    c->h[2] = U64(0x1dfab7ae32ff9c82);
83
71.3k
    c->h[3] = U64(0x679dd514582f9fcf);
84
71.3k
    c->h[4] = U64(0x0f6d2b697bd44da8);
85
71.3k
    c->h[5] = U64(0x77e36f7304c48942);
86
71.3k
    c->h[6] = U64(0x3f9d85a86a1d36c8);
87
71.3k
    c->h[7] = U64(0x1112e6ad91d692a1);
88
89
71.3k
    c->Nl = 0;
90
71.3k
    c->Nh = 0;
91
71.3k
    c->num = 0;
92
71.3k
    c->md_len = SHA224_DIGEST_LENGTH;
93
71.3k
    return 1;
94
71.3k
}
95
96
int sha512_256_init(SHA512_CTX *c)
97
83.9k
{
98
83.9k
    c->h[0] = U64(0x22312194fc2bf72c);
99
83.9k
    c->h[1] = U64(0x9f555fa3c84c64c2);
100
83.9k
    c->h[2] = U64(0x2393b86b6f53b151);
101
83.9k
    c->h[3] = U64(0x963877195940eabd);
102
83.9k
    c->h[4] = U64(0x96283ee2a88effe3);
103
83.9k
    c->h[5] = U64(0xbe5e1e2553863992);
104
83.9k
    c->h[6] = U64(0x2b0199fc2c85b8aa);
105
83.9k
    c->h[7] = U64(0x0eb72ddc81c52ca2);
106
107
83.9k
    c->Nl = 0;
108
83.9k
    c->Nh = 0;
109
83.9k
    c->num = 0;
110
83.9k
    c->md_len = SHA256_DIGEST_LENGTH;
111
83.9k
    return 1;
112
83.9k
}
113
114
int SHA384_Init(SHA512_CTX *c)
115
1.10M
{
116
1.10M
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
117
1.10M
    c->h[1] = U64(0x629a292a367cd507);
118
1.10M
    c->h[2] = U64(0x9159015a3070dd17);
119
1.10M
    c->h[3] = U64(0x152fecd8f70e5939);
120
1.10M
    c->h[4] = U64(0x67332667ffc00b31);
121
1.10M
    c->h[5] = U64(0x8eb44a8768581511);
122
1.10M
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
123
1.10M
    c->h[7] = U64(0x47b5481dbefa4fa4);
124
125
1.10M
    c->Nl = 0;
126
1.10M
    c->Nh = 0;
127
1.10M
    c->num = 0;
128
1.10M
    c->md_len = SHA384_DIGEST_LENGTH;
129
1.10M
    return 1;
130
1.10M
}
131
132
int SHA512_Init(SHA512_CTX *c)
133
15.1M
{
134
15.1M
    c->h[0] = U64(0x6a09e667f3bcc908);
135
15.1M
    c->h[1] = U64(0xbb67ae8584caa73b);
136
15.1M
    c->h[2] = U64(0x3c6ef372fe94f82b);
137
15.1M
    c->h[3] = U64(0xa54ff53a5f1d36f1);
138
15.1M
    c->h[4] = U64(0x510e527fade682d1);
139
15.1M
    c->h[5] = U64(0x9b05688c2b3e6c1f);
140
15.1M
    c->h[6] = U64(0x1f83d9abfb41bd6b);
141
15.1M
    c->h[7] = U64(0x5be0cd19137e2179);
142
143
15.1M
    c->Nl = 0;
144
15.1M
    c->Nh = 0;
145
15.1M
    c->num = 0;
146
15.1M
    c->md_len = SHA512_DIGEST_LENGTH;
147
15.1M
    return 1;
148
15.1M
}
149
150
#ifndef SHA512_ASM
151
static
152
#endif
153
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
154
155
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
156
16.3M
{
157
16.3M
    unsigned char *p = (unsigned char *)c->u.p;
158
16.3M
    size_t n = c->num;
159
160
16.3M
    p[n] = 0x80;                /* There always is a room for one */
161
16.3M
    n++;
162
16.3M
    if (n > (sizeof(c->u) - 16)) {
163
93.8k
        memset(p + n, 0, sizeof(c->u) - n);
164
93.8k
        n = 0;
165
93.8k
        sha512_block_data_order(c, p, 1);
166
93.8k
    }
167
168
16.3M
    memset(p + n, 0, sizeof(c->u) - 16 - n);
169
#ifdef  B_ENDIAN
170
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
171
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
172
#else
173
16.3M
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
174
16.3M
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
175
16.3M
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
176
16.3M
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
177
16.3M
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
178
16.3M
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
179
16.3M
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
180
16.3M
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
181
16.3M
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
182
16.3M
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
183
16.3M
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
184
16.3M
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
185
16.3M
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
186
16.3M
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
187
16.3M
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
188
16.3M
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
189
16.3M
#endif
190
191
16.3M
    sha512_block_data_order(c, p, 1);
192
193
16.3M
    if (md == 0)
194
0
        return 0;
195
196
16.3M
    switch (c->md_len) {
197
    /* Let compiler decide if it's appropriate to unroll... */
198
71.3k
    case SHA224_DIGEST_LENGTH:
199
285k
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
200
214k
            SHA_LONG64 t = c->h[n];
201
202
214k
            *(md++) = (unsigned char)(t >> 56);
203
214k
            *(md++) = (unsigned char)(t >> 48);
204
214k
            *(md++) = (unsigned char)(t >> 40);
205
214k
            *(md++) = (unsigned char)(t >> 32);
206
214k
            *(md++) = (unsigned char)(t >> 24);
207
214k
            *(md++) = (unsigned char)(t >> 16);
208
214k
            *(md++) = (unsigned char)(t >> 8);
209
214k
            *(md++) = (unsigned char)(t);
210
214k
        }
211
        /*
212
         * For 224 bits, there are four bytes left over that have to be
213
         * processed separately.
214
         */
215
71.3k
        {
216
71.3k
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
217
218
71.3k
            *(md++) = (unsigned char)(t >> 56);
219
71.3k
            *(md++) = (unsigned char)(t >> 48);
220
71.3k
            *(md++) = (unsigned char)(t >> 40);
221
71.3k
            *(md++) = (unsigned char)(t >> 32);
222
71.3k
        }
223
71.3k
        break;
224
83.9k
    case SHA256_DIGEST_LENGTH:
225
419k
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
226
335k
            SHA_LONG64 t = c->h[n];
227
228
335k
            *(md++) = (unsigned char)(t >> 56);
229
335k
            *(md++) = (unsigned char)(t >> 48);
230
335k
            *(md++) = (unsigned char)(t >> 40);
231
335k
            *(md++) = (unsigned char)(t >> 32);
232
335k
            *(md++) = (unsigned char)(t >> 24);
233
335k
            *(md++) = (unsigned char)(t >> 16);
234
335k
            *(md++) = (unsigned char)(t >> 8);
235
335k
            *(md++) = (unsigned char)(t);
236
335k
        }
237
83.9k
        break;
238
1.14M
    case SHA384_DIGEST_LENGTH:
239
8.03M
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
240
6.88M
            SHA_LONG64 t = c->h[n];
241
242
6.88M
            *(md++) = (unsigned char)(t >> 56);
243
6.88M
            *(md++) = (unsigned char)(t >> 48);
244
6.88M
            *(md++) = (unsigned char)(t >> 40);
245
6.88M
            *(md++) = (unsigned char)(t >> 32);
246
6.88M
            *(md++) = (unsigned char)(t >> 24);
247
6.88M
            *(md++) = (unsigned char)(t >> 16);
248
6.88M
            *(md++) = (unsigned char)(t >> 8);
249
6.88M
            *(md++) = (unsigned char)(t);
250
6.88M
        }
251
1.14M
        break;
252
15.0M
    case SHA512_DIGEST_LENGTH:
253
135M
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
254
120M
            SHA_LONG64 t = c->h[n];
255
256
120M
            *(md++) = (unsigned char)(t >> 56);
257
120M
            *(md++) = (unsigned char)(t >> 48);
258
120M
            *(md++) = (unsigned char)(t >> 40);
259
120M
            *(md++) = (unsigned char)(t >> 32);
260
120M
            *(md++) = (unsigned char)(t >> 24);
261
120M
            *(md++) = (unsigned char)(t >> 16);
262
120M
            *(md++) = (unsigned char)(t >> 8);
263
120M
            *(md++) = (unsigned char)(t);
264
120M
        }
265
15.0M
        break;
266
    /* ... as well as make sure md_len is not abused. */
267
0
    default:
268
0
        return 0;
269
16.3M
    }
270
271
16.3M
    return 1;
272
16.3M
}
273
274
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
275
1.14M
{
276
1.14M
    return SHA512_Final(md, c);
277
1.14M
}
278
279
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
280
62.2M
{
281
62.2M
    SHA_LONG64 l;
282
62.2M
    unsigned char *p = c->u.p;
283
62.2M
    const unsigned char *data = (const unsigned char *)_data;
284
285
62.2M
    if (len == 0)
286
0
        return 1;
287
288
62.2M
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
289
62.2M
    if (l < c->Nl)
290
0
        c->Nh++;
291
62.2M
    if (sizeof(len) >= 8)
292
62.2M
        c->Nh += (((SHA_LONG64) len) >> 61);
293
62.2M
    c->Nl = l;
294
295
62.2M
    if (c->num != 0) {
296
30.2M
        size_t n = sizeof(c->u) - c->num;
297
298
30.2M
        if (len < n) {
299
15.1M
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
300
15.1M
            return 1;
301
15.1M
        } else {
302
15.1M
            memcpy(p + c->num, data, n), c->num = 0;
303
15.1M
            len -= n, data += n;
304
15.1M
            sha512_block_data_order(c, p, 1);
305
15.1M
        }
306
30.2M
    }
307
308
47.1M
    if (len >= sizeof(c->u)) {
309
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
310
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
311
            while (len >= sizeof(c->u))
312
                memcpy(p, data, sizeof(c->u)),
313
                sha512_block_data_order(c, p, 1),
314
                len -= sizeof(c->u), data += sizeof(c->u);
315
        else
316
#endif
317
938k
            sha512_block_data_order(c, data, len / sizeof(c->u)),
318
938k
            data += len, len %= sizeof(c->u), data -= len;
319
938k
    }
320
321
47.1M
    if (len != 0)
322
31.4M
        memcpy(p, data, len), c->num = (int)len;
323
324
47.1M
    return 1;
325
62.2M
}
326
327
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
328
2.22M
{
329
2.22M
    return SHA512_Update(c, data, len);
330
2.22M
}
331
332
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
333
29.2k
{
334
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
335
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
336
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
337
#endif
338
29.2k
    sha512_block_data_order(c, data, 1);
339
29.2k
}
340
341
#ifndef SHA512_ASM
342
static const SHA_LONG64 K512[80] = {
343
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
344
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
345
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
346
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
347
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
348
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
349
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
350
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
351
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
352
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
353
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
354
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
355
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
356
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
357
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
358
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
359
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
360
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
361
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
362
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
363
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
364
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
365
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
366
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
367
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
368
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
369
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
370
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
371
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
372
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
373
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
374
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
375
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
376
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
377
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
378
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
379
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
380
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
381
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
382
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
383
};
384
385
# ifndef PEDANTIC
386
#  if defined(__GNUC__) && __GNUC__>=2 && \
387
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
388
#   if defined(__x86_64) || defined(__x86_64__)
389
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
390
                                asm ("rorq %1,%0"       \
391
                                : "=r"(ret)             \
392
                                : "J"(n),"0"(a)         \
393
                                : "cc"); ret;           })
394
#    if !defined(B_ENDIAN)
395
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
396
                                asm ("bswapq    %0"             \
397
                                : "=r"(ret)                     \
398
                                : "0"(ret)); ret;               })
399
#    endif
400
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
401
#    if defined(I386_ONLY)
402
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
403
                          unsigned int hi=p[0],lo=p[1];          \
404
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
405
                                    "roll $16,%%eax; roll $16,%%edx; "\
406
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
407
                                : "=a"(lo),"=d"(hi)             \
408
                                : "0"(lo),"1"(hi) : "cc");      \
409
                                ((SHA_LONG64)hi)<<32|lo;        })
410
#    else
411
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
412
                          unsigned int hi=p[0],lo=p[1];         \
413
                                asm ("bswapl %0; bswapl %1;"    \
414
                                : "=r"(lo),"=r"(hi)             \
415
                                : "0"(lo),"1"(hi));             \
416
                                ((SHA_LONG64)hi)<<32|lo;        })
417
#    endif
418
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
419
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
420
                                asm ("rotrdi %0,%1,%2"  \
421
                                : "=r"(ret)             \
422
                                : "r"(a),"K"(n)); ret;  })
423
#   elif defined(__aarch64__)
424
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
425
                                asm ("ror %0,%1,%2"     \
426
                                : "=r"(ret)             \
427
                                : "r"(a),"I"(n)); ret;  })
428
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
429
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
430
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
431
                                asm ("rev       %0,%1"          \
432
                                : "=r"(ret)                     \
433
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
434
#    endif
435
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
436
#    define PULL64(x) ({ SHA_LONG64 ret;                                        \
437
                        unsigned int *r = (unsigned int *)(&(ret));             \
438
                        const unsigned int *p = (const unsigned int *)(&(x));   \
439
                        asm ("rev8 %0, %1"                                      \
440
                        : "=r"(r[0])                                            \
441
                        : "r" (p[1]));                                          \
442
                        asm ("rev8 %0, %1"                                      \
443
                        : "=r"(r[1])                                            \
444
                        : "r" (p[0])); ret;                                     })
445
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
446
#    define PULL64(x) ({ SHA_LONG64 ret;    \
447
                        asm ("rev8 %0, %1"  \
448
                        : "=r"(ret)         \
449
                        : "r"(x)); ret;     })
450
#   endif
451
#   if defined(__riscv_zknh) && __riscv_xlen == 32
452
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
453
                        const unsigned int *p = (const unsigned int *)(&(x));           \
454
                        asm ("sha512sum0r %0, %1, %2"                                   \
455
                        : "=r"(r[0])                                                    \
456
                        : "r" (p[0]), "r" (p[1]));                                      \
457
                        asm ("sha512sum0r %0, %2, %1"                                   \
458
                        : "=r"(r[1])                                                    \
459
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
460
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
461
                        const unsigned int *p = (const unsigned int *)(&(x));           \
462
                        asm ("sha512sum1r %0, %1, %2"                                   \
463
                        : "=r"(r[0])                                                    \
464
                        : "r" (p[0]), "r" (p[1]));                                      \
465
                        asm ("sha512sum1r %0, %2, %1"                                   \
466
                        : "=r"(r[1])                                                    \
467
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
468
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
469
                        const unsigned int *p = (const unsigned int *)(&(x));           \
470
                        asm ("sha512sig0l %0, %1, %2"                                   \
471
                        : "=r"(r[0])                                                    \
472
                        : "r" (p[0]), "r" (p[1]));                                      \
473
                        asm ("sha512sig0h %0, %2, %1"                                   \
474
                        : "=r"(r[1])                                                    \
475
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
476
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
477
                        const unsigned int *p = (const unsigned int *)(&(x));           \
478
                        asm ("sha512sig1l %0, %1, %2"                                   \
479
                        : "=r"(r[0])                                                    \
480
                        : "r" (p[0]), "r" (p[1]));                                      \
481
                        asm ("sha512sig1h %0, %2, %1"                                   \
482
                        : "=r"(r[1])                                                    \
483
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
484
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
485
#    define Sigma0(x) ({ SHA_LONG64 ret;            \
486
                        asm ("sha512sum0 %0, %1"    \
487
                        : "=r"(ret)                 \
488
                        : "r"(x)); ret;             })
489
#    define Sigma1(x) ({ SHA_LONG64 ret;            \
490
                        asm ("sha512sum1 %0, %1"    \
491
                        : "=r"(ret)                 \
492
                        : "r"(x)); ret;             })
493
#    define sigma0(x) ({ SHA_LONG64 ret;            \
494
                        asm ("sha512sig0 %0, %1"    \
495
                        : "=r"(ret)                 \
496
                        : "r"(x)); ret;             })
497
#    define sigma1(x) ({ SHA_LONG64 ret;            \
498
                        asm ("sha512sig1 %0, %1"    \
499
                        : "=r"(ret)                 \
500
                        : "r"(x)); ret;             })
501
#   endif
502
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
503
#    define Ch(x,y,z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
504
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
505
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
506
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
507
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
508
                        : "=r"(r[0])                                                    \
509
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
510
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
511
                        : "=r"(r[1])                                                    \
512
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret;                     })
513
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
514
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
515
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
516
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
517
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
518
                        : "=r"(r[0])                                                    \
519
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
520
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
521
                        : "=r"(r[1])                                                    \
522
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret;               })
523
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
524
#    define Ch(x,y,z) ({  SHA_LONG64 ret;                           \
525
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
526
                        : "=r"(ret)                                 \
527
                        : "r"(x), "r"(y), "r"(z)); ret;             })
528
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                           \
529
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
530
                        : "=r"(ret)                                 \
531
                        : "r"(x^z), "r"(y), "r"(x)); ret;           })
532
#   endif
533
#  elif defined(_MSC_VER)
534
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
535
#    pragma intrinsic(_rotr64)
536
#    define ROTR(a,n)    _rotr64((a),n)
537
#   endif
538
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
539
       !defined(OPENSSL_NO_INLINE_ASM)
540
#    if defined(I386_ONLY)
541
static SHA_LONG64 __fastcall __pull64be(const void *x)
542
{
543
    _asm mov  edx,[ecx + 0]
544
    _asm mov  eax,[ecx + 4]
545
    _asm xchg dh, dl
546
    _asm xchg ah, al
547
    _asm rol  edx, 16
548
    _asm rol  eax, 16
549
    _asm xchg dh, dl
550
    _asm xchg ah, al
551
}
552
#    else
553
static SHA_LONG64 __fastcall __pull64be(const void *x)
554
{
555
    _asm mov   edx,[ecx + 0]
556
    _asm mov   eax,[ecx + 4]
557
    _asm bswap edx
558
    _asm bswap eax
559
}
560
#    endif
561
#    define PULL64(x) __pull64be(&(x))
562
#   endif
563
#  endif
564
# endif
565
# ifndef PULL64
566
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
567
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
568
# endif
569
# ifndef ROTR
570
#  define ROTR(x,s)       (((x)>>s) | (x)<<(64-s))
571
# endif
572
# ifndef Sigma0
573
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
574
# endif
575
# ifndef Sigma1
576
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
577
# endif
578
# ifndef sigma0
579
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
580
# endif
581
# ifndef sigma1
582
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
583
# endif
584
# ifndef Ch
585
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
586
# endif
587
# ifndef Maj
588
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
589
# endif
590
591
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
592
/*
593
 * This code should give better results on 32-bit CPU with less than
594
 * ~24 registers, both size and performance wise...
595
 */
596
597
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
598
                                    size_t num)
599
{
600
    const SHA_LONG64 *W = in;
601
    SHA_LONG64 A, E, T;
602
    SHA_LONG64 X[9 + 80], *F;
603
    int i;
604
605
    while (num--) {
606
607
        F = X + 80;
608
        A = ctx->h[0];
609
        F[1] = ctx->h[1];
610
        F[2] = ctx->h[2];
611
        F[3] = ctx->h[3];
612
        E = ctx->h[4];
613
        F[5] = ctx->h[5];
614
        F[6] = ctx->h[6];
615
        F[7] = ctx->h[7];
616
617
        for (i = 0; i < 16; i++, F--) {
618
#  ifdef B_ENDIAN
619
            T = W[i];
620
#  else
621
            T = PULL64(W[i]);
622
#  endif
623
            F[0] = A;
624
            F[4] = E;
625
            F[8] = T;
626
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
627
            E = F[3] + T;
628
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
629
        }
630
631
        for (; i < 80; i++, F--) {
632
            T = sigma0(F[8 + 16 - 1]);
633
            T += sigma1(F[8 + 16 - 14]);
634
            T += F[8 + 16] + F[8 + 16 - 9];
635
636
            F[0] = A;
637
            F[4] = E;
638
            F[8] = T;
639
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
640
            E = F[3] + T;
641
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
642
        }
643
644
        ctx->h[0] += A;
645
        ctx->h[1] += F[1];
646
        ctx->h[2] += F[2];
647
        ctx->h[3] += F[3];
648
        ctx->h[4] += E;
649
        ctx->h[5] += F[5];
650
        ctx->h[6] += F[6];
651
        ctx->h[7] += F[7];
652
653
        W += SHA_LBLOCK;
654
    }
655
}
656
657
# elif defined(OPENSSL_SMALL_FOOTPRINT)
658
659
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
660
                                    size_t num)
661
{
662
    const SHA_LONG64 *W = in;
663
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
664
    SHA_LONG64 X[16];
665
    int i;
666
667
    while (num--) {
668
669
        a = ctx->h[0];
670
        b = ctx->h[1];
671
        c = ctx->h[2];
672
        d = ctx->h[3];
673
        e = ctx->h[4];
674
        f = ctx->h[5];
675
        g = ctx->h[6];
676
        h = ctx->h[7];
677
678
        for (i = 0; i < 16; i++) {
679
#  ifdef B_ENDIAN
680
            T1 = X[i] = W[i];
681
#  else
682
            T1 = X[i] = PULL64(W[i]);
683
#  endif
684
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
685
            T2 = Sigma0(a) + Maj(a, b, c);
686
            h = g;
687
            g = f;
688
            f = e;
689
            e = d + T1;
690
            d = c;
691
            c = b;
692
            b = a;
693
            a = T1 + T2;
694
        }
695
696
        for (; i < 80; i++) {
697
            s0 = X[(i + 1) & 0x0f];
698
            s0 = sigma0(s0);
699
            s1 = X[(i + 14) & 0x0f];
700
            s1 = sigma1(s1);
701
702
            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
703
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
704
            T2 = Sigma0(a) + Maj(a, b, c);
705
            h = g;
706
            g = f;
707
            f = e;
708
            e = d + T1;
709
            d = c;
710
            c = b;
711
            b = a;
712
            a = T1 + T2;
713
        }
714
715
        ctx->h[0] += a;
716
        ctx->h[1] += b;
717
        ctx->h[2] += c;
718
        ctx->h[3] += d;
719
        ctx->h[4] += e;
720
        ctx->h[5] += f;
721
        ctx->h[6] += g;
722
        ctx->h[7] += h;
723
724
        W += SHA_LBLOCK;
725
    }
726
}
727
728
# else
729
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
730
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
731
        h = Sigma0(a) + Maj(a,b,c);                     \
732
        d += T1;        h += T1;                        } while (0)
733
734
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
735
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
736
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
737
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
738
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
739
740
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
741
                                    size_t num)
742
{
743
    const SHA_LONG64 *W = in;
744
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
745
    SHA_LONG64 X[16];
746
    int i;
747
748
    while (num--) {
749
750
        a = ctx->h[0];
751
        b = ctx->h[1];
752
        c = ctx->h[2];
753
        d = ctx->h[3];
754
        e = ctx->h[4];
755
        f = ctx->h[5];
756
        g = ctx->h[6];
757
        h = ctx->h[7];
758
759
#  ifdef B_ENDIAN
760
        T1 = X[0] = W[0];
761
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
762
        T1 = X[1] = W[1];
763
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
764
        T1 = X[2] = W[2];
765
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
766
        T1 = X[3] = W[3];
767
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
768
        T1 = X[4] = W[4];
769
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
770
        T1 = X[5] = W[5];
771
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
772
        T1 = X[6] = W[6];
773
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
774
        T1 = X[7] = W[7];
775
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
776
        T1 = X[8] = W[8];
777
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
778
        T1 = X[9] = W[9];
779
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
780
        T1 = X[10] = W[10];
781
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
782
        T1 = X[11] = W[11];
783
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
784
        T1 = X[12] = W[12];
785
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
786
        T1 = X[13] = W[13];
787
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
788
        T1 = X[14] = W[14];
789
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
790
        T1 = X[15] = W[15];
791
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
792
#  else
793
        T1 = X[0] = PULL64(W[0]);
794
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
795
        T1 = X[1] = PULL64(W[1]);
796
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
797
        T1 = X[2] = PULL64(W[2]);
798
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
799
        T1 = X[3] = PULL64(W[3]);
800
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
801
        T1 = X[4] = PULL64(W[4]);
802
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
803
        T1 = X[5] = PULL64(W[5]);
804
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
805
        T1 = X[6] = PULL64(W[6]);
806
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
807
        T1 = X[7] = PULL64(W[7]);
808
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
809
        T1 = X[8] = PULL64(W[8]);
810
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
811
        T1 = X[9] = PULL64(W[9]);
812
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
813
        T1 = X[10] = PULL64(W[10]);
814
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
815
        T1 = X[11] = PULL64(W[11]);
816
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
817
        T1 = X[12] = PULL64(W[12]);
818
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
819
        T1 = X[13] = PULL64(W[13]);
820
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
821
        T1 = X[14] = PULL64(W[14]);
822
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
823
        T1 = X[15] = PULL64(W[15]);
824
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
825
#  endif
826
827
        for (i = 16; i < 80; i += 16) {
828
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
829
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
830
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
831
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
832
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
833
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
834
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
835
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
836
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
837
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
838
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
839
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
840
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
841
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
842
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
843
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
844
        }
845
846
        ctx->h[0] += a;
847
        ctx->h[1] += b;
848
        ctx->h[2] += c;
849
        ctx->h[3] += d;
850
        ctx->h[4] += e;
851
        ctx->h[5] += f;
852
        ctx->h[6] += g;
853
        ctx->h[7] += h;
854
855
        W += SHA_LBLOCK;
856
    }
857
}
858
859
# endif
860
861
#endif                         /* SHA512_ASM */