Coverage Report

Created: 2025-12-04 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl33/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order and one - on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, crypto/md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementations. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm for the moment of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
420M
# define U64(C)     C##ULL
76
#endif
77
78
int sha512_224_init(SHA512_CTX *c)
{
    /* SHA-512/224 initial hash values (FIPS 180-4, section 5.3.6.1). */
    static const SHA_LONG64 iv224[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    int i;

    for (i = 0; i < 8; i++)
        c->h[i] = iv224[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    /* SHA-512/224 truncates to 28 bytes, the SHA-224 digest length. */
    c->md_len = SHA224_DIGEST_LENGTH;
    return 1;
}
95
96
int sha512_256_init(SHA512_CTX *c)
{
    /* SHA-512/256 initial hash values (FIPS 180-4, section 5.3.6.2). */
    static const SHA_LONG64 iv256[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    int i;

    for (i = 0; i < 8; i++)
        c->h[i] = iv256[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    /* SHA-512/256 truncates to 32 bytes, the SHA-256 digest length. */
    c->md_len = SHA256_DIGEST_LENGTH;
    return 1;
}
113
114
int SHA384_Init(SHA512_CTX *c)
{
    /* SHA-384 initial hash values (FIPS 180-4, section 5.3.4). */
    static const SHA_LONG64 iv384[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    int i;

    for (i = 0; i < 8; i++)
        c->h[i] = iv384[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;
    return 1;
}
131
132
int SHA512_Init(SHA512_CTX *c)
{
    /* SHA-512 initial hash values (FIPS 180-4, section 5.3.5). */
    static const SHA_LONG64 iv512[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    int i;

    for (i = 0; i < 8; i++)
        c->h[i] = iv512[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;
    return 1;
}
149
150
#ifndef SHA512_ASM
151
static
152
#else
153
# ifdef INCLUDE_C_SHA512
154
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
155
# endif
156
#endif
157
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
158
159
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;
    size_t i;

    /* Append the mandatory 0x80 pad byte; there is always room for one. */
    p[n] = 0x80;
    n++;
    if (n > (sizeof(c->u) - 16)) {
        /* No room left for the 128-bit length field: pad out and flush. */
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    /* Zero-fill everything up to the trailing 16-byte length field. */
    memset(p + n, 0, sizeof(c->u) - 16 - n);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /*
     * Store the 128-bit message bit count big-endian into the last 16
     * bytes of the block: Nh in bytes [-16..-9], Nl in bytes [-8..-1].
     */
    for (i = 0; i < 8; i++) {
        p[sizeof(c->u) - 1 - i] = (unsigned char)(c->Nl >> (8 * i));
        p[sizeof(c->u) - 9 - i] = (unsigned char)(c->Nh >> (8 * i));
    }
#endif

    /* Compress the final padded block. */
    sha512_block_data_order(c, p, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /*
     * Serialize the state words big-endian, byte by byte.  For the
     * 28-byte SHA-512/224 output this naturally emits only the top four
     * bytes of h[3]; full words are emitted for all other lengths.
     */
    for (i = 0; i < (size_t)c->md_len; i++)
        md[i] = (unsigned char)(c->h[i >> 3] >> (8 * (7 - (i & 7))));

    return 1;
}
277
278
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    /*
     * SHA-384 shares the SHA-512 finalization path; c->md_len (set by
     * SHA384_Init) selects the 48-byte output.
     */
    return SHA512_Final(md, c);
}
282
283
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 bits;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

    /* Maintain the running 128-bit message length in bits across Nh:Nl. */
    bits = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (bits < c->Nl)
        c->Nh++;                /* low word wrapped around */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = bits;

    if (c->num != 0) {
        /* Top up a partially filled block left over from a prior call. */
        size_t gap = sizeof(c->u) - c->num;

        if (len < gap) {
            memcpy(p + c->num, data, len);
            c->num += (unsigned int)len;
            return 1;
        }
        memcpy(p + c->num, data, gap);
        c->num = 0;
        len -= gap;
        data += gap;
        sha512_block_data_order(c, p, 1);
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)data % sizeof(c->u.d[0]) != 0) {
            /* Unaligned input: bounce each block through the ctx buffer. */
            while (len >= sizeof(c->u)) {
                memcpy(p, data, sizeof(c->u));
                sha512_block_data_order(c, p, 1);
                len -= sizeof(c->u);
                data += sizeof(c->u);
            }
        } else
#endif
        {
            /* Hash all whole blocks straight from the caller's buffer. */
            size_t blocks = len / sizeof(c->u);

            sha512_block_data_order(c, data, blocks);
            data += blocks * sizeof(c->u);
            len -= blocks * sizeof(c->u);
        }
    }

    if (len != 0) {
        /* Stash the tail for the next Update/Final call. */
        memcpy(p, data, len);
        c->num = (int)len;
    }

    return 1;
}
330
331
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    /* SHA-384 uses the SHA-512 block pipeline unchanged. */
    return SHA512_Update(c, data, len);
}
335
336
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
    /* Run the compression function on exactly one input block. */
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        /* Transform needs aligned input here; copy through the ctx buffer. */
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
344
345
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
346
/*
 * SHA-384/SHA-512 round constants (FIPS 180-4, section 4.2.3): the first
 * 64 bits of the fractional parts of the cube roots of the first eighty
 * prime numbers.
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
388
389
# ifndef PEDANTIC
390
#  if defined(__GNUC__) && __GNUC__>=2 && \
391
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
392
#   if defined(__x86_64) || defined(__x86_64__)
393
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
394
                                asm ("rorq %1,%0"       \
395
                                : "=r"(ret)             \
396
                                : "J"(n),"0"(a)         \
397
                                : "cc"); ret;           })
398
#    if !defined(B_ENDIAN)
399
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
400
                                asm ("bswapq    %0"             \
401
                                : "=r"(ret)                     \
402
                                : "0"(ret)); ret;               })
403
#    endif
404
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
405
#    if defined(I386_ONLY)
406
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
407
                          unsigned int hi=p[0],lo=p[1];          \
408
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
409
                                    "roll $16,%%eax; roll $16,%%edx; "\
410
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
411
                                : "=a"(lo),"=d"(hi)             \
412
                                : "0"(lo),"1"(hi) : "cc");      \
413
                                ((SHA_LONG64)hi)<<32|lo;        })
414
#    else
415
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
416
                          unsigned int hi=p[0],lo=p[1];         \
417
                                asm ("bswapl %0; bswapl %1;"    \
418
                                : "=r"(lo),"=r"(hi)             \
419
                                : "0"(lo),"1"(hi));             \
420
                                ((SHA_LONG64)hi)<<32|lo;        })
421
#    endif
422
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
423
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
424
                                asm ("rotrdi %0,%1,%2"  \
425
                                : "=r"(ret)             \
426
                                : "r"(a),"K"(n)); ret;  })
427
#   elif defined(__aarch64__)
428
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
429
                                asm ("ror %0,%1,%2"     \
430
                                : "=r"(ret)             \
431
                                : "r"(a),"I"(n)); ret;  })
432
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
433
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
434
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
435
                                asm ("rev       %0,%1"          \
436
                                : "=r"(ret)                     \
437
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
438
#    endif
439
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
440
#    define PULL64(x) ({ SHA_LONG64 ret;                                        \
441
                        unsigned int *r = (unsigned int *)(&(ret));             \
442
                        const unsigned int *p = (const unsigned int *)(&(x));   \
443
                        asm ("rev8 %0, %1"                                      \
444
                        : "=r"(r[0])                                            \
445
                        : "r" (p[1]));                                          \
446
                        asm ("rev8 %0, %1"                                      \
447
                        : "=r"(r[1])                                            \
448
                        : "r" (p[0])); ret;                                     })
449
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
450
#    define PULL64(x) ({ SHA_LONG64 ret;    \
451
                        asm ("rev8 %0, %1"  \
452
                        : "=r"(ret)         \
453
                        : "r"(x)); ret;     })
454
#   endif
455
#   if defined(__riscv_zknh) && __riscv_xlen == 32
456
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
457
                        const unsigned int *p = (const unsigned int *)(&(x));           \
458
                        asm ("sha512sum0r %0, %1, %2"                                   \
459
                        : "=r"(r[0])                                                    \
460
                        : "r" (p[0]), "r" (p[1]));                                      \
461
                        asm ("sha512sum0r %0, %2, %1"                                   \
462
                        : "=r"(r[1])                                                    \
463
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
464
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
465
                        const unsigned int *p = (const unsigned int *)(&(x));           \
466
                        asm ("sha512sum1r %0, %1, %2"                                   \
467
                        : "=r"(r[0])                                                    \
468
                        : "r" (p[0]), "r" (p[1]));                                      \
469
                        asm ("sha512sum1r %0, %2, %1"                                   \
470
                        : "=r"(r[1])                                                    \
471
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
472
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
473
                        const unsigned int *p = (const unsigned int *)(&(x));           \
474
                        asm ("sha512sig0l %0, %1, %2"                                   \
475
                        : "=r"(r[0])                                                    \
476
                        : "r" (p[0]), "r" (p[1]));                                      \
477
                        asm ("sha512sig0h %0, %2, %1"                                   \
478
                        : "=r"(r[1])                                                    \
479
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
480
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
481
                        const unsigned int *p = (const unsigned int *)(&(x));           \
482
                        asm ("sha512sig1l %0, %1, %2"                                   \
483
                        : "=r"(r[0])                                                    \
484
                        : "r" (p[0]), "r" (p[1]));                                      \
485
                        asm ("sha512sig1h %0, %2, %1"                                   \
486
                        : "=r"(r[1])                                                    \
487
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
488
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
489
#    define Sigma0(x) ({ SHA_LONG64 ret;            \
490
                        asm ("sha512sum0 %0, %1"    \
491
                        : "=r"(ret)                 \
492
                        : "r"(x)); ret;             })
493
#    define Sigma1(x) ({ SHA_LONG64 ret;            \
494
                        asm ("sha512sum1 %0, %1"    \
495
                        : "=r"(ret)                 \
496
                        : "r"(x)); ret;             })
497
#    define sigma0(x) ({ SHA_LONG64 ret;            \
498
                        asm ("sha512sig0 %0, %1"    \
499
                        : "=r"(ret)                 \
500
                        : "r"(x)); ret;             })
501
#    define sigma1(x) ({ SHA_LONG64 ret;            \
502
                        asm ("sha512sig1 %0, %1"    \
503
                        : "=r"(ret)                 \
504
                        : "r"(x)); ret;             })
505
#   endif
506
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
507
#    define Ch(x,y,z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
508
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
509
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
510
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
511
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
512
                        : "=r"(r[0])                                                    \
513
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
514
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
515
                        : "=r"(r[1])                                                    \
516
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret;                     })
517
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
518
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
519
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
520
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
521
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
522
                        : "=r"(r[0])                                                    \
523
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
524
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
525
                        : "=r"(r[1])                                                    \
526
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret;               })
527
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
528
#    define Ch(x,y,z) ({  SHA_LONG64 ret;                           \
529
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
530
                        : "=r"(ret)                                 \
531
                        : "r"(x), "r"(y), "r"(z)); ret;             })
532
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                           \
533
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
534
                        : "=r"(ret)                                 \
535
                        : "r"(x^z), "r"(y), "r"(x)); ret;           })
536
#   endif
537
#  elif defined(_MSC_VER)
538
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
539
#    pragma intrinsic(_rotr64)
540
#    define ROTR(a,n)    _rotr64((a),n)
541
#   endif
542
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
543
       !defined(OPENSSL_NO_INLINE_ASM)
544
#    if defined(I386_ONLY)
545
static SHA_LONG64 __fastcall __pull64be(const void *x)
546
{
547
    _asm mov  edx,[ecx + 0]
548
    _asm mov  eax,[ecx + 4]
549
    _asm xchg dh, dl
550
    _asm xchg ah, al
551
    _asm rol  edx, 16
552
    _asm rol  eax, 16
553
    _asm xchg dh, dl
554
    _asm xchg ah, al
555
}
556
#    else
557
static SHA_LONG64 __fastcall __pull64be(const void *x)
558
{
559
    _asm mov   edx,[ecx + 0]
560
    _asm mov   eax,[ecx + 4]
561
    _asm bswap edx
562
    _asm bswap eax
563
}
564
#    endif
565
#    define PULL64(x) __pull64be(&(x))
566
#   endif
567
#  endif
568
# endif
569
# ifndef PULL64
570
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
571
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
572
# endif
573
# ifndef ROTR
574
#  define ROTR(x,s)       (((x)>>s) | (x)<<(64-s))
575
# endif
576
# ifndef Sigma0
577
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
578
# endif
579
# ifndef Sigma1
580
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
581
# endif
582
# ifndef sigma0
583
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
584
# endif
585
# ifndef sigma1
586
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
587
# endif
588
# ifndef Ch
589
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
590
# endif
591
# ifndef Maj
592
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
593
# endif
594
595
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
596
/*
597
 * This code should give better results on 32-bit CPU with less than
598
 * ~24 registers, both size and performance wise...
599
 */
600
601
/*
 * SHA-512 compression function for register-starved 32-bit x86.  Hashes
 * 'num' consecutive input blocks into ctx->h.
 *
 * Instead of keeping eight working variables live, the working state is
 * written into a sliding window F inside X[9 + 80] that is decremented
 * every round, so each round reads the previous rounds' values at fixed
 * offsets F[1..8] and only A and E stay in named variables.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;   /* F slides down X, one slot per round */
    int i;

    while (num--) {

        /* Start the window at the top; load the chaining state. */
        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        /* Rounds 0..15: message words come straight from the input. */
        for (i = 0; i < 16; i++, F--) {
#  ifdef B_ENDIAN
            T = W[i];
#  else
            T = PULL64(W[i]);   /* byte-swap little-endian input */
#  endif
            F[0] = A;
            F[4] = E;
            F[8] = T;           /* stash the schedule word in the window */
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Rounds 16..79: expand the schedule from earlier stashed words. */
        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Fold the block's result back into the chaining state. */
        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;        /* advance to the next input block */
    }
}
660
661
# elif defined(OPENSSL_SMALL_FOOTPRINT)
662
663
/*
 * Small-footprint SHA-512 compression function: hashes 'num' consecutive
 * input blocks into ctx->h using a single non-unrolled round body and a
 * 16-entry circular buffer X[] for the message schedule, trading speed
 * for minimal code size.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];           /* ring buffer of schedule words */
    int i;

    while (num--) {

        /* Load the chaining state into the eight working variables. */
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: message words come straight from the input. */
        for (i = 0; i < 16; i++) {
#  ifdef B_ENDIAN
            T1 = X[i] = W[i];
#  else
            T1 = X[i] = PULL64(W[i]);   /* byte-swap little-endian input */
#  endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            /* Rotate the working variables down one position. */
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Rounds 16..79: expand the schedule in place, mod-16 indexed. */
        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Fold the block's result back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;        /* advance to the next input block */
    }
}
731
732
# else
733
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
734
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
735
        h = Sigma0(a) + Maj(a,b,c);                     \
736
        d += T1;        h += T1;                        } while (0)
737
738
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
739
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
740
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
741
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
742
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
743
744
#ifdef INCLUDE_C_SHA512
745
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
746
#else
747
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
748
                                    size_t num)
749
#endif
750
{
751
    const SHA_LONG64 *W = in;
752
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
753
    SHA_LONG64 X[16];
754
    int i;
755
756
    while (num--) {
757
758
        a = ctx->h[0];
759
        b = ctx->h[1];
760
        c = ctx->h[2];
761
        d = ctx->h[3];
762
        e = ctx->h[4];
763
        f = ctx->h[5];
764
        g = ctx->h[6];
765
        h = ctx->h[7];
766
767
#  ifdef B_ENDIAN
768
        T1 = X[0] = W[0];
769
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
770
        T1 = X[1] = W[1];
771
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
772
        T1 = X[2] = W[2];
773
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
774
        T1 = X[3] = W[3];
775
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
776
        T1 = X[4] = W[4];
777
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
778
        T1 = X[5] = W[5];
779
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
780
        T1 = X[6] = W[6];
781
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
782
        T1 = X[7] = W[7];
783
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
784
        T1 = X[8] = W[8];
785
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
786
        T1 = X[9] = W[9];
787
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
788
        T1 = X[10] = W[10];
789
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
790
        T1 = X[11] = W[11];
791
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
792
        T1 = X[12] = W[12];
793
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
794
        T1 = X[13] = W[13];
795
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
796
        T1 = X[14] = W[14];
797
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
798
        T1 = X[15] = W[15];
799
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
800
#  else
801
        T1 = X[0] = PULL64(W[0]);
802
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
803
        T1 = X[1] = PULL64(W[1]);
804
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
805
        T1 = X[2] = PULL64(W[2]);
806
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
807
        T1 = X[3] = PULL64(W[3]);
808
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
809
        T1 = X[4] = PULL64(W[4]);
810
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
811
        T1 = X[5] = PULL64(W[5]);
812
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
813
        T1 = X[6] = PULL64(W[6]);
814
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
815
        T1 = X[7] = PULL64(W[7]);
816
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
817
        T1 = X[8] = PULL64(W[8]);
818
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
819
        T1 = X[9] = PULL64(W[9]);
820
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
821
        T1 = X[10] = PULL64(W[10]);
822
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
823
        T1 = X[11] = PULL64(W[11]);
824
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
825
        T1 = X[12] = PULL64(W[12]);
826
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
827
        T1 = X[13] = PULL64(W[13]);
828
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
829
        T1 = X[14] = PULL64(W[14]);
830
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
831
        T1 = X[15] = PULL64(W[15]);
832
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
833
#  endif
834
835
        for (i = 16; i < 80; i += 16) {
836
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
837
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
838
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
839
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
840
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
841
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
842
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
843
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
844
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
845
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
846
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
847
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
848
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
849
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
850
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
851
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
852
        }
853
854
        ctx->h[0] += a;
855
        ctx->h[1] += b;
856
        ctx->h[2] += c;
857
        ctx->h[3] += d;
858
        ctx->h[4] += e;
859
        ctx->h[5] += f;
860
        ctx->h[6] += g;
861
        ctx->h[7] += h;
862
863
        W += SHA_LBLOCK;
864
    }
865
}
866
867
# endif
868
869
#endif                         /* SHA512_ASM */