Coverage Report

Created: 2024-11-21 07:03

/src/openssl/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32-bit;
 * - have optimized versions that implement two transform functions: one
 *   operating on [aligned] data in host byte order, and one operating on
 *   data in input stream byte order;
 * - share common byte-order neutral collector and padding function
 *   implementations, crypto/md32_common.h;
 *
 * None of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the time of this writing,
 *   so there is no need for a common collector/padding implementation [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize the burden of maintenance in two ways: a) the
 *   collector/padding function is simpler; b) there is only one transform
 *   function to stare at;
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
 *   apply a number of optimizations to mitigate potential performance
 *   penalties caused by the previous design decision;
 *
 * Caveat lector.
 *
 * The implementation relies on the fact that "long long" is 64-bit on
 * both 32- and 64-bit platforms. If some compiler vendor comes up
 * with a 128-bit long long, an adjustment to sha.h would be required.
 * As this implementation relies on a 64-bit integer type, it's totally
 * inappropriate for platforms which don't support one, most notably
 * 16-bit platforms.
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
86.1k
# define U64(C)     C##ULL
76
#endif
77
78
/*
 * Reset |c| for a new SHA-512/224 computation.
 *
 * Loads the eight initial chaining words, clears the message bit counter
 * (Nh:Nl) and the count of buffered bytes, and selects
 * SHA224_DIGEST_LENGTH as the output length.  Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA224_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
95
96
/*
 * Reset |c| for a new SHA-512/256 computation.
 *
 * Loads the eight initial chaining words, clears the message bit counter
 * (Nh:Nl) and the count of buffered bytes, and selects
 * SHA256_DIGEST_LENGTH as the output length.  Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA256_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
113
114
/*
 * Reset |c| for a new SHA-384 computation.
 *
 * Loads the eight initial chaining words, clears the message bit counter
 * (Nh:Nl) and the count of buffered bytes, and selects
 * SHA384_DIGEST_LENGTH as the output length.  Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA384_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
131
132
/*
 * Reset |c| for a new SHA-512 computation.
 *
 * Loads the eight initial chaining words, clears the message bit counter
 * (Nh:Nl) and the count of buffered bytes, and selects
 * SHA512_DIGEST_LENGTH as the output length.  Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA512_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
149
150
#ifndef SHA512_ASM
151
static
152
#else
153
# ifdef INCLUDE_C_SHA512
154
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
155
# endif
156
#endif
157
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
158
159
/*
 * Finish the hash in |c| and write the digest to |md|.
 *
 * Appends the 0x80 terminator and zero padding, stores the message length
 * (Nh:Nl) big-endian in the last 16 bytes of the block, and runs the final
 * block(s) through the transform.  The chaining words are then written to
 * |md| most-significant byte first, truncated to c->md_len bytes.
 *
 * Returns 1 on success.  Returns 0 if |md| is NULL or if c->md_len is not
 * one of the four supported digest lengths; in both cases the padding has
 * already been processed and nothing is written to |md|.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *block = (unsigned char *)c->u.p;
    const size_t block_len = sizeof(c->u);
    const size_t len_off = block_len - 16;  /* offset of the length field */
    size_t used = c->num;
    size_t i;

    /* There is always room for the terminator byte. */
    block[used++] = 0x80;

    /* Not enough room left for the length field: flush an extra block. */
    if (used > len_off) {
        memset(block + used, 0, block_len - used);
        used = 0;
        sha512_block_data_order(c, block, 1);
    }

    memset(block + used, 0, len_off - used);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* High word first, each word most-significant byte first. */
    for (i = 0; i < 8; i++) {
        block[len_off + i] = (unsigned char)(c->Nh >> (56 - 8 * i));
        block[len_off + 8 + i] = (unsigned char)(c->Nl >> (56 - 8 * i));
    }
#endif

    sha512_block_data_order(c, block, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused before touching the output buffer. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /*
     * Output byte i is byte (i % 8) of chaining word (i / 8), counted from
     * the most significant end.  For the 224-bit variant this naturally
     * emits only the top four bytes of the fourth word.
     */
    for (i = 0; i < c->md_len; i++)
        md[i] = (unsigned char)(c->h[i / 8] >> (56 - 8 * (i % 8)));

    return 1;
}
277
278
/*
 * Finish a SHA-384 computation.  Delegates to SHA512_Final(), which
 * truncates the output according to c->md_len, and returns its result.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    int ok = SHA512_Final(md, c);

    return ok;
}
282
283
/*
 * Absorb |len| bytes from |_data| into the hash state |c|.
 *
 * The running message length in bits is kept in Nh:Nl.  Input is first used
 * to top up any partially filled block buffered in c->u, then whole blocks
 * are fed to the transform, and any remaining tail bytes are buffered for
 * the next call.  Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *in = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    SHA_LONG64 bits;

    if (len == 0)
        return 1;

    /* Add len * 8 to the 128-bit counter, propagating the carry into Nh. */
    bits = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
    if (bits < c->Nl)
        c->Nh++;
    /* The top three bits of a 64-bit len overflow straight into Nh. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64)len) >> 61);
    c->Nl = bits;

    /* Complete a previously buffered partial block first. */
    if (c->num != 0) {
        size_t room = sizeof(c->u) - c->num;

        if (len < room) {
            memcpy(buf + c->num, in, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(buf + c->num, in, room);
        c->num = 0;
        len -= room;
        in += room;
        sha512_block_data_order(c, buf, 1);
    }

    /* Process as many whole blocks as the input still holds. */
    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)in % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through the buffer. */
            while (len >= sizeof(c->u)) {
                memcpy(buf, in, sizeof(c->u));
                sha512_block_data_order(c, buf, 1);
                len -= sizeof(c->u);
                in += sizeof(c->u);
            }
        } else
#endif
        {
            size_t nblocks = len / sizeof(c->u);

            sha512_block_data_order(c, in, nblocks);
            in += nblocks * sizeof(c->u);
            len -= nblocks * sizeof(c->u);
        }
    }

    /* Stash the tail for the next call. */
    if (len != 0) {
        memcpy(buf, in, len);
        c->num = (int)len;
    }

    return 1;
}
330
331
/*
 * Absorb |len| bytes of |data| into a SHA-384 computation.  Delegates to
 * SHA512_Update() and returns its result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    int ok = SHA512_Update(c, data, len);

    return ok;
}
335
336
/*
 * Run the block transform over the single block at |data|, updating the
 * chaining words in |c|.
 *
 * When the transform cannot handle unaligned input, a misaligned block is
 * first copied into the context buffer c->u.p (overwriting its contents)
 * and processed from there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
344
345
#if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
346
static const SHA_LONG64 K512[80] = {
347
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
348
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
349
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
350
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
351
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
352
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
353
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
354
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
355
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
356
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
357
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
358
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
359
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
360
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
361
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
362
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
363
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
364
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
365
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
366
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
367
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
368
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
369
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
370
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
371
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
372
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
373
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
374
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
375
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
376
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
377
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
378
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
379
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
380
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
381
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
382
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
383
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
384
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
385
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
386
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
387
};
388
389
# ifndef PEDANTIC
/*
 * Optional compiler- and CPU-specific definitions of the SHA-512 primitives
 * (ROTR, PULL64, Sigma0/Sigma1, sigma0/sigma1, Ch, Maj).  Whatever is left
 * undefined here falls back to the portable C macros that follow this
 * section.  Defining PEDANTIC disables all of these overrides.
 *
 * The GNU variants are statement expressions wrapping inline asm; note that
 * they declare locals named ret, p, r, hi, lo, xp, yp and zp, so macro
 * arguments must not use those identifiers.
 */
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate right with rorq. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
/* x86-64: 64-bit load followed by bswapq to reverse the byte order. */
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
/* i386 without bswap: reverse each 32-bit half with xchgb/roll. */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    else
/* i486 and later: reverse each 32-bit half with bswapl. */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate right with rotrdi. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
/* AArch64: rotate right with ror. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
/* Little-endian AArch64: 64-bit load passed through rev. */
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
/* RV32 with Zbkb/Zbb: rev8 each 32-bit half and swap the halves. */
#    define PULL64(x) ({ SHA_LONG64 ret;                                        \
                        unsigned int *r = (unsigned int *)(&(ret));             \
                        const unsigned int *p = (const unsigned int *)(&(x));   \
                        asm ("rev8 %0, %1"                                      \
                        : "=r"(r[0])                                            \
                        : "r" (p[1]));                                          \
                        asm ("rev8 %0, %1"                                      \
                        : "=r"(r[1])                                            \
                        : "r" (p[0])); ret;                                     })
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
/* RV64 with Zbkb/Zbb: a single rev8 on the 64-bit value. */
#    define PULL64(x) ({ SHA_LONG64 ret;    \
                        asm ("rev8 %0, %1"  \
                        : "=r"(ret)         \
                        : "r"(x)); ret;     })
#   endif
#   if defined(__riscv_zknh) && __riscv_xlen == 32
/*
 * RV32 with Zknh: each 64-bit Sigma/sigma result is built from two
 * instructions operating on the 32-bit halves of the operand.
 */
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
                        const unsigned int *p = (const unsigned int *)(&(x));           \
                        asm ("sha512sum0r %0, %1, %2"                                   \
                        : "=r"(r[0])                                                    \
                        : "r" (p[0]), "r" (p[1]));                                      \
                        asm ("sha512sum0r %0, %2, %1"                                   \
                        : "=r"(r[1])                                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
                        const unsigned int *p = (const unsigned int *)(&(x));           \
                        asm ("sha512sum1r %0, %1, %2"                                   \
                        : "=r"(r[0])                                                    \
                        : "r" (p[0]), "r" (p[1]));                                      \
                        asm ("sha512sum1r %0, %2, %1"                                   \
                        : "=r"(r[1])                                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
                        const unsigned int *p = (const unsigned int *)(&(x));           \
                        asm ("sha512sig0l %0, %1, %2"                                   \
                        : "=r"(r[0])                                                    \
                        : "r" (p[0]), "r" (p[1]));                                      \
                        asm ("sha512sig0h %0, %2, %1"                                   \
                        : "=r"(r[1])                                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));    \
                        const unsigned int *p = (const unsigned int *)(&(x));           \
                        asm ("sha512sig1l %0, %1, %2"                                   \
                        : "=r"(r[0])                                                    \
                        : "r" (p[0]), "r" (p[1]));                                      \
                        asm ("sha512sig1h %0, %2, %1"                                   \
                        : "=r"(r[1])                                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                                 })
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
/* RV64 with Zknh: one instruction per Sigma/sigma function. */
#    define Sigma0(x) ({ SHA_LONG64 ret;            \
                        asm ("sha512sum0 %0, %1"    \
                        : "=r"(ret)                 \
                        : "r"(x)); ret;             })
#    define Sigma1(x) ({ SHA_LONG64 ret;            \
                        asm ("sha512sum1 %0, %1"    \
                        : "=r"(ret)                 \
                        : "r"(x)); ret;             })
#    define sigma0(x) ({ SHA_LONG64 ret;            \
                        asm ("sha512sig0 %0, %1"    \
                        : "=r"(ret)                 \
                        : "r"(x)); ret;             })
#    define sigma1(x) ({ SHA_LONG64 ret;            \
                        asm ("sha512sig1 %0, %1"    \
                        : "=r"(ret)                 \
                        : "r"(x)); ret;             })
#   endif
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
/*
 * RV32 with Zbt/Zpn: Ch and Maj via a hand-encoded R4-type instruction
 * (.insn), applied separately to each 32-bit half.
 * NOTE(review): the encoding is presumably the conditional-mix (cmix)
 * instruction -- confirm against the extension specification.
 */
#    define Ch(x,y,z) ({  SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
                        : "=r"(r[0])                                                    \
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));                          \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
                        : "=r"(r[1])                                                    \
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret;                     })
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret));   \
                        const unsigned int *xp = (const unsigned int *)(&(x));          \
                        const unsigned int *yp = (const unsigned int *)(&(y));          \
                        const unsigned int *zp = (const unsigned int *)(&(z));          \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
                        : "=r"(r[0])                                                    \
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));                    \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t"                \
                        : "=r"(r[1])                                                    \
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret;               })
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
/* RV64 with Zbt/Zpn: the same hand-encoded instruction on full 64-bit words. */
#    define Ch(x,y,z) ({  SHA_LONG64 ret;                           \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
                        : "=r"(ret)                                 \
                        : "r"(x), "r"(y), "r"(z)); ret;             })
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                           \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
                        : "=r"(ret)                                 \
                        : "r"(x^z), "r"(y), "r"(x)); ret;           })
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
/* 64-bit MSVC: use the _rotr64 intrinsic. */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: big-endian 64-bit load written in inline assembly.  The
 * pointer is read from ECX and there is no return statement; the function
 * relies on the result being left in EDX:EAX.
 */
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov   edx,[ecx + 0]
    _asm mov   eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
569
/*
 * Portable C fallbacks for any primitive that has not already been defined
 * by a compiler/CPU-specific override.
 *
 * Every macro argument is fully parenthesised, so expression arguments such
 * as ROTR(v, k + 4) expand correctly.  The arguments are still evaluated
 * more than once, so they must not have side effects.
 */
# ifndef PULL64
/* B(x,j): byte j of the object x, shifted to its big-endian position. */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
/* PULL64(x): read the eight bytes at &x as a big-endian 64-bit value. */
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/* Rotate the 64-bit value x right by s bits; s must satisfy 0 < s < 64. */
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
# ifndef Sigma0
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# endif
# ifndef Sigma1
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# endif
# ifndef sigma0
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# endif
# ifndef sigma1
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# endif
# ifndef Ch
/* Choose: for each bit, take y where x is 1 and z where x is 0. */
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# endif
# ifndef Maj
/* Majority: each result bit is the majority vote of x, y and z. */
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
# endif
594
595
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
596
/*
597
 * This code should give better results on 32-bit CPU with less than
598
 * ~24 registers, both size and performance wise...
599
 */
600
601
/*
 * Process |num| consecutive input blocks from |in|, updating the chaining
 * words in ctx->h.  This is the variant selected for 32-bit x86 builds.
 *
 * Only a and e are kept in named variables.  Everything else lives in a
 * frame that slides down one slot per round:
 *
 *   fp[1..3]  hold b, c, d          fp[5..7]  hold f, g, h
 *   fp[8]     holds the schedule word of the current round
 *   fp[8 + k] holds the schedule word from k rounds earlier
 *
 * Writing a to fp[0] and e to fp[4] and then decrementing fp performs the
 * per-round rotation of the working variables without copying them.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 a, e, t;
    SHA_LONG64 frame[9 + 80], *fp;
    int round;

    for (; num > 0; num--, blk += SHA_LBLOCK) {
        /* Load the chaining state into the top of the frame. */
        fp = frame + 80;
        a = ctx->h[0];
        fp[1] = ctx->h[1];
        fp[2] = ctx->h[2];
        fp[3] = ctx->h[3];
        e = ctx->h[4];
        fp[5] = ctx->h[5];
        fp[6] = ctx->h[6];
        fp[7] = ctx->h[7];

        /* Rounds 0..15: schedule words come straight from the input. */
        for (round = 0; round < 16; round++, fp--) {
#  ifdef B_ENDIAN
            t = blk[round];
#  else
            t = PULL64(blk[round]);
#  endif
            fp[0] = a;
            fp[4] = e;
            fp[8] = t;
            t += fp[7] + Sigma1(e) + Ch(e, fp[5], fp[6]) + K512[round];
            e = fp[3] + t;
            a = t + Sigma0(a) + Maj(a, fp[1], fp[2]);
        }

        /* Rounds 16..79: schedule words derive from earlier ones. */
        for (; round < 80; round++, fp--) {
            t = sigma0(fp[8 + 16 - 1]);
            t += sigma1(fp[8 + 16 - 14]);
            t += fp[8 + 16] + fp[8 + 16 - 9];

            fp[0] = a;
            fp[4] = e;
            fp[8] = t;
            t += fp[7] + Sigma1(e) + Ch(e, fp[5], fp[6]) + K512[round];
            e = fp[3] + t;
            a = t + Sigma0(a) + Maj(a, fp[1], fp[2]);
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += fp[1];
        ctx->h[2] += fp[2];
        ctx->h[3] += fp[3];
        ctx->h[4] += e;
        ctx->h[5] += fp[5];
        ctx->h[6] += fp[6];
        ctx->h[7] += fp[7];
    }
}
660
661
# elif defined(OPENSSL_SMALL_FOOTPRINT)
662
663
/*
 * Process |num| consecutive input blocks from |in|, updating the chaining
 * words in ctx->h.  This is the compact variant selected by
 * OPENSSL_SMALL_FOOTPRINT: the rounds are rolled into two loops and the
 * message schedule is kept in a 16-word circular buffer.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 w_lo, w_hi, t1, t2;
    SHA_LONG64 sched[16];
    int round;

    for (; num > 0; num--, blk += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: schedule words come straight from the input. */
        for (round = 0; round < 16; round++) {
#  ifdef B_ENDIAN
            sched[round] = blk[round];
#  else
            sched[round] = PULL64(blk[round]);
#  endif
            t1 = sched[round];
            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[round];
            t2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* Rounds 16..79: update the circular schedule in place. */
        for (; round < 80; round++) {
            w_lo = sched[(round + 1) & 0x0f];
            w_lo = sigma0(w_lo);
            w_hi = sched[(round + 14) & 0x0f];
            w_hi = sigma1(w_hi);

            sched[round & 0xf] += w_lo + w_hi + sched[(round + 9) & 0xf];
            t1 = sched[round & 0xf];
            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[round];
            t2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
731
732
# else
733
6.13M
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
734
6.13M
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
735
6.13M
        h = Sigma0(a) + Maj(a,b,c);                     \
736
6.13M
        d += T1;        h += T1;                        } while (0)
737
738
4.90M
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
739
4.90M
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
740
4.90M
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
741
4.90M
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
742
4.90M
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
743
744
#ifdef INCLUDE_C_SHA512
745
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
746
#else
747
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
748
                                    size_t num)
749
#endif
750
18.8k
{
751
18.8k
    const SHA_LONG64 *W = in;
752
18.8k
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
753
18.8k
    SHA_LONG64 X[16];
754
18.8k
    int i;
755
756
95.5k
    while (num--) {
757
758
76.7k
        a = ctx->h[0];
759
76.7k
        b = ctx->h[1];
760
76.7k
        c = ctx->h[2];
761
76.7k
        d = ctx->h[3];
762
76.7k
        e = ctx->h[4];
763
76.7k
        f = ctx->h[5];
764
76.7k
        g = ctx->h[6];
765
76.7k
        h = ctx->h[7];
766
767
#  ifdef B_ENDIAN
768
        T1 = X[0] = W[0];
769
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
770
        T1 = X[1] = W[1];
771
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
772
        T1 = X[2] = W[2];
773
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
774
        T1 = X[3] = W[3];
775
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
776
        T1 = X[4] = W[4];
777
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
778
        T1 = X[5] = W[5];
779
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
780
        T1 = X[6] = W[6];
781
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
782
        T1 = X[7] = W[7];
783
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
784
        T1 = X[8] = W[8];
785
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
786
        T1 = X[9] = W[9];
787
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
788
        T1 = X[10] = W[10];
789
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
790
        T1 = X[11] = W[11];
791
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
792
        T1 = X[12] = W[12];
793
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
794
        T1 = X[13] = W[13];
795
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
796
        T1 = X[14] = W[14];
797
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
798
        T1 = X[15] = W[15];
799
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
800
#  else
801
76.7k
        T1 = X[0] = PULL64(W[0]);
802
76.7k
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
803
76.7k
        T1 = X[1] = PULL64(W[1]);
804
76.7k
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
805
76.7k
        T1 = X[2] = PULL64(W[2]);
806
76.7k
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
807
76.7k
        T1 = X[3] = PULL64(W[3]);
808
76.7k
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
809
76.7k
        T1 = X[4] = PULL64(W[4]);
810
76.7k
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
811
76.7k
        T1 = X[5] = PULL64(W[5]);
812
76.7k
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
813
76.7k
        T1 = X[6] = PULL64(W[6]);
814
76.7k
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
815
76.7k
        T1 = X[7] = PULL64(W[7]);
816
76.7k
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
817
76.7k
        T1 = X[8] = PULL64(W[8]);
818
76.7k
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
819
76.7k
        T1 = X[9] = PULL64(W[9]);
820
76.7k
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
821
76.7k
        T1 = X[10] = PULL64(W[10]);
822
76.7k
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
823
76.7k
        T1 = X[11] = PULL64(W[11]);
824
76.7k
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
825
76.7k
        T1 = X[12] = PULL64(W[12]);
826
76.7k
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
827
76.7k
        T1 = X[13] = PULL64(W[13]);
828
76.7k
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
829
76.7k
        T1 = X[14] = PULL64(W[14]);
830
76.7k
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
831
76.7k
        T1 = X[15] = PULL64(W[15]);
832
76.7k
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
833
76.7k
#  endif
834
835
383k
        for (i = 16; i < 80; i += 16) {
836
306k
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
837
306k
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
838
306k
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
839
306k
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
840
306k
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
841
306k
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
842
306k
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
843
306k
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
844
306k
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
845
306k
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
846
306k
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
847
306k
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
848
306k
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
849
306k
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
850
306k
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
851
306k
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
852
306k
        }
853
854
76.7k
        ctx->h[0] += a;
855
76.7k
        ctx->h[1] += b;
856
76.7k
        ctx->h[2] += c;
857
76.7k
        ctx->h[3] += d;
858
76.7k
        ctx->h[4] += e;
859
76.7k
        ctx->h[5] += f;
860
76.7k
        ctx->h[6] += g;
861
76.7k
        ctx->h[7] += h;
862
863
76.7k
        W += SHA_LBLOCK;
864
76.7k
    }
865
18.8k
}
866
867
# endif
868
869
#endif                         /* SHA512_ASM */