Coverage Report

Created: 2025-06-13 06:58

/src/openssl30/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed, 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order and one - on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, ../md32_common.h;
29
 *
30
 * None of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm at the time of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
/*
 * When SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined, SHA512_Update()
 * and SHA512_Transform() hand caller data straight to
 * sha512_block_data_order() without checking its alignment or staging it
 * through the context buffer first.  It is enabled for the targets listed
 * below and whenever SHA512_ASM is defined.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
/*
 * U64(C) token-pastes an unsigned integer-literal suffix onto C: UI64 for
 * Windows compilers other than MinGW, UL when __arch64__ is defined, and
 * ULL everywhere else.  It is used for every 64-bit constant in this file.
 * NOTE(review): __arch64__ (not __aarch64__) looks deliberate - presumably
 * a target where unsigned long is already 64 bits wide; confirm.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
193M
# define U64(C)     C##ULL
76
#endif
77
78
/*
 * Prepare |c| for a SHA-512/224 computation: load the eight initial state
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA224_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA224_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
95
96
/*
 * Prepare |c| for a SHA-512/256 computation: load the eight initial state
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA256_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA256_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
113
114
/*
 * Prepare |c| for a SHA-384 computation: load the eight initial state
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA384_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA384_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
131
132
/*
 * Prepare |c| for a SHA-512 computation: load the eight initial state
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA512_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->md_len = SHA512_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;

    return 1;
}
149
150
/*
 * Block transform used by the functions below: folds |num| consecutive
 * input blocks starting at |in| into ctx->h.  It has internal linkage when
 * this file supplies the C implementation (SHA512_ASM undefined); with
 * SHA512_ASM defined the declaration is external and the definition is
 * presumably provided by an assembly module.
 */
#ifndef SHA512_ASM
151
static
152
#endif
153
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
154
155
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * The buffered tail is terminated with a 0x80 byte, zero-padded, and
 * followed by the 128-bit message bit count (Nh:Nl) in the last 16 bytes
 * of the block; one or two final blocks are then transformed.  The first
 * c->md_len bytes of the state, most significant byte of each word first,
 * form the digest.
 *
 * Returns 1 on success.  Returns 0 if |md| is NULL or c->md_len is not one
 * of the four supported digest lengths; in both cases the context has
 * already been padded and transformed when the failure is reported.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;

    /* There is always room for the terminator byte. */
    p[n++] = 0x80;

    /* Not enough room left for the 16-byte length: flush an extra block. */
    if (n > (sizeof(c->u) - 16)) {
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    memset(p + n, 0, sizeof(c->u) - 16 - n);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Store Nh:Nl big-endian, least significant byte at the very end. */
    for (n = 0; n < 8; n++) {
        p[sizeof(c->u) - 1 - n] = (unsigned char)(c->Nl >> (8 * n));
        p[sizeof(c->u) - 9 - n] = (unsigned char)(c->Nh >> (8 * n));
    }
#endif

    sha512_block_data_order(c, p, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /*
     * Serialise the state big-endian.  Working byte by byte also covers
     * the 224-bit digest, whose last four bytes are the upper half of
     * h[3].
     */
    for (n = 0; n < c->md_len; n++)
        md[n] = (unsigned char)(c->h[n / 8] >> (56 - 8 * (n % 8)));

    return 1;
}
273
274
/*
 * SHA-384 finalisation.  Forwards to SHA512_Final(), which picks the output
 * length from c->md_len, and returns its result unchanged.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
275
1.14M
{
276
1.14M
    return SHA512_Final(md, c);
277
1.14M
}
278
279
/*
 * Absorb |len| bytes starting at |_data| into the hash state |c|.
 *
 * The 128-bit bit counter Nh:Nl is advanced first.  Bytes are then used to
 * complete a partially filled context buffer, whole blocks are transformed,
 * and any remainder is kept in the buffer with its length in c->num.
 * Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *data = (const unsigned char *)_data;
    unsigned char *p = c->u.p;
    SHA_LONG64 l;

    if (len == 0)
        return 1;

    /* Add 8 * len to the bit counter, carrying from Nl into Nh. */
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    /* The bits of len shifted out above only exist for a 64-bit size_t. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = l;

    /* Complete a partially filled buffer before touching whole blocks. */
    if (c->num != 0) {
        size_t room = sizeof(c->u) - c->num;

        if (len < room) {
            memcpy(p + c->num, data, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(p + c->num, data, room);
        c->num = 0;
        len -= room;
        data += room;
        sha512_block_data_order(c, p, 1);
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)data % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: stage each block through the buffer. */
            while (len >= sizeof(c->u)) {
                memcpy(p, data, sizeof(c->u));
                sha512_block_data_order(c, p, 1);
                len -= sizeof(c->u);
                data += sizeof(c->u);
            }
        } else
#endif
        {
            size_t whole = len / sizeof(c->u);

            sha512_block_data_order(c, data, whole);
            data += whole * sizeof(c->u);
            len %= sizeof(c->u);
        }
    }

    /* Keep the tail for the next call. */
    if (len != 0) {
        memcpy(p, data, len);
        c->num = (unsigned int)len;
    }

    return 1;
}
326
327
/*
 * SHA-384 update.  Forwards to SHA512_Update() with the same arguments and
 * returns its result unchanged.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
328
2.22M
{
329
2.22M
    return SHA512_Update(c, data, len);
330
2.22M
}
331
332
/*
 * Run the block transform over exactly one input block at |data|, updating
 * c->h.  The bit counter and c->num are not touched.
 *
 * On builds without SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA a misaligned
 * block is first copied into c->u.p, overwriting whatever was buffered
 * there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
    const unsigned char *block = data;

#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)block % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, block, sizeof(c->u.p));
        block = c->u.p;
    }
#endif
    sha512_block_data_order(c, block, 1);
}
340
341
#ifndef SHA512_ASM
342
/*
 * Round constants: entry i is added to the working value in round i of
 * every sha512_block_data_order() variant in this file (80 rounds).
 */
static const SHA_LONG64 K512[80] = {
343
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
344
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
345
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
346
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
347
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
348
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
349
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
350
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
351
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
352
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
353
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
354
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
355
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
356
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
357
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
358
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
359
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
360
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
361
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
362
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
363
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
364
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
365
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
366
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
367
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
368
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
369
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
370
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
371
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
372
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
373
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
374
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
375
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
376
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
377
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
378
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
379
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
380
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
381
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
382
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
383
};
384
385
/*
 * Optional compiler- and CPU-specific versions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - read the eight bytes stored at the lvalue x as a
 *                big-endian 64-bit integer.
 *
 * Whatever is left undefined here is supplied by the portable fallbacks
 * that follow this section.  Defining PEDANTIC skips the whole section.
 */
# ifndef PEDANTIC
386
/* GCC-compatible compilers with inline assembly allowed. */
#  if defined(__GNUC__) && __GNUC__>=2 && \
387
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
388
/* x86-64: rorq for the rotate, bswapq on a 64-bit load for PULL64. */
#   if defined(__x86_64) || defined(__x86_64__)
389
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
390
                                asm ("rorq %1,%0"       \
391
                                : "=r"(ret)             \
392
                                : "J"(n),"0"(a)         \
393
                                : "cc"); ret;           })
394
#    if !defined(B_ENDIAN)
395
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
396
                                asm ("bswapq    %0"             \
397
                                : "=r"(ret)                     \
398
                                : "0"(ret)); ret;               })
399
#    endif
400
/*
 * 32-bit x86: PULL64 only.  The two 32-bit halves are byte-swapped
 * separately and recombined with the first word as the high half.
 */
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
401
/* I386_ONLY: swap with xchgb/roll instead of bswapl. */
#    if defined(I386_ONLY)
402
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
403
                          unsigned int hi=p[0],lo=p[1];          \
404
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
405
                                    "roll $16,%%eax; roll $16,%%edx; "\
406
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
407
                                : "=a"(lo),"=d"(hi)             \
408
                                : "0"(lo),"1"(hi) : "cc");      \
409
                                ((SHA_LONG64)hi)<<32|lo;        })
410
#    else
411
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
412
                          unsigned int hi=p[0],lo=p[1];         \
413
                                asm ("bswapl %0; bswapl %1;"    \
414
                                : "=r"(lo),"=r"(hi)             \
415
                                : "0"(lo),"1"(hi));             \
416
                                ((SHA_LONG64)hi)<<32|lo;        })
417
#    endif
418
/* 64-bit PowerPC: ROTR only. */
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
419
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
420
                                asm ("rotrdi %0,%1,%2"  \
421
                                : "=r"(ret)             \
422
                                : "r"(a),"K"(n)); ret;  })
423
/* AArch64: ror for the rotate; rev for PULL64 on little-endian builds. */
#   elif defined(__aarch64__)
424
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
425
                                asm ("ror %0,%1,%2"     \
426
                                : "=r"(ret)             \
427
                                : "r"(a),"I"(n)); ret;  })
428
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
429
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
430
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
431
                                asm ("rev       %0,%1"          \
432
                                : "=r"(ret)                     \
433
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
434
#    endif
435
#   endif
436
/* Microsoft compiler. */
#  elif defined(_MSC_VER)
437
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
438
#    pragma intrinsic(_rotr64)
439
#    define ROTR(a,n)    _rotr64((a),n)
440
#   endif
441
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
442
       !defined(OPENSSL_NO_INLINE_ASM)
443
/*
 * __pull64be() reads two 32-bit words through ecx, byte-swaps each and
 * leaves the result in edx:eax; there is deliberately no C return
 * statement.  NOTE(review): relies on __fastcall passing x in ecx and on
 * 64-bit values being returned in edx:eax - confirm against the MSVC
 * calling-convention documentation.
 */
#    if defined(I386_ONLY)
444
static SHA_LONG64 __fastcall __pull64be(const void *x)
445
{
446
    _asm mov  edx,[ecx + 0]
447
    _asm mov  eax,[ecx + 4]
448
    _asm xchg dh, dl
449
    _asm xchg ah, al
450
    _asm rol  edx, 16
451
    _asm rol  eax, 16
452
    _asm xchg dh, dl
453
    _asm xchg ah, al
454
}
455
#    else
456
static SHA_LONG64 __fastcall __pull64be(const void *x)
457
{
458
    _asm mov   edx,[ecx + 0]
459
    _asm mov   eax,[ecx + 4]
460
    _asm bswap edx
461
    _asm bswap eax
462
}
463
#    endif
464
#    define PULL64(x) __pull64be(&(x))
465
#   endif
466
#  endif
467
# endif
468
/*
 * Portable fallback for PULL64(x): assemble the eight bytes stored at the
 * lvalue x into a 64-bit value, first byte most significant.  Only byte
 * loads are used, so it works for any host byte order.
 *
 * B(x,j) yields byte j of x shifted to its big-endian position.  Both
 * arguments are parenthesised so that expression arguments (for example
 * B(x, k + 1)) select the intended byte and shift.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
472
/*
 * Portable fallback for ROTR(x,s): rotate the 64-bit value x right by s
 * bits.  s must be in the range 1..63, because s == 0 would shift by the
 * full width.  Both arguments are parenthesised so that an expression such
 * as ROTR(v, a + b) rotates by the intended amount.
 */
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
475
/*
 * Bit-mixing helpers shared by every sha512_block_data_order() variant
 * below; the names follow the SHA-2 specification (FIPS 180-4).
 *
 *   Sigma0 / Sigma1 - XOR of three rotations, applied to the a and e
 *                     working variables in each round;
 *   sigma0 / sigma1 - XOR of two rotations and one plain right shift,
 *                     used when extending the message schedule;
 *   Ch(x,y,z)       - per bit, selects y where x is set and z elsewhere;
 *   Maj(x,y,z)      - per bit, the majority value of x, y and z.
 *
 * Every argument is evaluated more than once, so pass side-effect-free
 * expressions only.
 */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
476
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
477
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
478
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
479
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
480
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
481
482
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
483
/*
484
 * This code should give better results on 32-bit CPU with less than
485
 * ~24 registers, both size and performance wise...
486
 */
487
488
/*
 * Block transform, 32-bit x86 flavour: fold |num| consecutive blocks at
 * |in| into ctx->h.
 *
 * Only the a and e working variables are kept in scalars.  The other six,
 * together with the message schedule, live in the array win[]: each round
 * stores the current a, e and schedule word at slot[0], slot[4] and
 * slot[8], then moves slot down by one element.  After the move the old a
 * is found at slot[1] (i.e. it has become b), the old e at slot[5] (f),
 * and so on, so no values have to be shuffled.  For the same reason the
 * schedule word stored k rounds ago is at slot[8 + k].
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 win[9 + 80], *slot;
    SHA_LONG64 a, e, t;
    int r;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        slot = win + 80;

        a = ctx->h[0];
        slot[1] = ctx->h[1];
        slot[2] = ctx->h[2];
        slot[3] = ctx->h[3];
        e = ctx->h[4];
        slot[5] = ctx->h[5];
        slot[6] = ctx->h[6];
        slot[7] = ctx->h[7];

        for (r = 0; r < 80; r++, slot--) {
            if (r < 16) {
                /* First 16 schedule words come straight from the input. */
#  ifdef B_ENDIAN
                t = blk[r];
#  else
                t = PULL64(blk[r]);
#  endif
            } else {
                /* Extend the schedule from words 15, 2, 16 and 7 back. */
                t = sigma0(slot[8 + 16 - 1]);
                t += sigma1(slot[8 + 16 - 14]);
                t += slot[8 + 16] + slot[8 + 16 - 9];
            }

            slot[0] = a;
            slot[4] = e;
            slot[8] = t;
            t += slot[7] + Sigma1(e) + Ch(e, slot[5], slot[6]) + K512[r];
            e = slot[3] + t;
            a = t + Sigma0(a) + Maj(a, slot[1], slot[2]);
        }

        ctx->h[0] += a;
        ctx->h[1] += slot[1];
        ctx->h[2] += slot[2];
        ctx->h[3] += slot[3];
        ctx->h[4] += e;
        ctx->h[5] += slot[5];
        ctx->h[6] += slot[6];
        ctx->h[7] += slot[7];
    }
}
547
548
# elif defined(OPENSSL_SMALL_FOOTPRINT)
549
550
/*
 * Block transform, OPENSSL_SMALL_FOOTPRINT flavour: fold |num| consecutive
 * blocks at |in| into ctx->h using one rolled 80-round loop and a 16-word
 * circular message schedule X[].
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, T1, T2;
    SHA_LONG64 X[16];
    int t;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (t = 0; t < 80; t++) {
            if (t < 16) {
                /* Schedule words 0..15 are the input words themselves. */
#  ifdef B_ENDIAN
                T1 = X[t] = blk[t];
#  else
                T1 = X[t] = PULL64(blk[t]);
#  endif
            } else {
                /* Later words are derived in place in the circular buffer. */
                SHA_LONG64 s0 = X[(t + 1) & 0x0f];
                SHA_LONG64 s1 = X[(t + 14) & 0x0f];

                s0 = sigma0(s0);
                s1 = sigma1(s1);
                T1 = X[t & 0xf] += s0 + s1 + X[(t + 9) & 0xf];
            }

            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[t];
            T2 = Sigma0(a) + Maj(a, b, c);

            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
618
619
# else
620
/*
 * ROUND_00_15 performs one round.  On entry T1 (a variable of the
 * enclosing function) must hold the schedule word for round i.  Instead of
 * moving values between the eight working variables, the caller rotates
 * the argument list by one position per round: the macro adds T1 to the
 * variable passed as d and overwrites the one passed as h, and the next
 * invocation passes that h in the a position.
 */
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
621
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
622
        h = Sigma0(a) + Maj(a,b,c);                     \
623
        d += T1;        h += T1;                        } while (0)
624
625
/*
 * ROUND_16_80 first updates slot j of the 16-word circular schedule X in
 * place, using s0 and s1 from the enclosing function as scratch, and then
 * runs ROUND_00_15 for round i+j.  X is indexed with j alone, so i has to
 * be a multiple of 16.
 */
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
626
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
627
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
628
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
629
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
630
631
/*
 * Block transform, generic flavour: fold |num| consecutive blocks at |in|
 * into ctx->h.
 *
 * Rounds are written with ROUND_00_15 / ROUND_16_80, which rotate the
 * roles of the eight working variables through the argument list, so the
 * argument pattern repeats every eight rounds.  T1, s0 and s1 are used
 * implicitly by those macros and must keep these names.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    /* Fetch one big-endian input word; a plain load on big-endian hosts. */
#  ifdef B_ENDIAN
#   define SHA512_LOAD_BE(w)    (w)
#  else
#   define SHA512_LOAD_BE(w)    PULL64(w)
#  endif
    const SHA_LONG64 *blk = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];
    int i;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: schedule words are the input words themselves. */
        for (i = 0; i < 16; i += 8) {
            T1 = X[i + 0] = SHA512_LOAD_BE(blk[i + 0]);
            ROUND_00_15(i + 0, a, b, c, d, e, f, g, h);
            T1 = X[i + 1] = SHA512_LOAD_BE(blk[i + 1]);
            ROUND_00_15(i + 1, h, a, b, c, d, e, f, g);
            T1 = X[i + 2] = SHA512_LOAD_BE(blk[i + 2]);
            ROUND_00_15(i + 2, g, h, a, b, c, d, e, f);
            T1 = X[i + 3] = SHA512_LOAD_BE(blk[i + 3]);
            ROUND_00_15(i + 3, f, g, h, a, b, c, d, e);
            T1 = X[i + 4] = SHA512_LOAD_BE(blk[i + 4]);
            ROUND_00_15(i + 4, e, f, g, h, a, b, c, d);
            T1 = X[i + 5] = SHA512_LOAD_BE(blk[i + 5]);
            ROUND_00_15(i + 5, d, e, f, g, h, a, b, c);
            T1 = X[i + 6] = SHA512_LOAD_BE(blk[i + 6]);
            ROUND_00_15(i + 6, c, d, e, f, g, h, a, b);
            T1 = X[i + 7] = SHA512_LOAD_BE(blk[i + 7]);
            ROUND_00_15(i + 7, b, c, d, e, f, g, h, a);
        }

        /*
         * Rounds 16..79, sixteen per pass.  The step must stay 16 because
         * ROUND_16_80 indexes X with its second argument only.
         */
        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
#  undef SHA512_LOAD_BE
}
749
750
# endif
751
752
#endif                         /* SHA512_ASM */