Coverage Report

Created: 2023-09-25 06:45

/src/openssl111/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the OpenSSL license (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/opensslconf.h>
11
/*-
12
 * IMPLEMENTATION NOTES.
13
 *
14
 * As you might have noticed 32-bit hash algorithms:
15
 *
16
 * - permit SHA_LONG to be wider than 32-bit
17
 * - optimized versions implement two transform functions: one operating
18
 *   on [aligned] data in host byte order and one - on data in input
19
 *   stream byte order;
20
 * - share common byte-order neutral collector and padding function
21
 *   implementations, ../md32_common.h;
22
 *
23
 * Neither of the above applies to this SHA-512 implementation. Reasons
24
 * [in reverse order] are:
25
 *
26
 * - it's the only 64-bit hash algorithm at the time of this writing,
27
 *   there is no need for common collector/padding implementation [yet];
28
 * - by supporting only one transform function [which operates on
29
 *   *aligned* data in input stream byte order, big-endian in this case]
30
 *   we minimize burden of maintenance in two ways: a) collector/padding
31
 *   function is simpler; b) only one transform function to stare at;
32
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33
 *   apply a number of optimizations to mitigate potential performance
34
 *   penalties caused by previous design decision;
35
 *
36
 * Caveat lector.
37
 *
38
 * Implementation relies on the fact that "long long" is 64-bit on
39
 * both 32- and 64-bit platforms. If some compiler vendor comes up
40
 * with 128-bit long long, adjustment to sha.h would be required.
41
 * As this implementation relies on 64-bit integer type, it's totally
42
 * inappropriate for platforms which don't support it, most notably
43
 * 16-bit platforms.
44
 */
45
#include <stdlib.h>
46
#include <string.h>
47
48
#include <openssl/crypto.h>
49
#include <openssl/sha.h>
50
#include <openssl/opensslv.h>
51
52
#include "internal/cryptlib.h"
53
#include "crypto/sha.h"
54
55
/*
 * On the targets listed below, and on any build that defines SHA512_ASM,
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined.  When it is defined,
 * SHA512_Update() and SHA512_Transform() skip their alignment test and
 * hand the caller's pointer straight to sha512_block_data_order() instead
 * of first copying each block into the context buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
62
63
/*
 * Reset |c| for a SHA-512/224 computation.
 *
 * Loads the eight 64-bit initial chaining words, clears the bit counter
 * (Nl/Nh) and the count of buffered bytes, and records the digest length
 * that SHA512_Final() will emit.  The data buffer itself is not touched.
 * Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x8c3d37c819544da2),
        U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82),
        U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8),
        U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8),
        U64(0x1112e6ad91d692a1)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = initial_state[word];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA224_DIGEST_LENGTH;

    return 1;
}
80
81
/*
 * Reset |c| for a SHA-512/256 computation.
 *
 * Loads the eight 64-bit initial chaining words, clears the bit counter
 * (Nl/Nh) and the count of buffered bytes, and records the digest length
 * that SHA512_Final() will emit.  The data buffer itself is not touched.
 * Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x22312194fc2bf72c),
        U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151),
        U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3),
        U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa),
        U64(0x0eb72ddc81c52ca2)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = initial_state[word];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA256_DIGEST_LENGTH;

    return 1;
}
98
99
/*
 * Reset |c| for a SHA-384 computation.
 *
 * Loads the eight 64-bit initial chaining words, clears the bit counter
 * (Nl/Nh) and the count of buffered bytes, and records the digest length
 * that SHA512_Final() will emit.  The data buffer itself is not touched.
 * Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0xcbbb9d5dc1059ed8),
        U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17),
        U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31),
        U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7),
        U64(0x47b5481dbefa4fa4)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = initial_state[word];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA384_DIGEST_LENGTH;

    return 1;
}
116
117
/*
 * Reset |c| for a SHA-512 computation.
 *
 * Loads the eight 64-bit initial chaining words, clears the bit counter
 * (Nl/Nh) and the count of buffered bytes, and records the digest length
 * that SHA512_Final() will emit.  The data buffer itself is not touched.
 * Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x6a09e667f3bcc908),
        U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b),
        U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1),
        U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b),
        U64(0x5be0cd19137e2179)
    };
    size_t word;

    for (word = 0; word < 8; word++)
        c->h[word] = initial_state[word];

    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    c->md_len = SHA512_DIGEST_LENGTH;

    return 1;
}
134
135
/*
 * Block transform: folds |num| consecutive blocks starting at |in| into
 * the chaining state held in |ctx|.  When SHA512_ASM is not defined the
 * function is defined later in this file and is file-local (static);
 * with SHA512_ASM the declaration has external linkage, so the
 * definition is expected to come from outside this file.
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
139
140
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * Appends the 0x80 marker and zero padding to the buffered data, stores
 * the 128-bit message bit count (Nh:Nl) big-endian in the last 16 bytes
 * of the block, and runs the block transform.  If the marker leaves fewer
 * than 16 bytes free, the current block is flushed first and the length
 * goes into a second, otherwise all-zero block.
 *
 * The first c->md_len bytes of the chaining state are then written to
 * |md|, each 64-bit word most significant byte first.
 *
 * Returns 1 on success.  Returns 0 if |md| is NULL or if c->md_len is not
 * one of the 224/256/384/512-bit digest lengths; in both cases the
 * padding block has already been processed by the time 0 is returned.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *buf = (unsigned char *)c->u.p;
    const size_t block_len = sizeof(c->u);
    size_t used = c->num;
    size_t i;

    /* The buffer is never left full, so one byte always fits. */
    buf[used++] = 0x80;

    if (used > block_len - 16) {
        /* No room left for the length field: pad out and flush. */
        memset(buf + used, 0, block_len - used);
        used = 0;
        sha512_block_data_order(c, buf, 1);
    }

    memset(buf + used, 0, block_len - 16 - used);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Least significant byte of Nl lands in the very last byte. */
    for (i = 0; i < 8; i++) {
        buf[block_len - 1 - i] = (unsigned char)(c->Nl >> (8 * i));
        buf[block_len - 9 - i] = (unsigned char)(c->Nh >> (8 * i));
    }
#endif

    sha512_block_data_order(c, buf, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /*
     * Emit md_len bytes of state.  For the 224-bit variant this ends with
     * the upper four bytes of h[3].
     */
    for (i = 0; i < c->md_len; i++)
        md[i] = (unsigned char)(c->h[i / 8] >> (56 - 8 * (i % 8)));

    return 1;
}
258
259
/*
 * SHA-384 finalisation.  The digest length is carried in the context, so
 * this simply forwards to SHA512_Final() and returns its result.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    const int status = SHA512_Final(md, c);

    return status;
}
263
264
/*
 * Absorb |len| bytes at |_data| into the hash state |c|.
 *
 * The 128-bit bit counter (Nh:Nl) is advanced first.  Data is then
 * consumed in three steps: top up and flush a partially filled buffer,
 * feed whole blocks to the block transform, and stash any trailing bytes
 * in the context buffer for the next call.
 *
 * A zero |len| is a no-op.  Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    const size_t block_len = sizeof(c->u);
    SHA_LONG64 low_bits;

    if (len == 0)
        return 1;

    /* Add len * 8 to the bit counter, carrying from Nl into Nh. */
    low_bits = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (low_bits < c->Nl)
        c->Nh++;
    /* The top three bits of a 64-bit len overflow straight into Nh. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = low_bits;

    if (c->num != 0) {
        size_t room = block_len - c->num;

        if (len < room) {
            /* Still not a full block: just buffer the input. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        /* Complete the buffered block and process it. */
        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= block_len) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through the buffer. */
            do {
                memcpy(buf, src, block_len);
                sha512_block_data_order(c, buf, 1);
                len -= block_len;
                src += block_len;
            } while (len >= block_len);
        } else
#endif
        {
            size_t leftover = len % block_len;

            sha512_block_data_order(c, src, len / block_len);
            src += len - leftover;
            len = leftover;
        }
    }

    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (unsigned int)len;
    }

    return 1;
}
311
312
/*
 * SHA-384 data absorption.  Forwards to SHA512_Update() unchanged and
 * returns its result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    const int status = SHA512_Update(c, data, len);

    return status;
}
316
317
/*
 * Run the block transform over exactly one block at |data|, updating the
 * chaining state in |c|.  The bit counter and c->num are not modified.
 *
 * Where the block function cannot take unaligned input, a misaligned
 * |data| is first copied into the context buffer c->u.p (overwriting
 * whatever was buffered there) and processed from that copy.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
    const unsigned char *block = data;

#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)block % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, block, sizeof(c->u.p));
        block = c->u.p;
    }
#endif
    sha512_block_data_order(c, block, 1);
}
325
326
unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
327
0
{
328
0
    SHA512_CTX c;
329
0
    static unsigned char m[SHA384_DIGEST_LENGTH];
330
331
0
    if (md == NULL)
332
0
        md = m;
333
0
    SHA384_Init(&c);
334
0
    SHA512_Update(&c, d, n);
335
0
    SHA512_Final(md, &c);
336
0
    OPENSSL_cleanse(&c, sizeof(c));
337
0
    return md;
338
0
}
339
340
unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
341
1
{
342
1
    SHA512_CTX c;
343
1
    static unsigned char m[SHA512_DIGEST_LENGTH];
344
345
1
    if (md == NULL)
346
0
        md = m;
347
1
    SHA512_Init(&c);
348
1
    SHA512_Update(&c, d, n);
349
1
    SHA512_Final(md, &c);
350
1
    OPENSSL_cleanse(&c, sizeof(c));
351
1
    return md;
352
1
}
353
354
#ifndef SHA512_ASM
355
static const SHA_LONG64 K512[80] = {
356
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
357
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
358
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
359
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
360
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
361
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
362
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
363
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
364
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
365
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
366
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
367
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
368
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
369
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
370
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
371
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
372
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
373
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
374
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
375
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
376
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
377
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
378
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
379
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
380
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
381
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
382
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
383
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
384
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
385
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
386
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
387
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
388
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
389
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
390
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
391
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
392
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
393
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
394
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
395
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
396
};
397
398
/*
 * Optional compiler/CPU specific definitions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - load the 64-bit big-endian word stored at &x.
 *
 * Nothing in this section is compiled when PEDANTIC is defined.  Any
 * primitive left undefined here is supplied by the portable fallback
 * that follows this section.
 */
# ifndef PEDANTIC
/* GCC-style inline assembler (statement expressions). */
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate with rorq; byte-swapped load with bswapq. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/*
 * 32-bit x86: the word is read as two 32-bit halves, each half is
 * byte-reversed, and the halves are recombined with p[0] as the high
 * half.  The I386_ONLY form uses xchgb/roll instead of bswapl.
 */
#    if defined(I386_ONLY)
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: only ROTR is provided here. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
/* AArch64: ROTR always; PULL64 only when the compiler reports little-endian. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   endif
/* Microsoft compiler. */
#  elif defined(_MSC_VER)
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __pull64be() loads the two halves of the word at x and
 * byte-reverses each.  There is no return statement and x is never named
 * in the body: the code reads through ecx and leaves its result in
 * edx:eax.
 * NOTE(review): this presumably relies on __fastcall passing x in ecx
 * and on edx:eax being the 64-bit return registers - confirm against
 * the compiler's calling-convention documentation.
 */
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov   edx,[ecx + 0]
    _asm mov   eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
481
/*
 * Portable fallbacks, used when no platform-specific PULL64/ROTR has
 * been defined, followed by the SHA-512 round functions.
 *
 * Every macro argument is parenthesised at each use, so that expression
 * arguments such as ROTR(v, a + b) or B(w, k + 1) expand correctly.
 */
# ifndef PULL64
/* Byte j (0 = first in memory) of the object x, moved to its big-endian position. */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
/* Assemble the 64-bit big-endian word stored at &x, one byte at a time. */
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/* Rotate the 64-bit value x right by s bits; s must be in 1..63. */
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* Mixing functions applied to the working variables in every round. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* Mixing functions used when extending the message schedule. */
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Ch: for each bit, take y where x is 1 and z where x is 0. */
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Maj: for each bit, the majority value among x, y and z. */
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
494
495
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
496
/*
497
 * This code should give better results on 32-bit CPU with less than
498
 * ~24 registers, both size and performance wise...
499
 */
500
501
/*
 * Block transform, variant selected for 32-bit x86 builds.
 *
 * Only the A and E working variables live in named locals.  The other
 * six, together with the message schedule, are kept in the array
 * |frame| and reached through a pointer |win| that moves down one slot
 * per round, so a value stored at win[k] in one round is found at
 * win[k + 1] in the next.
 *
 *   ctx - hash context whose h[0..7] is updated in place
 *   in  - |num| consecutive blocks of big-endian input
 *   num - number of blocks to process
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    /* win[SCHED] holds this round's schedule word; win[SCHED + k] the word from k rounds ago. */
    enum { SCHED = 8 };
    const SHA_LONG64 *blk = in;
    SHA_LONG64 frame[9 + 80];
    SHA_LONG64 *win;
    SHA_LONG64 A, E, tmp;
    int rnd;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        win = frame + 80;

        A = ctx->h[0];
        win[1] = ctx->h[1];
        win[2] = ctx->h[2];
        win[3] = ctx->h[3];
        E = ctx->h[4];
        win[5] = ctx->h[5];
        win[6] = ctx->h[6];
        win[7] = ctx->h[7];

        /* Rounds 0..15 take their schedule word straight from the input. */
        for (rnd = 0; rnd < 16; rnd++, win--) {
#  ifdef B_ENDIAN
            tmp = blk[rnd];
#  else
            tmp = PULL64(blk[rnd]);
#  endif
            win[0] = A;
            win[4] = E;
            win[SCHED] = tmp;
            tmp += win[7] + Sigma1(E) + Ch(E, win[5], win[6]) + K512[rnd];
            E = win[3] + tmp;
            A = tmp + Sigma0(A) + Maj(A, win[1], win[2]);
        }

        /* Rounds 16..79 derive the word from four earlier schedule words. */
        for (; rnd < 80; rnd++, win--) {
            tmp = sigma0(win[SCHED + 15]);
            tmp += sigma1(win[SCHED + 2]);
            tmp += win[SCHED + 16] + win[SCHED + 7];

            win[0] = A;
            win[4] = E;
            win[SCHED] = tmp;
            tmp += win[7] + Sigma1(E) + Ch(E, win[5], win[6]) + K512[rnd];
            E = win[3] + tmp;
            A = tmp + Sigma0(A) + Maj(A, win[1], win[2]);
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += A;
        ctx->h[1] += win[1];
        ctx->h[2] += win[2];
        ctx->h[3] += win[3];
        ctx->h[4] += E;
        ctx->h[5] += win[5];
        ctx->h[6] += win[6];
        ctx->h[7] += win[7];
    }
}
560
561
# elif defined(OPENSSL_SMALL_FOOTPRINT)
562
563
/*
 * Block transform, compact variant selected by OPENSSL_SMALL_FOOTPRINT.
 *
 * A single 80-round loop with the message schedule kept in a 16-word
 * ring |sched|.  The first 16 rounds fill the ring from the input; each
 * later round overwrites the oldest slot with the next schedule word.
 *
 *   ctx - hash context whose h[0..7] is updated in place
 *   in  - |num| consecutive blocks of big-endian input
 *   num - number of blocks to process
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 sched[16];
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 mix0, mix1, t1, t2;
    int rnd;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (rnd = 0; rnd < 80; rnd++) {
            if (rnd < 16) {
#  ifdef B_ENDIAN
                t1 = sched[rnd] = blk[rnd];
#  else
                t1 = sched[rnd] = PULL64(blk[rnd]);
#  endif
            } else {
                mix0 = sched[(rnd + 1) & 0x0f];
                mix0 = sigma0(mix0);
                mix1 = sched[(rnd + 14) & 0x0f];
                mix1 = sigma1(mix1);

                t1 = sched[rnd & 0xf] += mix0 + mix1 + sched[(rnd + 9) & 0xf];
            }

            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[rnd];
            t2 = Sigma0(a) + Maj(a, b, c);

            /* Shift the eight working variables down by one position. */
            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
631
632
# else
633
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
634
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
635
        h = Sigma0(a) + Maj(a,b,c);                     \
636
        d += T1;        h += T1;                        } while (0)
637
638
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
639
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
640
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
641
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
642
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
643
644
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
645
                                    size_t num)
646
{
647
    const SHA_LONG64 *W = in;
648
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
649
    SHA_LONG64 X[16];
650
    int i;
651
652
    while (num--) {
653
654
        a = ctx->h[0];
655
        b = ctx->h[1];
656
        c = ctx->h[2];
657
        d = ctx->h[3];
658
        e = ctx->h[4];
659
        f = ctx->h[5];
660
        g = ctx->h[6];
661
        h = ctx->h[7];
662
663
#  ifdef B_ENDIAN
664
        T1 = X[0] = W[0];
665
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
666
        T1 = X[1] = W[1];
667
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
668
        T1 = X[2] = W[2];
669
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
670
        T1 = X[3] = W[3];
671
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
672
        T1 = X[4] = W[4];
673
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
674
        T1 = X[5] = W[5];
675
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
676
        T1 = X[6] = W[6];
677
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
678
        T1 = X[7] = W[7];
679
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
680
        T1 = X[8] = W[8];
681
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
682
        T1 = X[9] = W[9];
683
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
684
        T1 = X[10] = W[10];
685
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
686
        T1 = X[11] = W[11];
687
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
688
        T1 = X[12] = W[12];
689
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
690
        T1 = X[13] = W[13];
691
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
692
        T1 = X[14] = W[14];
693
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
694
        T1 = X[15] = W[15];
695
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
696
#  else
697
        T1 = X[0] = PULL64(W[0]);
698
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
699
        T1 = X[1] = PULL64(W[1]);
700
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
701
        T1 = X[2] = PULL64(W[2]);
702
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
703
        T1 = X[3] = PULL64(W[3]);
704
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
705
        T1 = X[4] = PULL64(W[4]);
706
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
707
        T1 = X[5] = PULL64(W[5]);
708
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
709
        T1 = X[6] = PULL64(W[6]);
710
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
711
        T1 = X[7] = PULL64(W[7]);
712
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
713
        T1 = X[8] = PULL64(W[8]);
714
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
715
        T1 = X[9] = PULL64(W[9]);
716
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
717
        T1 = X[10] = PULL64(W[10]);
718
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
719
        T1 = X[11] = PULL64(W[11]);
720
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
721
        T1 = X[12] = PULL64(W[12]);
722
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
723
        T1 = X[13] = PULL64(W[13]);
724
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
725
        T1 = X[14] = PULL64(W[14]);
726
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
727
        T1 = X[15] = PULL64(W[15]);
728
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
729
#  endif
730
731
        for (i = 16; i < 80; i += 16) {
732
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
733
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
734
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
735
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
736
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
737
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
738
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
739
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
740
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
741
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
742
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
743
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
744
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
745
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
746
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
747
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
748
        }
749
750
        ctx->h[0] += a;
751
        ctx->h[1] += b;
752
        ctx->h[2] += c;
753
        ctx->h[3] += d;
754
        ctx->h[4] += e;
755
        ctx->h[5] += f;
756
        ctx->h[6] += g;
757
        ctx->h[7] += h;
758
759
        W += SHA_LBLOCK;
760
    }
761
}
762
763
# endif
764
765
#endif                         /* SHA512_ASM */