Coverage Report

Created: 2024-01-20 12:36

/src/openssl/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order and one - on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, ../md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementation. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm at the time of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64
    defined(__s390__) || defined(__s390x__) || \
65
    defined(__aarch64__) || \
66
    defined(SHA512_ASM)
67
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
68
#endif
69
70
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71
# define U64(C)     C##UI64
72
#elif defined(__arch64__)
73
# define U64(C)     C##UL
74
#else
75
0
# define U64(C)     C##ULL
76
#endif
77
78
/*
 * Reset |c| for the 224-bit truncated variant: load the eight initial
 * chaining words, clear the length counter (Nh:Nl) and the buffered byte
 * count, and select SHA224_DIGEST_LENGTH as the output length.
 * Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nh = 0;
    c->Nl = 0;
    c->num = 0;
    c->md_len = SHA224_DIGEST_LENGTH;

    return 1;
}
95
96
/*
 * Reset |c| for the 256-bit truncated variant: load the eight initial
 * chaining words, clear the length counter (Nh:Nl) and the buffered byte
 * count, and select SHA256_DIGEST_LENGTH as the output length.
 * Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nh = 0;
    c->Nl = 0;
    c->num = 0;
    c->md_len = SHA256_DIGEST_LENGTH;

    return 1;
}
113
114
/*
 * Reset |c| for SHA-384: load the eight initial chaining words, clear the
 * length counter (Nh:Nl) and the buffered byte count, and select
 * SHA384_DIGEST_LENGTH as the output length. Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nh = 0;
    c->Nl = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;

    return 1;
}
131
132
/*
 * Reset |c| for SHA-512: load the eight initial chaining words, clear the
 * length counter (Nh:Nl) and the buffered byte count, and select
 * SHA512_DIGEST_LENGTH as the output length. Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nh = 0;
    c->Nl = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;

    return 1;
}
149
150
#ifndef SHA512_ASM
151
static
152
#endif
153
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
154
155
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * The buffered data is terminated with a 0x80 byte, zero-padded, and
 * followed by the 16-byte big-endian bit count (Nh then Nl) in the last
 * 16 bytes of the block; an extra block is compressed first when the
 * count does not fit after the marker. The first c->md_len bytes of the
 * chaining state c->h are then emitted most-significant byte first.
 *
 * Returns 1 on success. Returns 0 if |md| is NULL or c->md_len is not one
 * of the four supported digest lengths; in both cases the padding blocks
 * have already been compressed into |c| and nothing is written to |md|.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *buf = (unsigned char *)c->u.p;
    const size_t blk = sizeof(c->u);
    size_t used = c->num;
    size_t whole, tail, i, k;

    buf[used++] = 0x80;         /* There always is a room for one */
    if (used > blk - 16) {
        /* No room left for the length field: flush a padding-only block. */
        memset(buf + used, 0, blk - used);
        used = 0;
        sha512_block_data_order(c, buf, 1);
    }

    memset(buf + used, 0, blk - 16 - used);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Store Nh:Nl big-endian, least significant byte at the block's end. */
    for (i = 0; i < 8; i++) {
        buf[blk - 1 - i] = (unsigned char)(c->Nl >> (8 * i));
        buf[blk - 9 - i] = (unsigned char)(c->Nh >> (8 * i));
    }
#endif

    sha512_block_data_order(c, buf, 1);

    if (md == 0)
        return 0;

    /* Make sure md_len is not abused before touching the output buffer. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    whole = c->md_len / 8;
    tail = c->md_len % 8;

    for (i = 0; i < whole; i++) {
        SHA_LONG64 t = c->h[i];

        for (k = 0; k < 8; k++)
            *(md++) = (unsigned char)(t >> (56 - 8 * (unsigned int)k));
    }

    /*
     * For 224 bits, there are four bytes left over that come from the top
     * half of the next state word.
     */
    if (tail != 0) {
        SHA_LONG64 t = c->h[whole];

        for (k = 0; k < tail; k++)
            *(md++) = (unsigned char)(t >> (56 - 8 * (unsigned int)k));
    }

    return 1;
}
273
274
/*
 * SHA-384 finalisation is SHA512_Final(): the digest length is taken from
 * c->md_len, so the call is forwarded unchanged and its result returned.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    const int status = SHA512_Final(md, c);

    return status;
}
278
279
/*
 * Absorb |len| bytes starting at |_data| into the hash state |c|.
 *
 * The bit count Nh:Nl is advanced by len * 8 first. Input then tops up
 * any partially filled block in c->u, whole blocks are compressed with
 * sha512_block_data_order(), and whatever remains is stored in c->u with
 * c->num recording how many bytes are buffered. Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    SHA_LONG64 bits_lo;

    if (len == 0)
        return 1;

    /* 128-bit add of (len << 3) to Nh:Nl, carrying out of the low word. */
    bits_lo = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (bits_lo < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = bits_lo;

    if (c->num != 0) {
        size_t room = sizeof(c->u) - c->num;

        if (len < room) {
            /* Still not a full block: just append and wait for more. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through c->u. */
            while (len >= sizeof(c->u)) {
                memcpy(buf, src, sizeof(c->u));
                sha512_block_data_order(c, buf, 1);
                len -= sizeof(c->u);
                src += sizeof(c->u);
            }
        } else
#endif
        {
            size_t nblocks = len / sizeof(c->u);

            sha512_block_data_order(c, src, nblocks);
            src += nblocks * sizeof(c->u);
            len %= sizeof(c->u);
        }
    }

    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
326
327
/*
 * SHA-384 absorbs data exactly as SHA-512 does, so the call is forwarded
 * to SHA512_Update() and its result returned.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    const int status = SHA512_Update(c, data, len);

    return status;
}
331
332
/*
 * Compress exactly one block located at |data| into the state |c|.
 * When the block function cannot take unaligned input, a misaligned
 * block is first copied into the context buffer c->u.p (overwriting
 * whatever was buffered there) and compressed from that copy.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
340
341
unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
342
0
{
343
0
    SHA512_CTX c;
344
0
    static unsigned char m[SHA384_DIGEST_LENGTH];
345
346
0
    if (md == NULL)
347
0
        md = m;
348
0
    SHA384_Init(&c);
349
0
    SHA512_Update(&c, d, n);
350
0
    SHA512_Final(md, &c);
351
0
    OPENSSL_cleanse(&c, sizeof(c));
352
0
    return md;
353
0
}
354
355
unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
356
0
{
357
0
    SHA512_CTX c;
358
0
    static unsigned char m[SHA512_DIGEST_LENGTH];
359
360
0
    if (md == NULL)
361
0
        md = m;
362
0
    SHA512_Init(&c);
363
0
    SHA512_Update(&c, d, n);
364
0
    SHA512_Final(md, &c);
365
0
    OPENSSL_cleanse(&c, sizeof(c));
366
0
    return md;
367
0
}
368
369
#ifndef SHA512_ASM
370
static const SHA_LONG64 K512[80] = {
371
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
372
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
373
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
374
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
375
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
376
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
377
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
378
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
379
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
380
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
381
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
382
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
383
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
384
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
385
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
386
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
387
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
388
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
389
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
390
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
391
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
392
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
393
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
394
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
395
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
396
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
397
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
398
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
399
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
400
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
401
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
402
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
403
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
404
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
405
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
406
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
407
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
408
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
409
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
410
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
411
};
412
413
# ifndef PEDANTIC
414
#  if defined(__GNUC__) && __GNUC__>=2 && \
415
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
416
/*
 * GCC-style inline-asm overrides for ROTR and PULL64, selected per target.
 * Any macro left undefined here is supplied by the portable fallbacks
 * further down (the "# ifndef PULL64" / "# ifndef ROTR" sections).
 * All of these are statement expressions, so they declare locals named
 * ret, p, hi and lo; callers should avoid passing expressions that use
 * identically named variables.
 */
#   if defined(__x86_64) || defined(__x86_64__)
417
/*
 * x86-64 ROTR: a single "rorq" with n passed through the "J" constraint
 * and a tied to the output register; flags are declared clobbered.
 * Presumably equivalent to the portable rotate-right fallback.
 */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
418
                                asm ("rorq %1,%0"       \
419
                                : "=r"(ret)             \
420
                                : "J"(n),"0"(a)         \
421
                                : "cc"); ret;           })
422
#    if !defined(B_ENDIAN)
423
/*
 * x86-64 PULL64: reads the object at &(x) as one SHA_LONG64 and applies
 * "bswapq" to it in place before yielding it.
 */
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
424
                                asm ("bswapq    %0"             \
425
                                : "=r"(ret)                     \
426
                                : "0"(ret)); ret;               })
427
#    endif
428
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
429
#    if defined(I386_ONLY)
430
/*
 * i386 PULL64 without bswap: reads two unsigned ints from &(x) (hi from
 * p[0], lo from p[1]), runs an xchgb/roll $16/xchgb sequence on each via
 * eax/edx, then yields ((SHA_LONG64)hi)<<32|lo.
 */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
431
                          unsigned int hi=p[0],lo=p[1];          \
432
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
433
                                    "roll $16,%%eax; roll $16,%%edx; "\
434
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
435
                                : "=a"(lo),"=d"(hi)             \
436
                                : "0"(lo),"1"(hi) : "cc");      \
437
                                ((SHA_LONG64)hi)<<32|lo;        })
438
#    else
439
/*
 * i386 PULL64 using bswapl: same hi/lo split as above, with one "bswapl"
 * applied to each half before they are combined.
 */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
440
                          unsigned int hi=p[0],lo=p[1];         \
441
                                asm ("bswapl %0; bswapl %1;"    \
442
                                : "=r"(lo),"=r"(hi)             \
443
                                : "0"(lo),"1"(hi));             \
444
                                ((SHA_LONG64)hi)<<32|lo;        })
445
#    endif
446
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
447
/* 64-bit PowerPC ROTR: a single "rotrdi" with n passed as a "K" operand. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
448
                                asm ("rotrdi %0,%1,%2"  \
449
                                : "=r"(ret)             \
450
                                : "r"(a),"K"(n)); ret;  })
451
#   elif defined(__aarch64__)
452
/* AArch64 ROTR: a single "ror" with n passed as an "I" operand. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
453
                                asm ("ror %0,%1,%2"     \
454
                                : "=r"(ret)             \
455
                                : "r"(a),"I"(n)); ret;  })
456
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
457
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
458
/*
 * AArch64 little-endian PULL64: loads the object at &(x) as SHA_LONG64
 * and passes it through a single "rev" instruction.
 */
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
459
                                asm ("rev       %0,%1"          \
460
                                : "=r"(ret)                     \
461
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
462
#    endif
463
#   endif
464
#  elif defined(_MSC_VER)
465
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
466
#    pragma intrinsic(_rotr64)
467
#    define ROTR(a,n)    _rotr64((a),n)
468
#   endif
469
/*
 * 32-bit MSVC inline-asm implementation of PULL64: PULL64(x) becomes a
 * call to __pull64be(&(x)). Two bodies are provided; the I386_ONLY one
 * avoids the bswap instruction.
 */
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
470
       !defined(OPENSSL_NO_INLINE_ASM)
471
#    if defined(I386_ONLY)
472
/*
 * Loads the 32-bit words at [ecx + 0] and [ecx + 4] into edx and eax and
 * reverses the bytes of each with an xchg/rol 16/xchg sequence.
 * NOTE(review): there is no return statement and x is never named; this
 * presumably relies on __fastcall placing x in ecx and on the 64-bit
 * result being returned in edx:eax - confirm against the MSVC ABI.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
473
{
474
    _asm mov  edx,[ecx + 0]
475
    _asm mov  eax,[ecx + 4]
476
    _asm xchg dh, dl
477
    _asm xchg ah, al
478
    _asm rol  edx, 16
479
    _asm rol  eax, 16
480
    _asm xchg dh, dl
481
    _asm xchg ah, al
482
}
483
#    else
484
/*
 * Same load as the I386_ONLY variant, with one bswap per 32-bit half.
 * NOTE(review): the same assumptions about ecx as the argument register
 * and edx:eax as the return value apply - confirm against the MSVC ABI.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
485
{
486
    _asm mov   edx,[ecx + 0]
487
    _asm mov   eax,[ecx + 4]
488
    _asm bswap edx
489
    _asm bswap eax
490
}
491
#    endif
492
#    define PULL64(x) __pull64be(&(x))
493
#   endif
494
#  endif
495
# endif
496
# ifndef PULL64
/*
 * Portable big-endian load. B(x,j) fetches byte j of the object x and
 * shifts it to its position in a 64-bit big-endian value (byte 0 is the
 * most significant); PULL64(x) ORs the eight bytes together. Both
 * arguments are fully parenthesised so that expression arguments expand
 * correctly.
 */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/*
 * Portable 64-bit rotate right of x by s bits. s must be in the range
 * 1..63: s == 0 or s == 64 would shift by the full type width, which is
 * undefined. The shift count is parenthesised so that ROTR(x, a + b)
 * rotates by a + b rather than expanding to a mis-grouped expression.
 */
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* The four rotate/shift mixing functions used by the round code. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Bitwise choose: each result bit comes from y where x is 1, else from z. */
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Bitwise majority of x, y and z. */
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
509
510
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
511
/*
512
 * This code should give better results on 32-bit CPU with less than
513
 * ~24 registers, both size and performance wise...
514
 */
515
516
/*
 * Compress |num| consecutive blocks of SHA_LBLOCK 64-bit words starting
 * at |in| into ctx->h.
 *
 * Only the working variables "a" and "e" are held in scalars. The other
 * six, together with the message schedule, live in one array that is
 * addressed through a pointer sliding down by one slot per round: after
 * the slide, slots 1..3 hold b..d, slots 5..7 hold f..h, and the word
 * stored in slot 8 during round r is found at slot 8 + k, k rounds later.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 ring[9 + 80];
    SHA_LONG64 *win;
    SHA_LONG64 va, ve, t;
    int r;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        win = ring + 80;

        va = ctx->h[0];
        win[1] = ctx->h[1];
        win[2] = ctx->h[2];
        win[3] = ctx->h[3];
        ve = ctx->h[4];
        win[5] = ctx->h[5];
        win[6] = ctx->h[6];
        win[7] = ctx->h[7];

        for (r = 0; r < 80; r++, win--) {
            if (r < 16) {
                /* First 16 rounds take the message words directly. */
#  ifdef B_ENDIAN
                t = blk[r];
#  else
                t = PULL64(blk[r]);
#  endif
            } else {
                /* Later rounds expand the schedule from earlier words. */
                t = sigma0(win[8 + 16 - 1]);
                t += sigma1(win[8 + 16 - 14]);
                t += win[8 + 16] + win[8 + 16 - 9];
            }

            win[0] = va;
            win[4] = ve;
            win[8] = t;
            t += win[7] + Sigma1(ve) + Ch(ve, win[5], win[6]) + K512[r];
            ve = win[3] + t;
            va = t + Sigma0(va) + Maj(va, win[1], win[2]);
        }

        ctx->h[0] += va;
        ctx->h[1] += win[1];
        ctx->h[2] += win[2];
        ctx->h[3] += win[3];
        ctx->h[4] += ve;
        ctx->h[5] += win[5];
        ctx->h[6] += win[6];
        ctx->h[7] += win[7];
    }
}
575
576
# elif defined(OPENSSL_SMALL_FOOTPRINT)
577
578
/*
 * Compact (rolled-loop) block function: compress |num| consecutive blocks
 * of SHA_LBLOCK 64-bit words starting at |in| into ctx->h.
 *
 * The message schedule is kept in a 16-entry circular buffer indexed by
 * the round number modulo 16.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 sched[16];
    SHA_LONG64 va, vb, vc, vd, ve, vf, vg, vh;
    SHA_LONG64 s0, s1, T1, T2;
    int r;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        va = ctx->h[0];
        vb = ctx->h[1];
        vc = ctx->h[2];
        vd = ctx->h[3];
        ve = ctx->h[4];
        vf = ctx->h[5];
        vg = ctx->h[6];
        vh = ctx->h[7];

        for (r = 0; r < 80; r++) {
            if (r < 16) {
                /* First 16 rounds take the message words directly. */
#  ifdef B_ENDIAN
                sched[r] = blk[r];
#  else
                sched[r] = PULL64(blk[r]);
#  endif
                T1 = sched[r];
            } else {
                /* Later rounds update the circular schedule in place. */
                s0 = sched[(r + 1) & 0x0f];
                s0 = sigma0(s0);
                s1 = sched[(r + 14) & 0x0f];
                s1 = sigma1(s1);

                sched[r & 0xf] += s0 + s1 + sched[(r + 9) & 0xf];
                T1 = sched[r & 0xf];
            }

            T1 += vh + Sigma1(ve) + Ch(ve, vf, vg) + K512[r];
            T2 = Sigma0(va) + Maj(va, vb, vc);

            /* Rotate the eight working variables by one position. */
            vh = vg;
            vg = vf;
            vf = ve;
            ve = vd + T1;
            vd = vc;
            vc = vb;
            vb = va;
            va = T1 + T2;
        }

        ctx->h[0] += va;
        ctx->h[1] += vb;
        ctx->h[2] += vc;
        ctx->h[3] += vd;
        ctx->h[4] += ve;
        ctx->h[5] += vf;
        ctx->h[6] += vg;
        ctx->h[7] += vh;
    }
}
646
647
# else
648
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
649
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
650
        h = Sigma0(a) + Maj(a,b,c);                     \
651
        d += T1;        h += T1;                        } while (0)
652
653
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
654
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
655
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
656
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
657
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
658
659
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
660
                                    size_t num)
661
{
662
    const SHA_LONG64 *W = in;
663
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
664
    SHA_LONG64 X[16];
665
    int i;
666
667
    while (num--) {
668
669
        a = ctx->h[0];
670
        b = ctx->h[1];
671
        c = ctx->h[2];
672
        d = ctx->h[3];
673
        e = ctx->h[4];
674
        f = ctx->h[5];
675
        g = ctx->h[6];
676
        h = ctx->h[7];
677
678
#  ifdef B_ENDIAN
679
        T1 = X[0] = W[0];
680
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
681
        T1 = X[1] = W[1];
682
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
683
        T1 = X[2] = W[2];
684
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
685
        T1 = X[3] = W[3];
686
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
687
        T1 = X[4] = W[4];
688
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
689
        T1 = X[5] = W[5];
690
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
691
        T1 = X[6] = W[6];
692
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
693
        T1 = X[7] = W[7];
694
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
695
        T1 = X[8] = W[8];
696
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
697
        T1 = X[9] = W[9];
698
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
699
        T1 = X[10] = W[10];
700
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
701
        T1 = X[11] = W[11];
702
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
703
        T1 = X[12] = W[12];
704
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
705
        T1 = X[13] = W[13];
706
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
707
        T1 = X[14] = W[14];
708
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
709
        T1 = X[15] = W[15];
710
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
711
#  else
712
        T1 = X[0] = PULL64(W[0]);
713
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
714
        T1 = X[1] = PULL64(W[1]);
715
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
716
        T1 = X[2] = PULL64(W[2]);
717
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
718
        T1 = X[3] = PULL64(W[3]);
719
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
720
        T1 = X[4] = PULL64(W[4]);
721
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
722
        T1 = X[5] = PULL64(W[5]);
723
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
724
        T1 = X[6] = PULL64(W[6]);
725
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
726
        T1 = X[7] = PULL64(W[7]);
727
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
728
        T1 = X[8] = PULL64(W[8]);
729
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
730
        T1 = X[9] = PULL64(W[9]);
731
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
732
        T1 = X[10] = PULL64(W[10]);
733
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
734
        T1 = X[11] = PULL64(W[11]);
735
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
736
        T1 = X[12] = PULL64(W[12]);
737
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
738
        T1 = X[13] = PULL64(W[13]);
739
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
740
        T1 = X[14] = PULL64(W[14]);
741
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
742
        T1 = X[15] = PULL64(W[15]);
743
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
744
#  endif
745
746
        for (i = 16; i < 80; i += 16) {
747
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
748
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
749
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
750
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
751
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
752
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
753
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
754
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
755
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
756
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
757
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
758
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
759
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
760
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
761
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
762
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
763
        }
764
765
        ctx->h[0] += a;
766
        ctx->h[1] += b;
767
        ctx->h[2] += c;
768
        ctx->h[3] += d;
769
        ctx->h[4] += e;
770
        ctx->h[5] += f;
771
        ctx->h[6] += g;
772
        ctx->h[7] += h;
773
774
        W += SHA_LBLOCK;
775
    }
776
}
777
778
# endif
779
780
#endif                         /* SHA512_ASM */