Coverage Report

Created: 2018-08-29 13:53

/src/openssl/crypto/sha/sha512.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the OpenSSL license (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/opensslconf.h>
11
/*-
12
 * IMPLEMENTATION NOTES.
13
 *
14
 * As you might have noticed, 32-bit hash algorithms:
15
 *
16
 * - permit SHA_LONG to be wider than 32-bit
17
 * - optimized versions implement two transform functions: one operating
18
 *   on [aligned] data in host byte order and one - on data in input
19
 *   stream byte order;
20
 * - share common byte-order neutral collector and padding function
21
 *   implementations, ../md32_common.h;
22
 *
23
 * None of the above applies to this SHA-512 implementation. Reasons
24
 * [in reverse order] are:
25
 *
26
 * - it's the only 64-bit hash algorithm at the time of this writing,
27
 *   there is no need for common collector/padding implementation [yet];
28
 * - by supporting only one transform function [which operates on
29
 *   *aligned* data in input stream byte order, big-endian in this case]
30
 *   we minimize burden of maintenance in two ways: a) collector/padding
31
 *   function is simpler; b) only one transform function to stare at;
32
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33
 *   apply a number of optimizations to mitigate potential performance
34
 *   penalties caused by previous design decision;
35
 *
36
 * Caveat lector.
37
 *
38
 * Implementation relies on the fact that "long long" is 64-bit on
39
 * both 32- and 64-bit platforms. If some compiler vendor comes up
40
 * with 128-bit long long, adjustment to sha.h would be required.
41
 * As this implementation relies on 64-bit integer type, it's totally
42
 * inappropriate for platforms which don't support it, most notably
43
 * 16-bit platforms.
44
 */
45
#include <stdlib.h>
46
#include <string.h>
47
48
#include <openssl/crypto.h>
49
#include <openssl/sha.h>
50
#include <openssl/opensslv.h>
51
52
#include "internal/cryptlib.h"
53
#include "internal/sha.h"
54
55
/*
 * The targets tested below, and any build with SHA512_ASM defined, set
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA.  When it is set, SHA512_Update()
 * and SHA512_Transform() pass the caller's pointer straight to
 * sha512_block_data_order(); when it is not set, misaligned input is first
 * copied into the context buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57
    defined(__s390__) || defined(__s390x__) || \
58
    defined(__aarch64__) || \
59
    defined(SHA512_ASM)
60
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
61
#endif
62
63
/*
 * Put |c| into the starting state used by the sha512_224 variant: load the
 * eight initial chaining words, clear the bit counters and the buffer fill
 * level, and select SHA224_DIGEST_LENGTH bytes of output.  Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->md_len = SHA224_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
80
81
/*
 * Put |c| into the starting state used by the sha512_256 variant: load the
 * eight initial chaining words, clear the bit counters and the buffer fill
 * level, and select SHA256_DIGEST_LENGTH bytes of output.  Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->md_len = SHA256_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
98
99
/*
 * Prepare |c| for a SHA-384 computation: load the eight initial chaining
 * words, clear the bit counters and the buffer fill level, and select
 * SHA384_DIGEST_LENGTH bytes of output.  Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->md_len = SHA384_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
116
117
/*
 * Prepare |c| for a SHA-512 computation: load the eight initial chaining
 * words, clear the bit counters and the buffer fill level, and select
 * SHA512_DIGEST_LENGTH bytes of output.  Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 initial_state[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(initial_state) / sizeof(initial_state[0]); i++)
        c->h[i] = initial_state[i];

    c->md_len = SHA512_DIGEST_LENGTH;
    c->num = 0;
    c->Nh = 0;
    c->Nl = 0;
    return 1;
}
134
135
/*
 * Forward declaration of the compression routine used by the functions
 * below.  It has internal linkage unless SHA512_ASM is defined; in that
 * case the C definitions later in this file are compiled out, so the
 * symbol is presumably supplied by a separate assembly module.
 */
#ifndef SHA512_ASM
136
static
137
#endif
138
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
139
140
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * The buffered tail is terminated with a 0x80 byte, zero-padded, and closed
 * with the 128-bit message bit count (Nh:Nl, most significant byte first)
 * in the last 16 bytes of the block.  If fewer than 16 bytes remain after
 * the 0x80 marker, the current block is flushed first and the count goes
 * into a fresh, otherwise all-zero block.
 *
 * c->md_len bytes of the chaining state are then emitted, each 64-bit word
 * most significant byte first.
 *
 * Returns 1 on success.  Returns 0 if |md| is NULL or c->md_len is not one
 * of the four supported digest lengths; in both cases the final block(s)
 * have already been processed.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *buf = (unsigned char *)c->u.p;
    const size_t blk = sizeof(c->u);
    const size_t len_off = blk - 16;    /* where the bit count starts */
    size_t used = c->num;
    size_t whole, w;
    unsigned int tail;
    int shift;

    buf[used++] = 0x80;                 /* There always is a room for one */
    if (used > len_off) {
        /* No space left for the length field: pad out and flush. */
        memset(buf + used, 0, blk - used);
        sha512_block_data_order(c, buf, 1);
        used = 0;
    }
    memset(buf + used, 0, len_off - used);

#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Store Nh then Nl big-endian, filling the field from its last byte. */
    for (w = 0; w < 8; w++) {
        buf[blk - 1 - w] = (unsigned char)(c->Nl >> (8 * w));
        buf[blk - 9 - w] = (unsigned char)(c->Nh >> (8 * w));
    }
#endif

    sha512_block_data_order(c, buf, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /* Complete 64-bit words of output. */
    whole = c->md_len / 8;
    for (w = 0; w < whole; w++) {
        SHA_LONG64 t = c->h[w];

        for (shift = 56; shift >= 0; shift -= 8)
            *(md++) = (unsigned char)(t >> shift);
    }

    /*
     * For 224 bits, there are four bytes left over that come from the top
     * half of the next state word; the other lengths leave no remainder.
     */
    tail = c->md_len % 8;
    if (tail != 0) {
        SHA_LONG64 t = c->h[whole];

        for (shift = 56; tail != 0; tail--, shift -= 8)
            *(md++) = (unsigned char)(t >> shift);
    }

    return 1;
}
258
259
/*
 * SHA-384 finalisation.  Forwards to SHA512_Final(), which selects the
 * output length from c->md_len, and returns its result unchanged.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    int ok = SHA512_Final(md, c);

    return ok;
}
263
264
/*
 * Feed |len| bytes starting at |_data| into the hash state |c|.
 *
 * The running message length is kept in bits as the pair Nh:Nl.  Input is
 * first used to top up a partially filled context buffer; complete blocks
 * are then compressed, and whatever is left over is stored in the buffer
 * with c->num recording how many bytes it holds.
 *
 * Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    const size_t blk = sizeof(c->u);
    SHA_LONG64 bits_lo;

    if (len == 0)
        return 1;

    /* Add len * 8 to the bit counter, carrying from Nl into Nh. */
    bits_lo = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (bits_lo < c->Nl)
        c->Nh++;
    /* Bits shifted out of the low word only exist when size_t is 64-bit. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = bits_lo;

    if (c->num != 0) {
        size_t room = blk - c->num;

        if (len < room) {
            /* Still not a full block: just accumulate. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        /* Complete the pending block and compress it. */
        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= blk) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through the buffer. */
            while (len >= blk) {
                memcpy(buf, src, blk);
                sha512_block_data_order(c, buf, 1);
                len -= blk;
                src += blk;
            }
        } else
#endif
        {
            size_t rest = len % blk;

            /* Compress every complete block directly from the input. */
            sha512_block_data_order(c, src, len / blk);
            src += len - rest;
            len = rest;
        }
    }

    if (len != 0) {
        /* Keep the trailing partial block for the next call. */
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
311
312
/*
 * SHA-384 update.  Forwards to SHA512_Update() and returns its result
 * unchanged.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    int ok = SHA512_Update(c, data, len);

    return ok;
}
316
317
/*
 * Run the compression function once over the single block at |data|,
 * updating the chaining state in |c|.  The bit counters and c->num are not
 * touched.  On builds without SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA a
 * misaligned |data| is first copied into the context buffer, overwriting
 * whatever was buffered there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
325
326
unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
327
0
{
328
0
    SHA512_CTX c;
329
0
    static unsigned char m[SHA384_DIGEST_LENGTH];
330
0
331
0
    if (md == NULL)
332
0
        md = m;
333
0
    SHA384_Init(&c);
334
0
    SHA512_Update(&c, d, n);
335
0
    SHA512_Final(md, &c);
336
0
    OPENSSL_cleanse(&c, sizeof(c));
337
0
    return md;
338
0
}
339
340
unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
341
0
{
342
0
    SHA512_CTX c;
343
0
    static unsigned char m[SHA512_DIGEST_LENGTH];
344
0
345
0
    if (md == NULL)
346
0
        md = m;
347
0
    SHA512_Init(&c);
348
0
    SHA512_Update(&c, d, n);
349
0
    SHA512_Final(md, &c);
350
0
    OPENSSL_cleanse(&c, sizeof(c));
351
0
    return md;
352
0
}
353
354
#ifndef SHA512_ASM
355
/*
 * Per-round additive constants: sha512_block_data_order() adds K512[i] in
 * round i, for i = 0..79.
 * NOTE(review): these look like the SHA-384/SHA-512 constants K from
 * FIPS 180-4; check any edit against the standard.
 */
static const SHA_LONG64 K512[80] = {
356
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
357
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
358
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
359
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
360
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
361
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
362
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
363
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
364
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
365
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
366
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
367
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
368
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
369
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
370
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
371
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
372
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
373
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
374
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
375
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
376
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
377
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
378
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
379
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
380
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
381
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
382
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
383
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
384
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
385
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
386
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
387
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
388
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
389
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
390
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
391
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
392
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
393
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
394
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
395
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
396
};
397
398
/*
 * Optional compiler- and CPU-specific definitions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value |a| right by |n| bits;
 *   PULL64(x)  - read the 8 bytes stored at the lvalue |x| as a big-endian
 *                64-bit value on a little-endian host.
 *
 * Everything here is skipped when PEDANTIC is defined.  Any primitive left
 * undefined by this section gets a portable definition from the
 * "# ifndef PULL64" / "# ifndef ROTR" fallbacks that follow it.
 *
 * The GNU statement-expression macros declare locals named ret, p, hi and
 * lo, so an argument expression must not refer to identifiers with those
 * names.
 */
# ifndef PEDANTIC
399
#  if defined(__GNUC__) && __GNUC__>=2 && \
400
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
401
#   if defined(__x86_64) || defined(__x86_64__)
402
/* x86_64: rotate with rorq, byte-swap with bswapq. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
403
                                asm ("rorq %1,%0"       \
404
                                : "=r"(ret)             \
405
                                : "J"(n),"0"(a)         \
406
                                : "cc"); ret;           })
407
#    if !defined(B_ENDIAN)
408
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
409
                                asm ("bswapq    %0"             \
410
                                : "=r"(ret)                     \
411
                                : "0"(ret)); ret;               })
412
#    endif
413
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
414
#    if defined(I386_ONLY)
415
/*
 * I386_ONLY: reverse each 32-bit half with xchgb/roll instead of bswapl,
 * then combine the halves with the first word in memory as the high half.
 */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
416
                          unsigned int hi=p[0],lo=p[1];          \
417
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
418
                                    "roll $16,%%eax; roll $16,%%edx; "\
419
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
420
                                : "=a"(lo),"=d"(hi)             \
421
                                : "0"(lo),"1"(hi) : "cc");      \
422
                                ((SHA_LONG64)hi)<<32|lo;        })
423
#    else
424
/* 32-bit x86 with bswapl: swap each half, first word in memory is high. */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
425
                          unsigned int hi=p[0],lo=p[1];         \
426
                                asm ("bswapl %0; bswapl %1;"    \
427
                                : "=r"(lo),"=r"(hi)             \
428
                                : "0"(lo),"1"(hi));             \
429
                                ((SHA_LONG64)hi)<<32|lo;        })
430
#    endif
431
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
432
/* 64-bit PowerPC: only ROTR is specialised (rotrdi). */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
433
                                asm ("rotrdi %0,%1,%2"  \
434
                                : "=r"(ret)             \
435
                                : "r"(a),"K"(n)); ret;  })
436
#   elif defined(__aarch64__)
437
/* AArch64: rotate with ror; byte-swap with rev when little-endian. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
438
                                asm ("ror %0,%1,%2"     \
439
                                : "=r"(ret)             \
440
                                : "r"(a),"I"(n)); ret;  })
441
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
442
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
443
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
444
                                asm ("rev       %0,%1"          \
445
                                : "=r"(ret)                     \
446
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
447
#    endif
448
#   endif
449
#  elif defined(_MSC_VER)
450
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
451
#    pragma intrinsic(_rotr64)
452
#    define ROTR(a,n)    _rotr64((a),n)
453
#   endif
454
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
455
       !defined(OPENSSL_NO_INLINE_ASM)
456
#    if defined(I386_ONLY)
457
/*
 * MSVC, 32-bit x86, I386_ONLY: load the two 32-bit words at |x| and reverse
 * the bytes of each with xchg/rol.  There is no return statement.
 * NOTE(review): presumably relies on __fastcall passing |x| in ECX and on
 * the 64-bit result being returned in EDX:EAX - confirm against the MSVC
 * calling-convention documentation.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
458
{
459
    _asm mov  edx,[ecx + 0]
460
    _asm mov  eax,[ecx + 4]
461
    _asm xchg dh, dl
462
    _asm xchg ah, al
463
    _asm rol  edx, 16
464
    _asm rol  eax, 16
465
    _asm xchg dh, dl
466
    _asm xchg ah, al
467
}
468
#    else
469
/*
 * MSVC, 32-bit x86: same as the I386_ONLY version but using bswap.
 * NOTE(review): same ECX / EDX:EAX assumption as above - confirm.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
470
{
471
    _asm mov   edx,[ecx + 0]
472
    _asm mov   eax,[ecx + 4]
473
    _asm bswap edx
474
    _asm bswap eax
475
}
476
#    endif
477
#    define PULL64(x) __pull64be(&(x))
478
#   endif
479
#  endif
480
# endif
481
/*
 * Portable fallbacks, used when no platform-specific PULL64/ROTR has been
 * defined, followed by the round primitives built on ROTR.
 *
 * Every macro argument is parenthesised in the expansion, so expressions
 * such as ROTR(v, a + b) or B(x, i + 1) expand correctly.  Arguments are
 * still evaluated more than once, so they must be free of side effects.
 */
# ifndef PULL64
/* Byte |j| (0 = first in memory) of the object |x|, shifted into the
 * position it occupies in a big-endian 64-bit word. */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
/* Assemble the 8 bytes stored at lvalue |x| as a big-endian 64-bit value. */
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/* Rotate the 64-bit value |x| right by |s| bits; |s| must be in 1..63. */
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* "Big" sigma functions: applied to the working variables a and e. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* "Small" sigma functions: used when extending the message schedule. */
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Choose: each bit of x selects the corresponding bit of y (1) or z (0). */
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Majority: each result bit is the value held by at least two inputs. */
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
494
495
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
496
/*
497
 * This code should give better results on 32-bit CPU with less than
498
 * ~24 registers, both size and performance wise...
499
 */
500
501
/*
 * Compress |num| consecutive blocks starting at |in| into ctx->h.
 *
 * Only two of the eight working variables (va, ve) are kept in locals.  The
 * other six, together with the message schedule, live in |frame| and are
 * addressed through |win|, which moves down one slot per round.  Moving the
 * window is what "rotates" the variables: this round's win[0] is next
 * round's win[1], and so on.  win[8] receives the schedule word of the
 * current round, so win[8 + k] is the word from k rounds earlier.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 va, ve, acc;
    SHA_LONG64 frame[9 + 80], *win;
    int rnd;

    for (; num != 0; num--, blk += SHA_LBLOCK) {

        win = frame + 80;
        va = ctx->h[0];
        win[1] = ctx->h[1];
        win[2] = ctx->h[2];
        win[3] = ctx->h[3];
        ve = ctx->h[4];
        win[5] = ctx->h[5];
        win[6] = ctx->h[6];
        win[7] = ctx->h[7];

        /* Rounds 0..15 take their schedule word straight from the input. */
        for (rnd = 0; rnd < 16; rnd++, win--) {
#  ifdef B_ENDIAN
            acc = blk[rnd];
#  else
            acc = PULL64(blk[rnd]);
#  endif
            win[0] = va;
            win[4] = ve;
            win[8] = acc;
            acc += win[7] + Sigma1(ve) + Ch(ve, win[5], win[6]) + K512[rnd];
            ve = win[3] + acc;
            va = acc + Sigma0(va) + Maj(va, win[1], win[2]);
        }

        /* Rounds 16..79 derive the word from four earlier schedule words. */
        for (; rnd < 80; rnd++, win--) {
            acc = sigma0(win[8 + 16 - 1]);
            acc += sigma1(win[8 + 16 - 14]);
            acc += win[8 + 16] + win[8 + 16 - 9];

            win[0] = va;
            win[4] = ve;
            win[8] = acc;
            acc += win[7] + Sigma1(ve) + Ch(ve, win[5], win[6]) + K512[rnd];
            ve = win[3] + acc;
            va = acc + Sigma0(va) + Maj(va, win[1], win[2]);
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += va;
        ctx->h[1] += win[1];
        ctx->h[2] += win[2];
        ctx->h[3] += win[3];
        ctx->h[4] += ve;
        ctx->h[5] += win[5];
        ctx->h[6] += win[6];
        ctx->h[7] += win[7];
    }
}
560
561
# elif defined(OPENSSL_SMALL_FOOTPRINT)
562
563
/*
 * Compact (OPENSSL_SMALL_FOOTPRINT) compression routine: processes |num|
 * consecutive blocks starting at |in| and folds each into ctx->h.
 *
 * All 80 rounds run as loops.  The message schedule is a 16-word ring,
 * |sched|, indexed modulo 16 and overwritten in place as it is extended.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 sig0, sig1, tmp1, tmp2;
    SHA_LONG64 sched[16];
    int rnd;

    for (; num != 0; num--, blk += SHA_LBLOCK) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: schedule words come straight from the input. */
        for (rnd = 0; rnd < 16; rnd++) {
#  ifdef B_ENDIAN
            sched[rnd] = blk[rnd];
#  else
            sched[rnd] = PULL64(blk[rnd]);
#  endif
            tmp1 = sched[rnd];
            tmp1 += h + Sigma1(e) + Ch(e, f, g) + K512[rnd];
            tmp2 = Sigma0(a) + Maj(a, b, c);

            h = g;
            g = f;
            f = e;
            e = d + tmp1;
            d = c;
            c = b;
            b = a;
            a = tmp1 + tmp2;
        }

        /* Rounds 16..79: extend the schedule in place, then same step. */
        for (; rnd < 80; rnd++) {
            sig0 = sched[(rnd + 1) & 0x0f];
            sig0 = sigma0(sig0);
            sig1 = sched[(rnd + 14) & 0x0f];
            sig1 = sigma1(sig1);

            sched[rnd & 0xf] += sig0 + sig1 + sched[(rnd + 9) & 0xf];
            tmp1 = sched[rnd & 0xf];
            tmp1 += h + Sigma1(e) + Ch(e, f, g) + K512[rnd];
            tmp2 = Sigma0(a) + Maj(a, b, c);

            h = g;
            g = f;
            f = e;
            e = d + tmp1;
            d = c;
            c = b;
            b = a;
            a = tmp1 + tmp2;
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
631
632
# else
633
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
634
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
635
        h = Sigma0(a) + Maj(a,b,c);                     \
636
        d += T1;        h += T1;                        } while (0)
637
638
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
639
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
640
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
641
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
642
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
643
644
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
645
                                    size_t num)
646
{
647
    const SHA_LONG64 *W = in;
648
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
649
    SHA_LONG64 X[16];
650
    int i;
651
652
    while (num--) {
653
654
        a = ctx->h[0];
655
        b = ctx->h[1];
656
        c = ctx->h[2];
657
        d = ctx->h[3];
658
        e = ctx->h[4];
659
        f = ctx->h[5];
660
        g = ctx->h[6];
661
        h = ctx->h[7];
662
663
#  ifdef B_ENDIAN
664
        T1 = X[0] = W[0];
665
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
666
        T1 = X[1] = W[1];
667
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
668
        T1 = X[2] = W[2];
669
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
670
        T1 = X[3] = W[3];
671
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
672
        T1 = X[4] = W[4];
673
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
674
        T1 = X[5] = W[5];
675
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
676
        T1 = X[6] = W[6];
677
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
678
        T1 = X[7] = W[7];
679
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
680
        T1 = X[8] = W[8];
681
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
682
        T1 = X[9] = W[9];
683
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
684
        T1 = X[10] = W[10];
685
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
686
        T1 = X[11] = W[11];
687
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
688
        T1 = X[12] = W[12];
689
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
690
        T1 = X[13] = W[13];
691
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
692
        T1 = X[14] = W[14];
693
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
694
        T1 = X[15] = W[15];
695
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
696
#  else
697
        T1 = X[0] = PULL64(W[0]);
698
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
699
        T1 = X[1] = PULL64(W[1]);
700
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
701
        T1 = X[2] = PULL64(W[2]);
702
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
703
        T1 = X[3] = PULL64(W[3]);
704
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
705
        T1 = X[4] = PULL64(W[4]);
706
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
707
        T1 = X[5] = PULL64(W[5]);
708
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
709
        T1 = X[6] = PULL64(W[6]);
710
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
711
        T1 = X[7] = PULL64(W[7]);
712
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
713
        T1 = X[8] = PULL64(W[8]);
714
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
715
        T1 = X[9] = PULL64(W[9]);
716
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
717
        T1 = X[10] = PULL64(W[10]);
718
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
719
        T1 = X[11] = PULL64(W[11]);
720
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
721
        T1 = X[12] = PULL64(W[12]);
722
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
723
        T1 = X[13] = PULL64(W[13]);
724
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
725
        T1 = X[14] = PULL64(W[14]);
726
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
727
        T1 = X[15] = PULL64(W[15]);
728
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
729
#  endif
730
731
        for (i = 16; i < 80; i += 16) {
732
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
733
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
734
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
735
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
736
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
737
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
738
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
739
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
740
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
741
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
742
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
743
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
744
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
745
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
746
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
747
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
748
        }
749
750
        ctx->h[0] += a;
751
        ctx->h[1] += b;
752
        ctx->h[2] += c;
753
        ctx->h[3] += d;
754
        ctx->h[4] += e;
755
        ctx->h[5] += f;
756
        ctx->h[6] += g;
757
        ctx->h[7] += h;
758
759
        W += SHA_LBLOCK;
760
    }
761
}
762
763
# endif
764
765
#endif                         /* SHA512_ASM */