Coverage Report

Created: 2026-02-14 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl30/crypto/sha/sha512.c
Line
Count
Source
1
/*
2
 * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/*
11
 * SHA512 low level APIs are deprecated for public use, but still ok for
12
 * internal use.
13
 */
14
#include "internal/deprecated.h"
15
16
#include <stdio.h>
17
#include <openssl/opensslconf.h>
18
/*-
19
 * IMPLEMENTATION NOTES.
20
 *
21
 * As you might have noticed 32-bit hash algorithms:
22
 *
23
 * - permit SHA_LONG to be wider than 32-bit
24
 * - optimized versions implement two transform functions: one operating
25
 *   on [aligned] data in host byte order and one - on data in input
26
 *   stream byte order;
27
 * - share common byte-order neutral collector and padding function
28
 *   implementations, ../md32_common.h;
29
 *
30
 * Neither of the above applies to this SHA-512 implementations. Reasons
31
 * [in reverse order] are:
32
 *
33
 * - it's the only 64-bit hash algorithm for the moment of this writing,
34
 *   there is no need for common collector/padding implementation [yet];
35
 * - by supporting only one transform function [which operates on
36
 *   *aligned* data in input stream byte order, big-endian in this case]
37
 *   we minimize burden of maintenance in two ways: a) collector/padding
38
 *   function is simpler; b) only one transform function to stare at;
39
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40
 *   apply a number of optimizations to mitigate potential performance
41
 *   penalties caused by previous design decision;
42
 *
43
 * Caveat lector.
44
 *
45
 * Implementation relies on the fact that "long long" is 64-bit on
46
 * both 32- and 64-bit platforms. If some compiler vendor comes up
47
 * with 128-bit long long, adjustment to sha.h would be required.
48
 * As this implementation relies on 64-bit integer type, it's totally
49
 * inappropriate for platforms which don't support it, most notably
50
 * 16-bit platforms.
51
 */
52
#include <stdlib.h>
53
#include <string.h>
54
55
#include <openssl/crypto.h>
56
#include <openssl/sha.h>
57
#include <openssl/opensslv.h>
58
59
#include "internal/cryptlib.h"
60
#include "crypto/sha.h"
61
62
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(__s390__) || defined(__s390x__) || defined(__aarch64__) || defined(SHA512_ASM)
63
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
64
#endif
65
66
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
67
#define U64(C) C##UI64
68
#elif defined(__arch64__)
69
#define U64(C) C##UL
70
#else
71
87.5M
#define U64(C) C##ULL
72
#endif
73
74
int sha512_224_init(SHA512_CTX *c)
75
445k
{
76
445k
    c->h[0] = U64(0x8c3d37c819544da2);
77
445k
    c->h[1] = U64(0x73e1996689dcd4d6);
78
445k
    c->h[2] = U64(0x1dfab7ae32ff9c82);
79
445k
    c->h[3] = U64(0x679dd514582f9fcf);
80
445k
    c->h[4] = U64(0x0f6d2b697bd44da8);
81
445k
    c->h[5] = U64(0x77e36f7304c48942);
82
445k
    c->h[6] = U64(0x3f9d85a86a1d36c8);
83
445k
    c->h[7] = U64(0x1112e6ad91d692a1);
84
85
445k
    c->Nl = 0;
86
445k
    c->Nh = 0;
87
445k
    c->num = 0;
88
445k
    c->md_len = SHA224_DIGEST_LENGTH;
89
445k
    return 1;
90
445k
}
91
92
int sha512_256_init(SHA512_CTX *c)
93
349k
{
94
349k
    c->h[0] = U64(0x22312194fc2bf72c);
95
349k
    c->h[1] = U64(0x9f555fa3c84c64c2);
96
349k
    c->h[2] = U64(0x2393b86b6f53b151);
97
349k
    c->h[3] = U64(0x963877195940eabd);
98
349k
    c->h[4] = U64(0x96283ee2a88effe3);
99
349k
    c->h[5] = U64(0xbe5e1e2553863992);
100
349k
    c->h[6] = U64(0x2b0199fc2c85b8aa);
101
349k
    c->h[7] = U64(0x0eb72ddc81c52ca2);
102
103
349k
    c->Nl = 0;
104
349k
    c->Nh = 0;
105
349k
    c->num = 0;
106
349k
    c->md_len = SHA256_DIGEST_LENGTH;
107
349k
    return 1;
108
349k
}
109
110
int SHA384_Init(SHA512_CTX *c)
111
2.50M
{
112
2.50M
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
113
2.50M
    c->h[1] = U64(0x629a292a367cd507);
114
2.50M
    c->h[2] = U64(0x9159015a3070dd17);
115
2.50M
    c->h[3] = U64(0x152fecd8f70e5939);
116
2.50M
    c->h[4] = U64(0x67332667ffc00b31);
117
2.50M
    c->h[5] = U64(0x8eb44a8768581511);
118
2.50M
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
119
2.50M
    c->h[7] = U64(0x47b5481dbefa4fa4);
120
121
2.50M
    c->Nl = 0;
122
2.50M
    c->Nh = 0;
123
2.50M
    c->num = 0;
124
2.50M
    c->md_len = SHA384_DIGEST_LENGTH;
125
2.50M
    return 1;
126
2.50M
}
127
128
int SHA512_Init(SHA512_CTX *c)
129
5.51M
{
130
5.51M
    c->h[0] = U64(0x6a09e667f3bcc908);
131
5.51M
    c->h[1] = U64(0xbb67ae8584caa73b);
132
5.51M
    c->h[2] = U64(0x3c6ef372fe94f82b);
133
5.51M
    c->h[3] = U64(0xa54ff53a5f1d36f1);
134
5.51M
    c->h[4] = U64(0x510e527fade682d1);
135
5.51M
    c->h[5] = U64(0x9b05688c2b3e6c1f);
136
5.51M
    c->h[6] = U64(0x1f83d9abfb41bd6b);
137
5.51M
    c->h[7] = U64(0x5be0cd19137e2179);
138
139
5.51M
    c->Nl = 0;
140
5.51M
    c->Nh = 0;
141
5.51M
    c->num = 0;
142
5.51M
    c->md_len = SHA512_DIGEST_LENGTH;
143
5.51M
    return 1;
144
5.51M
}
145
146
#ifndef SHA512_ASM
147
static
148
#endif
149
    void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
150
151
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
152
9.47M
{
153
9.47M
    unsigned char *p = (unsigned char *)c->u.p;
154
9.47M
    size_t n = c->num;
155
156
9.47M
    p[n] = 0x80; /* There always is a room for one */
157
9.47M
    n++;
158
9.47M
    if (n > (sizeof(c->u) - 16)) {
159
92.8k
        memset(p + n, 0, sizeof(c->u) - n);
160
92.8k
        n = 0;
161
92.8k
        sha512_block_data_order(c, p, 1);
162
92.8k
    }
163
164
9.47M
    memset(p + n, 0, sizeof(c->u) - 16 - n);
165
#ifdef B_ENDIAN
166
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
167
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
168
#else
169
9.47M
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
170
9.47M
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
171
9.47M
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
172
9.47M
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
173
9.47M
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
174
9.47M
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
175
9.47M
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
176
9.47M
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
177
9.47M
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
178
9.47M
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
179
9.47M
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
180
9.47M
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
181
9.47M
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
182
9.47M
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
183
9.47M
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
184
9.47M
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
185
9.47M
#endif
186
187
9.47M
    sha512_block_data_order(c, p, 1);
188
189
9.47M
    if (md == 0)
190
0
        return 0;
191
192
9.47M
    switch (c->md_len) {
193
    /* Let compiler decide if it's appropriate to unroll... */
194
589k
    case SHA224_DIGEST_LENGTH:
195
2.35M
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
196
1.76M
            SHA_LONG64 t = c->h[n];
197
198
1.76M
            *(md++) = (unsigned char)(t >> 56);
199
1.76M
            *(md++) = (unsigned char)(t >> 48);
200
1.76M
            *(md++) = (unsigned char)(t >> 40);
201
1.76M
            *(md++) = (unsigned char)(t >> 32);
202
1.76M
            *(md++) = (unsigned char)(t >> 24);
203
1.76M
            *(md++) = (unsigned char)(t >> 16);
204
1.76M
            *(md++) = (unsigned char)(t >> 8);
205
1.76M
            *(md++) = (unsigned char)(t);
206
1.76M
        }
207
        /*
208
         * For 224 bits, there are four bytes left over that have to be
209
         * processed separately.
210
         */
211
589k
        {
212
589k
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
213
214
589k
            *(md++) = (unsigned char)(t >> 56);
215
589k
            *(md++) = (unsigned char)(t >> 48);
216
589k
            *(md++) = (unsigned char)(t >> 40);
217
589k
            *(md++) = (unsigned char)(t >> 32);
218
589k
        }
219
589k
        break;
220
399k
    case SHA256_DIGEST_LENGTH:
221
1.99M
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
222
1.59M
            SHA_LONG64 t = c->h[n];
223
224
1.59M
            *(md++) = (unsigned char)(t >> 56);
225
1.59M
            *(md++) = (unsigned char)(t >> 48);
226
1.59M
            *(md++) = (unsigned char)(t >> 40);
227
1.59M
            *(md++) = (unsigned char)(t >> 32);
228
1.59M
            *(md++) = (unsigned char)(t >> 24);
229
1.59M
            *(md++) = (unsigned char)(t >> 16);
230
1.59M
            *(md++) = (unsigned char)(t >> 8);
231
1.59M
            *(md++) = (unsigned char)(t);
232
1.59M
        }
233
399k
        break;
234
2.64M
    case SHA384_DIGEST_LENGTH:
235
18.4M
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
236
15.8M
            SHA_LONG64 t = c->h[n];
237
238
15.8M
            *(md++) = (unsigned char)(t >> 56);
239
15.8M
            *(md++) = (unsigned char)(t >> 48);
240
15.8M
            *(md++) = (unsigned char)(t >> 40);
241
15.8M
            *(md++) = (unsigned char)(t >> 32);
242
15.8M
            *(md++) = (unsigned char)(t >> 24);
243
15.8M
            *(md++) = (unsigned char)(t >> 16);
244
15.8M
            *(md++) = (unsigned char)(t >> 8);
245
15.8M
            *(md++) = (unsigned char)(t);
246
15.8M
        }
247
2.64M
        break;
248
5.84M
    case SHA512_DIGEST_LENGTH:
249
52.6M
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
250
46.7M
            SHA_LONG64 t = c->h[n];
251
252
46.7M
            *(md++) = (unsigned char)(t >> 56);
253
46.7M
            *(md++) = (unsigned char)(t >> 48);
254
46.7M
            *(md++) = (unsigned char)(t >> 40);
255
46.7M
            *(md++) = (unsigned char)(t >> 32);
256
46.7M
            *(md++) = (unsigned char)(t >> 24);
257
46.7M
            *(md++) = (unsigned char)(t >> 16);
258
46.7M
            *(md++) = (unsigned char)(t >> 8);
259
46.7M
            *(md++) = (unsigned char)(t);
260
46.7M
        }
261
5.84M
        break;
262
    /* ... as well as make sure md_len is not abused. */
263
0
    default:
264
0
        return 0;
265
9.47M
    }
266
267
9.47M
    return 1;
268
9.47M
}
269
270
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
271
2.64M
{
272
2.64M
    return SHA512_Final(md, c);
273
2.64M
}
274
275
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
276
17.0M
{
277
17.0M
    SHA_LONG64 l;
278
17.0M
    unsigned char *p = c->u.p;
279
17.0M
    const unsigned char *data = (const unsigned char *)_data;
280
281
17.0M
    if (len == 0)
282
0
        return 1;
283
284
17.0M
    l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
285
17.0M
    if (l < c->Nl)
286
0
        c->Nh++;
287
17.0M
    if (sizeof(len) >= 8)
288
17.0M
        c->Nh += (((SHA_LONG64)len) >> 61);
289
17.0M
    c->Nl = l;
290
291
17.0M
    if (c->num != 0) {
292
6.93M
        size_t n = sizeof(c->u) - c->num;
293
294
6.93M
        if (len < n) {
295
3.67M
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
296
3.67M
            return 1;
297
3.67M
        } else {
298
3.25M
            memcpy(p + c->num, data, n), c->num = 0;
299
3.25M
            len -= n, data += n;
300
3.25M
            sha512_block_data_order(c, p, 1);
301
3.25M
        }
302
6.93M
    }
303
304
13.3M
    if (len >= sizeof(c->u)) {
305
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
306
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
307
            while (len >= sizeof(c->u))
308
                memcpy(p, data, sizeof(c->u)),
309
                    sha512_block_data_order(c, p, 1),
310
                    len -= sizeof(c->u), data += sizeof(c->u);
311
        else
312
#endif
313
1.53M
            sha512_block_data_order(c, data, len / sizeof(c->u)),
314
1.53M
                data += len, len %= sizeof(c->u), data -= len;
315
1.53M
    }
316
317
13.3M
    if (len != 0)
318
8.89M
        memcpy(p, data, len), c->num = (int)len;
319
320
13.3M
    return 1;
321
17.0M
}
322
323
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
324
3.88M
{
325
3.88M
    return SHA512_Update(c, data, len);
326
3.88M
}
327
328
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
329
97.9k
{
330
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
331
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
332
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
333
#endif
334
97.9k
    sha512_block_data_order(c, data, 1);
335
97.9k
}
336
337
#ifndef SHA512_ASM
338
static const SHA_LONG64 K512[80] = {
339
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
340
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
341
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
342
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
343
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
344
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
345
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
346
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
347
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
348
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
349
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
350
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
351
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
352
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
353
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
354
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
355
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
356
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
357
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
358
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
359
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
360
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
361
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
362
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
363
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
364
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
365
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
366
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
367
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
368
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
369
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
370
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
371
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
372
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
373
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
374
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
375
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
376
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
377
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
378
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
379
};
380
381
#ifndef PEDANTIC
382
#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
383
#if defined(__x86_64) || defined(__x86_64__)
384
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
385
                                asm ("rorq %1,%0"       \
386
                                : "=r"(ret)             \
387
                                : "J"(n),"0"(a)         \
388
                                : "cc"); ret; })
389
#if !defined(B_ENDIAN)
390
#define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
391
                                asm ("bswapq    %0"             \
392
                                : "=r"(ret)                     \
393
                                : "0"(ret)); ret; })
394
#endif
395
#elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
396
#if defined(I386_ONLY)
397
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
398
                          unsigned int hi=p[0],lo=p[1];          \
399
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
400
                                    "roll $16,%%eax; roll $16,%%edx; "\
401
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
402
                                : "=a"(lo),"=d"(hi)             \
403
                                : "0"(lo),"1"(hi) : "cc");      \
404
                                ((SHA_LONG64)hi)<<32|lo; })
405
#else
406
#define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
407
                          unsigned int hi=p[0],lo=p[1];         \
408
                                asm ("bswapl %0; bswapl %1;"    \
409
                                : "=r"(lo),"=r"(hi)             \
410
                                : "0"(lo),"1"(hi));             \
411
                                ((SHA_LONG64)hi)<<32|lo; })
412
#endif
413
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
414
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
415
                                asm ("rotrdi %0,%1,%2"  \
416
                                : "=r"(ret)             \
417
                                : "r"(a),"K"(n)); ret; })
418
#elif defined(__aarch64__)
419
#define ROTR(a, n) ({ SHA_LONG64 ret;             \
420
                                asm ("ror %0,%1,%2"     \
421
                                : "=r"(ret)             \
422
                                : "r"(a),"I"(n)); ret; })
423
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
424
#define PULL64(x) ({ SHA_LONG64 ret;                     \
425
                                asm ("rev       %0,%1"          \
426
                                : "=r"(ret)                     \
427
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
428
#endif
429
#endif
430
#elif defined(_MSC_VER)
431
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
432
#pragma intrinsic(_rotr64)
433
#define ROTR(a, n) _rotr64((a), n)
434
#endif
435
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
436
#if defined(I386_ONLY)
437
static SHA_LONG64 __fastcall __pull64be(const void *x)
438
{
439
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm xchg dh, dl _asm xchg ah, al _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al
440
}
441
#else
442
static SHA_LONG64 __fastcall __pull64be(const void *x) {
443
    _asm mov edx, [ecx + 0] _asm mov eax, [ecx + 4] _asm bswap edx _asm bswap eax
444
}
445
#endif
446
#define PULL64(x) __pull64be(&(x))
447
#endif
448
#endif
449
#endif
450
#ifndef PULL64
451
#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x)) + j))) << ((7 - j) * 8))
452
#define PULL64(x) (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | B(x, 7))
453
#endif
454
#ifndef ROTR
455
#define ROTR(x, s) (((x) >> s) | (x) << (64 - s))
456
#endif
457
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
458
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
459
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
460
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))
461
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
462
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
463
464
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
465
/*
466
 * This code should give better results on 32-bit CPU with less than
467
 * ~24 registers, both size and performance wise...
468
 */
469
470
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
471
    size_t num)
472
{
473
    const SHA_LONG64 *W = in;
474
    SHA_LONG64 A, E, T;
475
    SHA_LONG64 X[9 + 80], *F;
476
    int i;
477
478
    while (num--) {
479
480
        F = X + 80;
481
        A = ctx->h[0];
482
        F[1] = ctx->h[1];
483
        F[2] = ctx->h[2];
484
        F[3] = ctx->h[3];
485
        E = ctx->h[4];
486
        F[5] = ctx->h[5];
487
        F[6] = ctx->h[6];
488
        F[7] = ctx->h[7];
489
490
        for (i = 0; i < 16; i++, F--) {
491
#ifdef B_ENDIAN
492
            T = W[i];
493
#else
494
            T = PULL64(W[i]);
495
#endif
496
            F[0] = A;
497
            F[4] = E;
498
            F[8] = T;
499
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
500
            E = F[3] + T;
501
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
502
        }
503
504
        for (; i < 80; i++, F--) {
505
            T = sigma0(F[8 + 16 - 1]);
506
            T += sigma1(F[8 + 16 - 14]);
507
            T += F[8 + 16] + F[8 + 16 - 9];
508
509
            F[0] = A;
510
            F[4] = E;
511
            F[8] = T;
512
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
513
            E = F[3] + T;
514
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
515
        }
516
517
        ctx->h[0] += A;
518
        ctx->h[1] += F[1];
519
        ctx->h[2] += F[2];
520
        ctx->h[3] += F[3];
521
        ctx->h[4] += E;
522
        ctx->h[5] += F[5];
523
        ctx->h[6] += F[6];
524
        ctx->h[7] += F[7];
525
526
        W += SHA_LBLOCK;
527
    }
528
}
529
530
#elif defined(OPENSSL_SMALL_FOOTPRINT)
531
532
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
533
    size_t num)
534
{
535
    const SHA_LONG64 *W = in;
536
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
537
    SHA_LONG64 X[16];
538
    int i;
539
540
    while (num--) {
541
542
        a = ctx->h[0];
543
        b = ctx->h[1];
544
        c = ctx->h[2];
545
        d = ctx->h[3];
546
        e = ctx->h[4];
547
        f = ctx->h[5];
548
        g = ctx->h[6];
549
        h = ctx->h[7];
550
551
        for (i = 0; i < 16; i++) {
552
#ifdef B_ENDIAN
553
            T1 = X[i] = W[i];
554
#else
555
            T1 = X[i] = PULL64(W[i]);
556
#endif
557
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
558
            T2 = Sigma0(a) + Maj(a, b, c);
559
            h = g;
560
            g = f;
561
            f = e;
562
            e = d + T1;
563
            d = c;
564
            c = b;
565
            b = a;
566
            a = T1 + T2;
567
        }
568
569
        for (; i < 80; i++) {
570
            s0 = X[(i + 1) & 0x0f];
571
            s0 = sigma0(s0);
572
            s1 = X[(i + 14) & 0x0f];
573
            s1 = sigma1(s1);
574
575
            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
576
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
577
            T2 = Sigma0(a) + Maj(a, b, c);
578
            h = g;
579
            g = f;
580
            f = e;
581
            e = d + T1;
582
            d = c;
583
            c = b;
584
            b = a;
585
            a = T1 + T2;
586
        }
587
588
        ctx->h[0] += a;
589
        ctx->h[1] += b;
590
        ctx->h[2] += c;
591
        ctx->h[3] += d;
592
        ctx->h[4] += e;
593
        ctx->h[5] += f;
594
        ctx->h[6] += g;
595
        ctx->h[7] += h;
596
597
        W += SHA_LBLOCK;
598
    }
599
}
600
601
#else
602
#define ROUND_00_15(i, a, b, c, d, e, f, g, h)       \
603
    do {                                             \
604
        T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
605
        h = Sigma0(a) + Maj(a, b, c);                \
606
        d += T1;                                     \
607
        h += T1;                                     \
608
    } while (0)
609
610
#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)       \
611
    do {                                                   \
612
        s0 = X[(j + 1) & 0x0f];                            \
613
        s0 = sigma0(s0);                                   \
614
        s1 = X[(j + 14) & 0x0f];                           \
615
        s1 = sigma1(s1);                                   \
616
        T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
617
        ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
618
    } while (0)
619
620
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
621
    size_t num)
622
{
623
    const SHA_LONG64 *W = in;
624
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
625
    SHA_LONG64 X[16];
626
    int i;
627
628
    while (num--) {
629
630
        a = ctx->h[0];
631
        b = ctx->h[1];
632
        c = ctx->h[2];
633
        d = ctx->h[3];
634
        e = ctx->h[4];
635
        f = ctx->h[5];
636
        g = ctx->h[6];
637
        h = ctx->h[7];
638
639
#ifdef B_ENDIAN
640
        T1 = X[0] = W[0];
641
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
642
        T1 = X[1] = W[1];
643
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
644
        T1 = X[2] = W[2];
645
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
646
        T1 = X[3] = W[3];
647
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
648
        T1 = X[4] = W[4];
649
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
650
        T1 = X[5] = W[5];
651
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
652
        T1 = X[6] = W[6];
653
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
654
        T1 = X[7] = W[7];
655
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
656
        T1 = X[8] = W[8];
657
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
658
        T1 = X[9] = W[9];
659
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
660
        T1 = X[10] = W[10];
661
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
662
        T1 = X[11] = W[11];
663
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
664
        T1 = X[12] = W[12];
665
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
666
        T1 = X[13] = W[13];
667
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
668
        T1 = X[14] = W[14];
669
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
670
        T1 = X[15] = W[15];
671
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
672
#else
673
        T1 = X[0] = PULL64(W[0]);
674
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
675
        T1 = X[1] = PULL64(W[1]);
676
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
677
        T1 = X[2] = PULL64(W[2]);
678
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
679
        T1 = X[3] = PULL64(W[3]);
680
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
681
        T1 = X[4] = PULL64(W[4]);
682
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
683
        T1 = X[5] = PULL64(W[5]);
684
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
685
        T1 = X[6] = PULL64(W[6]);
686
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
687
        T1 = X[7] = PULL64(W[7]);
688
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
689
        T1 = X[8] = PULL64(W[8]);
690
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
691
        T1 = X[9] = PULL64(W[9]);
692
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
693
        T1 = X[10] = PULL64(W[10]);
694
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
695
        T1 = X[11] = PULL64(W[11]);
696
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
697
        T1 = X[12] = PULL64(W[12]);
698
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
699
        T1 = X[13] = PULL64(W[13]);
700
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
701
        T1 = X[14] = PULL64(W[14]);
702
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
703
        T1 = X[15] = PULL64(W[15]);
704
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
705
#endif
706
707
        for (i = 16; i < 80; i += 16) {
708
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
709
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
710
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
711
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
712
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
713
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
714
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
715
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
716
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
717
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
718
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
719
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
720
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
721
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
722
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
723
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
724
        }
725
726
        ctx->h[0] += a;
727
        ctx->h[1] += b;
728
        ctx->h[2] += c;
729
        ctx->h[3] += d;
730
        ctx->h[4] += e;
731
        ctx->h[5] += f;
732
        ctx->h[6] += g;
733
        ctx->h[7] += h;
734
735
        W += SHA_LBLOCK;
736
    }
737
}
738
739
#endif
740
741
#endif /* SHA512_ASM */