Coverage Report

Created: 2025-06-24 06:45

/src/binutils-gdb/libiberty/sha1.c
Every instrumented line in this file reports an execution count of 0; sha1.c is entirely uncovered. The source listing follows.
/* sha1.c - Functions to compute SHA1 message digest of files or
   memory blocks according to the NIST specification FIPS-180-1.

   Copyright (C) 2000-2025 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/* Written by Scott G. Miller
   Credits:
      Robert Klep <robert@ilse.nl>  -- Expansion function fix
*/

#include <config.h>

#include "sha1.h"

#include <stddef.h>
#include <string.h>

#ifdef HAVE_X86_SHA1_HW_SUPPORT
# include <x86intrin.h>
# include <cpuid.h>
#endif

#if USE_UNLOCKED_IO
# include "unlocked-io.h"
#endif

#ifdef WORDS_BIGENDIAN
# define SWAP(n) (n)
#else
# define SWAP(n) \
    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
#endif
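
On little-endian hosts SWAP reverses the byte order of a 32-bit word so the state can be read and written in the big-endian order SHA-1 prescribes. A worked expansion of the macro for one sample value (illustrative only, not part of sha1.c):

/* For an unsigned 32-bit n = 0x01020304:
     (n << 24)            == 0x04000000
     ((n & 0xff00) << 8)  == 0x00030000
     ((n >> 8) & 0xff00)  == 0x00000200
     (n >> 24)            == 0x00000001
   ORed together: 0x04030201, i.e. a full byte reversal.  */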

#define BLOCKSIZE 4096
#if BLOCKSIZE % 64 != 0
# error "invalid BLOCKSIZE"
#endif

/* This array contains the bytes used to pad the buffer to the next
   64-byte boundary.  (RFC 1321, 3.1: Step 1)  */
static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };


/* Take a pointer to a 160 bit block of data (five 32 bit ints) and
   initialize it to the start constants of the SHA1 algorithm.  This
   must be called before using hash in the call to sha1_hash.  */
void
sha1_init_ctx (struct sha1_ctx *ctx)
{
  ctx->A = 0x67452301;
  ctx->B = 0xefcdab89;
  ctx->C = 0x98badcfe;
  ctx->D = 0x10325476;
  ctx->E = 0xc3d2e1f0;

  ctx->total[0] = ctx->total[1] = 0;
  ctx->buflen = 0;
}

/* Put result from CTX in first 20 bytes following RESBUF.  The result
   must be in little endian byte order.

   IMPORTANT: On some systems it is required that RESBUF is correctly
   aligned for a 32-bit value.  */
void *
sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
{
  ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
  ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
  ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
  ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
  ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);

  return resbuf;
}

/* Process the remaining bytes in the internal buffer and the usual
   prolog according to the standard and write the result to RESBUF.

   IMPORTANT: On some systems it is required that RESBUF is correctly
   aligned for a 32-bit value.  */
void *
sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
{
  /* Take yet unprocessed bytes into account.  */
  sha1_uint32 bytes = ctx->buflen;
  size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;

  /* Now count remaining bytes.  */
  ctx->total[0] += bytes;
  if (ctx->total[0] < bytes)
    ++ctx->total[1];

  /* Put the 64-bit file length in *bits* at the end of the buffer.  */
  ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
  ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);

  memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);

  /* Process last bytes.  */
  sha1_process_block (ctx->buffer, size * 4, ctx);

  return sha1_read_ctx (ctx, resbuf);
}
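
The IMPORTANT notes above are easy to miss: both functions store the 20-byte digest through a sha1_uint32 pointer, so RESBUF must be 32-bit aligned. A minimal caller-side sketch, assuming only the declarations in sha1.h (the example_* name is illustrative, not part of sha1.c):

#include <stdio.h>
#include "sha1.h"

/* Illustrative sketch: declaring the result as five 32-bit words
   guarantees the alignment sha1_finish_ctx/sha1_read_ctx require;
   the bytes are then printed in hex.  */
static void
example_print_digest (const char *data, size_t len)
{
  sha1_uint32 resbuf[5];        /* 5 * 4 = 20 bytes, 32-bit aligned.  */
  const unsigned char *digest = sha1_buffer (data, len, resbuf);
  int i;

  for (i = 0; i < 20; i++)
    printf ("%02x", digest[i]);
  putchar ('\n');
}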

/* Compute SHA1 message digest for bytes read from STREAM.  The
   resulting message digest number will be written into the 20 bytes
   beginning at RESBLOCK.  */
int
sha1_stream (FILE *stream, void *resblock)
{
  struct sha1_ctx ctx;
  char buffer[BLOCKSIZE + 72];
  size_t sum;

  /* Initialize the computation context.  */
  sha1_init_ctx (&ctx);

  /* Iterate over full file contents.  */
  while (1)
    {
      /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
         computation function processes the whole buffer so that with the
         next round of the loop another block can be read.  */
      size_t n;
      sum = 0;

      /* Read block.  Take care for partial reads.  */
      while (1)
        {
          n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);

          sum += n;

          if (sum == BLOCKSIZE)
            break;

          if (n == 0)
            {
              /* Check for the error flag IFF N == 0, so that we don't
                 exit the loop after a partial read due to e.g., EAGAIN
                 or EWOULDBLOCK.  */
              if (ferror (stream))
                return 1;
              goto process_partial_block;
            }

          /* We've read at least one byte, so ignore errors.  But always
             check for EOF, since feof may be true even though N > 0.
             Otherwise, we could end up calling fread after EOF.  */
          if (feof (stream))
            goto process_partial_block;
        }

      /* Process buffer with BLOCKSIZE bytes.  Note that
         BLOCKSIZE % 64 == 0
       */
      sha1_process_block (buffer, BLOCKSIZE, &ctx);
    }

 process_partial_block:;

  /* Process any remaining bytes.  */
  if (sum > 0)
    sha1_process_bytes (buffer, sum, &ctx);

  /* Construct result in desired memory.  */
  sha1_finish_ctx (&ctx, resblock);
  return 0;
}
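
sha1_stream covers the whole-file case: full BLOCKSIZE chunks go straight to sha1_process_block and the tail goes through sha1_process_bytes before sha1_finish_ctx. A hedged usage sketch, not part of sha1.c (example_hash_file is an illustrative name):

#include <stdio.h>
#include "sha1.h"

/* Illustrative sketch: hash one file with sha1_stream and print the
   digest in hex.  Returns 0 on success, 1 on open/read failure.  */
static int
example_hash_file (const char *path)
{
  sha1_uint32 resbuf[5];                 /* 20 bytes, 32-bit aligned.  */
  const unsigned char *digest = (const unsigned char *) resbuf;
  FILE *f = fopen (path, "rb");
  int i;

  if (f == NULL)
    return 1;
  if (sha1_stream (f, resbuf) != 0)
    {
      fclose (f);
      return 1;
    }
  fclose (f);

  for (i = 0; i < 20; i++)
    printf ("%02x", digest[i]);
  printf ("  %s\n", path);
  return 0;
}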

/* Compute SHA1 message digest for LEN bytes beginning at BUFFER.  The
   result is always in little endian byte order, so that a byte-wise
   output yields the wanted ASCII representation of the message
   digest.  */
void *
sha1_buffer (const char *buffer, size_t len, void *resblock)
{
  struct sha1_ctx ctx;

  /* Initialize the computation context.  */
  sha1_init_ctx (&ctx);

  /* Process whole buffer but last len % 64 bytes.  */
  sha1_process_bytes (buffer, len, &ctx);

  /* Put result in desired memory area.  */
  return sha1_finish_ctx (&ctx, resblock);
}
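
sha1_buffer is the one-shot entry point. A small self-check sketch against the FIPS 180-1 test vector SHA1("abc") = a9993e364706816aba3e25717850c26c9cd0d89d (illustrative only; example_check_abc is not part of sha1.c):

#include <string.h>
#include "sha1.h"

/* Illustrative sketch: returns 1 if sha1_buffer reproduces the
   well-known "abc" test vector, 0 otherwise.  */
static int
example_check_abc (void)
{
  static const unsigned char expected[20] =
    { 0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 0x81, 0x6a, 0xba, 0x3e,
      0x25, 0x71, 0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0, 0xd8, 0x9d };
  sha1_uint32 resbuf[5];

  sha1_buffer ("abc", 3, resbuf);
  return memcmp (resbuf, expected, 20) == 0;
}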

void
sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
{
  /* When we already have some bits in our internal buffer concatenate
     both inputs first.  */
  if (ctx->buflen != 0)
    {
      size_t left_over = ctx->buflen;
      size_t add = 128 - left_over > len ? len : 128 - left_over;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
      ctx->buflen += add;

      if (ctx->buflen > 64)
        {
          sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);

          ctx->buflen &= 63;
          /* The regions in the following copy operation cannot overlap.  */
          memcpy (ctx->buffer,
                  &((char *) ctx->buffer)[(left_over + add) & ~63],
                  ctx->buflen);
        }

      buffer = (const char *) buffer + add;
      len -= add;
    }

  /* Process available complete blocks.  */
  if (len >= 64)
    {
#if !_STRING_ARCH_unaligned
# define alignof(type) offsetof (struct { char c; type x; }, x)
# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
      if (UNALIGNED_P (buffer))
        while (len > 64)
          {
            sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
            buffer = (const char *) buffer + 64;
            len -= 64;
          }
      else
#endif
        {
          sha1_process_block (buffer, len & ~63, ctx);
          buffer = (const char *) buffer + (len & ~63);
          len &= 63;
        }
    }

  /* Move remaining bytes in internal buffer.  */
  if (len > 0)
    {
      size_t left_over = ctx->buflen;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
      left_over += len;
      if (left_over >= 64)
        {
          sha1_process_block (ctx->buffer, 64, ctx);
          left_over -= 64;
          memmove (ctx->buffer, &ctx->buffer[16], left_over);
        }
      ctx->buflen = left_over;
    }
}
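
Because sha1_process_bytes buffers any partial block in ctx->buffer, callers may feed data in pieces of arbitrary size. A hedged incremental-use sketch (example_incremental is illustrative, not part of sha1.c):

#include <stdio.h>
#include "sha1.h"

/* Illustrative sketch: the chunk sizes need not be multiples of 64;
   the context carries the partial block between calls.  */
static void
example_incremental (void)
{
  struct sha1_ctx ctx;
  sha1_uint32 resbuf[5];
  const unsigned char *digest;
  int i;

  sha1_init_ctx (&ctx);
  sha1_process_bytes ("The quick brown fox ", 20, &ctx);
  sha1_process_bytes ("jumps over the lazy dog", 23, &ctx);
  digest = (const unsigned char *) sha1_finish_ctx (&ctx, resbuf);

  for (i = 0; i < 20; i++)
    printf ("%02x", digest[i]);
  putchar ('\n');
}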

/* --- Code below is the primary difference between md5.c and sha1.c --- */

/* SHA1 round constants */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

/* Round functions.  Note that F2 is the same as F4.  */
#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
#define F2(B,C,D) (B ^ C ^ D)
#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
#define F4(B,C,D) (B ^ C ^ D)

/* Process LEN bytes of BUFFER, accumulating context into CTX.
   It is assumed that LEN % 64 == 0.
   Most of this code comes from GnuPG's cipher/sha1.c.  */

void
sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
{
  const sha1_uint32 *words = (const sha1_uint32*) buffer;
  size_t nwords = len / sizeof (sha1_uint32);
  const sha1_uint32 *endp = words + nwords;
  sha1_uint32 x[16];
  sha1_uint32 a = ctx->A;
  sha1_uint32 b = ctx->B;
  sha1_uint32 c = ctx->C;
  sha1_uint32 d = ctx->D;
  sha1_uint32 e = ctx->E;

  /* First increment the byte count.  RFC 1321 specifies the possible
     length of the file up to 2^64 bits.  Here we only compute the
     number of bytes.  Do a double word increment.  */
  ctx->total[0] += len;
  ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);

#define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))

#define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
                    ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
                   , (x[I&0x0f] = rol(tm, 1)) )

#define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
                                      + F( B, C, D )  \
                                      + K             \
                                      + M;            \
                                 B = rol( B, 30 );    \
                               } while(0)

  while (words < endp)
    {
      sha1_uint32 tm;
      int t;
      for (t = 0; t < 16; t++)
        {
          x[t] = SWAP (*words);
          words++;
        }

      R( a, b, c, d, e, F1, K1, x[ 0] );
      R( e, a, b, c, d, F1, K1, x[ 1] );
      R( d, e, a, b, c, F1, K1, x[ 2] );
      R( c, d, e, a, b, F1, K1, x[ 3] );
      R( b, c, d, e, a, F1, K1, x[ 4] );
      R( a, b, c, d, e, F1, K1, x[ 5] );
      R( e, a, b, c, d, F1, K1, x[ 6] );
      R( d, e, a, b, c, F1, K1, x[ 7] );
      R( c, d, e, a, b, F1, K1, x[ 8] );
      R( b, c, d, e, a, F1, K1, x[ 9] );
      R( a, b, c, d, e, F1, K1, x[10] );
      R( e, a, b, c, d, F1, K1, x[11] );
      R( d, e, a, b, c, F1, K1, x[12] );
      R( c, d, e, a, b, F1, K1, x[13] );
      R( b, c, d, e, a, F1, K1, x[14] );
      R( a, b, c, d, e, F1, K1, x[15] );
      R( e, a, b, c, d, F1, K1, M(16) );
      R( d, e, a, b, c, F1, K1, M(17) );
      R( c, d, e, a, b, F1, K1, M(18) );
      R( b, c, d, e, a, F1, K1, M(19) );
      R( a, b, c, d, e, F2, K2, M(20) );
      R( e, a, b, c, d, F2, K2, M(21) );
      R( d, e, a, b, c, F2, K2, M(22) );
      R( c, d, e, a, b, F2, K2, M(23) );
      R( b, c, d, e, a, F2, K2, M(24) );
      R( a, b, c, d, e, F2, K2, M(25) );
      R( e, a, b, c, d, F2, K2, M(26) );
      R( d, e, a, b, c, F2, K2, M(27) );
      R( c, d, e, a, b, F2, K2, M(28) );
      R( b, c, d, e, a, F2, K2, M(29) );
      R( a, b, c, d, e, F2, K2, M(30) );
      R( e, a, b, c, d, F2, K2, M(31) );
      R( d, e, a, b, c, F2, K2, M(32) );
      R( c, d, e, a, b, F2, K2, M(33) );
      R( b, c, d, e, a, F2, K2, M(34) );
      R( a, b, c, d, e, F2, K2, M(35) );
      R( e, a, b, c, d, F2, K2, M(36) );
      R( d, e, a, b, c, F2, K2, M(37) );
      R( c, d, e, a, b, F2, K2, M(38) );
      R( b, c, d, e, a, F2, K2, M(39) );
      R( a, b, c, d, e, F3, K3, M(40) );
      R( e, a, b, c, d, F3, K3, M(41) );
      R( d, e, a, b, c, F3, K3, M(42) );
      R( c, d, e, a, b, F3, K3, M(43) );
      R( b, c, d, e, a, F3, K3, M(44) );
      R( a, b, c, d, e, F3, K3, M(45) );
      R( e, a, b, c, d, F3, K3, M(46) );
      R( d, e, a, b, c, F3, K3, M(47) );
      R( c, d, e, a, b, F3, K3, M(48) );
      R( b, c, d, e, a, F3, K3, M(49) );
      R( a, b, c, d, e, F3, K3, M(50) );
      R( e, a, b, c, d, F3, K3, M(51) );
      R( d, e, a, b, c, F3, K3, M(52) );
      R( c, d, e, a, b, F3, K3, M(53) );
      R( b, c, d, e, a, F3, K3, M(54) );
      R( a, b, c, d, e, F3, K3, M(55) );
      R( e, a, b, c, d, F3, K3, M(56) );
      R( d, e, a, b, c, F3, K3, M(57) );
      R( c, d, e, a, b, F3, K3, M(58) );
      R( b, c, d, e, a, F3, K3, M(59) );
      R( a, b, c, d, e, F4, K4, M(60) );
      R( e, a, b, c, d, F4, K4, M(61) );
      R( d, e, a, b, c, F4, K4, M(62) );
      R( c, d, e, a, b, F4, K4, M(63) );
      R( b, c, d, e, a, F4, K4, M(64) );
      R( a, b, c, d, e, F4, K4, M(65) );
      R( e, a, b, c, d, F4, K4, M(66) );
      R( d, e, a, b, c, F4, K4, M(67) );
      R( c, d, e, a, b, F4, K4, M(68) );
      R( b, c, d, e, a, F4, K4, M(69) );
      R( a, b, c, d, e, F4, K4, M(70) );
      R( e, a, b, c, d, F4, K4, M(71) );
      R( d, e, a, b, c, F4, K4, M(72) );
      R( c, d, e, a, b, F4, K4, M(73) );
      R( b, c, d, e, a, F4, K4, M(74) );
      R( a, b, c, d, e, F4, K4, M(75) );
      R( e, a, b, c, d, F4, K4, M(76) );
      R( d, e, a, b, c, F4, K4, M(77) );
      R( c, d, e, a, b, F4, K4, M(78) );
      R( b, c, d, e, a, F4, K4, M(79) );

      a = ctx->A += a;
      b = ctx->B += b;
      c = ctx->C += c;
      d = ctx->D += d;
      e = ctx->E += e;
    }
}
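
The M(I) macro above is the SHA-1 message expansion done in place: x[] holds only the most recent 16 schedule words, and each new word is the one-bit left rotation of the XOR of four earlier ones. The same step written as a plain function, for illustration only (example_expand is not part of sha1.c; it only assumes the sha1_uint32 type from sha1.h):

/* Illustrative sketch, equivalent to M(I) for 16 <= t < 80: computes
   w[t] = rol (w[t-3] ^ w[t-8] ^ w[t-14] ^ w[t-16], 1), where
   x[t & 0x0f] currently holds w[t-16] and is overwritten with w[t].  */
static sha1_uint32
example_expand (sha1_uint32 x[16], int t)
{
  sha1_uint32 tm = x[t & 0x0f] ^ x[(t - 14) & 0x0f]
                   ^ x[(t - 8) & 0x0f] ^ x[(t - 3) & 0x0f];
  x[t & 0x0f] = (tm << 1) | (tm >> 31);    /* rol (tm, 1) */
  return x[t & 0x0f];
}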

#if defined(HAVE_X86_SHA1_HW_SUPPORT)
/* HW specific version of sha1_process_bytes.  */

static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);

static void
sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
{
  /* When we already have some bits in our internal buffer concatenate
     both inputs first.  */
  if (ctx->buflen != 0)
    {
      size_t left_over = ctx->buflen;
      size_t add = 128 - left_over > len ? len : 128 - left_over;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
      ctx->buflen += add;

      if (ctx->buflen > 64)
        {
          sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);

          ctx->buflen &= 63;
          /* The regions in the following copy operation cannot overlap.  */
          memcpy (ctx->buffer,
                  &((char *) ctx->buffer)[(left_over + add) & ~63],
                  ctx->buflen);
        }

      buffer = (const char *) buffer + add;
      len -= add;
    }

  /* Process available complete blocks.  */
  if (len >= 64)
    {
#if !_STRING_ARCH_unaligned
# define alignof(type) offsetof (struct { char c; type x; }, x)
# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
      if (UNALIGNED_P (buffer))
        while (len > 64)
          {
            sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
            buffer = (const char *) buffer + 64;
            len -= 64;
          }
      else
#endif
        {
          sha1_hw_process_block (buffer, len & ~63, ctx);
          buffer = (const char *) buffer + (len & ~63);
          len &= 63;
        }
    }

  /* Move remaining bytes in internal buffer.  */
  if (len > 0)
    {
      size_t left_over = ctx->buflen;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
      left_over += len;
      if (left_over >= 64)
        {
          sha1_hw_process_block (ctx->buffer, 64, ctx);
          left_over -= 64;
          memmove (ctx->buffer, &ctx->buffer[16], left_over);
        }
      ctx->buflen = left_over;
    }
}

/* Process LEN bytes of BUFFER, accumulating context into CTX.
   Using CPU specific intrinsics.  */

#ifdef HAVE_X86_SHA1_HW_SUPPORT
__attribute__((__target__ ("sse4.1,sha")))
#endif
static void
sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
{
#ifdef HAVE_X86_SHA1_HW_SUPPORT
  /* Implemented from
     https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */
  const __m128i *words = (const __m128i *) buffer;
  const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
  __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
  const __m128i shuf_mask
    = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  char check[((offsetof (struct sha1_ctx, B)
               == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
              && (offsetof (struct sha1_ctx, C)
                  == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
              && (offsetof (struct sha1_ctx, D)
                  == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
             ? 1 : -1];

  /* First increment the byte count.  RFC 1321 specifies the possible
     length of the file up to 2^64 bits.  Here we only compute the
     number of bytes.  Do a double word increment.  */
  ctx->total[0] += len;
  ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);

  (void) &check[0];
  abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
  e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
  abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */

  while (words < endp)
    {
      abcd_save = abcd;
      e0_save = e0;

      /* 0..3 */
      msg0 = _mm_loadu_si128 (words);
      msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
      e0 = _mm_add_epi32 (e0, msg0);
      e1 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);

      /* 4..7 */
      msg1 = _mm_loadu_si128 (words + 1);
      msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);

      /* 8..11 */
      msg2 = _mm_loadu_si128 (words + 2);
      msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 12..15 */
      msg3 = _mm_loadu_si128 (words + 3);
      msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 16..19 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 20..23 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 24..27 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 28..31 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 32..35 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 36..39 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 40..43 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 44..47 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 48..51 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 52..55 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 56..59 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 60..63 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 64..67 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 68..71 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 72..75 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);

      /* 76..79 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);

      /* Finalize. */
      e0 = _mm_sha1nexte_epu32 (e0, e0_save);
      abcd = _mm_add_epi32 (abcd, abcd_save);

      words = words + 4;
    }

  abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
  _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
  ctx->E = _mm_extract_epi32 (e0, 3);
#endif
}
#endif

/* Return sha1_process_bytes or some hardware optimized version thereof
   depending on current CPU.  */

sha1_process_bytes_fn
sha1_choose_process_bytes (void)
{
#ifdef HAVE_X86_SHA1_HW_SUPPORT
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
      && (ebx & bit_SHA) != 0
      && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
      && (ecx & bit_SSE4_1) != 0)
    return sha1_hw_process_bytes;
#endif
  return sha1_process_bytes;
}
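
sha1_choose_process_bytes performs the CPUID test once and returns either the portable routine or the SHA-extension one through the sha1_process_bytes_fn pointer type that sha1.h provides for this return value. A hedged usage sketch (example_dispatch is illustrative, not part of sha1.c):

#include <stdio.h>
#include "sha1.h"

/* Illustrative sketch: resolve the best implementation once, then
   feed data through the returned pointer; finishing is unchanged.  */
static void
example_dispatch (const void *data, size_t len)
{
  sha1_process_bytes_fn process = sha1_choose_process_bytes ();
  struct sha1_ctx ctx;
  sha1_uint32 resbuf[5];
  const unsigned char *digest;
  int i;

  sha1_init_ctx (&ctx);
  process (data, len, &ctx);
  digest = (const unsigned char *) sha1_finish_ctx (&ctx, resbuf);

  for (i = 0; i < 20; i++)
    printf ("%02x", digest[i]);
  putchar ('\n');
}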