/src/mozilla-central/mfbt/SHA1.cpp

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/Assertions.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/SHA1.h"

#include <string.h>

using mozilla::NativeEndian;
using mozilla::SHA1Sum;

static inline uint32_t
SHA_ROTL(uint32_t aT, uint32_t aN)
{
  MOZ_ASSERT(aN < 32);
  return (aT << aN) | (aT >> (32 - aN));
}

static void
shaCompress(volatile unsigned* aX, const uint32_t* aBuf);

#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))

#define SHA_MIX(n, a, b, c)    XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^XW(n), 1)

SHA1Sum::SHA1Sum()
  : mSize(0), mDone(false)
{
  // Initialize H with constants from FIPS180-1.
  mH[0] = 0x67452301L;
  mH[1] = 0xefcdab89L;
  mH[2] = 0x98badcfeL;
  mH[3] = 0x10325476L;
  mH[4] = 0xc3d2e1f0L;
}

/*
 * Explanation of H array and index values:
 *
 * The context's H array is actually the concatenation of two arrays
 * defined by SHA1, the H array of state variables (5 elements),
 * and the W array of intermediate values, of which there are 16 elements.
 * The W array starts at H[5], that is W[0] is H[5].
 * Although these values are defined as 32-bit values, we use 64-bit
 * variables to hold them because the AMD64 stores 64 bit values in
 * memory MUCH faster than it stores any smaller values.
 *
 * Rather than passing the context structure to shaCompress, we pass
 * this combined array of H and W values.  We do not pass the address
 * of the first element of this array, but rather pass the address of an
 * element in the middle of the array, element X.  Presently X[0] is H[11].
 * So we pass the address of H[11] as the address of array X to shaCompress.
 * Then shaCompress accesses the members of the array using positive AND
 * negative indexes.
 *
 * Pictorially: (each element is 8 bytes)
 * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
 * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
 *
 * The byte offset from X[0] to any member of H and W is always
 * representable in a signed 8-bit value, which will be encoded
 * as a single byte offset in the X86-64 instruction set.
 * If we didn't pass the address of H[11], and instead passed the
 * address of H[0], the offsets to elements H[16] and above would be
 * greater than 127, not representable in a signed 8-bit value, and the
 * x86-64 instruction set would encode every such offset as a 32-bit
 * signed number in each instruction that accessed element H[16] or
 * higher.  This results in much bigger and slower code.
 */
#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
#define W2X  6 /* X[0] is W[6],  and W[0] is X[-6]  */

/*
 *  SHA: Add data to context.
 */
void
SHA1Sum::update(const void* aData, uint32_t aLen)
{
  MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");

  const uint8_t* data = static_cast<const uint8_t*>(aData);

  if (aLen == 0) {
    return;
  }

  /* Accumulate the byte count. */
  unsigned int lenB = static_cast<unsigned int>(mSize) & 63U;

  mSize += aLen;

  /* Read the data into W and process blocks as they get full. */
  unsigned int togo;
  if (lenB > 0) {
    togo = 64U - lenB;
    if (aLen < togo) {
      togo = aLen;
    }
    memcpy(mU.mB + lenB, data, togo);
    aLen -= togo;
    data += togo;
    lenB = (lenB + togo) & 63U;
    if (!lenB) {
      shaCompress(&mH[H2X], mU.mW);
    }
  }

  while (aLen >= 64U) {
    aLen -= 64U;
    shaCompress(&mH[H2X], reinterpret_cast<const uint32_t*>(data));
    data += 64U;
  }

  if (aLen > 0) {
    memcpy(mU.mB, data, aLen);
  }
}


/*
 *  SHA: Generate hash value
 */
void
SHA1Sum::finish(SHA1Sum::Hash& aHashOut)
{
  MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");

  uint64_t size = mSize;
  uint32_t lenB = uint32_t(size) & 63;

  static const uint8_t bulk_pad[64] =
    { 0x80,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

  /* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
  update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
  MOZ_ASSERT((uint32_t(mSize) & 63) == 56);

  /* Convert size from bytes to bits. */
  size <<= 3;
  mU.mW[14] = NativeEndian::swapToBigEndian(uint32_t(size >> 32));
  mU.mW[15] = NativeEndian::swapToBigEndian(uint32_t(size));
  shaCompress(&mH[H2X], mU.mW);

  /* Output hash. */
  mU.mW[0] = NativeEndian::swapToBigEndian(mH[0]);
  mU.mW[1] = NativeEndian::swapToBigEndian(mH[1]);
  mU.mW[2] = NativeEndian::swapToBigEndian(mH[2]);
  mU.mW[3] = NativeEndian::swapToBigEndian(mH[3]);
  mU.mW[4] = NativeEndian::swapToBigEndian(mH[4]);
  memcpy(aHashOut, mU.mW, 20);
  mDone = true;
}

/*
 *  SHA: Compression function, unrolled.
 *
 * Some operations in shaCompress are done as 5 groups of 16 operations.
 * Others are done as 4 groups of 20 operations.
 * The code below shows that structure.
 *
 * The functions that compute the new values of the 5 state variables
 * A-E are done in 4 groups of 20 operations (or you may also think
 * of them as being done in 16 groups of 5 operations).  They are
 * done by the SHA_RNDx macros below, in the right column.
 *
 * The functions that set the 16 values of the W array are done in
 * 5 groups of 16 operations.  The first group is done by the
 * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
 * in the left column.
 *
 * gcc's optimizer observes that each member of the W array is assigned
 * a value 5 times in this code.  It reduces the number of store
 * operations done to the W array in the context (that is, in the X array)
 * by creating a W array on the stack, and storing the W values there for
 * the first 4 groups of operations on W, and storing the values in the
 * context's W array only in the fifth group.  This is undesirable.
 * It is MUCH bigger code than simply using the context's W array, because
 * all the offsets to the W array in the stack are 32-bit signed offsets,
 * and it is no faster than storing the values in the context's W array.
 *
 * The original code for sha_fast.c prevented this creation of a separate
 * W array in the stack by creating a W array of 80 members, each of
 * whose elements is assigned only once. It also separated the computations
 * of the W array values and the computations of the values for the 5
 * state variables into two separate passes, W's, then A-E's so that the
 * second pass could be done all in registers (except for accessing the W
 * array) on machines with fewer registers.  The method is suboptimal
 * for machines with enough registers to do it all in one pass, and it
 * necessitates using many instructions with 32-bit offsets.
 *
 * This code eliminates the separate W array on the stack by a completely
 * different means: by declaring the X array volatile.  This prevents
 * the optimizer from trying to reduce the use of the X array by the
 * creation of a MORE expensive W array on the stack. The result is
 * that all instructions use signed 8-bit offsets and not 32-bit offsets.
 *
 * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
 * results in code that is 3 times faster than the previous NSS sha_fast
 * code on AMD64.
 */
static void
shaCompress(volatile unsigned* aX, const uint32_t* aBuf)
{
  unsigned A, B, C, D, E;

#define XH(n) aX[n - H2X]
#define XW(n) aX[n - W2X]

#define K0 0x5a827999L
#define K1 0x6ed9eba1L
#define K2 0x8f1bbcdcL
#define K3 0xca62c1d6L

#define SHA_RND1(a, b, c, d, e, n) \
  a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; c = SHA_ROTL(c, 30)
#define SHA_RND2(a, b, c, d, e, n) \
  a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; c = SHA_ROTL(c, 30)
#define SHA_RND3(a, b, c, d, e, n) \
  a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; c = SHA_ROTL(c, 30)
#define SHA_RND4(a, b, c, d, e, n) \
  a = SHA_ROTL(b ,5) + SHA_F4(c, d, e) + a + XW(n) + K3; c = SHA_ROTL(c, 30)

#define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(aBuf[n])

  A = XH(0);
  B = XH(1);
  C = XH(2);
  D = XH(3);
  E = XH(4);

  LOAD(0);       SHA_RND1(E,A,B,C,D, 0);
  LOAD(1);       SHA_RND1(D,E,A,B,C, 1);
  LOAD(2);       SHA_RND1(C,D,E,A,B, 2);
  LOAD(3);       SHA_RND1(B,C,D,E,A, 3);
  LOAD(4);       SHA_RND1(A,B,C,D,E, 4);
  LOAD(5);       SHA_RND1(E,A,B,C,D, 5);
  LOAD(6);       SHA_RND1(D,E,A,B,C, 6);
  LOAD(7);       SHA_RND1(C,D,E,A,B, 7);
  LOAD(8);       SHA_RND1(B,C,D,E,A, 8);
  LOAD(9);       SHA_RND1(A,B,C,D,E, 9);
  LOAD(10);      SHA_RND1(E,A,B,C,D,10);
  LOAD(11);      SHA_RND1(D,E,A,B,C,11);
  LOAD(12);      SHA_RND1(C,D,E,A,B,12);
  LOAD(13);      SHA_RND1(B,C,D,E,A,13);
  LOAD(14);      SHA_RND1(A,B,C,D,E,14);
  LOAD(15);      SHA_RND1(E,A,B,C,D,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND1(D,E,A,B,C, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND1(C,D,E,A,B, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND1(B,C,D,E,A, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND1(A,B,C,D,E, 3);

  SHA_MIX( 4,  1, 12,  6); SHA_RND2(E,A,B,C,D, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND2(D,E,A,B,C, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND2(C,D,E,A,B, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND2(B,C,D,E,A, 7);
  SHA_MIX( 8,  5,  0, 10); SHA_RND2(A,B,C,D,E, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND2(E,A,B,C,D, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND2(D,E,A,B,C,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND2(C,D,E,A,B,11);
  SHA_MIX(12,  9,  4, 14); SHA_RND2(B,C,D,E,A,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND2(A,B,C,D,E,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND2(E,A,B,C,D,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND2(D,E,A,B,C,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND2(C,D,E,A,B, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND2(B,C,D,E,A, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND2(A,B,C,D,E, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND2(E,A,B,C,D, 3);
  SHA_MIX( 4,  1, 12,  6); SHA_RND2(D,E,A,B,C, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND2(C,D,E,A,B, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND2(B,C,D,E,A, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND2(A,B,C,D,E, 7);

  SHA_MIX( 8,  5,  0, 10); SHA_RND3(E,A,B,C,D, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND3(D,E,A,B,C, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND3(C,D,E,A,B,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND3(B,C,D,E,A,11);
  SHA_MIX(12,  9,  4, 14); SHA_RND3(A,B,C,D,E,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND3(E,A,B,C,D,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND3(D,E,A,B,C,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND3(C,D,E,A,B,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND3(B,C,D,E,A, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND3(A,B,C,D,E, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND3(E,A,B,C,D, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND3(D,E,A,B,C, 3);
  SHA_MIX( 4,  1, 12,  6); SHA_RND3(C,D,E,A,B, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND3(B,C,D,E,A, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND3(A,B,C,D,E, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND3(E,A,B,C,D, 7);
  SHA_MIX( 8,  5,  0, 10); SHA_RND3(D,E,A,B,C, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND3(C,D,E,A,B, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND3(B,C,D,E,A,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND3(A,B,C,D,E,11);

  SHA_MIX(12,  9,  4, 14); SHA_RND4(E,A,B,C,D,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND4(D,E,A,B,C,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND4(C,D,E,A,B,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND4(B,C,D,E,A,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND4(A,B,C,D,E, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND4(E,A,B,C,D, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND4(D,E,A,B,C, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND4(C,D,E,A,B, 3);
  SHA_MIX( 4,  1, 12,  6); SHA_RND4(B,C,D,E,A, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND4(A,B,C,D,E, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND4(E,A,B,C,D, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND4(D,E,A,B,C, 7);
  SHA_MIX( 8,  5,  0, 10); SHA_RND4(C,D,E,A,B, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND4(B,C,D,E,A, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND4(A,B,C,D,E,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND4(E,A,B,C,D,11);
  SHA_MIX(12,  9,  4, 14); SHA_RND4(D,E,A,B,C,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND4(C,D,E,A,B,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND4(B,C,D,E,A,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND4(A,B,C,D,E,15);

  XH(0) += A;
  XH(1) += B;
  XH(2) += C;
  XH(3) += D;
  XH(4) += E;
}

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -- */
2		/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3		/* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#include "mozilla/Assertions.h"
8		#include "mozilla/EndianUtils.h"
9		#include "mozilla/SHA1.h"
10
11		#include <string.h>
12
13		using mozilla::NativeEndian;
14		using mozilla::SHA1Sum;
15
16		static inline uint32_t
17		SHA_ROTL(uint32_t aT, uint32_t aN)
18	0	{
19	0	MOZ_ASSERT(aN < 32);
20	0	return (aT << aN) \| (aT >> (32 - aN));
21	0	}
22
23		static void
24		shaCompress(volatile unsigned* aX, const uint32_t* aBuf);
25
26	0	#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
27	0	#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
28	0	#define SHA_F3(X, Y, Z) (((X) & (Y)) \| ((Z) & ((X) \| (Y))))
29	0	#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
30
31	0	#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^XW(n), 1)
32
33		SHA1Sum::SHA1Sum()
34		: mSize(0), mDone(false)
35	0	{
36	0	// Initialize H with constants from FIPS180-1.
37	0	mH[0] = 0x67452301L;
38	0	mH[1] = 0xefcdab89L;
39	0	mH[2] = 0x98badcfeL;
40	0	mH[3] = 0x10325476L;
41	0	mH[4] = 0xc3d2e1f0L;
42	0	}
43
44		/*
45		* Explanation of H array and index values:
46		*
47		* The context's H array is actually the concatenation of two arrays
48		* defined by SHA1, the H array of state variables (5 elements),
49		* and the W array of intermediate values, of which there are 16 elements.
50		* The W array starts at H[5], that is W[0] is H[5].
51		* Although these values are defined as 32-bit values, we use 64-bit
52		* variables to hold them because the AMD64 stores 64 bit values in
53		* memory MUCH faster than it stores any smaller values.
54		*
55		* Rather than passing the context structure to shaCompress, we pass
56		* this combined array of H and W values. We do not pass the address
57		* of the first element of this array, but rather pass the address of an
58		* element in the middle of the array, element X. Presently X[0] is H[11].
59		* So we pass the address of H[11] as the address of array X to shaCompress.
60		* Then shaCompress accesses the members of the array using positive AND
61		* negative indexes.
62		*
63		* Pictorially: (each element is 8 bytes)
64		* H \| H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf \|
65		* X \|-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 \|
66		*
67		* The byte offset from X[0] to any member of H and W is always
68		* representable in a signed 8-bit value, which will be encoded
69		* as a single byte offset in the X86-64 instruction set.
70		* If we didn't pass the address of H[11], and instead passed the
71		* address of H[0], the offsets to elements H[16] and above would be
72		* greater than 127, not representable in a signed 8-bit value, and the
73		* x86-64 instruction set would encode every such offset as a 32-bit
74		* signed number in each instruction that accessed element H[16] or
75		* higher. This results in much bigger and slower code.
76		*/
77	0	#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
78	0	#define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */
79
80		/*
81		* SHA: Add data to context.
82		*/
83		void
84		SHA1Sum::update(const void* aData, uint32_t aLen)
85	0	{
86	0	MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
87	0
88	0	const uint8_t* data = static_cast<const uint8_t*>(aData);
89	0
90	0	if (aLen == 0) {
91	0	return;
92	0	}
93	0
94	0	/* Accumulate the byte count. */
95	0	unsigned int lenB = static_cast<unsigned int>(mSize) & 63U;
96	0
97	0	mSize += aLen;
98	0
99	0	/* Read the data into W and process blocks as they get full. */
100	0	unsigned int togo;
101	0	if (lenB > 0) {
102	0	togo = 64U - lenB;
103	0	if (aLen < togo) {
104	0	togo = aLen;
105	0	}
106	0	memcpy(mU.mB + lenB, data, togo);
107	0	aLen -= togo;
108	0	data += togo;
109	0	lenB = (lenB + togo) & 63U;
110	0	if (!lenB) {
111	0	shaCompress(&mH[H2X], mU.mW);
112	0	}
113	0	}
114	0
115	0	while (aLen >= 64U) {
116	0	aLen -= 64U;
117	0	shaCompress(&mH[H2X], reinterpret_cast<const uint32_t*>(data));
118	0	data += 64U;
119	0	}
120	0
121	0	if (aLen > 0) {
122	0	memcpy(mU.mB, data, aLen);
123	0	}
124	0	}
125
126
127		/*
128		* SHA: Generate hash value
129		*/
130		void
131		SHA1Sum::finish(SHA1Sum::Hash& aHashOut)
132	0	{
133	0	MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
134	0
135	0	uint64_t size = mSize;
136	0	uint32_t lenB = uint32_t(size) & 63;
137	0
138	0	static const uint8_t bulk_pad[64] =
139	0	{ 0x80,0,0,0,0,0,0,0,0,0,
140	0	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141	0	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
142	0
143	0	/* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
144	0	update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
145	0	MOZ_ASSERT((uint32_t(mSize) & 63) == 56);
146	0
147	0	/* Convert size from bytes to bits. */
148	0	size <<= 3;
149	0	mU.mW[14] = NativeEndian::swapToBigEndian(uint32_t(size >> 32));
150	0	mU.mW[15] = NativeEndian::swapToBigEndian(uint32_t(size));
151	0	shaCompress(&mH[H2X], mU.mW);
152	0
153	0	/* Output hash. */
154	0	mU.mW[0] = NativeEndian::swapToBigEndian(mH[0]);
155	0	mU.mW[1] = NativeEndian::swapToBigEndian(mH[1]);
156	0	mU.mW[2] = NativeEndian::swapToBigEndian(mH[2]);
157	0	mU.mW[3] = NativeEndian::swapToBigEndian(mH[3]);
158	0	mU.mW[4] = NativeEndian::swapToBigEndian(mH[4]);
159	0	memcpy(aHashOut, mU.mW, 20);
160	0	mDone = true;
161	0	}
162
163		/*
164		* SHA: Compression function, unrolled.
165		*
166		* Some operations in shaCompress are done as 5 groups of 16 operations.
167		* Others are done as 4 groups of 20 operations.
168		* The code below shows that structure.
169		*
170		* The functions that compute the new values of the 5 state variables
171		* A-E are done in 4 groups of 20 operations (or you may also think
172		* of them as being done in 16 groups of 5 operations). They are
173		* done by the SHA_RNDx macros below, in the right column.
174		*
175		* The functions that set the 16 values of the W array are done in
176		* 5 groups of 16 operations. The first group is done by the
177		* LOAD macros below, the latter 4 groups are done by SHA_MIX below,
178		* in the left column.
179		*
180		* gcc's optimizer observes that each member of the W array is assigned
181		* a value 5 times in this code. It reduces the number of store
182		* operations done to the W array in the context (that is, in the X array)
183		* by creating a W array on the stack, and storing the W values there for
184		* the first 4 groups of operations on W, and storing the values in the
185		* context's W array only in the fifth group. This is undesirable.
186		* It is MUCH bigger code than simply using the context's W array, because
187		* all the offsets to the W array in the stack are 32-bit signed offsets,
188		* and it is no faster than storing the values in the context's W array.
189		*
190		* The original code for sha_fast.c prevented this creation of a separate
191		* W array in the stack by creating a W array of 80 members, each of
192		* whose elements is assigned only once. It also separated the computations
193		* of the W array values and the computations of the values for the 5
194		* state variables into two separate passes, W's, then A-E's so that the
195		* second pass could be done all in registers (except for accessing the W
196		* array) on machines with fewer registers. The method is suboptimal
197		* for machines with enough registers to do it all in one pass, and it
198		* necessitates using many instructions with 32-bit offsets.
199		*
200		* This code eliminates the separate W array on the stack by a completely
201		* different means: by declaring the X array volatile. This prevents
202		* the optimizer from trying to reduce the use of the X array by the
203		* creation of a MORE expensive W array on the stack. The result is
204		* that all instructions use signed 8-bit offsets and not 32-bit offsets.
205		*
206		* The combination of this code and the -O3 optimizer flag on GCC 3.4.3
207		* results in code that is 3 times faster than the previous NSS sha_fast
208		* code on AMD64.
209		*/
210		static void
211		shaCompress(volatile unsigned* aX, const uint32_t* aBuf)
212	0	{
213	0	unsigned A, B, C, D, E;
214	0
215	0	#define XH(n) aX[n - H2X]
216	0	#define XW(n) aX[n - W2X]
217	0
218	0	#define K0 0x5a827999L
219	0	#define K1 0x6ed9eba1L
220	0	#define K2 0x8f1bbcdcL
221	0	#define K3 0xca62c1d6L
222	0
223	0	#define SHA_RND1(a, b, c, d, e, n) \
224	0	a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; c = SHA_ROTL(c, 30)
225	0	#define SHA_RND2(a, b, c, d, e, n) \
226	0	a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; c = SHA_ROTL(c, 30)
227	0	#define SHA_RND3(a, b, c, d, e, n) \
228	0	a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; c = SHA_ROTL(c, 30)
229	0	#define SHA_RND4(a, b, c, d, e, n) \
230	0	a = SHA_ROTL(b ,5) + SHA_F4(c, d, e) + a + XW(n) + K3; c = SHA_ROTL(c, 30)
231	0
232	0	#define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(aBuf[n])
233	0
234	0	A = XH(0);
235	0	B = XH(1);
236	0	C = XH(2);
237	0	D = XH(3);
238	0	E = XH(4);
239	0
240	0	LOAD(0); SHA_RND1(E,A,B,C,D, 0);
241	0	LOAD(1); SHA_RND1(D,E,A,B,C, 1);
242	0	LOAD(2); SHA_RND1(C,D,E,A,B, 2);
243	0	LOAD(3); SHA_RND1(B,C,D,E,A, 3);
244	0	LOAD(4); SHA_RND1(A,B,C,D,E, 4);
245	0	LOAD(5); SHA_RND1(E,A,B,C,D, 5);
246	0	LOAD(6); SHA_RND1(D,E,A,B,C, 6);
247	0	LOAD(7); SHA_RND1(C,D,E,A,B, 7);
248	0	LOAD(8); SHA_RND1(B,C,D,E,A, 8);
249	0	LOAD(9); SHA_RND1(A,B,C,D,E, 9);
250	0	LOAD(10); SHA_RND1(E,A,B,C,D,10);
251	0	LOAD(11); SHA_RND1(D,E,A,B,C,11);
252	0	LOAD(12); SHA_RND1(C,D,E,A,B,12);
253	0	LOAD(13); SHA_RND1(B,C,D,E,A,13);
254	0	LOAD(14); SHA_RND1(A,B,C,D,E,14);
255	0	LOAD(15); SHA_RND1(E,A,B,C,D,15);
256	0
257	0	SHA_MIX( 0, 13, 8, 2); SHA_RND1(D,E,A,B,C, 0);
258	0	SHA_MIX( 1, 14, 9, 3); SHA_RND1(C,D,E,A,B, 1);
259	0	SHA_MIX( 2, 15, 10, 4); SHA_RND1(B,C,D,E,A, 2);
260	0	SHA_MIX( 3, 0, 11, 5); SHA_RND1(A,B,C,D,E, 3);
261	0
262	0	SHA_MIX( 4, 1, 12, 6); SHA_RND2(E,A,B,C,D, 4);
263	0	SHA_MIX( 5, 2, 13, 7); SHA_RND2(D,E,A,B,C, 5);
264	0	SHA_MIX( 6, 3, 14, 8); SHA_RND2(C,D,E,A,B, 6);
265	0	SHA_MIX( 7, 4, 15, 9); SHA_RND2(B,C,D,E,A, 7);
266	0	SHA_MIX( 8, 5, 0, 10); SHA_RND2(A,B,C,D,E, 8);
267	0	SHA_MIX( 9, 6, 1, 11); SHA_RND2(E,A,B,C,D, 9);
268	0	SHA_MIX(10, 7, 2, 12); SHA_RND2(D,E,A,B,C,10);
269	0	SHA_MIX(11, 8, 3, 13); SHA_RND2(C,D,E,A,B,11);
270	0	SHA_MIX(12, 9, 4, 14); SHA_RND2(B,C,D,E,A,12);
271	0	SHA_MIX(13, 10, 5, 15); SHA_RND2(A,B,C,D,E,13);
272	0	SHA_MIX(14, 11, 6, 0); SHA_RND2(E,A,B,C,D,14);
273	0	SHA_MIX(15, 12, 7, 1); SHA_RND2(D,E,A,B,C,15);
274	0
275	0	SHA_MIX( 0, 13, 8, 2); SHA_RND2(C,D,E,A,B, 0);
276	0	SHA_MIX( 1, 14, 9, 3); SHA_RND2(B,C,D,E,A, 1);
277	0	SHA_MIX( 2, 15, 10, 4); SHA_RND2(A,B,C,D,E, 2);
278	0	SHA_MIX( 3, 0, 11, 5); SHA_RND2(E,A,B,C,D, 3);
279	0	SHA_MIX( 4, 1, 12, 6); SHA_RND2(D,E,A,B,C, 4);
280	0	SHA_MIX( 5, 2, 13, 7); SHA_RND2(C,D,E,A,B, 5);
281	0	SHA_MIX( 6, 3, 14, 8); SHA_RND2(B,C,D,E,A, 6);
282	0	SHA_MIX( 7, 4, 15, 9); SHA_RND2(A,B,C,D,E, 7);
283	0
284	0	SHA_MIX( 8, 5, 0, 10); SHA_RND3(E,A,B,C,D, 8);
285	0	SHA_MIX( 9, 6, 1, 11); SHA_RND3(D,E,A,B,C, 9);
286	0	SHA_MIX(10, 7, 2, 12); SHA_RND3(C,D,E,A,B,10);
287	0	SHA_MIX(11, 8, 3, 13); SHA_RND3(B,C,D,E,A,11);
288	0	SHA_MIX(12, 9, 4, 14); SHA_RND3(A,B,C,D,E,12);
289	0	SHA_MIX(13, 10, 5, 15); SHA_RND3(E,A,B,C,D,13);
290	0	SHA_MIX(14, 11, 6, 0); SHA_RND3(D,E,A,B,C,14);
291	0	SHA_MIX(15, 12, 7, 1); SHA_RND3(C,D,E,A,B,15);
292	0
293	0	SHA_MIX( 0, 13, 8, 2); SHA_RND3(B,C,D,E,A, 0);
294	0	SHA_MIX( 1, 14, 9, 3); SHA_RND3(A,B,C,D,E, 1);
295	0	SHA_MIX( 2, 15, 10, 4); SHA_RND3(E,A,B,C,D, 2);
296	0	SHA_MIX( 3, 0, 11, 5); SHA_RND3(D,E,A,B,C, 3);
297	0	SHA_MIX( 4, 1, 12, 6); SHA_RND3(C,D,E,A,B, 4);
298	0	SHA_MIX( 5, 2, 13, 7); SHA_RND3(B,C,D,E,A, 5);
299	0	SHA_MIX( 6, 3, 14, 8); SHA_RND3(A,B,C,D,E, 6);
300	0	SHA_MIX( 7, 4, 15, 9); SHA_RND3(E,A,B,C,D, 7);
301	0	SHA_MIX( 8, 5, 0, 10); SHA_RND3(D,E,A,B,C, 8);
302	0	SHA_MIX( 9, 6, 1, 11); SHA_RND3(C,D,E,A,B, 9);
303	0	SHA_MIX(10, 7, 2, 12); SHA_RND3(B,C,D,E,A,10);
304	0	SHA_MIX(11, 8, 3, 13); SHA_RND3(A,B,C,D,E,11);
305	0
306	0	SHA_MIX(12, 9, 4, 14); SHA_RND4(E,A,B,C,D,12);
307	0	SHA_MIX(13, 10, 5, 15); SHA_RND4(D,E,A,B,C,13);
308	0	SHA_MIX(14, 11, 6, 0); SHA_RND4(C,D,E,A,B,14);
309	0	SHA_MIX(15, 12, 7, 1); SHA_RND4(B,C,D,E,A,15);
310	0
311	0	SHA_MIX( 0, 13, 8, 2); SHA_RND4(A,B,C,D,E, 0);
312	0	SHA_MIX( 1, 14, 9, 3); SHA_RND4(E,A,B,C,D, 1);
313	0	SHA_MIX( 2, 15, 10, 4); SHA_RND4(D,E,A,B,C, 2);
314	0	SHA_MIX( 3, 0, 11, 5); SHA_RND4(C,D,E,A,B, 3);
315	0	SHA_MIX( 4, 1, 12, 6); SHA_RND4(B,C,D,E,A, 4);
316	0	SHA_MIX( 5, 2, 13, 7); SHA_RND4(A,B,C,D,E, 5);
317	0	SHA_MIX( 6, 3, 14, 8); SHA_RND4(E,A,B,C,D, 6);
318	0	SHA_MIX( 7, 4, 15, 9); SHA_RND4(D,E,A,B,C, 7);
319	0	SHA_MIX( 8, 5, 0, 10); SHA_RND4(C,D,E,A,B, 8);
320	0	SHA_MIX( 9, 6, 1, 11); SHA_RND4(B,C,D,E,A, 9);
321	0	SHA_MIX(10, 7, 2, 12); SHA_RND4(A,B,C,D,E,10);
322	0	SHA_MIX(11, 8, 3, 13); SHA_RND4(E,A,B,C,D,11);
323	0	SHA_MIX(12, 9, 4, 14); SHA_RND4(D,E,A,B,C,12);
324	0	SHA_MIX(13, 10, 5, 15); SHA_RND4(C,D,E,A,B,13);
325	0	SHA_MIX(14, 11, 6, 0); SHA_RND4(B,C,D,E,A,14);
326	0	SHA_MIX(15, 12, 7, 1); SHA_RND4(A,B,C,D,E,15);
327	0
328	0	XH(0) += A;
329	0	XH(1) += B;
330	0	XH(2) += C;
331	0	XH(3) += D;
332	0	XH(4) += E;
333	0	}