/src/nettle-with-mini-gmp/twofish.c

Source (jump to first uncovered line)
/* twofish.c

   The twofish block cipher.

   Copyright (C) 2001, 2014 Niels Möller
   Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>

   Modifications for lsh, integrated testing
   Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
*/

#if HAVE_CONFIG_H
# include "config.h"
#endif

#include <assert.h>
#include <string.h>

#include "twofish.h"

#include "macros.h"

/* 32-bit rotate helpers (left by 1, 8 and 9 bits, right by 1 bit).
 *
 * Each macro mentions its argument twice, so the argument must be free
 * of side effects.  The bits that wrap around are isolated with a mask
 * and moved to the opposite end of the word, then merged with the
 * shifted remainder.
 */

#define rol1(x) ((((x) & 0x80000000) >> 31) | ((x) << 1))
#define rol8(x) ((((x) & 0xFF000000) >> 24) | ((x) << 8))
#define rol9(x) ((((x) & 0xFF800000) >> 23) | ((x) << 9))
#define ror1(x) ((((x) & 0x00000001) << 31) | ((x) >> 1))

/* ------------------------------------------------------------------------- */

/* The permutations q0 and q1.  These are fixed permutations on 8-bit values.
 * The permutations have been computed using the program twofish-data,
 * which is distributed along with this file.
 *
 * q0 is indexed by a byte value (0..255) and yields the permuted byte.
 * It is never indexed directly by the cipher code in this file; it is
 * reached through the q_table[][] dispatch table used by h_byte().
 */

static const uint8_t q0[256] = {
  0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
  0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
  0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
  0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
  0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
  0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
  0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
  0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
  0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
  0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
  0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
  0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
  0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
  0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
  0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
  0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
  0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
  0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
  0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
  0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
  0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
  0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
  0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
  0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
  0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
  0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
  0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
  0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
  0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
  0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
  0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
  0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
};

/* The fixed 8-bit permutation q1: indexed by a byte value (0..255),
 * yields the permuted byte.  Like q0, it is only reached through the
 * q_table[][] dispatch table used by h_byte().
 */
static const uint8_t q1[256] = {
  0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
  0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
  0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
  0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
  0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
  0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
  0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
  0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
  0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
  0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
  0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
  0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
  0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
  0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
  0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
  0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
  0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
  0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
  0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
  0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
  0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
  0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
  0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
  0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
  0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
  0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
  0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
  0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
  0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
  0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
  0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
  0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
};

/* ------------------------------------------------------------------------- */

/* uint32_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
 *
 * Multiply a by b in GF(2^8), reducing modulo the polynomial p.
 *
 * Polynomials are stored with bit 2^i standing for the term x^i; p
 * carries an implicit x^8 term that is not stored.  Addition and
 * subtraction in this field are both XOR.
 *
 * The product always fits in eight bits.  It is returned as uint32_t
 * so that callers can shift it left without a cast.
 */

static uint32_t
gf_multiply(uint8_t p, uint8_t a, uint8_t b)
{
  uint8_t product = 0;
  uint8_t term = b;		/* b * x^n for the current bit n of a */

  for (; a != 0; a >>= 1)
    {
      if (a & 1)
	product ^= term;

      /* Advance term to b * x^(n+1).  When the x^7 coefficient is set,
       * the doubling produces an x^8 term, which is folded back in by
       * XORing with p. */
      if (term & 0x80)
	term = (uint8_t) ((term << 1) ^ p);
      else
	term = (uint8_t) (term << 1);
    }
  return product;
}

/* ------------------------------------------------------------------------- */

/* The matrix RS as specified in section 4.3 of the twofish paper.
 *
 * Four rows of eight GF(2^8) coefficients.  compute_s() multiplies
 * row i by the eight key bytes to obtain byte i of its result.
 */

static const uint8_t rs_matrix[4][8] = {
    { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
    { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
    { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
    { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };

/* uint32_t compute_s(uint32_t m1, uint32_t m2);
 *
 * Returns RS * M, where M is the 8-byte column vector made of the four
 * bytes of m1 (least significant first) followed by the four bytes of
 * m2.  All arithmetic is in GF(2^8) with primitive polynomial
 * x^8 + x^6 + x^3 + x^2 + 1 (0x4D once the implicit x^8 is dropped).
 *
 * Row i of the product lands in byte i (0 = LSB) of the returned word.
 * The results are the sub-keys S from which the key-dependent S-boxes
 * are generated.
 */

static uint32_t
compute_s(uint32_t m1, uint32_t m2)
{
  uint8_t vec[8];
  uint32_t word = 0;
  int row, col;

  /* Split the two input words into the byte vector M. */
  for (col = 0; col < 4; col++)
    {
      vec[col]     = (uint8_t) (m1 >> (8 * col));
      vec[col + 4] = (uint8_t) (m2 >> (8 * col));
    }

  for (row = 0; row < 4; row++)
    {
      uint32_t dot = 0;

      for (col = 0; col < 8; col++)
	dot ^= gf_multiply(0x4D, vec[col], rs_matrix[row][col]);

      word |= dot << (8 * row);
    }
  return word;
}

/* ------------------------------------------------------------------------- */

/* This table describes which q S-boxes are used for each byte in each stage
 * of the function h, cf. figure 2 of the twofish paper.
 *
 * The first index is the byte number passed to h_byte() (0 = LSB), the
 * second is the stage: h_byte() applies column 0 first (innermost
 * lookup) and column 4 last (outermost lookup).
 */

static const uint8_t * const q_table[4][5] =
  { { q1, q1, q0, q0, q1 },
    { q0, q1, q1, q0, q0 },
    { q0, q0, q0, q1, q1 },
    { q1, q0, q1, q1, q0 } };

/* The matrix MDS as specified in section 4.3.2 of the twofish paper.
 *
 * h_byte() multiplies column i of this matrix (entries
 * mds_matrix[0..3][i]) by a single byte in GF(2^8), placing the product
 * for row r in byte r of its result.
 */

static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
         { 0x5B, 0xEF, 0xEF, 0x01 },
         { 0xEF, 0x5B, 0x01, 0xEF },
         { 0xEF, 0x01, 0xEF, 0x5B } };

/* uint32_t h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
 *
 * The h function (section 4.3.2) restricted to a single byte lane.
 *
 * The byte x is passed through a chain of q permutations, selected by
 * q_table[i][stage], with a sub-key byte XORed in between stages.  The
 * resulting byte is then multiplied by column i of the MDS matrix.
 * XORing the results for i = 0..3 gives h for a full word.
 *
 * k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is
 * the input byte, and l0..l3 are the matching sub-key bytes.  Only
 * l0..l(k-1) take part: with k == 2 the two earliest stages are
 * skipped, with k == 3 only the earliest stage is skipped.
 */

static uint32_t
h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
{
  const uint8_t * const *q = q_table[i];
  uint32_t word = 0;
  uint8_t y = x;
  int row;

  if (k != 2)
    {
      /* Stage 0 is skipped for k == 3, and so runs for every other
       * value of k that reaches this branch. */
      if (k != 3)
	y = l3 ^ q[0][y];

      /* Stage 1 is skipped only for k == 2. */
      y = l2 ^ q[1][y];
    }

  /* The last three stages are common to all key sizes. */
  y = l1 ^ q[2][y];
  y = l0 ^ q[3][y];
  y = q[4][y];

  /* Multiply by column i of the MDS matrix, one output byte per row. */
  for (row = 0; row < 4; row++)
    word |= gf_multiply(0x69, mds_matrix[row][i], y) << (8 * row);

  return word;
}

/* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
 *
 * The function h on a full word: byte lane i (0 = LSB) of each sub-key
 * word l0..l3 is handed to h_byte() together with the same input byte
 * x, and the four lane results are XORed.  See h_byte() for the
 * meaning of k.
 */

static uint32_t
h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
{
  uint32_t word = 0;
  int lane;

  for (lane = 0; lane < 4; lane++)
    {
      const unsigned shift = 8 * lane;

      /* The uint8_t parameters of h_byte() keep only the low byte. */
      word ^= h_byte(k, lane, x,
		     l0 >> shift, l1 >> shift, l2 >> shift, l3 >> shift);
    }
  return word;
}


/* ------------------------------------------------------------------------- */

/* API */

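/* The context structure (struct twofish_ctx), which holds the subkeys
 * and the key-dependent s-boxes, is not defined in this file; it is
 * presumably declared in twofish.h, which is included above.
 */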


/* Set up internal tables required for twofish encryption and decryption.
 *
 * context  receives the 40 subkey words (context->keys) and the four
 *          256-entry key-dependent s-boxes (context->s_box).
 * keysize  is the key length in bytes and must not exceed 32.  This is
 *          checked with assert() only; oversized keys are NOT truncated.
 * key      points to keysize bytes of key material.
 *
 * The key is copied into a zero-filled 32-byte buffer, so a short key
 * is effectively zero-padded.  The number of 64-bit key units k is 2
 * for keysize <= 16, 3 for keysize <= 24, and 4 otherwise.
 */

void
twofish_set_key(struct twofish_ctx *context,
    size_t keysize, const uint8_t *key)
{
  uint8_t key_copy[32];
  uint32_t m[8], t;
  /* Only s[0..k-1] are computed below, but all four entries are passed
   * to h_byte().  Zero them so that no indeterminate value is ever
   * read for 128- and 192-bit keys. */
  uint32_t s[4] = { 0, 0, 0, 0 };
  int i, j, k;

  /* Extend key as necessary */

  assert(keysize <= 32);

  /* We do a little more copying than necessary, but that doesn't
   * really matter. */
  memset(key_copy, 0, 32);
  /* Skip memcpy for an empty key: key may then legitimately be NULL. */
  if (keysize > 0)
    memcpy(key_copy, key, keysize);

  for (i = 0; i<8; i++)
    m[i] = LE_READ_UINT32(key_copy + i*4);

  if (keysize <= 16)
    k = 2;
  else if (keysize <= 24)
    k = 3;
  else
    k = 4;

  /* Compute sub-keys.  With A = h(2i, even key words) and
   * B = rol8(h(2i+1, odd key words)):
   *   keys[2i]   = A + B
   *   keys[2i+1] = rol9(A + 2B)
   */

  for (i = 0; i < 20; i++)
    {
      t = h(k, 2*i+1, m[1], m[3], m[5], m[7]);
      t = rol8(t);
      t += (context->keys[2*i] =
      t + h(k, 2*i, m[0], m[2], m[4], m[6]));
      t = rol9(t);
      context->keys[2*i+1] = t;
    }

  /* Compute key-dependent S-boxes.  The S words are stored in reverse
   * order of the key word pairs they are derived from. */

  for (i = 0; i < k; i++)
    s[k-1-i] = compute_s(m[2*i], m[2*i+1]);

  for (i = 0; i < 4; i++)
    for (j = 0; j < 256; j++)
      context->s_box[i][j] = h_byte(k, i, j,
            s[0] >> (i*8),
            s[1] >> (i*8),
            s[2] >> (i*8),
            s[3] >> (i*8));
}

/* Fixed-size key setup: forwards to twofish_set_key() with a key
 * length of TWOFISH128_KEY_SIZE bytes, all of which are read from key. */
void
twofish128_set_key(struct twofish_ctx *context, const uint8_t *key)
{
  const size_t keysize = TWOFISH128_KEY_SIZE;

  twofish_set_key(context, keysize, key);
}
/* Fixed-size key setup: forwards to twofish_set_key() with a key
 * length of TWOFISH192_KEY_SIZE bytes, all of which are read from key. */
void
twofish192_set_key(struct twofish_ctx *context, const uint8_t *key)
{
  const size_t keysize = TWOFISH192_KEY_SIZE;

  twofish_set_key(context, keysize, key);
}
/* Fixed-size key setup: forwards to twofish_set_key() with a key
 * length of TWOFISH256_KEY_SIZE bytes, all of which are read from key. */
void
twofish256_set_key(struct twofish_ctx *context, const uint8_t *key)
{
  const size_t keysize = TWOFISH256_KEY_SIZE;

  twofish_set_key(context, keysize, key);
}

/* Encrypt data with the twofish algorithm, one 16-byte block at a time.
 *
 * twofish_set_key() must have filled in context beforehand.
 *
 * length is the number of bytes to process and must be a multiple of
 * TWOFISH_BLOCK_SIZE (checked with assert()).  Each block is read in
 * full from plaintext before the matching block is written to
 * ciphertext, so the two memory areas can overlap.
 */

void
twofish_encrypt(const struct twofish_ctx *context,
    size_t length,
    uint8_t *ciphertext,
    const uint8_t *plaintext)
{
  const uint32_t *keys = context->keys;
  const uint32_t (*s_box)[256] = context->s_box;

  assert( !(length % TWOFISH_BLOCK_SIZE) );

  while (length)
    {
      const uint32_t *rk;	/* the four subkeys of the current iteration */
      uint32_t r0, r1, r2, r3;
      uint32_t odd, even;
      int n;

      /* Load the block as four little-endian words and XOR in keys[0..3]. */
      r0 = keys[0] ^ LE_READ_UINT32(plaintext);
      r1 = keys[1] ^ LE_READ_UINT32(plaintext + 4);
      r2 = keys[2] ^ LE_READ_UINT32(plaintext + 8);
      r3 = keys[3] ^ LE_READ_UINT32(plaintext + 12);
      plaintext += 16;

      /* Eight iterations of two rounds each, consuming keys[8..39]. */
      for (n = 0, rk = keys + 8; n < 8; n++, rk += 4)
	{
	  /* The s-box order for r1 corresponds to rotating it by one byte. */
	  odd = s_box[1][r1 & 0xFF]
	    ^ s_box[2][(r1 >> 8) & 0xFF]
	    ^ s_box[3][(r1 >> 16) & 0xFF]
	    ^ s_box[0][(r1 >> 24) & 0xFF];
	  even = odd + (s_box[0][r0 & 0xFF]
			^ s_box[1][(r0 >> 8) & 0xFF]
			^ s_box[2][(r0 >> 16) & 0xFF]
			^ s_box[3][(r0 >> 24) & 0xFF]);
	  r3 = rol1(r3) ^ (odd + even + rk[1]);
	  r2 ^= even + rk[0];
	  r2 = ror1(r2);

	  /* Same step with the two word pairs exchanged. */
	  odd = s_box[1][r3 & 0xFF]
	    ^ s_box[2][(r3 >> 8) & 0xFF]
	    ^ s_box[3][(r3 >> 16) & 0xFF]
	    ^ s_box[0][(r3 >> 24) & 0xFF];
	  even = odd + (s_box[0][r2 & 0xFF]
			^ s_box[1][(r2 >> 8) & 0xFF]
			^ s_box[2][(r2 >> 16) & 0xFF]
			^ s_box[3][(r2 >> 24) & 0xFF]);
	  r1 = rol1(r1) ^ (odd + even + rk[3]);
	  r0 ^= even + rk[2];
	  r0 = ror1(r0);
	}

      /* XOR in keys[4..7]; the word pairs are emitted in swapped order. */
      r2 ^= keys[4];
      r3 ^= keys[5];
      r0 ^= keys[6];
      r1 ^= keys[7];

      LE_WRITE_UINT32(ciphertext, r2);
      LE_WRITE_UINT32(ciphertext + 4, r3);
      LE_WRITE_UINT32(ciphertext + 8, r0);
      LE_WRITE_UINT32(ciphertext + 12, r1);
      ciphertext += 16;

      length -= TWOFISH_BLOCK_SIZE;
    }
}

/* Decrypt data with the twofish algorithm, one 16-byte block at a time.
 *
 * twofish_set_key() must have filled in context beforehand.
 *
 * length is the number of bytes to process and must be a multiple of
 * TWOFISH_BLOCK_SIZE (checked with assert()).  Each block is read in
 * full from ciphertext before the matching block is written to
 * plaintext, so the two memory areas can overlap.
 */

void
twofish_decrypt(const struct twofish_ctx *context,
    size_t length,
    uint8_t *plaintext,
    const uint8_t *ciphertext)

{
  const uint32_t *keys = context->keys;
  const uint32_t (*s_box)[256] = context->s_box;

  assert( !(length % TWOFISH_BLOCK_SIZE) );

  while (length)
    {
      const uint32_t *rk;	/* the four subkeys of the current iteration */
      uint32_t r0, r1, r2, r3;
      uint32_t odd, even;
      int n;

      /* Load the block as four little-endian words and XOR in
       * keys[4..7]; input words 0/1 go to r2/r3, words 2/3 to r0/r1. */
      r2 = keys[4] ^ LE_READ_UINT32(ciphertext);
      r3 = keys[5] ^ LE_READ_UINT32(ciphertext + 4);
      r0 = keys[6] ^ LE_READ_UINT32(ciphertext + 8);
      r1 = keys[7] ^ LE_READ_UINT32(ciphertext + 12);
      ciphertext += 16;

      /* Eight iterations of two rounds each, walking the subkeys
       * downwards from keys[39] to keys[8]. */
      for (n = 0, rk = keys + 36; n < 8; n++, rk -= 4)
	{
	  /* The s-box order for r3 corresponds to rotating it by one byte. */
	  odd = s_box[1][r3 & 0xFF]
	    ^ s_box[2][(r3 >> 8) & 0xFF]
	    ^ s_box[3][(r3 >> 16) & 0xFF]
	    ^ s_box[0][(r3 >> 24) & 0xFF];
	  even = odd + (s_box[0][r2 & 0xFF]
			^ s_box[1][(r2 >> 8) & 0xFF]
			^ s_box[2][(r2 >> 16) & 0xFF]
			^ s_box[3][(r2 >> 24) & 0xFF]);
	  r1 ^= odd + even + rk[3];
	  r1 = ror1(r1);
	  r0 = rol1(r0) ^ (even + rk[2]);

	  /* Same step with the two word pairs exchanged. */
	  odd = s_box[1][r1 & 0xFF]
	    ^ s_box[2][(r1 >> 8) & 0xFF]
	    ^ s_box[3][(r1 >> 16) & 0xFF]
	    ^ s_box[0][(r1 >> 24) & 0xFF];
	  even = odd + (s_box[0][r0 & 0xFF]
			^ s_box[1][(r0 >> 8) & 0xFF]
			^ s_box[2][(r0 >> 16) & 0xFF]
			^ s_box[3][(r0 >> 24) & 0xFF]);
	  r3 ^= odd + even + rk[1];
	  r3 = ror1(r3);
	  r2 = rol1(r2) ^ (even + rk[0]);
	}

      /* XOR in keys[0..3] and store the words in natural order. */
      r0 ^= keys[0];
      r1 ^= keys[1];
      r2 ^= keys[2];
      r3 ^= keys[3];

      LE_WRITE_UINT32(plaintext, r0);
      LE_WRITE_UINT32(plaintext + 4, r1);
      LE_WRITE_UINT32(plaintext + 8, r2);
      LE_WRITE_UINT32(plaintext + 12, r3);
      plaintext += 16;

      length -= TWOFISH_BLOCK_SIZE;
    }
}

Coverage Report

Created: 2024-06-28 06:39

Line	Count	Source (jump to first uncovered line)
1		/* twofish.c
2
3		The twofish block cipher.
4
5		Copyright (C) 2001, 2014 Niels Möller
6		Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>
7
8		Modifications for lsh, integrated testing
9		Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>
10
11		This file is part of GNU Nettle.
12
13		GNU Nettle is free software: you can redistribute it and/or
14		modify it under the terms of either:
15
16		* the GNU Lesser General Public License as published by the Free
17		Software Foundation; either version 3 of the License, or (at your
18		option) any later version.
19
20		or
21
22		* the GNU General Public License as published by the Free
23		Software Foundation; either version 2 of the License, or (at your
24		option) any later version.
25
26		or both in parallel, as here.
27
28		GNU Nettle is distributed in the hope that it will be useful,
29		but WITHOUT ANY WARRANTY; without even the implied warranty of
30		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31		General Public License for more details.
32
33		You should have received copies of the GNU General Public License and
34		the GNU Lesser General Public License along with this program. If
35		not, see http://www.gnu.org/licenses/.
36		*/
37
38		#if HAVE_CONFIG_H
39		# include "config.h"
40		#endif
41
42		#include <assert.h>
43		#include <string.h>
44
45		#include "twofish.h"
46
47		#include "macros.h"
48
49		/* Bitwise rotations on 32-bit words. These are defined as macros that
50		* evaluate their argument twice, so do not apply to any expressions with
51		* side effects.
52		*/
53
54	10.7k	#define rol1(x) (((x) << 1) | (((x) & 0x80000000) >> 31))
55	10.7k	#define rol8(x) (((x) << 8) | (((x) & 0xFF000000) >> 24))
56	10.7k	#define rol9(x) (((x) << 9) | (((x) & 0xFF800000) >> 23))
57	10.7k	#define ror1(x) (((x) >> 1) | (((x) & 0x00000001) << 31))
58
59		/* ------------------------------------------------------------------------- */
60
61		/* The permutations q0 and q1. These are fixed permutations on 8-bit values.
62		* The permutations have been computed using the program twofish-data,
63		* which is distributed along with this file.
64		*/
65
66		static const uint8_t q0[256] = {
67		0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
68		0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
69		0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
70		0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
71		0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
72		0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
73		0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
74		0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
75		0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
76		0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
77		0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
78		0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
79		0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
80		0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
81		0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
82		0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
83		0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
84		0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
85		0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
86		0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
87		0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
88		0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
89		0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
90		0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
91		0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
92		0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
93		0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
94		0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
95		0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
96		0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
97		0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
98		0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
99		};
100
101		static const uint8_t q1[256] = {
102		0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
103		0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
104		0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
105		0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
106		0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
107		0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
108		0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
109		0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
110		0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
111		0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
112		0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
113		0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
114		0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
115		0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
116		0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
117		0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
118		0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
119		0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
120		0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
121		0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
122		0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
123		0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
124		0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
125		0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
126		0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
127		0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
128		0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
129		0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
130		0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
131		0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
132		0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
133		0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
134		};
135
136		/* ------------------------------------------------------------------------- */
137
138		/* uint32_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
139		*
140		* Multiplication in GF(2^8). Larger return type, to avoid need for
141		* type casts when the return value is shifted left.
142		*
143		* This function multiplies a times b in the Galois Field GF(2^8) with
144		* primitive polynomial p.
145		* The representation of the polynomials a, b, and p uses bits with
146		* values 2^i to represent the terms x^i. The polynomial p contains an
147		* implicit term x^8.
148		*
149		* Note that addition and subtraction in GF(2^8) is simply the XOR
150		* operation.
151		*/
152
153		static uint32_t
154		gf_multiply(uint8_t p, uint8_t a, uint8_t b)
155	2.60M	{
156	2.60M	uint32_t shift = b;
157	2.60M	uint8_t result = 0;
158	17.9M	while (a)
159	15.3M	{
160	15.3M	if (a & 1) result ^= shift;
161	15.3M	a = a >> 1;
162	15.3M	shift = shift << 1;
163	15.3M	if (shift & 0x100) shift ^= p;
164	15.3M	}
165	2.60M	return result;
166	2.60M	}
167
168		/* ------------------------------------------------------------------------- */
169
170		/* The matrix RS as specified in section 4.3 the twofish paper. */
171
172		static const uint8_t rs_matrix[4][8] = {
173		{ 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
174		{ 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
175		{ 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
176		{ 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
177
178		/* uint32_t compute_s(uint32_t m1, uint32_t m2);
179		*
180		* Computes the value RS * M, where M is a byte vector composed of the
181		* bytes of m1 and m2. Arithmetic is done in GF(2^8) with primitive
182		* polynomial x^8 + x^6 + x^3 + x^2 + 1.
183		*
184		* This function is used to compute the sub-keys S which are in turn used
185		* to generate the S-boxes.
186		*/
187
188		static uint32_t
189		compute_s(uint32_t m1, uint32_t m2)
190	1.96k	{
191	1.96k	uint32_t s = 0;
192	1.96k	int i;
193	9.84k	for (i = 0; i < 4; i++)
194	7.87k	s |= (( gf_multiply(0x4D, m1, rs_matrix[i][0])
195	7.87k	^ gf_multiply(0x4D, m1 >> 8, rs_matrix[i][1])
196	7.87k	^ gf_multiply(0x4D, m1 >> 16, rs_matrix[i][2])
197	7.87k	^ gf_multiply(0x4D, m1 >> 24, rs_matrix[i][3])
198	7.87k	^ gf_multiply(0x4D, m2, rs_matrix[i][4])
199	7.87k	^ gf_multiply(0x4D, m2 >> 8, rs_matrix[i][5])
200	7.87k	^ gf_multiply(0x4D, m2 >> 16, rs_matrix[i][6])
201	7.87k	^ gf_multiply(0x4D, m2 >> 24, rs_matrix[i][7])) << (i*8));
202	1.96k	return s;
203	1.96k	}
204
205		/* ------------------------------------------------------------------------- */
206
207		/* This table describes which q S-boxes are used for each byte in each stage
208		* of the function h, cf. figure 2 of the twofish paper.
209		*/
210
211		static const uint8_t * const q_table[4][5] =
212		{ { q1, q1, q0, q0, q1 },
213		{ q0, q1, q1, q0, q0 },
214		{ q0, q0, q0, q1, q1 },
215		{ q1, q0, q1, q1, q0 } };
216
217		/* The matrix MDS as specified in section 4.3.2 of the twofish paper. */
218
219		static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
220		{ 0x5B, 0xEF, 0xEF, 0x01 },
221		{ 0xEF, 0x5B, 0x01, 0xEF },
222		{ 0xEF, 0x01, 0xEF, 0x5B } };
223
224		/* uint32_t h_uint8_t(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
225		*
226		* Perform the h function (section 4.3.2) on one byte. It consists of
227		* repeated applications of the q permutation, followed by a XOR with
228		* part of a sub-key. Finally, the value is multiplied by one column of
229		* the MDS matrix. To obtain the result for a full word, the results of
230		* h for the individual bytes are XORed.
231		*
232		* k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is the
233		* actual byte to apply the function to; l0, l1, l2, and l3 are the
234		* appropriate bytes from the subkey. Note that only l0..l(k-1) are used.
235		*/
236
237		static uint32_t
238		h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
239	634k	{
240	634k	uint8_t y = q_table[i][4][l0 ^
241	634k	q_table[i][3][l1 ^
242	634k	q_table[i][2][k == 2 ? x : l2 ^
243	562k	q_table[i][1][k == 3 ? x : l3 ^ q_table[i][0][x]]]]];
244
245	634k	return ( (gf_multiply(0x69, mds_matrix[0][i], y))
246	634k	| (gf_multiply(0x69, mds_matrix[1][i], y) << 8)
247	634k	| (gf_multiply(0x69, mds_matrix[2][i], y) << 16)
248	634k	| (gf_multiply(0x69, mds_matrix[3][i], y) << 24) );
249	634k	}
250
251		/* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
252		*
253		* Perform the function h on a word. See the description of h_byte() above.
254		*/
255
256		static uint32_t
257		h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
258	21.4k	{
259	21.4k	return ( h_byte(k, 0, x, l0, l1, l2, l3)
260	21.4k	^ h_byte(k, 1, x, l0 >> 8, l1 >> 8, l2 >> 8, l3 >> 8)
261	21.4k	^ h_byte(k, 2, x, l0 >> 16, l1 >> 16, l2 >> 16, l3 >> 16)
262	21.4k	^ h_byte(k, 3, x, l0 >> 24, l1 >> 24, l2 >> 24, l3 >> 24) );
263	21.4k	}
264
265
266		/* ------------------------------------------------------------------------- */
267
268		/* API */
269
270		/* Structure which contains the tables containing the subkeys and the
271		* key-dependent s-boxes.
272		*/
273
274
275		/* Set up internal tables required for twofish encryption and decryption.
276		*
277		* The key size is specified in bytes. Key sizes up to 32 bytes are
278		* supported. Larger key sizes are silently truncated.
279		*/
280
281		void
282		twofish_set_key(struct twofish_ctx *context,
283		size_t keysize, const uint8_t *key)
284	536	{
285	536	uint8_t key_copy[32];
286	536	uint32_t m[8], s[4], t;
287	536	int i, j, k;
288
289		/* Extend key as necessary */
290
291	536	assert(keysize <= 32);
292
293		/* We do a little more copying than necessary, but that doesn't
294		* really matter. */
295	536	memset(key_copy, 0, 32);
296	536	memcpy(key_copy, key, keysize);
297
298	4.82k	for (i = 0; i<8; i++)
299	4.28k	m[i] = LE_READ_UINT32(key_copy + i*4);
300
301	536	if (keysize <= 16)
302	61	k = 2;
303	475	else if (keysize <= 24)
304	54	k = 3;
305	421	else
306	421	k = 4;
307
308		/* Compute sub-keys */
309
310	11.2k	for (i = 0; i < 20; i++)
311	10.7k	{
312	10.7k	t = h(k, 2*i+1, m[1], m[3], m[5], m[7]);
313	10.7k	t = rol8(t);
314	10.7k	t += (context->keys[2*i] =
315	10.7k	t + h(k, 2*i, m[0], m[2], m[4], m[6]));
316	10.7k	t = rol9(t);
317	10.7k	context->keys[2*i+1] = t;
318	10.7k	}
319
320		/* Compute key-dependent S-boxes */
321
322	2.50k	for (i = 0; i < k; i++)
323	1.96k	s[k-1-i] = compute_s(m[2*i], m[2*i+1]);
324
325	2.68k	for (i = 0; i < 4; i++)
326	551k	for (j = 0; j < 256; j++)
327	548k	context->s_box[i][j] = h_byte(k, i, j,
328	548k	s[0] >> (i*8),
329	548k	s[1] >> (i*8),
330	548k	s[2] >> (i*8),
331	548k	s[3] >> (i*8));
332	536	}
333
334		void
335		twofish128_set_key(struct twofish_ctx *context, const uint8_t *key)
336	0	{
337	0	twofish_set_key (context, TWOFISH128_KEY_SIZE, key);
338	0	}
339		void
340		twofish192_set_key(struct twofish_ctx *context, const uint8_t *key)
341	0	{
342	0	twofish_set_key (context, TWOFISH192_KEY_SIZE, key);
343	0	}
344		void
345		twofish256_set_key(struct twofish_ctx *context, const uint8_t *key)
346	0	{
347	0	twofish_set_key (context, TWOFISH256_KEY_SIZE, key);
348	0	}
349
350		/* Encrypt blocks of 16 bytes of data with the twofish algorithm.
351		*
352		* Before this function can be used, twofish_set_key() must be used in order to
353		* set up various tables required for the encryption algorithm.
354		*
355		* This function always encrypts 16 bytes of plaintext to 16 bytes of
356		* ciphertext. The memory areas of the plaintext and the ciphertext can
357		* overlap.
358		*/
359
360		void
361		twofish_encrypt(const struct twofish_ctx *context,
362		size_t length,
363		uint8_t *ciphertext,
364		const uint8_t *plaintext)
365	232	{
366	232	const uint32_t * keys = context->keys;
367	232	const uint32_t (*s_box)[256] = context->s_box;
368
369	232	assert( !(length % TWOFISH_BLOCK_SIZE) );
370	542	for ( ; length; length -= TWOFISH_BLOCK_SIZE)
371	310	{
372	310	uint32_t words[4];
373	310	uint32_t r0, r1, r2, r3, t0, t1;
374	310	int i;
375
376	1.55k	for (i = 0; i<4; i++, plaintext += 4)
377	1.24k	words[i] = LE_READ_UINT32(plaintext);
378
379	310	r0 = words[0] ^ keys[0];
380	310	r1 = words[1] ^ keys[1];
381	310	r2 = words[2] ^ keys[2];
382	310	r3 = words[3] ^ keys[3];
383
384	2.79k	for (i = 0; i < 8; i++) {
385	2.48k	t1 = ( s_box[1][r1 & 0xFF]
386	2.48k	^ s_box[2][(r1 >> 8) & 0xFF]
387	2.48k	^ s_box[3][(r1 >> 16) & 0xFF]
388	2.48k	^ s_box[0][(r1 >> 24) & 0xFF]);
389	2.48k	t0 = ( s_box[0][r0 & 0xFF]
390	2.48k	^ s_box[1][(r0 >> 8) & 0xFF]
391	2.48k	^ s_box[2][(r0 >> 16) & 0xFF]
392	2.48k	^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
393	2.48k	r3 = (t1 + t0 + keys[4*i+9]) ^ rol1(r3);
394	2.48k	r2 = (t0 + keys[4*i+8]) ^ r2;
395	2.48k	r2 = ror1(r2);
396
397	2.48k	t1 = ( s_box[1][r3 & 0xFF]
398	2.48k	^ s_box[2][(r3 >> 8) & 0xFF]
399	2.48k	^ s_box[3][(r3 >> 16) & 0xFF]
400	2.48k	^ s_box[0][(r3 >> 24) & 0xFF]);
401	2.48k	t0 = ( s_box[0][r2 & 0xFF]
402	2.48k	^ s_box[1][(r2 >> 8) & 0xFF]
403	2.48k	^ s_box[2][(r2 >> 16) & 0xFF]
404	2.48k	^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
405	2.48k	r1 = (t1 + t0 + keys[4*i+11]) ^ rol1(r1);
406	2.48k	r0 = (t0 + keys[4*i+10]) ^ r0;
407	2.48k	r0 = ror1(r0);
408	2.48k	}
409
410	310	words[0] = r2 ^ keys[4];
411	310	words[1] = r3 ^ keys[5];
412	310	words[2] = r0 ^ keys[6];
413	310	words[3] = r1 ^ keys[7];
414
415	1.55k	for (i = 0; i<4; i++, ciphertext += 4)
416	1.24k	LE_WRITE_UINT32(ciphertext, words[i]);
417	310	}
418	232	}
419
420		/* Decrypt blocks of 16 bytes of data with the twofish algorithm.
421		*
422		* Before this function can be used, twofish_set_key() must be used in order to
423		* set up various tables required for the decryption algorithm.
424		*
425		* This function always decrypts 16 bytes of ciphertext to 16 bytes of
426		* plaintext. The memory areas of the plaintext and the ciphertext can
427		* overlap.
428		*/
429
430		void
431		twofish_decrypt(const struct twofish_ctx *context,
432		size_t length,
433		uint8_t *plaintext,
434		const uint8_t *ciphertext)
435
436	304	{
437	304	const uint32_t *keys = context->keys;
438	304	const uint32_t (*s_box)[256] = context->s_box;
439
440	304	assert( !(length % TWOFISH_BLOCK_SIZE) );
441	667	for ( ; length; length -= TWOFISH_BLOCK_SIZE)
442	363	{
443	363	uint32_t words[4];
444	363	uint32_t r0, r1, r2, r3, t0, t1;
445	363	int i;
446
447	1.81k	for (i = 0; i<4; i++, ciphertext += 4)
448	1.45k	words[i] = LE_READ_UINT32(ciphertext);
449
450	363	r0 = words[2] ^ keys[6];
451	363	r1 = words[3] ^ keys[7];
452	363	r2 = words[0] ^ keys[4];
453	363	r3 = words[1] ^ keys[5];
454
455	3.26k	for (i = 0; i < 8; i++) {
456	2.90k	t1 = ( s_box[1][r3 & 0xFF]
457	2.90k	^ s_box[2][(r3 >> 8) & 0xFF]
458	2.90k	^ s_box[3][(r3 >> 16) & 0xFF]
459	2.90k	^ s_box[0][(r3 >> 24) & 0xFF]);
460	2.90k	t0 = ( s_box[0][r2 & 0xFF]
461	2.90k	^ s_box[1][(r2 >> 8) & 0xFF]
462	2.90k	^ s_box[2][(r2 >> 16) & 0xFF]
463	2.90k	^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
464	2.90k	r1 = (t1 + t0 + keys[39-4*i]) ^ r1;
465	2.90k	r1 = ror1(r1);
466	2.90k	r0 = (t0 + keys[38-4*i]) ^ rol1(r0);
467
468	2.90k	t1 = ( s_box[1][r1 & 0xFF]
469	2.90k	^ s_box[2][(r1 >> 8) & 0xFF]
470	2.90k	^ s_box[3][(r1 >> 16) & 0xFF]
471	2.90k	^ s_box[0][(r1 >> 24) & 0xFF]);
472	2.90k	t0 = ( s_box[0][r0 & 0xFF]
473	2.90k	^ s_box[1][(r0 >> 8) & 0xFF]
474	2.90k	^ s_box[2][(r0 >> 16) & 0xFF]
475	2.90k	^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
476	2.90k	r3 = (t1 + t0 + keys[37-4*i]) ^ r3;
477	2.90k	r3 = ror1(r3);
478	2.90k	r2 = (t0 + keys[36-4*i]) ^ rol1(r2);
479	2.90k	}
480
481	363	words[0] = r0 ^ keys[0];
482	363	words[1] = r1 ^ keys[1];
483	363	words[2] = r2 ^ keys[2];
484	363	words[3] = r3 ^ keys[3];
485
486	1.81k	for (i = 0; i<4; i++, plaintext += 4)
487	1.45k	LE_WRITE_UINT32(plaintext, words[i]);
488	363	}
489	304	}