/src/nettle-with-libgmp/twofish.c

Source (jump to first uncovered line)
/* twofish.c

   The twofish block cipher.

   Copyright (C) 2001, 2014 Niels Möller
   Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>

   Modifications for lsh, integrated testing
   Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
*/

#if HAVE_CONFIG_H
# include "config.h"
#endif

#include <assert.h>
#include <string.h>

#include "twofish.h"

#include "macros.h"

/* Bitwise rotations on 32-bit words.  These are defined as macros that
 * evaluate their argument twice, so do not apply to any expressions with
 * side effects.
 *
 * Each macro ORs the shifted value with the bits that the shift pushed
 * out at the other end: rol1, rol8 and rol9 bring the top 1, 8 and 9
 * bits down to the bottom, and ror1 brings bit 0 up to bit 31.
 *
 * The result is a true rotation only when x has a 32-bit unsigned type,
 * because the macros rely on the left shift discarding everything above
 * bit 31.  Every use in this file passes a uint32_t variable.
 */

#define rol1(x) (((x) << 1) | (((x) & 0x80000000) >> 31))
#define rol8(x) (((x) << 8) | (((x) & 0xFF000000) >> 24))
#define rol9(x) (((x) << 9) | (((x) & 0xFF800000) >> 23))
#define ror1(x) (((x) >> 1) | (((x) & 0x00000001) << 31))

/* ------------------------------------------------------------------------- */

/* The permutations q0 and q1.  These are fixed permutations on 8-bit values.
 * The permutations have been computed using the program twofish-data,
 * which is distributed along with this file.
 *
 * Each table has 256 entries and is indexed directly by the input byte.
 * Within this file the tables are referenced only from q_table, which
 * picks q0 or q1 for each byte position and stage of the function h.
 */

static const uint8_t q0[256] = {
  0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
  0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
  0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
  0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
  0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
  0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
  0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
  0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
  0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
  0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
  0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
  0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
  0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
  0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
  0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
  0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
  0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
  0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
  0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
  0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
  0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
  0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
  0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
  0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
  0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
  0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
  0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
  0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
  0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
  0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
  0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
  0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
};

/* The fixed 8-bit permutation q1: 256 entries, indexed directly by the
 * input byte. */
static const uint8_t q1[256] = {
  0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
  0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
  0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
  0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
  0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
  0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
  0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
  0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
  0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
  0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
  0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
  0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
  0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
  0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
  0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
  0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
  0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
  0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
  0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
  0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
  0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
  0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
  0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
  0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
  0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
  0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
  0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
  0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
  0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
  0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
  0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
  0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
};

/* ------------------------------------------------------------------------- */

/* uint32_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
 *
 * Returns the product a * b in the Galois Field GF(2^8) whose reduction
 * polynomial is p.  Polynomials are encoded with bit 2^i standing for
 * the term x^i; p leaves out its leading x^8 term, which is implicit.
 *
 * The product always fits in eight bits.  It is nevertheless returned
 * as a uint32_t so that callers can shift it left into a wider word
 * without a cast.
 *
 * Addition and subtraction in GF(2^8) are both plain XOR, which is why
 * the loop below only ever XORs.
 */

static uint32_t
gf_multiply(uint8_t p, uint8_t a, uint8_t b)
{
  uint8_t product = 0;
  uint8_t term = b;	/* b * x^n for the bit of a being examined */

  for (; a != 0; a >>= 1)
    {
      if (a & 1)
        product ^= term;

      /* Multiply term by x.  If that would produce an x^8 term, fold
       * it back in by adding (XORing) the reduction polynomial. */
      term = (uint8_t) ((term << 1) ^ ((term & 0x80) ? p : 0));
    }

  return product;
}

/* ------------------------------------------------------------------------- */

/* The matrix RS as specified in section 4.3 of the twofish paper.
 *
 * It has four rows of eight bytes.  compute_s() multiplies row i by the
 * eight key bytes it is given to produce byte i of its result.
 */

static const uint8_t rs_matrix[4][8] = {
    { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
    { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
    { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
    { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };

/* uint32_t compute_s(uint32_t m1, uint32_t m2);
 *
 * Computes RS * M, where M is the 8-byte column vector made of the four
 * bytes of m1 (least significant byte first) followed by the four bytes
 * of m2.  All arithmetic is in GF(2^8) with primitive polynomial
 * x^8 + x^6 + x^3 + x^2 + 1 (0x4D plus the implicit x^8 term).
 *
 * Row i of RS yields byte i (0 = LSB) of the returned word.  The result
 * is one of the S sub-keys from which the key-dependent S-boxes are
 * generated.
 */

static uint32_t
compute_s(uint32_t m1, uint32_t m2)
{
  uint32_t result = 0;
  unsigned row, col;

  for (row = 0; row < 4; row++)
    {
      /* Dot product of one RS row with the key bytes. */
      uint32_t dot = 0;

      for (col = 0; col < 4; col++)
        {
          dot ^= gf_multiply(0x4D, (uint8_t) (m1 >> (8 * col)),
                             rs_matrix[row][col]);
          dot ^= gf_multiply(0x4D, (uint8_t) (m2 >> (8 * col)),
                             rs_matrix[row][col + 4]);
        }

      result |= dot << (8 * row);
    }

  return result;
}

/* ------------------------------------------------------------------------- */

/* This table describes which q S-boxes are used for each byte in each stage
 * of the function h, cf. figure 2 of the twofish paper.
 *
 * q_table[i][n] is the permutation applied to byte i at stage n.
 * h_byte() runs the stages in the order 0, 1, 2, 3, 4.  It skips stage 0
 * when k == 3, and skips stages 0 and 1 when k == 2.
 */

static const uint8_t * const q_table[4][5] =
  { { q1, q1, q0, q0, q1 },
    { q0, q1, q1, q0, q0 },
    { q0, q0, q0, q1, q1 },
    { q1, q0, q1, q1, q0 } };

/* The matrix MDS as specified in section 4.3.2 of the twofish paper.
 *
 * h_byte() multiplies its substituted byte for position i by column i of
 * this matrix.  mds_matrix[r][i] contributes byte r (0 = LSB) of the
 * 32-bit result.
 */

static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
         { 0x5B, 0xEF, 0xEF, 0x01 },
         { 0xEF, 0x5B, 0x01, 0xEF },
         { 0xEF, 0x01, 0xEF, 0x5B } };

/* uint32_t h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
 *
 * Performs the h function (section 4.3.2) for a single byte position.
 * The byte x is run through a chain of q permutations, each followed by
 * an XOR with one sub-key byte, and the outcome is multiplied by column
 * i of the MDS matrix.  XORing the results for i = 0..3 gives h for a
 * full word.
 *
 * k is the key size in units of 64 bits and i is the byte number
 * (0 = LSB).  x is the byte being transformed, and l0..l3 are the
 * matching sub-key bytes.  Only l0..l(k-1) are used: l3 is ignored when
 * k == 3, and both l3 and l2 are ignored when k == 2.
 */

static uint32_t
h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
{
  const uint8_t * const *stage = q_table[i];
  uint32_t word = 0;
  uint8_t y = x;
  int row;

  /* The first two stages exist only for the longer key sizes. */
  if (k != 2)
    {
      if (k != 3)
        y = l3 ^ stage[0][y];
      y = l2 ^ stage[1][y];
    }

  /* The remaining three stages are common to every key size. */
  y = l1 ^ stage[2][y];
  y = l0 ^ stage[3][y];
  y = stage[4][y];

  /* Multiply by column i of the MDS matrix, one output byte per row. */
  for (row = 0; row < 4; row++)
    word |= gf_multiply(0x69, mds_matrix[row][i], y) << (8 * row);

  return word;
}

/* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
 *
 * Performs the function h on a word.  The same input byte x is fed to
 * h_byte() for each of the four byte positions, together with the
 * corresponding byte of every sub-key word l0..l3, and the four partial
 * results are XORed together.
 */

static uint32_t
h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
{
  uint32_t word = 0;
  int pos;

  for (pos = 0; pos < 4; pos++)
    {
      const unsigned shift = 8 * pos;

      word ^= h_byte(k, pos, x,
                     (uint8_t) (l0 >> shift),
                     (uint8_t) (l1 >> shift),
                     (uint8_t) (l2 >> shift),
                     (uint8_t) (l3 >> shift));
    }

  return word;
}


/* ------------------------------------------------------------------------- */

/* API */

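/* The context structure, struct twofish_ctx, is not defined in this file
 * (it presumably comes from twofish.h).  It holds the table of subkeys
 * (keys) and the key-dependent s-boxes (s_box) used by the functions below.
 */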


/* Set up internal tables required for twofish encryption and decryption.
 *
 * context  receives the 40 expanded subkeys (keys[0..39]) and the four
 *          key-dependent s-box tables (s_box[0..3][0..255]).
 * keysize  key length in bytes.  At most 16 bytes selects k = 2, at most
 *          24 bytes k = 3, and anything longer k = 4.  A key that does
 *          not fill its size class is padded with zero bytes.
 * key      the key material; keysize bytes are read from it.
 *
 * Keys longer than 32 bytes are a caller error.  In normal builds the
 * assert() below aborts.  If assertions are compiled out (NDEBUG), only
 * the first 32 bytes are used, so the local key buffer can never be
 * overrun.
 */

void
twofish_set_key(struct twofish_ctx *context,
    size_t keysize, const uint8_t *key)
{
  uint8_t key_copy[32];
  uint32_t m[8];
  /* Zero-filled so that the words s[k..3], which are passed to h_byte()
   * below but ignored by it, are never read while indeterminate. */
  uint32_t s[4] = { 0, 0, 0, 0 };
  int i, j, k;

  /* Extend key as necessary */

  assert(keysize <= 32);
  if (keysize > sizeof(key_copy))
    keysize = sizeof(key_copy);

  /* We do a little more copying than necessary, but that doesn't
   * really matter. */
  memset(key_copy, 0, sizeof(key_copy));
  memcpy(key_copy, key, keysize);

  for (i = 0; i < 8; i++)
    m[i] = LE_READ_UINT32(key_copy + i*4);

  if (keysize <= 16)
    k = 2;
  else if (keysize <= 24)
    k = 3;
  else
    k = 4;

  /* Compute sub-keys */

  for (i = 0; i < 20; i++)
    {
      /* a comes from the even key words, b from the odd ones. */
      uint32_t a = h(k, 2*i,   m[0], m[2], m[4], m[6]);
      uint32_t b = h(k, 2*i+1, m[1], m[3], m[5], m[7]);

      b = rol8(b);

      /* Combine the two halves: keys[2i] = a + b and
       * keys[2i+1] = rol9(a + 2b), all modulo 2^32. */
      a += b;
      b += a;

      context->keys[2*i] = a;
      context->keys[2*i+1] = rol9(b);
    }

  /* Compute key-dependent S-boxes */

  /* The S words are stored in reverse order of the key word pairs. */
  for (i = 0; i < k; i++)
    s[k-1-i] = compute_s(m[2*i], m[2*i+1]);

  for (i = 0; i < 4; i++)
    for (j = 0; j < 256; j++)
      context->s_box[i][j] = h_byte(k, i, j,
            s[0] >> (i*8),
            s[1] >> (i*8),
            s[2] >> (i*8),
            s[3] >> (i*8));
}

/* Fixed-size convenience wrapper: forwards to twofish_set_key() with a
 * key length of TWOFISH128_KEY_SIZE bytes, so key must point to at least
 * that many readable bytes.  The constant is defined outside this file
 * (presumably 16 bytes, i.e. 128 bits -- confirm in twofish.h). */
void
twofish128_set_key(struct twofish_ctx *context, const uint8_t *key)
{
  twofish_set_key (context, TWOFISH128_KEY_SIZE, key);
}
/* Fixed-size convenience wrapper: forwards to twofish_set_key() with a
 * key length of TWOFISH192_KEY_SIZE bytes, so key must point to at least
 * that many readable bytes.  The constant is defined outside this file
 * (presumably 24 bytes, i.e. 192 bits -- confirm in twofish.h). */
void
twofish192_set_key(struct twofish_ctx *context, const uint8_t *key)
{
  twofish_set_key (context, TWOFISH192_KEY_SIZE, key);
}
/* Fixed-size convenience wrapper: forwards to twofish_set_key() with a
 * key length of TWOFISH256_KEY_SIZE bytes, so key must point to at least
 * that many readable bytes.  The constant is defined outside this file
 * (presumably 32 bytes, i.e. 256 bits -- confirm in twofish.h). */
void
twofish256_set_key(struct twofish_ctx *context, const uint8_t *key)
{
  twofish_set_key (context, TWOFISH256_KEY_SIZE, key);
}

/* Encrypt data with the twofish algorithm, one 16-byte block at a time.
 *
 * twofish_set_key() must have been called on context first, because the
 * subkeys and s-box tables it fills in are read here.
 *
 * length is the number of bytes to process and must be a multiple of
 * TWOFISH_BLOCK_SIZE (checked with assert).  That many bytes are read
 * from plaintext and written to ciphertext.
 *
 * Every block is read completely before its output block is written, so
 * plaintext and ciphertext may refer to the same memory area.
 */

void
twofish_encrypt(const struct twofish_ctx *context,
    size_t length,
    uint8_t *ciphertext,
    const uint8_t *plaintext)
{
  const uint32_t *subkey = context->keys;
  const uint32_t (*sbox)[256] = context->s_box;

  assert( !(length % TWOFISH_BLOCK_SIZE) );

  while (length)
    {
      uint32_t block[4];
      uint32_t a, b, c, d;
      int n;

      for (n = 0; n < 4; n++)
        block[n] = LE_READ_UINT32(plaintext + 4*n);
      plaintext += 16;

      /* Input whitening with subkeys 0..3. */
      a = subkey[0] ^ block[0];
      b = subkey[1] ^ block[1];
      c = subkey[2] ^ block[2];
      d = subkey[3] ^ block[3];

      /* Eight iterations of two rounds each.  Round subkeys start at
       * index 8, four per iteration. */
      for (n = 0; n < 8; n++)
        {
          const uint32_t *rk = subkey + 8 + 4*n;
          uint32_t g0, g1;

          /* (a, b) drive the update of (c, d). */
          g1 = (  sbox[0][(b >> 24) & 0xFF]
                ^ sbox[3][(b >> 16) & 0xFF]
                ^ sbox[2][(b >> 8) & 0xFF]
                ^ sbox[1][b & 0xFF]);
          g0 = (  sbox[3][(a >> 24) & 0xFF]
                ^ sbox[2][(a >> 16) & 0xFF]
                ^ sbox[1][(a >> 8) & 0xFF]
                ^ sbox[0][a & 0xFF]);
          g0 += g1;
          d = rol1(d) ^ (g0 + g1 + rk[1]);
          c ^= g0 + rk[0];
          c = ror1(c);

          /* (c, d) drive the update of (a, b). */
          g1 = (  sbox[0][(d >> 24) & 0xFF]
                ^ sbox[3][(d >> 16) & 0xFF]
                ^ sbox[2][(d >> 8) & 0xFF]
                ^ sbox[1][d & 0xFF]);
          g0 = (  sbox[3][(c >> 24) & 0xFF]
                ^ sbox[2][(c >> 16) & 0xFF]
                ^ sbox[1][(c >> 8) & 0xFF]
                ^ sbox[0][c & 0xFF]);
          g0 += g1;
          b = rol1(b) ^ (g0 + g1 + rk[3]);
          a ^= g0 + rk[2];
          a = ror1(a);
        }

      /* Output whitening with subkeys 4..7; the two halves swap places. */
      block[0] = c ^ subkey[4];
      block[1] = d ^ subkey[5];
      block[2] = a ^ subkey[6];
      block[3] = b ^ subkey[7];

      for (n = 0; n < 4; n++, ciphertext += 4)
        LE_WRITE_UINT32(ciphertext, block[n]);

      length -= TWOFISH_BLOCK_SIZE;
    }
}

/* Decrypt data with the twofish algorithm, one 16-byte block at a time.
 *
 * twofish_set_key() must have been called on context first, because the
 * subkeys and s-box tables it fills in are read here.
 *
 * length is the number of bytes to process and must be a multiple of
 * TWOFISH_BLOCK_SIZE (checked with assert).  That many bytes are read
 * from ciphertext and written to plaintext.
 *
 * Every block is read completely before its output block is written, so
 * plaintext and ciphertext may refer to the same memory area.
 */

void
twofish_decrypt(const struct twofish_ctx *context,
    size_t length,
    uint8_t *plaintext,
    const uint8_t *ciphertext)
{
  const uint32_t *subkey = context->keys;
  const uint32_t (*sbox)[256] = context->s_box;

  assert( !(length % TWOFISH_BLOCK_SIZE) );

  while (length)
    {
      uint32_t block[4];
      uint32_t a, b, c, d;
      int n;

      for (n = 0; n < 4; n++)
        block[n] = LE_READ_UINT32(ciphertext + 4*n);
      ciphertext += 16;

      /* Undo the output whitening (subkeys 4..7) and the final swap of
       * the two halves. */
      a = subkey[6] ^ block[2];
      b = subkey[7] ^ block[3];
      c = subkey[4] ^ block[0];
      d = subkey[5] ^ block[1];

      /* Eight iterations of two rounds each, using the round subkeys in
       * reverse order: indices 36..39 first, 8..11 last. */
      for (n = 0; n < 8; n++)
        {
          const uint32_t *rk = subkey + 36 - 4*n;
          uint32_t g0, g1;

          /* (c, d) drive the update of (a, b). */
          g1 = (  sbox[0][(d >> 24) & 0xFF]
                ^ sbox[3][(d >> 16) & 0xFF]
                ^ sbox[2][(d >> 8) & 0xFF]
                ^ sbox[1][d & 0xFF]);
          g0 = (  sbox[3][(c >> 24) & 0xFF]
                ^ sbox[2][(c >> 16) & 0xFF]
                ^ sbox[1][(c >> 8) & 0xFF]
                ^ sbox[0][c & 0xFF]);
          g0 += g1;
          b ^= g0 + g1 + rk[3];
          b = ror1(b);
          a = rol1(a) ^ (g0 + rk[2]);

          /* (a, b) drive the update of (c, d). */
          g1 = (  sbox[0][(b >> 24) & 0xFF]
                ^ sbox[3][(b >> 16) & 0xFF]
                ^ sbox[2][(b >> 8) & 0xFF]
                ^ sbox[1][b & 0xFF]);
          g0 = (  sbox[3][(a >> 24) & 0xFF]
                ^ sbox[2][(a >> 16) & 0xFF]
                ^ sbox[1][(a >> 8) & 0xFF]
                ^ sbox[0][a & 0xFF]);
          g0 += g1;
          d ^= g0 + g1 + rk[1];
          d = ror1(d);
          c = rol1(c) ^ (g0 + rk[0]);
        }

      /* Undo the input whitening with subkeys 0..3. */
      block[0] = a ^ subkey[0];
      block[1] = b ^ subkey[1];
      block[2] = c ^ subkey[2];
      block[3] = d ^ subkey[3];

      for (n = 0; n < 4; n++, plaintext += 4)
        LE_WRITE_UINT32(plaintext, block[n]);

      length -= TWOFISH_BLOCK_SIZE;
    }
}

Coverage Report

Created: 2023-09-25 06:33

Line	Count	Source (jump to first uncovered line)
1		/* twofish.c
2
3		The twofish block cipher.
4
5		Copyright (C) 2001, 2014 Niels Möller
6		Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>
7
8		Modifications for lsh, integrated testing
9		Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>
10
11		This file is part of GNU Nettle.
12
13		GNU Nettle is free software: you can redistribute it and/or
14		modify it under the terms of either:
15
16		* the GNU Lesser General Public License as published by the Free
17		Software Foundation; either version 3 of the License, or (at your
18		option) any later version.
19
20		or
21
22		* the GNU General Public License as published by the Free
23		Software Foundation; either version 2 of the License, or (at your
24		option) any later version.
25
26		or both in parallel, as here.
27
28		GNU Nettle is distributed in the hope that it will be useful,
29		but WITHOUT ANY WARRANTY; without even the implied warranty of
30		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31		General Public License for more details.
32
33		You should have received copies of the GNU General Public License and
34		the GNU Lesser General Public License along with this program. If
35		not, see http://www.gnu.org/licenses/.
36		*/
37
38		#if HAVE_CONFIG_H
39		# include "config.h"
40		#endif
41
42		#include <assert.h>
43		#include <string.h>
44
45		#include "twofish.h"
46
47		#include "macros.h"
48
49		/* Bitwise rotations on 32-bit words. These are defined as macros that
50		* evaluate their argument twice, so do not apply to any expressions with
51		* side effects.
52		*/
53
54	1.02k	#define rol1(x) (((x) << 1) \| (((x) & 0x80000000) >> 31))
55	1.24k	#define rol8(x) (((x) << 8) \| (((x) & 0xFF000000) >> 24))
56	1.24k	#define rol9(x) (((x) << 9) \| (((x) & 0xFF800000) >> 23))
57	1.02k	#define ror1(x) (((x) >> 1) \| (((x) & 0x00000001) << 31))
58
59		/* ------------------------------------------------------------------------- */
60
61		/* The permutations q0 and q1. These are fixed permutations on 8-bit values.
62		* The permutations have been computed using the program twofish-data,
63		* which is distributed along with this file.
64		*/
65
66		static const uint8_t q0[256] = {
67		0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
68		0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
69		0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
70		0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
71		0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
72		0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
73		0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
74		0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
75		0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
76		0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
77		0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
78		0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
79		0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
80		0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
81		0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
82		0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
83		0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
84		0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
85		0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
86		0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
87		0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
88		0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
89		0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
90		0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
91		0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
92		0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
93		0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
94		0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
95		0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
96		0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
97		0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
98		0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
99		};
100
101		static const uint8_t q1[256] = {
102		0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
103		0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
104		0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
105		0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
106		0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
107		0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
108		0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
109		0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
110		0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
111		0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
112		0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
113		0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
114		0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
115		0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
116		0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
117		0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
118		0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
119		0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
120		0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
121		0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
122		0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
123		0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
124		0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
125		0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
126		0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
127		0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
128		0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
129		0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
130		0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
131		0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
132		0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
133		0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
134		};
135
136		/* ------------------------------------------------------------------------- */
137
138		/* uint32_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
139		*
140		* Multiplication in GF(2^8). Larger return type, to avoid need for
141		* type casts when the return value is shifted left.
142		*
143		* This function multiplies a times b in the Galois Field GF(2^8) with
144		* primitive polynomial p.
145		* The representation of the polynomials a, b, and p uses bits with
146		* values 2^i to represent the terms x^i. The polynomial p contains an
147		* implicit term x^8.
148		*
149		* Note that addition and subtraction in GF(2^8) is simply the XOR
150		* operation.
151		*/
152
153		static uint32_t
154		gf_multiply(uint8_t p, uint8_t a, uint8_t b)
155	300k	{
156	300k	uint32_t shift = b;
157	300k	uint8_t result = 0;
158	2.07M	while (a)
159	1.77M	{
160	1.77M	if (a & 1) result ^= shift;
161	1.77M	a = a >> 1;
162	1.77M	shift = shift << 1;
163	1.77M	if (shift & 0x100) shift ^= p;
164	1.77M	}
165	300k	return result;
166	300k	}
167
168		/* ------------------------------------------------------------------------- */
169
170		/* The matrix RS as specified in section 4.3 the twofish paper. */
171
172		static const uint8_t rs_matrix[4][8] = {
173		{ 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
174		{ 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
175		{ 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
176		{ 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
177
178		/* uint32_t compute_s(uint32_t m1, uint32_t m2);
179		*
180		* Computes the value RS * M, where M is a byte vector composed of the
181		* bytes of m1 and m2. Arithmetic is done in GF(2^8) with primitive
182		* polynomial x^8 + x^6 + x^3 + x^2 + 1.
183		*
184		* This function is used to compute the sub-keys S which are in turn used
185		* to generate the S-boxes.
186		*/
187
188		static uint32_t
189		compute_s(uint32_t m1, uint32_t m2)
190	224	{
191	224	uint32_t s = 0;
192	224	int i;
193	1.12k	for (i = 0; i < 4; i++)
194	896	s \|= (( gf_multiply(0x4D, m1, rs_matrix[i][0])
195	896	^ gf_multiply(0x4D, m1 >> 8, rs_matrix[i][1])
196	896	^ gf_multiply(0x4D, m1 >> 16, rs_matrix[i][2])
197	896	^ gf_multiply(0x4D, m1 >> 24, rs_matrix[i][3])
198	896	^ gf_multiply(0x4D, m2, rs_matrix[i][4])
199	896	^ gf_multiply(0x4D, m2 >> 8, rs_matrix[i][5])
200	896	^ gf_multiply(0x4D, m2 >> 16, rs_matrix[i][6])
201	896	^ gf_multiply(0x4D, m2 >> 24, rs_matrix[i][7])) << (i*8));
202	224	return s;
203	224	}
204
205		/* ------------------------------------------------------------------------- */
206
207		/* This table describes which q S-boxes are used for each byte in each stage
208		* of the function h, cf. figure 2 of the twofish paper.
209		*/
210
211		static const uint8_t * const q_table[4][5] =
212		{ { q1, q1, q0, q0, q1 },
213		{ q0, q1, q1, q0, q0 },
214		{ q0, q0, q0, q1, q1 },
215		{ q1, q0, q1, q1, q0 } };
216
217		/* The matrix MDS as specified in section 4.3.2 of the twofish paper. */
218
219		static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
220		{ 0x5B, 0xEF, 0xEF, 0x01 },
221		{ 0xEF, 0x5B, 0x01, 0xEF },
222		{ 0xEF, 0x01, 0xEF, 0x5B } };
223
224		/* uint32_t h_uint8_t(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
225		*
226		* Perform the h function (section 4.3.2) on one byte. It consists of
227		* repeated applications of the q permutation, followed by a XOR with
228		* part of a sub-key. Finally, the value is multiplied by one column of
229		* the MDS matrix. To obtain the result for a full word, the results of
230		* h for the individual bytes are XORed.
231		*
232		* k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is the
233		* actual byte to apply the function to; l0, l1, l2, and l3 are the
234		* appropriate bytes from the subkey. Note that only l0..l(k-1) are used.
235		*/
236
237		static uint32_t
238		h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
239	73.4k	{
240	73.4k	uint8_t y = q_table[i][4][l0 ^
241	73.4k	q_table[i][3][l1 ^
242	73.4k	q_table[i][2][k == 2 ? x : l2 ^
243	61.5k	q_table[i][1][k == 3 ? x : l3 ^ q_table[i][0][x]]]]];
244
245	73.4k	return ( (gf_multiply(0x69, mds_matrix[0][i], y))
246	73.4k	\| (gf_multiply(0x69, mds_matrix[1][i], y) << 8)
247	73.4k	\| (gf_multiply(0x69, mds_matrix[2][i], y) << 16)
248	73.4k	\| (gf_multiply(0x69, mds_matrix[3][i], y) << 24) );
249	73.4k	}
250
251		/* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
252		*
253		* Perform the function h on a word. See the description of h_byte() above.
254		*/
255
256		static uint32_t
257		h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
258	2.48k	{
259	2.48k	return ( h_byte(k, 0, x, l0, l1, l2, l3)
260	2.48k	^ h_byte(k, 1, x, l0 >> 8, l1 >> 8, l2 >> 8, l3 >> 8)
261	2.48k	^ h_byte(k, 2, x, l0 >> 16, l1 >> 16, l2 >> 16, l3 >> 16)
262	2.48k	^ h_byte(k, 3, x, l0 >> 24, l1 >> 24, l2 >> 24, l3 >> 24) );
263	2.48k	}
264
265
266		/* ------------------------------------------------------------------------- */
267
268		/* API */
269
270		/* Structure which contains the tables containing the subkeys and the
271		* key-dependent s-boxes.
272		*/
273
274
275		/* Set up internal tables required for twofish encryption and decryption.
276		*
277		* The key size is specified in bytes. Key sizes up to 32 bytes are
278		* supported. Larger key sizes are silently truncated.
279		*/
280
281		void
282		twofish_set_key(struct twofish_ctx *context,
283		size_t keysize, const uint8_t *key)
284	62	{
285	62	uint8_t key_copy[32];
286	62	uint32_t m[8], s[4], t;
287	62	int i, j, k;
288
289		/* Extend key as necessary */
290
291	62	assert(keysize <= 32);
292
293		/* We do a little more copying than necessary, but that doesn't
294		* really matter. */
295	62	memset(key_copy, 0, 32);
296	62	memcpy(key_copy, key, keysize);
297
298	558	for (i = 0; i<8; i++)
299	496	m[i] = LE_READ_UINT32(key_copy + i*4);
300
301	62	if (keysize <= 16)
302	10	k = 2;
303	52	else if (keysize <= 24)
304	4	k = 3;
305	48	else
306	48	k = 4;
307
308		/* Compute sub-keys */
309
310	1.30k	for (i = 0; i < 20; i++)
311	1.24k	{
312	1.24k	t = h(k, 2*i+1, m[1], m[3], m[5], m[7]);
313	1.24k	t = rol8(t);
314	1.24k	t += (context->keys[2*i] =
315	1.24k	t + h(k, 2*i, m[0], m[2], m[4], m[6]));
316	1.24k	t = rol9(t);
317	1.24k	context->keys[2*i+1] = t;
318	1.24k	}
319
320		/* Compute key-dependent S-boxes */
321
322	286	for (i = 0; i < k; i++)
323	224	s[k-1-i] = compute_s(m[2*i], m[2*i+1]);
324
325	310	for (i = 0; i < 4; i++)
326	63.7k	for (j = 0; j < 256; j++)
327	63.4k	context->s_box[i][j] = h_byte(k, i, j,
328	63.4k	s[0] >> (i*8),
329	63.4k	s[1] >> (i*8),
330	63.4k	s[2] >> (i*8),
331	63.4k	s[3] >> (i*8));
332	62	}
333
334		void
335		twofish128_set_key(struct twofish_ctx *context, const uint8_t *key)
336	0	{
337	0	twofish_set_key (context, TWOFISH128_KEY_SIZE, key);
338	0	}
339		void
340		twofish192_set_key(struct twofish_ctx *context, const uint8_t *key)
341	0	{
342	0	twofish_set_key (context, TWOFISH192_KEY_SIZE, key);
343	0	}
344		void
345		twofish256_set_key(struct twofish_ctx *context, const uint8_t *key)
346	0	{
347	0	twofish_set_key (context, TWOFISH256_KEY_SIZE, key);
348	0	}
349
350		/* Encrypt blocks of 16 bytes of data with the twofish algorithm.
351		*
352		* Before this function can be used, twofish_set_key() must be used in order to
353		* set up various tables required for the encryption algorithm.
354		*
355		* This function always encrypts 16 bytes of plaintext to 16 bytes of
356		* ciphertext. The memory areas of the plaintext and the ciphertext can
357		* overlap.
358		*/
359
360		void
361		twofish_encrypt(const struct twofish_ctx *context,
362		size_t length,
363		uint8_t *ciphertext,
364		const uint8_t *plaintext)
365	37	{
366	37	const uint32_t * keys = context->keys;
367	37	const uint32_t (*s_box)[256] = context->s_box;
368
369	37	assert( !(length % TWOFISH_BLOCK_SIZE) );
370	69	for ( ; length; length -= TWOFISH_BLOCK_SIZE)
371	32	{
372	32	uint32_t words[4];
373	32	uint32_t r0, r1, r2, r3, t0, t1;
374	32	int i;
375
376	160	for (i = 0; i<4; i++, plaintext += 4)
377	128	words[i] = LE_READ_UINT32(plaintext);
378
379	32	r0 = words[0] ^ keys[0];
380	32	r1 = words[1] ^ keys[1];
381	32	r2 = words[2] ^ keys[2];
382	32	r3 = words[3] ^ keys[3];
383
384	288	for (i = 0; i < 8; i++) {
385	256	t1 = ( s_box[1][r1 & 0xFF]
386	256	^ s_box[2][(r1 >> 8) & 0xFF]
387	256	^ s_box[3][(r1 >> 16) & 0xFF]
388	256	^ s_box[0][(r1 >> 24) & 0xFF]);
389	256	t0 = ( s_box[0][r0 & 0xFF]
390	256	^ s_box[1][(r0 >> 8) & 0xFF]
391	256	^ s_box[2][(r0 >> 16) & 0xFF]
392	256	^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
393	256	r3 = (t1 + t0 + keys[4*i+9]) ^ rol1(r3);
394	256	r2 = (t0 + keys[4*i+8]) ^ r2;
395	256	r2 = ror1(r2);
396
397	256	t1 = ( s_box[1][r3 & 0xFF]
398	256	^ s_box[2][(r3 >> 8) & 0xFF]
399	256	^ s_box[3][(r3 >> 16) & 0xFF]
400	256	^ s_box[0][(r3 >> 24) & 0xFF]);
401	256	t0 = ( s_box[0][r2 & 0xFF]
402	256	^ s_box[1][(r2 >> 8) & 0xFF]
403	256	^ s_box[2][(r2 >> 16) & 0xFF]
404	256	^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
405	256	r1 = (t1 + t0 + keys[4*i+11]) ^ rol1(r1);
406	256	r0 = (t0 + keys[4*i+10]) ^ r0;
407	256	r0 = ror1(r0);
408	256	}
409
410	32	words[0] = r2 ^ keys[4];
411	32	words[1] = r3 ^ keys[5];
412	32	words[2] = r0 ^ keys[6];
413	32	words[3] = r1 ^ keys[7];
414
415	160	for (i = 0; i<4; i++, ciphertext += 4)
416	128	LE_WRITE_UINT32(ciphertext, words[i]);
417	32	}
418	37	}
419
420		/* Decrypt blocks of 16 bytes of data with the twofish algorithm.
421		*
422		* Before this function can be used, twofish_set_key() must be used in order to
423		* set up various tables required for the decryption algorithm.
424		*
425		* This function always decrypts 16 bytes of ciphertext to 16 bytes of
426		* plaintext. The memory areas of the plaintext and the ciphertext can
427		* overlap.
428		*/
429
430		void
431		twofish_decrypt(const struct twofish_ctx *context,
432		size_t length,
433		uint8_t *plaintext,
434		const uint8_t *ciphertext)
435
436	25	{
437	25	const uint32_t *keys = context->keys;
438	25	const uint32_t (*s_box)[256] = context->s_box;
439
440	25	assert( !(length % TWOFISH_BLOCK_SIZE) );
441	57	for ( ; length; length -= TWOFISH_BLOCK_SIZE)
442	32	{
443	32	uint32_t words[4];
444	32	uint32_t r0, r1, r2, r3, t0, t1;
445	32	int i;
446
447	160	for (i = 0; i<4; i++, ciphertext += 4)
448	128	words[i] = LE_READ_UINT32(ciphertext);
449
450	32	r0 = words[2] ^ keys[6];
451	32	r1 = words[3] ^ keys[7];
452	32	r2 = words[0] ^ keys[4];
453	32	r3 = words[1] ^ keys[5];
454
455	288	for (i = 0; i < 8; i++) {
456	256	t1 = ( s_box[1][r3 & 0xFF]
457	256	^ s_box[2][(r3 >> 8) & 0xFF]
458	256	^ s_box[3][(r3 >> 16) & 0xFF]
459	256	^ s_box[0][(r3 >> 24) & 0xFF]);
460	256	t0 = ( s_box[0][r2 & 0xFF]
461	256	^ s_box[1][(r2 >> 8) & 0xFF]
462	256	^ s_box[2][(r2 >> 16) & 0xFF]
463	256	^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
464	256	r1 = (t1 + t0 + keys[39-4*i]) ^ r1;
465	256	r1 = ror1(r1);
466	256	r0 = (t0 + keys[38-4*i]) ^ rol1(r0);
467
468	256	t1 = ( s_box[1][r1 & 0xFF]
469	256	^ s_box[2][(r1 >> 8) & 0xFF]
470	256	^ s_box[3][(r1 >> 16) & 0xFF]
471	256	^ s_box[0][(r1 >> 24) & 0xFF]);
472	256	t0 = ( s_box[0][r0 & 0xFF]
473	256	^ s_box[1][(r0 >> 8) & 0xFF]
474	256	^ s_box[2][(r0 >> 16) & 0xFF]
475	256	^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
476	256	r3 = (t1 + t0 + keys[37-4*i]) ^ r3;
477	256	r3 = ror1(r3);
478	256	r2 = (t0 + keys[36-4*i]) ^ rol1(r2);
479	256	}
480
481	32	words[0] = r0 ^ keys[0];
482	32	words[1] = r1 ^ keys[1];
483	32	words[2] = r2 ^ keys[2];
484	32	words[3] = r3 ^ keys[3];
485
486	160	for (i = 0; i<4; i++, plaintext += 4)
487	128	LE_WRITE_UINT32(plaintext, words[i]);
488	32	}
489	25	}