/src/nettle/aes-encrypt-internal.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* aes-encrypt-internal.c |
2 | | |
3 | | Encryption function for the aes/rijndael block cipher. |
4 | | |
5 | | Copyright (C) 2002, 2013 Niels Möller |
6 | | |
7 | | This file is part of GNU Nettle. |
8 | | |
9 | | GNU Nettle is free software: you can redistribute it and/or |
10 | | modify it under the terms of either: |
11 | | |
12 | | * the GNU Lesser General Public License as published by the Free |
13 | | Software Foundation; either version 3 of the License, or (at your |
14 | | option) any later version. |
15 | | |
16 | | or |
17 | | |
18 | | * the GNU General Public License as published by the Free |
19 | | Software Foundation; either version 2 of the License, or (at your |
20 | | option) any later version. |
21 | | |
22 | | or both in parallel, as here. |
23 | | |
24 | | GNU Nettle is distributed in the hope that it will be useful, |
25 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
26 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
27 | | General Public License for more details. |
28 | | |
29 | | You should have received copies of the GNU General Public License and |
30 | | the GNU Lesser General Public License along with this program. If |
31 | | not, see http://www.gnu.org/licenses/. |
32 | | */ |
33 | | |
34 | | #if HAVE_CONFIG_H |
35 | | # include "config.h" |
36 | | #endif |
37 | | |
38 | | #include <assert.h> |
39 | | |
40 | | #include "aes-internal.h" |
41 | | #include "macros.h" |
42 | | |
43 | | /* For fat builds. When HAVE_NATIVE_aes_encrypt is set, declare
 * _nettle_aes_encrypt_c and map the name _nettle_aes_encrypt to it, so
 * that the C function defined below is compiled under the name
 * _nettle_aes_encrypt_c instead of _nettle_aes_encrypt.
 * NOTE(review): presumably the plain name is then provided by a native
 * implementation selected elsewhere -- confirm against the build setup. */ |
44 | | #if HAVE_NATIVE_aes_encrypt |
45 | | void |
46 | | _nettle_aes_encrypt_c(unsigned rounds, const uint32_t *keys, |
47 | | const struct aes_table *T, |
48 | | size_t length, uint8_t *dst, |
49 | | const uint8_t *src); |
50 | | #define _nettle_aes_encrypt _nettle_aes_encrypt_c |
51 | | #endif |
52 | | |
/* _nettle_aes_encrypt: AES encryption of the data at SRC into DST, using
 * the table T and the expanded subkeys in KEYS.
 *
 *   rounds  Number of rounds. The loop applies AES_ROUND for
 *           i = 1 .. rounds - 1, then AES_FINAL_ROUND is applied once.
 *   keys    Subkey words. Words keys[0] .. keys[4*rounds + 3] are read:
 *           four for the initial XOR and four per round.
 *   T       Table passed unchanged to AES_ROUND and AES_FINAL_ROUND.
 *   length, dst, src
 *           Handed to FOR_BLOCKS together with AES_BLOCK_SIZE.
 *           NOTE(review): FOR_BLOCKS is defined outside this file;
 *           presumably it steps dst and src through length bytes one
 *           block at a time -- confirm in macros.h.
 *
 * Each block is read from src and written to dst as four 32-bit words
 * in little-endian byte order. The function returns nothing.
 *
 * When HAVE_NATIVE_aes_encrypt is set, the #define earlier in this file
 * causes this definition to be compiled as _nettle_aes_encrypt_c.
 */
53 | | void |
54 | | _nettle_aes_encrypt(unsigned rounds, const uint32_t *keys, |
55 | | const struct aes_table *T, |
56 | | size_t length, uint8_t *dst, |
57 | | const uint8_t *src) |
58 | 0 | { |
59 | 0 | FOR_BLOCKS(length, dst, src, AES_BLOCK_SIZE) |
60 | 0 | { |
61 | 0 | uint32_t w0, w1, w2, w3; /* working state: input words of the current round */ |
62 | 0 | uint32_t t0, t1, t2, t3; /* output words of the current round */ |
63 | 0 | unsigned i; /* round counter, also used after the loop */ |
64 | | |
65 | | /* Get clear text, using little-endian byte order. |
66 | | * Also XOR with the first subkey, i.e. keys[0] .. keys[3]. */ |
67 |

68 | 0 | w0 = LE_READ_UINT32(src) ^ keys[0]; |
69 | 0 | w1 = LE_READ_UINT32(src + 4) ^ keys[1]; |
70 | 0 | w2 = LE_READ_UINT32(src + 8) ^ keys[2]; |
71 | 0 | w3 = LE_READ_UINT32(src + 12) ^ keys[3]; |
72 |

73 | 0 | for (i = 1; i < rounds; i++) /* round i uses keys[4*i] .. keys[4*i + 3] */ |
74 | 0 | { |
75 | 0 | t0 = AES_ROUND(T, w0, w1, w2, w3, keys[4*i]); /* state words are passed in rotated order for each output word */ |
76 | 0 | t1 = AES_ROUND(T, w1, w2, w3, w0, keys[4*i + 1]); |
77 | 0 | t2 = AES_ROUND(T, w2, w3, w0, w1, keys[4*i + 2]); |
78 | 0 | t3 = AES_ROUND(T, w3, w0, w1, w2, keys[4*i + 3]); |
79 | | |
80 | | /* We could unroll the loop twice, to avoid these |
81 | | assignments. If all eight variables fit in registers, |
82 | | that should give a slight speedup. */ |
83 | 0 | w0 = t0; |
84 | 0 | w1 = t1; |
85 | 0 | w2 = t2; |
86 | 0 | w3 = t3; |
87 | 0 | } |
88 | | |
89 | | /* Final round. The loop above exits with i == rounds, so this
 * round uses keys[4*rounds] .. keys[4*rounds + 3]. */ |
90 |

91 | 0 | t0 = AES_FINAL_ROUND(T, w0, w1, w2, w3, keys[4*i]); |
92 | 0 | t1 = AES_FINAL_ROUND(T, w1, w2, w3, w0, keys[4*i + 1]); |
93 | 0 | t2 = AES_FINAL_ROUND(T, w2, w3, w0, w1, keys[4*i + 2]); |
94 | 0 | t3 = AES_FINAL_ROUND(T, w3, w0, w1, w2, keys[4*i + 3]); |
95 |

96 | 0 | LE_WRITE_UINT32(dst, t0); /* store the result words, little-endian */ |
97 | 0 | LE_WRITE_UINT32(dst + 4, t1); |
98 | 0 | LE_WRITE_UINT32(dst + 8, t2); |
99 | 0 | LE_WRITE_UINT32(dst + 12, t3); |
100 | 0 | } |
101 | 0 | } |
102 | | |
103 | | /* Some stats, all for AES 128: |
104 | | |
105 | | A. Table-driven indexing (the approach of the old unified |
106 | | _aes_crypt function). |
107 | | B. Unrolling the j-loop. |
108 | | |
109 | | C. Eliminated the use of IDXk(j) in the main loop. |
110 | | |
111 | | D. Put wtxt in four scalar variables. |
112 | | |
113 | | E. Also put t in four scalar variables. |
114 | | |
115 | |      P4 2.2 GHz           AMD Duron 1.4GHz |
116 | | |
117 | |      MB/s   code size |
118 | | A    35.9   0x202         17 MB/s |
119 | | B    37.3   0x334 |
120 | | C    33.0   0x2a7 |
121 | | D    40.7   0x3f9 |
122 | | E    42.9   0x44a         26 MB/s |
123 | | */ |