Line | Count | Source (jump to first uncovered line) |
1 | | /* desCode.h |
2 | | * |
3 | | */ |
4 | | |
5 | | /* des - fast & portable DES encryption & decryption. |
6 | | * Copyright (C) 1992 Dana L. How |
7 | | * Please see the file `descore.README' for the complete copyright notice. |
8 | | */ |
9 | | |
10 | | #include "des.h" |
11 | | |
12 | | /* optional customization: |
13 | | * the idea here is to alter the code so it will still run correctly |
14 | | * on any machine, but run quickest on the specific machine in mind. |
15 | | * note that these silly tweaks can give you a 15%-20% speed improvement |
16 | | * on the sparc -- it's probably even more significant on the 68000. */ |
17 | | |
18 | | /* take care of machines with incredibly few registers */ |
19 | | #if defined(i386) |
20 | | #define REGISTER /* only x, y, z will be declared register */ |
21 | | #else |
22 | | #define REGISTER register |
23 | | #endif /* i386 */ |
24 | | |
25 | | /* is auto inc/dec faster than 7bit unsigned indexing? */ |
26 | | #if defined(vax) || defined(mc68000) |
27 | | #define FIXR r += 32; |
28 | | #define FIXS s += 8; |
29 | | #define PREV(v,o) *--v |
30 | | #define NEXT(v,o) *v++ |
31 | | #else |
32 | | #define FIXR |
33 | | #define FIXS |
34 | 0 | #define PREV(v,o) v[o] |
35 | 0 | #define NEXT(v,o) v[o] |
36 | | #endif |
37 | | |
38 | | /* if no machine type is defined, default to indexing (already selected above), 6 registers and cheap literals (the vax settings below) */ |
39 | | #if !defined(i386) && !defined(vax) && !defined(mc68000) && !defined(sparc) |
40 | | #define vax |
41 | | #endif |
42 | | |
43 | | |
44 | | /* handle a compiler which can't reallocate registers */ |
45 | | /* The BYTE type is used as parameter for the encrypt/decrypt functions. |
46 | | * It's pretty bad to have the function prototypes depend on |
47 | | * a macro definition that the users of the function don't |
48 | | * know about. /Niels */ |
49 | | #if 0 /* didn't feel like deleting */ |
50 | | #define SREGFREE ; s = (uint8_t *) D |
51 | | #define DEST s |
52 | | #define D m0 |
53 | | #define BYTE uint32_t |
54 | | #else |
55 | | #define SREGFREE |
56 | | #define DEST d |
57 | | #define D d |
58 | | #define BYTE uint8_t |
59 | | #endif |
60 | | |
61 | | /* handle constants in the optimal way for 386 & vax */ |
62 | | /* 386: we declare 3 register variables (see above) and use 3 more variables; |
63 | | * vax: we use 6 variables, all declared register; |
64 | | * we assume address literals are cheap & unrestricted; |
65 | | * we assume immediate constants are cheap & unrestricted. */ |
66 | | #if defined(i386) || defined(vax) |
67 | | #define MQ0 des_bigmap |
68 | | #define MQ1 (des_bigmap + 64) |
69 | | #define MQ2 (des_bigmap + 128) |
70 | | #define MQ3 (des_bigmap + 192) |
71 | | #define HQ0(z) /* z |= 0x01000000L; */ |
72 | | #define HQ2(z) /* z |= 0x03000200L; */ |
73 | | #define LQ0(z) 0xFCFC & z |
74 | | #define LQ1(z) 0xFCFC & z |
75 | | #define LQ2(z) 0xFCFC & z |
76 | | #define LQ3(z) 0xFCFC & z |
77 | | #define SQ 16 |
78 | | #define MS0 des_keymap |
79 | | #define MS1 (des_keymap + 64) |
80 | | #define MS2 (des_keymap + 128) |
81 | | #define MS3 (des_keymap + 192) |
82 | | #define MS4 (des_keymap + 256) |
83 | | #define MS5 (des_keymap + 320) |
84 | | #define MS6 (des_keymap + 384) |
85 | | #define MS7 (des_keymap + 448) |
86 | | #define HS(z) |
87 | | #define LS0(z) 0xFC & z |
88 | | #define LS1(z) 0xFC & z |
89 | | #define LS2(z) 0xFC & z |
90 | | #define LS3(z) 0xFC & z |
91 | | #define REGQUICK |
92 | | #define SETQUICK |
93 | | #define REGSMALL |
94 | | #define SETSMALL |
95 | | #endif /* defined(i386) || defined(vax) */ |
96 | | |
97 | | /* handle constants in the optimal way for mc68000 */ |
98 | | /* in addition to the core 6 variables, we declare 3 registers holding constants |
99 | | * and 4 registers holding address literals. |
100 | | * at most 6 data values and 5 address values are actively used at once. |
101 | | * we assume address literals are so expensive we never use them; |
102 | | * we assume constant index offsets > 127 are expensive, so they are not used. |
103 | | * we assume all constants are expensive and put them in registers, |
104 | | * including shift counts greater than 8. */ |
105 | | #if defined(mc68000) |
106 | | #define MQ0 m0 |
107 | | #define MQ1 m1 |
108 | | #define MQ2 m2 |
109 | | #define MQ3 m3 |
110 | | #define HQ0(z) |
111 | | #define HQ2(z) |
112 | | #define LQ0(z) k0 & z |
113 | | #define LQ1(z) k0 & z |
114 | | #define LQ2(z) k0 & z |
115 | | #define LQ3(z) k0 & z |
116 | | #define SQ k1 |
117 | | #define MS0 m0 |
118 | | #define MS1 m0 |
119 | | #define MS2 m1 |
120 | | #define MS3 m1 |
121 | | #define MS4 m2 |
122 | | #define MS5 m2 |
123 | | #define MS6 m3 |
124 | | #define MS7 m3 |
125 | | #define HS(z) z |= k0; |
126 | | #define LS0(z) k1 & z |
127 | | #define LS1(z) k2 & z |
128 | | #define LS2(z) k1 & z |
129 | | #define LS3(z) k2 & z |
130 | | #define REGQUICK \ |
131 | | register uint32_t k0, k1; \ |
132 | | register uint32_t *m0, *m1, *m2, *m3; |
133 | | #define SETQUICK \ |
134 | | ; k0 = 0xFCFC \ |
135 | | ; k1 = 16 \ |
136 | | /*k2 = 28 to speed up ROL */ \ |
137 | | ; m0 = des_bigmap \ |
138 | | ; m1 = m0 + 64 \ |
139 | | ; m2 = m1 + 64 \ |
140 | | ; m3 = m2 + 64 |
141 | | #define REGSMALL \ |
142 | | register uint32_t k0, k1, k2; \ |
143 | | register uint32_t *m0, *m1, *m2, *m3; |
144 | | #define SETSMALL \ |
145 | | ; k0 = 0x01000100L \ |
146 | | ; k1 = 0x0FC \ |
147 | | ; k2 = 0x1FC \ |
148 | | ; m0 = des_keymap \ |
149 | | ; m1 = m0 + 128 \ |
150 | | ; m2 = m1 + 128 \ |
151 | | ; m3 = m2 + 128 |
152 | | #endif /* defined(mc68000) */ |
153 | | |
154 | | /* handle constants in the optimal way for sparc */ |
155 | | /* in addition to the core 6 variables, we either declare: |
156 | | * 4 registers holding address literals and 1 register holding a constant, or |
157 | | * 8 registers holding address literals. |
158 | | * up to 14 register variables are declared (sparc has %i0-%i5, %l0-%l7). |
159 | | * we assume address literals are so expensive we never use them; |
160 | | * we assume any constant with >10 bits is expensive and put it in a register, |
161 | | * and any other is cheap and is coded in-line. */ |
162 | | #if defined(sparc) |
163 | | #define MQ0 m0 |
164 | | #define MQ1 m1 |
165 | | #define MQ2 m2 |
166 | | #define MQ3 m3 |
167 | | #define HQ0(z) |
168 | | #define HQ2(z) |
169 | | #define LQ0(z) k0 & z |
170 | | #define LQ1(z) k0 & z |
171 | | #define LQ2(z) k0 & z |
172 | | #define LQ3(z) k0 & z |
173 | | #define SQ 16 |
174 | | #define MS0 m0 |
175 | | #define MS1 m1 |
176 | | #define MS2 m2 |
177 | | #define MS3 m3 |
178 | | #define MS4 m4 |
179 | | #define MS5 m5 |
180 | | #define MS6 m6 |
181 | | #define MS7 m7 |
182 | | #define HS(z) |
183 | | #define LS0(z) 0xFC & z |
184 | | #define LS1(z) 0xFC & z |
185 | | #define LS2(z) 0xFC & z |
186 | | #define LS3(z) 0xFC & z |
187 | | #define REGQUICK \ |
188 | | register uint32_t k0; \ |
189 | | register uint32_t *m0, *m1, *m2, *m3; |
190 | | #define SETQUICK \ |
191 | | ; k0 = 0xFCFC \ |
192 | | ; m0 = des_bigmap \ |
193 | | ; m1 = m0 + 64 \ |
194 | | ; m2 = m1 + 64 \ |
195 | | ; m3 = m2 + 64 |
196 | | #define REGSMALL \ |
197 | | register uint32_t *m0, *m1, *m2, *m3, *m4, *m5, *m6, *m7; |
198 | | #define SETSMALL \ |
199 | | ; m0 = des_keymap \ |
200 | | ; m1 = m0 + 64 \ |
201 | | ; m2 = m1 + 64 \ |
202 | | ; m3 = m2 + 64 \ |
203 | | ; m4 = m3 + 64 \ |
204 | | ; m5 = m4 + 64 \ |
205 | | ; m6 = m5 + 64 \ |
206 | | ; m7 = m6 + 64 |
207 | | #endif /* defined(sparc) */ |
208 | | |
209 | | |
210 | | /* some basic stuff */ |
211 | | |
212 | | /* generate a uint32_t address from a base pointer and a byte offset */ |
213 | | /* FIXME: This is used only as *ADD(msi,lsi(z)) or *ADD(mqi,lqi(z)). |
214 | | * Why not use plain indexing instead? /Niels */ |
215 | 0 | #define ADD(b,x) (uint32_t *) ((uint8_t *)b + (x)) |
216 | | |
217 | | /* low level rotate operations: rotate d by c bits; o is the complementary shift (callers in this file pass c + o == 32) */ |
218 | | #define NOP(d,c,o) |
219 | 0 | #define ROL(d,c,o) d = d << c | d >> o |
220 | 0 | #define ROR(d,c,o) d = d >> c | d << o |
221 | 0 | #define ROL1(d) ROL(d, 1, 31) |
222 | 0 | #define ROR1(d) ROR(d, 1, 31) |
223 | | |
224 | | /* elementary swap for doing IP/FP: exchanges the bits of y selected by m with the bits of x selected by m << b, using z as scratch */ |
225 | | #define SWAP(x,y,m,b) \ |
226 | 0 | z = ((x >> b) ^ y) & m; \ |
227 | 0 | x ^= z << b; \ |
228 | 0 | y ^= z |
229 | | |
230 | | |
231 | | /* the following macros contain all the important code fragments */ |
232 | | |
233 | | /* load input data, then setup special registers holding constants */ |
234 | | #define TEMPQUICK(LOAD) \ |
235 | | REGQUICK \ |
236 | | LOAD() \ |
237 | | SETQUICK |
238 | | #define TEMPSMALL(LOAD) \ |
239 | 0 | REGSMALL \ |
240 | 0 | LOAD() \ |
241 | 0 | SETSMALL |
242 | | |
243 | | /* load data */ |
244 | | #define LOADDATA(x,y) \ |
245 | 0 | FIXS \ |
246 | 0 | y = PREV(s, 7); y<<= 8; \ |
247 | 0 | y |= PREV(s, 6); y<<= 8; \ |
248 | 0 | y |= PREV(s, 5); y<<= 8; \ |
249 | 0 | y |= PREV(s, 4); \ |
250 | 0 | x = PREV(s, 3); x<<= 8; \ |
251 | 0 | x |= PREV(s, 2); x<<= 8; \ |
252 | 0 | x |= PREV(s, 1); x<<= 8; \ |
253 | 0 | x |= PREV(s, 0) \ |
254 | 0 | SREGFREE |
255 | | /* load data without initial permutation and put into efficient position */ |
256 | | #define LOADCORE() \ |
257 | | LOADDATA(x, y); \ |
258 | | ROR1(x); \ |
259 | | ROR1(y) |
260 | | /* load data, do the initial permutation and put into efficient position */ |
261 | | #define LOADFIPS() \ |
262 | 0 | LOADDATA(y, x); \ |
263 | 0 | SWAP(x, y, 0x0F0F0F0FL, 004); \ |
264 | 0 | SWAP(y, x, 0x0000FFFFL, 020); \ |
265 | 0 | SWAP(x, y, 0x33333333L, 002); \ |
266 | 0 | SWAP(y, x, 0x00FF00FFL, 010); \ |
267 | 0 | ROR1(x); \ |
268 | 0 | z = (x ^ y) & 0x55555555L; \ |
269 | 0 | y ^= z; \ |
270 | 0 | x ^= z; \ |
271 | 0 | ROR1(y) |
272 | | |
273 | | |
274 | | /* core encryption/decryption operations */ |
275 | | /* S box mapping and P perm */ |
276 | | #define KEYMAPSMALL(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\ |
277 | 0 | hs(z) \ |
278 | 0 | x ^= *ADD(ms3, ls3(z)); \ |
279 | 0 | z>>= 8; \ |
280 | 0 | x ^= *ADD(ms2, ls2(z)); \ |
281 | 0 | z>>= 8; \ |
282 | 0 | x ^= *ADD(ms1, ls1(z)); \ |
283 | 0 | z>>= 8; \ |
284 | 0 | x ^= *ADD(ms0, ls0(z)) |
285 | | /* alternate version: use 64k of tables */ |
286 | | #define KEYMAPQUICK(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\ |
287 | | hq(z) \ |
288 | | x ^= *ADD(mq0, lq0(z)); \ |
289 | | z>>= sq; \ |
290 | | x ^= *ADD(mq1, lq1(z)) |
291 | | /* apply 24 key bits and do the odd s boxes */ |
292 | | #define S7S1(x,y,z,r,m,KEYMAP,LOAD) \ |
293 | 0 | z = LOAD(r, m); \ |
294 | 0 | z ^= y; \ |
295 | 0 | KEYMAP(x,z,MQ0,MQ1,HQ0,LQ0,LQ1,SQ,MS0,MS1,MS2,MS3,HS,LS0,LS1,LS2,LS3) |
296 | | /* apply 24 key bits and do the even s boxes */ |
297 | | #define S6S0(x,y,z,r,m,KEYMAP,LOAD) \ |
298 | 0 | z = LOAD(r, m); \ |
299 | 0 | z ^= y; \ |
300 | 0 | ROL(z, 4, 28); \ |
301 | 0 | KEYMAP(x,z,MQ2,MQ3,HQ2,LQ2,LQ3,SQ,MS4,MS5,MS6,MS7,HS,LS0,LS1,LS2,LS3) |
302 | | /* actual iterations. ENCR and DECR differ only in the LOAD macro passed down (NEXT vs PREV) and in which of S7S1/S6S0 runs first */ |
303 | | #define ENCR(x,y,z,r,m,n,KEYMAP) \ |
304 | 0 | S7S1(x,y,z,r,m,KEYMAP,NEXT); \ |
305 | 0 | S6S0(x,y,z,r,n,KEYMAP,NEXT) |
306 | | #define DECR(x,y,z,r,m,n,KEYMAP) \ |
307 | 0 | S6S0(x,y,z,r,m,KEYMAP,PREV); \ |
308 | 0 | S7S1(x,y,z,r,n,KEYMAP,PREV) |
309 | | |
310 | | /* write out result in correct byte order */ |
311 | | #define SAVEDATA(x,y) \ |
312 | 0 | NEXT(DEST, 0) = x; x>>= 8; \ |
313 | 0 | NEXT(DEST, 1) = x; x>>= 8; \ |
314 | 0 | NEXT(DEST, 2) = x; x>>= 8; \ |
315 | 0 | NEXT(DEST, 3) = x; \ |
316 | 0 | NEXT(DEST, 4) = y; y>>= 8; \ |
317 | 0 | NEXT(DEST, 5) = y; y>>= 8; \ |
318 | 0 | NEXT(DEST, 6) = y; y>>= 8; \ |
319 | 0 | NEXT(DEST, 7) = y |
320 | | /* write out result */ |
321 | | #define SAVECORE() \ |
322 | | ROL1(x); \ |
323 | | ROL1(y); \ |
324 | | SAVEDATA(y, x) |
325 | | /* do final permutation and write out result */ |
326 | | #define SAVEFIPS() \ |
327 | 0 | ROL1(x); \ |
328 | 0 | z = (x ^ y) & 0x55555555L; \ |
329 | 0 | y ^= z; \ |
330 | 0 | x ^= z; \ |
331 | 0 | ROL1(y); \ |
332 | 0 | SWAP(x, y, 0x00FF00FFL, 010); \ |
333 | 0 | SWAP(y, x, 0x33333333L, 002); \ |
334 | 0 | SWAP(x, y, 0x0000FFFFL, 020); \ |
335 | 0 | SWAP(y, x, 0x0F0F0F0FL, 004); \ |
336 | 0 | SAVEDATA(x, y) |
337 | | |
338 | | |
339 | | /* the following macros contain the encryption/decryption skeletons */ |
340 | | |
341 | | #define ENCRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) \ |
342 | | \ |
343 | | void \ |
344 | | NAME(REGISTER BYTE *D, \ |
345 | | REGISTER const uint32_t *r, \ |
346 | 0 | REGISTER const uint8_t *s) \ |
347 | 0 | { \ |
348 | 0 | register uint32_t x, y, z; \ |
349 | 0 | \ |
350 | 0 | /* declare temps & load data */ \ |
351 | 0 | TEMP(LOAD); \ |
352 | 0 | \ |
353 | 0 | /* do the 16 iterations */ \ |
354 | 0 | ENCR(x,y,z,r, 0, 1,KEYMAP); \ |
355 | 0 | ENCR(y,x,z,r, 2, 3,KEYMAP); \ |
356 | 0 | ENCR(x,y,z,r, 4, 5,KEYMAP); \ |
357 | 0 | ENCR(y,x,z,r, 6, 7,KEYMAP); \ |
358 | 0 | ENCR(x,y,z,r, 8, 9,KEYMAP); \ |
359 | 0 | ENCR(y,x,z,r,10,11,KEYMAP); \ |
360 | 0 | ENCR(x,y,z,r,12,13,KEYMAP); \ |
361 | 0 | ENCR(y,x,z,r,14,15,KEYMAP); \ |
362 | 0 | ENCR(x,y,z,r,16,17,KEYMAP); \ |
363 | 0 | ENCR(y,x,z,r,18,19,KEYMAP); \ |
364 | 0 | ENCR(x,y,z,r,20,21,KEYMAP); \ |
365 | 0 | ENCR(y,x,z,r,22,23,KEYMAP); \ |
366 | 0 | ENCR(x,y,z,r,24,25,KEYMAP); \ |
367 | 0 | ENCR(y,x,z,r,26,27,KEYMAP); \ |
368 | 0 | ENCR(x,y,z,r,28,29,KEYMAP); \ |
369 | 0 | ENCR(y,x,z,r,30,31,KEYMAP); \ |
370 | 0 | \ |
371 | 0 | /* save result */ \ |
372 | 0 | SAVE(); \ |
373 | 0 | \ |
374 | 0 | return; \ |
375 | 0 | } |
376 | | |
377 | | #define DECRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) \ |
378 | | \ |
379 | | void \ |
380 | | NAME(REGISTER BYTE *D, \ |
381 | | REGISTER const uint32_t *r, \ |
382 | 0 | REGISTER const uint8_t *s) \ |
383 | 0 | { \ |
384 | 0 | register uint32_t x, y, z; \ |
385 | 0 | \ |
386 | 0 | /* declare temps & load data */ \ |
387 | 0 | TEMP(LOAD); \ |
388 | 0 | \ |
389 | 0 | /* do the 16 iterations */ \ |
390 | 0 | FIXR \ |
391 | 0 | DECR(x,y,z,r,31,30,KEYMAP); \ |
392 | 0 | DECR(y,x,z,r,29,28,KEYMAP); \ |
393 | 0 | DECR(x,y,z,r,27,26,KEYMAP); \ |
394 | 0 | DECR(y,x,z,r,25,24,KEYMAP); \ |
395 | 0 | DECR(x,y,z,r,23,22,KEYMAP); \ |
396 | 0 | DECR(y,x,z,r,21,20,KEYMAP); \ |
397 | 0 | DECR(x,y,z,r,19,18,KEYMAP); \ |
398 | 0 | DECR(y,x,z,r,17,16,KEYMAP); \ |
399 | 0 | DECR(x,y,z,r,15,14,KEYMAP); \ |
400 | 0 | DECR(y,x,z,r,13,12,KEYMAP); \ |
401 | 0 | DECR(x,y,z,r,11,10,KEYMAP); \ |
402 | 0 | DECR(y,x,z,r, 9, 8,KEYMAP); \ |
403 | 0 | DECR(x,y,z,r, 7, 6,KEYMAP); \ |
404 | 0 | DECR(y,x,z,r, 5, 4,KEYMAP); \ |
405 | 0 | DECR(x,y,z,r, 3, 2,KEYMAP); \ |
406 | 0 | DECR(y,x,z,r, 1, 0,KEYMAP); \ |
407 | 0 | \ |
408 | 0 | /* save result */ \ |
409 | 0 | SAVE(); \ |
410 | 0 | \ |
411 | 0 | return; \ |
412 | 0 | } |