/*
 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*0957b409SSimon J. Gerraty
25*0957b409SSimon J. Gerraty #include "inner.h"
26*0957b409SSimon J. Gerraty
/*
 * During key schedule, we need to apply bit extraction PC-2 then permute
 * things into our bitslice representation. PC-2 extracts 48 bits out
 * of two 28-bit words (kl and kr), and we store these bits into two
 * 32-bit words sk0 and sk1.
 *
 * Each table lists its source bits from the most significant output
 * bit down to the least significant one, so for x in 0..15:
 *
 *  -- bit 16+x of sk0 comes from bit QL0[15-x] of kl
 *  -- bit x of sk0 comes from bit QR0[15-x] of kr
 *  -- bit 16+x of sk1 comes from bit QL1[15-x] of kl
 *  -- bit x of sk1 comes from bit QR1[15-x] of kr
 *
 * An entry of 28 designates a bit beyond the 28-bit input word; the
 * corresponding output bit is then always zero (this assumes that the
 * upper four bits of kl and kr are clear).
 */
38*0957b409SSimon J. Gerraty
/*
 * Source bit indices in kl for the upper half (bits 31 down to 16) of
 * sk0: entry j feeds output bit 31-j.
 */
static const unsigned char QL0[16] = {
	17,  4, 27, 23,
	13, 22,  7, 18,
	16, 24,  2, 20,
	 1,  8, 15, 26
};

/*
 * Source bit indices in kr for the lower half (bits 15 down to 0) of
 * sk0: entry j feeds output bit 15-j.
 */
static const unsigned char QR0[16] = {
	25, 19,  9,  1,
	 5, 11, 23,  8,
	17,  0, 22,  3,
	 6, 20, 27, 24
};

/*
 * Source bit indices in kl for the upper half of sk1 (same layout as
 * QL0). The value 28 selects a bit outside of the 28-bit word, which
 * presumably is always zero.
 */
static const unsigned char QL1[16] = {
	28, 28, 14, 11,
	28, 28, 25,  0,
	28, 28,  5,  9,
	28, 28, 12, 21
};

/*
 * Source bit indices in kr for the lower half of sk1 (same layout as
 * QR0, with 28 used as in QL1).
 */
static const unsigned char QR1[16] = {
	28, 28, 15,  4,
	28, 28, 26, 16,
	28, 28, 12,  7,
	28, 28, 10, 14
};
58*0957b409SSimon J. Gerraty
/*
 * 32-bit left rotation of x by n bits.
 *
 * The rotation count is reduced modulo 32, so that a count of 0 (or
 * any multiple of 32) returns x unchanged instead of shifting by the
 * full word width, which is undefined behaviour in C. For counts in
 * the 1..31 range the result is the usual (x << n) | (x >> (32 - n)).
 * The C compiler is supposed to recognize this as a rotation and use
 * the local architecture rotation opcode (if available).
 */
static inline uint32_t
rotl(uint32_t x, int n)
{
	unsigned s;

	s = (unsigned)n & 31u;
	return (uint32_t)((x << s) | (x >> ((32u - s) & 31u)));
}
68*0957b409SSimon J. Gerraty
69*0957b409SSimon J. Gerraty /*
70*0957b409SSimon J. Gerraty * Compute key schedule for 8 key bytes (produces 32 subkey words).
71*0957b409SSimon J. Gerraty */
72*0957b409SSimon J. Gerraty static void
keysched_unit(uint32_t * skey,const void * key)73*0957b409SSimon J. Gerraty keysched_unit(uint32_t *skey, const void *key)
74*0957b409SSimon J. Gerraty {
75*0957b409SSimon J. Gerraty int i;
76*0957b409SSimon J. Gerraty
77*0957b409SSimon J. Gerraty br_des_keysched_unit(skey, key);
78*0957b409SSimon J. Gerraty
79*0957b409SSimon J. Gerraty /*
80*0957b409SSimon J. Gerraty * Apply PC-2 + bitslicing.
81*0957b409SSimon J. Gerraty */
82*0957b409SSimon J. Gerraty for (i = 0; i < 16; i ++) {
83*0957b409SSimon J. Gerraty uint32_t kl, kr, sk0, sk1;
84*0957b409SSimon J. Gerraty int j;
85*0957b409SSimon J. Gerraty
86*0957b409SSimon J. Gerraty kl = skey[(i << 1) + 0];
87*0957b409SSimon J. Gerraty kr = skey[(i << 1) + 1];
88*0957b409SSimon J. Gerraty sk0 = 0;
89*0957b409SSimon J. Gerraty sk1 = 0;
90*0957b409SSimon J. Gerraty for (j = 0; j < 16; j ++) {
91*0957b409SSimon J. Gerraty sk0 <<= 1;
92*0957b409SSimon J. Gerraty sk1 <<= 1;
93*0957b409SSimon J. Gerraty sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16;
94*0957b409SSimon J. Gerraty sk0 |= (kr >> QR0[j]) & (uint32_t)1;
95*0957b409SSimon J. Gerraty sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16;
96*0957b409SSimon J. Gerraty sk1 |= (kr >> QR1[j]) & (uint32_t)1;
97*0957b409SSimon J. Gerraty }
98*0957b409SSimon J. Gerraty
99*0957b409SSimon J. Gerraty skey[(i << 1) + 0] = sk0;
100*0957b409SSimon J. Gerraty skey[(i << 1) + 1] = sk1;
101*0957b409SSimon J. Gerraty }
102*0957b409SSimon J. Gerraty
103*0957b409SSimon J. Gerraty #if 0
104*0957b409SSimon J. Gerraty /*
105*0957b409SSimon J. Gerraty * Speed-optimized version for PC-2 + bitslicing.
106*0957b409SSimon J. Gerraty * (Unused. Kept for reference only.)
107*0957b409SSimon J. Gerraty */
108*0957b409SSimon J. Gerraty sk0 = kl & (uint32_t)0x00100000;
109*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x08008000) << 2;
110*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00400000) << 4;
111*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00800000) << 5;
112*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00040000) << 6;
113*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00010000) << 7;
114*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00000100) << 10;
115*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00022000) << 14;
116*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00000082) << 18;
117*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00000004) << 19;
118*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x04000000) >> 10;
119*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x00000010) << 26;
120*0957b409SSimon J. Gerraty sk0 |= (kl & (uint32_t)0x01000000) >> 2;
121*0957b409SSimon J. Gerraty
122*0957b409SSimon J. Gerraty sk0 |= kr & (uint32_t)0x00000100;
123*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00000008) << 1;
124*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00000200) << 4;
125*0957b409SSimon J. Gerraty sk0 |= rotl(kr & (uint32_t)0x08000021, 6);
126*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x01000000) >> 24;
127*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00000002) << 11;
128*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00100000) >> 18;
129*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00400000) >> 17;
130*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00800000) >> 14;
131*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x02020000) >> 10;
132*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00080000) >> 5;
133*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00000040) >> 3;
134*0957b409SSimon J. Gerraty sk0 |= (kr & (uint32_t)0x00000800) >> 1;
135*0957b409SSimon J. Gerraty
136*0957b409SSimon J. Gerraty sk1 = kl & (uint32_t)0x02000000;
137*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00001000) << 5;
138*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00000200) << 11;
139*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00004000) << 15;
140*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00000020) << 16;
141*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00000800) << 17;
142*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00000001) << 24;
143*0957b409SSimon J. Gerraty sk1 |= (kl & (uint32_t)0x00200000) >> 5;
144*0957b409SSimon J. Gerraty
145*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00000010) << 8;
146*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x04000000) >> 17;
147*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00004000) >> 14;
148*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00000400) >> 9;
149*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00010000) >> 8;
150*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00001000) >> 7;
151*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00000080) >> 3;
152*0957b409SSimon J. Gerraty sk1 |= (kr & (uint32_t)0x00008000) >> 2;
153*0957b409SSimon J. Gerraty #endif
154*0957b409SSimon J. Gerraty }
155*0957b409SSimon J. Gerraty
/*
 * see inner.h
 *
 * Fill skey[] with one 32-word schedule per DES stage and return the
 * number of stages:
 *  -- key_len == 8: one schedule (single DES), returns 1;
 *  -- key_len == 16: two schedules are computed and the first one is
 *     copied into the third slot (skey + 64), returns 3;
 *  -- any other key_len: handled as a 24-byte key, with three
 *     schedules computed from key bytes 0..7, 8..15 and 16..23,
 *     returns 3.
 * When there are three stages, the second schedule (skey + 32) is
 * passed through br_des_rev_skey() (presumably to reverse its round
 * order, so that the middle stage runs in the opposite direction).
 */
unsigned
br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
{
	const unsigned char *kb;

	kb = key;
	keysched_unit(skey, kb);
	if (key_len == 8) {
		return 1;
	}

	/*
	 * Two-key and three-key variants share the second stage.
	 */
	keysched_unit(skey + 32, kb + 8);
	br_des_rev_skey(skey + 32);
	if (key_len == 16) {
		memcpy(skey + 64, skey, 32 * sizeof *skey);
	} else {
		keysched_unit(skey + 64, kb + 16);
	}
	return 3;
}
178*0957b409SSimon J. Gerraty
/*
 * "Fake multiplexer" used to evaluate the T-boxes: for each bit
 * position, the result bit is the bit of 'a' where sel is 0, and the
 * bit of 'a ^ b' where sel is 1.
 */
static inline uint32_t
tbox_mux(uint32_t a, uint32_t sel, uint32_t b)
{
	return a ^ (sel & b);
}

/*
 * DES confusion function. This function performs expansion E (32 to
 * 48 bits), XOR with subkey, S-boxes, and permutation P.
 *
 * r0 is the 32-bit input word; sk points to the six expanded subkey
 * words of the current round (sk[0] to sk[5]). The 32-bit output of
 * the P permutation is returned.
 */
static inline uint32_t
Fconf(uint32_t r0, const uint32_t *sk)
{
	/*
	 * Each 6->4 S-box is virtually turned into four 6->1 boxes,
	 * hence 32 "T-boxes", all evaluated in parallel with bitslice
	 * code. A T-box is a tree of multiplexers with 70 inputs: the
	 * 6 actual inputs, and 64 constants that give the output for
	 * each combination of these inputs. All T-boxes thus share the
	 * same circuit, with distinct inputs.
	 *
	 * T-boxes are numbered from 0 to 31, in least-to-most
	 * significant order; S-box S1 corresponds to T-boxes 31, 30,
	 * 29 and 28, in that order. T-box 'n' is computed with the
	 * bits at rank 'n' in the 32-bit words.
	 *
	 * Words s0 to s5 contain the T-box inputs 0 to 5 (already
	 * XORed with the subkey).
	 */
	const uint32_t lsb = (uint32_t)0x11111111;
	uint32_t e1, e2, e3, e4;
	uint32_t s0, s1, s2, s3, s4, s5;
	uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
	uint32_t t10, t11, t12, t13, t14, t16, t17, t18, t19;
	uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
	uint32_t t30;
	uint32_t a0, a1, a2, a3, a4, a5, a6, a7;
	uint32_t a8, a9, a10, a11, a12, a13, a14, a15;
	uint32_t b0, b1, b2, b3, b4, b5, b6, b7;
	uint32_t c0, c1, c2, c3;
	uint32_t d0, d1;
	uint32_t v;

	/*
	 * Spread input bits: bit k (k = 0 to 3) of each nibble of r0
	 * is replicated over that whole nibble.
	 */
	e1 = r0 & lsb;
	e2 = (r0 >> 1) & lsb;
	e3 = (r0 >> 2) & lsb;
	e4 = (r0 >> 3) & lsb;
	e1 = (e1 << 4) - e1;
	e2 = (e2 << 4) - e2;
	e3 = (e3 << 4) - e3;
	e4 = (e4 << 4) - e4;

	/*
	 * Inputs 0 and 5 are the spread words for nibble bits 3 and 0,
	 * rotated by one nibble (left and right, respectively). All six
	 * input words are XORed with the subkey for this round.
	 */
	s0 = ((e4 << 4) | (e4 >> 28)) ^ sk[0];
	s1 = e1 ^ sk[1];
	s2 = e2 ^ sk[2];
	s3 = e3 ^ sk[3];
	s4 = e4 ^ sk[4];
	s5 = ((e1 >> 4) | (e1 << 28)) ^ sk[5];

	/*
	 * Tree leaves, selected with input 0. Leaves 15 and 31 are
	 * identically zero and thus omitted.
	 */
	t0 = tbox_mux((uint32_t)0xEFA72C4D, s0, (uint32_t)0xEC7AC69C);
	t1 = tbox_mux((uint32_t)0xAEAAEDFF, s0, (uint32_t)0x500FB821);
	t2 = tbox_mux((uint32_t)0x37396665, s0, (uint32_t)0x40EFA809);
	t3 = tbox_mux((uint32_t)0x68D7B833, s0, (uint32_t)0xA5EC0B28);
	t4 = tbox_mux((uint32_t)0xC9C755BB, s0, (uint32_t)0x252CF820);
	t5 = tbox_mux((uint32_t)0x73FC3606, s0, (uint32_t)0x40205801);
	t6 = tbox_mux((uint32_t)0xA2A0A918, s0, (uint32_t)0xE220F929);
	t7 = tbox_mux((uint32_t)0x8222BD90, s0, (uint32_t)0x44A3F9E1);
	t8 = tbox_mux((uint32_t)0xD6B6AC77, s0, (uint32_t)0x794F104A);
	t9 = tbox_mux((uint32_t)0x3069300C, s0, (uint32_t)0x026F320B);
	t10 = tbox_mux((uint32_t)0x6CE0D5CC, s0, (uint32_t)0x7640B01A);
	t11 = tbox_mux((uint32_t)0x59A9A22D, s0, (uint32_t)0x238F1572);
	t12 = tbox_mux((uint32_t)0xAC6D0BD4, s0, (uint32_t)0x7A63C083);
	t13 = tbox_mux((uint32_t)0x21C83200, s0, (uint32_t)0x11CCA000);
	t14 = tbox_mux((uint32_t)0xA0E62188, s0, (uint32_t)0x202F69AA);
	t16 = tbox_mux((uint32_t)0xAF7D655A, s0, (uint32_t)0x51B33BE9);
	t17 = tbox_mux((uint32_t)0xF0168AA3, s0, (uint32_t)0x3B0FE8AE);
	t18 = tbox_mux((uint32_t)0x90AA30C6, s0, (uint32_t)0x90BF8816);
	t19 = tbox_mux((uint32_t)0x5AB2750A, s0, (uint32_t)0x09E34F9B);
	t20 = tbox_mux((uint32_t)0x5391BE65, s0, (uint32_t)0x0103BE88);
	t21 = tbox_mux((uint32_t)0x93372BAF, s0, (uint32_t)0x49AC8E25);
	t22 = tbox_mux((uint32_t)0xF288210C, s0, (uint32_t)0x922C313D);
	t23 = tbox_mux((uint32_t)0x920AF5C0, s0, (uint32_t)0x70EF31B0);
	t24 = tbox_mux((uint32_t)0x63D312C0, s0, (uint32_t)0x6A707100);
	t25 = tbox_mux((uint32_t)0x537B3006, s0, (uint32_t)0xB97C9011);
	t26 = tbox_mux((uint32_t)0xA2EFB0A5, s0, (uint32_t)0xA320C959);
	t27 = tbox_mux((uint32_t)0xBC8F96A5, s0, (uint32_t)0x6EA0AB4A);
	t28 = tbox_mux((uint32_t)0xFAD176A5, s0, (uint32_t)0x6953DDF8);
	t29 = tbox_mux((uint32_t)0x665A14A3, s0, (uint32_t)0xF74F3E2B);
	t30 = tbox_mux((uint32_t)0xF2EFF0CC, s0, (uint32_t)0xF0306CAD);

	/*
	 * Input 1: 32 -> 16 words. The partners of t14 and t30 are the
	 * omitted all-zero leaves, so these two pass through unchanged.
	 */
	a0 = tbox_mux(t0, s1, t1);
	a1 = tbox_mux(t2, s1, t3);
	a2 = tbox_mux(t4, s1, t5);
	a3 = tbox_mux(t6, s1, t7);
	a4 = tbox_mux(t8, s1, t9);
	a5 = tbox_mux(t10, s1, t11);
	a6 = tbox_mux(t12, s1, t13);
	a7 = t14;
	a8 = tbox_mux(t16, s1, t17);
	a9 = tbox_mux(t18, s1, t19);
	a10 = tbox_mux(t20, s1, t21);
	a11 = tbox_mux(t22, s1, t23);
	a12 = tbox_mux(t24, s1, t25);
	a13 = tbox_mux(t26, s1, t27);
	a14 = tbox_mux(t28, s1, t29);
	a15 = t30;

	/*
	 * Input 2: 16 -> 8 words.
	 */
	b0 = tbox_mux(a0, s2, a1);
	b1 = tbox_mux(a2, s2, a3);
	b2 = tbox_mux(a4, s2, a5);
	b3 = tbox_mux(a6, s2, a7);
	b4 = tbox_mux(a8, s2, a9);
	b5 = tbox_mux(a10, s2, a11);
	b6 = tbox_mux(a12, s2, a13);
	b7 = tbox_mux(a14, s2, a15);

	/*
	 * Input 3: 8 -> 4 words.
	 */
	c0 = tbox_mux(b0, s3, b1);
	c1 = tbox_mux(b2, s3, b3);
	c2 = tbox_mux(b4, s3, b5);
	c3 = tbox_mux(b6, s3, b7);

	/*
	 * Inputs 4 and 5: 4 -> 2 -> 1 word.
	 */
	d0 = tbox_mux(c0, s4, c1);
	d1 = tbox_mux(c2, s4, c3);
	v = tbox_mux(d0, s5, d1);

	/*
	 * The P permutation:
	 *  -- Each bit move is converted into a mask + left rotation.
	 *  -- Rotations that use the same movement are coalesced together.
	 *  -- Left and right shifts are used as alternatives to a rotation
	 *     where appropriate (this will help architectures that do not
	 *     have a rotation opcode).
	 */
	return ((v & (uint32_t)0x00000004) << 3)
		| ((v & (uint32_t)0x00004000) << 4)
		| rotl(v & (uint32_t)0x12020120, 5)
		| ((v & (uint32_t)0x00100000) << 6)
		| ((v & (uint32_t)0x00008000) << 9)
		| ((v & (uint32_t)0x04000000) >> 22)
		| ((v & (uint32_t)0x00000001) << 11)
		| rotl(v & (uint32_t)0x20000200, 12)
		| ((v & (uint32_t)0x00200000) >> 19)
		| ((v & (uint32_t)0x00000040) << 14)
		| ((v & (uint32_t)0x00010000) << 15)
		| ((v & (uint32_t)0x00000002) << 16)
		| rotl(v & (uint32_t)0x40801800, 17)
		| ((v & (uint32_t)0x00080000) >> 13)
		| ((v & (uint32_t)0x00000010) << 21)
		| ((v & (uint32_t)0x01000000) >> 10)
		| rotl(v & (uint32_t)0x88000008, 24)
		| ((v & (uint32_t)0x00000480) >> 7)
		| ((v & (uint32_t)0x00442000) >> 6);
}
341*0957b409SSimon J. Gerraty
/*
 * Run one block through 16 successive Feistel rounds.
 *
 * *pl and *pr hold the two 32-bit halves and are updated in place;
 * sk_exp points to the 96 expanded subkey words for these rounds (six
 * words per round). Every round swaps the halves; the halves are
 * written back crossed, which cancels the swap of the final round.
 */
static void
process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
{
	const uint32_t *rk, *rk_end;
	uint32_t left, right;

	left = *pl;
	right = *pr;
	rk_end = sk_exp + 96;
	for (rk = sk_exp; rk != rk_end; rk += 6) {
		uint32_t mixed;

		mixed = left ^ Fconf(right, rk);
		left = right;
		right = mixed;
	}
	*pl = right;
	*pr = left;
}
365*0957b409SSimon J. Gerraty
/*
 * see inner.h
 *
 * Process the 8-byte block in place: the two halves are decoded as
 * big-endian 32-bit words, go through br_des_do_IP(), then through
 * num_rounds successive 16-round passes (each pass uses the next 96
 * words of sk_exp), then through br_des_do_invIP(), and are finally
 * encoded back into the block (big-endian).
 */
void
br_des_ct_process_block(unsigned num_rounds,
	const uint32_t *sk_exp, void *block)
{
	unsigned char *bytes;
	uint32_t left, right;

	bytes = block;
	left = br_dec32be(bytes);
	right = br_dec32be(bytes + 4);
	br_des_do_IP(&left, &right);
	for (; num_rounds > 0; num_rounds --, sk_exp += 96) {
		process_block_unit(&left, &right, sk_exp);
	}
	br_des_do_invIP(&left, &right);
	br_enc32be(bytes, left);
	br_enc32be(bytes + 4, right);
}
386*0957b409SSimon J. Gerraty
/*
 * Extract bit 'k' of each nibble of v, and replicate it over the whole
 * nibble (each nibble of the result is either 0x0 or 0xF).
 */
static inline uint32_t
skey_spread(uint32_t v, int k)
{
	uint32_t w;

	w = (v >> k) & (uint32_t)0x11111111;
	return (w << 4) - w;
}

/*
 * see inner.h
 *
 * Expand subkeys for use by the bitslice round function. For each of
 * the 16 * num_rounds pairs of words read from skey[], six words are
 * written to sk_exp[]: the four nibble-bit spreads (bits 0 to 3) of
 * the first word, then the spreads of bits 0 and 1 of the second
 * word. Bits 2 and 3 of each nibble of the second word are not used.
 */
void
br_des_ct_skey_expand(uint32_t *sk_exp,
	unsigned num_rounds, const uint32_t *skey)
{
	unsigned left;

	for (left = num_rounds << 4; left > 0; left --) {
		uint32_t first, second;

		first = skey[0];
		second = skey[1];
		sk_exp[0] = skey_spread(first, 0);
		sk_exp[1] = skey_spread(first, 1);
		sk_exp[2] = skey_spread(first, 2);
		sk_exp[3] = skey_spread(first, 3);
		sk_exp[4] = skey_spread(second, 0);
		sk_exp[5] = skey_spread(second, 1);
		skey += 2;
		sk_exp += 6;
	}
}
412