xref: /illumos-gate/usr/src/uts/common/des/des_soft.c (revision ba00d94ad32aec378c65c2bad5cd13dd9145041a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  *
22  * Copyright 1989 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 /*
35  * Warning!  Things are arranged very carefully in this file to
36  * allow read-only data to be moved to the text segment.  The
37  * various DES tables must appear before any function definitions
38  * (this is arranged by including them immediately below) and partab
39  * must also appear before any function definitions.
40  * This arrangement allows all data up through the first text to
41  * be moved to text.
42  */
43 
44 /*
45  * Fast (?) software implementation of DES
46  * Has been seen going at 2000 bytes/sec on a Sun-2
47  * Works on a VAX too.
48  * Won't work without 8 bit chars and 32 bit longs
49  */
50 
51 #include <sys/types.h>
52 #include <des/des.h>
53 #include <des/softdes.h>
54 #include <des/desdata.h>
55 #include <sys/debug.h>
56 
57 static void des_setkey(u_char userkey[8], struct deskeydata *kd,
58     unsigned int dir);
59 static void des_encrypt(u_char *data, struct deskeydata *kd);
60 
/*
 * btst(k, b) is nonzero iff bit number b of the byte array k is set.
 * Bits are numbered from the most significant bit of k[0] (bit 0)
 * upward, eight per byte.  Both arguments are parenthesized so that
 * expression arguments are evaluated as a unit.
 */
#define	btst(k, b)	((k)[(b) >> 3] & (0x80 >> ((b) & 07)))

/*
 * The bit just above a 28-bit quantity held in the low bits of a word.
 * Unsigned so that masks and shifts built from it never touch a sign bit.
 */
#define	BIT28	(1U << 28)
63 
64 /*
65  * Software encrypt or decrypt a block of data (multiple of 8 bytes)
66  * Do the CBC ourselves if needed.
67  */
68 /* ARGSUSED */
69 int
70 _des_crypt(char *buf, size_t len, struct desparams *desp)
71 {
72 	short i;
73 	uint_t mode;
74 	uint_t dir;
75 	char nextiv[8];
76 	struct deskeydata softkey;
77 
78 	mode = desp->des_mode;
79 	dir = desp->des_dir;
80 	des_setkey(desp->des_key, &softkey, dir);
81 	while (len != 0) {
82 		switch (mode) {
83 		case CBC:
84 			switch (dir) {
85 			case ENCRYPT:
86 				for (i = 0; i < 8; i++)
87 					buf[i] ^= desp->des_ivec[i];
88 				des_encrypt((u_char *)buf, &softkey);
89 				for (i = 0; i < 8; i++)
90 					desp->des_ivec[i] = buf[i];
91 				break;
92 			case DECRYPT:
93 				for (i = 0; i < 8; i++)
94 					nextiv[i] = buf[i];
95 				des_encrypt((u_char *)buf, &softkey);
96 				for (i = 0; i < 8; i++) {
97 					buf[i] ^= desp->des_ivec[i];
98 					desp->des_ivec[i] = nextiv[i];
99 				}
100 				break;
101 			}
102 			break;
103 		case ECB:
104 			des_encrypt((u_char *)buf, &softkey);
105 			break;
106 		}
107 		buf += 8;
108 		len -= 8;
109 	}
110 	return (1);
111 }
112 
113 
114 /*
115  * Set the key and direction for an encryption operation
116  * We build the 16 key entries here
117  */
118 /* ARGSUSED */
119 static void
120 des_setkey(u_char userkey[8], struct deskeydata *kd, unsigned int dir)
121 {
122 	int32_t C, D;
123 	short i;
124 
125 	/*
126 	 * First, generate C and D by permuting
127 	 * the key. The low order bit of each
128 	 * 8-bit char is not used, so C and D are only 28
129 	 * bits apiece.
130 	 */
131 	{
132 		short bit;
133 		short *pcc = (short *)PC1_C, *pcd = (short *)PC1_D;
134 
135 		C = D = 0;
136 		for (i = 0; i < 28; i++) {
137 			C <<= 1;
138 			D <<= 1;
139 			bit = *pcc++;
140 			if (btst(userkey, bit))
141 				C |= 1;
142 			bit = *pcd++;
143 			if (btst(userkey, bit))
144 				D |= 1;
145 		}
146 	}
147 	/*
148 	 * To generate Ki, rotate C and D according
149 	 * to schedule and pick up a permutation
150 	 * using PC2.
151 	 */
152 	for (i = 0; i < 16; i++) {
153 		chunk_t *c;
154 		short j, k, bit;
155 		int bbit;
156 
157 		/*
158 		 * Do the "left shift" (rotate)
159 		 * We know we always rotate by either 1 or 2 bits
160 		 * the shifts table tells us if its 2
161 		 */
162 		C <<= 1;
163 		if (C & BIT28)
164 			C |= 1;
165 		D <<= 1;
166 		if (D & BIT28)
167 			D |= 1;
168 		if (shifts[i]) {
169 			C <<= 1;
170 			if (C & BIT28)
171 				C |= 1;
172 			D <<= 1;
173 			if (D & BIT28)
174 				D |= 1;
175 		}
176 		/*
177 		 * get Ki. Note C and D are concatenated.
178 		 */
179 		bit = 0;
180 		switch (dir) {
181 		case ENCRYPT:
182 			c = &kd->keyval[i];
183 			break;
184 		case DECRYPT:
185 			c = &kd->keyval[15 - i];
186 			break;
187 		}
188 		c->long0 = 0;
189 		c->long1 = 0;
190 		bbit = (1 << 5) << 24;
191 		for (j = 0; j < 4; j++) {
192 			for (k = 0; k < 6; k++) {
193 				if (C & (BIT28 >> PC2_C[bit]))
194 					c->long0 |= bbit >> k;
195 				if (D & (BIT28 >> PC2_D[bit]))
196 					c->long1 |= bbit >> k;
197 				bit++;
198 			}
199 			bbit >>= 8;
200 		}
201 	}
202 }
203 
204 
205 
206 /*
207  * Do an encryption operation
208  * Much pain is taken (with preprocessor) to avoid loops so the compiler
209  * can do address arithmetic instead of doing it at runtime.
210  * Note that the byte-to-chunk conversion is necessary to guarantee
211  * processor byte-order independence.
212  */
213 /* ARGSUSED */
214 static void
215 des_encrypt(u_char *data, struct deskeydata *kd)
216 {
217 	chunk_t work1, work2;
218 
219 	/*
220 	 * Initial permutation
221 	 * and byte to chunk conversion
222 	 */
223 	{
224 		const uint32_t *lp;
225 		uint32_t l0, l1, w;
226 		short i, pbit;
227 
228 		work1.byte0 = data[0];
229 		work1.byte1 = data[1];
230 		work1.byte2 = data[2];
231 		work1.byte3 = data[3];
232 		work1.byte4 = data[4];
233 		work1.byte5 = data[5];
234 		work1.byte6 = data[6];
235 		work1.byte7 = data[7];
236 		l0 = l1 = 0;
237 		w = work1.long0;
238 		for (lp = &longtab[0], i = 0; i < 32; i++) {
239 			if (w & *lp++) {
240 				pbit = IPtab[i];
241 				if (pbit < 32)
242 					l0 |= longtab[pbit];
243 				else
244 					l1 |= longtab[pbit-32];
245 			}
246 		}
247 		w = work1.long1;
248 		for (lp = &longtab[0], i = 32; i < 64; i++) {
249 			if (w & *lp++) {
250 				pbit = IPtab[i];
251 				if (pbit < 32)
252 					l0 |= longtab[pbit];
253 				else
254 					l1 |= longtab[pbit-32];
255 			}
256 		}
257 		work2.long0 = l0;
258 		work2.long1 = l1;
259 	}
260 
261 /*
262  * Expand 8 bits of 32 bit R to 48 bit R
263  */
264 #ifdef __STDC__
265 #define	do_R_to_ER(op, b) {					\
266 	struct R_to_ER *p =					\
267 	    (struct R_to_ER *)&R_to_ER_tab[b][R.byte##b];	\
268 	e0 op p->l0;						\
269 	e1 op p->l1;						\
270 }
271 #else
272 #define	do_R_to_ER(op, b)	{				\
273 	/*CSTYLED*/						\
274 	struct R_to_ER *p = &R_to_ER_tab[b][R.byte/**/b];	\
275 	e0 op p->l0;						\
276 	e1 op p->l1;						\
277 }
278 #endif
279 
280 /*
281  * Inner part of the algorithm:
282  * Expand R from 32 to 48 bits; xor key value;
283  * apply S boxes; permute 32 bits of output
284  */
285 #define	do_F(iter, inR, outR) 	{			\
286 	chunk_t R, ER;					\
287 	u_int e0, e1;					\
288 	R.long0 = inR;					\
289 	/*CSTYLED*/					\
290 	do_R_to_ER(=,0);				\
291 	/*CSTYLED*/					\
292 	do_R_to_ER(|=,1);				\
293 	/*CSTYLED*/					\
294 	do_R_to_ER(|=,2);				\
295 	/*CSTYLED*/					\
296 	do_R_to_ER(|=,3);				\
297 	ER.long0 = e0 ^ kd->keyval[iter].long0;		\
298 	ER.long1 = e1 ^ kd->keyval[iter].long1;		\
299 	R.long0 =					\
300 		S_tab[0][ER.byte0] +			\
301 		S_tab[1][ER.byte1] +			\
302 		S_tab[2][ER.byte2] +			\
303 		S_tab[3][ER.byte3] +			\
304 		S_tab[4][ER.byte4] +			\
305 		S_tab[5][ER.byte5] +			\
306 		S_tab[6][ER.byte6] +			\
307 		S_tab[7][ER.byte7]; 			\
308 	outR =						\
309 		P_tab[0][R.byte0] +			\
310 		P_tab[1][R.byte1] +			\
311 		P_tab[2][R.byte2] +			\
312 		P_tab[3][R.byte3]; 			\
313 }
314 
315 /*
316  * Do a cipher step
317  * Apply inner part; do xor and exchange of 32 bit parts
318  */
319 #define	cipher(iter, inR, inL, outR, outL)	{	\
320 	do_F(iter, inR, outR);				\
321 	outR ^= inL;					\
322 	outL = inR;					\
323 }
324 
325 	/*
326 	 * Apply the 16 ciphering steps
327 	 */
328 	{
329 		u_int r0, l0, r1, l1;
330 
331 		l0 = work2.long0;
332 		r0 = work2.long1;
333 		cipher(0, r0, l0, r1, l1);
334 		cipher(1, r1, l1, r0, l0);
335 		cipher(2, r0, l0, r1, l1);
336 		cipher(3, r1, l1, r0, l0);
337 		cipher(4, r0, l0, r1, l1);
338 		cipher(5, r1, l1, r0, l0);
339 		cipher(6, r0, l0, r1, l1);
340 		cipher(7, r1, l1, r0, l0);
341 		cipher(8, r0, l0, r1, l1);
342 		cipher(9, r1, l1, r0, l0);
343 		cipher(10, r0, l0, r1, l1);
344 		cipher(11, r1, l1, r0, l0);
345 		cipher(12, r0, l0, r1, l1);
346 		cipher(13, r1, l1, r0, l0);
347 		cipher(14, r0, l0, r1, l1);
348 		cipher(15, r1, l1, r0, l0);
349 		work1.long0 = r0;
350 		work1.long1 = l0;
351 	}
352 
353 	/*
354 	 * Final permutation
355 	 * and chunk to byte conversion
356 	 */
357 	{
358 		const uint32_t *lp;
359 		uint32_t l0, l1, w;
360 		short i, pbit;
361 
362 		l0 = l1 = 0;
363 		w = work1.long0;
364 		for (lp = &longtab[0], i = 0; i < 32; i++) {
365 			if (w & *lp++) {
366 				pbit = FPtab[i];
367 				if (pbit < 32)
368 					l0 |= longtab[pbit];
369 				else
370 					l1 |= longtab[pbit-32];
371 			}
372 		}
373 		w = work1.long1;
374 		for (lp = &longtab[0], i = 32; i < 64; i++) {
375 			if (w & *lp++) {
376 				pbit = FPtab[i];
377 				if (pbit < 32)
378 					l0 |= longtab[pbit];
379 				else
380 					l1 |= longtab[pbit-32];
381 			}
382 		}
383 		work2.long0 = l0;
384 		work2.long1 = l1;
385 	}
386 	data[0] = work2.byte0;
387 	data[1] = work2.byte1;
388 	data[2] = work2.byte2;
389 	data[3] = work2.byte3;
390 	data[4] = work2.byte4;
391 	data[5] = work2.byte5;
392 	data[6] = work2.byte6;
393 	data[7] = work2.byte7;
394 }
395