1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #define ARCFOUR_LOOP_OPTIMIZED
27
28 #ifndef _KERNEL
29 #include <stdint.h>
30 #endif /* _KERNEL */
31
32 #include "arcfour.h"
33
34 #if defined(__amd64)
35 /* ARCFour_key.flag values */
36 #define ARCFOUR_ON_INTEL 1
37 #define ARCFOUR_ON_AMD64 0
38
39 #ifdef _KERNEL
40 #include <sys/x86_archext.h>
41 #include <sys/cpuvar.h>
42
43 #else
44 #include <sys/auxv.h>
45 #endif /* _KERNEL */
46 #endif /* __amd64 */
47
48 #ifndef __amd64
49 /*
50 * Initialize the key stream 'key' using the key value.
51 *
52 * Input:
53 * keyval User-provided key
54 * keyvallen Length, in bytes, of keyval
55 * Output:
56 * key Initialized ARCFOUR key schedule, based on keyval
57 */
58 void
arcfour_key_init(ARCFour_key * key,uchar_t * keyval,int keyvallen)59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
60 {
61 uchar_t ext_keyval[256];
62 uchar_t tmp;
63 int i, j;
64
65 /* Normalize key length to 256 */
66 for (i = j = 0; i < 256; i++, j++) {
67 if (j == keyvallen)
68 j = 0;
69 ext_keyval[i] = keyval[j];
70 }
71
72 for (i = 0; i < 256; i++)
73 key->arr[i] = (uchar_t)i;
74
75 j = 0;
76 for (i = 0; i < 256; i++) {
77 j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
78 tmp = key->arr[i];
79 key->arr[i] = key->arr[j];
80 key->arr[j] = tmp;
81 }
82 key->i = 0;
83 key->j = 0;
84 }
85 #endif /* !__amd64 */
86
87
88 /*
89 * Encipher 'in' using 'key'.
90 *
91 * Input:
92 * key ARCFOUR key, initialized by arcfour_key_init()
93 * in Input text
94 * out Buffer to contain output text
95 * len Length, in bytes, of the in and out buffers
96 *
97 * Output:
98 * out Buffer containing output text
99 *
100 * Note: in and out can point to the same location
101 */
102 void
arcfour_crypt(ARCFour_key * key,uchar_t * in,uchar_t * out,size_t len)103 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
104 {
105 #ifdef __amd64
106 if (key->flag == ARCFOUR_ON_AMD64) {
107 arcfour_crypt_asm(key, in, out, len);
108 } else { /* Intel EM64T */
109 #endif /* amd64 */
110
111 size_t ii;
112 uchar_t i, j, ti, tj;
113 #ifdef ARCFOUR_LOOP_OPTIMIZED
114 uchar_t arr_ij;
115 #endif
116 #ifdef __amd64
117 uint32_t *arr;
118 #else
119 uchar_t *arr;
120 #endif
121
122 #ifdef sun4u
123 /*
124 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
125 * the cases where the input and output buffers are aligned on
126 * a multiple of 8-byte boundary.
127 */
128 int index;
129 uchar_t tmp;
130
131 index = (((uint64_t)(uintptr_t)in) & 0x7);
132
133 /* Get the 'in' on an 8-byte alignment */
134 if (index > 0) {
135 i = key->i;
136 j = key->j;
137 for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
138 (index-- > 0) && len > 0;
139 len--, in++, out++) {
140 ++i;
141 j = j + key->arr[i];
142 tmp = key->arr[i];
143 key->arr[i] = key->arr[j];
144 key->arr[j] = tmp;
145 tmp = key->arr[i] + key->arr[j];
146 *out = *in ^ key->arr[tmp];
147 }
148 key->i = i;
149 key->j = j;
150 }
151
152 if (len == 0)
153 return;
154
155 /* See if we're fortunate and 'out' got aligned as well */
156
157 if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
158 #endif /* sun4u */
159
160 i = key->i;
161 j = key->j;
162 arr = key->arr;
163
164 #ifndef ARCFOUR_LOOP_OPTIMIZED
165 /*
166 * This loop is hasn't been reordered, but is kept for reference
167 * purposes as it's more readable
168 */
169 for (ii = 0; ii < len; ++ii) {
170 ++i;
171 ti = arr[i];
172 j = j + ti;
173 tj = arr[j];
174 arr[j] = ti;
175 arr[i] = tj;
176 out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
177 }
178
179 #else
180 /*
181 * This for loop is optimized by carefully spreading out
182 * memory access and storage to avoid conflicts,
183 * allowing the processor to process operations in parallel
184 */
185
186 /* for loop setup */
187 ++i;
188 ti = arr[i];
189 j = j + ti;
190 tj = arr[j];
191 arr[j] = ti;
192 arr[i] = tj;
193 arr_ij = arr[(ti + tj) & 0xff];
194 --len;
195
196 for (ii = 0; ii < len; ) {
197 ++i;
198 ti = arr[i];
199 j = j + ti;
200 tj = arr[j];
201 arr[j] = ti;
202 arr[i] = tj;
203
204 /* save result from previous loop: */
205 out[ii] = in[ii] ^ arr_ij;
206
207 ++ii;
208 arr_ij = arr[(ti + tj) & 0xff];
209 }
210 /* save result from last loop: */
211 out[ii] = in[ii] ^ arr_ij;
212 #endif
213
214 key->i = i;
215 key->j = j;
216
217 #ifdef sun4u
218 } else {
219 arcfour_crypt_aligned(key, len, in, out);
220 }
221 #endif /* sun4u */
222 #ifdef __amd64
223 }
224 #endif /* amd64 */
225 }
226
227
228 #ifdef __amd64
229 /*
230 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
231 * Cache the result, as the CPU can't change.
232 *
233 * Note: the userland version uses getisax() and checks for an AMD-64-only
234 * feature. The kernel version uses cpuid_getvendor().
235 */
236 int
arcfour_crypt_on_intel(void)237 arcfour_crypt_on_intel(void)
238 {
239 static int cached_result = -1;
240
241 if (cached_result == -1) { /* first time */
242 #ifdef _KERNEL
243 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
244 #else
245 uint_t ui;
246
247 (void) getisax(&ui, 1);
248 cached_result = ((ui & AV_386_AMD_MMX) == 0);
249 #endif /* _KERNEL */
250 }
251
252 return (cached_result);
253 }
254 #endif /* __amd64 */
255