1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #define ARCFOUR_LOOP_OPTIMIZED 27 28 #ifndef _KERNEL 29 #include <stdint.h> 30 #endif /* _KERNEL */ 31 32 #include "arcfour.h" 33 34 #if defined(__amd64) 35 /* ARCFour_key.flag values */ 36 #define ARCFOUR_ON_INTEL 1 37 #define ARCFOUR_ON_AMD64 0 38 39 #ifdef _KERNEL 40 #include <sys/x86_archext.h> 41 #include <sys/cpuvar.h> 42 43 #else 44 #include <sys/auxv.h> 45 #endif /* _KERNEL */ 46 #endif /* __amd64 */ 47 48 #ifndef __amd64 49 /* 50 * Initialize the key stream 'key' using the key value. 51 * 52 * Input: 53 * keyval User-provided key 54 * keyvallen Length, in bytes, of keyval 55 * Output: 56 * key Initialized ARCFOUR key schedule, based on keyval 57 */ 58 void 59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen) 60 { 61 /* EXPORT DELETE START */ 62 63 uchar_t ext_keyval[256]; 64 uchar_t tmp; 65 int i, j; 66 67 /* Normalize key length to 256 */ 68 for (i = j = 0; i < 256; i++, j++) { 69 if (j == keyvallen) 70 j = 0; 71 ext_keyval[i] = keyval[j]; 72 } 73 74 for (i = 0; i < 256; i++) 75 key->arr[i] = (uchar_t)i; 76 77 j = 0; 78 for (i = 0; i < 256; i++) { 79 j = (j + key->arr[i] + ext_keyval[i]) & 0xff; 80 tmp = key->arr[i]; 81 key->arr[i] = key->arr[j]; 82 key->arr[j] = tmp; 83 } 84 key->i = 0; 85 key->j = 0; 86 87 /* EXPORT DELETE END */ 88 } 89 #endif /* !__amd64 */ 90 91 92 /* 93 * Encipher 'in' using 'key'. 94 * 95 * Input: 96 * key ARCFOUR key, initialized by arcfour_key_init() 97 * in Input text 98 * out Buffer to contain output text 99 * len Length, in bytes, of the in and out buffers 100 * 101 * Output: 102 * out Buffer containing output text 103 * 104 * Note: in and out can point to the same location 105 */ 106 void 107 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len) 108 { 109 /* EXPORT DELETE START */ 110 #ifdef __amd64 111 if (key->flag == ARCFOUR_ON_AMD64) { 112 arcfour_crypt_asm(key, in, out, len); 113 } else { /* Intel EM64T */ 114 #endif /* amd64 */ 115 116 size_t ii; 117 uchar_t i, j, ti, tj; 118 #ifdef ARCFOUR_LOOP_OPTIMIZED 119 uchar_t arr_ij; 120 #endif 121 #ifdef __amd64 122 uint32_t *arr; 123 #else 124 uchar_t *arr; 125 #endif 126 127 #ifdef sun4u 128 /* 129 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for 130 * the cases where the input and output buffers are aligned on 131 * a multiple of 8-byte boundary. 132 */ 133 int index; 134 uchar_t tmp; 135 136 index = (((uint64_t)(uintptr_t)in) & 0x7); 137 138 /* Get the 'in' on an 8-byte alignment */ 139 if (index > 0) { 140 i = key->i; 141 j = key->j; 142 for (index = 8 - (uint64_t)(uintptr_t)in & 0x7; 143 (index-- > 0) && len > 0; 144 len--, in++, out++) { 145 ++i; 146 j = j + key->arr[i]; 147 tmp = key->arr[i]; 148 key->arr[i] = key->arr[j]; 149 key->arr[j] = tmp; 150 tmp = key->arr[i] + key->arr[j]; 151 *out = *in ^ key->arr[tmp]; 152 } 153 key->i = i; 154 key->j = j; 155 } 156 157 if (len == 0) 158 return; 159 160 /* See if we're fortunate and 'out' got aligned as well */ 161 162 if ((((uint64_t)(uintptr_t)out) & 7) != 0) { 163 #endif /* sun4u */ 164 165 i = key->i; 166 j = key->j; 167 arr = key->arr; 168 169 #ifndef ARCFOUR_LOOP_OPTIMIZED 170 /* 171 * This loop is hasn't been reordered, but is kept for reference 172 * purposes as it's more readable 173 */ 174 for (ii = 0; ii < len; ++ii) { 175 ++i; 176 ti = arr[i]; 177 j = j + ti; 178 tj = arr[j]; 179 arr[j] = ti; 180 arr[i] = tj; 181 out[ii] = in[ii] ^ arr[(ti + tj) & 0xff]; 182 } 183 184 #else 185 /* 186 * This for loop is optimized by carefully spreading out 187 * memory access and storage to avoid conflicts, 188 * allowing the processor to process operations in parallel 189 */ 190 191 /* for loop setup */ 192 ++i; 193 ti = arr[i]; 194 j = j + ti; 195 tj = arr[j]; 196 arr[j] = ti; 197 arr[i] = tj; 198 arr_ij = arr[(ti + tj) & 0xff]; 199 --len; 200 201 for (ii = 0; ii < len; ) { 202 ++i; 203 ti = arr[i]; 204 j = j + ti; 205 tj = arr[j]; 206 arr[j] = ti; 207 arr[i] = tj; 208 209 /* save result from previous loop: */ 210 out[ii] = in[ii] ^ arr_ij; 211 212 ++ii; 213 arr_ij = arr[(ti + tj) & 0xff]; 214 } 215 /* save result from last loop: */ 216 out[ii] = in[ii] ^ arr_ij; 217 #endif 218 219 key->i = i; 220 key->j = j; 221 222 #ifdef sun4u 223 } else { 224 arcfour_crypt_aligned(key, len, in, out); 225 } 226 #endif /* sun4u */ 227 #ifdef __amd64 228 } 229 #endif /* amd64 */ 230 231 /* EXPORT DELETE END */ 232 } 233 234 235 #ifdef __amd64 236 /* 237 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64). 238 * Cache the result, as the CPU can't change. 239 * 240 * Note: the userland version uses getisax() and checks for an AMD-64-only 241 * feature. The kernel version uses cpuid_getvendor(). 242 */ 243 int 244 arcfour_crypt_on_intel(void) 245 { 246 static int cached_result = -1; 247 248 if (cached_result == -1) { /* first time */ 249 #ifdef _KERNEL 250 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel); 251 #else 252 uint_t ui; 253 254 (void) getisax(&ui, 1); 255 cached_result = ((ui & AV_386_AMD_MMX) == 0); 256 #endif /* _KERNEL */ 257 } 258 259 return (cached_result); 260 } 261 #endif /* __amd64 */ 262