1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #define ARCFOUR_LOOP_OPTIMIZED 27 28 #ifndef _KERNEL 29 #include <stdint.h> 30 #endif /* _KERNEL */ 31 32 #include "arcfour.h" 33 34 #if defined(__amd64) 35 /* ARCFour_key.flag values */ 36 #define ARCFOUR_ON_INTEL 1 37 #define ARCFOUR_ON_AMD64 0 38 39 #ifdef _KERNEL 40 #include <sys/x86_archext.h> 41 #include <sys/cpuvar.h> 42 43 #else 44 #include <sys/auxv.h> 45 #endif /* _KERNEL */ 46 #endif /* __amd64 */ 47 48 #ifndef __amd64 49 /* 50 * Initialize the key stream 'key' using the key value. 51 * 52 * Input: 53 * keyval User-provided key 54 * keyvallen Length, in bytes, of keyval 55 * Output: 56 * key Initialized ARCFOUR key schedule, based on keyval 57 */ 58 void 59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen) 60 { 61 uchar_t ext_keyval[256]; 62 uchar_t tmp; 63 int i, j; 64 65 /* Normalize key length to 256 */ 66 for (i = j = 0; i < 256; i++, j++) { 67 if (j == keyvallen) 68 j = 0; 69 ext_keyval[i] = keyval[j]; 70 } 71 72 for (i = 0; i < 256; i++) 73 key->arr[i] = (uchar_t)i; 74 75 j = 0; 76 for (i = 0; i < 256; i++) { 77 j = (j + key->arr[i] + ext_keyval[i]) & 0xff; 78 tmp = key->arr[i]; 79 key->arr[i] = key->arr[j]; 80 key->arr[j] = tmp; 81 } 82 key->i = 0; 83 key->j = 0; 84 } 85 #endif /* !__amd64 */ 86 87 88 /* 89 * Encipher 'in' using 'key'. 90 * 91 * Input: 92 * key ARCFOUR key, initialized by arcfour_key_init() 93 * in Input text 94 * out Buffer to contain output text 95 * len Length, in bytes, of the in and out buffers 96 * 97 * Output: 98 * out Buffer containing output text 99 * 100 * Note: in and out can point to the same location 101 */ 102 void 103 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len) 104 { 105 #ifdef __amd64 106 if (key->flag == ARCFOUR_ON_AMD64) { 107 arcfour_crypt_asm(key, in, out, len); 108 } else { /* Intel EM64T */ 109 #endif /* amd64 */ 110 111 size_t ii; 112 uchar_t i, j, ti, tj; 113 #ifdef ARCFOUR_LOOP_OPTIMIZED 114 uchar_t arr_ij; 115 #endif 116 #ifdef __amd64 117 uint32_t *arr; 118 #else 119 uchar_t *arr; 120 #endif 121 122 #ifdef sun4u 123 /* 124 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for 125 * the cases where the input and output buffers are aligned on 126 * a multiple of 8-byte boundary. 127 */ 128 int index; 129 uchar_t tmp; 130 131 index = (((uint64_t)(uintptr_t)in) & 0x7); 132 133 /* Get the 'in' on an 8-byte alignment */ 134 if (index > 0) { 135 i = key->i; 136 j = key->j; 137 for (index = 8 - (uint64_t)(uintptr_t)in & 0x7; 138 (index-- > 0) && len > 0; 139 len--, in++, out++) { 140 ++i; 141 j = j + key->arr[i]; 142 tmp = key->arr[i]; 143 key->arr[i] = key->arr[j]; 144 key->arr[j] = tmp; 145 tmp = key->arr[i] + key->arr[j]; 146 *out = *in ^ key->arr[tmp]; 147 } 148 key->i = i; 149 key->j = j; 150 } 151 152 if (len == 0) 153 return; 154 155 /* See if we're fortunate and 'out' got aligned as well */ 156 157 if ((((uint64_t)(uintptr_t)out) & 7) != 0) { 158 #endif /* sun4u */ 159 160 i = key->i; 161 j = key->j; 162 arr = key->arr; 163 164 #ifndef ARCFOUR_LOOP_OPTIMIZED 165 /* 166 * This loop is hasn't been reordered, but is kept for reference 167 * purposes as it's more readable 168 */ 169 for (ii = 0; ii < len; ++ii) { 170 ++i; 171 ti = arr[i]; 172 j = j + ti; 173 tj = arr[j]; 174 arr[j] = ti; 175 arr[i] = tj; 176 out[ii] = in[ii] ^ arr[(ti + tj) & 0xff]; 177 } 178 179 #else 180 /* 181 * This for loop is optimized by carefully spreading out 182 * memory access and storage to avoid conflicts, 183 * allowing the processor to process operations in parallel 184 */ 185 186 /* for loop setup */ 187 ++i; 188 ti = arr[i]; 189 j = j + ti; 190 tj = arr[j]; 191 arr[j] = ti; 192 arr[i] = tj; 193 arr_ij = arr[(ti + tj) & 0xff]; 194 --len; 195 196 for (ii = 0; ii < len; ) { 197 ++i; 198 ti = arr[i]; 199 j = j + ti; 200 tj = arr[j]; 201 arr[j] = ti; 202 arr[i] = tj; 203 204 /* save result from previous loop: */ 205 out[ii] = in[ii] ^ arr_ij; 206 207 ++ii; 208 arr_ij = arr[(ti + tj) & 0xff]; 209 } 210 /* save result from last loop: */ 211 out[ii] = in[ii] ^ arr_ij; 212 #endif 213 214 key->i = i; 215 key->j = j; 216 217 #ifdef sun4u 218 } else { 219 arcfour_crypt_aligned(key, len, in, out); 220 } 221 #endif /* sun4u */ 222 #ifdef __amd64 223 } 224 #endif /* amd64 */ 225 } 226 227 228 #ifdef __amd64 229 /* 230 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64). 231 * Cache the result, as the CPU can't change. 232 * 233 * Note: the userland version uses getisax() and checks for an AMD-64-only 234 * feature. The kernel version uses cpuid_getvendor(). 235 */ 236 int 237 arcfour_crypt_on_intel(void) 238 { 239 static int cached_result = -1; 240 241 if (cached_result == -1) { /* first time */ 242 #ifdef _KERNEL 243 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel); 244 #else 245 uint_t ui; 246 247 (void) getisax(&ui, 1); 248 cached_result = ((ui & AV_386_AMD_MMX) == 0); 249 #endif /* _KERNEL */ 250 } 251 252 return (cached_result); 253 } 254 #endif /* __amd64 */ 255