/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "../arcfour.h"

/* Initialize the key stream 'key' using the key value */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
/* EXPORT DELETE START */

	uchar_t ext_keyval[256];
	uchar_t tmp;
	int i, j;

	/* Replicate the key value until it fills the 256-byte array */
	for (i = j = 0; i < 256; i++, j++) {
		if (j == keyvallen)
			j = 0;

		ext_keyval[i] = keyval[j];
	}
	for (i = 0; i < 256; i++)
		key->arr[i] = (uchar_t)i;

	j = 0;
	for (i = 0; i < 256; i++) {
		j = (j + key->arr[i] + ext_keyval[i]) % 256;
		tmp = key->arr[i];
		key->arr[i] = key->arr[j];
		key->arr[j] = tmp;
	}
	key->i = 0;
	key->j = 0;

/* EXPORT DELETE END */
}


/*
 * Encipher 'in' using 'key'.
 * in and out can point to the same location
 */
void
arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
	size_t ii, it;
	unsigned long long in0, out0, merge = 0, merge0 = 0, merge1, mask = 0;
	uchar_t i, j, *base, jj, *base1, tmp;
	unsigned int tmp0, tmp1, i_accum, count = 0, shift = 0, i1;


/* EXPORT DELETE START */
	int index;

	base = key->arr;

	index = (((uintptr_t)in) & 0x7);

	/* Get the 'in' on an 8-byte alignment */
	if (index > 0) {
		i = key->i;
		j = key->j;

		for (index = 8 - index; (index-- > 0) && len > 0;
		    len--, in++, out++) {

			i = i + 1;
			j = j + key->arr[i];
			tmp = key->arr[i];
			key->arr[i] = key->arr[j];
			key->arr[j] = tmp;
			tmp = key->arr[i] + key->arr[j];
			*out = *in ^ key->arr[tmp];
		}
		key->i = i;
		key->j = j;

	}
	if (len == 0)
		return;

	/* See if we're fortunate and 'out' got aligned as well */


	/*
	 * Niagara optimized version for
	 * the cases where the input and output buffers are aligned on
	 * a multiple of 8-byte boundary.
	 */
#ifdef sun4v
	if ((((uintptr_t)out) & 7) != 0) {
#endif	/* sun4v */
		i = key->i;
		j = key->j;
		for (ii = 0; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];
			base[i] = tmp1;
			base[j] = tmp0;
			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
#ifdef sun4v
	} else {
		i = key->i;
		j = key->j;

		/*
		 * Want to align base[i] on a 2B boundary -- allows updates
		 * via [i] to be performed in 2B chunks (reducing # of stores).
		 * Requires appropriate alias detection.
		 */
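		/*
		 * For reference, each "BYTE n" block in the unrolled code
		 * below performs one step of the plain ARCFOUR keystream
		 * generator, with S == key->arr:
		 *
		 *	i = (i + 1) & 0xff;
		 *	j = (j + S[i]) & 0xff;
		 *	swap(S[i], S[j]);
		 *	out[n] = in[n] ^ S[(S[i] + S[j]) & 0xff];
		 *
		 * The unrolling only reorders and coalesces the loads and
		 * stores; the keystream produced is unchanged.
		 */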
		if (((i+1) % 2) != 0) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = tmp1;
			base[j] = tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;

			merge0 = (unsigned long long)(base[tmp0]) << 56;
			shift = 8; mask = 0xff;
		}

		/*
		 * Note - in and out may now be misaligned; since [out] is
		 * updated in 8B chunks, we need to handle this possibility.
		 * There could also be a 1B overrun, so we need to drop out
		 * of the loop early as a result.
		 */

		for (ii = 0, i1 = i; ii < ((len-1) & (~7));
		    ii += 8, i1 = i1&0xff) {

			/*
			 * If i1 is less than 248, we know it won't wrap
			 * around (mod 256) within this block, so we don't
			 * need to bother masking i1 after each increment.
			 */
			if (i1 < 248) {

				/* BYTE 0 */
				i1 = (i1 + 1);

				/*
				 * Creating this base pointer reduces the
				 * subsequent arithmetic ops required to
				 * load [i].
				 *
				 * N.B. don't need to check if [j] aliases.
				 * [i] and [j] end up with the same values
				 * anyway.
				 */
				base1 = &base[i1];

				tmp0 = base1[0];
				j = j + tmp0;

				tmp1 = base[j];
				/*
				 * Don't store [i] yet
				 */
				i_accum = tmp1;
				base[j] = tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				/*
				 * Check [tmp0] doesn't alias with [i]
				 */

				/*
				 * Updating [out] in 8B chunks
				 */
				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base1[1];

				j = j + tmp0;

				/*
				 * [j] can now alias with [i] and [i-1].
				 * If they alias, abort the speculation.
				 */
				if ((i1 ^ j) < 2) {
					base1[0] = i_accum;

					tmp1 = base[j];

					base1[1] = tmp1;
					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |= (unsigned long long)
					    (base[tmp0]) << 48;
				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/*
					 * Speculation succeeded!  Update [i]
					 * in a 2B chunk.
					 */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}


				/*
				 * Too expensive to perform [i] speculation
				 * for every byte.  We just need to reduce the
				 * frequency of stores until store-buffer-full
				 * stalls are not the bottleneck.
				 */
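				/*
				 * Note on the 2B store above: the low 16 bits
				 * of i_accum hold the new S[i1] (upper byte)
				 * and the new S[i1+1] (lower byte), so the
				 * single 16-bit store writes both table
				 * entries at once.  This relies on i1 being
				 * even (2B aligned) at that point and on the
				 * big-endian byte order of the sun4v (SPARC)
				 * target; the aliasing check on j guards the
				 * cases where the deferred store would be
				 * observed out of order.
				 */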
				/* BYTE 2 */
				tmp0 = base1[2];
				j = j + tmp0;
				tmp1 = base[j];
				base1[2] = tmp1;
				base[j] = tmp0;
				tmp1 += tmp0;
				tmp1 = tmp1 & 0xff;
				merge |= (unsigned long long)(base[tmp1]) << 40;

				/* BYTE 3 */
				tmp0 = base1[3];
				j = j + tmp0;
				tmp1 = base[j];
				base1[3] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				tmp0 = base1[4];
				j = j + tmp0;
				tmp1 = base[j];
				base1[4] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base1[5];
				j = j + tmp0;
				tmp1 = base[j];
				base1[5] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1 + 6);
				tmp0 = base1[6];
				j = j + tmp0;
				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				tmp0 = base1[7];

				/*
				 * Perform [i] speculation again.  Identical
				 * to that performed for BYTE 0 and BYTE 1.
				 */
				j = j + tmp0;
				if ((i1 ^ j) < 2) {
					base1[6] = i_accum;
					tmp1 = base[j];

					base1[7] = tmp1;
					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
				i1++;
			} else {
				/*
				 * i is too close to wrap-around to allow
				 * masking to be disregarded
				 */

				/*
				 * Same old speculation for BYTE 0 and BYTE 1
				 */

				/* BYTE 0 */
				i1 = (i1 + 1) & 0xff;
				jj = i1;

				tmp0 = base[i1];
				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base[i1+1];

				j = j + tmp0;

				if ((jj ^ j) < 2) {
					base[jj] = i_accum;

					tmp1 = base[j];

					base[i1+1] = tmp1;
					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}

				/* BYTE 2 */
				/*
				 * Since i must be even when we enter the loop
				 * (to satisfy the alignment), it can only
				 * wrap around on the even bytes, so we just
				 * need to apply the mask every 2nd byte.
				 */
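				/*
				 * Worked example (illustrative): if i1 == 249
				 * at the top of this iteration, the eight
				 * bytes use S[250] through S[255] and then,
				 * after the masked "+ 2" for BYTE 6, S[0] and
				 * S[1].  Because i1 is even after every
				 * masked increment, i1 + 1 never exceeds 255,
				 * so the odd bytes can skip the mask.
				 */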
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 40;

				/* BYTE 3 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = tmp1;
				base[j] = tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1 + 2) & 0xff;
				jj = i1;
				tmp0 = base[i1];

				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = tmp0;


				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				i1++;
				tmp0 = base[i1];

				j = j + tmp0;
				if ((jj ^ j) < 2) {
					base[jj] = i_accum;
					tmp1 = base[j];

					base[i1] = tmp1;
					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
			}

			/*
			 * Perform the update to [out].
			 * Remember there could be alignment issues.
			 */
			in0 = *((unsigned long long *) (&in[ii]));

			merge1 = merge0 | (merge >> shift);

			merge0 = (merge & mask) << 56;

			in0 = in0 ^ merge1;

			*((unsigned long long *) (&out[ii])) = in0;
		}

		i = i1;

		/*
		 * Handle any overrun
		 */
		if (shift) {
			out[ii] = in[ii] ^ (merge0 >> 56);
			ii++;
		}

		/*
		 * Handle the final few bytes
		 */
		for (; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = tmp1;
			base[j] = tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
	}
#endif	/* sun4v */

/* EXPORT DELETE END */
}
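
/*
 * Illustrative usage sketch.  The ARCFOUR_CRYPT_EXAMPLE guard is
 * hypothetical -- it is not defined anywhere in this module's build -- so
 * the snippet below is reference material only: it shows the typical
 * init-then-crypt calling sequence for the routines above.
 */
#ifdef ARCFOUR_CRYPT_EXAMPLE
static void
arcfour_crypt_example(void)
{
	ARCFour_key key;
	uchar_t secret[16] = { 0 };	/* key bytes supplied by the caller */
	uchar_t buf[64] = { 0 };	/* plaintext, enciphered in place */

	arcfour_key_init(&key, secret, sizeof (secret));

	/* 'in' and 'out' may point to the same buffer */
	arcfour_crypt(&key, buf, buf, sizeof (buf));

	/*
	 * Deciphering is the same operation: re-initializing the key and
	 * running arcfour_crypt() over the ciphertext again restores the
	 * original plaintext.
	 */
}
#endif	/* ARCFOUR_CRYPT_EXAMPLE */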