1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "../arcfour.h" 30 31 /* Initialize the key stream 'key' using the key value */ 32 void 33 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen) 34 { 35 /* EXPORT DELETE START */ 36 37 uchar_t ext_keyval[256]; 38 uchar_t tmp; 39 int i, j; 40 41 for (i = j = 0; i < 256; i++, j++) { 42 if (j == keyvallen) 43 j = 0; 44 45 ext_keyval[i] = keyval[j]; 46 } 47 for (i = 0; i < 256; i++) 48 key->arr[i] = (uchar_t)i; 49 50 j = 0; 51 for (i = 0; i < 256; i++) { 52 j = (j + key->arr[i] + ext_keyval[i]) % 256; 53 tmp = key->arr[i]; 54 key->arr[i] = key->arr[j]; 55 key->arr[j] = tmp; 56 } 57 key->i = 0; 58 key->j = 0; 59 60 /* EXPORT DELETE END */ 61 } 62 63 64 /* 65 * Encipher 'in' using 'key. 66 * in and out can point to the same location 67 */ 68 void 69 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len) 70 { 71 size_t ii, it; 72 unsigned long long in0, out0, merge = 0, merge0 = 0, merge1, mask = 0; 73 uchar_t i, j, *base, jj, *base1, tmp; 74 unsigned int tmp0, tmp1, i_accum, count = 0, shift = 0, i1; 75 76 77 /* EXPORT DELETE START */ 78 int index; 79 80 base = key->arr; 81 82 index = (((uint64_t)in) & 0x7); 83 84 /* Get the 'in' on an 8-byte alignment */ 85 if (index > 0) { 86 i = key->i; 87 j = key->j; 88 for (index = 8 - (uint64_t)in & 0x7; (index-- > 0) && len > 0; 89 len--, in++, out++) { 90 i = i + 1; 91 j = j + key->arr[i]; 92 tmp = key->arr[i]; 93 key->arr[i] = key->arr[j]; 94 key->arr[j] = tmp; 95 tmp = key->arr[i] + key->arr[j]; 96 *out = *in ^ key->arr[tmp]; 97 } 98 key->i = i; 99 key->j = j; 100 101 } 102 if (len == 0) 103 return; 104 105 /* See if we're fortunate and 'out' got aligned as well */ 106 107 108 /* 109 * Niagara optimized version for 110 * the cases where the input and output buffers are aligned on 111 * a multiple of 8-byte boundary. 112 */ 113 #ifdef sun4v 114 if ((((uint64_t)out) & 7) != 0) { 115 #endif /* sun4v */ 116 i = key->i; 117 j = key->j; 118 for (ii = 0; ii < len; ii++) { 119 i = i + 1; 120 tmp0 = base[i]; 121 j = j + tmp0; 122 tmp1 = base[j]; 123 base[i] = tmp1; 124 base[j] = tmp0; 125 tmp0 += tmp1; 126 tmp0 = tmp0 & 0xff; 127 out[ii] = in[ii] ^ base[tmp0]; 128 } 129 key->i = i; 130 key->j = j; 131 #ifdef sun4v 132 } else { 133 i = key->i; 134 j = key->j; 135 136 /* 137 * Want to align base[i] on a 2B boundary -- allows updates 138 * via [i] to be performed in 2B chunks (reducing # of stores). 139 * Requires appropriate alias detection. 140 */ 141 142 if (((i+1) % 2) != 0) { 143 i = i + 1; 144 tmp0 = base[i]; 145 j = j + tmp0; 146 tmp1 = base[j]; 147 148 base[i] = tmp1; 149 base[j] = tmp0; 150 151 tmp0 += tmp1; 152 tmp0 = tmp0 & 0xff; 153 154 merge0 = (unsigned long long)(base[tmp0]) << 56; 155 shift = 8; mask = 0xff; 156 } 157 158 /* 159 * Note - in and out may now be misaligned - 160 * as updating [out] in 8B chunks need to handle this 161 * possibility. Also could have a 1B overrun. 162 * Need to drop out of loop early as a result. 163 */ 164 165 for (ii = 0, i1 = i; ii < ((len-1) & (~7)); 166 ii += 8, i1 = i1&0xff) { 167 168 /* 169 * If i < less than 248, know wont wrap around 170 * (i % 256), so don't need to bother with masking i 171 * after each increment 172 */ 173 if (i1 < 248) { 174 175 /* BYTE 0 */ 176 i1 = (i1 + 1); 177 178 /* 179 * Creating this base pointer reduces subsequent 180 * arihmetic ops required to load [i] 181 * 182 * N.B. don't need to check if [j] aliases. 183 * [i] and [j] end up with the same values 184 * anyway. 185 */ 186 base1 = &base[i1]; 187 188 tmp0 = base1[0]; 189 j = j + tmp0; 190 191 tmp1 = base[j]; 192 /* 193 * Don't store [i] yet 194 */ 195 i_accum = tmp1; 196 base[j] = tmp0; 197 198 tmp0 += tmp1; 199 tmp0 = tmp0 & 0xff; 200 201 /* 202 * Check [tmp0] doesn't alias with [i] 203 */ 204 205 /* 206 * Updating [out] in 8B chunks 207 */ 208 if (i1 == tmp0) { 209 merge = 210 (unsigned long long)(i_accum) << 56; 211 } else { 212 merge = 213 (unsigned long long)(base[tmp0]) << 214 56; 215 } 216 217 /* BYTE 1 */ 218 tmp0 = base1[1]; 219 220 j = j + tmp0; 221 222 /* 223 * [j] can now alias with [i] and [i-1] 224 * If alias abort speculation 225 */ 226 if ((i1 ^ j) < 2) { 227 base1[0] = i_accum; 228 229 tmp1 = base[j]; 230 231 base1[1] = tmp1; 232 base[j] = tmp0; 233 234 tmp0 += tmp1; 235 tmp0 = tmp0 & 0xff; 236 237 merge |= (unsigned long long) 238 (base[tmp0]) << 48; 239 } else { 240 241 tmp1 = base[j]; 242 243 i_accum = i_accum << 8; 244 i_accum |= tmp1; 245 246 base[j] = tmp0; 247 248 tmp0 += tmp1; 249 tmp0 = tmp0 & 0xff; 250 251 /* 252 * Speculation suceeded! Update [i] 253 * in 2B chunk 254 */ 255 *((unsigned short *) &base[i1]) = 256 i_accum; 257 258 merge |= 259 (unsigned long long)(base[tmp0]) << 260 48; 261 } 262 263 264 /* 265 * Too expensive to perform [i] speculation for 266 * every byte. Just need to reduce frequency 267 * of stores until store buffer full stalls 268 * are not the bottleneck. 269 */ 270 271 /* BYTE 2 */ 272 tmp0 = base1[2]; 273 j = j + tmp0; 274 tmp1 = base[j]; 275 base1[2] = tmp1; 276 base[j] = tmp0; 277 tmp1 += tmp0; 278 tmp1 = tmp1 & 0xff; 279 merge |= (unsigned long long)(base[tmp1]) << 40; 280 281 /* BYTE 3 */ 282 tmp0 = base1[3]; 283 j = j + tmp0; 284 tmp1 = base[j]; 285 base1[3] = tmp1; 286 base[j] = tmp0; 287 tmp0 += tmp1; 288 tmp0 = tmp0 & 0xff; 289 merge |= (unsigned long long)(base[tmp0]) << 32; 290 291 /* BYTE 4 */ 292 tmp0 = base1[4]; 293 j = j + tmp0; 294 tmp1 = base[j]; 295 base1[4] = tmp1; 296 base[j] = tmp0; 297 tmp0 += tmp1; 298 tmp0 = tmp0 & 0xff; 299 merge |= (unsigned long long)(base[tmp0]) << 24; 300 301 /* BYTE 5 */ 302 tmp0 = base1[5]; 303 j = j + tmp0; 304 tmp1 = base[j]; 305 base1[5] = tmp1; 306 base[j] = tmp0; 307 tmp0 += tmp1; 308 tmp0 = tmp0 & 0xff; 309 merge |= (unsigned long long)(base[tmp0]) << 16; 310 311 /* BYTE 6 */ 312 i1 = (i1+6); 313 tmp0 = base1[6]; 314 j = j + tmp0; 315 tmp1 = base[j]; 316 i_accum = tmp1; 317 base[j] = tmp0; 318 319 tmp0 += tmp1; 320 tmp0 = tmp0 & 0xff; 321 322 if (i1 == tmp0) { 323 merge |= 324 (unsigned long long)(i_accum) << 8; 325 } else { 326 merge |= 327 (unsigned long long)(base[tmp0]) << 328 8; 329 } 330 331 /* BYTE 7 */ 332 tmp0 = base1[7]; 333 334 /* 335 * Perform [i] speculation again. Indentical 336 * to that performed for BYTE0 and BYTE1. 337 */ 338 j = j + tmp0; 339 if ((i1 ^ j) < 2) { 340 base1[6] = i_accum; 341 tmp1 = base[j]; 342 343 base1[7] = tmp1; 344 base[j] = tmp0; 345 346 tmp0 += tmp1; 347 tmp0 = tmp0 & 0xff; 348 349 merge |= 350 (unsigned long long)(base[tmp0]); 351 352 } else { 353 tmp1 = base[j]; 354 355 i_accum = i_accum << 8; 356 i_accum |= tmp1; 357 358 base[j] = tmp0; 359 360 tmp0 += tmp1; 361 tmp0 = tmp0 & 0xff; 362 363 *((unsigned short *) &base[i1]) = 364 i_accum; 365 366 merge |= 367 (unsigned long long)(base[tmp0]); 368 } 369 i1++; 370 } else { 371 /* 372 * i is too close to wrap-around to allow 373 * masking to be disregarded 374 */ 375 376 /* 377 * Same old speculation for BYTE 0 and BYTE 1 378 */ 379 380 /* BYTE 0 */ 381 i1 = (i1 + 1) & 0xff; 382 jj = i1; 383 384 tmp0 = base[i1]; 385 j = j + tmp0; 386 387 tmp1 = base[j]; 388 i_accum = tmp1; 389 base[j] = tmp0; 390 391 tmp0 += tmp1; 392 tmp0 = tmp0 & 0xff; 393 394 if (i1 == tmp0) { 395 merge = 396 (unsigned long long)(i_accum) << 56; 397 } else { 398 merge = 399 (unsigned long long)(base[tmp0]) << 400 56; 401 } 402 403 /* BYTE 1 */ 404 tmp0 = base[i1+1]; 405 406 j = j + tmp0; 407 408 if ((jj ^ j) < 2) { 409 base[jj] = i_accum; 410 411 tmp1 = base[j]; 412 413 base[i1+1] = tmp1; 414 base[j] = tmp0; 415 416 tmp0 += tmp1; 417 tmp0 = tmp0 & 0xff; 418 419 merge |= 420 (unsigned long long)(base[tmp0]) << 421 48; 422 } else { 423 424 tmp1 = base[j]; 425 426 i_accum = i_accum << 8; 427 i_accum |= tmp1; 428 429 base[j] = tmp0; 430 431 tmp0 += tmp1; 432 tmp0 = tmp0 & 0xff; 433 434 *((unsigned short *) &base[jj]) = 435 i_accum; 436 437 merge |= 438 (unsigned long long)(base[tmp0]) << 439 48; 440 } 441 442 /* BYTE 2 */ 443 /* 444 * As know i must be even when enter loop (to 445 * satisfy alignment), can only wrap around 446 * on the even bytes. So just need to perform 447 * mask every 2nd byte 448 */ 449 i1 = (i1 + 2) & 0xff; 450 tmp0 = base[i1]; 451 j = j + tmp0; 452 tmp1 = base[j]; 453 base[i1] = tmp1; 454 base[j] = tmp0; 455 tmp0 += tmp1; 456 tmp0 = tmp0 & 0xff; 457 merge |= (unsigned long long)(base[tmp0]) << 40; 458 459 /* BYTE 3 */ 460 tmp0 = base[i1+1]; 461 j = j + tmp0; 462 tmp1 = base[j]; 463 base[i1+1] = tmp1; 464 base[j] = tmp0; 465 tmp0 += tmp1; 466 tmp0 = tmp0 & 0xff; 467 merge |= (unsigned long long)(base[tmp0]) << 32; 468 469 /* BYTE 4 */ 470 i1 = (i1 + 2) & 0xff; 471 tmp0 = base[i1]; 472 j = j + tmp0; 473 tmp1 = base[j]; 474 base[i1] = tmp1; 475 base[j] = tmp0; 476 tmp0 += tmp1; 477 tmp0 = tmp0 & 0xff; 478 merge |= (unsigned long long)(base[tmp0]) << 24; 479 480 /* BYTE 5 */ 481 tmp0 = base[i1+1]; 482 j = j + tmp0; 483 tmp1 = base[j]; 484 base[i1+1] = tmp1; 485 base[j] = tmp0; 486 tmp0 += tmp1; 487 tmp0 = tmp0 & 0xff; 488 merge |= (unsigned long long)(base[tmp0]) << 16; 489 490 /* BYTE 6 */ 491 i1 = (i1+2) &0xff; 492 jj = i1; 493 tmp0 = base[i1]; 494 495 j = j + tmp0; 496 497 tmp1 = base[j]; 498 i_accum = tmp1; 499 base[j] = tmp0; 500 501 502 tmp0 += tmp1; 503 tmp0 = tmp0 & 0xff; 504 505 if (i1 == tmp0) { 506 merge |= 507 (unsigned long long)(i_accum) << 8; 508 } else { 509 merge |= 510 (unsigned long long)(base[tmp0]) << 511 8; 512 } 513 514 /* BYTE 7 */ 515 i1++; 516 tmp0 = base[i1]; 517 518 j = j + tmp0; 519 if ((jj ^ j) < 2) { 520 base[jj] = i_accum; 521 tmp1 = base[j]; 522 523 base[i1] = tmp1; 524 base[j] = tmp0; 525 526 tmp0 += tmp1; 527 tmp0 = tmp0 & 0xff; 528 529 merge |= 530 (unsigned long long)(base[tmp0]); 531 532 } else { 533 tmp1 = base[j]; 534 535 i_accum = i_accum << 8; 536 i_accum |= tmp1; 537 538 base[j] = tmp0; 539 540 tmp0 += tmp1; 541 tmp0 = tmp0 & 0xff; 542 543 *((unsigned short *) &base[jj]) = 544 i_accum; 545 546 merge |= 547 (unsigned long long)(base[tmp0]); 548 } 549 } 550 551 /* 552 * Perform update to [out] 553 * Remember could be alignment issues 554 */ 555 in0 = *((unsigned long long *) (&in[ii])); 556 557 merge1 = merge0 | (merge >> shift); 558 559 merge0 = (merge & mask) << 56; 560 561 in0 = in0 ^ merge1; 562 563 *((unsigned long long *) (&out[ii])) = in0; 564 } 565 566 i = i1; 567 568 /* 569 * Handle any overrun 570 */ 571 if (shift) { 572 out[ii] = in[ii] ^ (merge0 >> 56); 573 ii++; 574 } 575 576 /* 577 * Handle final few bytes 578 */ 579 for (; ii < len; ii++) { 580 i = i + 1; 581 tmp0 = base[i]; 582 j = j + tmp0; 583 tmp1 = base[j]; 584 585 base[i] = tmp1; 586 base[j] = tmp0; 587 588 tmp0 += tmp1; 589 tmp0 = tmp0 & 0xff; 590 out[ii] = in[ii] ^ base[tmp0]; 591 } 592 key->i = i; 593 key->j = j; 594 } 595 #endif /* sun4v */ 596 597 /* EXPORT DELETE END */ 598 } 599