/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "../arcfour.h"

/* Initialize the key stream 'key' using the key value */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
/* EXPORT DELETE START */

    uchar_t ext_keyval[256];
    uchar_t tmp;
    int i, j;

    /* Repeat the key value to fill all 256 bytes of ext_keyval */
    for (i = j = 0; i < 256; i++, j++) {
        if (j == keyvallen)
            j = 0;

        ext_keyval[i] = keyval[j];
    }
    for (i = 0; i < 256; i++)
        key->arr[i] = (uchar_t)i;

    j = 0;
    for (i = 0; i < 256; i++) {
        j = (j + key->arr[i] + ext_keyval[i]) % 256;
        tmp = key->arr[i];
        key->arr[i] = key->arr[j];
        key->arr[j] = tmp;
    }
    key->i = 0;
    key->j = 0;

/* EXPORT DELETE END */
}
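/*
 * For reference, a minimal sketch of the classic byte-at-a-time RC4
 * PRGA that the heavily unrolled arcfour_crypt() below is equivalent
 * to.  ARCFOUR_REFERENCE is a hypothetical macro used here only to
 * keep this sketch out of the normal build; it is shown to make the
 * optimized code easier to follow, not as part of the shipped
 * implementation.
 */
#ifdef ARCFOUR_REFERENCE
static void
arcfour_crypt_ref(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
    uchar_t i = key->i, j = key->j, tmp;
    size_t n;

    for (n = 0; n < len; n++) {
        i = i + 1;                      /* i = (i + 1) mod 256 */
        j = j + key->arr[i];            /* j = (j + S[i]) mod 256 */
        tmp = key->arr[i];              /* swap S[i] and S[j] */
        key->arr[i] = key->arr[j];
        key->arr[j] = tmp;
        tmp = key->arr[i] + key->arr[j];
        out[n] = in[n] ^ key->arr[tmp]; /* XOR with S[(S[i]+S[j]) mod 256] */
    }
    key->i = i;
    key->j = j;
}
#endif /* ARCFOUR_REFERENCE */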
/*
 * Encipher 'in' using 'key'.
 * 'in' and 'out' can point to the same location.
 */
void
arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
    size_t ii;
    unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
    uchar_t i, j, *base, jj, *base1, tmp;
    unsigned int tmp0, tmp1, i_accum, shift = 0, i1;

/* EXPORT DELETE START */
    int index;

    base = key->arr;

    index = (((uintptr_t)in) & 0x7);

    /* Get 'in' onto an 8-byte alignment */
    if (index > 0) {
        i = key->i;
        j = key->j;

        for (index = 8 - index; (index-- > 0) && len > 0;
            len--, in++, out++) {

            i = i + 1;
            j = j + key->arr[i];
            tmp = key->arr[i];
            key->arr[i] = key->arr[j];
            key->arr[j] = tmp;
            tmp = key->arr[i] + key->arr[j];
            *out = *in ^ key->arr[tmp];
        }
        key->i = i;
        key->j = j;
    }
    if (len == 0)
        return;

    /* See if we're fortunate and 'out' got aligned as well */

    /*
     * Niagara-optimized version for the cases where the input and
     * output buffers are aligned on a multiple-of-8-byte boundary.
     */
#ifdef sun4v
    if ((((uintptr_t)out) & 7) != 0) {
#endif /* sun4v */
        i = key->i;
        j = key->j;
        for (ii = 0; ii < len; ii++) {
            i = i + 1;
            tmp0 = base[i];
            j = j + tmp0;
            tmp1 = base[j];
            base[i] = tmp1;
            base[j] = tmp0;
            tmp0 += tmp1;
            tmp0 = tmp0 & 0xff;
            out[ii] = in[ii] ^ base[tmp0];
        }
        key->i = i;
        key->j = j;
#ifdef sun4v
    } else {
        i = key->i;
        j = key->j;

        /*
         * We want to align base[i] on a 2-byte boundary -- this allows
         * updates via [i] to be performed in 2-byte chunks (reducing
         * the number of stores).  Requires appropriate alias detection.
         */
        if (((i + 1) % 2) != 0) {
            i = i + 1;
            tmp0 = base[i];
            j = j + tmp0;
            tmp1 = base[j];

            base[i] = tmp1;
            base[j] = tmp0;

            tmp0 += tmp1;
            tmp0 = tmp0 & 0xff;

            merge0 = (unsigned long long)(base[tmp0]) << 56;
            shift = 8;
            mask = 0xff;
        }

        /*
         * Note - 'in' and 'out' may now be misaligned, so updating
         * [out] in 8-byte chunks must handle that possibility.  There
         * can also be a 1-byte overrun, so we need to drop out of the
         * loop early as a result.
         */
        for (ii = 0, i1 = i; ii < ((len - 1) & (~7));
            ii += 8, i1 = i1 & 0xff) {

            /*
             * If i1 is less than 248, we know it won't wrap around
             * (i % 256), so we don't need to bother masking i1 after
             * each increment.
             */
            if (i1 < 248) {

                /* BYTE 0 */
                i1 = (i1 + 1);

                /*
                 * Creating this base pointer reduces the subsequent
                 * arithmetic ops required to load [i].
                 *
                 * N.B. we don't need to check whether [j] aliases:
                 * [i] and [j] end up with the same values anyway.
                 */
                base1 = &base[i1];

                tmp0 = base1[0];
                j = j + tmp0;

                tmp1 = base[j];
                /*
                 * Don't store [i] yet.
                 */
                i_accum = tmp1;
                base[j] = tmp0;

                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;

                /*
                 * Check that [tmp0] doesn't alias with [i].
                 */

                /*
                 * Updating [out] in 8-byte chunks.
                 */
                if (i1 == tmp0) {
                    merge = (unsigned long long)(i_accum) << 56;
                } else {
                    merge = (unsigned long long)(base[tmp0]) << 56;
                }

                /* BYTE 1 */
                tmp0 = base1[1];

                j = j + tmp0;

                /*
                 * [j] can now alias with [i] and [i-1].
                 * If it aliases, abort the speculation.
                 */
                if ((i1 ^ j) < 2) {
                    base1[0] = i_accum;

                    tmp1 = base[j];

                    base1[1] = tmp1;
                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    merge |= (unsigned long long)(base[tmp0]) << 48;
                } else {

                    tmp1 = base[j];

                    i_accum = i_accum << 8;
                    i_accum |= tmp1;

                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    /*
                     * Speculation succeeded!  Update [i] in a
                     * 2-byte chunk.
                     */
                    /* LINTED E_BAD_PTR_CAST_ALIGN */
                    *((unsigned short *)&base[i1]) = i_accum;

                    merge |= (unsigned long long)(base[tmp0]) << 48;
                }

                /*
                 * Too expensive to perform [i] speculation for every
                 * byte.  We just need to reduce the frequency of
                 * stores until store-buffer-full stalls are no longer
                 * the bottleneck.
                 */
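                /*
                 * Note on the 2-byte stores used by the speculation
                 * above: on sun4v (big-endian SPARC) the store
                 *     *((unsigned short *)&base[i1]) = i_accum;
                 * writes the high byte of i_accum to base[i1] and the
                 * low byte to base[i1 + 1], i.e. both pending S-box
                 * bytes in a single store.  The alignment prologue
                 * guarantees i1 is even here, so the store is 2-byte
                 * aligned.
                 */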
                /* BYTE 2 */
                tmp0 = base1[2];
                j = j + tmp0;
                tmp1 = base[j];
                base1[2] = tmp1;
                base[j] = tmp0;
                tmp1 += tmp0;
                tmp1 = tmp1 & 0xff;
                merge |= (unsigned long long)(base[tmp1]) << 40;

                /* BYTE 3 */
                tmp0 = base1[3];
                j = j + tmp0;
                tmp1 = base[j];
                base1[3] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 32;

                /* BYTE 4 */
                tmp0 = base1[4];
                j = j + tmp0;
                tmp1 = base[j];
                base1[4] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 24;

                /* BYTE 5 */
                tmp0 = base1[5];
                j = j + tmp0;
                tmp1 = base[j];
                base1[5] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 16;

                /* BYTE 6 */
                i1 = (i1 + 6);
                tmp0 = base1[6];
                j = j + tmp0;
                tmp1 = base[j];
                i_accum = tmp1;
                base[j] = tmp0;

                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;

                if (i1 == tmp0) {
                    merge |= (unsigned long long)(i_accum) << 8;
                } else {
                    merge |= (unsigned long long)(base[tmp0]) << 8;
                }

                /* BYTE 7 */
                tmp0 = base1[7];

                /*
                 * Perform [i] speculation again.  Identical to that
                 * performed for BYTE 0 and BYTE 1.
                 */
                j = j + tmp0;
                if ((i1 ^ j) < 2) {
                    base1[6] = i_accum;
                    tmp1 = base[j];

                    base1[7] = tmp1;
                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    merge |= (unsigned long long)(base[tmp0]);
                } else {
                    tmp1 = base[j];

                    i_accum = i_accum << 8;
                    i_accum |= tmp1;

                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    /* LINTED E_BAD_PTR_CAST_ALIGN */
                    *((unsigned short *)&base[i1]) = i_accum;

                    merge |= (unsigned long long)(base[tmp0]);
                }
                i1++;
            } else {
                /*
                 * i1 is too close to wrap-around to allow the
                 * masking to be disregarded.
                 */

                /*
                 * Same speculation as for BYTE 0 and BYTE 1 above.
                 */

                /* BYTE 0 */
                i1 = (i1 + 1) & 0xff;
                jj = i1;

                tmp0 = base[i1];
                j = j + tmp0;

                tmp1 = base[j];
                i_accum = tmp1;
                base[j] = tmp0;

                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;

                if (i1 == tmp0) {
                    merge = (unsigned long long)(i_accum) << 56;
                } else {
                    merge = (unsigned long long)(base[tmp0]) << 56;
                }

                /* BYTE 1 */
                tmp0 = base[i1 + 1];

                j = j + tmp0;
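                /*
                 * Alias test: jj is even, so (jj ^ j) < 2 holds
                 * exactly when j == jj or j == jj + 1, i.e. when [j]
                 * aliases one of the two bytes the pending 2-byte
                 * store would cover.  In that case abort speculation
                 * and fall back to single-byte stores.
                 */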
                if ((jj ^ j) < 2) {
                    base[jj] = i_accum;

                    tmp1 = base[j];

                    base[i1 + 1] = tmp1;
                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    merge |= (unsigned long long)(base[tmp0]) << 48;
                } else {

                    tmp1 = base[j];

                    i_accum = i_accum << 8;
                    i_accum |= tmp1;

                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    /* LINTED E_BAD_PTR_CAST_ALIGN */
                    *((unsigned short *)&base[jj]) = i_accum;

                    merge |= (unsigned long long)(base[tmp0]) << 48;
                }

                /* BYTE 2 */
                /*
                 * Since i1 must be even when we enter the loop (to
                 * satisfy alignment), it can only wrap around on the
                 * even bytes, so the mask only needs to be applied
                 * on every second byte.
                 */
                i1 = (i1 + 2) & 0xff;
                tmp0 = base[i1];
                j = j + tmp0;
                tmp1 = base[j];
                base[i1] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 40;

                /* BYTE 3 */
                tmp0 = base[i1 + 1];
                j = j + tmp0;
                tmp1 = base[j];
                base[i1 + 1] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 32;

                /* BYTE 4 */
                i1 = (i1 + 2) & 0xff;
                tmp0 = base[i1];
                j = j + tmp0;
                tmp1 = base[j];
                base[i1] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 24;

                /* BYTE 5 */
                tmp0 = base[i1 + 1];
                j = j + tmp0;
                tmp1 = base[j];
                base[i1 + 1] = tmp1;
                base[j] = tmp0;
                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;
                merge |= (unsigned long long)(base[tmp0]) << 16;

                /* BYTE 6 */
                i1 = (i1 + 2) & 0xff;
                jj = i1;
                tmp0 = base[i1];

                j = j + tmp0;

                tmp1 = base[j];
                i_accum = tmp1;
                base[j] = tmp0;

                tmp0 += tmp1;
                tmp0 = tmp0 & 0xff;

                if (i1 == tmp0) {
                    merge |= (unsigned long long)(i_accum) << 8;
                } else {
                    merge |= (unsigned long long)(base[tmp0]) << 8;
                }

                /* BYTE 7 */
                i1++;
                tmp0 = base[i1];

                j = j + tmp0;
                if ((jj ^ j) < 2) {
                    base[jj] = i_accum;
                    tmp1 = base[j];

                    base[i1] = tmp1;
                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    merge |= (unsigned long long)(base[tmp0]);
                } else {

                    tmp1 = base[j];

                    i_accum = i_accum << 8;
                    i_accum |= tmp1;

                    base[j] = tmp0;

                    tmp0 += tmp1;
                    tmp0 = tmp0 & 0xff;

                    /* LINTED E_BAD_PTR_CAST_ALIGN */
                    *((unsigned short *)&base[jj]) = i_accum;

                    merge |= (unsigned long long)(base[tmp0]);
                }
            }

            /*
             * Perform the update to [out]; remember there can be
             * alignment issues.
             */
            /* LINTED E_BAD_PTR_CAST_ALIGN */
            in0 = *((unsigned long long *)(&in[ii]));

            merge1 = merge0 | (merge >> shift);

            merge0 = (merge & mask) << 56;

            in0 = in0 ^ merge1;

            /* LINTED E_BAD_PTR_CAST_ALIGN */
            *((unsigned long long *)(&out[ii])) = in0;
        }

        i = i1;

        /*
         * Handle any overrun.
         */
        if (shift) {
            out[ii] = in[ii] ^ (merge0 >> 56);
            ii++;
        }

        /*
         * Handle the final few bytes.
         */
        for (; ii < len; ii++) {
            i = i + 1;
            tmp0 = base[i];
            j = j + tmp0;
            tmp1 = base[j];

            base[i] = tmp1;
            base[j] = tmp0;

            tmp0 += tmp1;
            tmp0 = tmp0 & 0xff;
            out[ii] = in[ii] ^ base[tmp0];
        }
        key->i = i;
        key->j = j;
    }
#endif /* sun4v */

/* EXPORT DELETE END */
}
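/*
 * Usage sketch (not part of the build): encrypting, then
 * re-initializing the key and "encrypting" again, recovers the
 * plaintext, since RC4 encryption and decryption are the same
 * operation.  ARCFOUR_EXAMPLE is a hypothetical macro, and the
 * expected output noted below is the widely published "Key"/
 * "Plaintext" RC4 test vector, given here for illustration only.
 */
#ifdef ARCFOUR_EXAMPLE
#include <stdio.h>
#include <string.h>

int
main(void)
{
    ARCFour_key key;
    uchar_t buf[9];
    int n;

    (void) memcpy(buf, "Plaintext", 9);

    arcfour_key_init(&key, (uchar_t *)"Key", 3);
    arcfour_crypt(&key, buf, buf, 9);   /* encrypt in place */

    for (n = 0; n < 9; n++)
        (void) printf("%02x", buf[n]);  /* expect bbf316e8d940af0ad3 */
    (void) printf("\n");

    arcfour_key_init(&key, (uchar_t *)"Key", 3);
    arcfour_crypt(&key, buf, buf, 9);   /* decrypt = same operation */
    (void) printf("%.9s\n", (char *)buf);   /* "Plaintext" */
    return (0);
}
#endif /* ARCFOUR_EXAMPLE */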