/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "../arcfour.h"

/* Initialize the key stream 'key' using the key value */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
        uchar_t ext_keyval[256];
        uchar_t tmp;
        int i, j;

        /* Replicate the key value until it fills all 256 bytes */
        for (i = j = 0; i < 256; i++, j++) {
                if (j == keyvallen)
                        j = 0;

                ext_keyval[i] = keyval[j];
        }
        for (i = 0; i < 256; i++)
                key->arr[i] = (uchar_t)i;

        /* Standard RC4 key schedule: permute the S-box under the key */
        j = 0;
        for (i = 0; i < 256; i++) {
                j = (j + key->arr[i] + ext_keyval[i]) % 256;
                tmp = key->arr[i];
                key->arr[i] = key->arr[j];
                key->arr[j] = tmp;
        }
        key->i = 0;
        key->j = 0;
}


/*
 * Encipher 'in' using 'key'.
 * in and out can point to the same location.
 */
void
arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
        size_t ii;
        unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
        uchar_t i, j, *base, jj, *base1, tmp;
        unsigned int tmp0, tmp1, i_accum, shift = 0, i1;

        int index;

        base = key->arr;

        index = (((uintptr_t)in) & 0x7);

        /* Get 'in' onto an 8-byte alignment */
        if (index > 0) {
                i = key->i;
                j = key->j;

                for (index = 8 - index; (index-- > 0) && len > 0;
                    len--, in++, out++) {

                        i = i + 1;
                        j = j + key->arr[i];
                        tmp = key->arr[i];
                        key->arr[i] = key->arr[j];
                        key->arr[j] = tmp;
                        tmp = key->arr[i] + key->arr[j];
                        *out = *in ^ key->arr[tmp];
                }
                key->i = i;
                key->j = j;
        }
        if (len == 0)
                return;

        /* See if we're fortunate and 'out' got aligned as well */

        /*
         * Niagara-optimized version for the cases where the input and
         * output buffers are aligned on a multiple-of-8-byte boundary.
         */
#ifdef sun4v
        if ((((uintptr_t)out) & 7) != 0) {
#endif  /* sun4v */
                i = key->i;
                j = key->j;
                for (ii = 0; ii < len; ii++) {
                        i = i + 1;
                        tmp0 = base[i];
                        j = j + tmp0;
                        tmp1 = base[j];
                        base[i] = (uchar_t)tmp1;
                        base[j] = (uchar_t)tmp0;
                        tmp0 += tmp1;
                        tmp0 = tmp0 & 0xff;
                        out[ii] = in[ii] ^ base[tmp0];
                }
                key->i = i;
                key->j = j;
#ifdef sun4v
        } else {
                i = key->i;
                j = key->j;

                /*
                 * We want to align base[i] on a 2B boundary -- this allows
                 * updates via [i] to be performed in 2B chunks (reducing
                 * the number of stores).  It requires appropriate alias
                 * detection.
                 */
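
                /*
                 * Added sketch (not in the original source): with i1 even,
                 * the two fresh S-box bytes destined for base[i1] and
                 * base[i1 + 1] are accumulated in i_accum and retired with
                 * a single 16-bit store, roughly
                 *
                 *      i_accum = (new_s_i1 << 8) | new_s_i1_plus_1;
                 *      *((unsigned short *)&base[i1]) = i_accum;
                 *
                 * (big-endian byte order, as on SPARC; new_s_* are
                 * illustrative names).  The wide store is only legal when
                 * neither [j] nor the key-stream index aliases [i1] or
                 * [i1 + 1]; the loop below tests exactly those cases
                 * before committing it.
                 */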
                if (((i + 1) % 2) != 0) {
                        i = i + 1;
                        tmp0 = base[i];
                        j = j + tmp0;
                        tmp1 = base[j];

                        base[i] = (uchar_t)tmp1;
                        base[j] = (uchar_t)tmp0;

                        tmp0 += tmp1;
                        tmp0 = tmp0 & 0xff;

                        merge0 = (unsigned long long)(base[tmp0]) << 56;
                        shift = 8;
                        mask = 0xff;
                }

                /*
                 * Note: in and out may now be misaligned, so updating
                 * [out] in 8B chunks needs to handle this possibility.
                 * We could also have a 1B overrun, and so need to drop
                 * out of the loop early as a result.
                 */

                for (ii = 0, i1 = i; ii < ((len - 1) & (~7));
                    ii += 8, i1 = i1 & 0xff) {

                        /*
                         * If i1 is less than 248, we know it won't wrap
                         * around (i % 256), so we don't need to bother
                         * masking i after each increment.
                         */
                        if (i1 < 248) {

                                /* BYTE 0 */
                                i1 = (i1 + 1);

                                /*
                                 * Creating this base pointer reduces the
                                 * subsequent arithmetic ops required to
                                 * load [i].
                                 *
                                 * N.B. we don't need to check whether [j]
                                 * aliases; [i] and [j] end up with the
                                 * same values anyway.
                                 */
                                base1 = &base[i1];

                                tmp0 = base1[0];
                                j = j + tmp0;

                                tmp1 = base[j];
                                /*
                                 * Don't store [i] yet.
                                 */
                                i_accum = tmp1;
                                base[j] = (uchar_t)tmp0;

                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;

                                /*
                                 * Check that [tmp0] doesn't alias with [i].
                                 */

                                /*
                                 * Updating [out] in 8B chunks.
                                 */
                                if (i1 == tmp0) {
                                        merge = (unsigned long long)
                                            (i_accum) << 56;
                                } else {
                                        merge = (unsigned long long)
                                            (base[tmp0]) << 56;
                                }

                                /* BYTE 1 */
                                tmp0 = base1[1];

                                j = j + tmp0;

                                /*
                                 * [j] can now alias with [i] and [i-1].
                                 * If they alias, abort the speculation.
                                 */
                                if ((i1 ^ j) < 2) {
                                        base1[0] = (uchar_t)i_accum;

                                        tmp1 = base[j];

                                        base1[1] = (uchar_t)tmp1;
                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        merge |= (unsigned long long)
                                            (base[tmp0]) << 48;
                                } else {

                                        tmp1 = base[j];

                                        i_accum = i_accum << 8;
                                        i_accum |= tmp1;

                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        /*
                                         * Speculation succeeded!  Update
                                         * [i] in a 2B chunk.
                                         */
                                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                                        *((unsigned short *) &base[i1]) =
                                            i_accum;

                                        merge |= (unsigned long long)
                                            (base[tmp0]) << 48;
                                }


                                /*
                                 * Too expensive to perform [i] speculation
                                 * for every byte.  We just need to reduce
                                 * the frequency of stores until
                                 * store-buffer-full stalls are not the
                                 * bottleneck.
                                 */
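
                                /*
                                 * Added sketch (not original commentary):
                                 * each of BYTE 2 .. BYTE 5 below is one
                                 * plain RC4 step with no store
                                 * speculation, equivalent to
                                 *
                                 *      j = (j + S[i]) & 0xff;
                                 *      swap(S[i], S[j]);
                                 *      ks = S[(S[i] + S[j]) & 0xff];
                                 *
                                 * where ks is the key-stream byte merged
                                 * into the 8B output word.
                                 */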
                                /* BYTE 2 */
                                tmp0 = base1[2];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base1[2] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp1 += tmp0;
                                tmp1 = tmp1 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp1]) << 40;

                                /* BYTE 3 */
                                tmp0 = base1[3];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base1[3] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 32;

                                /* BYTE 4 */
                                tmp0 = base1[4];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base1[4] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 24;

                                /* BYTE 5 */
                                tmp0 = base1[5];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base1[5] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 16;

                                /* BYTE 6 */
                                i1 = (i1 + 6);
                                tmp0 = base1[6];
                                j = j + tmp0;
                                tmp1 = base[j];
                                i_accum = tmp1;
                                base[j] = (uchar_t)tmp0;

                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;

                                if (i1 == tmp0) {
                                        merge |= (unsigned long long)
                                            (i_accum) << 8;
                                } else {
                                        merge |= (unsigned long long)
                                            (base[tmp0]) << 8;
                                }

                                /* BYTE 7 */
                                tmp0 = base1[7];

                                /*
                                 * Perform [i] speculation again.  This is
                                 * identical to that performed for BYTE 0
                                 * and BYTE 1.
                                 */
                                j = j + tmp0;
                                if ((i1 ^ j) < 2) {
                                        base1[6] = (uchar_t)i_accum;
                                        tmp1 = base[j];

                                        base1[7] = (uchar_t)tmp1;
                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        merge |= (unsigned long long)
                                            (base[tmp0]);

                                } else {
                                        tmp1 = base[j];

                                        i_accum = i_accum << 8;
                                        i_accum |= tmp1;

                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                                        *((unsigned short *) &base[i1]) =
                                            i_accum;

                                        merge |= (unsigned long long)
                                            (base[tmp0]);
                                }
                                i1++;
                        } else {
                                /*
                                 * i is too close to wrap-around to allow
                                 * the masking to be disregarded.
                                 */

                                /*
                                 * Same old speculation for BYTE 0 and
                                 * BYTE 1.
                                 */

                                /* BYTE 0 */
                                i1 = (i1 + 1) & 0xff;
                                jj = (uchar_t)i1;

                                tmp0 = base[i1];
                                j = j + tmp0;

                                tmp1 = base[j];
                                i_accum = tmp1;
                                base[j] = (uchar_t)tmp0;

                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;

                                if (i1 == tmp0) {
                                        merge = (unsigned long long)
                                            (i_accum) << 56;
                                } else {
                                        merge = (unsigned long long)
                                            (base[tmp0]) << 56;
                                }

                                /* BYTE 1 */
                                tmp0 = base[i1 + 1];

                                j = j + tmp0;

                                if ((jj ^ j) < 2) {
                                        base[jj] = (uchar_t)i_accum;

                                        tmp1 = base[j];

                                        base[i1 + 1] = (uchar_t)tmp1;
                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        merge |= (unsigned long long)
                                            (base[tmp0]) << 48;
                                } else {

                                        tmp1 = base[j];

                                        i_accum = i_accum << 8;
                                        i_accum |= tmp1;

                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                                        *((unsigned short *) &base[jj]) =
                                            i_accum;

                                        merge |= (unsigned long long)
                                            (base[tmp0]) << 48;
                                }

                                /* BYTE 2 */
                                /*
                                 * As we know i must be even when entering
                                 * the loop (to satisfy alignment), it can
                                 * only wrap around on the even bytes, so
                                 * we just need to perform the mask every
                                 * 2nd byte.
                                 */
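                                /*
                                 * Added worked example: if i1 == 254 here,
                                 * (254 + 2) & 0xff == 0 and the companion
                                 * access to base[i1 + 1] touches base[1],
                                 * still in range.  Without the mask, i1
                                 * (an unsigned int) would reach 256 and
                                 * index past the 256-byte S-box.
                                 */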
                                i1 = (i1 + 2) & 0xff;
                                tmp0 = base[i1];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base[i1] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 40;

                                /* BYTE 3 */
                                tmp0 = base[i1 + 1];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base[i1 + 1] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 32;

                                /* BYTE 4 */
                                i1 = (i1 + 2) & 0xff;
                                tmp0 = base[i1];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base[i1] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 24;

                                /* BYTE 5 */
                                tmp0 = base[i1 + 1];
                                j = j + tmp0;
                                tmp1 = base[j];
                                base[i1 + 1] = (uchar_t)tmp1;
                                base[j] = (uchar_t)tmp0;
                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;
                                merge |= (unsigned long long)
                                    (base[tmp0]) << 16;

                                /* BYTE 6 */
                                i1 = (i1 + 2) & 0xff;
                                jj = (uchar_t)i1;
                                tmp0 = base[i1];

                                j = j + tmp0;

                                tmp1 = base[j];
                                i_accum = tmp1;
                                base[j] = (uchar_t)tmp0;


                                tmp0 += tmp1;
                                tmp0 = tmp0 & 0xff;

                                if (i1 == tmp0) {
                                        merge |= (unsigned long long)
                                            (i_accum) << 8;
                                } else {
                                        merge |= (unsigned long long)
                                            (base[tmp0]) << 8;
                                }

                                /* BYTE 7 */
                                i1++;
                                tmp0 = base[i1];

                                j = j + tmp0;
                                if ((jj ^ j) < 2) {
                                        base[jj] = (uchar_t)i_accum;
                                        tmp1 = base[j];

                                        base[i1] = (uchar_t)tmp1;
                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        merge |= (unsigned long long)
                                            (base[tmp0]);

                                } else {

                                        tmp1 = base[j];

                                        i_accum = i_accum << 8;
                                        i_accum |= tmp1;

                                        base[j] = (uchar_t)tmp0;

                                        tmp0 += tmp1;
                                        tmp0 = tmp0 & 0xff;

                                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                                        *((unsigned short *) &base[jj]) =
                                            i_accum;

                                        merge |= (unsigned long long)
                                            (base[tmp0]);
                                }
                        }

                        /*
                         * Perform the update to [out].  Remember there
                         * could be alignment issues.
                         */
                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                        in0 = *((unsigned long long *) (&in[ii]));

                        merge1 = merge0 | (merge >> shift);

                        merge0 = (merge & mask) << 56;

                        in0 = in0 ^ merge1;

                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                        *((unsigned long long *) (&out[ii])) = in0;
                }

                i = (uchar_t)i1;

                /*
                 * Handle any overrun.
                 */
                if (shift) {
                        out[ii] = in[ii] ^ (merge0 >> 56);
                        ii++;
                }

                /*
                 * Handle the final few bytes.
                 */
                for (; ii < len; ii++) {
                        i = i + 1;
                        tmp0 = base[i];
                        j = j + tmp0;
                        tmp1 = base[j];

                        base[i] = (uchar_t)tmp1;
                        base[j] = (uchar_t)tmp0;

                        tmp0 += tmp1;
                        tmp0 = tmp0 & 0xff;
                        out[ii] = in[ii] ^ base[tmp0];
                }
                key->i = i;
                key->j = j;
        }
#endif  /* sun4v */
}
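
#ifdef ARCFOUR_EXAMPLE
/*
 * Minimal usage sketch (added; not part of the original module, and the
 * ARCFOUR_EXAMPLE guard is hypothetical).  It exercises the two entry
 * points above, relying on ARCFour_key and uchar_t from ../arcfour.h.
 * RC4 is symmetric, so running the ciphertext through a key stream
 * initialized from the same key value recovers the plaintext; in == out
 * is explicitly allowed by arcfour_crypt().
 */
#include <stdio.h>

int
main(void)
{
        ARCFour_key crypt_key;
        uchar_t keyval[5] = { 0x01, 0x02, 0x03, 0x04, 0x05 };
        uchar_t buf[15] = "attack at dawn";

        /* Encipher in place (14 data bytes, preserving the NUL) */
        arcfour_key_init(&crypt_key, keyval, sizeof (keyval));
        arcfour_crypt(&crypt_key, buf, buf, sizeof (buf) - 1);

        /* Re-key and decipher in place */
        arcfour_key_init(&crypt_key, keyval, sizeof (keyval));
        arcfour_crypt(&crypt_key, buf, buf, sizeof (buf) - 1);

        (void) printf("%s\n", (char *)buf);     /* prints "attack at dawn" */
        return (0);
}
#endif  /* ARCFOUR_EXAMPLE */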