1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include "../arcfour.h"
27
28 /* Initialize the key stream 'key' using the key value */
29 void
arcfour_key_init(ARCFour_key * key,uchar_t * keyval,int keyvallen)30 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
31 {
32 uchar_t ext_keyval[256];
33 uchar_t tmp;
34 int i, j;
35
36 for (i = j = 0; i < 256; i++, j++) {
37 if (j == keyvallen)
38 j = 0;
39
40 ext_keyval[i] = keyval[j];
41 }
42 for (i = 0; i < 256; i++)
43 key->arr[i] = (uchar_t)i;
44
45 j = 0;
46 for (i = 0; i < 256; i++) {
47 j = (j + key->arr[i] + ext_keyval[i]) % 256;
48 tmp = key->arr[i];
49 key->arr[i] = key->arr[j];
50 key->arr[j] = tmp;
51 }
52 key->i = 0;
53 key->j = 0;
54 }
55
56
57 /*
58 * Encipher 'in' using 'key.
59 * in and out can point to the same location
60 */
61 void
arcfour_crypt(ARCFour_key * key,uchar_t * in,uchar_t * out,size_t len)62 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
63 {
64 size_t ii;
65 unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
66 uchar_t i, j, *base, jj, *base1, tmp;
67 unsigned int tmp0, tmp1, i_accum, shift = 0, i1;
68
69 int index;
70
71 base = key->arr;
72
73 index = (((uintptr_t)in) & 0x7);
74
75 /* Get the 'in' on an 8-byte alignment */
76 if (index > 0) {
77 i = key->i;
78 j = key->j;
79
80 for (index = 8 - index; (index-- > 0) && len > 0;
81 len--, in++, out++) {
82
83 i = i + 1;
84 j = j + key->arr[i];
85 tmp = key->arr[i];
86 key->arr[i] = key->arr[j];
87 key->arr[j] = tmp;
88 tmp = key->arr[i] + key->arr[j];
89 *out = *in ^ key->arr[tmp];
90 }
91 key->i = i;
92 key->j = j;
93
94 }
95 if (len == 0)
96 return;
97
98 /* See if we're fortunate and 'out' got aligned as well */
99
100
101 /*
102 * Niagara optimized version for
103 * the cases where the input and output buffers are aligned on
104 * a multiple of 8-byte boundary.
105 */
106 #ifdef sun4v
107 if ((((uintptr_t)out) & 7) != 0) {
108 #endif /* sun4v */
109 i = key->i;
110 j = key->j;
111 for (ii = 0; ii < len; ii++) {
112 i = i + 1;
113 tmp0 = base[i];
114 j = j + tmp0;
115 tmp1 = base[j];
116 base[i] = (uchar_t)tmp1;
117 base[j] = (uchar_t)tmp0;
118 tmp0 += tmp1;
119 tmp0 = tmp0 & 0xff;
120 out[ii] = in[ii] ^ base[tmp0];
121 }
122 key->i = i;
123 key->j = j;
124 #ifdef sun4v
125 } else {
126 i = key->i;
127 j = key->j;
128
129 /*
130 * Want to align base[i] on a 2B boundary -- allows updates
131 * via [i] to be performed in 2B chunks (reducing # of stores).
132 * Requires appropriate alias detection.
133 */
134
135 if (((i+1) % 2) != 0) {
136 i = i + 1;
137 tmp0 = base[i];
138 j = j + tmp0;
139 tmp1 = base[j];
140
141 base[i] = (uchar_t)tmp1;
142 base[j] = (uchar_t)tmp0;
143
144 tmp0 += tmp1;
145 tmp0 = tmp0 & 0xff;
146
147 merge0 = (unsigned long long)(base[tmp0]) << 56;
148 shift = 8; mask = 0xff;
149 }
150
151 /*
152 * Note - in and out may now be misaligned -
153 * as updating [out] in 8B chunks need to handle this
154 * possibility. Also could have a 1B overrun.
155 * Need to drop out of loop early as a result.
156 */
157
158 for (ii = 0, i1 = i; ii < ((len-1) & (~7));
159 ii += 8, i1 = i1&0xff) {
160
161 /*
162 * If i < less than 248, know wont wrap around
163 * (i % 256), so don't need to bother with masking i
164 * after each increment
165 */
166 if (i1 < 248) {
167
168 /* BYTE 0 */
169 i1 = (i1 + 1);
170
171 /*
172 * Creating this base pointer reduces subsequent
173 * arihmetic ops required to load [i]
174 *
175 * N.B. don't need to check if [j] aliases.
176 * [i] and [j] end up with the same values
177 * anyway.
178 */
179 base1 = &base[i1];
180
181 tmp0 = base1[0];
182 j = j + tmp0;
183
184 tmp1 = base[j];
185 /*
186 * Don't store [i] yet
187 */
188 i_accum = tmp1;
189 base[j] = (uchar_t)tmp0;
190
191 tmp0 += tmp1;
192 tmp0 = tmp0 & 0xff;
193
194 /*
195 * Check [tmp0] doesn't alias with [i]
196 */
197
198 /*
199 * Updating [out] in 8B chunks
200 */
201 if (i1 == tmp0) {
202 merge =
203 (unsigned long long)(i_accum) << 56;
204 } else {
205 merge =
206 (unsigned long long)(base[tmp0]) <<
207 56;
208 }
209
210 /* BYTE 1 */
211 tmp0 = base1[1];
212
213 j = j + tmp0;
214
215 /*
216 * [j] can now alias with [i] and [i-1]
217 * If alias abort speculation
218 */
219 if ((i1 ^ j) < 2) {
220 base1[0] = (uchar_t)i_accum;
221
222 tmp1 = base[j];
223
224 base1[1] = (uchar_t)tmp1;
225 base[j] = (uchar_t)tmp0;
226
227 tmp0 += tmp1;
228 tmp0 = tmp0 & 0xff;
229
230 merge |= (unsigned long long)
231 (base[tmp0]) << 48;
232 } else {
233
234 tmp1 = base[j];
235
236 i_accum = i_accum << 8;
237 i_accum |= tmp1;
238
239 base[j] = (uchar_t)tmp0;
240
241 tmp0 += tmp1;
242 tmp0 = tmp0 & 0xff;
243
244 /*
245 * Speculation suceeded! Update [i]
246 * in 2B chunk
247 */
248 /* LINTED E_BAD_PTR_CAST_ALIGN */
249 *((unsigned short *) &base[i1]) =
250 i_accum;
251
252 merge |=
253 (unsigned long long)(base[tmp0]) <<
254 48;
255 }
256
257
258 /*
259 * Too expensive to perform [i] speculation for
260 * every byte. Just need to reduce frequency
261 * of stores until store buffer full stalls
262 * are not the bottleneck.
263 */
264
265 /* BYTE 2 */
266 tmp0 = base1[2];
267 j = j + tmp0;
268 tmp1 = base[j];
269 base1[2] = (uchar_t)tmp1;
270 base[j] = (uchar_t)tmp0;
271 tmp1 += tmp0;
272 tmp1 = tmp1 & 0xff;
273 merge |= (unsigned long long)(base[tmp1]) << 40;
274
275 /* BYTE 3 */
276 tmp0 = base1[3];
277 j = j + tmp0;
278 tmp1 = base[j];
279 base1[3] = (uchar_t)tmp1;
280 base[j] = (uchar_t)tmp0;
281 tmp0 += tmp1;
282 tmp0 = tmp0 & 0xff;
283 merge |= (unsigned long long)(base[tmp0]) << 32;
284
285 /* BYTE 4 */
286 tmp0 = base1[4];
287 j = j + tmp0;
288 tmp1 = base[j];
289 base1[4] = (uchar_t)tmp1;
290 base[j] = (uchar_t)tmp0;
291 tmp0 += tmp1;
292 tmp0 = tmp0 & 0xff;
293 merge |= (unsigned long long)(base[tmp0]) << 24;
294
295 /* BYTE 5 */
296 tmp0 = base1[5];
297 j = j + tmp0;
298 tmp1 = base[j];
299 base1[5] = (uchar_t)tmp1;
300 base[j] = (uchar_t)tmp0;
301 tmp0 += tmp1;
302 tmp0 = tmp0 & 0xff;
303 merge |= (unsigned long long)(base[tmp0]) << 16;
304
305 /* BYTE 6 */
306 i1 = (i1+6);
307 tmp0 = base1[6];
308 j = j + tmp0;
309 tmp1 = base[j];
310 i_accum = tmp1;
311 base[j] = (uchar_t)tmp0;
312
313 tmp0 += tmp1;
314 tmp0 = tmp0 & 0xff;
315
316 if (i1 == tmp0) {
317 merge |=
318 (unsigned long long)(i_accum) << 8;
319 } else {
320 merge |=
321 (unsigned long long)(base[tmp0]) <<
322 8;
323 }
324
325 /* BYTE 7 */
326 tmp0 = base1[7];
327
328 /*
329 * Perform [i] speculation again. Indentical
330 * to that performed for BYTE0 and BYTE1.
331 */
332 j = j + tmp0;
333 if ((i1 ^ j) < 2) {
334 base1[6] = (uchar_t)i_accum;
335 tmp1 = base[j];
336
337 base1[7] = (uchar_t)tmp1;
338 base[j] = (uchar_t)tmp0;
339
340 tmp0 += tmp1;
341 tmp0 = tmp0 & 0xff;
342
343 merge |=
344 (unsigned long long)(base[tmp0]);
345
346 } else {
347 tmp1 = base[j];
348
349 i_accum = i_accum << 8;
350 i_accum |= tmp1;
351
352 base[j] = (uchar_t)tmp0;
353
354 tmp0 += tmp1;
355 tmp0 = tmp0 & 0xff;
356
357 /* LINTED E_BAD_PTR_CAST_ALIGN */
358 *((unsigned short *) &base[i1]) =
359 i_accum;
360
361 merge |=
362 (unsigned long long)(base[tmp0]);
363 }
364 i1++;
365 } else {
366 /*
367 * i is too close to wrap-around to allow
368 * masking to be disregarded
369 */
370
371 /*
372 * Same old speculation for BYTE 0 and BYTE 1
373 */
374
375 /* BYTE 0 */
376 i1 = (i1 + 1) & 0xff;
377 jj = (uchar_t)i1;
378
379 tmp0 = base[i1];
380 j = j + tmp0;
381
382 tmp1 = base[j];
383 i_accum = tmp1;
384 base[j] = (uchar_t)tmp0;
385
386 tmp0 += tmp1;
387 tmp0 = tmp0 & 0xff;
388
389 if (i1 == tmp0) {
390 merge =
391 (unsigned long long)(i_accum) << 56;
392 } else {
393 merge =
394 (unsigned long long)(base[tmp0]) <<
395 56;
396 }
397
398 /* BYTE 1 */
399 tmp0 = base[i1+1];
400
401 j = j + tmp0;
402
403 if ((jj ^ j) < 2) {
404 base[jj] = (uchar_t)i_accum;
405
406 tmp1 = base[j];
407
408 base[i1+1] = (uchar_t)tmp1;
409 base[j] = (uchar_t)tmp0;
410
411 tmp0 += tmp1;
412 tmp0 = tmp0 & 0xff;
413
414 merge |=
415 (unsigned long long)(base[tmp0]) <<
416 48;
417 } else {
418
419 tmp1 = base[j];
420
421 i_accum = i_accum << 8;
422 i_accum |= tmp1;
423
424 base[j] = (uchar_t)tmp0;
425
426 tmp0 += tmp1;
427 tmp0 = tmp0 & 0xff;
428
429 /* LINTED E_BAD_PTR_CAST_ALIGN */
430 *((unsigned short *) &base[jj]) =
431 i_accum;
432
433 merge |=
434 (unsigned long long)(base[tmp0]) <<
435 48;
436 }
437
438 /* BYTE 2 */
439 /*
440 * As know i must be even when enter loop (to
441 * satisfy alignment), can only wrap around
442 * on the even bytes. So just need to perform
443 * mask every 2nd byte
444 */
445 i1 = (i1 + 2) & 0xff;
446 tmp0 = base[i1];
447 j = j + tmp0;
448 tmp1 = base[j];
449 base[i1] = (uchar_t)tmp1;
450 base[j] = (uchar_t)tmp0;
451 tmp0 += tmp1;
452 tmp0 = tmp0 & 0xff;
453 merge |= (unsigned long long)(base[tmp0]) << 40;
454
455 /* BYTE 3 */
456 tmp0 = base[i1+1];
457 j = j + tmp0;
458 tmp1 = base[j];
459 base[i1+1] = (uchar_t)tmp1;
460 base[j] = (uchar_t)tmp0;
461 tmp0 += tmp1;
462 tmp0 = tmp0 & 0xff;
463 merge |= (unsigned long long)(base[tmp0]) << 32;
464
465 /* BYTE 4 */
466 i1 = (i1 + 2) & 0xff;
467 tmp0 = base[i1];
468 j = j + tmp0;
469 tmp1 = base[j];
470 base[i1] = (uchar_t)tmp1;
471 base[j] = (uchar_t)tmp0;
472 tmp0 += tmp1;
473 tmp0 = tmp0 & 0xff;
474 merge |= (unsigned long long)(base[tmp0]) << 24;
475
476 /* BYTE 5 */
477 tmp0 = base[i1+1];
478 j = j + tmp0;
479 tmp1 = base[j];
480 base[i1+1] = (uchar_t)tmp1;
481 base[j] = (uchar_t)tmp0;
482 tmp0 += tmp1;
483 tmp0 = tmp0 & 0xff;
484 merge |= (unsigned long long)(base[tmp0]) << 16;
485
486 /* BYTE 6 */
487 i1 = (i1+2) &0xff;
488 jj = (uchar_t)i1;
489 tmp0 = base[i1];
490
491 j = j + tmp0;
492
493 tmp1 = base[j];
494 i_accum = tmp1;
495 base[j] = (uchar_t)tmp0;
496
497
498 tmp0 += tmp1;
499 tmp0 = tmp0 & 0xff;
500
501 if (i1 == tmp0) {
502 merge |=
503 (unsigned long long)(i_accum) << 8;
504 } else {
505 merge |=
506 (unsigned long long)(base[tmp0]) <<
507 8;
508 }
509
510 /* BYTE 7 */
511 i1++;
512 tmp0 = base[i1];
513
514 j = j + tmp0;
515 if ((jj ^ j) < 2) {
516 base[jj] = (uchar_t)i_accum;
517 tmp1 = base[j];
518
519 base[i1] = (uchar_t)tmp1;
520 base[j] = (uchar_t)tmp0;
521
522 tmp0 += tmp1;
523 tmp0 = tmp0 & 0xff;
524
525 merge |=
526 (unsigned long long)(base[tmp0]);
527
528 } else {
529
530 tmp1 = base[j];
531
532 i_accum = i_accum << 8;
533 i_accum |= tmp1;
534
535 base[j] = (uchar_t)tmp0;
536
537 tmp0 += tmp1;
538 tmp0 = tmp0 & 0xff;
539
540 /* LINTED E_BAD_PTR_CAST_ALIGN */
541 *((unsigned short *) &base[jj]) =
542 i_accum;
543
544 merge |=
545 (unsigned long long)(base[tmp0]);
546 }
547 }
548
549 /*
550 * Perform update to [out]
551 * Remember could be alignment issues
552 */
553 /* LINTED E_BAD_PTR_CAST_ALIGN */
554 in0 = *((unsigned long long *) (&in[ii]));
555
556 merge1 = merge0 | (merge >> shift);
557
558 merge0 = (merge & mask) << 56;
559
560 in0 = in0 ^ merge1;
561
562 /* LINTED E_BAD_PTR_CAST_ALIGN */
563 *((unsigned long long *) (&out[ii])) = in0;
564 }
565
566 i = (uchar_t)i1;
567
568 /*
569 * Handle any overrun
570 */
571 if (shift) {
572 out[ii] = in[ii] ^ (merge0 >> 56);
573 ii++;
574 }
575
576 /*
577 * Handle final few bytes
578 */
579 for (; ii < len; ii++) {
580 i = i + 1;
581 tmp0 = base[i];
582 j = j + tmp0;
583 tmp1 = base[j];
584
585 base[i] = (uchar_t)tmp1;
586 base[j] = (uchar_t)tmp0;
587
588 tmp0 += tmp1;
589 tmp0 = tmp0 & 0xff;
590 out[ii] = in[ii] ^ base[tmp0];
591 }
592 key->i = i;
593 key->j = j;
594 }
595 #endif /* sun4v */
596 }
597