1#if defined(lint) || defined(__lint)
2
3#include "arcfour.h"
4
5/* ARGSUSED */
6void
7arcfour_crypt_asm(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
8{}
9
10/* ARGSUSED */
11void
12arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
13{}
14
15#else
16#include <sys/asm_linkage.h>
17
/
/ void arcfour_crypt_asm(ARCFour_key *key, uchar_t *in, uchar_t *out,
/     size_t len)
/
/ XORs len bytes from in with the RC4 keystream and writes them to out,
/ updating the two stream indices stored in the key.
/
/ ABI:   System V AMD64, AT&T syntax.
/ In:    %rdi = key, %rsi = in, %rdx = out, %rcx = len
/ Out:   none (void)
/ Saves: %r12, %r13 (pushed and restored here)
/ Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags
/
/ Key layout as used by this code: two 4-byte indices at offsets 0 and 4,
/ followed at offset 8 by the state array S with one 4-byte slot per entry.
/ The index at offset 0 is the one incremented once per byte (called i
/ below), the one at offset 4 is the accumulated index (called j below).
/ NOTE(review): field names follow the comments in the exit code; confirm
/ against arcfour.h.
/
/ Register roles after setup:
/   %rdi  = address of S[0]
/   %r8   = i, already advanced to the next position to process
/   %r9   = S[i], preloaded
/   %r12  = j
/   %r10, %r11 = alternate copies of i and S[i] used by the unrolled loop
/   %r13  = scratch
/   %rax  = 8-byte keystream accumulator (unrolled loop only)
/
ENTRY_NP(arcfour_crypt_asm)
	test	%rcx,%rcx		/ if (len == 0) return
	jne	.Lentry
	ret
.Lentry:
	push	%r12
	push	%r13

	/ Set %rdi to beginning of array, key->arr[0]
	add	$8,%rdi
	/ Get key->i (the index that is incremented for every byte)
	movl	-8(%rdi),%r8d
	/ Get key->j
	movl	-4(%rdi),%r12d

	/
	/ Use a 4-byte key schedule element array
	/
	/ Advance i and preload S[i]. Both loops expect this on entry, and
	/ the exit code undoes the extra increment.
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	test	$-8,%rcx		/ fewer than 8 bytes to process?
	jz	.Lloop1
	jmp	.Lloop8

	/
	/ Main loop: eight RC4 rounds per iteration. Each round puts one
	/ keystream byte in %al; rotating %rax right by 8 around each byte
	/ leaves the eight bytes in memory order after the last rotate.
	/ The very first rotate of an iteration acts on stale data and is
	/ harmless, since all eight bytes of %rax are rewritten.
	/
	/ Even rounds use %r8/%r9 as the current i/S[i] and produce the next
	/ pair in %r10/%r11; odd rounds do the reverse. The cmove covers the
	/ case j == i+1, where the preloaded S[i+1] is stale because the swap
	/ has just written the old S[i] into that slot.
	/
.align	16
.Lloop8:
	/ round 0
	add	%r9b,%r12b		/ j += S[i]
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d	/ tmp = S[j]
	ror	$8,%rax
	inc	%r10b			/ next i
	movl	(%rdi,%r10,4),%r11d	/ preload S[next i]
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)	/ S[j] = old S[i]
	cmove	%r9,%r11		/ j == next i: use the value just stored
	movl	%r13d,(%rdi,%r8,4)	/ S[i] = old S[j]
	add	%r9b,%r13b		/ index = S[i] + S[j]
	movb	(%rdi,%r13,4),%al	/ keystream byte
	/ round 1
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al
	/ round 2
	add	%r9b,%r12b
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r10b
	movl	(%rdi,%r10,4),%r11d
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)
	cmove	%r9,%r11
	movl	%r13d,(%rdi,%r8,4)
	add	%r9b,%r13b
	movb	(%rdi,%r13,4),%al
	/ round 3
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al
	/ round 4
	add	%r9b,%r12b
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r10b
	movl	(%rdi,%r10,4),%r11d
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)
	cmove	%r9,%r11
	movl	%r13d,(%rdi,%r8,4)
	add	%r9b,%r13b
	movb	(%rdi,%r13,4),%al
	/ round 5
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al
	/ round 6
	add	%r9b,%r12b
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r10b
	movl	(%rdi,%r10,4),%r11d
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)
	cmove	%r9,%r11
	movl	%r13d,(%rdi,%r8,4)
	add	%r9b,%r13b
	movb	(%rdi,%r13,4),%al
	/ round 7
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al
	ror	$8,%rax			/ final rotate: first byte ends up lowest
	sub	$8,%rcx

	/ XOR eight input bytes with the keystream and store the result
	xor	(%rsi),%rax
	add	$8,%rsi
	mov	%rax,(%rdx)
	add	$8,%rdx

	test	$-8,%rcx		/ at least 8 bytes left?
	jnz	.Lloop8
	test	%rcx,%rcx		/ 1 to 7 bytes left?
	jne	.Lloop1

.Lexit:
	/
	/ Cleanup and exit code
	/
	/ --i to undo ++i done at entry
	sub	$1,%r8b
	/ set key->i
	movl	%r8d,-8(%rdi)
	/ set key->j
	movl	%r12d,-4(%rdi)

	pop	%r13
	pop	%r12
	ret

	/
	/ Tail loop: one byte per iteration. Entered with %rcx != 0.
	/
.align	16
.Lloop1:
	add	%r9b,%r12b		/ j += S[i]
	movl	(%rdi,%r12,4),%r13d	/ tmp = S[j]
	movl	%r9d,(%rdi,%r12,4)	/ S[j] = old S[i]
	movl	%r13d,(%rdi,%r8,4)	/ S[i] = old S[j]
	add	%r13b,%r9b		/ index = S[i] + S[j]
	inc	%r8b			/ advance i for the next byte
	movl	(%rdi,%r9,4),%r13d	/ keystream byte in low 8 bits
	movl	(%rdi,%r8,4),%r9d	/ preload the next S[i]
	xorb	(%rsi),%r13b
	inc	%rsi
	movb	%r13b,(%rdx)
	inc	%rdx
	dec	%rcx
	jnz	.Lloop1
	jmp	.Lexit
SET_SIZE(arcfour_crypt_asm)
188	/ int arcfour_crypt_on_intel(void);
189.extern	arcfour_crypt_on_intel
190
/
/ void arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
/
/ Builds the RC4 state array in the key from keyvallen bytes of key
/ material, records the result of arcfour_crypt_on_intel in the key, and
/ resets both stream indices to zero.
/
/ ABI:   System V AMD64, AT&T syntax.
/ In:    %rdi = key, %rsi = keyval, %edx = keyvallen
/ Out:   none (void); %eax happens to be 0 on return
/ Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags, plus whatever
/        arcfour_crypt_on_intel clobbers
/
/ keyvallen must be greater than 0. With 0 the key index never wraps and
/ the second loop reads keyval[0] through keyval[255].
/
ENTRY_NP(arcfour_key_init)
	/ Find out if we are running on Intel or something else (e.g., AMD64).
	/ This sets %eax to 1 for Intel, otherwise 0.
	/ The three pushes also leave %rsp 16-byte aligned at the call.
	push	%rdi		/ Save arg1
	push	%rsi		/ Save arg2
	push	%rdx		/ Save arg3
	call	arcfour_crypt_on_intel
	pop	%rdx		/ Restore arg3
	pop	%rsi		/ Restore arg2
	pop	%rdi		/ Restore arg1
	/ Save return value in key->flag (1=Intel, 0=AMD).
	/ 1032 = 8 + 256 * 4, the first byte past the 4-byte element array.
	movl	%eax,1032(%rdi)

	/ keyvallen is a 32-bit int and the ABI does not define the upper
	/ half of %rdx, so widen it before using it as a 64-bit offset.
	movslq	%edx,%rdx

	/ Set %rdi to beginning of array, key->arr[0]
	lea	8(%rdi),%rdi
	/ Point %rsi just past the key bytes and index it with a negative
	/ offset that counts up to zero: %rdx = -keyvallen.
	lea	(%rsi,%rdx),%rsi
	neg	%rdx
	mov	%rdx,%rcx	/ %rcx = -keyvallen, reload value on wrap

	xor	%eax,%eax

	/ Use a 4-byte data array
	jmp	.Lw1stloop

	/ First pass: S[n] = n for n = 0..255. The loop ends when the 8-bit
	/ counter in %al wraps and sets the carry flag.
.align	16
.Lw1stloop:
	/ AMD64 (4-byte array)
	mov	%eax,(%rdi,%rax,4)
	add	$1,%al
	jnc	.Lw1stloop

	xor	%r9,%r9		/ i = 0
	xor	%r8,%r8		/ j = 0

	/ Second pass: for each i, j += S[i] + next key byte, then swap S[i]
	/ and S[j]. The key bytes are reused cyclically.
.align	16
.Lw2ndloop:
	mov	(%rdi,%r9,4),%r10d	/ %r10d = S[i]
	add	(%rsi,%rdx,1),%r8b	/ j += current key byte
	add	%r10b,%r8b		/ j += S[i]
	add	$1,%rdx			/ next key byte; ZF set at end of key
	mov	(%rdi,%r8,4),%r11d	/ %r11d = S[j] (mov keeps ZF intact)
	cmovz	%rcx,%rdx		/ wrap to the first key byte
	mov	%r10d,(%rdi,%r8,4)	/ S[j] = old S[i]
	mov	%r11d,(%rdi,%r9,4)	/ S[i] = old S[j]
	add	$1,%r9b
	jnc	.Lw2ndloop		/ until the 8-bit i wraps

	/ Exit code: clear the two 4-byte stream indices in front of the array
	xor	%eax,%eax
	mov	%eax,-8(%rdi)
	mov	%eax,-4(%rdi)

	ret
SET_SIZE(arcfour_key_init)
248.asciz	"RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
249#endif /* !lint && !__lint */
250