xref: /freebsd/sys/crypto/openssl/i386/chacha-x86.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from chacha-x86.pl. */
2bc3d5698SJohn Baldwin#ifdef PIC
3bc3d5698SJohn Baldwin.text
/*
 * ChaCha20_ctr32 (i386, PIC build): public entry point.  It either jumps to
 * the SSSE3/XOP code further down or runs the integer-only code that follows.
 *
 * Stack arguments, offsets valid after the four register pushes:
 *   20(%esp)  output pointer      24(%esp)  input pointer
 *   28(%esp)  length in bytes     32(%esp)  pointer to 8 key words
 *   36(%esp)  pointer to 4 counter/nonce words
 * NOTE(review): the roles above are read off how each value is used below;
 * confirm names and types against the C prototype.
 *
 * Returns at once when the length is zero.  %ebp, %ebx, %esi and %edi are
 * saved and restored; %eax, %ecx, %edx and the flags are clobbered.
 *
 * Frame of the integer path after "subl $132,%esp":
 *   0..60(%esp)     16-word working state / key stream of the current block
 *   80..108(%esp)   copy of the 8 key words (input words 4..11)
 *   112..124(%esp)  copy of the 4 counter/nonce words (input words 12..15)
 *   128(%esp)       round-loop counter
 *   152/156/160     the caller's output / input / length argument slots,
 *                   updated in place as blocks are consumed
 * NOTE(review): nothing in this function clears the key copy or the key
 * stream from the stack before returning.
 */
4bc3d5698SJohn Baldwin.globl	ChaCha20_ctr32
5bc3d5698SJohn Baldwin.type	ChaCha20_ctr32,@function
6bc3d5698SJohn Baldwin.align	16
7bc3d5698SJohn BaldwinChaCha20_ctr32:
8bc3d5698SJohn Baldwin.L_ChaCha20_ctr32_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
9*c0855eaaSJohn Baldwin	#ifdef __CET__
10*c0855eaaSJohn Baldwin
11*c0855eaaSJohn Baldwin.byte	243,15,30,251
12*c0855eaaSJohn Baldwin	#endif
13*c0855eaaSJohn Baldwin
14bc3d5698SJohn Baldwin	pushl	%ebp
15bc3d5698SJohn Baldwin	pushl	%ebx
16bc3d5698SJohn Baldwin	pushl	%esi
17bc3d5698SJohn Baldwin	pushl	%edi
/* Zero length: nothing to do. */
18bc3d5698SJohn Baldwin	xorl	%eax,%eax
19bc3d5698SJohn Baldwin	cmpl	28(%esp),%eax
20bc3d5698SJohn Baldwin	je	.L000no_data
/*
 * call/pop leaves the run-time address of .Lpic_point in %eax; %ebp is then
 * pointed at OPENSSL_ia32cap_P.  Both registers are still live when control
 * jumps to .Lssse3_shortcut, which depends on them.
 */
21bc3d5698SJohn Baldwin	call	.Lpic_point
22bc3d5698SJohn Baldwin.Lpic_point:
23bc3d5698SJohn Baldwin	popl	%eax
24bc3d5698SJohn Baldwin	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
/*
 * Take the SIMD path only if bit 24 of capability word 0 and bit 9 of word 1
 * are both set.  NOTE(review): presumably the FXSR and SSSE3 feature bits in
 * the OPENSSL_ia32cap layout; confirm.
 */
25bc3d5698SJohn Baldwin	testl	$16777216,(%ebp)
26bc3d5698SJohn Baldwin	jz	.L001x86
27bc3d5698SJohn Baldwin	testl	$512,4(%ebp)
28bc3d5698SJohn Baldwin	jz	.L001x86
29bc3d5698SJohn Baldwin	jmp	.Lssse3_shortcut
/* Integer-only path: copy key and counter block into the local frame. */
30bc3d5698SJohn Baldwin.L001x86:
31bc3d5698SJohn Baldwin	movl	32(%esp),%esi
32bc3d5698SJohn Baldwin	movl	36(%esp),%edi
33bc3d5698SJohn Baldwin	subl	$132,%esp
34bc3d5698SJohn Baldwin	movl	(%esi),%eax
35bc3d5698SJohn Baldwin	movl	4(%esi),%ebx
36bc3d5698SJohn Baldwin	movl	8(%esi),%ecx
37bc3d5698SJohn Baldwin	movl	12(%esi),%edx
38bc3d5698SJohn Baldwin	movl	%eax,80(%esp)
39bc3d5698SJohn Baldwin	movl	%ebx,84(%esp)
40bc3d5698SJohn Baldwin	movl	%ecx,88(%esp)
41bc3d5698SJohn Baldwin	movl	%edx,92(%esp)
42bc3d5698SJohn Baldwin	movl	16(%esi),%eax
43bc3d5698SJohn Baldwin	movl	20(%esi),%ebx
44bc3d5698SJohn Baldwin	movl	24(%esi),%ecx
45bc3d5698SJohn Baldwin	movl	28(%esi),%edx
46bc3d5698SJohn Baldwin	movl	%eax,96(%esp)
47bc3d5698SJohn Baldwin	movl	%ebx,100(%esp)
48bc3d5698SJohn Baldwin	movl	%ecx,104(%esp)
49bc3d5698SJohn Baldwin	movl	%edx,108(%esp)
50bc3d5698SJohn Baldwin	movl	(%edi),%eax
51bc3d5698SJohn Baldwin	movl	4(%edi),%ebx
52bc3d5698SJohn Baldwin	movl	8(%edi),%ecx
53bc3d5698SJohn Baldwin	movl	12(%edi),%edx
/*
 * The first counter word is stored decremented by one because the per-block
 * setup at .L002entry adds one back before every block, the first included.
 */
54bc3d5698SJohn Baldwin	subl	$1,%eax
55bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
56bc3d5698SJohn Baldwin	movl	%ebx,116(%esp)
57bc3d5698SJohn Baldwin	movl	%ecx,120(%esp)
58bc3d5698SJohn Baldwin	movl	%edx,124(%esp)
59bc3d5698SJohn Baldwin	jmp	.L002entry
60bc3d5698SJohn Baldwin.align	16
/*
 * Start of each further block: write the advanced input pointer (%ebx),
 * output pointer (%eax) and remaining length (%ecx) back to the argument
 * slots.
 */
61bc3d5698SJohn Baldwin.L003outer_loop:
62bc3d5698SJohn Baldwin	movl	%ebx,156(%esp)
63bc3d5698SJohn Baldwin	movl	%eax,152(%esp)
64bc3d5698SJohn Baldwin	movl	%ecx,160(%esp)
/*
 * Build the initial state for one block.  The four immediates are the
 * little-endian words of the ASCII string "expand 32-byte k"
 * (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574); word 0 stays in %eax.
 * State words not kept in registers are written to 0..60(%esp).
 */
65bc3d5698SJohn Baldwin.L002entry:
66bc3d5698SJohn Baldwin	movl	$1634760805,%eax
67bc3d5698SJohn Baldwin	movl	$857760878,4(%esp)
68bc3d5698SJohn Baldwin	movl	$2036477234,8(%esp)
69bc3d5698SJohn Baldwin	movl	$1797285236,12(%esp)
70bc3d5698SJohn Baldwin	movl	84(%esp),%ebx
71bc3d5698SJohn Baldwin	movl	88(%esp),%ebp
72bc3d5698SJohn Baldwin	movl	104(%esp),%ecx
73bc3d5698SJohn Baldwin	movl	108(%esp),%esi
74bc3d5698SJohn Baldwin	movl	116(%esp),%edx
75bc3d5698SJohn Baldwin	movl	120(%esp),%edi
76bc3d5698SJohn Baldwin	movl	%ebx,20(%esp)
77bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
78bc3d5698SJohn Baldwin	movl	%ecx,40(%esp)
79bc3d5698SJohn Baldwin	movl	%esi,44(%esp)
80bc3d5698SJohn Baldwin	movl	%edx,52(%esp)
81bc3d5698SJohn Baldwin	movl	%edi,56(%esp)
82bc3d5698SJohn Baldwin	movl	92(%esp),%ebx
83bc3d5698SJohn Baldwin	movl	124(%esp),%edi
84bc3d5698SJohn Baldwin	movl	112(%esp),%edx
85bc3d5698SJohn Baldwin	movl	80(%esp),%ebp
86bc3d5698SJohn Baldwin	movl	96(%esp),%ecx
87bc3d5698SJohn Baldwin	movl	100(%esp),%esi
/* Advance the block counter (word 12) and keep the new value at 112(%esp). */
88bc3d5698SJohn Baldwin	addl	$1,%edx
89bc3d5698SJohn Baldwin	movl	%ebx,28(%esp)
90bc3d5698SJohn Baldwin	movl	%edi,60(%esp)
91bc3d5698SJohn Baldwin	movl	%edx,112(%esp)
/* 10 loop iterations of 8 quarter-rounds each. */
92bc3d5698SJohn Baldwin	movl	$10,%ebx
93bc3d5698SJohn Baldwin	jmp	.L004loop
94bc3d5698SJohn Baldwin.align	16
/*
 * Round loop.  Each quarter-round is the add / xor / rotate-left sequence
 * with rotate counts 16, 12, 8, 7.  The loads and stores interleaved with
 * it move state words between registers and 0..60(%esp).  The iteration
 * counter is parked at 128(%esp) because %ebx is needed as a working
 * register.  The first four quarter-rounds pair words (0,4,8,12),
 * (1,5,9,13), (2,6,10,14), (3,7,11,15); the last four pair (0,5,10,15),
 * (1,6,11,12), (2,7,8,13), (3,4,9,14).
 */
95bc3d5698SJohn Baldwin.L004loop:
96bc3d5698SJohn Baldwin	addl	%ebp,%eax
97bc3d5698SJohn Baldwin	movl	%ebx,128(%esp)
98bc3d5698SJohn Baldwin	movl	%ebp,%ebx
99bc3d5698SJohn Baldwin	xorl	%eax,%edx
100bc3d5698SJohn Baldwin	roll	$16,%edx
101bc3d5698SJohn Baldwin	addl	%edx,%ecx
102bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
103bc3d5698SJohn Baldwin	movl	52(%esp),%edi
104bc3d5698SJohn Baldwin	roll	$12,%ebx
105bc3d5698SJohn Baldwin	movl	20(%esp),%ebp
106bc3d5698SJohn Baldwin	addl	%ebx,%eax
107bc3d5698SJohn Baldwin	xorl	%eax,%edx
108bc3d5698SJohn Baldwin	movl	%eax,(%esp)
109bc3d5698SJohn Baldwin	roll	$8,%edx
110bc3d5698SJohn Baldwin	movl	4(%esp),%eax
111bc3d5698SJohn Baldwin	addl	%edx,%ecx
112bc3d5698SJohn Baldwin	movl	%edx,48(%esp)
113bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
114bc3d5698SJohn Baldwin	addl	%ebp,%eax
115bc3d5698SJohn Baldwin	roll	$7,%ebx
116bc3d5698SJohn Baldwin	xorl	%eax,%edi
117bc3d5698SJohn Baldwin	movl	%ecx,32(%esp)
118bc3d5698SJohn Baldwin	roll	$16,%edi
119bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
120bc3d5698SJohn Baldwin	addl	%edi,%esi
121bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
122bc3d5698SJohn Baldwin	xorl	%esi,%ebp
123bc3d5698SJohn Baldwin	movl	56(%esp),%edx
124bc3d5698SJohn Baldwin	roll	$12,%ebp
125bc3d5698SJohn Baldwin	movl	24(%esp),%ebx
126bc3d5698SJohn Baldwin	addl	%ebp,%eax
127bc3d5698SJohn Baldwin	xorl	%eax,%edi
128bc3d5698SJohn Baldwin	movl	%eax,4(%esp)
129bc3d5698SJohn Baldwin	roll	$8,%edi
130bc3d5698SJohn Baldwin	movl	8(%esp),%eax
131bc3d5698SJohn Baldwin	addl	%edi,%esi
132bc3d5698SJohn Baldwin	movl	%edi,52(%esp)
133bc3d5698SJohn Baldwin	xorl	%esi,%ebp
134bc3d5698SJohn Baldwin	addl	%ebx,%eax
135bc3d5698SJohn Baldwin	roll	$7,%ebp
136bc3d5698SJohn Baldwin	xorl	%eax,%edx
137bc3d5698SJohn Baldwin	movl	%esi,36(%esp)
138bc3d5698SJohn Baldwin	roll	$16,%edx
139bc3d5698SJohn Baldwin	movl	%ebp,20(%esp)
140bc3d5698SJohn Baldwin	addl	%edx,%ecx
141bc3d5698SJohn Baldwin	movl	44(%esp),%esi
142bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
143bc3d5698SJohn Baldwin	movl	60(%esp),%edi
144bc3d5698SJohn Baldwin	roll	$12,%ebx
145bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
146bc3d5698SJohn Baldwin	addl	%ebx,%eax
147bc3d5698SJohn Baldwin	xorl	%eax,%edx
148bc3d5698SJohn Baldwin	movl	%eax,8(%esp)
149bc3d5698SJohn Baldwin	roll	$8,%edx
150bc3d5698SJohn Baldwin	movl	12(%esp),%eax
151bc3d5698SJohn Baldwin	addl	%edx,%ecx
152bc3d5698SJohn Baldwin	movl	%edx,56(%esp)
153bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
154bc3d5698SJohn Baldwin	addl	%ebp,%eax
155bc3d5698SJohn Baldwin	roll	$7,%ebx
156bc3d5698SJohn Baldwin	xorl	%eax,%edi
157bc3d5698SJohn Baldwin	roll	$16,%edi
158bc3d5698SJohn Baldwin	movl	%ebx,24(%esp)
159bc3d5698SJohn Baldwin	addl	%edi,%esi
160bc3d5698SJohn Baldwin	xorl	%esi,%ebp
161bc3d5698SJohn Baldwin	roll	$12,%ebp
162bc3d5698SJohn Baldwin	movl	20(%esp),%ebx
163bc3d5698SJohn Baldwin	addl	%ebp,%eax
164bc3d5698SJohn Baldwin	xorl	%eax,%edi
165bc3d5698SJohn Baldwin	movl	%eax,12(%esp)
166bc3d5698SJohn Baldwin	roll	$8,%edi
167bc3d5698SJohn Baldwin	movl	(%esp),%eax
168bc3d5698SJohn Baldwin	addl	%edi,%esi
169bc3d5698SJohn Baldwin	movl	%edi,%edx
170bc3d5698SJohn Baldwin	xorl	%esi,%ebp
171bc3d5698SJohn Baldwin	addl	%ebx,%eax
172bc3d5698SJohn Baldwin	roll	$7,%ebp
173bc3d5698SJohn Baldwin	xorl	%eax,%edx
174bc3d5698SJohn Baldwin	roll	$16,%edx
175bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
176bc3d5698SJohn Baldwin	addl	%edx,%ecx
177bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
178bc3d5698SJohn Baldwin	movl	48(%esp),%edi
179bc3d5698SJohn Baldwin	roll	$12,%ebx
180bc3d5698SJohn Baldwin	movl	24(%esp),%ebp
181bc3d5698SJohn Baldwin	addl	%ebx,%eax
182bc3d5698SJohn Baldwin	xorl	%eax,%edx
183bc3d5698SJohn Baldwin	movl	%eax,(%esp)
184bc3d5698SJohn Baldwin	roll	$8,%edx
185bc3d5698SJohn Baldwin	movl	4(%esp),%eax
186bc3d5698SJohn Baldwin	addl	%edx,%ecx
187bc3d5698SJohn Baldwin	movl	%edx,60(%esp)
188bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
189bc3d5698SJohn Baldwin	addl	%ebp,%eax
190bc3d5698SJohn Baldwin	roll	$7,%ebx
191bc3d5698SJohn Baldwin	xorl	%eax,%edi
192bc3d5698SJohn Baldwin	movl	%ecx,40(%esp)
193bc3d5698SJohn Baldwin	roll	$16,%edi
194bc3d5698SJohn Baldwin	movl	%ebx,20(%esp)
195bc3d5698SJohn Baldwin	addl	%edi,%esi
196bc3d5698SJohn Baldwin	movl	32(%esp),%ecx
197bc3d5698SJohn Baldwin	xorl	%esi,%ebp
198bc3d5698SJohn Baldwin	movl	52(%esp),%edx
199bc3d5698SJohn Baldwin	roll	$12,%ebp
200bc3d5698SJohn Baldwin	movl	28(%esp),%ebx
201bc3d5698SJohn Baldwin	addl	%ebp,%eax
202bc3d5698SJohn Baldwin	xorl	%eax,%edi
203bc3d5698SJohn Baldwin	movl	%eax,4(%esp)
204bc3d5698SJohn Baldwin	roll	$8,%edi
205bc3d5698SJohn Baldwin	movl	8(%esp),%eax
206bc3d5698SJohn Baldwin	addl	%edi,%esi
207bc3d5698SJohn Baldwin	movl	%edi,48(%esp)
208bc3d5698SJohn Baldwin	xorl	%esi,%ebp
209bc3d5698SJohn Baldwin	addl	%ebx,%eax
210bc3d5698SJohn Baldwin	roll	$7,%ebp
211bc3d5698SJohn Baldwin	xorl	%eax,%edx
212bc3d5698SJohn Baldwin	movl	%esi,44(%esp)
213bc3d5698SJohn Baldwin	roll	$16,%edx
214bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
215bc3d5698SJohn Baldwin	addl	%edx,%ecx
216bc3d5698SJohn Baldwin	movl	36(%esp),%esi
217bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
218bc3d5698SJohn Baldwin	movl	56(%esp),%edi
219bc3d5698SJohn Baldwin	roll	$12,%ebx
220bc3d5698SJohn Baldwin	movl	16(%esp),%ebp
221bc3d5698SJohn Baldwin	addl	%ebx,%eax
222bc3d5698SJohn Baldwin	xorl	%eax,%edx
223bc3d5698SJohn Baldwin	movl	%eax,8(%esp)
224bc3d5698SJohn Baldwin	roll	$8,%edx
225bc3d5698SJohn Baldwin	movl	12(%esp),%eax
226bc3d5698SJohn Baldwin	addl	%edx,%ecx
227bc3d5698SJohn Baldwin	movl	%edx,52(%esp)
228bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
229bc3d5698SJohn Baldwin	addl	%ebp,%eax
230bc3d5698SJohn Baldwin	roll	$7,%ebx
231bc3d5698SJohn Baldwin	xorl	%eax,%edi
232bc3d5698SJohn Baldwin	roll	$16,%edi
233bc3d5698SJohn Baldwin	movl	%ebx,28(%esp)
234bc3d5698SJohn Baldwin	addl	%edi,%esi
235bc3d5698SJohn Baldwin	xorl	%esi,%ebp
236bc3d5698SJohn Baldwin	movl	48(%esp),%edx
237bc3d5698SJohn Baldwin	roll	$12,%ebp
238bc3d5698SJohn Baldwin	movl	128(%esp),%ebx
239bc3d5698SJohn Baldwin	addl	%ebp,%eax
240bc3d5698SJohn Baldwin	xorl	%eax,%edi
241bc3d5698SJohn Baldwin	movl	%eax,12(%esp)
242bc3d5698SJohn Baldwin	roll	$8,%edi
243bc3d5698SJohn Baldwin	movl	(%esp),%eax
244bc3d5698SJohn Baldwin	addl	%edi,%esi
245bc3d5698SJohn Baldwin	movl	%edi,56(%esp)
246bc3d5698SJohn Baldwin	xorl	%esi,%ebp
247bc3d5698SJohn Baldwin	roll	$7,%ebp
248bc3d5698SJohn Baldwin	decl	%ebx
249bc3d5698SJohn Baldwin	jnz	.L004loop
/*
 * Rounds done.  On exit %eax, %ebp, %ecx, %esi, %edx, %edi hold words
 * 0, 4, 8, 9, 12, 14; the remaining words are at 4*i(%esp).  Add the
 * original input words back in, then choose between a whole 64-byte block
 * and the byte-wise tail based on the remaining length.
 */
250bc3d5698SJohn Baldwin	movl	160(%esp),%ebx
251bc3d5698SJohn Baldwin	addl	$1634760805,%eax
252bc3d5698SJohn Baldwin	addl	80(%esp),%ebp
253bc3d5698SJohn Baldwin	addl	96(%esp),%ecx
254bc3d5698SJohn Baldwin	addl	100(%esp),%esi
255bc3d5698SJohn Baldwin	cmpl	$64,%ebx
256bc3d5698SJohn Baldwin	jb	.L005tail
/*
 * Whole block: %ebx = input pointer, %eax = output pointer.  Each key-stream
 * word is XORed with the matching input word and stored to the output, six
 * or five words at a time because of the register shortage.  Output word 0
 * is parked at (%esp) until %eax has been reloaded with the output pointer.
 */
257bc3d5698SJohn Baldwin	movl	156(%esp),%ebx
258bc3d5698SJohn Baldwin	addl	112(%esp),%edx
259bc3d5698SJohn Baldwin	addl	120(%esp),%edi
260bc3d5698SJohn Baldwin	xorl	(%ebx),%eax
261bc3d5698SJohn Baldwin	xorl	16(%ebx),%ebp
262bc3d5698SJohn Baldwin	movl	%eax,(%esp)
263bc3d5698SJohn Baldwin	movl	152(%esp),%eax
264bc3d5698SJohn Baldwin	xorl	32(%ebx),%ecx
265bc3d5698SJohn Baldwin	xorl	36(%ebx),%esi
266bc3d5698SJohn Baldwin	xorl	48(%ebx),%edx
267bc3d5698SJohn Baldwin	xorl	56(%ebx),%edi
268bc3d5698SJohn Baldwin	movl	%ebp,16(%eax)
269bc3d5698SJohn Baldwin	movl	%ecx,32(%eax)
270bc3d5698SJohn Baldwin	movl	%esi,36(%eax)
271bc3d5698SJohn Baldwin	movl	%edx,48(%eax)
272bc3d5698SJohn Baldwin	movl	%edi,56(%eax)
273bc3d5698SJohn Baldwin	movl	4(%esp),%ebp
274bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
275bc3d5698SJohn Baldwin	movl	12(%esp),%esi
276bc3d5698SJohn Baldwin	movl	20(%esp),%edx
277bc3d5698SJohn Baldwin	movl	24(%esp),%edi
278bc3d5698SJohn Baldwin	addl	$857760878,%ebp
279bc3d5698SJohn Baldwin	addl	$2036477234,%ecx
280bc3d5698SJohn Baldwin	addl	$1797285236,%esi
281bc3d5698SJohn Baldwin	addl	84(%esp),%edx
282bc3d5698SJohn Baldwin	addl	88(%esp),%edi
283bc3d5698SJohn Baldwin	xorl	4(%ebx),%ebp
284bc3d5698SJohn Baldwin	xorl	8(%ebx),%ecx
285bc3d5698SJohn Baldwin	xorl	12(%ebx),%esi
286bc3d5698SJohn Baldwin	xorl	20(%ebx),%edx
287bc3d5698SJohn Baldwin	xorl	24(%ebx),%edi
288bc3d5698SJohn Baldwin	movl	%ebp,4(%eax)
289bc3d5698SJohn Baldwin	movl	%ecx,8(%eax)
290bc3d5698SJohn Baldwin	movl	%esi,12(%eax)
291bc3d5698SJohn Baldwin	movl	%edx,20(%eax)
292bc3d5698SJohn Baldwin	movl	%edi,24(%eax)
293bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
294bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
295bc3d5698SJohn Baldwin	movl	44(%esp),%esi
296bc3d5698SJohn Baldwin	movl	52(%esp),%edx
297bc3d5698SJohn Baldwin	movl	60(%esp),%edi
298bc3d5698SJohn Baldwin	addl	92(%esp),%ebp
299bc3d5698SJohn Baldwin	addl	104(%esp),%ecx
300bc3d5698SJohn Baldwin	addl	108(%esp),%esi
301bc3d5698SJohn Baldwin	addl	116(%esp),%edx
302bc3d5698SJohn Baldwin	addl	124(%esp),%edi
303bc3d5698SJohn Baldwin	xorl	28(%ebx),%ebp
304bc3d5698SJohn Baldwin	xorl	40(%ebx),%ecx
305bc3d5698SJohn Baldwin	xorl	44(%ebx),%esi
306bc3d5698SJohn Baldwin	xorl	52(%ebx),%edx
307bc3d5698SJohn Baldwin	xorl	60(%ebx),%edi
308bc3d5698SJohn Baldwin	leal	64(%ebx),%ebx
309bc3d5698SJohn Baldwin	movl	%ebp,28(%eax)
310bc3d5698SJohn Baldwin	movl	(%esp),%ebp
311bc3d5698SJohn Baldwin	movl	%ecx,40(%eax)
312bc3d5698SJohn Baldwin	movl	160(%esp),%ecx
313bc3d5698SJohn Baldwin	movl	%esi,44(%eax)
314bc3d5698SJohn Baldwin	movl	%edx,52(%eax)
315bc3d5698SJohn Baldwin	movl	%edi,60(%eax)
316bc3d5698SJohn Baldwin	movl	%ebp,(%eax)
/* Advance both pointers by one block; loop while any length remains. */
317bc3d5698SJohn Baldwin	leal	64(%eax),%eax
318bc3d5698SJohn Baldwin	subl	$64,%ecx
319bc3d5698SJohn Baldwin	jnz	.L003outer_loop
320bc3d5698SJohn Baldwin	jmp	.L006done
/*
 * Fewer than 64 bytes remain (%ebx = remaining length, nonzero here).
 * Finish the feed-forward additions, leaving the full 64-byte key stream at
 * 0..60(%esp), then XOR it into the data one byte at a time.
 */
321bc3d5698SJohn Baldwin.L005tail:
322bc3d5698SJohn Baldwin	addl	112(%esp),%edx
323bc3d5698SJohn Baldwin	addl	120(%esp),%edi
324bc3d5698SJohn Baldwin	movl	%eax,(%esp)
325bc3d5698SJohn Baldwin	movl	%ebp,16(%esp)
326bc3d5698SJohn Baldwin	movl	%ecx,32(%esp)
327bc3d5698SJohn Baldwin	movl	%esi,36(%esp)
328bc3d5698SJohn Baldwin	movl	%edx,48(%esp)
329bc3d5698SJohn Baldwin	movl	%edi,56(%esp)
330bc3d5698SJohn Baldwin	movl	4(%esp),%ebp
331bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
332bc3d5698SJohn Baldwin	movl	12(%esp),%esi
333bc3d5698SJohn Baldwin	movl	20(%esp),%edx
334bc3d5698SJohn Baldwin	movl	24(%esp),%edi
335bc3d5698SJohn Baldwin	addl	$857760878,%ebp
336bc3d5698SJohn Baldwin	addl	$2036477234,%ecx
337bc3d5698SJohn Baldwin	addl	$1797285236,%esi
338bc3d5698SJohn Baldwin	addl	84(%esp),%edx
339bc3d5698SJohn Baldwin	addl	88(%esp),%edi
340bc3d5698SJohn Baldwin	movl	%ebp,4(%esp)
341bc3d5698SJohn Baldwin	movl	%ecx,8(%esp)
342bc3d5698SJohn Baldwin	movl	%esi,12(%esp)
343bc3d5698SJohn Baldwin	movl	%edx,20(%esp)
344bc3d5698SJohn Baldwin	movl	%edi,24(%esp)
345bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
346bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
347bc3d5698SJohn Baldwin	movl	44(%esp),%esi
348bc3d5698SJohn Baldwin	movl	52(%esp),%edx
349bc3d5698SJohn Baldwin	movl	60(%esp),%edi
350bc3d5698SJohn Baldwin	addl	92(%esp),%ebp
351bc3d5698SJohn Baldwin	addl	104(%esp),%ecx
352bc3d5698SJohn Baldwin	addl	108(%esp),%esi
353bc3d5698SJohn Baldwin	addl	116(%esp),%edx
354bc3d5698SJohn Baldwin	addl	124(%esp),%edi
355bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
356bc3d5698SJohn Baldwin	movl	156(%esp),%ebp
357bc3d5698SJohn Baldwin	movl	%ecx,40(%esp)
358bc3d5698SJohn Baldwin	movl	152(%esp),%ecx
359bc3d5698SJohn Baldwin	movl	%esi,44(%esp)
360bc3d5698SJohn Baldwin	xorl	%esi,%esi
361bc3d5698SJohn Baldwin	movl	%edx,52(%esp)
362bc3d5698SJohn Baldwin	movl	%edi,60(%esp)
363bc3d5698SJohn Baldwin	xorl	%eax,%eax
364bc3d5698SJohn Baldwin	xorl	%edx,%edx
/*
 * Byte loop: %esi = index, %ebp = input, %ecx = output, %ebx = bytes left.
 * The store uses -1(%ecx,%esi) because %esi was already incremented.
 */
365bc3d5698SJohn Baldwin.L007tail_loop:
366bc3d5698SJohn Baldwin	movb	(%esi,%ebp,1),%al
367bc3d5698SJohn Baldwin	movb	(%esp,%esi,1),%dl
368bc3d5698SJohn Baldwin	leal	1(%esi),%esi
369bc3d5698SJohn Baldwin	xorb	%dl,%al
370bc3d5698SJohn Baldwin	movb	%al,-1(%ecx,%esi,1)
371bc3d5698SJohn Baldwin	decl	%ebx
372bc3d5698SJohn Baldwin	jnz	.L007tail_loop
/* Release the local frame; .L000no_data skips this for the zero-length exit. */
373bc3d5698SJohn Baldwin.L006done:
374bc3d5698SJohn Baldwin	addl	$132,%esp
375bc3d5698SJohn Baldwin.L000no_data:
376bc3d5698SJohn Baldwin	popl	%edi
377bc3d5698SJohn Baldwin	popl	%esi
378bc3d5698SJohn Baldwin	popl	%ebx
379bc3d5698SJohn Baldwin	popl	%ebp
380bc3d5698SJohn Baldwin	ret
381bc3d5698SJohn Baldwin.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3: one-block-at-a-time SSE2/SSSE3 implementation.  The state
 * is held as four rows in %xmm0..%xmm3 and processed 64 bytes per pass.
 *
 * Normally reached through .Lssse3_shortcut from ChaCha20_ctr32, which has
 * already pushed the four callee-saved registers, rejected a zero length,
 * and left %ebp = address of OPENSSL_ia32cap_P and %eax = run-time address
 * of .Lpic_point.  Stack arguments are the same five as ChaCha20_ctr32
 * (20..36(%esp) after the pushes).
 *
 * NOTE(review): the global entry ChaCha20_ssse3 falls straight into
 * .Lssse3_shortcut without setting %ebp or %eax, yet the code below reads
 * 4(%ebp) and derives the table address from %eax.  It also has no
 * zero-length check, and the tail loop decrements the length before testing
 * it.  Confirm nothing calls this symbol directly.
 */
382bc3d5698SJohn Baldwin.globl	ChaCha20_ssse3
383bc3d5698SJohn Baldwin.type	ChaCha20_ssse3,@function
384bc3d5698SJohn Baldwin.align	16
385bc3d5698SJohn BaldwinChaCha20_ssse3:
386bc3d5698SJohn Baldwin.L_ChaCha20_ssse3_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
387*c0855eaaSJohn Baldwin	#ifdef __CET__
388*c0855eaaSJohn Baldwin
389*c0855eaaSJohn Baldwin.byte	243,15,30,251
390*c0855eaaSJohn Baldwin	#endif
391*c0855eaaSJohn Baldwin
392bc3d5698SJohn Baldwin	pushl	%ebp
393bc3d5698SJohn Baldwin	pushl	%ebx
394bc3d5698SJohn Baldwin	pushl	%esi
395bc3d5698SJohn Baldwin	pushl	%edi
396bc3d5698SJohn Baldwin.Lssse3_shortcut:
/*
 * Bit 11 of capability word 1 set: hand over to the XOP code instead.
 * NOTE(review): presumably the XOP feature bit, going by the label name.
 */
397bc3d5698SJohn Baldwin	testl	$2048,4(%ebp)
398bc3d5698SJohn Baldwin	jnz	.Lxop_shortcut
/* %edi = output, %esi = input, %ecx = length, %edx = key, %ebx = counter. */
399bc3d5698SJohn Baldwin	movl	20(%esp),%edi
400bc3d5698SJohn Baldwin	movl	24(%esp),%esi
401bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
402bc3d5698SJohn Baldwin	movl	32(%esp),%edx
403bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
/*
 * Carve out a 64-byte-aligned frame and remember the caller's %esp at
 * 512(%esp) so the epilogue can restore it regardless of the alignment
 * adjustment.
 */
404bc3d5698SJohn Baldwin	movl	%esp,%ebp
405bc3d5698SJohn Baldwin	subl	$524,%esp
406bc3d5698SJohn Baldwin	andl	$-64,%esp
407bc3d5698SJohn Baldwin	movl	%ebp,512(%esp)
/* %eax now points at the .Lssse3_data constant table. */
408bc3d5698SJohn Baldwin	leal	.Lssse3_data-.Lpic_point(%eax),%eax
409bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm3
/*
 * Initial state: %xmm0 = constants row (table offset 32), %xmm1/%xmm2 = the
 * two key halves, %xmm3 = counter/nonce row.  %xmm6 and %xmm7 are the two
 * byte-shuffle masks (table offsets 0 and 16).  A copy of the state is kept
 * at 0..63(%esp) for the feed-forward addition.  The "movl %ebp,48(%esp)"
 * store is overwritten by the movdqa to the same address a few lines later.
 */
410bc3d5698SJohn Baldwin.L0081x:
411bc3d5698SJohn Baldwin	movdqa	32(%eax),%xmm0
412bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm1
413bc3d5698SJohn Baldwin	movdqu	16(%edx),%xmm2
414bc3d5698SJohn Baldwin	movdqa	(%eax),%xmm6
415bc3d5698SJohn Baldwin	movdqa	16(%eax),%xmm7
416bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
417bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
418bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
419bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
420bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
421bc3d5698SJohn Baldwin	movl	$10,%edx
422bc3d5698SJohn Baldwin	jmp	.L009loop1x
423bc3d5698SJohn Baldwin.align	16
/*
 * Every block after the first: reload the saved state and add the vector at
 * table offset 80 ({1,0,0,0}) to the counter row, i.e. bump word 12 by one.
 */
424bc3d5698SJohn Baldwin.L010outer1x:
425bc3d5698SJohn Baldwin	movdqa	80(%eax),%xmm3
426bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm0
427bc3d5698SJohn Baldwin	movdqa	16(%esp),%xmm1
428bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm2
429bc3d5698SJohn Baldwin	paddd	48(%esp),%xmm3
430bc3d5698SJohn Baldwin	movl	$10,%edx
431bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
432bc3d5698SJohn Baldwin	jmp	.L009loop1x
433bc3d5698SJohn Baldwin.align	16
/*
 * Round loop, 10 iterations of two rounds each.  The .byte sequences
 * 66 0F 38 00 DE / DF are "pshufb %xmm6,%xmm3" / "pshufb %xmm7,%xmm3":
 * rotate each dword of row d left by 16 and by 8 bits.  The 12- and 7-bit
 * rotates of row b use a shift pair plus por with %xmm4 as scratch.  The
 * first pshufd group rotates rows b, c, d so the next round works on
 * diagonals; the second group applies the inverse shuffles to put the rows
 * back.
 */
434bc3d5698SJohn Baldwin.L009loop1x:
435bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
436bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
437bc3d5698SJohn Baldwin.byte	102,15,56,0,222
438bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
439bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
440bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
441bc3d5698SJohn Baldwin	psrld	$20,%xmm1
442bc3d5698SJohn Baldwin	pslld	$12,%xmm4
443bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
444bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
445bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
446bc3d5698SJohn Baldwin.byte	102,15,56,0,223
447bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
448bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
449bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
450bc3d5698SJohn Baldwin	psrld	$25,%xmm1
451bc3d5698SJohn Baldwin	pslld	$7,%xmm4
452bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
453bc3d5698SJohn Baldwin	pshufd	$78,%xmm2,%xmm2
454bc3d5698SJohn Baldwin	pshufd	$57,%xmm1,%xmm1
455bc3d5698SJohn Baldwin	pshufd	$147,%xmm3,%xmm3
456bc3d5698SJohn Baldwin	nop
457bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
458bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
459bc3d5698SJohn Baldwin.byte	102,15,56,0,222
460bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
461bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
462bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
463bc3d5698SJohn Baldwin	psrld	$20,%xmm1
464bc3d5698SJohn Baldwin	pslld	$12,%xmm4
465bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
466bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
467bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
468bc3d5698SJohn Baldwin.byte	102,15,56,0,223
469bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
470bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
471bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
472bc3d5698SJohn Baldwin	psrld	$25,%xmm1
473bc3d5698SJohn Baldwin	pslld	$7,%xmm4
474bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
475bc3d5698SJohn Baldwin	pshufd	$78,%xmm2,%xmm2
476bc3d5698SJohn Baldwin	pshufd	$147,%xmm1,%xmm1
477bc3d5698SJohn Baldwin	pshufd	$57,%xmm3,%xmm3
478bc3d5698SJohn Baldwin	decl	%edx
479bc3d5698SJohn Baldwin	jnz	.L009loop1x
/* Feed-forward: add the saved input state to get the key-stream block. */
480bc3d5698SJohn Baldwin	paddd	(%esp),%xmm0
481bc3d5698SJohn Baldwin	paddd	16(%esp),%xmm1
482bc3d5698SJohn Baldwin	paddd	32(%esp),%xmm2
483bc3d5698SJohn Baldwin	paddd	48(%esp),%xmm3
484bc3d5698SJohn Baldwin	cmpl	$64,%ecx
485bc3d5698SJohn Baldwin	jb	.L011tail
/*
 * Whole block: XOR 64 input bytes with the key stream using unaligned
 * loads and stores, advance both pointers, and loop while length remains.
 */
486bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm4
487bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm5
488bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
489bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
490bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
491bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
492bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm2
493bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm3
494bc3d5698SJohn Baldwin	leal	64(%esi),%esi
495bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
496bc3d5698SJohn Baldwin	movdqu	%xmm1,16(%edi)
497bc3d5698SJohn Baldwin	movdqu	%xmm2,32(%edi)
498bc3d5698SJohn Baldwin	movdqu	%xmm3,48(%edi)
499bc3d5698SJohn Baldwin	leal	64(%edi),%edi
500bc3d5698SJohn Baldwin	subl	$64,%ecx
501bc3d5698SJohn Baldwin	jnz	.L010outer1x
502bc3d5698SJohn Baldwin	jmp	.L012done
/*
 * Final partial block: spill the key stream over the saved state (no longer
 * needed) and XOR it into the data byte by byte.  %ebp = index, %ecx = bytes
 * left; %ebp may be reused because the caller's %esp is kept at 512(%esp).
 */
503bc3d5698SJohn Baldwin.L011tail:
504bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
505bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
506bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
507bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
508bc3d5698SJohn Baldwin	xorl	%eax,%eax
509bc3d5698SJohn Baldwin	xorl	%edx,%edx
510bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
511bc3d5698SJohn Baldwin.L013tail_loop:
512bc3d5698SJohn Baldwin	movb	(%esp,%ebp,1),%al
513bc3d5698SJohn Baldwin	movb	(%esi,%ebp,1),%dl
514bc3d5698SJohn Baldwin	leal	1(%ebp),%ebp
515bc3d5698SJohn Baldwin	xorb	%dl,%al
516bc3d5698SJohn Baldwin	movb	%al,-1(%edi,%ebp,1)
517bc3d5698SJohn Baldwin	decl	%ecx
518bc3d5698SJohn Baldwin	jnz	.L013tail_loop
/* Restore the caller's %esp saved in the prologue, then the pushed registers. */
519bc3d5698SJohn Baldwin.L012done:
520bc3d5698SJohn Baldwin	movl	512(%esp),%esp
521bc3d5698SJohn Baldwin	popl	%edi
522bc3d5698SJohn Baldwin	popl	%esi
523bc3d5698SJohn Baldwin	popl	%ebx
524bc3d5698SJohn Baldwin	popl	%ebp
525bc3d5698SJohn Baldwin	ret
526bc3d5698SJohn Baldwin.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * Constant table shared by the SIMD code paths, addressed relative to
 * .Lpic_point.  It is 64-byte aligned, so the 16-byte rows can be read with
 * movdqa.  Offsets from .Lssse3_data:
 *     0  pshufb mask: rotate each dword left by 16 bits
 *    16  pshufb mask: rotate each dword left by 8 bits
 *    32  the four constant words, ASCII "expand 32-byte k"
 *    48  {0,1,2,3}    added to the counter lanes when the XOP path sets up
 *    64  {4,4,4,4}    subtracted once in XOP setup, then added back at the
 *                     top of every XOP outer-loop pass
 *    80  {1,0,0,0}    per-block counter increment of the one-block SSSE3 loop
 *    96  {4,0,0,0}    not referenced in the part of the file reviewed here
 *   112  {0,-1,-1,-1} not referenced in the part of the file reviewed here
 */
527bc3d5698SJohn Baldwin.align	64
528bc3d5698SJohn Baldwin.Lssse3_data:
529bc3d5698SJohn Baldwin.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
530bc3d5698SJohn Baldwin.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
531bc3d5698SJohn Baldwin.long	1634760805,857760878,2036477234,1797285236
532bc3d5698SJohn Baldwin.long	0,1,2,3
533bc3d5698SJohn Baldwin.long	4,4,4,4
534bc3d5698SJohn Baldwin.long	1,0,0,0
535bc3d5698SJohn Baldwin.long	4,0,0,0
536bc3d5698SJohn Baldwin.long	0,-1,-1,-1
537bc3d5698SJohn Baldwin.align	64
/*
 * NUL-terminated ASCII identification string:
 * "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
538bc3d5698SJohn Baldwin.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
539bc3d5698SJohn Baldwin.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
540bc3d5698SJohn Baldwin.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
541bc3d5698SJohn Baldwin.byte	114,103,62,0
542bc3d5698SJohn Baldwin.globl	ChaCha20_xop
543bc3d5698SJohn Baldwin.type	ChaCha20_xop,@function
544bc3d5698SJohn Baldwin.align	16
545bc3d5698SJohn BaldwinChaCha20_xop:
546bc3d5698SJohn Baldwin.L_ChaCha20_xop_begin:
547*c0855eaaSJohn Baldwin	#ifdef __CET__
548*c0855eaaSJohn Baldwin
549*c0855eaaSJohn Baldwin.byte	243,15,30,251
550*c0855eaaSJohn Baldwin	#endif
551*c0855eaaSJohn Baldwin
552bc3d5698SJohn Baldwin	pushl	%ebp
553bc3d5698SJohn Baldwin	pushl	%ebx
554bc3d5698SJohn Baldwin	pushl	%esi
555bc3d5698SJohn Baldwin	pushl	%edi
556bc3d5698SJohn Baldwin.Lxop_shortcut:
557bc3d5698SJohn Baldwin	movl	20(%esp),%edi
558bc3d5698SJohn Baldwin	movl	24(%esp),%esi
559bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
560bc3d5698SJohn Baldwin	movl	32(%esp),%edx
561bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
562bc3d5698SJohn Baldwin	vzeroupper
563bc3d5698SJohn Baldwin	movl	%esp,%ebp
564bc3d5698SJohn Baldwin	subl	$524,%esp
565bc3d5698SJohn Baldwin	andl	$-64,%esp
566bc3d5698SJohn Baldwin	movl	%ebp,512(%esp)
567bc3d5698SJohn Baldwin	leal	.Lssse3_data-.Lpic_point(%eax),%eax
568bc3d5698SJohn Baldwin	vmovdqu	(%ebx),%xmm3
569bc3d5698SJohn Baldwin	cmpl	$256,%ecx
570bc3d5698SJohn Baldwin	jb	.L0141x
571bc3d5698SJohn Baldwin	movl	%edx,516(%esp)
572bc3d5698SJohn Baldwin	movl	%ebx,520(%esp)
573bc3d5698SJohn Baldwin	subl	$256,%ecx
574bc3d5698SJohn Baldwin	leal	384(%esp),%ebp
575bc3d5698SJohn Baldwin	vmovdqu	(%edx),%xmm7
576bc3d5698SJohn Baldwin	vpshufd	$0,%xmm3,%xmm0
577bc3d5698SJohn Baldwin	vpshufd	$85,%xmm3,%xmm1
578bc3d5698SJohn Baldwin	vpshufd	$170,%xmm3,%xmm2
579bc3d5698SJohn Baldwin	vpshufd	$255,%xmm3,%xmm3
580bc3d5698SJohn Baldwin	vpaddd	48(%eax),%xmm0,%xmm0
581bc3d5698SJohn Baldwin	vpshufd	$0,%xmm7,%xmm4
582bc3d5698SJohn Baldwin	vpshufd	$85,%xmm7,%xmm5
583bc3d5698SJohn Baldwin	vpsubd	64(%eax),%xmm0,%xmm0
584bc3d5698SJohn Baldwin	vpshufd	$170,%xmm7,%xmm6
585bc3d5698SJohn Baldwin	vpshufd	$255,%xmm7,%xmm7
586bc3d5698SJohn Baldwin	vmovdqa	%xmm0,64(%ebp)
587bc3d5698SJohn Baldwin	vmovdqa	%xmm1,80(%ebp)
588bc3d5698SJohn Baldwin	vmovdqa	%xmm2,96(%ebp)
589bc3d5698SJohn Baldwin	vmovdqa	%xmm3,112(%ebp)
590bc3d5698SJohn Baldwin	vmovdqu	16(%edx),%xmm3
591bc3d5698SJohn Baldwin	vmovdqa	%xmm4,-64(%ebp)
592bc3d5698SJohn Baldwin	vmovdqa	%xmm5,-48(%ebp)
593bc3d5698SJohn Baldwin	vmovdqa	%xmm6,-32(%ebp)
594bc3d5698SJohn Baldwin	vmovdqa	%xmm7,-16(%ebp)
595bc3d5698SJohn Baldwin	vmovdqa	32(%eax),%xmm7
596bc3d5698SJohn Baldwin	leal	128(%esp),%ebx
597bc3d5698SJohn Baldwin	vpshufd	$0,%xmm3,%xmm0
598bc3d5698SJohn Baldwin	vpshufd	$85,%xmm3,%xmm1
599bc3d5698SJohn Baldwin	vpshufd	$170,%xmm3,%xmm2
600bc3d5698SJohn Baldwin	vpshufd	$255,%xmm3,%xmm3
601bc3d5698SJohn Baldwin	vpshufd	$0,%xmm7,%xmm4
602bc3d5698SJohn Baldwin	vpshufd	$85,%xmm7,%xmm5
603bc3d5698SJohn Baldwin	vpshufd	$170,%xmm7,%xmm6
604bc3d5698SJohn Baldwin	vpshufd	$255,%xmm7,%xmm7
605bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%ebp)
606bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%ebp)
607bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%ebp)
608bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%ebp)
609bc3d5698SJohn Baldwin	vmovdqa	%xmm4,-128(%ebp)
610bc3d5698SJohn Baldwin	vmovdqa	%xmm5,-112(%ebp)
611bc3d5698SJohn Baldwin	vmovdqa	%xmm6,-96(%ebp)
612bc3d5698SJohn Baldwin	vmovdqa	%xmm7,-80(%ebp)
613bc3d5698SJohn Baldwin	leal	128(%esi),%esi
614bc3d5698SJohn Baldwin	leal	128(%edi),%edi
615bc3d5698SJohn Baldwin	jmp	.L015outer_loop
616bc3d5698SJohn Baldwin.align	32
617bc3d5698SJohn Baldwin.L015outer_loop:
618bc3d5698SJohn Baldwin	vmovdqa	-112(%ebp),%xmm1
619bc3d5698SJohn Baldwin	vmovdqa	-96(%ebp),%xmm2
620bc3d5698SJohn Baldwin	vmovdqa	-80(%ebp),%xmm3
621bc3d5698SJohn Baldwin	vmovdqa	-48(%ebp),%xmm5
622bc3d5698SJohn Baldwin	vmovdqa	-32(%ebp),%xmm6
623bc3d5698SJohn Baldwin	vmovdqa	-16(%ebp),%xmm7
624bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-112(%ebx)
625bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-96(%ebx)
626bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-80(%ebx)
627bc3d5698SJohn Baldwin	vmovdqa	%xmm5,-48(%ebx)
628bc3d5698SJohn Baldwin	vmovdqa	%xmm6,-32(%ebx)
629bc3d5698SJohn Baldwin	vmovdqa	%xmm7,-16(%ebx)
630bc3d5698SJohn Baldwin	vmovdqa	32(%ebp),%xmm2
631bc3d5698SJohn Baldwin	vmovdqa	48(%ebp),%xmm3
632bc3d5698SJohn Baldwin	vmovdqa	64(%ebp),%xmm4
633bc3d5698SJohn Baldwin	vmovdqa	80(%ebp),%xmm5
634bc3d5698SJohn Baldwin	vmovdqa	96(%ebp),%xmm6
635bc3d5698SJohn Baldwin	vmovdqa	112(%ebp),%xmm7
636bc3d5698SJohn Baldwin	vpaddd	64(%eax),%xmm4,%xmm4
637bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%ebx)
638bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%ebx)
639bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%ebx)
640bc3d5698SJohn Baldwin	vmovdqa	%xmm5,80(%ebx)
641bc3d5698SJohn Baldwin	vmovdqa	%xmm6,96(%ebx)
642bc3d5698SJohn Baldwin	vmovdqa	%xmm7,112(%ebx)
643bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%ebp)
644bc3d5698SJohn Baldwin	vmovdqa	-128(%ebp),%xmm0
645bc3d5698SJohn Baldwin	vmovdqa	%xmm4,%xmm6
646bc3d5698SJohn Baldwin	vmovdqa	-64(%ebp),%xmm3
647bc3d5698SJohn Baldwin	vmovdqa	(%ebp),%xmm4
648bc3d5698SJohn Baldwin	vmovdqa	16(%ebp),%xmm5
649bc3d5698SJohn Baldwin	movl	$10,%edx
650bc3d5698SJohn Baldwin	nop
651bc3d5698SJohn Baldwin.align	32
652bc3d5698SJohn Baldwin.L016loop:
653bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm0,%xmm0
654bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
655bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
656bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
657bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm3,%xmm2
658bc3d5698SJohn Baldwin	vmovdqa	-112(%ebx),%xmm1
659bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
660bc3d5698SJohn Baldwin	vmovdqa	-48(%ebx),%xmm3
661bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
662bc3d5698SJohn Baldwin	vmovdqa	80(%ebx),%xmm7
663bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
664bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
665bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
666bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-128(%ebx)
667bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
668bc3d5698SJohn Baldwin	vmovdqa	%xmm6,64(%ebx)
669bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
670bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
671bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
672bc3d5698SJohn Baldwin	vmovdqa	%xmm4,(%ebx)
673bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
674bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-64(%ebx)
675bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
676bc3d5698SJohn Baldwin	vmovdqa	32(%ebx),%xmm4
677bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
678bc3d5698SJohn Baldwin	vmovdqa	-96(%ebx),%xmm0
679bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
680bc3d5698SJohn Baldwin	vmovdqa	-32(%ebx),%xmm2
681bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
682bc3d5698SJohn Baldwin	vmovdqa	96(%ebx),%xmm6
683bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
684bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
685bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
686bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-112(%ebx)
687bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
688bc3d5698SJohn Baldwin	vmovdqa	%xmm7,80(%ebx)
689bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
690bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
691bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
692bc3d5698SJohn Baldwin	vmovdqa	%xmm5,16(%ebx)
693bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
694bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-48(%ebx)
695bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
696bc3d5698SJohn Baldwin	vmovdqa	48(%ebx),%xmm5
697bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
698bc3d5698SJohn Baldwin	vmovdqa	-80(%ebx),%xmm1
699bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
700bc3d5698SJohn Baldwin	vmovdqa	-16(%ebx),%xmm3
701bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
702bc3d5698SJohn Baldwin	vmovdqa	112(%ebx),%xmm7
703bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
704bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
705bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
706bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-96(%ebx)
707bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
708bc3d5698SJohn Baldwin	vmovdqa	%xmm6,96(%ebx)
709bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
710bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
711bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
712bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
713bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-32(%ebx)
714bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
715bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
716bc3d5698SJohn Baldwin	vmovdqa	-128(%ebx),%xmm0
717bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
718bc3d5698SJohn Baldwin	vmovdqa	-48(%ebx),%xmm2
719bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
720bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
721bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
722bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
723bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-80(%ebx)
724bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
725bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
726bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm7,%xmm6
727bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
728bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
729bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-16(%ebx)
730bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
731bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
732bc3d5698SJohn Baldwin	vmovdqa	-112(%ebx),%xmm1
733bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
734bc3d5698SJohn Baldwin	vmovdqa	-32(%ebx),%xmm3
735bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
736bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm7
737bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
738bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
739bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
740bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-128(%ebx)
741bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
742bc3d5698SJohn Baldwin	vmovdqa	%xmm6,112(%ebx)
743bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
744bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
745bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
746bc3d5698SJohn Baldwin	vmovdqa	%xmm4,32(%ebx)
747bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
748bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-48(%ebx)
749bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
750bc3d5698SJohn Baldwin	vmovdqa	(%ebx),%xmm4
751bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
752bc3d5698SJohn Baldwin	vmovdqa	-96(%ebx),%xmm0
753bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
754bc3d5698SJohn Baldwin	vmovdqa	-16(%ebx),%xmm2
755bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
756bc3d5698SJohn Baldwin	vmovdqa	80(%ebx),%xmm6
757bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
758bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
759bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
760bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-112(%ebx)
761bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
762bc3d5698SJohn Baldwin	vmovdqa	%xmm7,64(%ebx)
763bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
764bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
765bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
766bc3d5698SJohn Baldwin	vmovdqa	%xmm5,48(%ebx)
767bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
768bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-32(%ebx)
769bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
770bc3d5698SJohn Baldwin	vmovdqa	16(%ebx),%xmm5
771bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
772bc3d5698SJohn Baldwin	vmovdqa	-80(%ebx),%xmm1
773bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
774bc3d5698SJohn Baldwin	vmovdqa	-64(%ebx),%xmm3
775bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
776bc3d5698SJohn Baldwin	vmovdqa	96(%ebx),%xmm7
777bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
778bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
779bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
780bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-96(%ebx)
781bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
782bc3d5698SJohn Baldwin	vmovdqa	%xmm6,80(%ebx)
783bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
784bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
785bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
786bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
787bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-16(%ebx)
788bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
789bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
790bc3d5698SJohn Baldwin	vmovdqa	-128(%ebx),%xmm0
791bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
792bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
793bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm6
794bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
795bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
796bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-80(%ebx)
797bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
798bc3d5698SJohn Baldwin	vmovdqa	%xmm7,96(%ebx)
799bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
800bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
801bc3d5698SJohn Baldwin	decl	%edx
802bc3d5698SJohn Baldwin	jnz	.L016loop
803bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-64(%ebx)
804bc3d5698SJohn Baldwin	vmovdqa	%xmm4,(%ebx)
805bc3d5698SJohn Baldwin	vmovdqa	%xmm5,16(%ebx)
806bc3d5698SJohn Baldwin	vmovdqa	%xmm6,64(%ebx)
807bc3d5698SJohn Baldwin	vmovdqa	%xmm7,96(%ebx)
808bc3d5698SJohn Baldwin	vmovdqa	-112(%ebx),%xmm1
809bc3d5698SJohn Baldwin	vmovdqa	-96(%ebx),%xmm2
810bc3d5698SJohn Baldwin	vmovdqa	-80(%ebx),%xmm3
811bc3d5698SJohn Baldwin	vpaddd	-128(%ebp),%xmm0,%xmm0
812bc3d5698SJohn Baldwin	vpaddd	-112(%ebp),%xmm1,%xmm1
813bc3d5698SJohn Baldwin	vpaddd	-96(%ebp),%xmm2,%xmm2
814bc3d5698SJohn Baldwin	vpaddd	-80(%ebp),%xmm3,%xmm3
815bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
816bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
817bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
818bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
819bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
820bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
821bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
822bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
823bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
824bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
825bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
826bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
827bc3d5698SJohn Baldwin	leal	16(%esi),%esi
828bc3d5698SJohn Baldwin	vmovdqa	-64(%ebx),%xmm0
829bc3d5698SJohn Baldwin	vmovdqa	-48(%ebx),%xmm1
830bc3d5698SJohn Baldwin	vmovdqa	-32(%ebx),%xmm2
831bc3d5698SJohn Baldwin	vmovdqa	-16(%ebx),%xmm3
832bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
833bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
834bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
835bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
836bc3d5698SJohn Baldwin	leal	16(%edi),%edi
837bc3d5698SJohn Baldwin	vpaddd	-64(%ebp),%xmm0,%xmm0
838bc3d5698SJohn Baldwin	vpaddd	-48(%ebp),%xmm1,%xmm1
839bc3d5698SJohn Baldwin	vpaddd	-32(%ebp),%xmm2,%xmm2
840bc3d5698SJohn Baldwin	vpaddd	-16(%ebp),%xmm3,%xmm3
841bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
842bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
843bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
844bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
845bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
846bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
847bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
848bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
849bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
850bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
851bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
852bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
853bc3d5698SJohn Baldwin	leal	16(%esi),%esi
854bc3d5698SJohn Baldwin	vmovdqa	(%ebx),%xmm0
855bc3d5698SJohn Baldwin	vmovdqa	16(%ebx),%xmm1
856bc3d5698SJohn Baldwin	vmovdqa	32(%ebx),%xmm2
857bc3d5698SJohn Baldwin	vmovdqa	48(%ebx),%xmm3
858bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
859bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
860bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
861bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
862bc3d5698SJohn Baldwin	leal	16(%edi),%edi
863bc3d5698SJohn Baldwin	vpaddd	(%ebp),%xmm0,%xmm0
864bc3d5698SJohn Baldwin	vpaddd	16(%ebp),%xmm1,%xmm1
865bc3d5698SJohn Baldwin	vpaddd	32(%ebp),%xmm2,%xmm2
866bc3d5698SJohn Baldwin	vpaddd	48(%ebp),%xmm3,%xmm3
867bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
868bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
869bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
870bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
871bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
872bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
873bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
874bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
875bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
876bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
877bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
878bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
879bc3d5698SJohn Baldwin	leal	16(%esi),%esi
880bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm0
881bc3d5698SJohn Baldwin	vmovdqa	80(%ebx),%xmm1
882bc3d5698SJohn Baldwin	vmovdqa	96(%ebx),%xmm2
883bc3d5698SJohn Baldwin	vmovdqa	112(%ebx),%xmm3
884bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
885bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
886bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
887bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
888bc3d5698SJohn Baldwin	leal	16(%edi),%edi
889bc3d5698SJohn Baldwin	vpaddd	64(%ebp),%xmm0,%xmm0
890bc3d5698SJohn Baldwin	vpaddd	80(%ebp),%xmm1,%xmm1
891bc3d5698SJohn Baldwin	vpaddd	96(%ebp),%xmm2,%xmm2
892bc3d5698SJohn Baldwin	vpaddd	112(%ebp),%xmm3,%xmm3
893bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
894bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
895bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
896bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
897bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
898bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
899bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
900bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
901bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
902bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
903bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
904bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
905bc3d5698SJohn Baldwin	leal	208(%esi),%esi
906bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
907bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
908bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
909bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
910bc3d5698SJohn Baldwin	leal	208(%edi),%edi
911bc3d5698SJohn Baldwin	subl	$256,%ecx
912bc3d5698SJohn Baldwin	jnc	.L015outer_loop
913bc3d5698SJohn Baldwin	addl	$256,%ecx
914bc3d5698SJohn Baldwin	jz	.L017done
915bc3d5698SJohn Baldwin	movl	520(%esp),%ebx
916bc3d5698SJohn Baldwin	leal	-128(%esi),%esi
917bc3d5698SJohn Baldwin	movl	516(%esp),%edx
918bc3d5698SJohn Baldwin	leal	-128(%edi),%edi
919bc3d5698SJohn Baldwin	vmovd	64(%ebp),%xmm2
920bc3d5698SJohn Baldwin	vmovdqu	(%ebx),%xmm3
921bc3d5698SJohn Baldwin	vpaddd	96(%eax),%xmm2,%xmm2
922bc3d5698SJohn Baldwin	vpand	112(%eax),%xmm3,%xmm3
923bc3d5698SJohn Baldwin	vpor	%xmm2,%xmm3,%xmm3
924bc3d5698SJohn Baldwin.L0141x:
925bc3d5698SJohn Baldwin	vmovdqa	32(%eax),%xmm0
926bc3d5698SJohn Baldwin	vmovdqu	(%edx),%xmm1
927bc3d5698SJohn Baldwin	vmovdqu	16(%edx),%xmm2
928bc3d5698SJohn Baldwin	vmovdqa	(%eax),%xmm6
929bc3d5698SJohn Baldwin	vmovdqa	16(%eax),%xmm7
930bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
931bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%esp)
932bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%esp)
933bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%esp)
934bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
935bc3d5698SJohn Baldwin	movl	$10,%edx
936bc3d5698SJohn Baldwin	jmp	.L018loop1x
937bc3d5698SJohn Baldwin.align	16
938bc3d5698SJohn Baldwin.L019outer1x:
939bc3d5698SJohn Baldwin	vmovdqa	80(%eax),%xmm3
940bc3d5698SJohn Baldwin	vmovdqa	(%esp),%xmm0
941bc3d5698SJohn Baldwin	vmovdqa	16(%esp),%xmm1
942bc3d5698SJohn Baldwin	vmovdqa	32(%esp),%xmm2
943bc3d5698SJohn Baldwin	vpaddd	48(%esp),%xmm3,%xmm3
944bc3d5698SJohn Baldwin	movl	$10,%edx
945bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
946bc3d5698SJohn Baldwin	jmp	.L018loop1x
947bc3d5698SJohn Baldwin.align	16
948bc3d5698SJohn Baldwin.L018loop1x:
949bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
950bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
951bc3d5698SJohn Baldwin.byte	143,232,120,194,219,16
952bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
953bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
954bc3d5698SJohn Baldwin.byte	143,232,120,194,201,12
955bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
956bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
957bc3d5698SJohn Baldwin.byte	143,232,120,194,219,8
958bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
959bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
960bc3d5698SJohn Baldwin.byte	143,232,120,194,201,7
961bc3d5698SJohn Baldwin	vpshufd	$78,%xmm2,%xmm2
962bc3d5698SJohn Baldwin	vpshufd	$57,%xmm1,%xmm1
963bc3d5698SJohn Baldwin	vpshufd	$147,%xmm3,%xmm3
964bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
965bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
966bc3d5698SJohn Baldwin.byte	143,232,120,194,219,16
967bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
968bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
969bc3d5698SJohn Baldwin.byte	143,232,120,194,201,12
970bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
971bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
972bc3d5698SJohn Baldwin.byte	143,232,120,194,219,8
973bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
974bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
975bc3d5698SJohn Baldwin.byte	143,232,120,194,201,7
976bc3d5698SJohn Baldwin	vpshufd	$78,%xmm2,%xmm2
977bc3d5698SJohn Baldwin	vpshufd	$147,%xmm1,%xmm1
978bc3d5698SJohn Baldwin	vpshufd	$57,%xmm3,%xmm3
979bc3d5698SJohn Baldwin	decl	%edx
980bc3d5698SJohn Baldwin	jnz	.L018loop1x
981bc3d5698SJohn Baldwin	vpaddd	(%esp),%xmm0,%xmm0
982bc3d5698SJohn Baldwin	vpaddd	16(%esp),%xmm1,%xmm1
983bc3d5698SJohn Baldwin	vpaddd	32(%esp),%xmm2,%xmm2
984bc3d5698SJohn Baldwin	vpaddd	48(%esp),%xmm3,%xmm3
985bc3d5698SJohn Baldwin	cmpl	$64,%ecx
986bc3d5698SJohn Baldwin	jb	.L020tail
987bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm0,%xmm0
988bc3d5698SJohn Baldwin	vpxor	16(%esi),%xmm1,%xmm1
989bc3d5698SJohn Baldwin	vpxor	32(%esi),%xmm2,%xmm2
990bc3d5698SJohn Baldwin	vpxor	48(%esi),%xmm3,%xmm3
991bc3d5698SJohn Baldwin	leal	64(%esi),%esi
992bc3d5698SJohn Baldwin	vmovdqu	%xmm0,(%edi)
993bc3d5698SJohn Baldwin	vmovdqu	%xmm1,16(%edi)
994bc3d5698SJohn Baldwin	vmovdqu	%xmm2,32(%edi)
995bc3d5698SJohn Baldwin	vmovdqu	%xmm3,48(%edi)
996bc3d5698SJohn Baldwin	leal	64(%edi),%edi
997bc3d5698SJohn Baldwin	subl	$64,%ecx
998bc3d5698SJohn Baldwin	jnz	.L019outer1x
999bc3d5698SJohn Baldwin	jmp	.L017done
1000bc3d5698SJohn Baldwin.L020tail:
1001bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%esp)
1002bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%esp)
1003bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%esp)
1004bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
1005bc3d5698SJohn Baldwin	xorl	%eax,%eax
1006bc3d5698SJohn Baldwin	xorl	%edx,%edx
1007bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
1008bc3d5698SJohn Baldwin.L021tail_loop:
1009bc3d5698SJohn Baldwin	movb	(%esp,%ebp,1),%al
1010bc3d5698SJohn Baldwin	movb	(%esi,%ebp,1),%dl
1011bc3d5698SJohn Baldwin	leal	1(%ebp),%ebp
1012bc3d5698SJohn Baldwin	xorb	%dl,%al
1013bc3d5698SJohn Baldwin	movb	%al,-1(%edi,%ebp,1)
1014bc3d5698SJohn Baldwin	decl	%ecx
1015bc3d5698SJohn Baldwin	jnz	.L021tail_loop
1016bc3d5698SJohn Baldwin.L017done:
1017bc3d5698SJohn Baldwin	vzeroupper
1018bc3d5698SJohn Baldwin	movl	512(%esp),%esp
1019bc3d5698SJohn Baldwin	popl	%edi
1020bc3d5698SJohn Baldwin	popl	%esi
1021bc3d5698SJohn Baldwin	popl	%ebx
1022bc3d5698SJohn Baldwin	popl	%ebp
1023bc3d5698SJohn Baldwin	ret
1024bc3d5698SJohn Baldwin.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
1025bc3d5698SJohn Baldwin.comm	OPENSSL_ia32cap_P,16,4
1026*c0855eaaSJohn Baldwin
/*
 * ELF note placed in .note.gnu.property.  Fields, in the order emitted:
 *   namesz = 1f - 0f   size of the name string "GNU\0"
 *   descsz = 4f - 1f   size of the descriptor that follows the name
 *   type   = 5         NT_GNU_PROPERTY_TYPE_0
 *   name   = "GNU"
 * The descriptor holds a single property record:
 *   pr_type   = 0xc0000002   GNU_PROPERTY_X86_FEATURE_1_AND
 *   pr_datasz = 3f - 2f      size of the value below
 *   pr_data   = 3            IBT (bit 0) and SHSTK (bit 1) both set
 * NOTE(review): in the lines visible here this note is not wrapped in
 * "#ifdef __CET__", unlike the endbr32 bytes at the function entry points.
 */
1027*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
1028*c0855eaaSJohn Baldwin	.p2align 2
1029*c0855eaaSJohn Baldwin	.long 1f - 0f
1030*c0855eaaSJohn Baldwin	.long 4f - 1f
1031*c0855eaaSJohn Baldwin	.long 5
/* 0: start of the note name */
1032*c0855eaaSJohn Baldwin0:
1033*c0855eaaSJohn Baldwin	.asciz "GNU"
/* 1: end of the name, start of the descriptor */
1034*c0855eaaSJohn Baldwin1:
1035*c0855eaaSJohn Baldwin	.p2align 2
1036*c0855eaaSJohn Baldwin	.long 0xc0000002
1037*c0855eaaSJohn Baldwin	.long 3f - 2f
/* 2: start of the property value */
1038*c0855eaaSJohn Baldwin2:
1039*c0855eaaSJohn Baldwin	.long 3
/* 3: end of the property value; 4: end of the (padded) descriptor */
1040*c0855eaaSJohn Baldwin3:
1041*c0855eaaSJohn Baldwin	.p2align 2
1042*c0855eaaSJohn Baldwin4:
1043bc3d5698SJohn Baldwin#else
/*
 * ChaCha20_ctr32 -- non-PIC build (the #else branch of "#ifdef PIC").
 * AT&T syntax, i386, arguments on the stack.  After the four pushes below
 * the arguments sit at 20(%esp)..36(%esp) and are used as follows:
 *   20(%esp)  destination pointer (output bytes are stored through it)
 *   24(%esp)  source pointer (its bytes are XORed with the generated block)
 *   28(%esp)  byte count; a count of zero returns immediately
 *   32(%esp)  pointer to 8 dwords, copied to 80..108(%esp) (presumably the key)
 *   36(%esp)  pointer to 4 dwords, copied to 112..124(%esp) (presumably the
 *             32-bit block counter followed by the nonce -- confirm with caller)
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * the flags are clobbered.
 *
 * This entry point only dispatches: when the capability bits tested below
 * are set it jumps into ChaCha20_ssse3, otherwise it runs the scalar
 * implementation at .L001x86.
 *
 * Scalar frame layout after "subl $132,%esp":
 *   0..60(%esp)     working state words x0..x15 (some live in registers
 *                   while the rounds run)
 *   80..108(%esp)   input words x4..x11
 *   112..124(%esp)  input words x12..x15; 112(%esp) is bumped once per block
 *   128(%esp)       spill slot for the remaining double-round count
 *   152/156/160     the caller's destination / source / length arguments,
 *                   rewritten in place as blocks are consumed
 */
1044bc3d5698SJohn Baldwin.text
1045bc3d5698SJohn Baldwin.globl	ChaCha20_ctr32
1046bc3d5698SJohn Baldwin.type	ChaCha20_ctr32,@function
1047bc3d5698SJohn Baldwin.align	16
1048bc3d5698SJohn BaldwinChaCha20_ctr32:
1049bc3d5698SJohn Baldwin.L_ChaCha20_ctr32_begin:
/* The four bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
1050*c0855eaaSJohn Baldwin	#ifdef __CET__
1051*c0855eaaSJohn Baldwin
1052*c0855eaaSJohn Baldwin.byte	243,15,30,251
1053*c0855eaaSJohn Baldwin	#endif
1054*c0855eaaSJohn Baldwin
1055bc3d5698SJohn Baldwin	pushl	%ebp
1056bc3d5698SJohn Baldwin	pushl	%ebx
1057bc3d5698SJohn Baldwin	pushl	%esi
1058bc3d5698SJohn Baldwin	pushl	%edi
/* Nothing to do when the byte count is zero. */
1059bc3d5698SJohn Baldwin	xorl	%eax,%eax
1060bc3d5698SJohn Baldwin	cmpl	28(%esp),%eax
1061bc3d5698SJohn Baldwin	je	.L000no_data
/*
 * call/pop leaves the address of .Lpic_point in %eax.  The capability
 * vector itself is addressed absolutely in this build, but the SSSE3 path
 * still uses %eax as the base for ".Lssse3_data-.Lpic_point(%eax)".
 */
1062bc3d5698SJohn Baldwin	call	.Lpic_point
1063bc3d5698SJohn Baldwin.Lpic_point:
1064bc3d5698SJohn Baldwin	popl	%eax
1065bc3d5698SJohn Baldwin	leal	OPENSSL_ia32cap_P,%ebp
/*
 * Take the SIMD path only if bit 24 of the first capability dword and
 * bit 9 of the second are both set (FXSR and SSSE3 according to the
 * OPENSSL_ia32cap documentation).  .Lssse3_shortcut lies after the pushes
 * in ChaCha20_ssse3, so the registers saved above are reused there.
 */
1066bc3d5698SJohn Baldwin	testl	$16777216,(%ebp)
1067bc3d5698SJohn Baldwin	jz	.L001x86
1068bc3d5698SJohn Baldwin	testl	$512,4(%ebp)
1069bc3d5698SJohn Baldwin	jz	.L001x86
1070bc3d5698SJohn Baldwin	jmp	.Lssse3_shortcut
/* ---- scalar implementation ---- */
1071bc3d5698SJohn Baldwin.L001x86:
/* Fetch the two table pointers before %esp moves, then build the frame. */
1072bc3d5698SJohn Baldwin	movl	32(%esp),%esi
1073bc3d5698SJohn Baldwin	movl	36(%esp),%edi
1074bc3d5698SJohn Baldwin	subl	$132,%esp
/* Copy the 8 dwords at (%esi) to 80..108(%esp). */
1075bc3d5698SJohn Baldwin	movl	(%esi),%eax
1076bc3d5698SJohn Baldwin	movl	4(%esi),%ebx
1077bc3d5698SJohn Baldwin	movl	8(%esi),%ecx
1078bc3d5698SJohn Baldwin	movl	12(%esi),%edx
1079bc3d5698SJohn Baldwin	movl	%eax,80(%esp)
1080bc3d5698SJohn Baldwin	movl	%ebx,84(%esp)
1081bc3d5698SJohn Baldwin	movl	%ecx,88(%esp)
1082bc3d5698SJohn Baldwin	movl	%edx,92(%esp)
1083bc3d5698SJohn Baldwin	movl	16(%esi),%eax
1084bc3d5698SJohn Baldwin	movl	20(%esi),%ebx
1085bc3d5698SJohn Baldwin	movl	24(%esi),%ecx
1086bc3d5698SJohn Baldwin	movl	28(%esi),%edx
1087bc3d5698SJohn Baldwin	movl	%eax,96(%esp)
1088bc3d5698SJohn Baldwin	movl	%ebx,100(%esp)
1089bc3d5698SJohn Baldwin	movl	%ecx,104(%esp)
1090bc3d5698SJohn Baldwin	movl	%edx,108(%esp)
/*
 * Copy the 4 dwords at (%edi) to 112..124(%esp).  The first one is stored
 * decremented by one because .L002entry increments it again before every
 * block, including the first.
 */
1091bc3d5698SJohn Baldwin	movl	(%edi),%eax
1092bc3d5698SJohn Baldwin	movl	4(%edi),%ebx
1093bc3d5698SJohn Baldwin	movl	8(%edi),%ecx
1094bc3d5698SJohn Baldwin	movl	12(%edi),%edx
1095bc3d5698SJohn Baldwin	subl	$1,%eax
1096bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1097bc3d5698SJohn Baldwin	movl	%ebx,116(%esp)
1098bc3d5698SJohn Baldwin	movl	%ecx,120(%esp)
1099bc3d5698SJohn Baldwin	movl	%edx,124(%esp)
1100bc3d5698SJohn Baldwin	jmp	.L002entry
1101bc3d5698SJohn Baldwin.align	16
/*
 * Re-entered after each full 64-byte block with %ebx = advanced source,
 * %eax = advanced destination, %ecx = bytes left; write them back over the
 * caller's argument slots.
 */
1102bc3d5698SJohn Baldwin.L003outer_loop:
1103bc3d5698SJohn Baldwin	movl	%ebx,156(%esp)
1104bc3d5698SJohn Baldwin	movl	%eax,152(%esp)
1105bc3d5698SJohn Baldwin	movl	%ecx,160(%esp)
/*
 * Set up the working state for one block.  x0..x3 are the constants
 * 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k");
 * x0 stays in %eax, the others go to 4..12(%esp).
 */
1106bc3d5698SJohn Baldwin.L002entry:
1107bc3d5698SJohn Baldwin	movl	$1634760805,%eax
1108bc3d5698SJohn Baldwin	movl	$857760878,4(%esp)
1109bc3d5698SJohn Baldwin	movl	$2036477234,8(%esp)
1110bc3d5698SJohn Baldwin	movl	$1797285236,12(%esp)
/* x5,x6,x10,x11,x13,x14 -> working slots 20,24,40,44,52,56. */
1111bc3d5698SJohn Baldwin	movl	84(%esp),%ebx
1112bc3d5698SJohn Baldwin	movl	88(%esp),%ebp
1113bc3d5698SJohn Baldwin	movl	104(%esp),%ecx
1114bc3d5698SJohn Baldwin	movl	108(%esp),%esi
1115bc3d5698SJohn Baldwin	movl	116(%esp),%edx
1116bc3d5698SJohn Baldwin	movl	120(%esp),%edi
1117bc3d5698SJohn Baldwin	movl	%ebx,20(%esp)
1118bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
1119bc3d5698SJohn Baldwin	movl	%ecx,40(%esp)
1120bc3d5698SJohn Baldwin	movl	%esi,44(%esp)
1121bc3d5698SJohn Baldwin	movl	%edx,52(%esp)
1122bc3d5698SJohn Baldwin	movl	%edi,56(%esp)
/*
 * x7 and x15 -> slots 28 and 60.  x4, x8, x9 and x12 stay in
 * %ebp, %ecx, %esi and %edx for the first quarter-rounds.
 */
1123bc3d5698SJohn Baldwin	movl	92(%esp),%ebx
1124bc3d5698SJohn Baldwin	movl	124(%esp),%edi
1125bc3d5698SJohn Baldwin	movl	112(%esp),%edx
1126bc3d5698SJohn Baldwin	movl	80(%esp),%ebp
1127bc3d5698SJohn Baldwin	movl	96(%esp),%ecx
1128bc3d5698SJohn Baldwin	movl	100(%esp),%esi
/* Advance the per-block counter word and remember it for the next block. */
1129bc3d5698SJohn Baldwin	addl	$1,%edx
1130bc3d5698SJohn Baldwin	movl	%ebx,28(%esp)
1131bc3d5698SJohn Baldwin	movl	%edi,60(%esp)
1132bc3d5698SJohn Baldwin	movl	%edx,112(%esp)
/* 10 iterations, each a column round followed by a diagonal round. */
1133bc3d5698SJohn Baldwin	movl	$10,%ebx
1134bc3d5698SJohn Baldwin	jmp	.L004loop
1135bc3d5698SJohn Baldwin.align	16
/*
 * Double-round loop.  On entry to every iteration:
 *   %eax = x0, %ebp = x4, %ecx = x8, %esi = x9, %edx = x12, %ebx = count.
 * Each quarter-round is add/xor/rotate by 16, 12, 8 and 7.  Loads and
 * stores for the next quarter-round are interleaved with the current one,
 * so the markers below sit on the first add of each quarter-round.
 */
1136bc3d5698SJohn Baldwin.L004loop:
/* column quarter-round on x0,x4,x8,x12; the loop count is parked at 128(%esp) */
1137bc3d5698SJohn Baldwin	addl	%ebp,%eax
1138bc3d5698SJohn Baldwin	movl	%ebx,128(%esp)
1139bc3d5698SJohn Baldwin	movl	%ebp,%ebx
1140bc3d5698SJohn Baldwin	xorl	%eax,%edx
1141bc3d5698SJohn Baldwin	roll	$16,%edx
1142bc3d5698SJohn Baldwin	addl	%edx,%ecx
1143bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
1144bc3d5698SJohn Baldwin	movl	52(%esp),%edi
1145bc3d5698SJohn Baldwin	roll	$12,%ebx
1146bc3d5698SJohn Baldwin	movl	20(%esp),%ebp
1147bc3d5698SJohn Baldwin	addl	%ebx,%eax
1148bc3d5698SJohn Baldwin	xorl	%eax,%edx
1149bc3d5698SJohn Baldwin	movl	%eax,(%esp)
1150bc3d5698SJohn Baldwin	roll	$8,%edx
1151bc3d5698SJohn Baldwin	movl	4(%esp),%eax
1152bc3d5698SJohn Baldwin	addl	%edx,%ecx
1153bc3d5698SJohn Baldwin	movl	%edx,48(%esp)
1154bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
/* column quarter-round on x1,x5,x9,x13 */
1155bc3d5698SJohn Baldwin	addl	%ebp,%eax
1156bc3d5698SJohn Baldwin	roll	$7,%ebx
1157bc3d5698SJohn Baldwin	xorl	%eax,%edi
1158bc3d5698SJohn Baldwin	movl	%ecx,32(%esp)
1159bc3d5698SJohn Baldwin	roll	$16,%edi
1160bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
1161bc3d5698SJohn Baldwin	addl	%edi,%esi
1162bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
1163bc3d5698SJohn Baldwin	xorl	%esi,%ebp
1164bc3d5698SJohn Baldwin	movl	56(%esp),%edx
1165bc3d5698SJohn Baldwin	roll	$12,%ebp
1166bc3d5698SJohn Baldwin	movl	24(%esp),%ebx
1167bc3d5698SJohn Baldwin	addl	%ebp,%eax
1168bc3d5698SJohn Baldwin	xorl	%eax,%edi
1169bc3d5698SJohn Baldwin	movl	%eax,4(%esp)
1170bc3d5698SJohn Baldwin	roll	$8,%edi
1171bc3d5698SJohn Baldwin	movl	8(%esp),%eax
1172bc3d5698SJohn Baldwin	addl	%edi,%esi
1173bc3d5698SJohn Baldwin	movl	%edi,52(%esp)
1174bc3d5698SJohn Baldwin	xorl	%esi,%ebp
/* column quarter-round on x2,x6,x10,x14 */
1175bc3d5698SJohn Baldwin	addl	%ebx,%eax
1176bc3d5698SJohn Baldwin	roll	$7,%ebp
1177bc3d5698SJohn Baldwin	xorl	%eax,%edx
1178bc3d5698SJohn Baldwin	movl	%esi,36(%esp)
1179bc3d5698SJohn Baldwin	roll	$16,%edx
1180bc3d5698SJohn Baldwin	movl	%ebp,20(%esp)
1181bc3d5698SJohn Baldwin	addl	%edx,%ecx
1182bc3d5698SJohn Baldwin	movl	44(%esp),%esi
1183bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
1184bc3d5698SJohn Baldwin	movl	60(%esp),%edi
1185bc3d5698SJohn Baldwin	roll	$12,%ebx
1186bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
1187bc3d5698SJohn Baldwin	addl	%ebx,%eax
1188bc3d5698SJohn Baldwin	xorl	%eax,%edx
1189bc3d5698SJohn Baldwin	movl	%eax,8(%esp)
1190bc3d5698SJohn Baldwin	roll	$8,%edx
1191bc3d5698SJohn Baldwin	movl	12(%esp),%eax
1192bc3d5698SJohn Baldwin	addl	%edx,%ecx
1193bc3d5698SJohn Baldwin	movl	%edx,56(%esp)
1194bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
/* column quarter-round on x3,x7,x11,x15; x10 stays in %ecx for the diagonal round */
1195bc3d5698SJohn Baldwin	addl	%ebp,%eax
1196bc3d5698SJohn Baldwin	roll	$7,%ebx
1197bc3d5698SJohn Baldwin	xorl	%eax,%edi
1198bc3d5698SJohn Baldwin	roll	$16,%edi
1199bc3d5698SJohn Baldwin	movl	%ebx,24(%esp)
1200bc3d5698SJohn Baldwin	addl	%edi,%esi
1201bc3d5698SJohn Baldwin	xorl	%esi,%ebp
1202bc3d5698SJohn Baldwin	roll	$12,%ebp
1203bc3d5698SJohn Baldwin	movl	20(%esp),%ebx
1204bc3d5698SJohn Baldwin	addl	%ebp,%eax
1205bc3d5698SJohn Baldwin	xorl	%eax,%edi
1206bc3d5698SJohn Baldwin	movl	%eax,12(%esp)
1207bc3d5698SJohn Baldwin	roll	$8,%edi
1208bc3d5698SJohn Baldwin	movl	(%esp),%eax
1209bc3d5698SJohn Baldwin	addl	%edi,%esi
1210bc3d5698SJohn Baldwin	movl	%edi,%edx
1211bc3d5698SJohn Baldwin	xorl	%esi,%ebp
/* diagonal quarter-round on x0,x5,x10,x15 (x15 was just copied from %edi to %edx) */
1212bc3d5698SJohn Baldwin	addl	%ebx,%eax
1213bc3d5698SJohn Baldwin	roll	$7,%ebp
1214bc3d5698SJohn Baldwin	xorl	%eax,%edx
1215bc3d5698SJohn Baldwin	roll	$16,%edx
1216bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
1217bc3d5698SJohn Baldwin	addl	%edx,%ecx
1218bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
1219bc3d5698SJohn Baldwin	movl	48(%esp),%edi
1220bc3d5698SJohn Baldwin	roll	$12,%ebx
1221bc3d5698SJohn Baldwin	movl	24(%esp),%ebp
1222bc3d5698SJohn Baldwin	addl	%ebx,%eax
1223bc3d5698SJohn Baldwin	xorl	%eax,%edx
1224bc3d5698SJohn Baldwin	movl	%eax,(%esp)
1225bc3d5698SJohn Baldwin	roll	$8,%edx
1226bc3d5698SJohn Baldwin	movl	4(%esp),%eax
1227bc3d5698SJohn Baldwin	addl	%edx,%ecx
1228bc3d5698SJohn Baldwin	movl	%edx,60(%esp)
1229bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
/* diagonal quarter-round on x1,x6,x11,x12 */
1230bc3d5698SJohn Baldwin	addl	%ebp,%eax
1231bc3d5698SJohn Baldwin	roll	$7,%ebx
1232bc3d5698SJohn Baldwin	xorl	%eax,%edi
1233bc3d5698SJohn Baldwin	movl	%ecx,40(%esp)
1234bc3d5698SJohn Baldwin	roll	$16,%edi
1235bc3d5698SJohn Baldwin	movl	%ebx,20(%esp)
1236bc3d5698SJohn Baldwin	addl	%edi,%esi
1237bc3d5698SJohn Baldwin	movl	32(%esp),%ecx
1238bc3d5698SJohn Baldwin	xorl	%esi,%ebp
1239bc3d5698SJohn Baldwin	movl	52(%esp),%edx
1240bc3d5698SJohn Baldwin	roll	$12,%ebp
1241bc3d5698SJohn Baldwin	movl	28(%esp),%ebx
1242bc3d5698SJohn Baldwin	addl	%ebp,%eax
1243bc3d5698SJohn Baldwin	xorl	%eax,%edi
1244bc3d5698SJohn Baldwin	movl	%eax,4(%esp)
1245bc3d5698SJohn Baldwin	roll	$8,%edi
1246bc3d5698SJohn Baldwin	movl	8(%esp),%eax
1247bc3d5698SJohn Baldwin	addl	%edi,%esi
1248bc3d5698SJohn Baldwin	movl	%edi,48(%esp)
1249bc3d5698SJohn Baldwin	xorl	%esi,%ebp
/* diagonal quarter-round on x2,x7,x8,x13 */
1250bc3d5698SJohn Baldwin	addl	%ebx,%eax
1251bc3d5698SJohn Baldwin	roll	$7,%ebp
1252bc3d5698SJohn Baldwin	xorl	%eax,%edx
1253bc3d5698SJohn Baldwin	movl	%esi,44(%esp)
1254bc3d5698SJohn Baldwin	roll	$16,%edx
1255bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
1256bc3d5698SJohn Baldwin	addl	%edx,%ecx
1257bc3d5698SJohn Baldwin	movl	36(%esp),%esi
1258bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
1259bc3d5698SJohn Baldwin	movl	56(%esp),%edi
1260bc3d5698SJohn Baldwin	roll	$12,%ebx
1261bc3d5698SJohn Baldwin	movl	16(%esp),%ebp
1262bc3d5698SJohn Baldwin	addl	%ebx,%eax
1263bc3d5698SJohn Baldwin	xorl	%eax,%edx
1264bc3d5698SJohn Baldwin	movl	%eax,8(%esp)
1265bc3d5698SJohn Baldwin	roll	$8,%edx
1266bc3d5698SJohn Baldwin	movl	12(%esp),%eax
1267bc3d5698SJohn Baldwin	addl	%edx,%ecx
1268bc3d5698SJohn Baldwin	movl	%edx,52(%esp)
1269bc3d5698SJohn Baldwin	xorl	%ecx,%ebx
/*
 * diagonal quarter-round on x3,x4,x9,x14; meanwhile x12, the loop count
 * and x0 are reloaded so the next iteration finds its entry registers set
 */
1270bc3d5698SJohn Baldwin	addl	%ebp,%eax
1271bc3d5698SJohn Baldwin	roll	$7,%ebx
1272bc3d5698SJohn Baldwin	xorl	%eax,%edi
1273bc3d5698SJohn Baldwin	roll	$16,%edi
1274bc3d5698SJohn Baldwin	movl	%ebx,28(%esp)
1275bc3d5698SJohn Baldwin	addl	%edi,%esi
1276bc3d5698SJohn Baldwin	xorl	%esi,%ebp
1277bc3d5698SJohn Baldwin	movl	48(%esp),%edx
1278bc3d5698SJohn Baldwin	roll	$12,%ebp
1279bc3d5698SJohn Baldwin	movl	128(%esp),%ebx
1280bc3d5698SJohn Baldwin	addl	%ebp,%eax
1281bc3d5698SJohn Baldwin	xorl	%eax,%edi
1282bc3d5698SJohn Baldwin	movl	%eax,12(%esp)
1283bc3d5698SJohn Baldwin	roll	$8,%edi
1284bc3d5698SJohn Baldwin	movl	(%esp),%eax
1285bc3d5698SJohn Baldwin	addl	%edi,%esi
1286bc3d5698SJohn Baldwin	movl	%edi,56(%esp)
1287bc3d5698SJohn Baldwin	xorl	%esi,%ebp
1288bc3d5698SJohn Baldwin	roll	$7,%ebp
1289bc3d5698SJohn Baldwin	decl	%ebx
1290bc3d5698SJohn Baldwin	jnz	.L004loop
/*
 * Rounds finished.  Still in registers: %eax = x0, %ebp = x4, %ecx = x8,
 * %esi = x9, %edx = x12, %edi = x14.  Add the matching input words, then
 * choose the full-block or the partial-block path on the remaining length.
 */
1291bc3d5698SJohn Baldwin	movl	160(%esp),%ebx
1292bc3d5698SJohn Baldwin	addl	$1634760805,%eax
1293bc3d5698SJohn Baldwin	addl	80(%esp),%ebp
1294bc3d5698SJohn Baldwin	addl	96(%esp),%ecx
1295bc3d5698SJohn Baldwin	addl	100(%esp),%esi
1296bc3d5698SJohn Baldwin	cmpl	$64,%ebx
1297bc3d5698SJohn Baldwin	jb	.L005tail
/* Full block: %ebx = source pointer, %eax (loaded below) = destination pointer. */
1298bc3d5698SJohn Baldwin	movl	156(%esp),%ebx
1299bc3d5698SJohn Baldwin	addl	112(%esp),%edx
1300bc3d5698SJohn Baldwin	addl	120(%esp),%edi
/* words 0,4,8,9,12,14: XOR with the source; word 0 waits in (%esp) because %eax is needed for the pointer */
1301bc3d5698SJohn Baldwin	xorl	(%ebx),%eax
1302bc3d5698SJohn Baldwin	xorl	16(%ebx),%ebp
1303bc3d5698SJohn Baldwin	movl	%eax,(%esp)
1304bc3d5698SJohn Baldwin	movl	152(%esp),%eax
1305bc3d5698SJohn Baldwin	xorl	32(%ebx),%ecx
1306bc3d5698SJohn Baldwin	xorl	36(%ebx),%esi
1307bc3d5698SJohn Baldwin	xorl	48(%ebx),%edx
1308bc3d5698SJohn Baldwin	xorl	56(%ebx),%edi
1309bc3d5698SJohn Baldwin	movl	%ebp,16(%eax)
1310bc3d5698SJohn Baldwin	movl	%ecx,32(%eax)
1311bc3d5698SJohn Baldwin	movl	%esi,36(%eax)
1312bc3d5698SJohn Baldwin	movl	%edx,48(%eax)
1313bc3d5698SJohn Baldwin	movl	%edi,56(%eax)
/* words 1,2,3,5,6: add input, XOR with source, store */
1314bc3d5698SJohn Baldwin	movl	4(%esp),%ebp
1315bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
1316bc3d5698SJohn Baldwin	movl	12(%esp),%esi
1317bc3d5698SJohn Baldwin	movl	20(%esp),%edx
1318bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1319bc3d5698SJohn Baldwin	addl	$857760878,%ebp
1320bc3d5698SJohn Baldwin	addl	$2036477234,%ecx
1321bc3d5698SJohn Baldwin	addl	$1797285236,%esi
1322bc3d5698SJohn Baldwin	addl	84(%esp),%edx
1323bc3d5698SJohn Baldwin	addl	88(%esp),%edi
1324bc3d5698SJohn Baldwin	xorl	4(%ebx),%ebp
1325bc3d5698SJohn Baldwin	xorl	8(%ebx),%ecx
1326bc3d5698SJohn Baldwin	xorl	12(%ebx),%esi
1327bc3d5698SJohn Baldwin	xorl	20(%ebx),%edx
1328bc3d5698SJohn Baldwin	xorl	24(%ebx),%edi
1329bc3d5698SJohn Baldwin	movl	%ebp,4(%eax)
1330bc3d5698SJohn Baldwin	movl	%ecx,8(%eax)
1331bc3d5698SJohn Baldwin	movl	%esi,12(%eax)
1332bc3d5698SJohn Baldwin	movl	%edx,20(%eax)
1333bc3d5698SJohn Baldwin	movl	%edi,24(%eax)
/* words 7,10,11,13,15: add input, XOR with source, store */
1334bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
1335bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
1336bc3d5698SJohn Baldwin	movl	44(%esp),%esi
1337bc3d5698SJohn Baldwin	movl	52(%esp),%edx
1338bc3d5698SJohn Baldwin	movl	60(%esp),%edi
1339bc3d5698SJohn Baldwin	addl	92(%esp),%ebp
1340bc3d5698SJohn Baldwin	addl	104(%esp),%ecx
1341bc3d5698SJohn Baldwin	addl	108(%esp),%esi
1342bc3d5698SJohn Baldwin	addl	116(%esp),%edx
1343bc3d5698SJohn Baldwin	addl	124(%esp),%edi
1344bc3d5698SJohn Baldwin	xorl	28(%ebx),%ebp
1345bc3d5698SJohn Baldwin	xorl	40(%ebx),%ecx
1346bc3d5698SJohn Baldwin	xorl	44(%ebx),%esi
1347bc3d5698SJohn Baldwin	xorl	52(%ebx),%edx
1348bc3d5698SJohn Baldwin	xorl	60(%ebx),%edi
/* Advance both pointers by one block; word 0 is written last from its spill slot. */
1349bc3d5698SJohn Baldwin	leal	64(%ebx),%ebx
1350bc3d5698SJohn Baldwin	movl	%ebp,28(%eax)
1351bc3d5698SJohn Baldwin	movl	(%esp),%ebp
1352bc3d5698SJohn Baldwin	movl	%ecx,40(%eax)
1353bc3d5698SJohn Baldwin	movl	160(%esp),%ecx
1354bc3d5698SJohn Baldwin	movl	%esi,44(%eax)
1355bc3d5698SJohn Baldwin	movl	%edx,52(%eax)
1356bc3d5698SJohn Baldwin	movl	%edi,60(%eax)
1357bc3d5698SJohn Baldwin	movl	%ebp,(%eax)
1358bc3d5698SJohn Baldwin	leal	64(%eax),%eax
/* Loop while bytes remain; an exact multiple of 64 ends here. */
1359bc3d5698SJohn Baldwin	subl	$64,%ecx
1360bc3d5698SJohn Baldwin	jnz	.L003outer_loop
1361bc3d5698SJohn Baldwin	jmp	.L006done
/*
 * Partial final block (%ebx = bytes left, 1..63).  Finish adding the input
 * words and assemble the whole 64-byte block at 0..60(%esp), then XOR only
 * the bytes that are needed.
 */
1362bc3d5698SJohn Baldwin.L005tail:
1363bc3d5698SJohn Baldwin	addl	112(%esp),%edx
1364bc3d5698SJohn Baldwin	addl	120(%esp),%edi
1365bc3d5698SJohn Baldwin	movl	%eax,(%esp)
1366bc3d5698SJohn Baldwin	movl	%ebp,16(%esp)
1367bc3d5698SJohn Baldwin	movl	%ecx,32(%esp)
1368bc3d5698SJohn Baldwin	movl	%esi,36(%esp)
1369bc3d5698SJohn Baldwin	movl	%edx,48(%esp)
1370bc3d5698SJohn Baldwin	movl	%edi,56(%esp)
1371bc3d5698SJohn Baldwin	movl	4(%esp),%ebp
1372bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
1373bc3d5698SJohn Baldwin	movl	12(%esp),%esi
1374bc3d5698SJohn Baldwin	movl	20(%esp),%edx
1375bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1376bc3d5698SJohn Baldwin	addl	$857760878,%ebp
1377bc3d5698SJohn Baldwin	addl	$2036477234,%ecx
1378bc3d5698SJohn Baldwin	addl	$1797285236,%esi
1379bc3d5698SJohn Baldwin	addl	84(%esp),%edx
1380bc3d5698SJohn Baldwin	addl	88(%esp),%edi
1381bc3d5698SJohn Baldwin	movl	%ebp,4(%esp)
1382bc3d5698SJohn Baldwin	movl	%ecx,8(%esp)
1383bc3d5698SJohn Baldwin	movl	%esi,12(%esp)
1384bc3d5698SJohn Baldwin	movl	%edx,20(%esp)
1385bc3d5698SJohn Baldwin	movl	%edi,24(%esp)
1386bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
1387bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
1388bc3d5698SJohn Baldwin	movl	44(%esp),%esi
1389bc3d5698SJohn Baldwin	movl	52(%esp),%edx
1390bc3d5698SJohn Baldwin	movl	60(%esp),%edi
1391bc3d5698SJohn Baldwin	addl	92(%esp),%ebp
1392bc3d5698SJohn Baldwin	addl	104(%esp),%ecx
1393bc3d5698SJohn Baldwin	addl	108(%esp),%esi
1394bc3d5698SJohn Baldwin	addl	116(%esp),%edx
1395bc3d5698SJohn Baldwin	addl	124(%esp),%edi
/* From here %ebp = source pointer, %ecx = destination pointer, %esi = byte index (0). */
1396bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
1397bc3d5698SJohn Baldwin	movl	156(%esp),%ebp
1398bc3d5698SJohn Baldwin	movl	%ecx,40(%esp)
1399bc3d5698SJohn Baldwin	movl	152(%esp),%ecx
1400bc3d5698SJohn Baldwin	movl	%esi,44(%esp)
1401bc3d5698SJohn Baldwin	xorl	%esi,%esi
1402bc3d5698SJohn Baldwin	movl	%edx,52(%esp)
1403bc3d5698SJohn Baldwin	movl	%edi,60(%esp)
1404bc3d5698SJohn Baldwin	xorl	%eax,%eax
1405bc3d5698SJohn Baldwin	xorl	%edx,%edx
/* dst[i] = src[i] ^ block[i]; the index is bumped before the store, hence the -1 displacement. */
1406bc3d5698SJohn Baldwin.L007tail_loop:
1407bc3d5698SJohn Baldwin	movb	(%esi,%ebp,1),%al
1408bc3d5698SJohn Baldwin	movb	(%esp,%esi,1),%dl
1409bc3d5698SJohn Baldwin	leal	1(%esi),%esi
1410bc3d5698SJohn Baldwin	xorb	%dl,%al
1411bc3d5698SJohn Baldwin	movb	%al,-1(%ecx,%esi,1)
1412bc3d5698SJohn Baldwin	decl	%ebx
1413bc3d5698SJohn Baldwin	jnz	.L007tail_loop
/* Release the 132-byte frame (it is not overwritten first) and restore the saved registers. */
1414bc3d5698SJohn Baldwin.L006done:
1415bc3d5698SJohn Baldwin	addl	$132,%esp
/* The zero-length early exit joins here, before any frame was allocated. */
1416bc3d5698SJohn Baldwin.L000no_data:
1417bc3d5698SJohn Baldwin	popl	%edi
1418bc3d5698SJohn Baldwin	popl	%esi
1419bc3d5698SJohn Baldwin	popl	%ebx
1420bc3d5698SJohn Baldwin	popl	%ebp
1421bc3d5698SJohn Baldwin	ret
1422bc3d5698SJohn Baldwin.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3 -- SSE2/SSSE3 code path that generates one 64-byte
 * keystream block per pass and XORs it into the caller's data.
 *
 * Stack arguments (offsets are after the four pushes below):
 *   20(%esp) -> %edi  destination pointer (result bytes are stored here)
 *   24(%esp) -> %esi  source pointer (bytes XORed with the keystream)
 *   28(%esp) -> %ecx  byte count
 *   32(%esp) -> %edx  pointer to 32 bytes loaded into state rows 1 and 2
 *                     (presumably the key -- confirm against the C caller)
 *   36(%esp) -> %ebx  pointer to 16 bytes loaded into state row 3
 *                     (presumably counter + nonce; dword 0 is bumped by 1
 *                     for every additional block)
 *
 * Register roles inside the block function:
 *   %xmm0..%xmm3  state rows 0..3      %xmm4/%xmm5  scratch
 *   %xmm6         pshufb mask, rotate each dword left by 16
 *   %xmm7         pshufb mask, rotate each dword left by 8
 *   %eax          address of .Lssse3_data      %edx  round-loop counter
 *
 * Frame: %esp is lowered by 524 and rounded down to a 64-byte boundary;
 * 0..63(%esp) holds the input state (later the keystream block for the
 * tail) and 512(%esp) holds the caller's %esp.
 *
 * NOTE(review): .Lssse3_shortcut is entered by a jmp from ChaCha20_ctr32,
 * which has already set %ebp = &OPENSSL_ia32cap_P and %eax = address of
 * .Lpic_point.  Nothing in this function sets them up when it is entered
 * through the ChaCha20_ssse3 symbol itself -- confirm that no caller does
 * so.  Likewise the zero-length check lives in ChaCha20_ctr32; a zero
 * byte count reaching .L011tail would wrap the decl/jnz loop.
 */
1423bc3d5698SJohn Baldwin.globl	ChaCha20_ssse3
1424bc3d5698SJohn Baldwin.type	ChaCha20_ssse3,@function
1425bc3d5698SJohn Baldwin.align	16
1426bc3d5698SJohn BaldwinChaCha20_ssse3:
1427bc3d5698SJohn Baldwin.L_ChaCha20_ssse3_begin:
/* Bytes F3 0F 1E FB encode endbr32, emitted only when __CET__ is defined. */
1428*c0855eaaSJohn Baldwin	#ifdef __CET__
1429*c0855eaaSJohn Baldwin
1430*c0855eaaSJohn Baldwin.byte	243,15,30,251
1431*c0855eaaSJohn Baldwin	#endif
1432*c0855eaaSJohn Baldwin
1433bc3d5698SJohn Baldwin	pushl	%ebp
1434bc3d5698SJohn Baldwin	pushl	%ebx
1435bc3d5698SJohn Baldwin	pushl	%esi
1436bc3d5698SJohn Baldwin	pushl	%edi
1437bc3d5698SJohn Baldwin.Lssse3_shortcut:
/* Bit 11 (0x800) of the second capability dword diverts to the XOP path. */
1438bc3d5698SJohn Baldwin	testl	$2048,4(%ebp)
1439bc3d5698SJohn Baldwin	jnz	.Lxop_shortcut
1440bc3d5698SJohn Baldwin	movl	20(%esp),%edi
1441bc3d5698SJohn Baldwin	movl	24(%esp),%esi
1442bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
1443bc3d5698SJohn Baldwin	movl	32(%esp),%edx
1444bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
/* Build the 64-byte-aligned frame and remember the caller's %esp in it. */
1445bc3d5698SJohn Baldwin	movl	%esp,%ebp
1446bc3d5698SJohn Baldwin	subl	$524,%esp
1447bc3d5698SJohn Baldwin	andl	$-64,%esp
1448bc3d5698SJohn Baldwin	movl	%ebp,512(%esp)
/* %eax arrives holding the address of .Lpic_point; rebase it to the table. */
1449bc3d5698SJohn Baldwin	leal	.Lssse3_data-.Lpic_point(%eax),%eax
1450bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm3
1451bc3d5698SJohn Baldwin.L0081x:
/* Assemble the initial state: row 0 = constants, rows 1-2 from (%edx). */
1452bc3d5698SJohn Baldwin	movdqa	32(%eax),%xmm0
1453bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm1
1454bc3d5698SJohn Baldwin	movdqu	16(%edx),%xmm2
1455bc3d5698SJohn Baldwin	movdqa	(%eax),%xmm6
1456bc3d5698SJohn Baldwin	movdqa	16(%eax),%xmm7
/* This dword store is overwritten by the 16-byte store of %xmm3 below. */
1457bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
/* Keep a copy of the input state at 0..63(%esp) for the final addition. */
1458bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
1459bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
1460bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
1461bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
/* 10 loop iterations, two rounds per iteration. */
1462bc3d5698SJohn Baldwin	movl	$10,%edx
1463bc3d5698SJohn Baldwin	jmp	.L009loop1x
1464bc3d5698SJohn Baldwin.align	16
1465bc3d5698SJohn Baldwin.L010outer1x:
/*
 * Next block: row 3 = saved row 3 + (1,0,0,0) from 80(%eax), i.e. dword 0
 * is incremented; rows 0..2 are reloaded unchanged from the frame.
 */
1466bc3d5698SJohn Baldwin	movdqa	80(%eax),%xmm3
1467bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm0
1468bc3d5698SJohn Baldwin	movdqa	16(%esp),%xmm1
1469bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm2
1470bc3d5698SJohn Baldwin	paddd	48(%esp),%xmm3
1471bc3d5698SJohn Baldwin	movl	$10,%edx
1472bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
1473bc3d5698SJohn Baldwin	jmp	.L009loop1x
1474bc3d5698SJohn Baldwin.align	16
1475bc3d5698SJohn Baldwin.L009loop1x:
/*
 * First round of the pair.  With a=%xmm0, b=%xmm1, c=%xmm2, d=%xmm3:
 *   a += b; d ^= a; d <<<= 16;   c += d; b ^= c; b <<<= 12;
 *   a += b; d ^= a; d <<<= 8;    c += d; b ^= c; b <<<= 7;
 * .byte 102,15,56,0,222 is pshufb %xmm6,%xmm3 (rotate left 16) and
 * .byte 102,15,56,0,223 is pshufb %xmm7,%xmm3 (rotate left 8); the 12- and
 * 7-bit rotates are built from psrld/pslld/por with %xmm4 as scratch.
 */
1476bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
1477bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1478bc3d5698SJohn Baldwin.byte	102,15,56,0,222
1479bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
1480bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1481bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
1482bc3d5698SJohn Baldwin	psrld	$20,%xmm1
1483bc3d5698SJohn Baldwin	pslld	$12,%xmm4
1484bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
1485bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
1486bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1487bc3d5698SJohn Baldwin.byte	102,15,56,0,223
1488bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
1489bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1490bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
1491bc3d5698SJohn Baldwin	psrld	$25,%xmm1
1492bc3d5698SJohn Baldwin	pslld	$7,%xmm4
1493bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
/* Rotate the dword lanes of rows 1..3 so the second round works on diagonals. */
1494bc3d5698SJohn Baldwin	pshufd	$78,%xmm2,%xmm2
1495bc3d5698SJohn Baldwin	pshufd	$57,%xmm1,%xmm1
1496bc3d5698SJohn Baldwin	pshufd	$147,%xmm3,%xmm3
1497bc3d5698SJohn Baldwin	nop
/* Second round of the pair: same sequence on the lane-rotated rows. */
1498bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
1499bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1500bc3d5698SJohn Baldwin.byte	102,15,56,0,222
1501bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
1502bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1503bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
1504bc3d5698SJohn Baldwin	psrld	$20,%xmm1
1505bc3d5698SJohn Baldwin	pslld	$12,%xmm4
1506bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
1507bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm0
1508bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1509bc3d5698SJohn Baldwin.byte	102,15,56,0,223
1510bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm2
1511bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1512bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm4
1513bc3d5698SJohn Baldwin	psrld	$25,%xmm1
1514bc3d5698SJohn Baldwin	pslld	$7,%xmm4
1515bc3d5698SJohn Baldwin	por	%xmm4,%xmm1
/* Undo the lane rotation (masks for rows 1 and 3 are swapped vs. above). */
1516bc3d5698SJohn Baldwin	pshufd	$78,%xmm2,%xmm2
1517bc3d5698SJohn Baldwin	pshufd	$147,%xmm1,%xmm1
1518bc3d5698SJohn Baldwin	pshufd	$57,%xmm3,%xmm3
1519bc3d5698SJohn Baldwin	decl	%edx
1520bc3d5698SJohn Baldwin	jnz	.L009loop1x
/* Add the saved input state to obtain the keystream block. */
1521bc3d5698SJohn Baldwin	paddd	(%esp),%xmm0
1522bc3d5698SJohn Baldwin	paddd	16(%esp),%xmm1
1523bc3d5698SJohn Baldwin	paddd	32(%esp),%xmm2
1524bc3d5698SJohn Baldwin	paddd	48(%esp),%xmm3
/* Fewer than 64 bytes left (unsigned compare): finish byte by byte. */
1525bc3d5698SJohn Baldwin	cmpl	$64,%ecx
1526bc3d5698SJohn Baldwin	jb	.L011tail
/* Full block: XOR 64 source bytes with the keystream and store them. */
1527bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm4
1528bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm5
1529bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
1530bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
1531bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
1532bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
1533bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm2
1534bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm3
1535bc3d5698SJohn Baldwin	leal	64(%esi),%esi
1536bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
1537bc3d5698SJohn Baldwin	movdqu	%xmm1,16(%edi)
1538bc3d5698SJohn Baldwin	movdqu	%xmm2,32(%edi)
1539bc3d5698SJohn Baldwin	movdqu	%xmm3,48(%edi)
1540bc3d5698SJohn Baldwin	leal	64(%edi),%edi
/* Loop while bytes remain; an exact multiple of 64 ends here. */
1541bc3d5698SJohn Baldwin	subl	$64,%ecx
1542bc3d5698SJohn Baldwin	jnz	.L010outer1x
1543bc3d5698SJohn Baldwin	jmp	.L012done
1544bc3d5698SJohn Baldwin.L011tail:
/*
 * Partial block: park the keystream at 0..63(%esp), then for each of the
 * remaining %ecx bytes store keystream[i] ^ source[i] to destination[i],
 * with %ebp as the byte index i.
 */
1545bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
1546bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
1547bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
1548bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
1549bc3d5698SJohn Baldwin	xorl	%eax,%eax
1550bc3d5698SJohn Baldwin	xorl	%edx,%edx
1551bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
1552bc3d5698SJohn Baldwin.L013tail_loop:
1553bc3d5698SJohn Baldwin	movb	(%esp,%ebp,1),%al
1554bc3d5698SJohn Baldwin	movb	(%esi,%ebp,1),%dl
1555bc3d5698SJohn Baldwin	leal	1(%ebp),%ebp
1556bc3d5698SJohn Baldwin	xorb	%dl,%al
/* The -1 displacement compensates for the index increment above. */
1557bc3d5698SJohn Baldwin	movb	%al,-1(%edi,%ebp,1)
1558bc3d5698SJohn Baldwin	decl	%ecx
1559bc3d5698SJohn Baldwin	jnz	.L013tail_loop
1560bc3d5698SJohn Baldwin.L012done:
/* Restore the caller's %esp saved at frame offset 512, then unwind. */
1561bc3d5698SJohn Baldwin	movl	512(%esp),%esp
1562bc3d5698SJohn Baldwin	popl	%edi
1563bc3d5698SJohn Baldwin	popl	%esi
1564bc3d5698SJohn Baldwin	popl	%ebx
1565bc3d5698SJohn Baldwin	popl	%ebp
1566bc3d5698SJohn Baldwin	ret
1567bc3d5698SJohn Baldwin.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * .Lssse3_data -- 64-byte-aligned constant table, addressed through %eax
 * by both ChaCha20_ssse3 and ChaCha20_xop.  Byte offset of each row:
 *     0  pshufb mask rotating every dword left by 16 bits
 *    16  pshufb mask rotating every dword left by 8 bits
 *    32  state row 0: the four constants (ASCII "expand 32-byte k")
 *    48  (0,1,2,3)     per-lane counter offsets for the 4-block path
 *    64  (4,4,4,4)     counter step per 4-block batch
 *    80  (1,0,0,0)     counter step per single block
 *    96  (4,0,0,0)     added to the lane-0 counter after the last batch
 *   112  (0,-1,-1,-1)  AND mask that clears dword 0 of state row 3
 * The bytes after the second .align are the NUL-terminated ASCII string
 * "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>".
 */
1568bc3d5698SJohn Baldwin.align	64
1569bc3d5698SJohn Baldwin.Lssse3_data:
1570bc3d5698SJohn Baldwin.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
1571bc3d5698SJohn Baldwin.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
1572bc3d5698SJohn Baldwin.long	1634760805,857760878,2036477234,1797285236
1573bc3d5698SJohn Baldwin.long	0,1,2,3
1574bc3d5698SJohn Baldwin.long	4,4,4,4
1575bc3d5698SJohn Baldwin.long	1,0,0,0
1576bc3d5698SJohn Baldwin.long	4,0,0,0
1577bc3d5698SJohn Baldwin.long	0,-1,-1,-1
1578bc3d5698SJohn Baldwin.align	64
1579bc3d5698SJohn Baldwin.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
1580bc3d5698SJohn Baldwin.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1581bc3d5698SJohn Baldwin.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1582bc3d5698SJohn Baldwin.byte	114,103,62,0
/*
 * ChaCha20_xop -- AVX/XOP code path.  Inputs of 256 bytes or more are
 * processed four 64-byte blocks at a time; whatever is left (and any
 * input shorter than 256 bytes) goes through a one-block-at-a-time loop.
 *
 * Stack arguments (offsets are after the four pushes below):
 *   20(%esp) -> %edi  destination pointer
 *   24(%esp) -> %esi  source pointer
 *   28(%esp) -> %ecx  byte count
 *   32(%esp) -> %edx  pointer to 32 bytes loaded into state rows 1 and 2
 *                     (presumably the key -- confirm against the C caller)
 *   36(%esp) -> %ebx  pointer to 16 bytes loaded into state row 3
 *                     (presumably counter + nonce; dword 0 is the counter)
 *
 * Every ".byte 143,232,120,194,MODRM,IMM" sequence is a hand-encoded XOP
 * "vprotd $IMM,%xmmN,%xmmN" (rotate each dword left by IMM bits):
 *   MODRM 201 = %xmm1, 210 = %xmm2, 219 = %xmm3, 246 = %xmm6, 255 = %xmm7.
 *
 * Frame (64-byte aligned, 524 bytes reserved):
 *     0..255(%esp)  4-block path: working copy of the 16 state vectors,
 *                   addressed as -128..112(%ebx) with %ebx = %esp+128;
 *                   1-block path: 0..63 hold the input state / keystream
 *   256..511(%esp)  4-block path: per-batch input state, addressed as
 *                   -128..112(%ebp) with %ebp = %esp+384
 *   512(%esp) caller's %esp   516(%esp) saved %edx   520(%esp) saved %ebx
 *
 * NOTE(review): .Lxop_shortcut is reached by a jnz from .Lssse3_shortcut
 * and relies on %eax already holding the address of .Lpic_point; nothing
 * in this function sets %eax when it is entered through the ChaCha20_xop
 * symbol itself -- confirm that no caller does so.  No zero-length check
 * is made here; a zero byte count reaching .L020tail would wrap the
 * decl/jnz loop.
 */
1583bc3d5698SJohn Baldwin.globl	ChaCha20_xop
1584bc3d5698SJohn Baldwin.type	ChaCha20_xop,@function
1585bc3d5698SJohn Baldwin.align	16
1586bc3d5698SJohn BaldwinChaCha20_xop:
1587bc3d5698SJohn Baldwin.L_ChaCha20_xop_begin:
/* Bytes F3 0F 1E FB encode endbr32, emitted only when __CET__ is defined. */
1588*c0855eaaSJohn Baldwin	#ifdef __CET__
1589*c0855eaaSJohn Baldwin
1590*c0855eaaSJohn Baldwin.byte	243,15,30,251
1591*c0855eaaSJohn Baldwin	#endif
1592*c0855eaaSJohn Baldwin
1593bc3d5698SJohn Baldwin	pushl	%ebp
1594bc3d5698SJohn Baldwin	pushl	%ebx
1595bc3d5698SJohn Baldwin	pushl	%esi
1596bc3d5698SJohn Baldwin	pushl	%edi
1597bc3d5698SJohn Baldwin.Lxop_shortcut:
1598bc3d5698SJohn Baldwin	movl	20(%esp),%edi
1599bc3d5698SJohn Baldwin	movl	24(%esp),%esi
1600bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
1601bc3d5698SJohn Baldwin	movl	32(%esp),%edx
1602bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
1603bc3d5698SJohn Baldwin	vzeroupper
/* Build the 64-byte-aligned frame and remember the caller's %esp in it. */
1604bc3d5698SJohn Baldwin	movl	%esp,%ebp
1605bc3d5698SJohn Baldwin	subl	$524,%esp
1606bc3d5698SJohn Baldwin	andl	$-64,%esp
1607bc3d5698SJohn Baldwin	movl	%ebp,512(%esp)
/* %eax arrives holding the address of .Lpic_point; rebase it to the table. */
1608bc3d5698SJohn Baldwin	leal	.Lssse3_data-.Lpic_point(%eax),%eax
1609bc3d5698SJohn Baldwin	vmovdqu	(%ebx),%xmm3
/* Fewer than 256 bytes (unsigned compare): use the one-block path only. */
1610bc3d5698SJohn Baldwin	cmpl	$256,%ecx
1611bc3d5698SJohn Baldwin	jb	.L0141x
/* %edx and %ebx are reused below; keep the argument pointers in the frame. */
1612bc3d5698SJohn Baldwin	movl	%edx,516(%esp)
1613bc3d5698SJohn Baldwin	movl	%ebx,520(%esp)
/* %ecx is kept biased by -256 so that "subl/jnc" detects the last batch. */
1614bc3d5698SJohn Baldwin	subl	$256,%ecx
1615bc3d5698SJohn Baldwin	leal	384(%esp),%ebp
/*
 * Broadcast every state dword into its own vector so that lane i of each
 * vector belongs to block i.  Row 3 -> 64..112(%ebp); its dword 0 gets
 * (0,1,2,3) added, giving four consecutive counters, and (4,4,4,4)
 * subtracted to pre-compensate the +4 applied at the top of
 * .L015outer_loop.  The first 16 bytes at (%edx) -> -64..-16(%ebp).
 */
1616bc3d5698SJohn Baldwin	vmovdqu	(%edx),%xmm7
1617bc3d5698SJohn Baldwin	vpshufd	$0,%xmm3,%xmm0
1618bc3d5698SJohn Baldwin	vpshufd	$85,%xmm3,%xmm1
1619bc3d5698SJohn Baldwin	vpshufd	$170,%xmm3,%xmm2
1620bc3d5698SJohn Baldwin	vpshufd	$255,%xmm3,%xmm3
1621bc3d5698SJohn Baldwin	vpaddd	48(%eax),%xmm0,%xmm0
1622bc3d5698SJohn Baldwin	vpshufd	$0,%xmm7,%xmm4
1623bc3d5698SJohn Baldwin	vpshufd	$85,%xmm7,%xmm5
1624bc3d5698SJohn Baldwin	vpsubd	64(%eax),%xmm0,%xmm0
1625bc3d5698SJohn Baldwin	vpshufd	$170,%xmm7,%xmm6
1626bc3d5698SJohn Baldwin	vpshufd	$255,%xmm7,%xmm7
1627bc3d5698SJohn Baldwin	vmovdqa	%xmm0,64(%ebp)
1628bc3d5698SJohn Baldwin	vmovdqa	%xmm1,80(%ebp)
1629bc3d5698SJohn Baldwin	vmovdqa	%xmm2,96(%ebp)
1630bc3d5698SJohn Baldwin	vmovdqa	%xmm3,112(%ebp)
1631bc3d5698SJohn Baldwin	vmovdqu	16(%edx),%xmm3
1632bc3d5698SJohn Baldwin	vmovdqa	%xmm4,-64(%ebp)
1633bc3d5698SJohn Baldwin	vmovdqa	%xmm5,-48(%ebp)
1634bc3d5698SJohn Baldwin	vmovdqa	%xmm6,-32(%ebp)
1635bc3d5698SJohn Baldwin	vmovdqa	%xmm7,-16(%ebp)
/* Second 16 bytes at 16(%edx) -> 0..48(%ebp); constants -> -128..-80(%ebp). */
1636bc3d5698SJohn Baldwin	vmovdqa	32(%eax),%xmm7
1637bc3d5698SJohn Baldwin	leal	128(%esp),%ebx
1638bc3d5698SJohn Baldwin	vpshufd	$0,%xmm3,%xmm0
1639bc3d5698SJohn Baldwin	vpshufd	$85,%xmm3,%xmm1
1640bc3d5698SJohn Baldwin	vpshufd	$170,%xmm3,%xmm2
1641bc3d5698SJohn Baldwin	vpshufd	$255,%xmm3,%xmm3
1642bc3d5698SJohn Baldwin	vpshufd	$0,%xmm7,%xmm4
1643bc3d5698SJohn Baldwin	vpshufd	$85,%xmm7,%xmm5
1644bc3d5698SJohn Baldwin	vpshufd	$170,%xmm7,%xmm6
1645bc3d5698SJohn Baldwin	vpshufd	$255,%xmm7,%xmm7
1646bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%ebp)
1647bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%ebp)
1648bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%ebp)
1649bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%ebp)
1650bc3d5698SJohn Baldwin	vmovdqa	%xmm4,-128(%ebp)
1651bc3d5698SJohn Baldwin	vmovdqa	%xmm5,-112(%ebp)
1652bc3d5698SJohn Baldwin	vmovdqa	%xmm6,-96(%ebp)
1653bc3d5698SJohn Baldwin	vmovdqa	%xmm7,-80(%ebp)
/* Bias both data pointers by +128; the 4-block code addresses -128..64 around them. */
1654bc3d5698SJohn Baldwin	leal	128(%esi),%esi
1655bc3d5698SJohn Baldwin	leal	128(%edi),%edi
1656bc3d5698SJohn Baldwin	jmp	.L015outer_loop
1657bc3d5698SJohn Baldwin.align	32
1658bc3d5698SJohn Baldwin.L015outer_loop:
/*
 * Start of a 4-block batch: copy the input state from the %ebp area into
 * the working area at %ebx.  Vectors -128, -64, 0 and 16 are not copied;
 * they are loaded straight into %xmm0, %xmm3, %xmm4 and %xmm5 below.
 */
1659bc3d5698SJohn Baldwin	vmovdqa	-112(%ebp),%xmm1
1660bc3d5698SJohn Baldwin	vmovdqa	-96(%ebp),%xmm2
1661bc3d5698SJohn Baldwin	vmovdqa	-80(%ebp),%xmm3
1662bc3d5698SJohn Baldwin	vmovdqa	-48(%ebp),%xmm5
1663bc3d5698SJohn Baldwin	vmovdqa	-32(%ebp),%xmm6
1664bc3d5698SJohn Baldwin	vmovdqa	-16(%ebp),%xmm7
1665bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-112(%ebx)
1666bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-96(%ebx)
1667bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-80(%ebx)
1668bc3d5698SJohn Baldwin	vmovdqa	%xmm5,-48(%ebx)
1669bc3d5698SJohn Baldwin	vmovdqa	%xmm6,-32(%ebx)
1670bc3d5698SJohn Baldwin	vmovdqa	%xmm7,-16(%ebx)
1671bc3d5698SJohn Baldwin	vmovdqa	32(%ebp),%xmm2
1672bc3d5698SJohn Baldwin	vmovdqa	48(%ebp),%xmm3
1673bc3d5698SJohn Baldwin	vmovdqa	64(%ebp),%xmm4
1674bc3d5698SJohn Baldwin	vmovdqa	80(%ebp),%xmm5
1675bc3d5698SJohn Baldwin	vmovdqa	96(%ebp),%xmm6
1676bc3d5698SJohn Baldwin	vmovdqa	112(%ebp),%xmm7
/* Advance the four per-lane counters by 4 for this batch. */
1677bc3d5698SJohn Baldwin	vpaddd	64(%eax),%xmm4,%xmm4
1678bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%ebx)
1679bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%ebx)
1680bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%ebx)
1681bc3d5698SJohn Baldwin	vmovdqa	%xmm5,80(%ebx)
1682bc3d5698SJohn Baldwin	vmovdqa	%xmm6,96(%ebx)
1683bc3d5698SJohn Baldwin	vmovdqa	%xmm7,112(%ebx)
/* The updated counters are also written back to the input state. */
1684bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%ebp)
1685bc3d5698SJohn Baldwin	vmovdqa	-128(%ebp),%xmm0
1686bc3d5698SJohn Baldwin	vmovdqa	%xmm4,%xmm6
1687bc3d5698SJohn Baldwin	vmovdqa	-64(%ebp),%xmm3
1688bc3d5698SJohn Baldwin	vmovdqa	(%ebp),%xmm4
1689bc3d5698SJohn Baldwin	vmovdqa	16(%ebp),%xmm5
/* 10 loop iterations, two rounds per iteration. */
1690bc3d5698SJohn Baldwin	movl	$10,%edx
1691bc3d5698SJohn Baldwin	nop
1692bc3d5698SJohn Baldwin.align	32
1693bc3d5698SJohn Baldwin.L016loop:
/*
 * Round loop for four blocks in parallel.  Each vector holds one state
 * word for all four blocks, so every add/xor/rotate step is applied
 * lane-wise with rotations of 16, 12, 8 and 7 bits.  Only eight vector
 * registers exist, so two interleaved word groups are live at a time and
 * the remaining state vectors are spilled to and reloaded from the
 * working area at %ebx.  The exact instruction order is part of that
 * register scheduling.
 */
1694bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm0,%xmm0
1695bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1696bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
1697bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1698bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm3,%xmm2
1699bc3d5698SJohn Baldwin	vmovdqa	-112(%ebx),%xmm1
1700bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
1701bc3d5698SJohn Baldwin	vmovdqa	-48(%ebx),%xmm3
1702bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1703bc3d5698SJohn Baldwin	vmovdqa	80(%ebx),%xmm7
1704bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1705bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1706bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
1707bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-128(%ebx)
1708bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1709bc3d5698SJohn Baldwin	vmovdqa	%xmm6,64(%ebx)
1710bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1711bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1712bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
1713bc3d5698SJohn Baldwin	vmovdqa	%xmm4,(%ebx)
1714bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
1715bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-64(%ebx)
1716bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1717bc3d5698SJohn Baldwin	vmovdqa	32(%ebx),%xmm4
1718bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1719bc3d5698SJohn Baldwin	vmovdqa	-96(%ebx),%xmm0
1720bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
1721bc3d5698SJohn Baldwin	vmovdqa	-32(%ebx),%xmm2
1722bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1723bc3d5698SJohn Baldwin	vmovdqa	96(%ebx),%xmm6
1724bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1725bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1726bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
1727bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-112(%ebx)
1728bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1729bc3d5698SJohn Baldwin	vmovdqa	%xmm7,80(%ebx)
1730bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1731bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1732bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
1733bc3d5698SJohn Baldwin	vmovdqa	%xmm5,16(%ebx)
1734bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
1735bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-48(%ebx)
1736bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1737bc3d5698SJohn Baldwin	vmovdqa	48(%ebx),%xmm5
1738bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1739bc3d5698SJohn Baldwin	vmovdqa	-80(%ebx),%xmm1
1740bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
1741bc3d5698SJohn Baldwin	vmovdqa	-16(%ebx),%xmm3
1742bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1743bc3d5698SJohn Baldwin	vmovdqa	112(%ebx),%xmm7
1744bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1745bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1746bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
1747bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-96(%ebx)
1748bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1749bc3d5698SJohn Baldwin	vmovdqa	%xmm6,96(%ebx)
1750bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1751bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1752bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
1753bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
1754bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-32(%ebx)
1755bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1756bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1757bc3d5698SJohn Baldwin	vmovdqa	-128(%ebx),%xmm0
1758bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
1759bc3d5698SJohn Baldwin	vmovdqa	-48(%ebx),%xmm2
1760bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1761bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1762bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1763bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
1764bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-80(%ebx)
1765bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1766bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1767bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm7,%xmm6
1768bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
1769bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
1770bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-16(%ebx)
1771bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1772bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1773bc3d5698SJohn Baldwin	vmovdqa	-112(%ebx),%xmm1
1774bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
1775bc3d5698SJohn Baldwin	vmovdqa	-32(%ebx),%xmm3
1776bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1777bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm7
1778bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1779bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1780bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
1781bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-128(%ebx)
1782bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1783bc3d5698SJohn Baldwin	vmovdqa	%xmm6,112(%ebx)
1784bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1785bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1786bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
1787bc3d5698SJohn Baldwin	vmovdqa	%xmm4,32(%ebx)
1788bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
1789bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-48(%ebx)
1790bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1791bc3d5698SJohn Baldwin	vmovdqa	(%ebx),%xmm4
1792bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1793bc3d5698SJohn Baldwin	vmovdqa	-96(%ebx),%xmm0
1794bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
1795bc3d5698SJohn Baldwin	vmovdqa	-16(%ebx),%xmm2
1796bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1797bc3d5698SJohn Baldwin	vmovdqa	80(%ebx),%xmm6
1798bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1799bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1800bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
1801bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-112(%ebx)
1802bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1803bc3d5698SJohn Baldwin	vmovdqa	%xmm7,64(%ebx)
1804bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1805bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1806bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
1807bc3d5698SJohn Baldwin	vmovdqa	%xmm5,48(%ebx)
1808bc3d5698SJohn Baldwin.byte	143,232,120,194,246,16
1809bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-32(%ebx)
1810bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1811bc3d5698SJohn Baldwin	vmovdqa	16(%ebx),%xmm5
1812bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1813bc3d5698SJohn Baldwin	vmovdqa	-80(%ebx),%xmm1
1814bc3d5698SJohn Baldwin.byte	143,232,120,194,210,12
1815bc3d5698SJohn Baldwin	vmovdqa	-64(%ebx),%xmm3
1816bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm0,%xmm0
1817bc3d5698SJohn Baldwin	vmovdqa	96(%ebx),%xmm7
1818bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm6,%xmm6
1819bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1820bc3d5698SJohn Baldwin.byte	143,232,120,194,246,8
1821bc3d5698SJohn Baldwin	vmovdqa	%xmm0,-96(%ebx)
1822bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1823bc3d5698SJohn Baldwin	vmovdqa	%xmm6,80(%ebx)
1824bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm2,%xmm2
1825bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1826bc3d5698SJohn Baldwin.byte	143,232,120,194,210,7
1827bc3d5698SJohn Baldwin.byte	143,232,120,194,255,16
1828bc3d5698SJohn Baldwin	vmovdqa	%xmm2,-16(%ebx)
1829bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1830bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1831bc3d5698SJohn Baldwin	vmovdqa	-128(%ebx),%xmm0
1832bc3d5698SJohn Baldwin.byte	143,232,120,194,219,12
1833bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm1,%xmm1
1834bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm6
1835bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
1836bc3d5698SJohn Baldwin.byte	143,232,120,194,255,8
1837bc3d5698SJohn Baldwin	vmovdqa	%xmm1,-80(%ebx)
1838bc3d5698SJohn Baldwin	vpaddd	%xmm7,%xmm5,%xmm5
1839bc3d5698SJohn Baldwin	vmovdqa	%xmm7,96(%ebx)
1840bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm3,%xmm3
1841bc3d5698SJohn Baldwin.byte	143,232,120,194,219,7
1842bc3d5698SJohn Baldwin	decl	%edx
1843bc3d5698SJohn Baldwin	jnz	.L016loop
/* Flush the vectors still held in registers back to the working area. */
1844bc3d5698SJohn Baldwin	vmovdqa	%xmm3,-64(%ebx)
1845bc3d5698SJohn Baldwin	vmovdqa	%xmm4,(%ebx)
1846bc3d5698SJohn Baldwin	vmovdqa	%xmm5,16(%ebx)
1847bc3d5698SJohn Baldwin	vmovdqa	%xmm6,64(%ebx)
1848bc3d5698SJohn Baldwin	vmovdqa	%xmm7,96(%ebx)
/*
 * Output, four state vectors at a time.  Each group: add the input state
 * from the %ebp area, transpose the 4x4 dword matrix with vpunpck{l,h}dq /
 * vpunpck{l,h}qdq so that each register holds 16 consecutive keystream
 * bytes of one block, XOR with the source at -128/-64/0/64 (blocks 0..3)
 * and store to the same offsets of the destination.  The pointers advance
 * by 16 after each of the first three groups and by 208 after the fourth,
 * 256 bytes in total.
 */
1849bc3d5698SJohn Baldwin	vmovdqa	-112(%ebx),%xmm1
1850bc3d5698SJohn Baldwin	vmovdqa	-96(%ebx),%xmm2
1851bc3d5698SJohn Baldwin	vmovdqa	-80(%ebx),%xmm3
1852bc3d5698SJohn Baldwin	vpaddd	-128(%ebp),%xmm0,%xmm0
1853bc3d5698SJohn Baldwin	vpaddd	-112(%ebp),%xmm1,%xmm1
1854bc3d5698SJohn Baldwin	vpaddd	-96(%ebp),%xmm2,%xmm2
1855bc3d5698SJohn Baldwin	vpaddd	-80(%ebp),%xmm3,%xmm3
1856bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
1857bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
1858bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
1859bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
1860bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1861bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1862bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1863bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1864bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
1865bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
1866bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
1867bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
1868bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1869bc3d5698SJohn Baldwin	vmovdqa	-64(%ebx),%xmm0
1870bc3d5698SJohn Baldwin	vmovdqa	-48(%ebx),%xmm1
1871bc3d5698SJohn Baldwin	vmovdqa	-32(%ebx),%xmm2
1872bc3d5698SJohn Baldwin	vmovdqa	-16(%ebx),%xmm3
1873bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
1874bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
1875bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
1876bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
1877bc3d5698SJohn Baldwin	leal	16(%edi),%edi
1878bc3d5698SJohn Baldwin	vpaddd	-64(%ebp),%xmm0,%xmm0
1879bc3d5698SJohn Baldwin	vpaddd	-48(%ebp),%xmm1,%xmm1
1880bc3d5698SJohn Baldwin	vpaddd	-32(%ebp),%xmm2,%xmm2
1881bc3d5698SJohn Baldwin	vpaddd	-16(%ebp),%xmm3,%xmm3
1882bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
1883bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
1884bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
1885bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
1886bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1887bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1888bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1889bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1890bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
1891bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
1892bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
1893bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
1894bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1895bc3d5698SJohn Baldwin	vmovdqa	(%ebx),%xmm0
1896bc3d5698SJohn Baldwin	vmovdqa	16(%ebx),%xmm1
1897bc3d5698SJohn Baldwin	vmovdqa	32(%ebx),%xmm2
1898bc3d5698SJohn Baldwin	vmovdqa	48(%ebx),%xmm3
1899bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
1900bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
1901bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
1902bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
1903bc3d5698SJohn Baldwin	leal	16(%edi),%edi
1904bc3d5698SJohn Baldwin	vpaddd	(%ebp),%xmm0,%xmm0
1905bc3d5698SJohn Baldwin	vpaddd	16(%ebp),%xmm1,%xmm1
1906bc3d5698SJohn Baldwin	vpaddd	32(%ebp),%xmm2,%xmm2
1907bc3d5698SJohn Baldwin	vpaddd	48(%ebp),%xmm3,%xmm3
1908bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
1909bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
1910bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
1911bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
1912bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1913bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1914bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1915bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1916bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
1917bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
1918bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
1919bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
1920bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1921bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm0
1922bc3d5698SJohn Baldwin	vmovdqa	80(%ebx),%xmm1
1923bc3d5698SJohn Baldwin	vmovdqa	96(%ebx),%xmm2
1924bc3d5698SJohn Baldwin	vmovdqa	112(%ebx),%xmm3
1925bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
1926bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
1927bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
1928bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
1929bc3d5698SJohn Baldwin	leal	16(%edi),%edi
1930bc3d5698SJohn Baldwin	vpaddd	64(%ebp),%xmm0,%xmm0
1931bc3d5698SJohn Baldwin	vpaddd	80(%ebp),%xmm1,%xmm1
1932bc3d5698SJohn Baldwin	vpaddd	96(%ebp),%xmm2,%xmm2
1933bc3d5698SJohn Baldwin	vpaddd	112(%ebp),%xmm3,%xmm3
1934bc3d5698SJohn Baldwin	vpunpckldq	%xmm1,%xmm0,%xmm6
1935bc3d5698SJohn Baldwin	vpunpckldq	%xmm3,%xmm2,%xmm7
1936bc3d5698SJohn Baldwin	vpunpckhdq	%xmm1,%xmm0,%xmm0
1937bc3d5698SJohn Baldwin	vpunpckhdq	%xmm3,%xmm2,%xmm2
1938bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1939bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1940bc3d5698SJohn Baldwin	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1941bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1942bc3d5698SJohn Baldwin	vpxor	-128(%esi),%xmm1,%xmm4
1943bc3d5698SJohn Baldwin	vpxor	-64(%esi),%xmm6,%xmm5
1944bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm7,%xmm6
1945bc3d5698SJohn Baldwin	vpxor	64(%esi),%xmm3,%xmm7
1946bc3d5698SJohn Baldwin	leal	208(%esi),%esi
1947bc3d5698SJohn Baldwin	vmovdqu	%xmm4,-128(%edi)
1948bc3d5698SJohn Baldwin	vmovdqu	%xmm5,-64(%edi)
1949bc3d5698SJohn Baldwin	vmovdqu	%xmm6,(%edi)
1950bc3d5698SJohn Baldwin	vmovdqu	%xmm7,64(%edi)
1951bc3d5698SJohn Baldwin	leal	208(%edi),%edi
/* No borrow means at least another 256 bytes remain: run another batch. */
1952bc3d5698SJohn Baldwin	subl	$256,%ecx
1953bc3d5698SJohn Baldwin	jnc	.L015outer_loop
/* Remove the -256 bias; %ecx is now the true remainder (0..255). */
1954bc3d5698SJohn Baldwin	addl	$256,%ecx
1955bc3d5698SJohn Baldwin	jz	.L017done
/*
 * Hand over to the one-block path: restore the argument pointers, undo
 * the +128 pointer bias, and rebuild row 3 as the caller's row 3 with
 * dword 0 replaced (vpand with (0,-1,-1,-1), then vpor) by the lane-0
 * counter of the last batch plus 4.
 */
1956bc3d5698SJohn Baldwin	movl	520(%esp),%ebx
1957bc3d5698SJohn Baldwin	leal	-128(%esi),%esi
1958bc3d5698SJohn Baldwin	movl	516(%esp),%edx
1959bc3d5698SJohn Baldwin	leal	-128(%edi),%edi
1960bc3d5698SJohn Baldwin	vmovd	64(%ebp),%xmm2
1961bc3d5698SJohn Baldwin	vmovdqu	(%ebx),%xmm3
1962bc3d5698SJohn Baldwin	vpaddd	96(%eax),%xmm2,%xmm2
1963bc3d5698SJohn Baldwin	vpand	112(%eax),%xmm3,%xmm3
1964bc3d5698SJohn Baldwin	vpor	%xmm2,%xmm3,%xmm3
1965bc3d5698SJohn Baldwin.L0141x:
/* One-block path: row 0 = constants, rows 1-2 from (%edx), row 3 = %xmm3. */
1966bc3d5698SJohn Baldwin	vmovdqa	32(%eax),%xmm0
1967bc3d5698SJohn Baldwin	vmovdqu	(%edx),%xmm1
1968bc3d5698SJohn Baldwin	vmovdqu	16(%edx),%xmm2
1969bc3d5698SJohn Baldwin	vmovdqa	(%eax),%xmm6
1970bc3d5698SJohn Baldwin	vmovdqa	16(%eax),%xmm7
/* This dword store is overwritten by the 16-byte store of %xmm3 below. */
1971bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
/* Keep a copy of the input state at 0..63(%esp) for the final addition. */
1972bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%esp)
1973bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%esp)
1974bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%esp)
1975bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
1976bc3d5698SJohn Baldwin	movl	$10,%edx
1977bc3d5698SJohn Baldwin	jmp	.L018loop1x
1978bc3d5698SJohn Baldwin.align	16
1979bc3d5698SJohn Baldwin.L019outer1x:
/* Next block: row 3 = saved row 3 + (1,0,0,0); rows 0..2 reloaded as-is. */
1980bc3d5698SJohn Baldwin	vmovdqa	80(%eax),%xmm3
1981bc3d5698SJohn Baldwin	vmovdqa	(%esp),%xmm0
1982bc3d5698SJohn Baldwin	vmovdqa	16(%esp),%xmm1
1983bc3d5698SJohn Baldwin	vmovdqa	32(%esp),%xmm2
1984bc3d5698SJohn Baldwin	vpaddd	48(%esp),%xmm3,%xmm3
1985bc3d5698SJohn Baldwin	movl	$10,%edx
1986bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
1987bc3d5698SJohn Baldwin	jmp	.L018loop1x
1988bc3d5698SJohn Baldwin.align	16
1989bc3d5698SJohn Baldwin.L018loop1x:
/*
 * Two rounds per iteration on rows a=%xmm0, b=%xmm1, c=%xmm2, d=%xmm3:
 *   a += b; d ^= a; d <<<= 16;   c += d; b ^= c; b <<<= 12;
 *   a += b; d ^= a; d <<<= 8;    c += d; b ^= c; b <<<= 7;
 * All four rotates use vprotd.  The vpshufd triples rotate the dword
 * lanes of rows 1..3 before the second round and restore them after it.
 */
1990bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
1991bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
1992bc3d5698SJohn Baldwin.byte	143,232,120,194,219,16
1993bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
1994bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
1995bc3d5698SJohn Baldwin.byte	143,232,120,194,201,12
1996bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
1997bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
1998bc3d5698SJohn Baldwin.byte	143,232,120,194,219,8
1999bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
2000bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
2001bc3d5698SJohn Baldwin.byte	143,232,120,194,201,7
2002bc3d5698SJohn Baldwin	vpshufd	$78,%xmm2,%xmm2
2003bc3d5698SJohn Baldwin	vpshufd	$57,%xmm1,%xmm1
2004bc3d5698SJohn Baldwin	vpshufd	$147,%xmm3,%xmm3
2005bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
2006bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
2007bc3d5698SJohn Baldwin.byte	143,232,120,194,219,16
2008bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
2009bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
2010bc3d5698SJohn Baldwin.byte	143,232,120,194,201,12
2011bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm0,%xmm0
2012bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm3,%xmm3
2013bc3d5698SJohn Baldwin.byte	143,232,120,194,219,8
2014bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm2,%xmm2
2015bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm1,%xmm1
2016bc3d5698SJohn Baldwin.byte	143,232,120,194,201,7
2017bc3d5698SJohn Baldwin	vpshufd	$78,%xmm2,%xmm2
2018bc3d5698SJohn Baldwin	vpshufd	$147,%xmm1,%xmm1
2019bc3d5698SJohn Baldwin	vpshufd	$57,%xmm3,%xmm3
2020bc3d5698SJohn Baldwin	decl	%edx
2021bc3d5698SJohn Baldwin	jnz	.L018loop1x
/* Add the saved input state to obtain the keystream block. */
2022bc3d5698SJohn Baldwin	vpaddd	(%esp),%xmm0,%xmm0
2023bc3d5698SJohn Baldwin	vpaddd	16(%esp),%xmm1,%xmm1
2024bc3d5698SJohn Baldwin	vpaddd	32(%esp),%xmm2,%xmm2
2025bc3d5698SJohn Baldwin	vpaddd	48(%esp),%xmm3,%xmm3
/* Fewer than 64 bytes left (unsigned compare): finish byte by byte. */
2026bc3d5698SJohn Baldwin	cmpl	$64,%ecx
2027bc3d5698SJohn Baldwin	jb	.L020tail
/* Full block: XOR 64 source bytes with the keystream and store them. */
2028bc3d5698SJohn Baldwin	vpxor	(%esi),%xmm0,%xmm0
2029bc3d5698SJohn Baldwin	vpxor	16(%esi),%xmm1,%xmm1
2030bc3d5698SJohn Baldwin	vpxor	32(%esi),%xmm2,%xmm2
2031bc3d5698SJohn Baldwin	vpxor	48(%esi),%xmm3,%xmm3
2032bc3d5698SJohn Baldwin	leal	64(%esi),%esi
2033bc3d5698SJohn Baldwin	vmovdqu	%xmm0,(%edi)
2034bc3d5698SJohn Baldwin	vmovdqu	%xmm1,16(%edi)
2035bc3d5698SJohn Baldwin	vmovdqu	%xmm2,32(%edi)
2036bc3d5698SJohn Baldwin	vmovdqu	%xmm3,48(%edi)
2037bc3d5698SJohn Baldwin	leal	64(%edi),%edi
2038bc3d5698SJohn Baldwin	subl	$64,%ecx
2039bc3d5698SJohn Baldwin	jnz	.L019outer1x
2040bc3d5698SJohn Baldwin	jmp	.L017done
2041bc3d5698SJohn Baldwin.L020tail:
/*
 * Partial block: park the keystream at 0..63(%esp), then for each of the
 * remaining %ecx bytes store keystream[i] ^ source[i] to destination[i],
 * with %ebp as the byte index i.
 */
2042bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%esp)
2043bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%esp)
2044bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%esp)
2045bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
2046bc3d5698SJohn Baldwin	xorl	%eax,%eax
2047bc3d5698SJohn Baldwin	xorl	%edx,%edx
2048bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
2049bc3d5698SJohn Baldwin.L021tail_loop:
2050bc3d5698SJohn Baldwin	movb	(%esp,%ebp,1),%al
2051bc3d5698SJohn Baldwin	movb	(%esi,%ebp,1),%dl
2052bc3d5698SJohn Baldwin	leal	1(%ebp),%ebp
2053bc3d5698SJohn Baldwin	xorb	%dl,%al
/* The -1 displacement compensates for the index increment above. */
2054bc3d5698SJohn Baldwin	movb	%al,-1(%edi,%ebp,1)
2055bc3d5698SJohn Baldwin	decl	%ecx
2056bc3d5698SJohn Baldwin	jnz	.L021tail_loop
2057bc3d5698SJohn Baldwin.L017done:
2058bc3d5698SJohn Baldwin	vzeroupper
/* Restore the caller's %esp saved at frame offset 512, then unwind. */
2059bc3d5698SJohn Baldwin	movl	512(%esp),%esp
2060bc3d5698SJohn Baldwin	popl	%edi
2061bc3d5698SJohn Baldwin	popl	%esi
2062bc3d5698SJohn Baldwin	popl	%ebx
2063bc3d5698SJohn Baldwin	popl	%ebp
2064bc3d5698SJohn Baldwin	ret
2065bc3d5698SJohn Baldwin.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
/*
 * OPENSSL_ia32cap_P: 16-byte, 4-byte-aligned common symbol holding the CPU
 * capability words tested at the top of the ChaCha20 entry points.
 */
2066bc3d5698SJohn Baldwin.comm	OPENSSL_ia32cap_P,16,4
2067*c0855eaaSJohn Baldwin
/*
 * ELF property note.  Header: name size (1f - 0f, the "GNU" string with
 * its NUL), descriptor size (4f - 1f), note type 5
 * (NT_GNU_PROPERTY_TYPE_0).  Descriptor: property type 0xc0000002
 * (GNU_PROPERTY_X86_FEATURE_1_AND), data size (3f - 2f), value 3
 * (the IBT and SHSTK bits).
 */
2068*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
2069*c0855eaaSJohn Baldwin	.p2align 2
2070*c0855eaaSJohn Baldwin	.long 1f - 0f
2071*c0855eaaSJohn Baldwin	.long 4f - 1f
2072*c0855eaaSJohn Baldwin	.long 5
2073*c0855eaaSJohn Baldwin0:
2074*c0855eaaSJohn Baldwin	.asciz "GNU"
2075*c0855eaaSJohn Baldwin1:
2076*c0855eaaSJohn Baldwin	.p2align 2
2077*c0855eaaSJohn Baldwin	.long 0xc0000002
2078*c0855eaaSJohn Baldwin	.long 3f - 2f
2079*c0855eaaSJohn Baldwin2:
2080*c0855eaaSJohn Baldwin	.long 3
2081*c0855eaaSJohn Baldwin3:
2082*c0855eaaSJohn Baldwin	.p2align 2
2083*c0855eaaSJohn Baldwin4:
2084bc3d5698SJohn Baldwin#endif
2085