/* Do not modify. This file is auto-generated from chacha-x86.pl. */
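/*
 * ChaCha20 for x86.  ChaCha20_ctr32() XORs len bytes of inp with the
 * ChaCha20 keystream derived from an 8-word key and a 4-word counter
 * block, selecting an integer-only, SSSE3 or XOP body at run time from
 * the OPENSSL_ia32cap_P feature vector.
 */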
#ifdef PIC
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
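	/* cdecl: ChaCha20_ctr32(out, inp, len, key, counter).  After the
	   four pushes below the arguments sit at 20(%esp)..36(%esp). */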
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
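	/* Feature dispatch: bit 24 of cap word 0 (FXSR) and bit 9 of cap
	   word 1 (SSSE3) must both be set to take the SIMD path. */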
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
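	/* Integer-only path: cache the key at 80..108(%esp) and the counter
	   block at 112..124(%esp).  The counter word is stored
	   pre-decremented because .L002entry re-increments it per block. */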
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
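	/* Sigma constants "expa" "nd 3" "2-by" "te k", i.e. 0x61707865,
	   0x3320646e, 0x79622d32, 0x6b206574 in the decimal below. */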
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
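	/* Ten iterations of the ChaCha double round, i.e. 20 rounds.  The
	   sixteen state words live at 0..60(%esp) with several rotating
	   through registers; the iteration count is parked at 128(%esp). */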
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
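	/* Rounds done: fold the input state back in and XOR a full 64-byte
	   block against the input stream; a short final block falls through
	   to the byte-wise tail at .L005tail. */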
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
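	/* Entered from ChaCha20_ctr32 with %eax = .Lpic_point and %ebp =
	   &OPENSSL_ia32cap_P; bit 11 of cap word 1 is the AMD XOP bit. */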
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
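	/* One double round on the whole state.  The .byte sequences are
	   hand-assembled SSSE3 pshufb ops: 102,15,56,0,222 is
	   pshufb %xmm6,%xmm3 (rotate each dword left 16) and
	   102,15,56,0,223 is pshufb %xmm7,%xmm3 (rotate left 8), using the
	   byte-shuffle masks loaded from .Lssse3_data. */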
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
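	/* Short final block: spill the keystream to the stack and XOR it
	   into the output byte by byte. */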
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align	64
.Lssse3_data:
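	/* pshufb masks for rol16 and rol8, the sigma constants, per-lane
	   counter offsets 0..3, a +4 lane increment, scalar +1 and +4
	   increments, and the mask used to splice a saved counter back in. */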
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
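	/* Entered from the SSSE3 probe with %eax still set to .Lpic_point;
	   processes four 64-byte blocks per iteration while len >= 256. */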
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0141x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	32
.L016loop:
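	/* Four-way double round.  The .byte sequences are hand-assembled
	   AMD XOP vprotd (per-dword rotate) ops, e.g. 143,232,120,194,246,16
	   is vprotd $16,%xmm6,%xmm6 and 143,232,120,194,210,12 is
	   vprotd $12,%xmm2,%xmm2. */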
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L016loop
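	/* Add the input state back, transpose the four interleaved blocks
	   with punpck, and XOR 256 bytes of input laid out in 64-byte
	   strides. */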
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop
	addl	$256,%ecx
	jz	.L017done
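	/* Fewer than 256 bytes remain: restore the key/counter pointers and
	   splice the advanced counter word back into %xmm3 for the
	   one-block path below. */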
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
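	/* Single-block double round via vprotd: ...,219,16 rotates %xmm3
	   left 16, ...,201,12 rotates %xmm1 left 12, and so on. */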
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4
#else
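/* Non-PIC build: identical code except that OPENSSL_ia32cap_P is
   addressed directly instead of via the .Lpic_point trick. */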
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P,%ebp
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0141x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	32
.L016loop:
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L016loop
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop
	addl	$256,%ecx
	jz	.L017done
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4
#endif