xref: /freebsd/sys/crypto/openssl/i386/chacha-x86.S (revision 1fa4ddcc6de6a0c46416f719a5b7efa1169f51ce)
1/* Do not modify. This file is auto-generated from chacha-x86.pl. */
2#ifdef PIC
3.text
/*
 * ChaCha20_ctr32 -- position-independent (PIC) build, i386, AT&T syntax.
 *
 * Stack arguments as seen after the four register pushes below:
 *   20(%esp)  destination buffer (written)
 *   24(%esp)  source buffer (read and XORed with the generated block)
 *   28(%esp)  length in bytes; zero returns immediately
 *   32(%esp)  pointer to 8 dwords copied to 80..108(%esp)
 *             (the cipher key by ChaCha20 convention -- confirm against
 *             the C prototype)
 *   36(%esp)  pointer to 4 dwords copied to 112..124(%esp); dword 0 is
 *             incremented once per 64-byte block
 *
 * Dispatch: when bit 24 of OPENSSL_ia32cap_P[0] and bit 9 of
 * OPENSSL_ia32cap_P[1] are both set, control jumps to .Lssse3_shortcut
 * with %eax = address of .Lpic_point and %ebp = &OPENSSL_ia32cap_P.
 * Otherwise the integer-only path below runs.
 *
 * Integer-path frame (after subl $132,%esp):
 *     0..60(%esp)   16-dword working state
 *    80..108(%esp)  copy of the 8 dwords from argument 4
 *   112..124(%esp)  copy of the 4 dwords from argument 5
 *   128(%esp)       remaining round-loop passes
 *   152/156/160(%esp) are the caller's argument slots 1..3 (20+132 etc.),
 *                   reused to hold the current out pointer, in pointer
 *                   and remaining length.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored.
 */
4.globl	ChaCha20_ctr32
5.type	ChaCha20_ctr32,@function
6.align	16
7ChaCha20_ctr32:
8.L_ChaCha20_ctr32_begin:
9	#ifdef __CET__
10
/* bytes F3 0F 1E FB: the endbr32 instruction, hand-encoded */
11.byte	243,15,30,251
12	#endif
13
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
/* length == 0: skip straight to the register restores */
18	xorl	%eax,%eax
19	cmpl	28(%esp),%eax
20	je	.L000no_data
/* call/pop yields the run-time address of .Lpic_point in %eax */
21	call	.Lpic_point
22.Lpic_point:
23	popl	%eax
24	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
/* 16777216 = 1<<24 in capability word 0; 512 = 1<<9 in capability word 1 */
25	testl	$16777216,(%ebp)
26	jz	.L001x86
27	testl	$512,4(%ebp)
28	jz	.L001x86
29	jmp	.Lssse3_shortcut
/* ---- integer-only path ---- */
30.L001x86:
31	movl	32(%esp),%esi
32	movl	36(%esp),%edi
33	subl	$132,%esp
/* copy the 8 dwords at (%esi) into 80..108(%esp) */
34	movl	(%esi),%eax
35	movl	4(%esi),%ebx
36	movl	8(%esi),%ecx
37	movl	12(%esi),%edx
38	movl	%eax,80(%esp)
39	movl	%ebx,84(%esp)
40	movl	%ecx,88(%esp)
41	movl	%edx,92(%esp)
42	movl	16(%esi),%eax
43	movl	20(%esi),%ebx
44	movl	24(%esi),%ecx
45	movl	28(%esi),%edx
46	movl	%eax,96(%esp)
47	movl	%ebx,100(%esp)
48	movl	%ecx,104(%esp)
49	movl	%edx,108(%esp)
/*
 * Copy the 4 dwords at (%edi) into 112..124(%esp).  Dword 0 is stored
 * minus one because .L002entry adds one back before every block.
 */
50	movl	(%edi),%eax
51	movl	4(%edi),%ebx
52	movl	8(%edi),%ecx
53	movl	12(%edi),%edx
54	subl	$1,%eax
55	movl	%eax,112(%esp)
56	movl	%ebx,116(%esp)
57	movl	%ecx,120(%esp)
58	movl	%edx,124(%esp)
59	jmp	.L002entry
60.align	16
/* next block: save the advanced in pointer, out pointer and remaining length */
61.L003outer_loop:
62	movl	%ebx,156(%esp)
63	movl	%eax,152(%esp)
64	movl	%ecx,160(%esp)
/*
 * Build the working state for one block.  The four immediates are
 * 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (ASCII "expand 32-byte k"
 * as little-endian dwords).  Some state words stay in registers, the
 * rest go to 0..60(%esp).
 */
65.L002entry:
66	movl	$1634760805,%eax
67	movl	$857760878,4(%esp)
68	movl	$2036477234,8(%esp)
69	movl	$1797285236,12(%esp)
70	movl	84(%esp),%ebx
71	movl	88(%esp),%ebp
72	movl	104(%esp),%ecx
73	movl	108(%esp),%esi
74	movl	116(%esp),%edx
75	movl	120(%esp),%edi
76	movl	%ebx,20(%esp)
77	movl	%ebp,24(%esp)
78	movl	%ecx,40(%esp)
79	movl	%esi,44(%esp)
80	movl	%edx,52(%esp)
81	movl	%edi,56(%esp)
82	movl	92(%esp),%ebx
83	movl	124(%esp),%edi
84	movl	112(%esp),%edx
85	movl	80(%esp),%ebp
86	movl	96(%esp),%ecx
87	movl	100(%esp),%esi
/* bump the block counter and write it back for the final addition */
88	addl	$1,%edx
89	movl	%ebx,28(%esp)
90	movl	%edi,60(%esp)
91	movl	%edx,112(%esp)
/* 10 passes of the round loop */
92	movl	$10,%ebx
93	jmp	.L004loop
94.align	16
/*
 * Round loop.  Each pass holds eight interleaved add / xor / rotate
 * sequences (rotations by 16, 12, 8 and 7).  %ebx is needed as a data
 * register, so the pass counter is parked in 128(%esp) at the top and
 * reloaded near the bottom.
 */
95.L004loop:
96	addl	%ebp,%eax
97	movl	%ebx,128(%esp)
98	movl	%ebp,%ebx
99	xorl	%eax,%edx
100	roll	$16,%edx
101	addl	%edx,%ecx
102	xorl	%ecx,%ebx
103	movl	52(%esp),%edi
104	roll	$12,%ebx
105	movl	20(%esp),%ebp
106	addl	%ebx,%eax
107	xorl	%eax,%edx
108	movl	%eax,(%esp)
109	roll	$8,%edx
110	movl	4(%esp),%eax
111	addl	%edx,%ecx
112	movl	%edx,48(%esp)
113	xorl	%ecx,%ebx
114	addl	%ebp,%eax
115	roll	$7,%ebx
116	xorl	%eax,%edi
117	movl	%ecx,32(%esp)
118	roll	$16,%edi
119	movl	%ebx,16(%esp)
120	addl	%edi,%esi
121	movl	40(%esp),%ecx
122	xorl	%esi,%ebp
123	movl	56(%esp),%edx
124	roll	$12,%ebp
125	movl	24(%esp),%ebx
126	addl	%ebp,%eax
127	xorl	%eax,%edi
128	movl	%eax,4(%esp)
129	roll	$8,%edi
130	movl	8(%esp),%eax
131	addl	%edi,%esi
132	movl	%edi,52(%esp)
133	xorl	%esi,%ebp
134	addl	%ebx,%eax
135	roll	$7,%ebp
136	xorl	%eax,%edx
137	movl	%esi,36(%esp)
138	roll	$16,%edx
139	movl	%ebp,20(%esp)
140	addl	%edx,%ecx
141	movl	44(%esp),%esi
142	xorl	%ecx,%ebx
143	movl	60(%esp),%edi
144	roll	$12,%ebx
145	movl	28(%esp),%ebp
146	addl	%ebx,%eax
147	xorl	%eax,%edx
148	movl	%eax,8(%esp)
149	roll	$8,%edx
150	movl	12(%esp),%eax
151	addl	%edx,%ecx
152	movl	%edx,56(%esp)
153	xorl	%ecx,%ebx
154	addl	%ebp,%eax
155	roll	$7,%ebx
156	xorl	%eax,%edi
157	roll	$16,%edi
158	movl	%ebx,24(%esp)
159	addl	%edi,%esi
160	xorl	%esi,%ebp
161	roll	$12,%ebp
162	movl	20(%esp),%ebx
163	addl	%ebp,%eax
164	xorl	%eax,%edi
165	movl	%eax,12(%esp)
166	roll	$8,%edi
167	movl	(%esp),%eax
168	addl	%edi,%esi
169	movl	%edi,%edx
170	xorl	%esi,%ebp
171	addl	%ebx,%eax
172	roll	$7,%ebp
173	xorl	%eax,%edx
174	roll	$16,%edx
175	movl	%ebp,28(%esp)
176	addl	%edx,%ecx
177	xorl	%ecx,%ebx
178	movl	48(%esp),%edi
179	roll	$12,%ebx
180	movl	24(%esp),%ebp
181	addl	%ebx,%eax
182	xorl	%eax,%edx
183	movl	%eax,(%esp)
184	roll	$8,%edx
185	movl	4(%esp),%eax
186	addl	%edx,%ecx
187	movl	%edx,60(%esp)
188	xorl	%ecx,%ebx
189	addl	%ebp,%eax
190	roll	$7,%ebx
191	xorl	%eax,%edi
192	movl	%ecx,40(%esp)
193	roll	$16,%edi
194	movl	%ebx,20(%esp)
195	addl	%edi,%esi
196	movl	32(%esp),%ecx
197	xorl	%esi,%ebp
198	movl	52(%esp),%edx
199	roll	$12,%ebp
200	movl	28(%esp),%ebx
201	addl	%ebp,%eax
202	xorl	%eax,%edi
203	movl	%eax,4(%esp)
204	roll	$8,%edi
205	movl	8(%esp),%eax
206	addl	%edi,%esi
207	movl	%edi,48(%esp)
208	xorl	%esi,%ebp
209	addl	%ebx,%eax
210	roll	$7,%ebp
211	xorl	%eax,%edx
212	movl	%esi,44(%esp)
213	roll	$16,%edx
214	movl	%ebp,24(%esp)
215	addl	%edx,%ecx
216	movl	36(%esp),%esi
217	xorl	%ecx,%ebx
218	movl	56(%esp),%edi
219	roll	$12,%ebx
220	movl	16(%esp),%ebp
221	addl	%ebx,%eax
222	xorl	%eax,%edx
223	movl	%eax,8(%esp)
224	roll	$8,%edx
225	movl	12(%esp),%eax
226	addl	%edx,%ecx
227	movl	%edx,52(%esp)
228	xorl	%ecx,%ebx
229	addl	%ebp,%eax
230	roll	$7,%ebx
231	xorl	%eax,%edi
232	roll	$16,%edi
233	movl	%ebx,28(%esp)
234	addl	%edi,%esi
235	xorl	%esi,%ebp
236	movl	48(%esp),%edx
237	roll	$12,%ebp
/* reload the pass counter parked at the top of the loop */
238	movl	128(%esp),%ebx
239	addl	%ebp,%eax
240	xorl	%eax,%edi
241	movl	%eax,12(%esp)
242	roll	$8,%edi
243	movl	(%esp),%eax
244	addl	%edi,%esi
245	movl	%edi,56(%esp)
246	xorl	%esi,%ebp
247	roll	$7,%ebp
248	decl	%ebx
249	jnz	.L004loop
/*
 * Rounds done.  Add the block's initial words back onto the words still
 * held in registers, then pick the full-block or the partial-block path
 * from the remaining length (unsigned compare).
 */
250	movl	160(%esp),%ebx
251	addl	$1634760805,%eax
252	addl	80(%esp),%ebp
253	addl	96(%esp),%ecx
254	addl	100(%esp),%esi
255	cmpl	$64,%ebx
256	jb	.L005tail
/*
 * Full 64-byte block: %ebx = input pointer, %eax = output pointer.
 * Words are finished (initial value added), XORed with the input and
 * stored in groups of five to fit the available registers.
 */
257	movl	156(%esp),%ebx
258	addl	112(%esp),%edx
259	addl	120(%esp),%edi
260	xorl	(%ebx),%eax
261	xorl	16(%ebx),%ebp
/* output word 0 is parked in (%esp) because %eax becomes the out pointer */
262	movl	%eax,(%esp)
263	movl	152(%esp),%eax
264	xorl	32(%ebx),%ecx
265	xorl	36(%ebx),%esi
266	xorl	48(%ebx),%edx
267	xorl	56(%ebx),%edi
268	movl	%ebp,16(%eax)
269	movl	%ecx,32(%eax)
270	movl	%esi,36(%eax)
271	movl	%edx,48(%eax)
272	movl	%edi,56(%eax)
273	movl	4(%esp),%ebp
274	movl	8(%esp),%ecx
275	movl	12(%esp),%esi
276	movl	20(%esp),%edx
277	movl	24(%esp),%edi
278	addl	$857760878,%ebp
279	addl	$2036477234,%ecx
280	addl	$1797285236,%esi
281	addl	84(%esp),%edx
282	addl	88(%esp),%edi
283	xorl	4(%ebx),%ebp
284	xorl	8(%ebx),%ecx
285	xorl	12(%ebx),%esi
286	xorl	20(%ebx),%edx
287	xorl	24(%ebx),%edi
288	movl	%ebp,4(%eax)
289	movl	%ecx,8(%eax)
290	movl	%esi,12(%eax)
291	movl	%edx,20(%eax)
292	movl	%edi,24(%eax)
293	movl	28(%esp),%ebp
294	movl	40(%esp),%ecx
295	movl	44(%esp),%esi
296	movl	52(%esp),%edx
297	movl	60(%esp),%edi
298	addl	92(%esp),%ebp
299	addl	104(%esp),%ecx
300	addl	108(%esp),%esi
301	addl	116(%esp),%edx
302	addl	124(%esp),%edi
303	xorl	28(%ebx),%ebp
304	xorl	40(%ebx),%ecx
305	xorl	44(%ebx),%esi
306	xorl	52(%ebx),%edx
307	xorl	60(%ebx),%edi
308	leal	64(%ebx),%ebx
309	movl	%ebp,28(%eax)
310	movl	(%esp),%ebp
311	movl	%ecx,40(%eax)
312	movl	160(%esp),%ecx
313	movl	%esi,44(%eax)
314	movl	%edx,52(%eax)
315	movl	%edi,60(%eax)
316	movl	%ebp,(%eax)
317	leal	64(%eax),%eax
/* 64 bytes consumed; loop while any length remains */
318	subl	$64,%ecx
319	jnz	.L003outer_loop
320	jmp	.L006done
/*
 * Partial final block (remaining length in %ebx is below 64): finish all
 * 16 words into 0..60(%esp), then XOR byte by byte.
 */
321.L005tail:
322	addl	112(%esp),%edx
323	addl	120(%esp),%edi
324	movl	%eax,(%esp)
325	movl	%ebp,16(%esp)
326	movl	%ecx,32(%esp)
327	movl	%esi,36(%esp)
328	movl	%edx,48(%esp)
329	movl	%edi,56(%esp)
330	movl	4(%esp),%ebp
331	movl	8(%esp),%ecx
332	movl	12(%esp),%esi
333	movl	20(%esp),%edx
334	movl	24(%esp),%edi
335	addl	$857760878,%ebp
336	addl	$2036477234,%ecx
337	addl	$1797285236,%esi
338	addl	84(%esp),%edx
339	addl	88(%esp),%edi
340	movl	%ebp,4(%esp)
341	movl	%ecx,8(%esp)
342	movl	%esi,12(%esp)
343	movl	%edx,20(%esp)
344	movl	%edi,24(%esp)
345	movl	28(%esp),%ebp
346	movl	40(%esp),%ecx
347	movl	44(%esp),%esi
348	movl	52(%esp),%edx
349	movl	60(%esp),%edi
350	addl	92(%esp),%ebp
351	addl	104(%esp),%ecx
352	addl	108(%esp),%esi
353	addl	116(%esp),%edx
354	addl	124(%esp),%edi
355	movl	%ebp,28(%esp)
356	movl	156(%esp),%ebp
357	movl	%ecx,40(%esp)
358	movl	152(%esp),%ecx
359	movl	%esi,44(%esp)
360	xorl	%esi,%esi
361	movl	%edx,52(%esp)
362	movl	%edi,60(%esp)
363	xorl	%eax,%eax
364	xorl	%edx,%edx
/* %esi = byte index, %ebp = input, %ecx = output, %ebx = bytes left */
365.L007tail_loop:
366	movb	(%esi,%ebp,1),%al
367	movb	(%esp,%esi,1),%dl
368	leal	1(%esi),%esi
369	xorb	%dl,%al
370	movb	%al,-1(%ecx,%esi,1)
371	decl	%ebx
372	jnz	.L007tail_loop
/* release the 132-byte frame; .L000no_data is the no-frame early exit */
373.L006done:
374	addl	$132,%esp
375.L000no_data:
376	popl	%edi
377	popl	%esi
378	popl	%ebx
379	popl	%ebp
380	ret
381.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3 -- SSE/SSSE3 path, one 64-byte block per iteration
 * (PIC build).
 *
 * Same stack-argument layout as ChaCha20_ctr32: after the four pushes,
 * 20(%esp) = output, 24(%esp) = input, 28(%esp) = length,
 * 32(%esp) = pointer to 32 bytes, 36(%esp) = pointer to 16 bytes.
 *
 * NOTE(review): the code from .Lssse3_shortcut onward reads 4(%ebp) and
 * forms an address from %eax, yet this function's own prologue sets
 * neither.  Both are established by ChaCha20_ctr32 before it jumps to
 * .Lssse3_shortcut (%ebp = &OPENSSL_ia32cap_P, %eax = address of
 * .Lpic_point).  Calling this global symbol directly appears to depend on
 * whatever the caller left in those registers -- confirm it is only
 * reached through ChaCha20_ctr32.
 *
 * Frame: %esp is lowered by 524 and aligned down to 64 bytes; the original
 * %esp is kept at 512(%esp).  0..63(%esp) holds the four 16-byte rows of
 * the block's initial state.
 */
382.globl	ChaCha20_ssse3
383.type	ChaCha20_ssse3,@function
384.align	16
385ChaCha20_ssse3:
386.L_ChaCha20_ssse3_begin:
387	#ifdef __CET__
388
/* bytes F3 0F 1E FB: the endbr32 instruction, hand-encoded */
389.byte	243,15,30,251
390	#endif
391
392	pushl	%ebp
393	pushl	%ebx
394	pushl	%esi
395	pushl	%edi
396.Lssse3_shortcut:
/* 2048 = 1<<11 in capability word 1: if set, use the XOP implementation */
397	testl	$2048,4(%ebp)
398	jnz	.Lxop_shortcut
/* %edi = out, %esi = in, %ecx = length, %edx and %ebx = the two state inputs */
399	movl	20(%esp),%edi
400	movl	24(%esp),%esi
401	movl	28(%esp),%ecx
402	movl	32(%esp),%edx
403	movl	36(%esp),%ebx
/* build the 64-byte-aligned frame, remembering the old %esp for the epilogue */
404	movl	%esp,%ebp
405	subl	$524,%esp
406	andl	$-64,%esp
407	movl	%ebp,512(%esp)
/* %eax = run-time address of .Lssse3_data */
408	leal	.Lssse3_data-.Lpic_point(%eax),%eax
/* %xmm3 = the 16 bytes at argument 5 (fourth state row) */
409	movdqu	(%ebx),%xmm3
410.L0081x:
/*
 * %xmm0 = constants row (table offset 32), %xmm1/%xmm2 = the 32 bytes at
 * argument 4, %xmm6/%xmm7 = the two pshufb masks at table offsets 0 and 16.
 */
411	movdqa	32(%eax),%xmm0
412	movdqu	(%edx),%xmm1
413	movdqu	16(%edx),%xmm2
414	movdqa	(%eax),%xmm6
415	movdqa	16(%eax),%xmm7
/* this dword is overwritten by the 16-byte store of %xmm3 to 48(%esp) below */
416	movl	%ebp,48(%esp)
417	movdqa	%xmm0,(%esp)
418	movdqa	%xmm1,16(%esp)
419	movdqa	%xmm2,32(%esp)
420	movdqa	%xmm3,48(%esp)
421	movl	$10,%edx
422	jmp	.L009loop1x
423.align	16
/*
 * Later blocks: reload rows 0..2 from the frame and add the vector at
 * table offset 80, (1,0,0,0), to the saved fourth row, incrementing its
 * first dword.
 */
424.L010outer1x:
425	movdqa	80(%eax),%xmm3
426	movdqa	(%esp),%xmm0
427	movdqa	16(%esp),%xmm1
428	movdqa	32(%esp),%xmm2
429	paddd	48(%esp),%xmm3
430	movl	$10,%edx
431	movdqa	%xmm3,48(%esp)
432	jmp	.L009loop1x
433.align	16
/*
 * Round loop, 10 passes.  Each pass runs the add/xor/rotate sequence on
 * all four dword lanes twice, with pshufd lane rotations between the two
 * halves and again at the end.
 *   .byte 102,15,56,0,222 = pshufb %xmm6,%xmm3 (mask 2,3,0,1,...: rotate
 *                           each dword left by 16)
 *   .byte 102,15,56,0,223 = pshufb %xmm7,%xmm3 (mask 3,0,1,2,...: rotate
 *                           each dword left by 8)
 * Rotations by 12 and 7 are built from pslld / psrld / por via %xmm4.
 */
434.L009loop1x:
435	paddd	%xmm1,%xmm0
436	pxor	%xmm0,%xmm3
437.byte	102,15,56,0,222
438	paddd	%xmm3,%xmm2
439	pxor	%xmm2,%xmm1
440	movdqa	%xmm1,%xmm4
441	psrld	$20,%xmm1
442	pslld	$12,%xmm4
443	por	%xmm4,%xmm1
444	paddd	%xmm1,%xmm0
445	pxor	%xmm0,%xmm3
446.byte	102,15,56,0,223
447	paddd	%xmm3,%xmm2
448	pxor	%xmm2,%xmm1
449	movdqa	%xmm1,%xmm4
450	psrld	$25,%xmm1
451	pslld	$7,%xmm4
452	por	%xmm4,%xmm1
/* rotate the lanes of rows 2, 1 and 3 by two, one and three positions */
453	pshufd	$78,%xmm2,%xmm2
454	pshufd	$57,%xmm1,%xmm1
455	pshufd	$147,%xmm3,%xmm3
456	nop
457	paddd	%xmm1,%xmm0
458	pxor	%xmm0,%xmm3
459.byte	102,15,56,0,222
460	paddd	%xmm3,%xmm2
461	pxor	%xmm2,%xmm1
462	movdqa	%xmm1,%xmm4
463	psrld	$20,%xmm1
464	pslld	$12,%xmm4
465	por	%xmm4,%xmm1
466	paddd	%xmm1,%xmm0
467	pxor	%xmm0,%xmm3
468.byte	102,15,56,0,223
469	paddd	%xmm3,%xmm2
470	pxor	%xmm2,%xmm1
471	movdqa	%xmm1,%xmm4
472	psrld	$25,%xmm1
473	pslld	$7,%xmm4
474	por	%xmm4,%xmm1
/* inverse lane rotations: rows return to their original lane order */
475	pshufd	$78,%xmm2,%xmm2
476	pshufd	$147,%xmm1,%xmm1
477	pshufd	$57,%xmm3,%xmm3
478	decl	%edx
479	jnz	.L009loop1x
/* add the initial state rows saved in the frame */
480	paddd	(%esp),%xmm0
481	paddd	16(%esp),%xmm1
482	paddd	32(%esp),%xmm2
483	paddd	48(%esp),%xmm3
/* fewer than 64 bytes left (unsigned): handle byte by byte */
484	cmpl	$64,%ecx
485	jb	.L011tail
/* full block: XOR 64 input bytes with the rows and store to the output */
486	movdqu	(%esi),%xmm4
487	movdqu	16(%esi),%xmm5
488	pxor	%xmm4,%xmm0
489	movdqu	32(%esi),%xmm4
490	pxor	%xmm5,%xmm1
491	movdqu	48(%esi),%xmm5
492	pxor	%xmm4,%xmm2
493	pxor	%xmm5,%xmm3
494	leal	64(%esi),%esi
495	movdqu	%xmm0,(%edi)
496	movdqu	%xmm1,16(%edi)
497	movdqu	%xmm2,32(%edi)
498	movdqu	%xmm3,48(%edi)
499	leal	64(%edi),%edi
500	subl	$64,%ecx
501	jnz	.L010outer1x
502	jmp	.L012done
/* partial block: spill the 64 generated bytes to the frame, then XOR %ecx bytes */
503.L011tail:
504	movdqa	%xmm0,(%esp)
505	movdqa	%xmm1,16(%esp)
506	movdqa	%xmm2,32(%esp)
507	movdqa	%xmm3,48(%esp)
508	xorl	%eax,%eax
509	xorl	%edx,%edx
510	xorl	%ebp,%ebp
/* %ebp = byte index */
511.L013tail_loop:
512	movb	(%esp,%ebp,1),%al
513	movb	(%esi,%ebp,1),%dl
514	leal	1(%ebp),%ebp
515	xorb	%dl,%al
516	movb	%al,-1(%edi,%ebp,1)
517	decl	%ecx
518	jnz	.L013tail_loop
/* restore the pre-alignment %esp saved at 512(%esp), then the pushed registers */
519.L012done:
520	movl	512(%esp),%esp
521	popl	%edi
522	popl	%esi
523	popl	%ebx
524	popl	%ebp
525	ret
526.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * Constant table shared by the SSSE3 and XOP code, addressed relative to
 * .Lssse3_data.  Byte offsets as used by the code in this file:
 *     0  pshufb mask: rotate each dword left by 16
 *    16  pshufb mask: rotate each dword left by 8
 *    32  0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
 *        (ASCII "expand 32-byte k")
 *    48  (0,1,2,3)    per-lane counter offsets for the 4-block XOP path
 *    64  (4,4,4,4)    counter step for the 4-block XOP path
 *    80  (1,0,0,0)    counter step for the 1-block paths
 *    96  (4,0,0,0)    counter adjustment on leaving the 4-block path
 *   112  (0,-1,-1,-1) mask keeping the three non-counter dwords
 */
527.align	64
528.Lssse3_data:
529.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
530.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
531.long	1634760805,857760878,2036477234,1797285236
532.long	0,1,2,3
533.long	4,4,4,4
534.long	1,0,0,0
535.long	4,0,0,0
536.long	0,-1,-1,-1
537.align	64
/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
538.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
539.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
540.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
541.byte	114,103,62,0
/*
 * ChaCha20_xop -- AVX-encoded path using the XOP vprotd rotate (PIC build).
 *
 * Same stack-argument layout as ChaCha20_ctr32: after the four pushes,
 * 20(%esp) = output, 24(%esp) = input, 28(%esp) = length,
 * 32(%esp) = pointer to 32 bytes, 36(%esp) = pointer to 16 bytes.
 *
 * NOTE(review): .Lxop_shortcut forms the table address from %eax, which
 * this function's own prologue does not set.  It is established by
 * ChaCha20_ctr32 and carried through .Lssse3_shortcut -- confirm this
 * symbol is only reached that way.
 *
 * Two code paths:
 *   - length >= 256: .L015outer_loop handles four 64-byte blocks per
 *     iteration, with each state word broadcast across the four lanes.
 *   - otherwise, and for any remainder: the one-block loop at .L0141x.
 *
 * Frame: %esp is lowered by 524 and aligned down to 64; the old %esp is at
 * 512(%esp), argument 4 at 516(%esp), argument 5 at 520(%esp).  In the
 * 4-block path %ebp = %esp+384 addresses the 16 broadcast initial-state
 * vectors (-128..112) and %ebx = %esp+128 addresses the 16 working
 * vectors.
 *
 * All ".byte 143,232,120,194,M,N" lines are hand-encoded
 * "vprotd $N,%xmmR,%xmmR" (rotate each dword left by N), where
 * M = 246/210/255/219/201 selects R = 6/2/7/3/1.
 */
542.globl	ChaCha20_xop
543.type	ChaCha20_xop,@function
544.align	16
545ChaCha20_xop:
546.L_ChaCha20_xop_begin:
547	#ifdef __CET__
548
/* bytes F3 0F 1E FB: the endbr32 instruction, hand-encoded */
549.byte	243,15,30,251
550	#endif
551
552	pushl	%ebp
553	pushl	%ebx
554	pushl	%esi
555	pushl	%edi
556.Lxop_shortcut:
/* %edi = out, %esi = in, %ecx = length, %edx and %ebx = the two state inputs */
557	movl	20(%esp),%edi
558	movl	24(%esp),%esi
559	movl	28(%esp),%ecx
560	movl	32(%esp),%edx
561	movl	36(%esp),%ebx
562	vzeroupper
563	movl	%esp,%ebp
564	subl	$524,%esp
565	andl	$-64,%esp
566	movl	%ebp,512(%esp)
/* %eax = run-time address of .Lssse3_data */
567	leal	.Lssse3_data-.Lpic_point(%eax),%eax
568	vmovdqu	(%ebx),%xmm3
/* fewer than 256 bytes (unsigned): use the one-block path only */
569	cmpl	$256,%ecx
570	jb	.L0141x
/* ---- 4-block setup ---- */
571	movl	%edx,516(%esp)
572	movl	%ebx,520(%esp)
/* %ecx is kept biased by -256 while in the 4-block loop */
573	subl	$256,%ecx
574	leal	384(%esp),%ebp
575	vmovdqu	(%edx),%xmm7
/* broadcast each dword of the fourth row across a vector */
576	vpshufd	$0,%xmm3,%xmm0
577	vpshufd	$85,%xmm3,%xmm1
578	vpshufd	$170,%xmm3,%xmm2
579	vpshufd	$255,%xmm3,%xmm3
/*
 * Lane counters = counter + (0,1,2,3), then minus (4,4,4,4) because the
 * loop head adds (4,4,4,4) before every iteration, the first included.
 */
580	vpaddd	48(%eax),%xmm0,%xmm0
581	vpshufd	$0,%xmm7,%xmm4
582	vpshufd	$85,%xmm7,%xmm5
583	vpsubd	64(%eax),%xmm0,%xmm0
584	vpshufd	$170,%xmm7,%xmm6
585	vpshufd	$255,%xmm7,%xmm7
586	vmovdqa	%xmm0,64(%ebp)
587	vmovdqa	%xmm1,80(%ebp)
588	vmovdqa	%xmm2,96(%ebp)
589	vmovdqa	%xmm3,112(%ebp)
590	vmovdqu	16(%edx),%xmm3
591	vmovdqa	%xmm4,-64(%ebp)
592	vmovdqa	%xmm5,-48(%ebp)
593	vmovdqa	%xmm6,-32(%ebp)
594	vmovdqa	%xmm7,-16(%ebp)
/* constants row from table offset 32 */
595	vmovdqa	32(%eax),%xmm7
596	leal	128(%esp),%ebx
597	vpshufd	$0,%xmm3,%xmm0
598	vpshufd	$85,%xmm3,%xmm1
599	vpshufd	$170,%xmm3,%xmm2
600	vpshufd	$255,%xmm3,%xmm3
601	vpshufd	$0,%xmm7,%xmm4
602	vpshufd	$85,%xmm7,%xmm5
603	vpshufd	$170,%xmm7,%xmm6
604	vpshufd	$255,%xmm7,%xmm7
605	vmovdqa	%xmm0,(%ebp)
606	vmovdqa	%xmm1,16(%ebp)
607	vmovdqa	%xmm2,32(%ebp)
608	vmovdqa	%xmm3,48(%ebp)
609	vmovdqa	%xmm4,-128(%ebp)
610	vmovdqa	%xmm5,-112(%ebp)
611	vmovdqa	%xmm6,-96(%ebp)
612	vmovdqa	%xmm7,-80(%ebp)
/* bias the in/out pointers by +128 so the loop can use -128..64 displacements */
613	leal	128(%esi),%esi
614	leal	128(%edi),%edi
615	jmp	.L015outer_loop
616.align	32
/*
 * One iteration = four blocks.  Copy the initial-state vectors from the
 * %ebp area to the working area at %ebx; the five vectors the round loop
 * expects in registers on entry (%xmm0, %xmm3, %xmm4, %xmm5, %xmm6) are
 * loaded directly instead.
 */
617.L015outer_loop:
618	vmovdqa	-112(%ebp),%xmm1
619	vmovdqa	-96(%ebp),%xmm2
620	vmovdqa	-80(%ebp),%xmm3
621	vmovdqa	-48(%ebp),%xmm5
622	vmovdqa	-32(%ebp),%xmm6
623	vmovdqa	-16(%ebp),%xmm7
624	vmovdqa	%xmm1,-112(%ebx)
625	vmovdqa	%xmm2,-96(%ebx)
626	vmovdqa	%xmm3,-80(%ebx)
627	vmovdqa	%xmm5,-48(%ebx)
628	vmovdqa	%xmm6,-32(%ebx)
629	vmovdqa	%xmm7,-16(%ebx)
630	vmovdqa	32(%ebp),%xmm2
631	vmovdqa	48(%ebp),%xmm3
632	vmovdqa	64(%ebp),%xmm4
633	vmovdqa	80(%ebp),%xmm5
634	vmovdqa	96(%ebp),%xmm6
635	vmovdqa	112(%ebp),%xmm7
/* advance the four lane counters by 4 and save them as this iteration's initial value */
636	vpaddd	64(%eax),%xmm4,%xmm4
637	vmovdqa	%xmm2,32(%ebx)
638	vmovdqa	%xmm3,48(%ebx)
639	vmovdqa	%xmm4,64(%ebx)
640	vmovdqa	%xmm5,80(%ebx)
641	vmovdqa	%xmm6,96(%ebx)
642	vmovdqa	%xmm7,112(%ebx)
643	vmovdqa	%xmm4,64(%ebp)
644	vmovdqa	-128(%ebp),%xmm0
645	vmovdqa	%xmm4,%xmm6
646	vmovdqa	-64(%ebp),%xmm3
647	vmovdqa	(%ebp),%xmm4
648	vmovdqa	16(%ebp),%xmm5
649	movl	$10,%edx
650	nop
651.align	32
/*
 * Round loop, 10 passes.  Each pass holds eight interleaved add / xor /
 * vprotd sequences (rotations by 16, 12, 8 and 7); operands are loaded
 * from and spilled to the %ebx working area as the schedule requires.
 */
652.L016loop:
653	vpaddd	%xmm3,%xmm0,%xmm0
654	vpxor	%xmm0,%xmm6,%xmm6
655.byte	143,232,120,194,246,16
656	vpaddd	%xmm6,%xmm4,%xmm4
657	vpxor	%xmm4,%xmm3,%xmm2
658	vmovdqa	-112(%ebx),%xmm1
659.byte	143,232,120,194,210,12
660	vmovdqa	-48(%ebx),%xmm3
661	vpaddd	%xmm2,%xmm0,%xmm0
662	vmovdqa	80(%ebx),%xmm7
663	vpxor	%xmm0,%xmm6,%xmm6
664	vpaddd	%xmm3,%xmm1,%xmm1
665.byte	143,232,120,194,246,8
666	vmovdqa	%xmm0,-128(%ebx)
667	vpaddd	%xmm6,%xmm4,%xmm4
668	vmovdqa	%xmm6,64(%ebx)
669	vpxor	%xmm4,%xmm2,%xmm2
670	vpxor	%xmm1,%xmm7,%xmm7
671.byte	143,232,120,194,210,7
672	vmovdqa	%xmm4,(%ebx)
673.byte	143,232,120,194,255,16
674	vmovdqa	%xmm2,-64(%ebx)
675	vpaddd	%xmm7,%xmm5,%xmm5
676	vmovdqa	32(%ebx),%xmm4
677	vpxor	%xmm5,%xmm3,%xmm3
678	vmovdqa	-96(%ebx),%xmm0
679.byte	143,232,120,194,219,12
680	vmovdqa	-32(%ebx),%xmm2
681	vpaddd	%xmm3,%xmm1,%xmm1
682	vmovdqa	96(%ebx),%xmm6
683	vpxor	%xmm1,%xmm7,%xmm7
684	vpaddd	%xmm2,%xmm0,%xmm0
685.byte	143,232,120,194,255,8
686	vmovdqa	%xmm1,-112(%ebx)
687	vpaddd	%xmm7,%xmm5,%xmm5
688	vmovdqa	%xmm7,80(%ebx)
689	vpxor	%xmm5,%xmm3,%xmm3
690	vpxor	%xmm0,%xmm6,%xmm6
691.byte	143,232,120,194,219,7
692	vmovdqa	%xmm5,16(%ebx)
693.byte	143,232,120,194,246,16
694	vmovdqa	%xmm3,-48(%ebx)
695	vpaddd	%xmm6,%xmm4,%xmm4
696	vmovdqa	48(%ebx),%xmm5
697	vpxor	%xmm4,%xmm2,%xmm2
698	vmovdqa	-80(%ebx),%xmm1
699.byte	143,232,120,194,210,12
700	vmovdqa	-16(%ebx),%xmm3
701	vpaddd	%xmm2,%xmm0,%xmm0
702	vmovdqa	112(%ebx),%xmm7
703	vpxor	%xmm0,%xmm6,%xmm6
704	vpaddd	%xmm3,%xmm1,%xmm1
705.byte	143,232,120,194,246,8
706	vmovdqa	%xmm0,-96(%ebx)
707	vpaddd	%xmm6,%xmm4,%xmm4
708	vmovdqa	%xmm6,96(%ebx)
709	vpxor	%xmm4,%xmm2,%xmm2
710	vpxor	%xmm1,%xmm7,%xmm7
711.byte	143,232,120,194,210,7
712.byte	143,232,120,194,255,16
713	vmovdqa	%xmm2,-32(%ebx)
714	vpaddd	%xmm7,%xmm5,%xmm5
715	vpxor	%xmm5,%xmm3,%xmm3
/* from here the second group of four sequences begins, pairing words with shifted indices */
716	vmovdqa	-128(%ebx),%xmm0
717.byte	143,232,120,194,219,12
718	vmovdqa	-48(%ebx),%xmm2
719	vpaddd	%xmm3,%xmm1,%xmm1
720	vpxor	%xmm1,%xmm7,%xmm7
721	vpaddd	%xmm2,%xmm0,%xmm0
722.byte	143,232,120,194,255,8
723	vmovdqa	%xmm1,-80(%ebx)
724	vpaddd	%xmm7,%xmm5,%xmm5
725	vpxor	%xmm5,%xmm3,%xmm3
726	vpxor	%xmm0,%xmm7,%xmm6
727.byte	143,232,120,194,219,7
728.byte	143,232,120,194,246,16
729	vmovdqa	%xmm3,-16(%ebx)
730	vpaddd	%xmm6,%xmm4,%xmm4
731	vpxor	%xmm4,%xmm2,%xmm2
732	vmovdqa	-112(%ebx),%xmm1
733.byte	143,232,120,194,210,12
734	vmovdqa	-32(%ebx),%xmm3
735	vpaddd	%xmm2,%xmm0,%xmm0
736	vmovdqa	64(%ebx),%xmm7
737	vpxor	%xmm0,%xmm6,%xmm6
738	vpaddd	%xmm3,%xmm1,%xmm1
739.byte	143,232,120,194,246,8
740	vmovdqa	%xmm0,-128(%ebx)
741	vpaddd	%xmm6,%xmm4,%xmm4
742	vmovdqa	%xmm6,112(%ebx)
743	vpxor	%xmm4,%xmm2,%xmm2
744	vpxor	%xmm1,%xmm7,%xmm7
745.byte	143,232,120,194,210,7
746	vmovdqa	%xmm4,32(%ebx)
747.byte	143,232,120,194,255,16
748	vmovdqa	%xmm2,-48(%ebx)
749	vpaddd	%xmm7,%xmm5,%xmm5
750	vmovdqa	(%ebx),%xmm4
751	vpxor	%xmm5,%xmm3,%xmm3
752	vmovdqa	-96(%ebx),%xmm0
753.byte	143,232,120,194,219,12
754	vmovdqa	-16(%ebx),%xmm2
755	vpaddd	%xmm3,%xmm1,%xmm1
756	vmovdqa	80(%ebx),%xmm6
757	vpxor	%xmm1,%xmm7,%xmm7
758	vpaddd	%xmm2,%xmm0,%xmm0
759.byte	143,232,120,194,255,8
760	vmovdqa	%xmm1,-112(%ebx)
761	vpaddd	%xmm7,%xmm5,%xmm5
762	vmovdqa	%xmm7,64(%ebx)
763	vpxor	%xmm5,%xmm3,%xmm3
764	vpxor	%xmm0,%xmm6,%xmm6
765.byte	143,232,120,194,219,7
766	vmovdqa	%xmm5,48(%ebx)
767.byte	143,232,120,194,246,16
768	vmovdqa	%xmm3,-32(%ebx)
769	vpaddd	%xmm6,%xmm4,%xmm4
770	vmovdqa	16(%ebx),%xmm5
771	vpxor	%xmm4,%xmm2,%xmm2
772	vmovdqa	-80(%ebx),%xmm1
773.byte	143,232,120,194,210,12
774	vmovdqa	-64(%ebx),%xmm3
775	vpaddd	%xmm2,%xmm0,%xmm0
776	vmovdqa	96(%ebx),%xmm7
777	vpxor	%xmm0,%xmm6,%xmm6
778	vpaddd	%xmm3,%xmm1,%xmm1
779.byte	143,232,120,194,246,8
780	vmovdqa	%xmm0,-96(%ebx)
781	vpaddd	%xmm6,%xmm4,%xmm4
782	vmovdqa	%xmm6,80(%ebx)
783	vpxor	%xmm4,%xmm2,%xmm2
784	vpxor	%xmm1,%xmm7,%xmm7
785.byte	143,232,120,194,210,7
786.byte	143,232,120,194,255,16
787	vmovdqa	%xmm2,-16(%ebx)
788	vpaddd	%xmm7,%xmm5,%xmm5
789	vpxor	%xmm5,%xmm3,%xmm3
/* reload the two vectors the next pass expects in %xmm0 and %xmm6 */
790	vmovdqa	-128(%ebx),%xmm0
791.byte	143,232,120,194,219,12
792	vpaddd	%xmm3,%xmm1,%xmm1
793	vmovdqa	64(%ebx),%xmm6
794	vpxor	%xmm1,%xmm7,%xmm7
795.byte	143,232,120,194,255,8
796	vmovdqa	%xmm1,-80(%ebx)
797	vpaddd	%xmm7,%xmm5,%xmm5
798	vmovdqa	%xmm7,96(%ebx)
799	vpxor	%xmm5,%xmm3,%xmm3
800.byte	143,232,120,194,219,7
801	decl	%edx
802	jnz	.L016loop
/* spill the vectors still live in registers so the whole state is in the %ebx area */
803	vmovdqa	%xmm3,-64(%ebx)
804	vmovdqa	%xmm4,(%ebx)
805	vmovdqa	%xmm5,16(%ebx)
806	vmovdqa	%xmm6,64(%ebx)
807	vmovdqa	%xmm7,96(%ebx)
/*
 * Output stage, done four state words at a time (four groups).  For each
 * group: add the initial-state vectors, transpose the 4x4 dword matrix
 * with vpunpck{l,h}dq / vpunpck{l,h}qdq so each register holds 16
 * consecutive bytes of one block, XOR with the input at a 64-byte stride
 * (one block apart) and store.
 */
808	vmovdqa	-112(%ebx),%xmm1
809	vmovdqa	-96(%ebx),%xmm2
810	vmovdqa	-80(%ebx),%xmm3
811	vpaddd	-128(%ebp),%xmm0,%xmm0
812	vpaddd	-112(%ebp),%xmm1,%xmm1
813	vpaddd	-96(%ebp),%xmm2,%xmm2
814	vpaddd	-80(%ebp),%xmm3,%xmm3
815	vpunpckldq	%xmm1,%xmm0,%xmm6
816	vpunpckldq	%xmm3,%xmm2,%xmm7
817	vpunpckhdq	%xmm1,%xmm0,%xmm0
818	vpunpckhdq	%xmm3,%xmm2,%xmm2
819	vpunpcklqdq	%xmm7,%xmm6,%xmm1
820	vpunpckhqdq	%xmm7,%xmm6,%xmm6
821	vpunpcklqdq	%xmm2,%xmm0,%xmm7
822	vpunpckhqdq	%xmm2,%xmm0,%xmm3
823	vpxor	-128(%esi),%xmm1,%xmm4
824	vpxor	-64(%esi),%xmm6,%xmm5
825	vpxor	(%esi),%xmm7,%xmm6
826	vpxor	64(%esi),%xmm3,%xmm7
827	leal	16(%esi),%esi
828	vmovdqa	-64(%ebx),%xmm0
829	vmovdqa	-48(%ebx),%xmm1
830	vmovdqa	-32(%ebx),%xmm2
831	vmovdqa	-16(%ebx),%xmm3
832	vmovdqu	%xmm4,-128(%edi)
833	vmovdqu	%xmm5,-64(%edi)
834	vmovdqu	%xmm6,(%edi)
835	vmovdqu	%xmm7,64(%edi)
836	leal	16(%edi),%edi
837	vpaddd	-64(%ebp),%xmm0,%xmm0
838	vpaddd	-48(%ebp),%xmm1,%xmm1
839	vpaddd	-32(%ebp),%xmm2,%xmm2
840	vpaddd	-16(%ebp),%xmm3,%xmm3
841	vpunpckldq	%xmm1,%xmm0,%xmm6
842	vpunpckldq	%xmm3,%xmm2,%xmm7
843	vpunpckhdq	%xmm1,%xmm0,%xmm0
844	vpunpckhdq	%xmm3,%xmm2,%xmm2
845	vpunpcklqdq	%xmm7,%xmm6,%xmm1
846	vpunpckhqdq	%xmm7,%xmm6,%xmm6
847	vpunpcklqdq	%xmm2,%xmm0,%xmm7
848	vpunpckhqdq	%xmm2,%xmm0,%xmm3
849	vpxor	-128(%esi),%xmm1,%xmm4
850	vpxor	-64(%esi),%xmm6,%xmm5
851	vpxor	(%esi),%xmm7,%xmm6
852	vpxor	64(%esi),%xmm3,%xmm7
853	leal	16(%esi),%esi
854	vmovdqa	(%ebx),%xmm0
855	vmovdqa	16(%ebx),%xmm1
856	vmovdqa	32(%ebx),%xmm2
857	vmovdqa	48(%ebx),%xmm3
858	vmovdqu	%xmm4,-128(%edi)
859	vmovdqu	%xmm5,-64(%edi)
860	vmovdqu	%xmm6,(%edi)
861	vmovdqu	%xmm7,64(%edi)
862	leal	16(%edi),%edi
863	vpaddd	(%ebp),%xmm0,%xmm0
864	vpaddd	16(%ebp),%xmm1,%xmm1
865	vpaddd	32(%ebp),%xmm2,%xmm2
866	vpaddd	48(%ebp),%xmm3,%xmm3
867	vpunpckldq	%xmm1,%xmm0,%xmm6
868	vpunpckldq	%xmm3,%xmm2,%xmm7
869	vpunpckhdq	%xmm1,%xmm0,%xmm0
870	vpunpckhdq	%xmm3,%xmm2,%xmm2
871	vpunpcklqdq	%xmm7,%xmm6,%xmm1
872	vpunpckhqdq	%xmm7,%xmm6,%xmm6
873	vpunpcklqdq	%xmm2,%xmm0,%xmm7
874	vpunpckhqdq	%xmm2,%xmm0,%xmm3
875	vpxor	-128(%esi),%xmm1,%xmm4
876	vpxor	-64(%esi),%xmm6,%xmm5
877	vpxor	(%esi),%xmm7,%xmm6
878	vpxor	64(%esi),%xmm3,%xmm7
879	leal	16(%esi),%esi
880	vmovdqa	64(%ebx),%xmm0
881	vmovdqa	80(%ebx),%xmm1
882	vmovdqa	96(%ebx),%xmm2
883	vmovdqa	112(%ebx),%xmm3
884	vmovdqu	%xmm4,-128(%edi)
885	vmovdqu	%xmm5,-64(%edi)
886	vmovdqu	%xmm6,(%edi)
887	vmovdqu	%xmm7,64(%edi)
888	leal	16(%edi),%edi
889	vpaddd	64(%ebp),%xmm0,%xmm0
890	vpaddd	80(%ebp),%xmm1,%xmm1
891	vpaddd	96(%ebp),%xmm2,%xmm2
892	vpaddd	112(%ebp),%xmm3,%xmm3
893	vpunpckldq	%xmm1,%xmm0,%xmm6
894	vpunpckldq	%xmm3,%xmm2,%xmm7
895	vpunpckhdq	%xmm1,%xmm0,%xmm0
896	vpunpckhdq	%xmm3,%xmm2,%xmm2
897	vpunpcklqdq	%xmm7,%xmm6,%xmm1
898	vpunpckhqdq	%xmm7,%xmm6,%xmm6
899	vpunpcklqdq	%xmm2,%xmm0,%xmm7
900	vpunpckhqdq	%xmm2,%xmm0,%xmm3
901	vpxor	-128(%esi),%xmm1,%xmm4
902	vpxor	-64(%esi),%xmm6,%xmm5
903	vpxor	(%esi),%xmm7,%xmm6
904	vpxor	64(%esi),%xmm3,%xmm7
/* 16+16+16+208 = 256: the pointers end up exactly four blocks further on */
905	leal	208(%esi),%esi
906	vmovdqu	%xmm4,-128(%edi)
907	vmovdqu	%xmm5,-64(%edi)
908	vmovdqu	%xmm6,(%edi)
909	vmovdqu	%xmm7,64(%edi)
910	leal	208(%edi),%edi
/* no borrow: at least 256 more bytes remain, so run another 4-block iteration */
911	subl	$256,%ecx
912	jnc	.L015outer_loop
/* undo the -256 bias; zero means nothing is left */
913	addl	$256,%ecx
914	jz	.L017done
/*
 * Remainder via the one-block path: restore the argument pointers, undo
 * the +128 pointer bias, and rebuild the fourth state row as
 * (lane-0 counter + 4, original dwords 1..3).
 */
915	movl	520(%esp),%ebx
916	leal	-128(%esi),%esi
917	movl	516(%esp),%edx
918	leal	-128(%edi),%edi
919	vmovd	64(%ebp),%xmm2
920	vmovdqu	(%ebx),%xmm3
921	vpaddd	96(%eax),%xmm2,%xmm2
922	vpand	112(%eax),%xmm3,%xmm3
923	vpor	%xmm2,%xmm3,%xmm3
/* ---- one-block path: %xmm3 = fourth state row on entry ---- */
924.L0141x:
925	vmovdqa	32(%eax),%xmm0
926	vmovdqu	(%edx),%xmm1
927	vmovdqu	16(%edx),%xmm2
/* %xmm6/%xmm7 are loaded but not referenced again in this path */
928	vmovdqa	(%eax),%xmm6
929	vmovdqa	16(%eax),%xmm7
/* this dword is overwritten by the 16-byte store of %xmm3 to 48(%esp) below */
930	movl	%ebp,48(%esp)
931	vmovdqa	%xmm0,(%esp)
932	vmovdqa	%xmm1,16(%esp)
933	vmovdqa	%xmm2,32(%esp)
934	vmovdqa	%xmm3,48(%esp)
935	movl	$10,%edx
936	jmp	.L018loop1x
937.align	16
/* later blocks: reload rows 0..2 and add (1,0,0,0) to the saved fourth row */
938.L019outer1x:
939	vmovdqa	80(%eax),%xmm3
940	vmovdqa	(%esp),%xmm0
941	vmovdqa	16(%esp),%xmm1
942	vmovdqa	32(%esp),%xmm2
943	vpaddd	48(%esp),%xmm3,%xmm3
944	movl	$10,%edx
945	vmovdqa	%xmm3,48(%esp)
946	jmp	.L018loop1x
947.align	16
/*
 * One-block round loop, 10 passes: two add/xor/vprotd sequences per pass
 * with vpshufd lane rotations between them and again at the end.
 */
948.L018loop1x:
949	vpaddd	%xmm1,%xmm0,%xmm0
950	vpxor	%xmm0,%xmm3,%xmm3
951.byte	143,232,120,194,219,16
952	vpaddd	%xmm3,%xmm2,%xmm2
953	vpxor	%xmm2,%xmm1,%xmm1
954.byte	143,232,120,194,201,12
955	vpaddd	%xmm1,%xmm0,%xmm0
956	vpxor	%xmm0,%xmm3,%xmm3
957.byte	143,232,120,194,219,8
958	vpaddd	%xmm3,%xmm2,%xmm2
959	vpxor	%xmm2,%xmm1,%xmm1
960.byte	143,232,120,194,201,7
961	vpshufd	$78,%xmm2,%xmm2
962	vpshufd	$57,%xmm1,%xmm1
963	vpshufd	$147,%xmm3,%xmm3
964	vpaddd	%xmm1,%xmm0,%xmm0
965	vpxor	%xmm0,%xmm3,%xmm3
966.byte	143,232,120,194,219,16
967	vpaddd	%xmm3,%xmm2,%xmm2
968	vpxor	%xmm2,%xmm1,%xmm1
969.byte	143,232,120,194,201,12
970	vpaddd	%xmm1,%xmm0,%xmm0
971	vpxor	%xmm0,%xmm3,%xmm3
972.byte	143,232,120,194,219,8
973	vpaddd	%xmm3,%xmm2,%xmm2
974	vpxor	%xmm2,%xmm1,%xmm1
975.byte	143,232,120,194,201,7
976	vpshufd	$78,%xmm2,%xmm2
977	vpshufd	$147,%xmm1,%xmm1
978	vpshufd	$57,%xmm3,%xmm3
979	decl	%edx
980	jnz	.L018loop1x
/* add the initial state rows saved in the frame */
981	vpaddd	(%esp),%xmm0,%xmm0
982	vpaddd	16(%esp),%xmm1,%xmm1
983	vpaddd	32(%esp),%xmm2,%xmm2
984	vpaddd	48(%esp),%xmm3,%xmm3
985	cmpl	$64,%ecx
986	jb	.L020tail
/* full block: XOR directly from the input and store */
987	vpxor	(%esi),%xmm0,%xmm0
988	vpxor	16(%esi),%xmm1,%xmm1
989	vpxor	32(%esi),%xmm2,%xmm2
990	vpxor	48(%esi),%xmm3,%xmm3
991	leal	64(%esi),%esi
992	vmovdqu	%xmm0,(%edi)
993	vmovdqu	%xmm1,16(%edi)
994	vmovdqu	%xmm2,32(%edi)
995	vmovdqu	%xmm3,48(%edi)
996	leal	64(%edi),%edi
997	subl	$64,%ecx
998	jnz	.L019outer1x
999	jmp	.L017done
/* partial block: spill the 64 generated bytes to the frame, then XOR %ecx bytes */
1000.L020tail:
1001	vmovdqa	%xmm0,(%esp)
1002	vmovdqa	%xmm1,16(%esp)
1003	vmovdqa	%xmm2,32(%esp)
1004	vmovdqa	%xmm3,48(%esp)
1005	xorl	%eax,%eax
1006	xorl	%edx,%edx
1007	xorl	%ebp,%ebp
/* %ebp = byte index */
1008.L021tail_loop:
1009	movb	(%esp,%ebp,1),%al
1010	movb	(%esi,%ebp,1),%dl
1011	leal	1(%ebp),%ebp
1012	xorb	%dl,%al
1013	movb	%al,-1(%edi,%ebp,1)
1014	decl	%ecx
1015	jnz	.L021tail_loop
/* clear upper vector state, restore the saved %esp and the pushed registers */
1016.L017done:
1017	vzeroupper
1018	movl	512(%esp),%esp
1019	popl	%edi
1020	popl	%esi
1021	popl	%ebx
1022	popl	%ebp
1023	ret
1024.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
/*
 * Common symbol for the capability vector tested by the dispatch code:
 * 16 bytes, 4-byte aligned.
 */
1025.comm	OPENSSL_ia32cap_P,16,4
1026
/*
 * ELF note in .note.gnu.property.  Layout: name size, descriptor size,
 * note type 5, the name "GNU", then one property record with type
 * 0xc0000002, data size 4 and value 3.  Per the x86 psABI these are
 * NT_GNU_PROPERTY_TYPE_0, GNU_PROPERTY_X86_FEATURE_1_AND and IBT|SHSTK
 * respectively -- confirm against the psABI if these values are changed.
 */
1027	.section ".note.gnu.property", "a"
1028	.p2align 2
1029	.long 1f - 0f
1030	.long 4f - 1f
1031	.long 5
10320:
1033	.asciz "GNU"
10341:
1035	.p2align 2
1036	.long 0xc0000002
1037	.long 3f - 2f
10382:
1039	.long 3
10403:
1041	.p2align 2
10424:
1043#else
1044.text
/*
 * ChaCha20_ctr32 -- non-PIC build, i386, AT&T syntax.
 *
 * Stack arguments as seen after the four register pushes below:
 *   20(%esp)  destination buffer (written)
 *   24(%esp)  source buffer (read and XORed with the generated block)
 *   28(%esp)  length in bytes; zero returns immediately
 *   32(%esp)  pointer to 8 dwords copied to 80..108(%esp)
 *             (the cipher key by ChaCha20 convention -- confirm against
 *             the C prototype)
 *   36(%esp)  pointer to 4 dwords copied to 112..124(%esp); dword 0 is
 *             incremented once per 64-byte block
 *
 * Dispatch: when bit 24 of OPENSSL_ia32cap_P[0] and bit 9 of
 * OPENSSL_ia32cap_P[1] are both set, control jumps to .Lssse3_shortcut
 * with %eax = address of .Lpic_point and %ebp = &OPENSSL_ia32cap_P.
 * Otherwise the integer-only path below runs.
 *
 * Integer-path frame (after subl $132,%esp):
 *     0..60(%esp)   16-dword working state
 *    80..108(%esp)  copy of the 8 dwords from argument 4
 *   112..124(%esp)  copy of the 4 dwords from argument 5
 *   128(%esp)       remaining round-loop passes
 *   152/156/160(%esp) are the caller's argument slots 1..3 (20+132 etc.),
 *                   reused to hold the current out pointer, in pointer
 *                   and remaining length.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored.
 */
1045.globl	ChaCha20_ctr32
1046.type	ChaCha20_ctr32,@function
1047.align	16
1048ChaCha20_ctr32:
1049.L_ChaCha20_ctr32_begin:
1050	#ifdef __CET__
1051
/* bytes F3 0F 1E FB: the endbr32 instruction, hand-encoded */
1052.byte	243,15,30,251
1053	#endif
1054
1055	pushl	%ebp
1056	pushl	%ebx
1057	pushl	%esi
1058	pushl	%edi
/* length == 0: skip straight to the register restores */
1059	xorl	%eax,%eax
1060	cmpl	28(%esp),%eax
1061	je	.L000no_data
/*
 * call/pop still puts the address of .Lpic_point in %eax; the capability
 * vector itself is addressed absolutely in this build.
 */
1062	call	.Lpic_point
1063.Lpic_point:
1064	popl	%eax
1065	leal	OPENSSL_ia32cap_P,%ebp
/* 16777216 = 1<<24 in capability word 0; 512 = 1<<9 in capability word 1 */
1066	testl	$16777216,(%ebp)
1067	jz	.L001x86
1068	testl	$512,4(%ebp)
1069	jz	.L001x86
1070	jmp	.Lssse3_shortcut
/* ---- integer-only path ---- */
1071.L001x86:
1072	movl	32(%esp),%esi
1073	movl	36(%esp),%edi
1074	subl	$132,%esp
/* copy the 8 dwords at (%esi) into 80..108(%esp) */
1075	movl	(%esi),%eax
1076	movl	4(%esi),%ebx
1077	movl	8(%esi),%ecx
1078	movl	12(%esi),%edx
1079	movl	%eax,80(%esp)
1080	movl	%ebx,84(%esp)
1081	movl	%ecx,88(%esp)
1082	movl	%edx,92(%esp)
1083	movl	16(%esi),%eax
1084	movl	20(%esi),%ebx
1085	movl	24(%esi),%ecx
1086	movl	28(%esi),%edx
1087	movl	%eax,96(%esp)
1088	movl	%ebx,100(%esp)
1089	movl	%ecx,104(%esp)
1090	movl	%edx,108(%esp)
/*
 * Copy the 4 dwords at (%edi) into 112..124(%esp).  Dword 0 is stored
 * minus one because .L002entry adds one back before every block.
 */
1091	movl	(%edi),%eax
1092	movl	4(%edi),%ebx
1093	movl	8(%edi),%ecx
1094	movl	12(%edi),%edx
1095	subl	$1,%eax
1096	movl	%eax,112(%esp)
1097	movl	%ebx,116(%esp)
1098	movl	%ecx,120(%esp)
1099	movl	%edx,124(%esp)
1100	jmp	.L002entry
1101.align	16
/* next block: save the advanced in pointer, out pointer and remaining length */
1102.L003outer_loop:
1103	movl	%ebx,156(%esp)
1104	movl	%eax,152(%esp)
1105	movl	%ecx,160(%esp)
/*
 * Build the working state for one block.  The four immediates are
 * 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (ASCII "expand 32-byte k"
 * as little-endian dwords).  Some state words stay in registers, the
 * rest go to 0..60(%esp).
 */
1106.L002entry:
1107	movl	$1634760805,%eax
1108	movl	$857760878,4(%esp)
1109	movl	$2036477234,8(%esp)
1110	movl	$1797285236,12(%esp)
1111	movl	84(%esp),%ebx
1112	movl	88(%esp),%ebp
1113	movl	104(%esp),%ecx
1114	movl	108(%esp),%esi
1115	movl	116(%esp),%edx
1116	movl	120(%esp),%edi
1117	movl	%ebx,20(%esp)
1118	movl	%ebp,24(%esp)
1119	movl	%ecx,40(%esp)
1120	movl	%esi,44(%esp)
1121	movl	%edx,52(%esp)
1122	movl	%edi,56(%esp)
1123	movl	92(%esp),%ebx
1124	movl	124(%esp),%edi
1125	movl	112(%esp),%edx
1126	movl	80(%esp),%ebp
1127	movl	96(%esp),%ecx
1128	movl	100(%esp),%esi
/* bump the block counter and write it back for the final addition */
1129	addl	$1,%edx
1130	movl	%ebx,28(%esp)
1131	movl	%edi,60(%esp)
1132	movl	%edx,112(%esp)
/* 10 passes of the round loop */
1133	movl	$10,%ebx
1134	jmp	.L004loop
1135.align	16
/*
 * Round loop.  Each pass holds eight interleaved add / xor / rotate
 * sequences (rotations by 16, 12, 8 and 7).  %ebx is needed as a data
 * register, so the pass counter is parked in 128(%esp) at the top and
 * reloaded near the bottom.
 */
1136.L004loop:
1137	addl	%ebp,%eax
1138	movl	%ebx,128(%esp)
1139	movl	%ebp,%ebx
1140	xorl	%eax,%edx
1141	roll	$16,%edx
1142	addl	%edx,%ecx
1143	xorl	%ecx,%ebx
1144	movl	52(%esp),%edi
1145	roll	$12,%ebx
1146	movl	20(%esp),%ebp
1147	addl	%ebx,%eax
1148	xorl	%eax,%edx
1149	movl	%eax,(%esp)
1150	roll	$8,%edx
1151	movl	4(%esp),%eax
1152	addl	%edx,%ecx
1153	movl	%edx,48(%esp)
1154	xorl	%ecx,%ebx
1155	addl	%ebp,%eax
1156	roll	$7,%ebx
1157	xorl	%eax,%edi
1158	movl	%ecx,32(%esp)
1159	roll	$16,%edi
1160	movl	%ebx,16(%esp)
1161	addl	%edi,%esi
1162	movl	40(%esp),%ecx
1163	xorl	%esi,%ebp
1164	movl	56(%esp),%edx
1165	roll	$12,%ebp
1166	movl	24(%esp),%ebx
1167	addl	%ebp,%eax
1168	xorl	%eax,%edi
1169	movl	%eax,4(%esp)
1170	roll	$8,%edi
1171	movl	8(%esp),%eax
1172	addl	%edi,%esi
1173	movl	%edi,52(%esp)
1174	xorl	%esi,%ebp
1175	addl	%ebx,%eax
1176	roll	$7,%ebp
1177	xorl	%eax,%edx
1178	movl	%esi,36(%esp)
1179	roll	$16,%edx
1180	movl	%ebp,20(%esp)
1181	addl	%edx,%ecx
1182	movl	44(%esp),%esi
1183	xorl	%ecx,%ebx
1184	movl	60(%esp),%edi
1185	roll	$12,%ebx
1186	movl	28(%esp),%ebp
1187	addl	%ebx,%eax
1188	xorl	%eax,%edx
1189	movl	%eax,8(%esp)
1190	roll	$8,%edx
1191	movl	12(%esp),%eax
1192	addl	%edx,%ecx
1193	movl	%edx,56(%esp)
1194	xorl	%ecx,%ebx
1195	addl	%ebp,%eax
1196	roll	$7,%ebx
1197	xorl	%eax,%edi
1198	roll	$16,%edi
1199	movl	%ebx,24(%esp)
1200	addl	%edi,%esi
1201	xorl	%esi,%ebp
1202	roll	$12,%ebp
1203	movl	20(%esp),%ebx
1204	addl	%ebp,%eax
1205	xorl	%eax,%edi
1206	movl	%eax,12(%esp)
1207	roll	$8,%edi
1208	movl	(%esp),%eax
1209	addl	%edi,%esi
1210	movl	%edi,%edx
1211	xorl	%esi,%ebp
1212	addl	%ebx,%eax
1213	roll	$7,%ebp
1214	xorl	%eax,%edx
1215	roll	$16,%edx
1216	movl	%ebp,28(%esp)
1217	addl	%edx,%ecx
1218	xorl	%ecx,%ebx
1219	movl	48(%esp),%edi
1220	roll	$12,%ebx
1221	movl	24(%esp),%ebp
1222	addl	%ebx,%eax
1223	xorl	%eax,%edx
1224	movl	%eax,(%esp)
1225	roll	$8,%edx
1226	movl	4(%esp),%eax
1227	addl	%edx,%ecx
1228	movl	%edx,60(%esp)
1229	xorl	%ecx,%ebx
1230	addl	%ebp,%eax
1231	roll	$7,%ebx
1232	xorl	%eax,%edi
1233	movl	%ecx,40(%esp)
1234	roll	$16,%edi
1235	movl	%ebx,20(%esp)
1236	addl	%edi,%esi
1237	movl	32(%esp),%ecx
1238	xorl	%esi,%ebp
1239	movl	52(%esp),%edx
1240	roll	$12,%ebp
1241	movl	28(%esp),%ebx
1242	addl	%ebp,%eax
1243	xorl	%eax,%edi
1244	movl	%eax,4(%esp)
1245	roll	$8,%edi
1246	movl	8(%esp),%eax
1247	addl	%edi,%esi
1248	movl	%edi,48(%esp)
1249	xorl	%esi,%ebp
1250	addl	%ebx,%eax
1251	roll	$7,%ebp
1252	xorl	%eax,%edx
1253	movl	%esi,44(%esp)
1254	roll	$16,%edx
1255	movl	%ebp,24(%esp)
1256	addl	%edx,%ecx
1257	movl	36(%esp),%esi
1258	xorl	%ecx,%ebx
1259	movl	56(%esp),%edi
1260	roll	$12,%ebx
1261	movl	16(%esp),%ebp
1262	addl	%ebx,%eax
1263	xorl	%eax,%edx
1264	movl	%eax,8(%esp)
1265	roll	$8,%edx
1266	movl	12(%esp),%eax
1267	addl	%edx,%ecx
1268	movl	%edx,52(%esp)
1269	xorl	%ecx,%ebx
1270	addl	%ebp,%eax
1271	roll	$7,%ebx
1272	xorl	%eax,%edi
1273	roll	$16,%edi
1274	movl	%ebx,28(%esp)
1275	addl	%edi,%esi
1276	xorl	%esi,%ebp
1277	movl	48(%esp),%edx
1278	roll	$12,%ebp
/* reload the pass counter parked at the top of the loop */
1279	movl	128(%esp),%ebx
1280	addl	%ebp,%eax
1281	xorl	%eax,%edi
1282	movl	%eax,12(%esp)
1283	roll	$8,%edi
1284	movl	(%esp),%eax
1285	addl	%edi,%esi
1286	movl	%edi,56(%esp)
1287	xorl	%esi,%ebp
1288	roll	$7,%ebp
1289	decl	%ebx
1290	jnz	.L004loop
/*
 * Rounds done.  Add the block's initial words back onto the words still
 * held in registers, then pick the full-block or the partial-block path
 * from the remaining length (unsigned compare).
 */
1291	movl	160(%esp),%ebx
1292	addl	$1634760805,%eax
1293	addl	80(%esp),%ebp
1294	addl	96(%esp),%ecx
1295	addl	100(%esp),%esi
1296	cmpl	$64,%ebx
1297	jb	.L005tail
/*
 * Full 64-byte block: %ebx = input pointer, %eax = output pointer.
 * Words are finished (initial value added), XORed with the input and
 * stored in groups of five to fit the available registers.
 */
1298	movl	156(%esp),%ebx
1299	addl	112(%esp),%edx
1300	addl	120(%esp),%edi
1301	xorl	(%ebx),%eax
1302	xorl	16(%ebx),%ebp
/* output word 0 is parked in (%esp) because %eax becomes the out pointer */
1303	movl	%eax,(%esp)
1304	movl	152(%esp),%eax
1305	xorl	32(%ebx),%ecx
1306	xorl	36(%ebx),%esi
1307	xorl	48(%ebx),%edx
1308	xorl	56(%ebx),%edi
1309	movl	%ebp,16(%eax)
1310	movl	%ecx,32(%eax)
1311	movl	%esi,36(%eax)
1312	movl	%edx,48(%eax)
1313	movl	%edi,56(%eax)
1314	movl	4(%esp),%ebp
1315	movl	8(%esp),%ecx
1316	movl	12(%esp),%esi
1317	movl	20(%esp),%edx
1318	movl	24(%esp),%edi
1319	addl	$857760878,%ebp
1320	addl	$2036477234,%ecx
1321	addl	$1797285236,%esi
1322	addl	84(%esp),%edx
1323	addl	88(%esp),%edi
1324	xorl	4(%ebx),%ebp
1325	xorl	8(%ebx),%ecx
1326	xorl	12(%ebx),%esi
1327	xorl	20(%ebx),%edx
1328	xorl	24(%ebx),%edi
1329	movl	%ebp,4(%eax)
1330	movl	%ecx,8(%eax)
1331	movl	%esi,12(%eax)
1332	movl	%edx,20(%eax)
1333	movl	%edi,24(%eax)
1334	movl	28(%esp),%ebp
1335	movl	40(%esp),%ecx
1336	movl	44(%esp),%esi
1337	movl	52(%esp),%edx
1338	movl	60(%esp),%edi
1339	addl	92(%esp),%ebp
1340	addl	104(%esp),%ecx
1341	addl	108(%esp),%esi
1342	addl	116(%esp),%edx
1343	addl	124(%esp),%edi
1344	xorl	28(%ebx),%ebp
1345	xorl	40(%ebx),%ecx
1346	xorl	44(%ebx),%esi
1347	xorl	52(%ebx),%edx
1348	xorl	60(%ebx),%edi
1349	leal	64(%ebx),%ebx
1350	movl	%ebp,28(%eax)
1351	movl	(%esp),%ebp
1352	movl	%ecx,40(%eax)
1353	movl	160(%esp),%ecx
1354	movl	%esi,44(%eax)
1355	movl	%edx,52(%eax)
1356	movl	%edi,60(%eax)
1357	movl	%ebp,(%eax)
1358	leal	64(%eax),%eax
/* 64 bytes consumed; loop while any length remains */
1359	subl	$64,%ecx
1360	jnz	.L003outer_loop
1361	jmp	.L006done
/*
 * Partial final block (remaining length in %ebx is below 64): finish all
 * 16 words into 0..60(%esp), then XOR byte by byte.
 */
1362.L005tail:
1363	addl	112(%esp),%edx
1364	addl	120(%esp),%edi
1365	movl	%eax,(%esp)
1366	movl	%ebp,16(%esp)
1367	movl	%ecx,32(%esp)
1368	movl	%esi,36(%esp)
1369	movl	%edx,48(%esp)
1370	movl	%edi,56(%esp)
1371	movl	4(%esp),%ebp
1372	movl	8(%esp),%ecx
1373	movl	12(%esp),%esi
1374	movl	20(%esp),%edx
1375	movl	24(%esp),%edi
1376	addl	$857760878,%ebp
1377	addl	$2036477234,%ecx
1378	addl	$1797285236,%esi
1379	addl	84(%esp),%edx
1380	addl	88(%esp),%edi
1381	movl	%ebp,4(%esp)
1382	movl	%ecx,8(%esp)
1383	movl	%esi,12(%esp)
1384	movl	%edx,20(%esp)
1385	movl	%edi,24(%esp)
1386	movl	28(%esp),%ebp
1387	movl	40(%esp),%ecx
1388	movl	44(%esp),%esi
1389	movl	52(%esp),%edx
1390	movl	60(%esp),%edi
1391	addl	92(%esp),%ebp
1392	addl	104(%esp),%ecx
1393	addl	108(%esp),%esi
1394	addl	116(%esp),%edx
1395	addl	124(%esp),%edi
1396	movl	%ebp,28(%esp)
1397	movl	156(%esp),%ebp
1398	movl	%ecx,40(%esp)
1399	movl	152(%esp),%ecx
1400	movl	%esi,44(%esp)
1401	xorl	%esi,%esi
1402	movl	%edx,52(%esp)
1403	movl	%edi,60(%esp)
1404	xorl	%eax,%eax
1405	xorl	%edx,%edx
/* %esi = byte index, %ebp = input, %ecx = output, %ebx = bytes left */
1406.L007tail_loop:
1407	movb	(%esi,%ebp,1),%al
1408	movb	(%esp,%esi,1),%dl
1409	leal	1(%esi),%esi
1410	xorb	%dl,%al
1411	movb	%al,-1(%ecx,%esi,1)
1412	decl	%ebx
1413	jnz	.L007tail_loop
/* release the 132-byte frame; .L000no_data is the no-frame early exit */
1414.L006done:
1415	addl	$132,%esp
1416.L000no_data:
1417	popl	%edi
1418	popl	%esi
1419	popl	%ebx
1420	popl	%ebp
1421	ret
1422.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3 -- one-block-at-a-time (64 bytes per pass) SSE2/SSSE3 code path.
 *
 * Stack arguments, read after the four register pushes below:
 *   20(%esp) -> %edi  destination pointer (written with movdqu / movb)
 *   24(%esp) -> %esi  source pointer (read with movdqu / movb)
 *   28(%esp) -> %ecx  byte count (compared against 64 and counted down)
 *   32(%esp) -> %edx  pointer to 32 bytes loaded into state rows 1 and 2
 *                     (presumably the key -- confirm against the C prototype)
 *   36(%esp) -> %ebx  pointer to 16 bytes loaded into state row 3
 *                     (presumably counter and nonce -- confirm)
 *
 * ChaCha20_ctr32 jumps to .Lssse3_shortcut after doing the same four pushes,
 * with %ebp = address of OPENSSL_ia32cap_P and %eax = address of .Lpic_point.
 * NOTE(review): nothing between ChaCha20_ssse3 and .Lssse3_shortcut sets %ebp
 * or %eax, and there is no zero-length check in this function; the code looks
 * like it relies on being reached through ChaCha20_ctr32 -- confirm.
 */
1423.globl	ChaCha20_ssse3
1424.type	ChaCha20_ssse3,@function
1425.align	16
1426ChaCha20_ssse3:
1427.L_ChaCha20_ssse3_begin:
1428	#ifdef __CET__
1429
/* F3 0F 1E FB is the encoding of endbr32, emitted as raw bytes. */
1430.byte	243,15,30,251
1431	#endif
1432
1433	pushl	%ebp
1434	pushl	%ebx
1435	pushl	%esi
1436	pushl	%edi
1437.Lssse3_shortcut:
/* Bit 11 of the second capability word set: hand over to the XOP code. */
1438	testl	$2048,4(%ebp)
1439	jnz	.Lxop_shortcut
1440	movl	20(%esp),%edi
1441	movl	24(%esp),%esi
1442	movl	28(%esp),%ecx
1443	movl	32(%esp),%edx
1444	movl	36(%esp),%ebx
/*
 * Build a 64-byte-aligned scratch frame.  The pre-alignment %esp is kept in
 * %ebp and saved at 512(%esp) so .L012done can restore it.
 */
1445	movl	%esp,%ebp
1446	subl	$524,%esp
1447	andl	$-64,%esp
1448	movl	%ebp,512(%esp)
/* %eax: address of .Lpic_point on entry -> address of .Lssse3_data. */
1449	leal	.Lssse3_data-.Lpic_point(%eax),%eax
/* Row 3 of the state: the 16 bytes at the fifth argument. */
1450	movdqu	(%ebx),%xmm3
1451.L0081x:
/*
 * xmm0 = four constant words at .Lssse3_data+32, xmm1/xmm2 = the 32 bytes at
 * the fourth argument, xmm6/xmm7 = byte-shuffle masks used by pshufb below.
 * The input state is also stored at 0..63(%esp) for the final addition.
 */
1452	movdqa	32(%eax),%xmm0
1453	movdqu	(%edx),%xmm1
1454	movdqu	16(%edx),%xmm2
1455	movdqa	(%eax),%xmm6
1456	movdqa	16(%eax),%xmm7
/* This dword is overwritten by the 16-byte store of %xmm3 to 48(%esp) below. */
1457	movl	%ebp,48(%esp)
1458	movdqa	%xmm0,(%esp)
1459	movdqa	%xmm1,16(%esp)
1460	movdqa	%xmm2,32(%esp)
1461	movdqa	%xmm3,48(%esp)
/* %edx is reused as the iteration counter: 10 passes of .L009loop1x. */
1462	movl	$10,%edx
1463	jmp	.L009loop1x
1464.align	16
1465.L010outer1x:
/*
 * Next 64-byte block: reload rows 0..2 from the saved state and add
 * {1,0,0,0} (.Lssse3_data+80) to the saved row 3, i.e. bump its first dword.
 */
1466	movdqa	80(%eax),%xmm3
1467	movdqa	(%esp),%xmm0
1468	movdqa	16(%esp),%xmm1
1469	movdqa	32(%esp),%xmm2
1470	paddd	48(%esp),%xmm3
1471	movl	$10,%edx
1472	movdqa	%xmm3,48(%esp)
1473	jmp	.L009loop1x
1474.align	16
1475.L009loop1x:
/*
 * First add/xor/rotate sequence on rows xmm0..xmm3.
 * Rotate amounts are 16, 12, 8 and 7 bits to the left.
 */
1476	paddd	%xmm1,%xmm0
1477	pxor	%xmm0,%xmm3
/* pshufb %xmm6,%xmm3 (66 0F 38 00 DE): rotate each dword left by 16. */
1478.byte	102,15,56,0,222
1479	paddd	%xmm3,%xmm2
1480	pxor	%xmm2,%xmm1
/* Rotate each dword of xmm1 left by 12 using shift/shift/or. */
1481	movdqa	%xmm1,%xmm4
1482	psrld	$20,%xmm1
1483	pslld	$12,%xmm4
1484	por	%xmm4,%xmm1
1485	paddd	%xmm1,%xmm0
1486	pxor	%xmm0,%xmm3
/* pshufb %xmm7,%xmm3 (66 0F 38 00 DF): rotate each dword left by 8. */
1487.byte	102,15,56,0,223
1488	paddd	%xmm3,%xmm2
1489	pxor	%xmm2,%xmm1
/* Rotate each dword of xmm1 left by 7. */
1490	movdqa	%xmm1,%xmm4
1491	psrld	$25,%xmm1
1492	pslld	$7,%xmm4
1493	por	%xmm4,%xmm1
/* Rotate the lanes of rows 2, 1 and 3 by 2, 1 and 3 positions. */
1494	pshufd	$78,%xmm2,%xmm2
1495	pshufd	$57,%xmm1,%xmm1
1496	pshufd	$147,%xmm3,%xmm3
1497	nop
/* Second add/xor/rotate sequence, same shape, on the re-aligned rows. */
1498	paddd	%xmm1,%xmm0
1499	pxor	%xmm0,%xmm3
1500.byte	102,15,56,0,222
1501	paddd	%xmm3,%xmm2
1502	pxor	%xmm2,%xmm1
1503	movdqa	%xmm1,%xmm4
1504	psrld	$20,%xmm1
1505	pslld	$12,%xmm4
1506	por	%xmm4,%xmm1
1507	paddd	%xmm1,%xmm0
1508	pxor	%xmm0,%xmm3
1509.byte	102,15,56,0,223
1510	paddd	%xmm3,%xmm2
1511	pxor	%xmm2,%xmm1
1512	movdqa	%xmm1,%xmm4
1513	psrld	$25,%xmm1
1514	pslld	$7,%xmm4
1515	por	%xmm4,%xmm1
/* Undo the lane rotation (rows 1 and 3 use the opposite shuffles). */
1516	pshufd	$78,%xmm2,%xmm2
1517	pshufd	$147,%xmm1,%xmm1
1518	pshufd	$57,%xmm3,%xmm3
1519	decl	%edx
1520	jnz	.L009loop1x
/* Add the saved input state to the permuted state. */
1521	paddd	(%esp),%xmm0
1522	paddd	16(%esp),%xmm1
1523	paddd	32(%esp),%xmm2
1524	paddd	48(%esp),%xmm3
/* Fewer than 64 bytes left: finish byte by byte in .L011tail. */
1525	cmpl	$64,%ecx
1526	jb	.L011tail
/* Full block: XOR 64 source bytes with the block and store to the destination. */
1527	movdqu	(%esi),%xmm4
1528	movdqu	16(%esi),%xmm5
1529	pxor	%xmm4,%xmm0
1530	movdqu	32(%esi),%xmm4
1531	pxor	%xmm5,%xmm1
1532	movdqu	48(%esi),%xmm5
1533	pxor	%xmm4,%xmm2
1534	pxor	%xmm5,%xmm3
1535	leal	64(%esi),%esi
1536	movdqu	%xmm0,(%edi)
1537	movdqu	%xmm1,16(%edi)
1538	movdqu	%xmm2,32(%edi)
1539	movdqu	%xmm3,48(%edi)
1540	leal	64(%edi),%edi
1541	subl	$64,%ecx
1542	jnz	.L010outer1x
1543	jmp	.L012done
1544.L011tail:
/*
 * Spill the 64-byte block to 0..63(%esp), then XOR the remaining %ecx bytes
 * one at a time; %ebp is the running byte index.
 */
1545	movdqa	%xmm0,(%esp)
1546	movdqa	%xmm1,16(%esp)
1547	movdqa	%xmm2,32(%esp)
1548	movdqa	%xmm3,48(%esp)
1549	xorl	%eax,%eax
1550	xorl	%edx,%edx
1551	xorl	%ebp,%ebp
1552.L013tail_loop:
1553	movb	(%esp,%ebp,1),%al
1554	movb	(%esi,%ebp,1),%dl
1555	leal	1(%ebp),%ebp
1556	xorb	%dl,%al
/* The index was already incremented, hence the -1 displacement. */
1557	movb	%al,-1(%edi,%ebp,1)
1558	decl	%ecx
1559	jnz	.L013tail_loop
1560.L012done:
/* Restore the pre-alignment stack pointer saved at 512(%esp), then return. */
1561	movl	512(%esp),%esp
1562	popl	%edi
1563	popl	%esi
1564	popl	%ebx
1565	popl	%ebp
1566	ret
1567.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * Constant table shared by the SSSE3 and XOP code.  Both reach it through
 * %eax after "leal .Lssse3_data-.Lpic_point(%eax),%eax"; the offsets below
 * are relative to .Lssse3_data.
 */
1568.align	64
1569.Lssse3_data:
/* +0: pshufb mask that rotates every 32-bit lane left by 16 (loaded into xmm6). */
1570.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
/* +16: pshufb mask that rotates every 32-bit lane left by 8 (loaded into xmm7). */
1571.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
/* +32: the four constant words forming row 0 of the state. */
1572.long	1634760805,857760878,2036477234,1797285236
/* +48: per-lane offsets 0..3, added to the broadcast first word of row 3 in the 4x path. */
1573.long	0,1,2,3
/* +64: per-lane step of 4; subtracted once in the 4x setup, added at the top of each 4x pass. */
1574.long	4,4,4,4
/* +80: adds 1 to the first dword of row 3 between blocks in the 1x paths. */
1575.long	1,0,0,0
/* +96: adds 4 to the first dword when the 4x path falls through to the 1x path. */
1576.long	4,0,0,0
/* +112: AND mask that clears the first dword and keeps the other three. */
1577.long	0,-1,-1,-1
1578.align	64
/* ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" plus a NUL. */
1579.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
1580.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1581.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1582.byte	114,103,62,0
/*
 * ChaCha20_xop -- AVX/XOP code path: four 64-byte blocks per pass while at
 * least 256 bytes remain, then one block at a time, then a byte-wise tail.
 *
 * Stack arguments, read after the four register pushes below:
 *   20(%esp) -> %edi  destination pointer
 *   24(%esp) -> %esi  source pointer
 *   28(%esp) -> %ecx  byte count
 *   32(%esp) -> %edx  pointer to 32 bytes loaded into state rows 1 and 2
 *                     (presumably the key -- confirm against the C prototype)
 *   36(%esp) -> %ebx  pointer to 16 bytes loaded into state row 3
 *                     (presumably counter and nonce -- confirm)
 *
 * ChaCha20_ssse3 jumps to .Lxop_shortcut with the same four registers already
 * pushed.  The leal below needs %eax = address of .Lpic_point.
 * NOTE(review): nothing between ChaCha20_xop and .Lxop_shortcut sets %eax, and
 * there is no zero-length check here; the code looks like it relies on being
 * reached through ChaCha20_ctr32 -- confirm.
 *
 * Every ".byte 143,232,120,194,M,N" sequence is a hand-encoded XOP
 * "vprotd $N,%xmmR,%xmmR" (rotate each dword left by N bits), where
 * M = 201/210/219/246/255 selects R = 1/2/3/6/7.
 */
1583.globl	ChaCha20_xop
1584.type	ChaCha20_xop,@function
1585.align	16
1586ChaCha20_xop:
1587.L_ChaCha20_xop_begin:
1588	#ifdef __CET__
1589
/* F3 0F 1E FB is the encoding of endbr32, emitted as raw bytes. */
1590.byte	243,15,30,251
1591	#endif
1592
1593	pushl	%ebp
1594	pushl	%ebx
1595	pushl	%esi
1596	pushl	%edi
1597.Lxop_shortcut:
1598	movl	20(%esp),%edi
1599	movl	24(%esp),%esi
1600	movl	28(%esp),%ecx
1601	movl	32(%esp),%edx
1602	movl	36(%esp),%ebx
1603	vzeroupper
/*
 * Build a 64-byte-aligned scratch frame; the pre-alignment %esp is saved at
 * 512(%esp) and restored in .L017done.
 */
1604	movl	%esp,%ebp
1605	subl	$524,%esp
1606	andl	$-64,%esp
1607	movl	%ebp,512(%esp)
/* %eax: address of .Lpic_point on entry -> address of .Lssse3_data. */
1608	leal	.Lssse3_data-.Lpic_point(%eax),%eax
1609	vmovdqu	(%ebx),%xmm3
/* Fewer than 256 bytes: go straight to the one-block path. */
1610	cmpl	$256,%ecx
1611	jb	.L0141x
/*
 * 4x setup.  Arguments four and five are saved at 516/520(%esp) for the
 * remainder code, and %ecx is pre-decremented by 256.  %ebp = %esp+384
 * addresses sixteen 16-byte slots at -128..+112(%ebp); slot k holds state
 * word k for four blocks, one block per lane.
 */
1612	movl	%edx,516(%esp)
1613	movl	%ebx,520(%esp)
1614	subl	$256,%ecx
1615	leal	384(%esp),%ebp
1616	vmovdqu	(%edx),%xmm7
/* Broadcast words 12..15; word 12 gets per-lane offsets {0,1,2,3}. */
1617	vpshufd	$0,%xmm3,%xmm0
1618	vpshufd	$85,%xmm3,%xmm1
1619	vpshufd	$170,%xmm3,%xmm2
1620	vpshufd	$255,%xmm3,%xmm3
1621	vpaddd	48(%eax),%xmm0,%xmm0
1622	vpshufd	$0,%xmm7,%xmm4
1623	vpshufd	$85,%xmm7,%xmm5
/* Subtract {4,4,4,4}: .L015outer_loop adds it back before each pass. */
1624	vpsubd	64(%eax),%xmm0,%xmm0
1625	vpshufd	$170,%xmm7,%xmm6
1626	vpshufd	$255,%xmm7,%xmm7
1627	vmovdqa	%xmm0,64(%ebp)
1628	vmovdqa	%xmm1,80(%ebp)
1629	vmovdqa	%xmm2,96(%ebp)
1630	vmovdqa	%xmm3,112(%ebp)
1631	vmovdqu	16(%edx),%xmm3
/* Words 4..7 (first 16 bytes at the fourth argument) -> -64..-16(%ebp). */
1632	vmovdqa	%xmm4,-64(%ebp)
1633	vmovdqa	%xmm5,-48(%ebp)
1634	vmovdqa	%xmm6,-32(%ebp)
1635	vmovdqa	%xmm7,-16(%ebp)
1636	vmovdqa	32(%eax),%xmm7
/* %ebx = %esp+128 addresses the per-pass working copy at -128..+112(%ebx). */
1637	leal	128(%esp),%ebx
/* Words 8..11 -> 0..48(%ebp); constant words 0..3 -> -128..-80(%ebp). */
1638	vpshufd	$0,%xmm3,%xmm0
1639	vpshufd	$85,%xmm3,%xmm1
1640	vpshufd	$170,%xmm3,%xmm2
1641	vpshufd	$255,%xmm3,%xmm3
1642	vpshufd	$0,%xmm7,%xmm4
1643	vpshufd	$85,%xmm7,%xmm5
1644	vpshufd	$170,%xmm7,%xmm6
1645	vpshufd	$255,%xmm7,%xmm7
1646	vmovdqa	%xmm0,(%ebp)
1647	vmovdqa	%xmm1,16(%ebp)
1648	vmovdqa	%xmm2,32(%ebp)
1649	vmovdqa	%xmm3,48(%ebp)
1650	vmovdqa	%xmm4,-128(%ebp)
1651	vmovdqa	%xmm5,-112(%ebp)
1652	vmovdqa	%xmm6,-96(%ebp)
1653	vmovdqa	%xmm7,-80(%ebp)
/*
 * Bias both data pointers by +128; the 4x path then addresses the current
 * 256-byte chunk with displacements -128, -64, 0 and 64.
 */
1654	leal	128(%esi),%esi
1655	leal	128(%edi),%edi
1656	jmp	.L015outer_loop
1657.align	32
1658.L015outer_loop:
/*
 * Copy the input state from the %ebp area to the %ebx working area.  Words
 * 0, 4, 8, 9 and 12 are not copied here: they are loaded into xmm0, xmm3,
 * xmm4, xmm5 and xmm6 further down and live in registers on loop entry.
 */
1659	vmovdqa	-112(%ebp),%xmm1
1660	vmovdqa	-96(%ebp),%xmm2
1661	vmovdqa	-80(%ebp),%xmm3
1662	vmovdqa	-48(%ebp),%xmm5
1663	vmovdqa	-32(%ebp),%xmm6
1664	vmovdqa	-16(%ebp),%xmm7
1665	vmovdqa	%xmm1,-112(%ebx)
1666	vmovdqa	%xmm2,-96(%ebx)
1667	vmovdqa	%xmm3,-80(%ebx)
1668	vmovdqa	%xmm5,-48(%ebx)
1669	vmovdqa	%xmm6,-32(%ebx)
1670	vmovdqa	%xmm7,-16(%ebx)
1671	vmovdqa	32(%ebp),%xmm2
1672	vmovdqa	48(%ebp),%xmm3
1673	vmovdqa	64(%ebp),%xmm4
1674	vmovdqa	80(%ebp),%xmm5
1675	vmovdqa	96(%ebp),%xmm6
1676	vmovdqa	112(%ebp),%xmm7
/* Advance word 12 by {4,4,4,4} for this pass and write it back to 64(%ebp). */
1677	vpaddd	64(%eax),%xmm4,%xmm4
1678	vmovdqa	%xmm2,32(%ebx)
1679	vmovdqa	%xmm3,48(%ebx)
1680	vmovdqa	%xmm4,64(%ebx)
1681	vmovdqa	%xmm5,80(%ebx)
1682	vmovdqa	%xmm6,96(%ebx)
1683	vmovdqa	%xmm7,112(%ebx)
1684	vmovdqa	%xmm4,64(%ebp)
1685	vmovdqa	-128(%ebp),%xmm0
1686	vmovdqa	%xmm4,%xmm6
1687	vmovdqa	-64(%ebp),%xmm3
1688	vmovdqa	(%ebp),%xmm4
1689	vmovdqa	16(%ebp),%xmm5
/* %edx is reused as the iteration counter: 10 passes of .L016loop. */
1690	movl	$10,%edx
1691	nop
1692.align	32
1693.L016loop:
/*
 * One pass = eight add/xor/rotate sequences (rotations 16, 12, 8, 7), with
 * the loads and stores of neighbouring sequences interleaved.  The first
 * four work on word groups (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15),
 * the last four on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
 */
/* Words 0,4,8,12: a=xmm0, b=xmm3 (continued in xmm2), c=xmm4, d=xmm6. */
1694	vpaddd	%xmm3,%xmm0,%xmm0
1695	vpxor	%xmm0,%xmm6,%xmm6
1696.byte	143,232,120,194,246,16
1697	vpaddd	%xmm6,%xmm4,%xmm4
1698	vpxor	%xmm4,%xmm3,%xmm2
1699	vmovdqa	-112(%ebx),%xmm1
1700.byte	143,232,120,194,210,12
1701	vmovdqa	-48(%ebx),%xmm3
1702	vpaddd	%xmm2,%xmm0,%xmm0
1703	vmovdqa	80(%ebx),%xmm7
1704	vpxor	%xmm0,%xmm6,%xmm6
/* Words 1,5,9,13 start here: a=xmm1, b=xmm3, c=xmm5, d=xmm7. */
1705	vpaddd	%xmm3,%xmm1,%xmm1
1706.byte	143,232,120,194,246,8
1707	vmovdqa	%xmm0,-128(%ebx)
1708	vpaddd	%xmm6,%xmm4,%xmm4
1709	vmovdqa	%xmm6,64(%ebx)
1710	vpxor	%xmm4,%xmm2,%xmm2
1711	vpxor	%xmm1,%xmm7,%xmm7
1712.byte	143,232,120,194,210,7
1713	vmovdqa	%xmm4,(%ebx)
1714.byte	143,232,120,194,255,16
1715	vmovdqa	%xmm2,-64(%ebx)
1716	vpaddd	%xmm7,%xmm5,%xmm5
1717	vmovdqa	32(%ebx),%xmm4
1718	vpxor	%xmm5,%xmm3,%xmm3
1719	vmovdqa	-96(%ebx),%xmm0
1720.byte	143,232,120,194,219,12
1721	vmovdqa	-32(%ebx),%xmm2
1722	vpaddd	%xmm3,%xmm1,%xmm1
1723	vmovdqa	96(%ebx),%xmm6
1724	vpxor	%xmm1,%xmm7,%xmm7
/* Words 2,6,10,14 start here: a=xmm0, b=xmm2, c=xmm4, d=xmm6. */
1725	vpaddd	%xmm2,%xmm0,%xmm0
1726.byte	143,232,120,194,255,8
1727	vmovdqa	%xmm1,-112(%ebx)
1728	vpaddd	%xmm7,%xmm5,%xmm5
1729	vmovdqa	%xmm7,80(%ebx)
1730	vpxor	%xmm5,%xmm3,%xmm3
1731	vpxor	%xmm0,%xmm6,%xmm6
1732.byte	143,232,120,194,219,7
1733	vmovdqa	%xmm5,16(%ebx)
1734.byte	143,232,120,194,246,16
1735	vmovdqa	%xmm3,-48(%ebx)
1736	vpaddd	%xmm6,%xmm4,%xmm4
1737	vmovdqa	48(%ebx),%xmm5
1738	vpxor	%xmm4,%xmm2,%xmm2
1739	vmovdqa	-80(%ebx),%xmm1
1740.byte	143,232,120,194,210,12
1741	vmovdqa	-16(%ebx),%xmm3
1742	vpaddd	%xmm2,%xmm0,%xmm0
1743	vmovdqa	112(%ebx),%xmm7
1744	vpxor	%xmm0,%xmm6,%xmm6
/* Words 3,7,11,15 start here: a=xmm1, b=xmm3, c=xmm5, d=xmm7. */
1745	vpaddd	%xmm3,%xmm1,%xmm1
1746.byte	143,232,120,194,246,8
1747	vmovdqa	%xmm0,-96(%ebx)
1748	vpaddd	%xmm6,%xmm4,%xmm4
1749	vmovdqa	%xmm6,96(%ebx)
1750	vpxor	%xmm4,%xmm2,%xmm2
1751	vpxor	%xmm1,%xmm7,%xmm7
1752.byte	143,232,120,194,210,7
1753.byte	143,232,120,194,255,16
1754	vmovdqa	%xmm2,-32(%ebx)
1755	vpaddd	%xmm7,%xmm5,%xmm5
1756	vpxor	%xmm5,%xmm3,%xmm3
1757	vmovdqa	-128(%ebx),%xmm0
1758.byte	143,232,120,194,219,12
1759	vmovdqa	-48(%ebx),%xmm2
1760	vpaddd	%xmm3,%xmm1,%xmm1
1761	vpxor	%xmm1,%xmm7,%xmm7
/* Words 0,5,10,15 start here: a=xmm0, b=xmm2, c=xmm4 (word 10, still in register). */
1762	vpaddd	%xmm2,%xmm0,%xmm0
1763.byte	143,232,120,194,255,8
1764	vmovdqa	%xmm1,-80(%ebx)
1765	vpaddd	%xmm7,%xmm5,%xmm5
1766	vpxor	%xmm5,%xmm3,%xmm3
/* Word 15 is still in xmm7; the XOR result continues in xmm6. */
1767	vpxor	%xmm0,%xmm7,%xmm6
1768.byte	143,232,120,194,219,7
1769.byte	143,232,120,194,246,16
1770	vmovdqa	%xmm3,-16(%ebx)
1771	vpaddd	%xmm6,%xmm4,%xmm4
1772	vpxor	%xmm4,%xmm2,%xmm2
1773	vmovdqa	-112(%ebx),%xmm1
1774.byte	143,232,120,194,210,12
1775	vmovdqa	-32(%ebx),%xmm3
1776	vpaddd	%xmm2,%xmm0,%xmm0
1777	vmovdqa	64(%ebx),%xmm7
1778	vpxor	%xmm0,%xmm6,%xmm6
/* Words 1,6,11,12 start here: a=xmm1, b=xmm3, c=xmm5 (word 11), d=xmm7. */
1779	vpaddd	%xmm3,%xmm1,%xmm1
1780.byte	143,232,120,194,246,8
1781	vmovdqa	%xmm0,-128(%ebx)
1782	vpaddd	%xmm6,%xmm4,%xmm4
1783	vmovdqa	%xmm6,112(%ebx)
1784	vpxor	%xmm4,%xmm2,%xmm2
1785	vpxor	%xmm1,%xmm7,%xmm7
1786.byte	143,232,120,194,210,7
1787	vmovdqa	%xmm4,32(%ebx)
1788.byte	143,232,120,194,255,16
1789	vmovdqa	%xmm2,-48(%ebx)
1790	vpaddd	%xmm7,%xmm5,%xmm5
1791	vmovdqa	(%ebx),%xmm4
1792	vpxor	%xmm5,%xmm3,%xmm3
1793	vmovdqa	-96(%ebx),%xmm0
1794.byte	143,232,120,194,219,12
1795	vmovdqa	-16(%ebx),%xmm2
1796	vpaddd	%xmm3,%xmm1,%xmm1
1797	vmovdqa	80(%ebx),%xmm6
1798	vpxor	%xmm1,%xmm7,%xmm7
/* Words 2,7,8,13 start here: a=xmm0, b=xmm2, c=xmm4, d=xmm6. */
1799	vpaddd	%xmm2,%xmm0,%xmm0
1800.byte	143,232,120,194,255,8
1801	vmovdqa	%xmm1,-112(%ebx)
1802	vpaddd	%xmm7,%xmm5,%xmm5
1803	vmovdqa	%xmm7,64(%ebx)
1804	vpxor	%xmm5,%xmm3,%xmm3
1805	vpxor	%xmm0,%xmm6,%xmm6
1806.byte	143,232,120,194,219,7
1807	vmovdqa	%xmm5,48(%ebx)
1808.byte	143,232,120,194,246,16
1809	vmovdqa	%xmm3,-32(%ebx)
1810	vpaddd	%xmm6,%xmm4,%xmm4
1811	vmovdqa	16(%ebx),%xmm5
1812	vpxor	%xmm4,%xmm2,%xmm2
1813	vmovdqa	-80(%ebx),%xmm1
1814.byte	143,232,120,194,210,12
1815	vmovdqa	-64(%ebx),%xmm3
1816	vpaddd	%xmm2,%xmm0,%xmm0
1817	vmovdqa	96(%ebx),%xmm7
1818	vpxor	%xmm0,%xmm6,%xmm6
/* Words 3,4,9,14 start here: a=xmm1, b=xmm3, c=xmm5, d=xmm7. */
1819	vpaddd	%xmm3,%xmm1,%xmm1
1820.byte	143,232,120,194,246,8
1821	vmovdqa	%xmm0,-96(%ebx)
1822	vpaddd	%xmm6,%xmm4,%xmm4
1823	vmovdqa	%xmm6,80(%ebx)
1824	vpxor	%xmm4,%xmm2,%xmm2
1825	vpxor	%xmm1,%xmm7,%xmm7
1826.byte	143,232,120,194,210,7
1827.byte	143,232,120,194,255,16
1828	vmovdqa	%xmm2,-16(%ebx)
1829	vpaddd	%xmm7,%xmm5,%xmm5
1830	vpxor	%xmm5,%xmm3,%xmm3
/* Reload words 0 and 12 so the next pass starts with them in xmm0/xmm6. */
1831	vmovdqa	-128(%ebx),%xmm0
1832.byte	143,232,120,194,219,12
1833	vpaddd	%xmm3,%xmm1,%xmm1
1834	vmovdqa	64(%ebx),%xmm6
1835	vpxor	%xmm1,%xmm7,%xmm7
1836.byte	143,232,120,194,255,8
1837	vmovdqa	%xmm1,-80(%ebx)
1838	vpaddd	%xmm7,%xmm5,%xmm5
1839	vmovdqa	%xmm7,96(%ebx)
1840	vpxor	%xmm5,%xmm3,%xmm3
1841.byte	143,232,120,194,219,7
1842	decl	%edx
1843	jnz	.L016loop
/* Flush the words still held only in registers (4, 8, 9, 12, 14). */
1844	vmovdqa	%xmm3,-64(%ebx)
1845	vmovdqa	%xmm4,(%ebx)
1846	vmovdqa	%xmm5,16(%ebx)
1847	vmovdqa	%xmm6,64(%ebx)
1848	vmovdqa	%xmm7,96(%ebx)
/*
 * Output, in four groups of four words.  Each group adds the input state,
 * transposes the 4x4 dword matrix with vpunpck* so that one register holds
 * four consecutive words of a single block, XORs with the source at
 * displacements -128, -64, 0 and 64 (one per block) and stores the result.
 * Pointers advance by 16 after the first three groups and by 208 after the
 * last, 256 in total.
 */
/* Group 1: words 0..3. */
1849	vmovdqa	-112(%ebx),%xmm1
1850	vmovdqa	-96(%ebx),%xmm2
1851	vmovdqa	-80(%ebx),%xmm3
1852	vpaddd	-128(%ebp),%xmm0,%xmm0
1853	vpaddd	-112(%ebp),%xmm1,%xmm1
1854	vpaddd	-96(%ebp),%xmm2,%xmm2
1855	vpaddd	-80(%ebp),%xmm3,%xmm3
1856	vpunpckldq	%xmm1,%xmm0,%xmm6
1857	vpunpckldq	%xmm3,%xmm2,%xmm7
1858	vpunpckhdq	%xmm1,%xmm0,%xmm0
1859	vpunpckhdq	%xmm3,%xmm2,%xmm2
1860	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1861	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1862	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1863	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1864	vpxor	-128(%esi),%xmm1,%xmm4
1865	vpxor	-64(%esi),%xmm6,%xmm5
1866	vpxor	(%esi),%xmm7,%xmm6
1867	vpxor	64(%esi),%xmm3,%xmm7
1868	leal	16(%esi),%esi
/* Group 2: words 4..7 (loads interleaved with the group 1 stores). */
1869	vmovdqa	-64(%ebx),%xmm0
1870	vmovdqa	-48(%ebx),%xmm1
1871	vmovdqa	-32(%ebx),%xmm2
1872	vmovdqa	-16(%ebx),%xmm3
1873	vmovdqu	%xmm4,-128(%edi)
1874	vmovdqu	%xmm5,-64(%edi)
1875	vmovdqu	%xmm6,(%edi)
1876	vmovdqu	%xmm7,64(%edi)
1877	leal	16(%edi),%edi
1878	vpaddd	-64(%ebp),%xmm0,%xmm0
1879	vpaddd	-48(%ebp),%xmm1,%xmm1
1880	vpaddd	-32(%ebp),%xmm2,%xmm2
1881	vpaddd	-16(%ebp),%xmm3,%xmm3
1882	vpunpckldq	%xmm1,%xmm0,%xmm6
1883	vpunpckldq	%xmm3,%xmm2,%xmm7
1884	vpunpckhdq	%xmm1,%xmm0,%xmm0
1885	vpunpckhdq	%xmm3,%xmm2,%xmm2
1886	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1887	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1888	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1889	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1890	vpxor	-128(%esi),%xmm1,%xmm4
1891	vpxor	-64(%esi),%xmm6,%xmm5
1892	vpxor	(%esi),%xmm7,%xmm6
1893	vpxor	64(%esi),%xmm3,%xmm7
1894	leal	16(%esi),%esi
/* Group 3: words 8..11. */
1895	vmovdqa	(%ebx),%xmm0
1896	vmovdqa	16(%ebx),%xmm1
1897	vmovdqa	32(%ebx),%xmm2
1898	vmovdqa	48(%ebx),%xmm3
1899	vmovdqu	%xmm4,-128(%edi)
1900	vmovdqu	%xmm5,-64(%edi)
1901	vmovdqu	%xmm6,(%edi)
1902	vmovdqu	%xmm7,64(%edi)
1903	leal	16(%edi),%edi
1904	vpaddd	(%ebp),%xmm0,%xmm0
1905	vpaddd	16(%ebp),%xmm1,%xmm1
1906	vpaddd	32(%ebp),%xmm2,%xmm2
1907	vpaddd	48(%ebp),%xmm3,%xmm3
1908	vpunpckldq	%xmm1,%xmm0,%xmm6
1909	vpunpckldq	%xmm3,%xmm2,%xmm7
1910	vpunpckhdq	%xmm1,%xmm0,%xmm0
1911	vpunpckhdq	%xmm3,%xmm2,%xmm2
1912	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1913	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1914	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1915	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1916	vpxor	-128(%esi),%xmm1,%xmm4
1917	vpxor	-64(%esi),%xmm6,%xmm5
1918	vpxor	(%esi),%xmm7,%xmm6
1919	vpxor	64(%esi),%xmm3,%xmm7
1920	leal	16(%esi),%esi
/* Group 4: words 12..15. */
1921	vmovdqa	64(%ebx),%xmm0
1922	vmovdqa	80(%ebx),%xmm1
1923	vmovdqa	96(%ebx),%xmm2
1924	vmovdqa	112(%ebx),%xmm3
1925	vmovdqu	%xmm4,-128(%edi)
1926	vmovdqu	%xmm5,-64(%edi)
1927	vmovdqu	%xmm6,(%edi)
1928	vmovdqu	%xmm7,64(%edi)
1929	leal	16(%edi),%edi
1930	vpaddd	64(%ebp),%xmm0,%xmm0
1931	vpaddd	80(%ebp),%xmm1,%xmm1
1932	vpaddd	96(%ebp),%xmm2,%xmm2
1933	vpaddd	112(%ebp),%xmm3,%xmm3
1934	vpunpckldq	%xmm1,%xmm0,%xmm6
1935	vpunpckldq	%xmm3,%xmm2,%xmm7
1936	vpunpckhdq	%xmm1,%xmm0,%xmm0
1937	vpunpckhdq	%xmm3,%xmm2,%xmm2
1938	vpunpcklqdq	%xmm7,%xmm6,%xmm1
1939	vpunpckhqdq	%xmm7,%xmm6,%xmm6
1940	vpunpcklqdq	%xmm2,%xmm0,%xmm7
1941	vpunpckhqdq	%xmm2,%xmm0,%xmm3
1942	vpxor	-128(%esi),%xmm1,%xmm4
1943	vpxor	-64(%esi),%xmm6,%xmm5
1944	vpxor	(%esi),%xmm7,%xmm6
1945	vpxor	64(%esi),%xmm3,%xmm7
1946	leal	208(%esi),%esi
1947	vmovdqu	%xmm4,-128(%edi)
1948	vmovdqu	%xmm5,-64(%edi)
1949	vmovdqu	%xmm6,(%edi)
1950	vmovdqu	%xmm7,64(%edi)
1951	leal	208(%edi),%edi
/*
 * %ecx was pre-decremented by 256, so "no borrow" means another full
 * 256-byte chunk is available.  Otherwise undo the bias; zero left = done.
 */
1952	subl	$256,%ecx
1953	jnc	.L015outer_loop
1954	addl	$256,%ecx
1955	jz	.L017done
/*
 * 1..255 bytes remain: restore the argument pointers, remove the +128 pointer
 * bias and rebuild row 3 for the 1x path.  Its first dword is lane 0 of the
 * last 4x word-12 vector plus 4; the other three dwords come unchanged from
 * the fifth argument (mask {0,-1,-1,-1}).
 */
1956	movl	520(%esp),%ebx
1957	leal	-128(%esi),%esi
1958	movl	516(%esp),%edx
1959	leal	-128(%edi),%edi
1960	vmovd	64(%ebp),%xmm2
1961	vmovdqu	(%ebx),%xmm3
1962	vpaddd	96(%eax),%xmm2,%xmm2
1963	vpand	112(%eax),%xmm3,%xmm3
1964	vpor	%xmm2,%xmm3,%xmm3
1965.L0141x:
/*
 * One-block path: xmm0..xmm3 are the four state rows; the input state is
 * also stored at 0..63(%esp) for the final addition.  xmm6/xmm7 are loaded
 * with the byte-shuffle masks but the rotations below use vprotd instead.
 */
1966	vmovdqa	32(%eax),%xmm0
1967	vmovdqu	(%edx),%xmm1
1968	vmovdqu	16(%edx),%xmm2
1969	vmovdqa	(%eax),%xmm6
1970	vmovdqa	16(%eax),%xmm7
/* This dword is overwritten by the 16-byte store of %xmm3 to 48(%esp) below. */
1971	movl	%ebp,48(%esp)
1972	vmovdqa	%xmm0,(%esp)
1973	vmovdqa	%xmm1,16(%esp)
1974	vmovdqa	%xmm2,32(%esp)
1975	vmovdqa	%xmm3,48(%esp)
1976	movl	$10,%edx
1977	jmp	.L018loop1x
1978.align	16
1979.L019outer1x:
/* Next block: reload rows 0..2 and add {1,0,0,0} to the saved row 3. */
1980	vmovdqa	80(%eax),%xmm3
1981	vmovdqa	(%esp),%xmm0
1982	vmovdqa	16(%esp),%xmm1
1983	vmovdqa	32(%esp),%xmm2
1984	vpaddd	48(%esp),%xmm3,%xmm3
1985	movl	$10,%edx
1986	vmovdqa	%xmm3,48(%esp)
1987	jmp	.L018loop1x
1988.align	16
1989.L018loop1x:
/* First add/xor/rotate sequence (rotations 16, 12, 8, 7). */
1990	vpaddd	%xmm1,%xmm0,%xmm0
1991	vpxor	%xmm0,%xmm3,%xmm3
1992.byte	143,232,120,194,219,16
1993	vpaddd	%xmm3,%xmm2,%xmm2
1994	vpxor	%xmm2,%xmm1,%xmm1
1995.byte	143,232,120,194,201,12
1996	vpaddd	%xmm1,%xmm0,%xmm0
1997	vpxor	%xmm0,%xmm3,%xmm3
1998.byte	143,232,120,194,219,8
1999	vpaddd	%xmm3,%xmm2,%xmm2
2000	vpxor	%xmm2,%xmm1,%xmm1
2001.byte	143,232,120,194,201,7
/* Rotate the lanes of rows 2, 1 and 3 by 2, 1 and 3 positions. */
2002	vpshufd	$78,%xmm2,%xmm2
2003	vpshufd	$57,%xmm1,%xmm1
2004	vpshufd	$147,%xmm3,%xmm3
/* Second add/xor/rotate sequence on the re-aligned rows. */
2005	vpaddd	%xmm1,%xmm0,%xmm0
2006	vpxor	%xmm0,%xmm3,%xmm3
2007.byte	143,232,120,194,219,16
2008	vpaddd	%xmm3,%xmm2,%xmm2
2009	vpxor	%xmm2,%xmm1,%xmm1
2010.byte	143,232,120,194,201,12
2011	vpaddd	%xmm1,%xmm0,%xmm0
2012	vpxor	%xmm0,%xmm3,%xmm3
2013.byte	143,232,120,194,219,8
2014	vpaddd	%xmm3,%xmm2,%xmm2
2015	vpxor	%xmm2,%xmm1,%xmm1
2016.byte	143,232,120,194,201,7
/* Undo the lane rotation. */
2017	vpshufd	$78,%xmm2,%xmm2
2018	vpshufd	$147,%xmm1,%xmm1
2019	vpshufd	$57,%xmm3,%xmm3
2020	decl	%edx
2021	jnz	.L018loop1x
/* Add the saved input state to the permuted state. */
2022	vpaddd	(%esp),%xmm0,%xmm0
2023	vpaddd	16(%esp),%xmm1,%xmm1
2024	vpaddd	32(%esp),%xmm2,%xmm2
2025	vpaddd	48(%esp),%xmm3,%xmm3
/* Fewer than 64 bytes left: finish byte by byte in .L020tail. */
2026	cmpl	$64,%ecx
2027	jb	.L020tail
/* Full block: XOR with 64 source bytes and store to the destination. */
2028	vpxor	(%esi),%xmm0,%xmm0
2029	vpxor	16(%esi),%xmm1,%xmm1
2030	vpxor	32(%esi),%xmm2,%xmm2
2031	vpxor	48(%esi),%xmm3,%xmm3
2032	leal	64(%esi),%esi
2033	vmovdqu	%xmm0,(%edi)
2034	vmovdqu	%xmm1,16(%edi)
2035	vmovdqu	%xmm2,32(%edi)
2036	vmovdqu	%xmm3,48(%edi)
2037	leal	64(%edi),%edi
2038	subl	$64,%ecx
2039	jnz	.L019outer1x
2040	jmp	.L017done
2041.L020tail:
/*
 * Spill the 64-byte block to 0..63(%esp), then XOR the remaining %ecx bytes
 * one at a time; %ebp is the running byte index.
 */
2042	vmovdqa	%xmm0,(%esp)
2043	vmovdqa	%xmm1,16(%esp)
2044	vmovdqa	%xmm2,32(%esp)
2045	vmovdqa	%xmm3,48(%esp)
2046	xorl	%eax,%eax
2047	xorl	%edx,%edx
2048	xorl	%ebp,%ebp
2049.L021tail_loop:
2050	movb	(%esp,%ebp,1),%al
2051	movb	(%esi,%ebp,1),%dl
2052	leal	1(%ebp),%ebp
2053	xorb	%dl,%al
/* The index was already incremented, hence the -1 displacement. */
2054	movb	%al,-1(%edi,%ebp,1)
2055	decl	%ecx
2056	jnz	.L021tail_loop
2057.L017done:
/* Restore the pre-alignment stack pointer saved at 512(%esp), then return. */
2058	vzeroupper
2059	movl	512(%esp),%esp
2060	popl	%edi
2061	popl	%esi
2062	popl	%ebx
2063	popl	%ebp
2064	ret
2065.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
/*
 * Common symbol for the CPU capability vector tested by the dispatch code:
 * 16 bytes, 4-byte aligned.
 */
2066.comm	OPENSSL_ia32cap_P,16,4
2067
/*
 * ELF note in .note.gnu.property, laid out as namesz, descsz, type, name,
 * descriptor.  Per the GNU property note format, type 5 is
 * NT_GNU_PROPERTY_TYPE_0, and the single property 0xc0000002
 * (GNU_PROPERTY_X86_FEATURE_1_AND) with value 3 marks this object as
 * compatible with both IBT and shadow stack.
 */
2068	.section ".note.gnu.property", "a"
2069	.p2align 2
/* namesz: length of the "GNU\0" name between labels 0 and 1. */
2070	.long 1f - 0f
/* descsz: length of the descriptor between labels 1 and 4. */
2071	.long 4f - 1f
/* Note type. */
2072	.long 5
20730:
2074	.asciz "GNU"
20751:
2076	.p2align 2
/* Property type, then the size of its data (labels 2 to 3). */
2077	.long 0xc0000002
2078	.long 3f - 2f
20792:
/* Property data: the feature bit mask. */
2080	.long 3
20813:
2082	.p2align 2
20834:
2084#endif
2085