/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from chacha-x86.pl. */
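/*
 * Three ChaCha20 entry points follow: a scalar x86 implementation
 * (ChaCha20_ctr32), an SSSE3 one (ChaCha20_ssse3) and an XOP one
 * (ChaCha20_xop).  ChaCha20_ctr32 probes OPENSSL_ia32cap_P at run time
 * and branches to the fastest variant the CPU supports.  The PIC and
 * non-PIC halves of this file differ only in how OPENSSL_ia32cap_P is
 * addressed.
 */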
#ifdef PIC
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
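	/*
	 * Capability gate: bit 24 of the first OPENSSL_ia32cap_P word and
	 * bit 9 of the second presumably correspond to FXSR and SSSE3 in
	 * OpenSSL's capability vector; .Lssse3_shortcut additionally
	 * tests bit 11 of the second word before taking the XOP path.
	 */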
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
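	/* Pre-decrement the 32-bit block counter; .L002entry below
	   re-increments it once per 64-byte block. */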
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
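	/* State words 0-3: the ChaCha "expand 32-byte k" constants
	   0x61707865, 0x3320646e, 0x79622d32, 0x6b206574. */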
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
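	/*
	 * Scalar double round: four column and four diagonal
	 * quarter-rounds over the 16-word state kept in 0..60(%esp),
	 * with the round counter parked in 128(%esp); ten iterations
	 * make the full 20 rounds.
	 */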
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
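	/* Partial final block: XOR the keystream block kept on the stack
	   into the input one byte at a time. */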
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
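/*
 * .Lssse3_shortcut is entered from ChaCha20_ctr32 with %eax still
 * holding the .Lpic_point address (used to reach .Lssse3_data) and
 * %ebp pointing at OPENSSL_ia32cap_P (used for the XOP test below).
 * The routine builds a 64-byte-aligned scratch frame and saves the
 * caller's %esp at 512(%esp).
 */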
.Lssse3_shortcut:
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
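	/* The .byte sequences below are hand-encoded SSSE3 pshufb:
	   102,15,56,0,222 is pshufb %xmm6,%xmm3 (rotate each dword left
	   by 16) and 102,15,56,0,223 is pshufb %xmm7,%xmm3 (rotate left
	   by 8), emitted as raw bytes for older assemblers. */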
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align	64
.Lssse3_data:
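/*
 * .Lssse3_data layout:
 *   +0   pshufb mask rotating each dword left by 16
 *   +16  pshufb mask rotating each dword left by 8
 *   +32  the ChaCha "expand 32-byte k" constants
 *   +48  per-lane counter offsets 0,1,2,3
 *   +64  counter increment 4,4,4,4 for the four-block path
 *   +80  1,0,0,0: counter increment for the one-block path
 *   +96  4,0,0,0: counter advance when leaving the four-block path
 *   +112 0,-1,-1,-1: mask merging the carried counter into the nonce
 */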
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
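/* "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */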
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
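/*
 * .Lxop_shortcut is reached from .Lssse3_shortcut with %eax still
 * holding the .Lpic_point address, which the .Lssse3_data reference
 * below depends on.
 */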
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0141x
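	/*
	 * Four-block XOP path for inputs of at least 256 bytes: each xmm
	 * register holds one state word across four parallel blocks, with
	 * the splatted input state kept around 384(%esp) (%ebp) and the
	 * working copy around 128(%esp) (%ebx).
	 */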
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	32
.L016loop:
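	/* The 143,232,120,194,... byte sequences are hand-assembled XOP
	   rotates: 143,232,120,194,<modrm>,<imm> encodes
	   vprotd $imm,%xmmN,%xmmN, one instruction per 16/12/8/7-bit
	   ChaCha rotate. */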
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L016loop
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
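	/* Transpose the four key-stream blocks from word-sliced order
	   back to byte order, 4x4 dwords at a time, then XOR with the
	   input at 64-byte strides. */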
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop
	addl	$256,%ecx
	jz	.L017done
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4
#else
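/*
 * Non-PIC build of the same three routines.  The code is identical to
 * the PIC half above except that OPENSSL_ia32cap_P is referenced by
 * its absolute address; %eax is still loaded with the .Lpic_point
 * address so the .Lssse3_data references assemble the same way.
 */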
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P,%ebp
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0141x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	32
.L016loop:
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L016loop
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop
	addl	$256,%ecx
	jz	.L017done
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4
#endif