/* xref: /freebsd/sys/crypto/openssl/i386/poly1305-x86.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43) */
/* Do not modify. This file is auto-generated from poly1305-x86.pl. */
/*
 * Everything from here on is the PIC flavour of the file; the matching
 * #else/#endif is outside this excerpt.
 * (Code-browser annotation prefixes were stripped from these lines so the
 * preprocessor and assembler can parse them.)
 */
#ifdef PIC
.text
.align	64
/*
 * poly1305_init
 *
 * i386, AT&T syntax, arguments on the stack. After the four register pushes:
 *   20(%esp) -> %edi : context pointer
 *   24(%esp) -> %esi : key pointer (may be NULL)
 *   28(%esp) -> %ebp : pointer to a two-entry table that receives function
 *                      addresses (blocks routine, emit routine)
 * Presumed C prototype (not visible in this file, confirm against caller):
 *   int poly1305_init(void *ctx, const unsigned char key[16], void *func[2]);
 *
 * Behaviour:
 *   - Always zeroes the first six dwords of the context (offsets 0..20).
 *   - key == NULL: returns 0 in %eax and touches nothing else.
 *   - key != NULL: picks blocks/emit implementations from OPENSSL_ia32cap_P,
 *     stores them in the table, stores the masked key at ctx+24..ctx+36 and
 *     returns 1 in %eax.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * the flags are clobbered.
 *
 * (Code-browser annotation prefixes were stripped from every line; the
 * instruction stream itself is unchanged.)
 */
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
	/* Bytes F3 0F 1E FB encode ENDBR32 (indirect-branch landing pad). */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ebp
	/* Clear ctx[0..5]; %eax == 0 doubles as the "no key" return value. */
	xorl	%eax,%eax
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)
	cmpl	$0,%esi
	je	.L000nokey
	/* call/pop obtains the current address for PC-relative symbol access. */
	call	.L001pic_point
.L001pic_point:
	popl	%ebx
	/* Defaults: the scalar implementations. */
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	leal	OPENSSL_ia32cap_P-.L001pic_point(%ebx),%edi
	/*
	 * 83886080 == 0x05000000, i.e. bits 24 and 26 of the first capability
	 * word must both be set (presumably FXSR and SSE2, confirm against the
	 * OPENSSL_ia32cap_P layout).
	 */
	movl	(%edi),%ecx
	andl	$83886080,%ecx
	cmpl	$83886080,%ecx
	jne	.L002no_sse2
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
	/* Bit 5 of the dword at offset 8 selects the AVX2 blocks routine. */
	movl	8(%edi),%ecx
	testl	$32,%ecx
	jz	.L002no_sse2
	leal	_poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
	movl	20(%esp),%edi		/* reload ctx, %edi was used as scratch */
	movl	%eax,(%ebp)		/* func[0] = blocks routine */
	movl	%edx,4(%ebp)		/* func[1] = emit routine */
	/* Load the four key dwords and mask them before storing into ctx. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax		/* 0x0fffffff */
	andl	$268435452,%ebx		/* 0x0ffffffc */
	andl	$268435452,%ecx		/* 0x0ffffffc */
	andl	$268435452,%edx		/* 0x0ffffffc */
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin
/*
 * poly1305_blocks
 *
 * Scalar (32-bit integer) block routine. Arguments on the stack, after the
 * four register pushes:
 *   20(%esp) -> %edi : context pointer
 *   24(%esp) -> %esi : input pointer
 *   28(%esp) -> %ecx : length in bytes
 *   32(%esp)         : pad bit, added into the top accumulator word for
 *                      every block
 * Presumed C prototype (not visible in this file, confirm against caller):
 *   void poly1305_blocks(void *ctx, const unsigned char *inp,
 *                        size_t len, unsigned int padbit);
 *
 * Context layout used here: accumulator h0..h4 at ctx+0..ctx+16 (five
 * 32-bit words), multiplier r0..r3 at ctx+24..ctx+36.
 *
 * .Lenter_blocks is a secondary entry used by _poly1305_blocks_sse2 with
 * %edi/%esi/%ecx already loaded and the same stack layout.
 *
 * Local frame (64 bytes below the saved registers):
 *    0,12,16(%esp) : spilled h0, h3, h4 (h1/h2 stay in %ebx/%ecx)
 *   20..28(%esp)   : low three words of the product
 *   36..48(%esp)   : r0..r3
 *   52..60(%esp)   : s1..s3 where s = r + (r >> 2)
 *   84(%esp)       : ctx argument
 *   92(%esp)       : len argument slot, reused for the end-of-input pointer
 *   96(%esp)       : padbit argument
 *
 * Only whole 16-byte blocks are processed; a trailing partial block is
 * ignored.
 *
 * Change from the generated original: the length mask was -15, which kept
 * bit 0 of len. For an odd len the end pointer was then not a multiple of
 * 16 away from inp, the cmpl/jne loop exit below could never match, and
 * the loop read past the buffer. The mask is now -16, as in
 * _poly1305_blocks_sse2. Behaviour for lengths that are multiples of 16 is
 * unchanged. The same change should be made in the generator
 * (poly1305-x86.pl) so it is not lost when the file is regenerated.
 * Code-browser annotation prefixes were also stripped from every line.
 */
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
	/* Bytes F3 0F 1E FB encode ENDBR32 (indirect-branch landing pad). */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.Lenter_blocks:
	andl	$-16,%ecx		/* round len down to whole blocks */
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax
	movl	28(%edi),%ebx
	leal	(%esi,%ecx,1),%ebp	/* %ebp = inp + len (end pointer) */
	movl	32(%edi),%ecx
	movl	36(%edi),%edx
	movl	%ebp,92(%esp)		/* stash end pointer in the len slot */
	movl	%esi,%ebp		/* %ebp = running input pointer */
	/* Spill r0..r3 and precompute s1..s3 = r + (r >> 2). */
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	/* Load accumulator: h0..h4 -> %eax,%ebx,%ecx,%esi,%edi. */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%esi
	movl	16(%edi),%edi
	jmp	.L004loop
.align	32
.L004loop:
	/* h += block (16 bytes) with padbit carried into h4. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp		/* advance input; lea keeps CF intact */
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	/* d0 = h0*r0 + h1*s3 + h2*s2 + h3*s1 */
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	/* d1 = h0*r1 + h1*r0 + h2*s3 + h3*s2 + h4*s1 (+ carry) */
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	/* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s3 + h4*s2 (+ carry) */
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	/* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s3 (+ carry) */
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax
	adcl	$0,%edi
	/* d4 = h4*r0 + carry */
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	/*
	 * Partial reduction: keep the low two bits of the top word as the new
	 * h4 and fold (top >> 2) * 5 back into the bottom of the accumulator.
	 */
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp		/* reached the end pointer? */
	jne	.L004loop
	movl	84(%esp),%edx		/* ctx argument */
	addl	$64,%esp
	/* Write the accumulator back to ctx+0..ctx+16. */
	movl	%eax,(%edx)
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin
/*
 * poly1305_emit
 *
 * Produces the 16-byte output from the accumulator. Arguments on the stack,
 * after the four register pushes:
 *   20(%esp) -> %ebp : context pointer (h0..h4 at ctx+0..ctx+16)
 *   24(%esp) -> %edi : output buffer, 16 bytes
 *   28(%esp)         : pointer to four 32-bit words added to the result
 * Presumed C prototype (not visible in this file, confirm against caller):
 *   void poly1305_emit(void *ctx, unsigned char mac[16],
 *                      const unsigned int nonce[4]);
 *
 * Steps:
 *   1. t = h + 5 across all five words.
 *   2. mask = -(t4 >> 2): all ones when the addition carried into bit 2 of
 *      the top word, else zero.
 *   3. Select t when mask is all ones, h otherwise, using AND/OR only (no
 *      branch on the accumulator value).
 *   4. Add the four words at 28(%esp) with carry and store 16 bytes.
 *
 * The output buffer is used as scratch for the masked t between steps 2
 * and 3. .Lenter_emit is a secondary entry label; %ebp must already hold
 * ctx there.
 *
 * (Code-browser annotation prefixes were stripped from every line; the
 * instruction stream itself is unchanged.)
 */
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
	/* Bytes F3 0F 1E FB encode ENDBR32 (indirect-branch landing pad). */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
.Lenter_emit:
	movl	24(%esp),%edi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	/* t = h + 5 */
	addl	$5,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	/* mask = (t4 >> 2) ? 0xffffffff : 0 */
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	/* Park (t & mask) in the output buffer. */
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	notl	%esi			/* inverted mask selects the original h */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp		/* %ebp now points at the addend words */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	/* Final 128-bit addition; the carry out of the top word is dropped. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin
/*
 * _poly1305_init_sse2
 *
 * File-local helper with a register-based (non-C) calling convention:
 *   in:  %edi = context pointer (16 bytes are read from ctx+24)
 *        %ebx = base of a constant table; the vector at 64(%ebx) is used
 *               as the per-limb mask
 *   out: nine 16-byte rows written at ctx+48 .. ctx+191
 *        %edi restored to ctx on return
 *   clobbers: %ebp (holds the caller's %esp while the frame is live),
 *             %ecx, %edx, %xmm0-%xmm7, flags
 *
 * The value at ctx+24 is split into five limbs using shifts of 0, 26, 52,
 * 78 and 104 bits and the mask at 64(%ebx) (presumably 2^26-1, the table
 * is outside this excerpt). .L005square then runs two multiply-and-reduce
 * passes; judging by the label name and data flow this yields successive
 * powers of the input value for the vector code. NOTE(review): confirm
 * against the generator script.
 *
 * Local frame (224 bytes, 16-byte aligned):
 *     0.. 79(%esp) : five limb vectors of the current multiplicand
 *    80..143(%esp) : limbs 1..4 times 5, computed as x + (x << 2)
 *   144..223(%esp) : (%edx) five limb vectors with the low qword
 *                    duplicated into both lanes (pshufd $68)
 *
 * (Code-browser annotation prefixes were stripped from every line; the
 * instruction stream itself is unchanged.)
 */
.align	32
.type	_poly1305_init_sse2,@function
.align	16
_poly1305_init_sse2:
	/* Bytes F3 0F 1E FB encode ENDBR32 (indirect-branch landing pad). */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movdqu	24(%edi),%xmm4
	leal	48(%edi),%edi		/* %edi -> output table at ctx+48 */
	movl	%esp,%ebp		/* remember %esp; restored before ret */
	subl	$224,%esp
	andl	$-16,%esp		/* movdqa below needs 16-byte alignment */
	movq	64(%ebx),%xmm7
	/* Split the 128-bit input into five limbs in %xmm0..%xmm4. */
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx			/* two passes through .L005square */
.L005square:
	/* Save the current limbs. */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	/* 5*x for limbs 1..4, as x + (x << 2). */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	/* Duplicate the low qword of each limb into both lanes -> (%edx). */
	pshufd	$68,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$68,%xmm1,%xmm1
	pshufd	$68,%xmm2,%xmm2
	pshufd	$68,%xmm3,%xmm3
	pshufd	$68,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	/* Schoolbook 5x5 limb multiply, accumulating into %xmm0..%xmm4. */
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	movdqa	64(%ebx),%xmm7		/* reload limb mask */
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/*
	 * Carry propagation between limbs: each limb is masked with %xmm7 and
	 * its upper bits (>> 26) are added to the next limb; the carry out of
	 * limb 4 is added to limb 0 five times over (c + (c << 2)).
	 */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.L006square_break
	/* Pair the new result (low qword) with the saved input (high qword). */
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.L005square
.L006square_break:
	/* Interleave this pass's results with the saved values from the stack. */
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$141,%xmm0,%xmm0
	pshufd	$141,%xmm1,%xmm1
	pshufd	$141,%xmm2,%xmm2
	pshufd	$141,%xmm3,%xmm3
	pshufd	$141,%xmm4,%xmm4
	/* Rows 0..4 of the table: the five limb vectors. */
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	/* Rows 5..8: limbs 1..4 multiplied by 5. */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	movl	%ebp,%esp		/* drop the aligned frame */
	leal	-48(%edi),%edi		/* %edi back to ctx */
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2
516bc3d5698SJohn Baldwin.align	32
517bc3d5698SJohn Baldwin.type	_poly1305_blocks_sse2,@function
518bc3d5698SJohn Baldwin.align	16
519bc3d5698SJohn Baldwin_poly1305_blocks_sse2:
520*c0855eaaSJohn Baldwin	#ifdef __CET__
521*c0855eaaSJohn Baldwin
522*c0855eaaSJohn Baldwin.byte	243,15,30,251
523*c0855eaaSJohn Baldwin	#endif
524*c0855eaaSJohn Baldwin
525bc3d5698SJohn Baldwin	pushl	%ebp
526bc3d5698SJohn Baldwin	pushl	%ebx
527bc3d5698SJohn Baldwin	pushl	%esi
528bc3d5698SJohn Baldwin	pushl	%edi
529bc3d5698SJohn Baldwin	movl	20(%esp),%edi
530bc3d5698SJohn Baldwin	movl	24(%esp),%esi
531bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
532bc3d5698SJohn Baldwin	movl	20(%edi),%eax
533bc3d5698SJohn Baldwin	andl	$-16,%ecx
534bc3d5698SJohn Baldwin	jz	.L007nodata
535bc3d5698SJohn Baldwin	cmpl	$64,%ecx
536bc3d5698SJohn Baldwin	jae	.L008enter_sse2
537bc3d5698SJohn Baldwin	testl	%eax,%eax
538bc3d5698SJohn Baldwin	jz	.Lenter_blocks
539bc3d5698SJohn Baldwin.align	16
540bc3d5698SJohn Baldwin.L008enter_sse2:
541bc3d5698SJohn Baldwin	call	.L009pic_point
542bc3d5698SJohn Baldwin.L009pic_point:
543bc3d5698SJohn Baldwin	popl	%ebx
544bc3d5698SJohn Baldwin	leal	.Lconst_sse2-.L009pic_point(%ebx),%ebx
545bc3d5698SJohn Baldwin	testl	%eax,%eax
546bc3d5698SJohn Baldwin	jnz	.L010base2_26
547bc3d5698SJohn Baldwin	call	_poly1305_init_sse2
548bc3d5698SJohn Baldwin	movl	(%edi),%eax
549bc3d5698SJohn Baldwin	movl	3(%edi),%ecx
550bc3d5698SJohn Baldwin	movl	6(%edi),%edx
551bc3d5698SJohn Baldwin	movl	9(%edi),%esi
552bc3d5698SJohn Baldwin	movl	13(%edi),%ebp
553bc3d5698SJohn Baldwin	movl	$1,20(%edi)
554bc3d5698SJohn Baldwin	shrl	$2,%ecx
555bc3d5698SJohn Baldwin	andl	$67108863,%eax
556bc3d5698SJohn Baldwin	shrl	$4,%edx
557bc3d5698SJohn Baldwin	andl	$67108863,%ecx
558bc3d5698SJohn Baldwin	shrl	$6,%esi
559bc3d5698SJohn Baldwin	andl	$67108863,%edx
560bc3d5698SJohn Baldwin	movd	%eax,%xmm0
561bc3d5698SJohn Baldwin	movd	%ecx,%xmm1
562bc3d5698SJohn Baldwin	movd	%edx,%xmm2
563bc3d5698SJohn Baldwin	movd	%esi,%xmm3
564bc3d5698SJohn Baldwin	movd	%ebp,%xmm4
565bc3d5698SJohn Baldwin	movl	24(%esp),%esi
566bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
567bc3d5698SJohn Baldwin	jmp	.L011base2_32
568bc3d5698SJohn Baldwin.align	16
569bc3d5698SJohn Baldwin.L010base2_26:
570bc3d5698SJohn Baldwin	movd	(%edi),%xmm0
571bc3d5698SJohn Baldwin	movd	4(%edi),%xmm1
572bc3d5698SJohn Baldwin	movd	8(%edi),%xmm2
573bc3d5698SJohn Baldwin	movd	12(%edi),%xmm3
574bc3d5698SJohn Baldwin	movd	16(%edi),%xmm4
575bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
576bc3d5698SJohn Baldwin.L011base2_32:
577bc3d5698SJohn Baldwin	movl	32(%esp),%eax
578bc3d5698SJohn Baldwin	movl	%esp,%ebp
579bc3d5698SJohn Baldwin	subl	$528,%esp
580bc3d5698SJohn Baldwin	andl	$-16,%esp
581bc3d5698SJohn Baldwin	leal	48(%edi),%edi
582bc3d5698SJohn Baldwin	shll	$24,%eax
583bc3d5698SJohn Baldwin	testl	$31,%ecx
584bc3d5698SJohn Baldwin	jz	.L012even
585bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm6
586bc3d5698SJohn Baldwin	leal	16(%esi),%esi
587bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm5
588bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
589bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm0
590bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
591bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
592bc3d5698SJohn Baldwin	psrldq	$6,%xmm6
593bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
594bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
595bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm5
596bc3d5698SJohn Baldwin	psrlq	$4,%xmm6
597bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
598bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm2
599bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
600bc3d5698SJohn Baldwin	psrlq	$30,%xmm5
601bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
602bc3d5698SJohn Baldwin	psrldq	$7,%xmm6
603bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm3
604bc3d5698SJohn Baldwin	movd	%eax,%xmm5
605bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
606bc3d5698SJohn Baldwin	movd	12(%edi),%xmm6
607bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm4
608bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
609bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
610bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
611bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
612bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
613bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm0
614bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm1
615bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm2
616bc3d5698SJohn Baldwin	movd	28(%edi),%xmm5
617bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm3
618bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm4
619bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
620bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
621bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
622bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm6
623bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
624bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
625bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
626bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
627bc3d5698SJohn Baldwin	movd	92(%edi),%xmm6
628bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
629bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
630bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
631bc3d5698SJohn Baldwin	movd	44(%edi),%xmm7
632bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
633bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
634bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
635bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
636bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
637bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm5
638bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
639bc3d5698SJohn Baldwin	movd	108(%edi),%xmm7
640bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm6
641bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
642bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
643bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm7
644bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
645bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
646bc3d5698SJohn Baldwin	movd	60(%edi),%xmm6
647bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
648bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
649bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm6
650bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
651bc3d5698SJohn Baldwin	movd	124(%edi),%xmm5
652bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm7
653bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
654bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
655bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm5
656bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
657bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
658bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
659bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
660bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
661bc3d5698SJohn Baldwin	movd	76(%edi),%xmm5
662bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
663bc3d5698SJohn Baldwin	movd	140(%edi),%xmm6
664bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
665bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
666bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
667bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
668bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
669bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
670bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
671bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
672bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
673bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm5
674bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
675bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
676bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
677bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
678bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
679bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
680bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
681bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
682bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
683bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
684bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
685bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
686bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
687bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
688bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
689bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
690bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
691bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
692bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
693bc3d5698SJohn Baldwin	psllq	$2,%xmm5
694bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
695bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
696bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
697bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
698bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
699bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
700bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
701bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
702bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
703bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
704bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
705bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
706bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
707bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
708bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
709bc3d5698SJohn Baldwin	subl	$16,%ecx
710bc3d5698SJohn Baldwin	jz	.L013done
711bc3d5698SJohn Baldwin.L012even:
712bc3d5698SJohn Baldwin	leal	384(%esp),%edx
713bc3d5698SJohn Baldwin	leal	-32(%esi),%eax
714bc3d5698SJohn Baldwin	subl	$64,%ecx
715bc3d5698SJohn Baldwin	movdqu	(%edi),%xmm5
716bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
717bc3d5698SJohn Baldwin	cmovbl	%eax,%esi
718bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
719bc3d5698SJohn Baldwin	movdqa	%xmm6,(%edx)
720bc3d5698SJohn Baldwin	leal	160(%esp),%eax
721bc3d5698SJohn Baldwin	movdqu	16(%edi),%xmm6
722bc3d5698SJohn Baldwin	movdqa	%xmm5,-144(%edx)
723bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
724bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
725bc3d5698SJohn Baldwin	movdqa	%xmm5,16(%edx)
726bc3d5698SJohn Baldwin	movdqu	32(%edi),%xmm5
727bc3d5698SJohn Baldwin	movdqa	%xmm6,-128(%edx)
728bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
729bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
730bc3d5698SJohn Baldwin	movdqa	%xmm6,32(%edx)
731bc3d5698SJohn Baldwin	movdqu	48(%edi),%xmm6
732bc3d5698SJohn Baldwin	movdqa	%xmm5,-112(%edx)
733bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
734bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
735bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%edx)
736bc3d5698SJohn Baldwin	movdqu	64(%edi),%xmm5
737bc3d5698SJohn Baldwin	movdqa	%xmm6,-96(%edx)
738bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
739bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
740bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%edx)
741bc3d5698SJohn Baldwin	movdqu	80(%edi),%xmm6
742bc3d5698SJohn Baldwin	movdqa	%xmm5,-80(%edx)
743bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
744bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
745bc3d5698SJohn Baldwin	movdqa	%xmm5,80(%edx)
746bc3d5698SJohn Baldwin	movdqu	96(%edi),%xmm5
747bc3d5698SJohn Baldwin	movdqa	%xmm6,-64(%edx)
748bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
749bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
750bc3d5698SJohn Baldwin	movdqa	%xmm6,96(%edx)
751bc3d5698SJohn Baldwin	movdqu	112(%edi),%xmm6
752bc3d5698SJohn Baldwin	movdqa	%xmm5,-48(%edx)
753bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
754bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
755bc3d5698SJohn Baldwin	movdqa	%xmm5,112(%edx)
756bc3d5698SJohn Baldwin	movdqu	128(%edi),%xmm5
757bc3d5698SJohn Baldwin	movdqa	%xmm6,-32(%edx)
758bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
759bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
760bc3d5698SJohn Baldwin	movdqa	%xmm6,128(%edx)
761bc3d5698SJohn Baldwin	movdqa	%xmm5,-16(%edx)
762bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm5
763bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm6
764bc3d5698SJohn Baldwin	leal	32(%esi),%esi
765bc3d5698SJohn Baldwin	movdqa	%xmm2,112(%esp)
766bc3d5698SJohn Baldwin	movdqa	%xmm3,128(%esp)
767bc3d5698SJohn Baldwin	movdqa	%xmm4,144(%esp)
768bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
769bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
770bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
771bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
772bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
773bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
774bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
775bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
776bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
777bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
778bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
779bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
780bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
781bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
782bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
783bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
784bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
785bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
786bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
787bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
788bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
789bc3d5698SJohn Baldwin	jbe	.L014skip_loop
790bc3d5698SJohn Baldwin	jmp	.L015loop
791bc3d5698SJohn Baldwin.align	32
792bc3d5698SJohn Baldwin.L015loop:
793bc3d5698SJohn Baldwin	movdqa	-144(%edx),%xmm7
794bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%eax)
795bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%eax)
796bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%eax)
797bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%eax)
798bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
799bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
800bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm0
801bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
802bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
803bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
804bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
805bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm0
806bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
807bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm1
808bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
809bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
810bc3d5698SJohn Baldwin	pmuludq	-112(%edx),%xmm7
811bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
812bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
813bc3d5698SJohn Baldwin	pmuludq	-96(%edx),%xmm5
814bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
815bc3d5698SJohn Baldwin	movdqa	16(%eax),%xmm7
816bc3d5698SJohn Baldwin	pmuludq	-80(%edx),%xmm6
817bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
818bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
819bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm7
820bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
821bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
822bc3d5698SJohn Baldwin	pmuludq	-112(%edx),%xmm5
823bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
824bc3d5698SJohn Baldwin	movdqa	32(%eax),%xmm7
825bc3d5698SJohn Baldwin	pmuludq	-96(%edx),%xmm6
826bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
827bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
828bc3d5698SJohn Baldwin	pmuludq	-32(%edx),%xmm7
829bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
830bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
831bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm5
832bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
833bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
834bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm6
835bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
836bc3d5698SJohn Baldwin	movdqa	48(%eax),%xmm5
837bc3d5698SJohn Baldwin	pmuludq	-112(%edx),%xmm7
838bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
839bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
840bc3d5698SJohn Baldwin	pmuludq	-48(%edx),%xmm5
841bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
842bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
843bc3d5698SJohn Baldwin	pmuludq	-32(%edx),%xmm6
844bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
845bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
846bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm7
847bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
848bc3d5698SJohn Baldwin	movdqa	64(%eax),%xmm6
849bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm5
850bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
851bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
852bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm6
853bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
854bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
855bc3d5698SJohn Baldwin	pmuludq	-64(%edx),%xmm7
856bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
857bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
858bc3d5698SJohn Baldwin	pmuludq	-48(%edx),%xmm5
859bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
860bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
861bc3d5698SJohn Baldwin	pmuludq	-32(%edx),%xmm6
862bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
863bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
864bc3d5698SJohn Baldwin	movdqu	-32(%esi),%xmm5
865bc3d5698SJohn Baldwin	movdqu	-16(%esi),%xmm6
866bc3d5698SJohn Baldwin	leal	32(%esi),%esi
867bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
868bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
869bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
870bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
871bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
872bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
873bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
874bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
875bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
876bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
877bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
878bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
879bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
880bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
881bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
882bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
883bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
884bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
885bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
886bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
887bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
888bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
889bc3d5698SJohn Baldwin	leal	-32(%esi),%eax
890bc3d5698SJohn Baldwin	subl	$64,%ecx
891bc3d5698SJohn Baldwin	paddd	80(%esp),%xmm5
892bc3d5698SJohn Baldwin	paddd	96(%esp),%xmm6
893bc3d5698SJohn Baldwin	paddd	112(%esp),%xmm2
894bc3d5698SJohn Baldwin	paddd	128(%esp),%xmm3
895bc3d5698SJohn Baldwin	paddd	144(%esp),%xmm4
896bc3d5698SJohn Baldwin	cmovbl	%eax,%esi
897bc3d5698SJohn Baldwin	leal	160(%esp),%eax
898bc3d5698SJohn Baldwin	movdqa	(%edx),%xmm7
899bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
900bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%eax)
901bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%eax)
902bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%eax)
903bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%eax)
904bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
905bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
906bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
907bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm0
908bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
909bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
910bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
911bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
912bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm6
913bc3d5698SJohn Baldwin	paddq	32(%esp),%xmm2
914bc3d5698SJohn Baldwin	paddq	48(%esp),%xmm3
915bc3d5698SJohn Baldwin	paddq	64(%esp),%xmm4
916bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm0
917bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
918bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm1
919bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
920bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
921bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm7
922bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
923bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
924bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm5
925bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
926bc3d5698SJohn Baldwin	movdqa	16(%eax),%xmm7
927bc3d5698SJohn Baldwin	pmuludq	64(%edx),%xmm6
928bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
929bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
930bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm7
931bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
932bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
933bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm5
934bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
935bc3d5698SJohn Baldwin	movdqa	32(%eax),%xmm7
936bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm6
937bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
938bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
939bc3d5698SJohn Baldwin	pmuludq	112(%edx),%xmm7
940bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
941bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
942bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm5
943bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
944bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
945bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm6
946bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
947bc3d5698SJohn Baldwin	movdqa	48(%eax),%xmm5
948bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm7
949bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
950bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
951bc3d5698SJohn Baldwin	pmuludq	96(%edx),%xmm5
952bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
953bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
954bc3d5698SJohn Baldwin	pmuludq	112(%edx),%xmm6
955bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
956bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
957bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm7
958bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
959bc3d5698SJohn Baldwin	movdqa	64(%eax),%xmm6
960bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm5
961bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
962bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
963bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm6
964bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
965bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
966bc3d5698SJohn Baldwin	pmuludq	80(%edx),%xmm7
967bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
968bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
969bc3d5698SJohn Baldwin	pmuludq	96(%edx),%xmm5
970bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
971bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
972bc3d5698SJohn Baldwin	pmuludq	112(%edx),%xmm6
973bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
974bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
975bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
976bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
977bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
978bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
979bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
980bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
981bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
982bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
983bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
984bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
985bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
986bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
987bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
988bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
989bc3d5698SJohn Baldwin	psllq	$2,%xmm5
990bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
991bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
992bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
993bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
994bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
995bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
996bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
997bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
998bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
999bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
1000bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1001bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
1002bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
1003bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
1004bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
1005bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm5
1006bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm6
1007bc3d5698SJohn Baldwin	leal	32(%esi),%esi
1008bc3d5698SJohn Baldwin	movdqa	%xmm2,112(%esp)
1009bc3d5698SJohn Baldwin	movdqa	%xmm3,128(%esp)
1010bc3d5698SJohn Baldwin	movdqa	%xmm4,144(%esp)
1011bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
1012bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
1013bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
1014bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
1015bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
1016bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
1017bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
1018bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
1019bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
1020bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
1021bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
1022bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1023bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
1024bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1025bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
1026bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
1027bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
1028bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
1029bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
1030bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
1031bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
1032bc3d5698SJohn Baldwin	ja	.L015loop
1033bc3d5698SJohn Baldwin.L014skip_loop:
1034bc3d5698SJohn Baldwin	pshufd	$16,-144(%edx),%xmm7
1035bc3d5698SJohn Baldwin	addl	$32,%ecx
1036bc3d5698SJohn Baldwin	jnz	.L016long_tail
1037bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm5
1038bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm6
1039bc3d5698SJohn Baldwin	paddd	112(%esp),%xmm2
1040bc3d5698SJohn Baldwin	paddd	128(%esp),%xmm3
1041bc3d5698SJohn Baldwin	paddd	144(%esp),%xmm4
1042bc3d5698SJohn Baldwin.L016long_tail:
1043bc3d5698SJohn Baldwin	movdqa	%xmm5,(%eax)
1044bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%eax)
1045bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%eax)
1046bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%eax)
1047bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%eax)
1048bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
1049bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
1050bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
1051bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
1052bc3d5698SJohn Baldwin	pshufd	$16,-128(%edx),%xmm5
1053bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
1054bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1055bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
1056bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1057bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm5
1058bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1059bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm6
1060bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
1061bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1062bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm7
1063bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
1064bc3d5698SJohn Baldwin	pshufd	$16,-64(%edx),%xmm6
1065bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm5
1066bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
1067bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm6
1068bc3d5698SJohn Baldwin	pshufd	$16,-112(%edx),%xmm7
1069bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
1070bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1071bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm7
1072bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
1073bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1074bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm5
1075bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
1076bc3d5698SJohn Baldwin	pshufd	$16,-48(%edx),%xmm7
1077bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm6
1078bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
1079bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1080bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm7
1081bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
1082bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm5
1083bc3d5698SJohn Baldwin	pshufd	$16,-96(%edx),%xmm6
1084bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
1085bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1086bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm6
1087bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
1088bc3d5698SJohn Baldwin	pshufd	$16,-32(%edx),%xmm5
1089bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm7
1090bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
1091bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1092bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm5
1093bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
1094bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1095bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm6
1096bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
1097bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm7
1098bc3d5698SJohn Baldwin	pshufd	$16,-80(%edx),%xmm5
1099bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
1100bc3d5698SJohn Baldwin	pshufd	$16,-16(%edx),%xmm6
1101bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm5
1102bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
1103bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1104bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm6
1105bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
1106bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1107bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm7
1108bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
1109bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1110bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm5
1111bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
1112bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm6
1113bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
1114bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
1115bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
1116bc3d5698SJohn Baldwin	jz	.L017short_tail
1117bc3d5698SJohn Baldwin	movdqu	-32(%esi),%xmm5
1118bc3d5698SJohn Baldwin	movdqu	-16(%esi),%xmm6
1119bc3d5698SJohn Baldwin	leal	32(%esi),%esi
1120bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
1121bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
1122bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
1123bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
1124bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
1125bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
1126bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
1127bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
1128bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
1129bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
1130bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
1131bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
1132bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
1133bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
1134bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1135bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
1136bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1137bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
1138bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
1139bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
1140bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
1141bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
1142bc3d5698SJohn Baldwin	pshufd	$16,(%edx),%xmm7
1143bc3d5698SJohn Baldwin	paddd	80(%esp),%xmm5
1144bc3d5698SJohn Baldwin	paddd	96(%esp),%xmm6
1145bc3d5698SJohn Baldwin	paddd	112(%esp),%xmm2
1146bc3d5698SJohn Baldwin	paddd	128(%esp),%xmm3
1147bc3d5698SJohn Baldwin	paddd	144(%esp),%xmm4
1148bc3d5698SJohn Baldwin	movdqa	%xmm5,(%esp)
1149bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
1150bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%esp)
1151bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
1152bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
1153bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm5
1154bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
1155bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
1156bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm6
1157bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
1158bc3d5698SJohn Baldwin	paddq	32(%esp),%xmm2
1159bc3d5698SJohn Baldwin	movdqa	%xmm5,32(%esp)
1160bc3d5698SJohn Baldwin	pshufd	$16,16(%edx),%xmm5
1161bc3d5698SJohn Baldwin	paddq	48(%esp),%xmm3
1162bc3d5698SJohn Baldwin	movdqa	%xmm6,48(%esp)
1163bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm6
1164bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
1165bc3d5698SJohn Baldwin	paddq	64(%esp),%xmm4
1166bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
1167bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1168bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
1169bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1170bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm6
1171bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
1172bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1173bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
1174bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
1175bc3d5698SJohn Baldwin	pshufd	$16,80(%edx),%xmm6
1176bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
1177bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
1178bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
1179bc3d5698SJohn Baldwin	pshufd	$16,32(%edx),%xmm7
1180bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
1181bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1182bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
1183bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
1184bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1185bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm5
1186bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
1187bc3d5698SJohn Baldwin	pshufd	$16,96(%edx),%xmm7
1188bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm6
1189bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
1190bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1191bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm7
1192bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
1193bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
1194bc3d5698SJohn Baldwin	pshufd	$16,48(%edx),%xmm6
1195bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
1196bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1197bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm6
1198bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
1199bc3d5698SJohn Baldwin	pshufd	$16,112(%edx),%xmm5
1200bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm7
1201bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
1202bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1203bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm5
1204bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
1205bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1206bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
1207bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
1208bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
1209bc3d5698SJohn Baldwin	pshufd	$16,64(%edx),%xmm5
1210bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
1211bc3d5698SJohn Baldwin	pshufd	$16,128(%edx),%xmm6
1212bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
1213bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
1214bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1215bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
1216bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
1217bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1218bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
1219bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
1220bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
1221bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm5
1222bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
1223bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
1224bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
1225bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
1226bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
1227bc3d5698SJohn Baldwin.L017short_tail:
1228bc3d5698SJohn Baldwin	pshufd	$78,%xmm4,%xmm6
1229bc3d5698SJohn Baldwin	pshufd	$78,%xmm3,%xmm5
1230bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
1231bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
1232bc3d5698SJohn Baldwin	pshufd	$78,%xmm0,%xmm6
1233bc3d5698SJohn Baldwin	pshufd	$78,%xmm1,%xmm5
1234bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
1235bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
1236bc3d5698SJohn Baldwin	pshufd	$78,%xmm2,%xmm6
1237bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
1238bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
1239bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
1240bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
1241bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
1242bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
1243bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
1244bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1245bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
1246bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
1247bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
1248bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
1249bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1250bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1251bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
1252bc3d5698SJohn Baldwin	psllq	$2,%xmm5
1253bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
1254bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
1255bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
1256bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
1257bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1258bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
1259bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
1260bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
1261bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
1262bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
1263bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
1264bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
1265bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
1266bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
1267bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
1268bc3d5698SJohn Baldwin.L013done:
1269bc3d5698SJohn Baldwin	movd	%xmm0,-48(%edi)
1270bc3d5698SJohn Baldwin	movd	%xmm1,-44(%edi)
1271bc3d5698SJohn Baldwin	movd	%xmm2,-40(%edi)
1272bc3d5698SJohn Baldwin	movd	%xmm3,-36(%edi)
1273bc3d5698SJohn Baldwin	movd	%xmm4,-32(%edi)
1274bc3d5698SJohn Baldwin	movl	%ebp,%esp
1275bc3d5698SJohn Baldwin.L007nodata:
1276bc3d5698SJohn Baldwin	popl	%edi
1277bc3d5698SJohn Baldwin	popl	%esi
1278bc3d5698SJohn Baldwin	popl	%ebx
1279bc3d5698SJohn Baldwin	popl	%ebp
1280bc3d5698SJohn Baldwin	ret
1281bc3d5698SJohn Baldwin.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2
1282bc3d5698SJohn Baldwin.align	32
/*
 * _poly1305_emit_sse2: produce the four-word (16-byte) result from the
 * accumulator held in the state.
 *
 * Stack arguments (read at 20/24/28(%esp) after the four pushes below):
 *   arg1 - state pointer; the 32-bit words at +0..+16 are the accumulator
 *          and the word at +20 selects the code path (see below)
 *   arg2 - destination; four 32-bit words are stored at +0..+12
 *   arg3 - four 32-bit words that are added to the result before the
 *          final store (presumably the nonce / "s" half of the key;
 *          TODO confirm against the caller)
 *
 * Saves and restores %ebp, %ebx, %esi and %edi.
 * Overwrites %eax, %ecx, %edx, %xmm0-%xmm3 and EFLAGS.
 */
1283bc3d5698SJohn Baldwin.type	_poly1305_emit_sse2,@function
1284bc3d5698SJohn Baldwin.align	16
1285bc3d5698SJohn Baldwin_poly1305_emit_sse2:
/* 243,15,30,251 is F3 0F 1E FB, the encoding of endbr32 (CET landing pad). */
1286*c0855eaaSJohn Baldwin	#ifdef __CET__
1287*c0855eaaSJohn Baldwin
1288*c0855eaaSJohn Baldwin.byte	243,15,30,251
1289*c0855eaaSJohn Baldwin	#endif
1290*c0855eaaSJohn Baldwin
1291bc3d5698SJohn Baldwin	pushl	%ebp
1292bc3d5698SJohn Baldwin	pushl	%ebx
1293bc3d5698SJohn Baldwin	pushl	%esi
1294bc3d5698SJohn Baldwin	pushl	%edi
/* %ebp = arg1: return address plus four saved registers is 20 bytes. */
1295bc3d5698SJohn Baldwin	movl	20(%esp),%ebp
/*
 * If the word at +20 of the state is zero, continue at .Lenter_emit, which
 * is defined outside this function (presumably the scalar emit path for a
 * state that is not stored as 26-bit limbs; TODO confirm). The four pushed
 * registers are still on the stack and %ebp still holds arg1 at that point.
 */
1296bc3d5698SJohn Baldwin	cmpl	$0,20(%ebp)
1297bc3d5698SJohn Baldwin	je	.Lenter_emit
/* Load the five accumulator words 0..4 into %eax, %edi, %ecx, %edx, %esi. */
1298bc3d5698SJohn Baldwin	movl	(%ebp),%eax
1299bc3d5698SJohn Baldwin	movl	4(%ebp),%edi
1300bc3d5698SJohn Baldwin	movl	8(%ebp),%ecx
1301bc3d5698SJohn Baldwin	movl	12(%ebp),%edx
1302bc3d5698SJohn Baldwin	movl	16(%ebp),%esi
/*
 * Pack the five words, each weighted 2^26 relative to the previous one,
 * into one multi-word value h held in %eax, %ebx, %ecx, %edx (low 128
 * bits) and %esi (top word). Word k adds (w << s) to one register and
 * (w >> (32 - s)) to the next, with s = 26, 20, 14, 8.
 * The movl placed between each addl and adcl does not modify CF, so every
 * adcl still consumes the carry produced by the addl before it.
 */
1303bc3d5698SJohn Baldwin	movl	%edi,%ebx
1304bc3d5698SJohn Baldwin	shll	$26,%edi
1305bc3d5698SJohn Baldwin	shrl	$6,%ebx
1306bc3d5698SJohn Baldwin	addl	%edi,%eax
1307bc3d5698SJohn Baldwin	movl	%ecx,%edi
1308bc3d5698SJohn Baldwin	adcl	$0,%ebx
1309bc3d5698SJohn Baldwin	shll	$20,%edi
1310bc3d5698SJohn Baldwin	shrl	$12,%ecx
1311bc3d5698SJohn Baldwin	addl	%edi,%ebx
1312bc3d5698SJohn Baldwin	movl	%edx,%edi
1313bc3d5698SJohn Baldwin	adcl	$0,%ecx
1314bc3d5698SJohn Baldwin	shll	$14,%edi
1315bc3d5698SJohn Baldwin	shrl	$18,%edx
1316bc3d5698SJohn Baldwin	addl	%edi,%ecx
1317bc3d5698SJohn Baldwin	movl	%esi,%edi
1318bc3d5698SJohn Baldwin	adcl	$0,%edx
1319bc3d5698SJohn Baldwin	shll	$8,%edi
1320bc3d5698SJohn Baldwin	shrl	$24,%esi
1321bc3d5698SJohn Baldwin	addl	%edi,%edx
1322bc3d5698SJohn Baldwin	adcl	$0,%esi
/*
 * Split the top word: bits 0-1 stay in %esi, and everything above them
 * (top >> 2, i.e. the part with weight 2^130) is multiplied by 5 and
 * folded back into the bottom of h. leal computes x + 4*x. This is the
 * reduction step for the modulus 2^130 - 5, where 2^130 is congruent to 5.
 */
1323bc3d5698SJohn Baldwin	movl	%esi,%edi
1324bc3d5698SJohn Baldwin	andl	$3,%esi
1325bc3d5698SJohn Baldwin	shrl	$2,%edi
1326bc3d5698SJohn Baldwin	leal	(%edi,%edi,4),%ebp
/* %edi = arg2 (destination), %ebp = arg3; neither movl disturbs CF. */
1327bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1328bc3d5698SJohn Baldwin	addl	%ebp,%eax
1329bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
1330bc3d5698SJohn Baldwin	adcl	$0,%ebx
1331bc3d5698SJohn Baldwin	adcl	$0,%ecx
1332bc3d5698SJohn Baldwin	adcl	$0,%edx
1333bc3d5698SJohn Baldwin	adcl	$0,%esi
/*
 * Keep a copy of the low 128 bits of h in %xmm0-%xmm3 while h + 5 is
 * computed in place. movd does not change flags, so the carry chain
 * started by the addl runs unbroken through all the adcl instructions.
 */
1334bc3d5698SJohn Baldwin	movd	%eax,%xmm0
1335bc3d5698SJohn Baldwin	addl	$5,%eax
1336bc3d5698SJohn Baldwin	movd	%ebx,%xmm1
1337bc3d5698SJohn Baldwin	adcl	$0,%ebx
1338bc3d5698SJohn Baldwin	movd	%ecx,%xmm2
1339bc3d5698SJohn Baldwin	adcl	$0,%ecx
1340bc3d5698SJohn Baldwin	movd	%edx,%xmm3
1341bc3d5698SJohn Baldwin	adcl	$0,%edx
1342bc3d5698SJohn Baldwin	adcl	$0,%esi
/*
 * After the shift %esi is 1 if h + 5 reached bit 2 of the top word
 * (weight 2^130) and 0 otherwise; negl turns that into an all-ones or
 * all-zeros mask.
 */
1343bc3d5698SJohn Baldwin	shrl	$2,%esi
1344bc3d5698SJohn Baldwin	negl	%esi
1345bc3d5698SJohn Baldwin	andl	%esi,%eax
1346bc3d5698SJohn Baldwin	andl	%esi,%ebx
1347bc3d5698SJohn Baldwin	andl	%esi,%ecx
1348bc3d5698SJohn Baldwin	andl	%esi,%edx
/*
 * Park (h + 5) & mask in the destination buffer and reload the saved h
 * from the xmm copies into the same registers.
 */
1349bc3d5698SJohn Baldwin	movl	%eax,(%edi)
1350bc3d5698SJohn Baldwin	movd	%xmm0,%eax
1351bc3d5698SJohn Baldwin	movl	%ebx,4(%edi)
1352bc3d5698SJohn Baldwin	movd	%xmm1,%ebx
1353bc3d5698SJohn Baldwin	movl	%ecx,8(%edi)
1354bc3d5698SJohn Baldwin	movd	%xmm2,%ecx
1355bc3d5698SJohn Baldwin	movl	%edx,12(%edi)
1356bc3d5698SJohn Baldwin	movd	%xmm3,%edx
/* Branch-free select: result = (h & ~mask) | ((h + 5) & mask). */
1357bc3d5698SJohn Baldwin	notl	%esi
1358bc3d5698SJohn Baldwin	andl	%esi,%eax
1359bc3d5698SJohn Baldwin	andl	%esi,%ebx
1360bc3d5698SJohn Baldwin	orl	(%edi),%eax
1361bc3d5698SJohn Baldwin	andl	%esi,%ecx
1362bc3d5698SJohn Baldwin	orl	4(%edi),%ebx
1363bc3d5698SJohn Baldwin	andl	%esi,%edx
1364bc3d5698SJohn Baldwin	orl	8(%edi),%ecx
1365bc3d5698SJohn Baldwin	orl	12(%edi),%edx
/*
 * Add the four words at arg3 as a single 128-bit addition and store each
 * result word to the destination. The interleaved movl stores leave CF
 * intact; the carry out of the last adcl is discarded.
 */
1366bc3d5698SJohn Baldwin	addl	(%ebp),%eax
1367bc3d5698SJohn Baldwin	adcl	4(%ebp),%ebx
1368bc3d5698SJohn Baldwin	movl	%eax,(%edi)
1369bc3d5698SJohn Baldwin	adcl	8(%ebp),%ecx
1370bc3d5698SJohn Baldwin	movl	%ebx,4(%edi)
1371bc3d5698SJohn Baldwin	adcl	12(%ebp),%edx
1372bc3d5698SJohn Baldwin	movl	%ecx,8(%edi)
1373bc3d5698SJohn Baldwin	movl	%edx,12(%edi)
/* Restore the registers pushed at entry, in reverse order, and return. */
1374bc3d5698SJohn Baldwin	popl	%edi
1375bc3d5698SJohn Baldwin	popl	%esi
1376bc3d5698SJohn Baldwin	popl	%ebx
1377bc3d5698SJohn Baldwin	popl	%ebp
1378bc3d5698SJohn Baldwin	ret
1379bc3d5698SJohn Baldwin.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2
1380bc3d5698SJohn Baldwin.align	32
1381bc3d5698SJohn Baldwin.type	_poly1305_init_avx2,@function
1382bc3d5698SJohn Baldwin.align	16
1383bc3d5698SJohn Baldwin_poly1305_init_avx2:
1384*c0855eaaSJohn Baldwin	#ifdef __CET__
1385*c0855eaaSJohn Baldwin
1386*c0855eaaSJohn Baldwin.byte	243,15,30,251
1387*c0855eaaSJohn Baldwin	#endif
1388*c0855eaaSJohn Baldwin
1389bc3d5698SJohn Baldwin	vmovdqu	24(%edi),%xmm4
1390bc3d5698SJohn Baldwin	leal	48(%edi),%edi
1391bc3d5698SJohn Baldwin	movl	%esp,%ebp
1392bc3d5698SJohn Baldwin	subl	$224,%esp
1393bc3d5698SJohn Baldwin	andl	$-16,%esp
1394bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm7
1395bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm4,%xmm0
1396bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm4,%xmm1
1397bc3d5698SJohn Baldwin	vpsrldq	$6,%xmm4,%xmm3
1398bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm1,%xmm1
1399bc3d5698SJohn Baldwin	vpsrlq	$4,%xmm3,%xmm2
1400bc3d5698SJohn Baldwin	vpsrlq	$30,%xmm3,%xmm3
1401bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm2,%xmm2
1402bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm3,%xmm3
1403bc3d5698SJohn Baldwin	vpsrldq	$13,%xmm4,%xmm4
1404bc3d5698SJohn Baldwin	leal	144(%esp),%edx
1405bc3d5698SJohn Baldwin	movl	$2,%ecx
1406bc3d5698SJohn Baldwin.L018square:
1407bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%esp)
1408bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%esp)
1409bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%esp)
1410bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
1411bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%esp)
1412bc3d5698SJohn Baldwin	vpslld	$2,%xmm1,%xmm6
1413bc3d5698SJohn Baldwin	vpslld	$2,%xmm2,%xmm5
1414bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm6,%xmm6
1415bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm5,%xmm5
1416bc3d5698SJohn Baldwin	vmovdqa	%xmm6,80(%esp)
1417bc3d5698SJohn Baldwin	vmovdqa	%xmm5,96(%esp)
1418bc3d5698SJohn Baldwin	vpslld	$2,%xmm3,%xmm6
1419bc3d5698SJohn Baldwin	vpslld	$2,%xmm4,%xmm5
1420bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm6,%xmm6
1421bc3d5698SJohn Baldwin	vpaddd	%xmm4,%xmm5,%xmm5
1422bc3d5698SJohn Baldwin	vmovdqa	%xmm6,112(%esp)
1423bc3d5698SJohn Baldwin	vmovdqa	%xmm5,128(%esp)
1424bc3d5698SJohn Baldwin	vpshufd	$68,%xmm0,%xmm5
1425bc3d5698SJohn Baldwin	vmovdqa	%xmm1,%xmm6
1426bc3d5698SJohn Baldwin	vpshufd	$68,%xmm1,%xmm1
1427bc3d5698SJohn Baldwin	vpshufd	$68,%xmm2,%xmm2
1428bc3d5698SJohn Baldwin	vpshufd	$68,%xmm3,%xmm3
1429bc3d5698SJohn Baldwin	vpshufd	$68,%xmm4,%xmm4
1430bc3d5698SJohn Baldwin	vmovdqa	%xmm5,(%edx)
1431bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%edx)
1432bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%edx)
1433bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%edx)
1434bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%edx)
1435bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm4,%xmm4
1436bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm3,%xmm3
1437bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm2,%xmm2
1438bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm1,%xmm1
1439bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm5,%xmm0
1440bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm6,%xmm5
1441bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm4,%xmm4
1442bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm6,%xmm7
1443bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm3,%xmm3
1444bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm6,%xmm5
1445bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm2,%xmm2
1446bc3d5698SJohn Baldwin	vmovdqa	80(%esp),%xmm7
1447bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm6,%xmm6
1448bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm1,%xmm1
1449bc3d5698SJohn Baldwin	vmovdqa	32(%esp),%xmm5
1450bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm7,%xmm7
1451bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm0,%xmm0
1452bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm5,%xmm6
1453bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm4,%xmm4
1454bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm5,%xmm7
1455bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm3,%xmm3
1456bc3d5698SJohn Baldwin	vmovdqa	96(%esp),%xmm6
1457bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm5,%xmm5
1458bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm2,%xmm2
1459bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm6,%xmm7
1460bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm1,%xmm1
1461bc3d5698SJohn Baldwin	vmovdqa	48(%esp),%xmm5
1462bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm6,%xmm6
1463bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm0,%xmm0
1464bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm5,%xmm7
1465bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm4,%xmm4
1466bc3d5698SJohn Baldwin	vmovdqa	112(%esp),%xmm6
1467bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm5,%xmm5
1468bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm3,%xmm3
1469bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm6,%xmm7
1470bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm2,%xmm2
1471bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm6,%xmm5
1472bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm1,%xmm1
1473bc3d5698SJohn Baldwin	vmovdqa	64(%esp),%xmm7
1474bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm6,%xmm6
1475bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm0,%xmm0
1476bc3d5698SJohn Baldwin	vmovdqa	128(%esp),%xmm5
1477bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm7,%xmm7
1478bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm4,%xmm4
1479bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm5,%xmm6
1480bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm3,%xmm3
1481bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm5,%xmm7
1482bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm0,%xmm0
1483bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm5,%xmm6
1484bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm1,%xmm1
1485bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm7
1486bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm5,%xmm5
1487bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm2,%xmm2
1488bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm3,%xmm5
1489bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm3,%xmm3
1490bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm0,%xmm6
1491bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm0,%xmm0
1492bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm4,%xmm4
1493bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm1,%xmm1
1494bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm4,%xmm5
1495bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm4,%xmm4
1496bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm1,%xmm6
1497bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm1,%xmm1
1498bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm2,%xmm2
1499bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm0,%xmm0
1500bc3d5698SJohn Baldwin	vpsllq	$2,%xmm5,%xmm5
1501bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm2,%xmm6
1502bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm2,%xmm2
1503bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm0,%xmm0
1504bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm3,%xmm3
1505bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm3,%xmm6
1506bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm0,%xmm5
1507bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm0,%xmm0
1508bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm3,%xmm3
1509bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm1,%xmm1
1510bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
1511bc3d5698SJohn Baldwin	decl	%ecx
1512bc3d5698SJohn Baldwin	jz	.L019square_break
1513bc3d5698SJohn Baldwin	vpunpcklqdq	(%esp),%xmm0,%xmm0
1514bc3d5698SJohn Baldwin	vpunpcklqdq	16(%esp),%xmm1,%xmm1
1515bc3d5698SJohn Baldwin	vpunpcklqdq	32(%esp),%xmm2,%xmm2
1516bc3d5698SJohn Baldwin	vpunpcklqdq	48(%esp),%xmm3,%xmm3
1517bc3d5698SJohn Baldwin	vpunpcklqdq	64(%esp),%xmm4,%xmm4
1518bc3d5698SJohn Baldwin	jmp	.L018square
1519bc3d5698SJohn Baldwin.L019square_break:
1520bc3d5698SJohn Baldwin	vpsllq	$32,%xmm0,%xmm0
1521bc3d5698SJohn Baldwin	vpsllq	$32,%xmm1,%xmm1
1522bc3d5698SJohn Baldwin	vpsllq	$32,%xmm2,%xmm2
1523bc3d5698SJohn Baldwin	vpsllq	$32,%xmm3,%xmm3
1524bc3d5698SJohn Baldwin	vpsllq	$32,%xmm4,%xmm4
1525bc3d5698SJohn Baldwin	vpor	(%esp),%xmm0,%xmm0
1526bc3d5698SJohn Baldwin	vpor	16(%esp),%xmm1,%xmm1
1527bc3d5698SJohn Baldwin	vpor	32(%esp),%xmm2,%xmm2
1528bc3d5698SJohn Baldwin	vpor	48(%esp),%xmm3,%xmm3
1529bc3d5698SJohn Baldwin	vpor	64(%esp),%xmm4,%xmm4
1530bc3d5698SJohn Baldwin	vpshufd	$141,%xmm0,%xmm0
1531bc3d5698SJohn Baldwin	vpshufd	$141,%xmm1,%xmm1
1532bc3d5698SJohn Baldwin	vpshufd	$141,%xmm2,%xmm2
1533bc3d5698SJohn Baldwin	vpshufd	$141,%xmm3,%xmm3
1534bc3d5698SJohn Baldwin	vpshufd	$141,%xmm4,%xmm4
1535bc3d5698SJohn Baldwin	vmovdqu	%xmm0,(%edi)
1536bc3d5698SJohn Baldwin	vmovdqu	%xmm1,16(%edi)
1537bc3d5698SJohn Baldwin	vmovdqu	%xmm2,32(%edi)
1538bc3d5698SJohn Baldwin	vmovdqu	%xmm3,48(%edi)
1539bc3d5698SJohn Baldwin	vmovdqu	%xmm4,64(%edi)
1540bc3d5698SJohn Baldwin	vpslld	$2,%xmm1,%xmm6
1541bc3d5698SJohn Baldwin	vpslld	$2,%xmm2,%xmm5
1542bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm6,%xmm6
1543bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm5,%xmm5
1544bc3d5698SJohn Baldwin	vmovdqu	%xmm6,80(%edi)
1545bc3d5698SJohn Baldwin	vmovdqu	%xmm5,96(%edi)
1546bc3d5698SJohn Baldwin	vpslld	$2,%xmm3,%xmm6
1547bc3d5698SJohn Baldwin	vpslld	$2,%xmm4,%xmm5
1548bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm6,%xmm6
1549bc3d5698SJohn Baldwin	vpaddd	%xmm4,%xmm5,%xmm5
1550bc3d5698SJohn Baldwin	vmovdqu	%xmm6,112(%edi)
1551bc3d5698SJohn Baldwin	vmovdqu	%xmm5,128(%edi)
1552bc3d5698SJohn Baldwin	movl	%ebp,%esp
1553bc3d5698SJohn Baldwin	leal	-48(%edi),%edi
1554bc3d5698SJohn Baldwin	ret
1555bc3d5698SJohn Baldwin.size	_poly1305_init_avx2,.-_poly1305_init_avx2
/*
 * _poly1305_blocks_avx2 -- AVX2 bulk block processing (i386, stack args).
 *
 * Stack arguments (read after the four register pushes below):
 *   20(%esp)  ctx     -> %edi
 *   24(%esp)  inp     -> %esi
 *   28(%esp)  len     -> %ecx   (rounded down to a multiple of 16 here)
 *   32(%esp)  padbit  -> %eax   (negated before use)
 *
 * Context layout as used by this routine:
 *   ctx+0 .. ctx+16   five hash words/limbs
 *   ctx+20            flag: nonzero once the hash is stored as 26-bit limbs
 *   ctx+48 .. ctx+191 nine 16-byte table entries written by
 *                     _poly1305_init_avx2
 *
 * Control flow:
 *   - len == 0 after masking      -> return immediately.
 *   - len < 64 and flag == 0      -> jump into the scalar poly1305_blocks
 *                                    at .Lenter_blocks (same prologue).
 *   - otherwise run the vector path; on first use the table is built and
 *     the hash is converted from 32-bit words to 26-bit limbs.
 *
 * Stack frame (448 bytes, 512-byte aligned, old %esp kept in %ebp):
 *   0..159(%esp)    five 32-byte scratch slots
 *   160..447(%esp)  nine 32-byte expanded table entries, addressed through
 *                   %edx = %esp+288 (+8/+16/+24 for a 3/2/1-block head)
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * %ymm0-%ymm7 are clobbered.  vzeroupper is executed on entry to the
 * vector path and before returning from it.
 */
.align	32
.type	_poly1305_blocks_avx2,@function
.align	16
_poly1305_blocks_avx2:
/* The four bytes below encode endbr32 (F3 0F 1E FB). */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	/* Only whole 16-byte blocks are processed. */
	andl	$-16,%ecx
	jz	.L020nodata
	/*
	 * Short input with the hash still in 32-bit words: hand over to the
	 * scalar code.  Once the limb flag is set the vector path is always
	 * taken, whatever the length.
	 */
	cmpl	$64,%ecx
	jae	.L021enter_avx2
	testl	%eax,%eax
	jz	.Lenter_blocks
.L021enter_avx2:
	vzeroupper
	/* call/pop yields the runtime address used to locate .Lconst_sse2. */
	call	.L022pic_point
.L022pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L022pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L023base2_26
	/* First vector use: build the table stored in the context. */
	call	_poly1305_init_avx2
	/*
	 * Repack the hash into 26-bit limbs.  Limb i starts at bit 26*i, so
	 * it is read from byte offset 0/3/6/9/13 and shifted right by
	 * 0/2/4/6/0.  Limbs 0-2 are masked with 2^26-1 (67108863); limb 3
	 * has exactly 26 bits left after the shift; limb 4 is kept unmasked.
	 */
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	shrl	$2,%ecx
	andl	$67108863,%eax
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movl	%eax,(%edi)
	movl	%ecx,4(%edi)
	movl	%edx,8(%edi)
	movl	%esi,12(%edi)
	movl	%ebp,16(%edi)
	movl	$1,20(%edi)
	/*
	 * %esi and %ecx were used as scratch above; reload inp and len.
	 * NOTE(review): len is reloaded without the $-16 mask applied on
	 * entry -- presumably callers only pass multiples of 16; confirm.
	 */
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.L023base2_26:
	movl	32(%esp),%eax
	movl	%esp,%ebp
	subl	$448,%esp
	andl	$-512,%esp
	/*
	 * Expand each 16-byte table entry from the context into a 32-byte
	 * stack slot (vpermq $64 + vpshufd $200), first five entries ...
	 */
	vmovdqu	48(%edi),%xmm0
	leal	288(%esp),%edx
	vmovdqu	64(%edi),%xmm1
	vmovdqu	80(%edi),%xmm2
	vmovdqu	96(%edi),%xmm3
	vmovdqu	112(%edi),%xmm4
	leal	48(%edi),%edi
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpermq	$64,%ymm4,%ymm4
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	vpshufd	$200,%ymm4,%ymm4
	/* ... then the remaining four, interleaved with the stores. */
	vmovdqa	%ymm0,-128(%edx)
	vmovdqu	80(%edi),%xmm0
	vmovdqa	%ymm1,-96(%edx)
	vmovdqu	96(%edi),%xmm1
	vmovdqa	%ymm2,-64(%edx)
	vmovdqu	112(%edi),%xmm2
	vmovdqa	%ymm3,-32(%edx)
	vmovdqu	128(%edi),%xmm3
	vmovdqa	%ymm4,(%edx)
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	/*
	 * Finish the table stores while loading the five hash limbs
	 * (ctx+0..ctx+16; %edi now points at ctx+48) into xmm0-xmm4.
	 */
	vmovdqa	%ymm0,32(%edx)
	vmovd	-48(%edi),%xmm0
	vmovdqa	%ymm1,64(%edx)
	vmovd	-44(%edi),%xmm1
	vmovdqa	%ymm2,96(%edx)
	vmovd	-40(%edi),%xmm2
	vmovdqa	%ymm3,128(%edx)
	vmovd	-36(%edi),%xmm3
	vmovd	-32(%edi),%xmm4
	/* ymm7 = 26-bit limb mask; eax = -padbit (0 or -1 for padbit 0/1). */
	vmovdqa	64(%ebx),%ymm7
	negl	%eax
	testl	$63,%ecx
	jz	.L024even
	/*
	 * len is not a multiple of 64: process the 1-3 leading blocks first
	 * through the tail code, then continue with whole 64-byte groups.
	 * edx = len mod 64, ecx = remaining whole groups in bytes.
	 */
	movl	%ecx,%edx
	andl	$-64,%ecx
	andl	$63,%edx
	vmovdqu	(%esi),%xmm5
	cmpl	$32,%edx
	jb	.L025one
	vmovdqu	16(%esi),%xmm6
	je	.L026two
	/* Three blocks: third block goes into the high lane of ymm5. */
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	leal	48(%esi),%esi
	leal	8(%ebx),%ebx
	leal	296(%esp),%edx
	jmp	.L027tail
.L026two:
	leal	32(%esi),%esi
	leal	16(%ebx),%ebx
	leal	304(%esp),%edx
	jmp	.L027tail
.L025one:
	leal	16(%esi),%esi
	vpxor	%ymm6,%ymm6,%ymm6
	/* ebx += 32 (all-zero row) or 24 (last pad qword) per padbit. */
	leal	32(%ebx,%eax,8),%ebx
	leal	312(%esp),%edx
	jmp	.L027tail
.align	32
.L024even:
	/* Load four blocks: ymm5 = blocks 0|2, ymm6 = blocks 1|3. */
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jz	.L027tail
.L028loop:
	/*
	 * Spill hash limbs 2/0/1, split the loaded blocks into five 26-bit
	 * limbs (ymm5, ymm6, ymm2, ymm0, ymm1), OR the pad constant from
	 * (%ebx) into the top limb and add the hash limbs.
	 */
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	/*
	 * Multiply the five limbs by the stack table entries, accumulating
	 * the products into ymm0-ymm4 (ymm5-ymm7 are scratch).
	 */
	vpmuludq	-96(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-64(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	96(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	128(%edx),%ymm2,%ymm1
	vpmuludq	-128(%edx),%ymm2,%ymm2
	vpmuludq	-32(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-96(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-64(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpmuludq	-64(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-32(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	128(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-128(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-96(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	vpmuludq	-128(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	64(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	128(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	vpmuludq	128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	32(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	/* Reload the limb mask; ymm7 was used as scratch above. */
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	64(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	96(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	/*
	 * Carry propagation between 26-bit limbs.  The carry out of limb 4
	 * is added to limb 0 once directly and once shifted left by 2,
	 * i.e. multiplied by 5.
	 */
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	/* Fetch the next four blocks and loop while input remains. */
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jnz	.L028loop
.L027tail:
	/*
	 * Same limb split / add / multiply as the loop body, but every
	 * table operand is read 4 bytes further on (-92 instead of -96,
	 * etc.), and the four lane results are summed into one afterwards.
	 */
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	/*
	 * Undo the +8/+16/+24/+32 head adjustment: .Lconst_sse2 is 64-byte
	 * aligned, so masking restores its base address.
	 */
	andl	$-64,%ebx
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-60(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	100(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	132(%edx),%ymm2,%ymm1
	vpmuludq	-124(%edx),%ymm2,%ymm2
	vpmuludq	-28(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	4(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-92(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-60(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpmuludq	-60(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-28(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	132(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-124(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	vpmuludq	-124(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-92(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	68(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	100(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	132(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	vpmuludq	132(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	36(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	68(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	100(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	/*
	 * Horizontal add: fold the upper qword of each 128-bit lane onto
	 * the lower one (vpsrldq $8), then fold the high lane onto the low
	 * lane (vpermq $2), leaving each limb total in qword 0.
	 */
	vpsrldq	$8,%ymm4,%ymm5
	vpsrldq	$8,%ymm3,%ymm6
	vpaddq	%ymm5,%ymm4,%ymm4
	vpsrldq	$8,%ymm0,%ymm5
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrldq	$8,%ymm1,%ymm6
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsrldq	$8,%ymm2,%ymm5
	vpaddq	%ymm6,%ymm1,%ymm1
	vpermq	$2,%ymm4,%ymm6
	vpaddq	%ymm5,%ymm2,%ymm2
	vpermq	$2,%ymm3,%ymm5
	vpaddq	%ymm6,%ymm4,%ymm4
	vpermq	$2,%ymm0,%ymm6
	vpaddq	%ymm5,%ymm3,%ymm3
	vpermq	$2,%ymm1,%ymm5
	vpaddq	%ymm6,%ymm0,%ymm0
	vpermq	$2,%ymm2,%ymm6
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	/* Carry propagation, identical to the one in the loop body. */
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	/*
	 * ecx != 0 only when the tail handled a 1-3 block head and whole
	 * 64-byte groups are still pending: reshuffle the limbs, reset the
	 * table pointer to its unshifted position and continue at .L024even.
	 */
	cmpl	$0,%ecx
	je	.L029done
	vpshufd	$252,%xmm0,%xmm0
	leal	288(%esp),%edx
	vpshufd	$252,%xmm1,%xmm1
	vpshufd	$252,%xmm2,%xmm2
	vpshufd	$252,%xmm3,%xmm3
	vpshufd	$252,%xmm4,%xmm4
	jmp	.L024even
.align	16
.L029done:
	/* Write the five limbs back to ctx+0..ctx+16 (%edi is ctx+48). */
	vmovd	%xmm0,-48(%edi)
	vmovd	%xmm1,-44(%edi)
	vmovd	%xmm2,-40(%edi)
	vmovd	%xmm3,-36(%edi)
	vmovd	%xmm4,-32(%edi)
	vzeroupper
	/* Drop the aligned frame; %ebp holds the pre-frame %esp. */
	movl	%ebp,%esp
.L020nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_avx2,.-_poly1305_blocks_avx2
/*
 * Shared constant table, 64-byte aligned so that code which temporarily
 * offsets its pointer into the first rows can recover the base with
 * "andl $-64".
 *
 *   +0   four qwords of 1<<24 (16777216): OR-ed into the top 26-bit
 *        message limb by _poly1305_blocks_avx2 (pad bit).
 *   +32  four zero qwords: read instead of the pad row, in whole or in
 *        part, when the pointer is offset for a short head or padbit 0.
 *   +64  four qwords of 2^26-1 (67108863): the 26-bit limb mask.
 *   +96  0x0fffffff, 0x0ffffffc x3: the same values poly1305_init
 *        applies to the key with andl immediates.
 *        NOTE(review): no reader of offset 96 is visible in this part
 *        of the file -- presumably used by the SSE2 setup; confirm.
 */
.align	64
.Lconst_sse2:
.long	16777216,0,16777216,0,16777216,0,16777216,0
.long	0,0,0,0,0,0,0,0
.long	67108863,0,67108863,0,67108863,0,67108863,0
.long	268435455,268435452,268435452,268435452
/* ASCII: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.align	4
/* 16-byte, 4-byte-aligned common symbol for the CPU capability vector. */
.comm	OPENSSL_ia32cap_P,16,4
/*
 * ELF program-property note.  Layout:
 *   namesz  = 1f - 0f   (size of the "GNU\0" name)
 *   descsz  = 4f - 1f   (size of the descriptor)
 *   type    = 5         (NT_GNU_PROPERTY_TYPE_0)
 *   name    = "GNU"
 *   desc    = { 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND),
 *               datasz = 3f - 2f, data = 3 (IBT | SHSTK) }
 * NOTE(review): the note is emitted unconditionally, while the endbr32
 * bytes at the function entries are guarded by #ifdef __CET__ -- confirm
 * that this pairing is intended for non-CET builds.
 */
	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
1970bc3d5698SJohn Baldwin#else
/*
 * poly1305_init -- non-PIC build variant (#else branch of #ifdef PIC).
 *
 * Stack arguments (read after the four register pushes below):
 *   20(%esp)  ctx    -> %edi
 *   24(%esp)  key    -> %esi   (may be NULL)
 *   28(%esp)  func   -> %ebp   (two-pointer output table)
 *
 * Effects:
 *   - ctx+0..ctx+20 (five hash words and the limb-format flag) are zeroed.
 *   - If key is NULL nothing else is written and %eax = 0 is returned.
 *   - Otherwise func[0]/func[1] receive the blocks/emit routines chosen
 *     from OPENSSL_ia32cap_P, the first 16 key bytes are masked and
 *     stored at ctx+24..ctx+36, and %eax = 1 is returned.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx and %edx
 * are clobbered.
 */
.text
.align	64
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
/* The four bytes below encode endbr32 (F3 0F 1E FB). */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ebp
	/* Clear the hash state and the flag word; %eax stays 0 for the */
	/* NULL-key return path. */
	xorl	%eax,%eax
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)
	cmpl	$0,%esi
	je	.L000nokey
	/* call/pop yields a runtime anchor for the function addresses. */
	call	.L001pic_point
.L001pic_point:
	popl	%ebx
	/* Default: scalar implementations. */
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	/* Capability vector is addressed absolutely in this variant. */
	leal	OPENSSL_ia32cap_P,%edi
	movl	(%edi),%ecx
	/*
	 * 83886080 = (1<<26)|(1<<24): both bits of capability word 0 must
	 * be set (SSE2 and FXSR in the CPUID.1:EDX layout) to use SSE2.
	 */
	andl	$83886080,%ecx
	cmpl	$83886080,%ecx
	jne	.L002no_sse2
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
	/* Bit 5 of capability word 2 additionally selects the AVX2 blocks */
	/* routine; the emit routine stays the SSE2 one. */
	movl	8(%edi),%ecx
	testl	$32,%ecx
	jz	.L002no_sse2
	leal	_poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
	/* %edi was reused for the capability pointer; reload ctx. */
	movl	20(%esp),%edi
	movl	%eax,(%ebp)
	movl	%edx,4(%ebp)
	/*
	 * Clamp the key: word 0 & 0x0fffffff, words 1-3 & 0x0ffffffc,
	 * then store the result at ctx+24..ctx+36.
	 */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax
	andl	$268435452,%ebx
	andl	$268435452,%ecx
	andl	$268435452,%edx
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin
2039bc3d5698SJohn Baldwin.globl	poly1305_blocks
2040bc3d5698SJohn Baldwin.type	poly1305_blocks,@function
2041bc3d5698SJohn Baldwin.align	16
2042bc3d5698SJohn Baldwinpoly1305_blocks:
2043bc3d5698SJohn Baldwin.L_poly1305_blocks_begin:
2044*c0855eaaSJohn Baldwin	#ifdef __CET__
2045*c0855eaaSJohn Baldwin
2046*c0855eaaSJohn Baldwin.byte	243,15,30,251
2047*c0855eaaSJohn Baldwin	#endif
2048*c0855eaaSJohn Baldwin
2049bc3d5698SJohn Baldwin	pushl	%ebp
2050bc3d5698SJohn Baldwin	pushl	%ebx
2051bc3d5698SJohn Baldwin	pushl	%esi
2052bc3d5698SJohn Baldwin	pushl	%edi
2053bc3d5698SJohn Baldwin	movl	20(%esp),%edi
2054bc3d5698SJohn Baldwin	movl	24(%esp),%esi
2055bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
2056bc3d5698SJohn Baldwin.Lenter_blocks:
2057bc3d5698SJohn Baldwin	andl	$-15,%ecx
2058bc3d5698SJohn Baldwin	jz	.L003nodata
2059bc3d5698SJohn Baldwin	subl	$64,%esp
2060bc3d5698SJohn Baldwin	movl	24(%edi),%eax
2061bc3d5698SJohn Baldwin	movl	28(%edi),%ebx
2062bc3d5698SJohn Baldwin	leal	(%esi,%ecx,1),%ebp
2063bc3d5698SJohn Baldwin	movl	32(%edi),%ecx
2064bc3d5698SJohn Baldwin	movl	36(%edi),%edx
2065bc3d5698SJohn Baldwin	movl	%ebp,92(%esp)
2066bc3d5698SJohn Baldwin	movl	%esi,%ebp
2067bc3d5698SJohn Baldwin	movl	%eax,36(%esp)
2068bc3d5698SJohn Baldwin	movl	%ebx,%eax
2069bc3d5698SJohn Baldwin	shrl	$2,%eax
2070bc3d5698SJohn Baldwin	movl	%ebx,40(%esp)
2071bc3d5698SJohn Baldwin	addl	%ebx,%eax
2072bc3d5698SJohn Baldwin	movl	%ecx,%ebx
2073bc3d5698SJohn Baldwin	shrl	$2,%ebx
2074bc3d5698SJohn Baldwin	movl	%ecx,44(%esp)
2075bc3d5698SJohn Baldwin	addl	%ecx,%ebx
2076bc3d5698SJohn Baldwin	movl	%edx,%ecx
2077bc3d5698SJohn Baldwin	shrl	$2,%ecx
2078bc3d5698SJohn Baldwin	movl	%edx,48(%esp)
2079bc3d5698SJohn Baldwin	addl	%edx,%ecx
2080bc3d5698SJohn Baldwin	movl	%eax,52(%esp)
2081bc3d5698SJohn Baldwin	movl	%ebx,56(%esp)
2082bc3d5698SJohn Baldwin	movl	%ecx,60(%esp)
2083bc3d5698SJohn Baldwin	movl	(%edi),%eax
2084bc3d5698SJohn Baldwin	movl	4(%edi),%ebx
2085bc3d5698SJohn Baldwin	movl	8(%edi),%ecx
2086bc3d5698SJohn Baldwin	movl	12(%edi),%esi
2087bc3d5698SJohn Baldwin	movl	16(%edi),%edi
2088bc3d5698SJohn Baldwin	jmp	.L004loop
2089bc3d5698SJohn Baldwin.align	32
2090bc3d5698SJohn Baldwin.L004loop:
2091bc3d5698SJohn Baldwin	addl	(%ebp),%eax
2092bc3d5698SJohn Baldwin	adcl	4(%ebp),%ebx
2093bc3d5698SJohn Baldwin	adcl	8(%ebp),%ecx
2094bc3d5698SJohn Baldwin	adcl	12(%ebp),%esi
2095bc3d5698SJohn Baldwin	leal	16(%ebp),%ebp
2096bc3d5698SJohn Baldwin	adcl	96(%esp),%edi
2097bc3d5698SJohn Baldwin	movl	%eax,(%esp)
2098bc3d5698SJohn Baldwin	movl	%esi,12(%esp)
2099bc3d5698SJohn Baldwin	mull	36(%esp)
2100bc3d5698SJohn Baldwin	movl	%edi,16(%esp)
2101bc3d5698SJohn Baldwin	movl	%eax,%edi
2102bc3d5698SJohn Baldwin	movl	%ebx,%eax
2103bc3d5698SJohn Baldwin	movl	%edx,%esi
2104bc3d5698SJohn Baldwin	mull	60(%esp)
2105bc3d5698SJohn Baldwin	addl	%eax,%edi
2106bc3d5698SJohn Baldwin	movl	%ecx,%eax
2107bc3d5698SJohn Baldwin	adcl	%edx,%esi
2108bc3d5698SJohn Baldwin	mull	56(%esp)
2109bc3d5698SJohn Baldwin	addl	%eax,%edi
2110bc3d5698SJohn Baldwin	movl	12(%esp),%eax
2111bc3d5698SJohn Baldwin	adcl	%edx,%esi
2112bc3d5698SJohn Baldwin	mull	52(%esp)
2113bc3d5698SJohn Baldwin	addl	%eax,%edi
2114bc3d5698SJohn Baldwin	movl	(%esp),%eax
2115bc3d5698SJohn Baldwin	adcl	%edx,%esi
2116bc3d5698SJohn Baldwin	mull	40(%esp)
2117bc3d5698SJohn Baldwin	movl	%edi,20(%esp)
2118bc3d5698SJohn Baldwin	xorl	%edi,%edi
2119bc3d5698SJohn Baldwin	addl	%eax,%esi
2120bc3d5698SJohn Baldwin	movl	%ebx,%eax
2121bc3d5698SJohn Baldwin	adcl	%edx,%edi
2122bc3d5698SJohn Baldwin	mull	36(%esp)
2123bc3d5698SJohn Baldwin	addl	%eax,%esi
2124bc3d5698SJohn Baldwin	movl	%ecx,%eax
2125bc3d5698SJohn Baldwin	adcl	%edx,%edi
2126bc3d5698SJohn Baldwin	mull	60(%esp)
2127bc3d5698SJohn Baldwin	addl	%eax,%esi
2128bc3d5698SJohn Baldwin	movl	12(%esp),%eax
2129bc3d5698SJohn Baldwin	adcl	%edx,%edi
2130bc3d5698SJohn Baldwin	mull	56(%esp)
2131bc3d5698SJohn Baldwin	addl	%eax,%esi
2132bc3d5698SJohn Baldwin	movl	16(%esp),%eax
2133bc3d5698SJohn Baldwin	adcl	%edx,%edi
2134bc3d5698SJohn Baldwin	imull	52(%esp),%eax
2135bc3d5698SJohn Baldwin	addl	%eax,%esi
2136bc3d5698SJohn Baldwin	movl	(%esp),%eax
2137bc3d5698SJohn Baldwin	adcl	$0,%edi
2138bc3d5698SJohn Baldwin	mull	44(%esp)
2139bc3d5698SJohn Baldwin	movl	%esi,24(%esp)
2140bc3d5698SJohn Baldwin	xorl	%esi,%esi
2141bc3d5698SJohn Baldwin	addl	%eax,%edi
2142bc3d5698SJohn Baldwin	movl	%ebx,%eax
2143bc3d5698SJohn Baldwin	adcl	%edx,%esi
2144bc3d5698SJohn Baldwin	mull	40(%esp)
2145bc3d5698SJohn Baldwin	addl	%eax,%edi
2146bc3d5698SJohn Baldwin	movl	%ecx,%eax
2147bc3d5698SJohn Baldwin	adcl	%edx,%esi
2148bc3d5698SJohn Baldwin	mull	36(%esp)
2149bc3d5698SJohn Baldwin	addl	%eax,%edi
2150bc3d5698SJohn Baldwin	movl	12(%esp),%eax
2151bc3d5698SJohn Baldwin	adcl	%edx,%esi
2152bc3d5698SJohn Baldwin	mull	60(%esp)
2153bc3d5698SJohn Baldwin	addl	%eax,%edi
2154bc3d5698SJohn Baldwin	movl	16(%esp),%eax
2155bc3d5698SJohn Baldwin	adcl	%edx,%esi
2156bc3d5698SJohn Baldwin	imull	56(%esp),%eax
2157bc3d5698SJohn Baldwin	addl	%eax,%edi
2158bc3d5698SJohn Baldwin	movl	(%esp),%eax
2159bc3d5698SJohn Baldwin	adcl	$0,%esi
2160bc3d5698SJohn Baldwin	mull	48(%esp)
2161bc3d5698SJohn Baldwin	movl	%edi,28(%esp)
2162bc3d5698SJohn Baldwin	xorl	%edi,%edi
2163bc3d5698SJohn Baldwin	addl	%eax,%esi
2164bc3d5698SJohn Baldwin	movl	%ebx,%eax
2165bc3d5698SJohn Baldwin	adcl	%edx,%edi
2166bc3d5698SJohn Baldwin	mull	44(%esp)
2167bc3d5698SJohn Baldwin	addl	%eax,%esi
2168bc3d5698SJohn Baldwin	movl	%ecx,%eax
2169bc3d5698SJohn Baldwin	adcl	%edx,%edi
2170bc3d5698SJohn Baldwin	mull	40(%esp)
2171bc3d5698SJohn Baldwin	addl	%eax,%esi
2172bc3d5698SJohn Baldwin	movl	12(%esp),%eax
2173bc3d5698SJohn Baldwin	adcl	%edx,%edi
2174bc3d5698SJohn Baldwin	mull	36(%esp)
2175bc3d5698SJohn Baldwin	addl	%eax,%esi
2176bc3d5698SJohn Baldwin	movl	16(%esp),%ecx
2177bc3d5698SJohn Baldwin	adcl	%edx,%edi
2178bc3d5698SJohn Baldwin	movl	%ecx,%edx
2179bc3d5698SJohn Baldwin	imull	60(%esp),%ecx
2180bc3d5698SJohn Baldwin	addl	%ecx,%esi
2181bc3d5698SJohn Baldwin	movl	20(%esp),%eax
2182bc3d5698SJohn Baldwin	adcl	$0,%edi
2183bc3d5698SJohn Baldwin	imull	36(%esp),%edx
2184bc3d5698SJohn Baldwin	addl	%edi,%edx
2185bc3d5698SJohn Baldwin	movl	24(%esp),%ebx
2186bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
2187bc3d5698SJohn Baldwin	movl	%edx,%edi
2188bc3d5698SJohn Baldwin	shrl	$2,%edx
2189bc3d5698SJohn Baldwin	andl	$3,%edi
2190bc3d5698SJohn Baldwin	leal	(%edx,%edx,4),%edx
2191bc3d5698SJohn Baldwin	addl	%edx,%eax
2192bc3d5698SJohn Baldwin	adcl	$0,%ebx
2193bc3d5698SJohn Baldwin	adcl	$0,%ecx
2194bc3d5698SJohn Baldwin	adcl	$0,%esi
2195bc3d5698SJohn Baldwin	adcl	$0,%edi
2196bc3d5698SJohn Baldwin	cmpl	92(%esp),%ebp
2197bc3d5698SJohn Baldwin	jne	.L004loop
2198bc3d5698SJohn Baldwin	movl	84(%esp),%edx
2199bc3d5698SJohn Baldwin	addl	$64,%esp
2200bc3d5698SJohn Baldwin	movl	%eax,(%edx)
2201bc3d5698SJohn Baldwin	movl	%ebx,4(%edx)
2202bc3d5698SJohn Baldwin	movl	%ecx,8(%edx)
2203bc3d5698SJohn Baldwin	movl	%esi,12(%edx)
2204bc3d5698SJohn Baldwin	movl	%edi,16(%edx)
2205bc3d5698SJohn Baldwin.L003nodata:
2206bc3d5698SJohn Baldwin	popl	%edi
2207bc3d5698SJohn Baldwin	popl	%esi
2208bc3d5698SJohn Baldwin	popl	%ebx
2209bc3d5698SJohn Baldwin	popl	%ebp
2210bc3d5698SJohn Baldwin	ret
2211bc3d5698SJohn Baldwin.size	poly1305_blocks,.-.L_poly1305_blocks_begin
/*
 * poly1305_emit
 *
 * Stack-argument function (arguments are read at 20/24/28(%esp) after the
 * four register pushes below):
 *   arg1 - pointer to five 32-bit words h0..h4 (only read)
 *   arg2 - pointer to a 16-byte buffer that receives the result (it is also
 *          used as scratch storage before the final store)
 *   arg3 - pointer to four 32-bit words that are added to the result
 * NOTE(review): presumably arg1/arg2/arg3 are the Poly1305 state, the MAC
 * output and the nonce; the C prototype is not visible in this file.
 *
 * Operation: g = h + 5 is computed over all five words.  If the sum reaches
 * bit 130 (bit 2 of the top word) the low 128 bits of g are selected,
 * otherwise the low 128 bits of h.  The selection is done with AND/OR masks
 * rather than a branch.  The four words at arg3 are then added with carry and
 * the 128-bit sum is stored; the carry out of the top word is dropped.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and the
 * flags are overwritten.
 */
2212bc3d5698SJohn Baldwin.globl	poly1305_emit
2213bc3d5698SJohn Baldwin.type	poly1305_emit,@function
2214bc3d5698SJohn Baldwin.align	16
2215bc3d5698SJohn Baldwinpoly1305_emit:
2216bc3d5698SJohn Baldwin.L_poly1305_emit_begin:
/* The four bytes F3 0F 1E FB encode ENDBR32; emitted only when __CET__ is defined. */
2217*c0855eaaSJohn Baldwin	#ifdef __CET__
2218*c0855eaaSJohn Baldwin
2219*c0855eaaSJohn Baldwin.byte	243,15,30,251
2220*c0855eaaSJohn Baldwin	#endif
2221*c0855eaaSJohn Baldwin
2222bc3d5698SJohn Baldwin	pushl	%ebp
2223bc3d5698SJohn Baldwin	pushl	%ebx
2224bc3d5698SJohn Baldwin	pushl	%esi
2225bc3d5698SJohn Baldwin	pushl	%edi
2226bc3d5698SJohn Baldwin	movl	20(%esp),%ebp
/*
 * Secondary entry label: code arriving here must already have the same stack
 * frame and the state pointer in %ebp.
 * NOTE(review): the jump that targets this label is outside this chunk.
 */
2227bc3d5698SJohn Baldwin.Lenter_emit:
2228bc3d5698SJohn Baldwin	movl	24(%esp),%edi
/* Load h0..h4 and compute g = h + 5 with carry propagated into the top word. */
2229bc3d5698SJohn Baldwin	movl	(%ebp),%eax
2230bc3d5698SJohn Baldwin	movl	4(%ebp),%ebx
2231bc3d5698SJohn Baldwin	movl	8(%ebp),%ecx
2232bc3d5698SJohn Baldwin	movl	12(%ebp),%edx
2233bc3d5698SJohn Baldwin	movl	16(%ebp),%esi
2234bc3d5698SJohn Baldwin	addl	$5,%eax
2235bc3d5698SJohn Baldwin	adcl	$0,%ebx
2236bc3d5698SJohn Baldwin	adcl	$0,%ecx
2237bc3d5698SJohn Baldwin	adcl	$0,%edx
2238bc3d5698SJohn Baldwin	adcl	$0,%esi
/*
 * Turn "g reached bit 130" into a mask: (top word >> 2) is negated, giving 0
 * or all-ones when the shifted value is 0 or 1.  The scalar block loop in
 * this file leaves the top word at most 4 (andl $3 plus one carry), so that
 * holds for state written by it.
 */
2239bc3d5698SJohn Baldwin	shrl	$2,%esi
2240bc3d5698SJohn Baldwin	negl	%esi
/* Keep g where the mask is set and park it in the output buffer. */
2241bc3d5698SJohn Baldwin	andl	%esi,%eax
2242bc3d5698SJohn Baldwin	andl	%esi,%ebx
2243bc3d5698SJohn Baldwin	andl	%esi,%ecx
2244bc3d5698SJohn Baldwin	andl	%esi,%edx
2245bc3d5698SJohn Baldwin	movl	%eax,(%edi)
2246bc3d5698SJohn Baldwin	movl	%ebx,4(%edi)
2247bc3d5698SJohn Baldwin	movl	%ecx,8(%edi)
2248bc3d5698SJohn Baldwin	movl	%edx,12(%edi)
/* Invert the mask, reload h0..h3 and keep h where g was not selected. */
2249bc3d5698SJohn Baldwin	notl	%esi
2250bc3d5698SJohn Baldwin	movl	(%ebp),%eax
2251bc3d5698SJohn Baldwin	movl	4(%ebp),%ebx
2252bc3d5698SJohn Baldwin	movl	8(%ebp),%ecx
2253bc3d5698SJohn Baldwin	movl	12(%ebp),%edx
/* %ebp is no longer needed as the state pointer; reuse it for arg3. */
2254bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
2255bc3d5698SJohn Baldwin	andl	%esi,%eax
2256bc3d5698SJohn Baldwin	andl	%esi,%ebx
2257bc3d5698SJohn Baldwin	andl	%esi,%ecx
2258bc3d5698SJohn Baldwin	andl	%esi,%edx
/* Merge the two masked candidates: exactly one of them is non-zero per bit. */
2259bc3d5698SJohn Baldwin	orl	(%edi),%eax
2260bc3d5698SJohn Baldwin	orl	4(%edi),%ebx
2261bc3d5698SJohn Baldwin	orl	8(%edi),%ecx
2262bc3d5698SJohn Baldwin	orl	12(%edi),%edx
/* Add the four words at arg3 with carry; the final carry is discarded. */
2263bc3d5698SJohn Baldwin	addl	(%ebp),%eax
2264bc3d5698SJohn Baldwin	adcl	4(%ebp),%ebx
2265bc3d5698SJohn Baldwin	adcl	8(%ebp),%ecx
2266bc3d5698SJohn Baldwin	adcl	12(%ebp),%edx
/* Store the 16-byte result. */
2267bc3d5698SJohn Baldwin	movl	%eax,(%edi)
2268bc3d5698SJohn Baldwin	movl	%ebx,4(%edi)
2269bc3d5698SJohn Baldwin	movl	%ecx,8(%edi)
2270bc3d5698SJohn Baldwin	movl	%edx,12(%edi)
2271bc3d5698SJohn Baldwin	popl	%edi
2272bc3d5698SJohn Baldwin	popl	%esi
2273bc3d5698SJohn Baldwin	popl	%ebx
2274bc3d5698SJohn Baldwin	popl	%ebp
2275bc3d5698SJohn Baldwin	ret
2276bc3d5698SJohn Baldwin.size	poly1305_emit,.-.L_poly1305_emit_begin
/*
 * _poly1305_init_sse2 - file-local helper (no .globl) with a register-based
 * contract instead of stack arguments:
 *   %edi - base pointer; 16 bytes are read from 24(%edi) and 144 bytes are
 *          written to 48(%edi)..191(%edi).  %edi is advanced by 48 on entry
 *          and moved back by 48 before returning.
 *   %ebx - base of a constant table; the value at 64(%ebx) is used as the
 *          per-limb mask.  The SSE2 block routine in this file loads %ebx
 *          with the address of .Lconst_sse2 before calling here.
 *          NOTE(review): the mask is presumably 2^26-1; the table itself is
 *          outside this chunk.
 * Overwrites %ebp (used to hold the caller's %esp), %ecx, %edx, xmm0-xmm7 and
 * the flags without saving them.  %esp is lowered and 16-byte aligned for the
 * scratch area and restored from %ebp before the return.
 *
 * Operation: the 128-bit input x is split into five limbs at bit offsets
 * 0/26/52/78/104.  The loop body runs twice; each pass multiplies the limb
 * vectors by a broadcast copy of their own low lane, folding the high partial
 * products back in through precomputed 5*limb values and carrying between
 * limbs.  After the two passes the lanes hold x^4, x^3, x^2 and x (read as
 * multiplication modulo 2^130-5, limbs only partially reduced), which are
 * stored together with 5*limb1..5*limb4.
 * NOTE(review): x is presumably the clamped key r; confirm against the init
 * routine that fills 24(%edi).
 *
 * Scratch layout relative to the aligned %esp:
 *     0.. 79  current limbs 0..4
 *    80..143  5*limb1 .. 5*limb4
 *   144..223  (%edx) low-lane broadcasts of limbs 0..4
 */
2277bc3d5698SJohn Baldwin.align	32
2278bc3d5698SJohn Baldwin.type	_poly1305_init_sse2,@function
2279bc3d5698SJohn Baldwin.align	16
2280bc3d5698SJohn Baldwin_poly1305_init_sse2:
/* The four bytes F3 0F 1E FB encode ENDBR32; emitted only when __CET__ is defined. */
2281*c0855eaaSJohn Baldwin	#ifdef __CET__
2282*c0855eaaSJohn Baldwin
2283*c0855eaaSJohn Baldwin.byte	243,15,30,251
2284*c0855eaaSJohn Baldwin	#endif
2285*c0855eaaSJohn Baldwin
2286bc3d5698SJohn Baldwin	movdqu	24(%edi),%xmm4
2287bc3d5698SJohn Baldwin	leal	48(%edi),%edi
/* Save %esp in %ebp, then carve out a 16-byte aligned scratch area (movdqa needs it). */
2288bc3d5698SJohn Baldwin	movl	%esp,%ebp
2289bc3d5698SJohn Baldwin	subl	$224,%esp
2290bc3d5698SJohn Baldwin	andl	$-16,%esp
/* movq loads only the low 64 bits of the mask; the upper half of xmm7 is zero here. */
2291bc3d5698SJohn Baldwin	movq	64(%ebx),%xmm7
/*
 * Split x into limbs: xmm0 = bits 0.., xmm1 = bits 26.., xmm2 = bits 52..
 * (48 via byte shift + 4), xmm3 = bits 78.. (48 + 30), xmm4 = bits 104..
 * (13-byte shift).  xmm0-xmm3 are masked; xmm4 needs no mask after the shift.
 */
2292bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm0
2293bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm1
2294bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm2
2295bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2296bc3d5698SJohn Baldwin	psrlq	$26,%xmm1
2297bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
2298bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
2299bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
2300bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
2301bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
2302bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2303bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2304bc3d5698SJohn Baldwin	psrldq	$13,%xmm4
/* %edx -> broadcast area; %ecx = pass counter (the loop body executes twice). */
2305bc3d5698SJohn Baldwin	leal	144(%esp),%edx
2306bc3d5698SJohn Baldwin	movl	$2,%ecx
2307bc3d5698SJohn Baldwin.L005square:
/* Save the current limbs 0..4. */
2308bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
2309bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
2310bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
2311bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
2312bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
/* Precompute 5*limb as (limb << 2) + limb for limbs 1..4. */
2313bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
2314bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm5
2315bc3d5698SJohn Baldwin	pslld	$2,%xmm6
2316bc3d5698SJohn Baldwin	pslld	$2,%xmm5
2317bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm6
2318bc3d5698SJohn Baldwin	paddd	%xmm2,%xmm5
2319bc3d5698SJohn Baldwin	movdqa	%xmm6,80(%esp)
2320bc3d5698SJohn Baldwin	movdqa	%xmm5,96(%esp)
2321bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm6
2322bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm5
2323bc3d5698SJohn Baldwin	pslld	$2,%xmm6
2324bc3d5698SJohn Baldwin	pslld	$2,%xmm5
2325bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
2326bc3d5698SJohn Baldwin	paddd	%xmm4,%xmm5
2327bc3d5698SJohn Baldwin	movdqa	%xmm6,112(%esp)
2328bc3d5698SJohn Baldwin	movdqa	%xmm5,128(%esp)
/*
 * pshufd $68 copies the low 64-bit lane into both lanes.  The broadcasts of
 * limbs 0..4 go to (%edx); xmm5 keeps the un-broadcast limb 1.
 */
2329bc3d5698SJohn Baldwin	pshufd	$68,%xmm0,%xmm6
2330bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
2331bc3d5698SJohn Baldwin	pshufd	$68,%xmm1,%xmm1
2332bc3d5698SJohn Baldwin	pshufd	$68,%xmm2,%xmm2
2333bc3d5698SJohn Baldwin	pshufd	$68,%xmm3,%xmm3
2334bc3d5698SJohn Baldwin	pshufd	$68,%xmm4,%xmm4
2335bc3d5698SJohn Baldwin	movdqa	%xmm6,(%edx)
2336bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%edx)
2337bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%edx)
2338bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%edx)
2339bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%edx)
/*
 * Schoolbook multiply of the saved limbs (and their 5x copies) by the
 * broadcast limbs.  xmm0..xmm4 accumulate the 64-bit column sums for result
 * limbs 0..4; products that would land above limb 4 use the 5*limb operand
 * and are added to the lower columns instead.  The first group multiplies
 * limb 0 by each broadcast limb.
 */
2340bc3d5698SJohn Baldwin	pmuludq	%xmm0,%xmm4
2341bc3d5698SJohn Baldwin	pmuludq	%xmm0,%xmm3
2342bc3d5698SJohn Baldwin	pmuludq	%xmm0,%xmm2
2343bc3d5698SJohn Baldwin	pmuludq	%xmm0,%xmm1
2344bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm0
/* Limb 1 times broadcast limbs 3,2,1,0, and 5*limb1 times broadcast limb 4. */
2345bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2346bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm5
2347bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2348bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm6
2349bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
2350bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2351bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm7
2352bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2353bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm6
2354bc3d5698SJohn Baldwin	pmuludq	(%edx),%xmm5
2355bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2356bc3d5698SJohn Baldwin	pmuludq	64(%edx),%xmm6
/* Limb 2 times broadcast limbs 2,1,0, and 5*limb2 times broadcast limbs 4,3. */
2357bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm7
2358bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2359bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2360bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm7
2361bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
2362bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2363bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm5
2364bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
2365bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm7
2366bc3d5698SJohn Baldwin	pmuludq	(%edx),%xmm6
2367bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
2368bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2369bc3d5698SJohn Baldwin	pmuludq	64(%edx),%xmm7
2370bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
2371bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm5
/* Limb 3 times broadcast limbs 1,0, and 5*limb3 times broadcast limbs 4,3,2. */
2372bc3d5698SJohn Baldwin	movdqa	48(%esp),%xmm6
2373bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
2374bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2375bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm6
2376bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
2377bc3d5698SJohn Baldwin	movdqa	112(%esp),%xmm5
2378bc3d5698SJohn Baldwin	pmuludq	(%edx),%xmm7
2379bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
2380bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2381bc3d5698SJohn Baldwin	pmuludq	64(%edx),%xmm5
2382bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
2383bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2384bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm6
2385bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
2386bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm7
/* Limb 4 times broadcast limb 0, and 5*limb4 times broadcast limbs 4,1,2,3. */
2387bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm5
2388bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
2389bc3d5698SJohn Baldwin	movdqa	128(%esp),%xmm6
2390bc3d5698SJohn Baldwin	pmuludq	(%edx),%xmm5
2391bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2392bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2393bc3d5698SJohn Baldwin	pmuludq	64(%edx),%xmm6
2394bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
2395bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2396bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm7
2397bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2398bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2399bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm5
2400bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2401bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm6
/* Reload the full 128-bit mask; xmm7 was used as a temporary above. */
2402bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
2403bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2404bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
/*
 * Carry propagation.  Each column sum is split into its masked low part and
 * the part shifted right by 26, which is added to the next limb.  The carry
 * out of limb 4 is added to limb 0 multiplied by 5 (carry + (carry << 2)).
 * The two chains (3 -> 4 -> 0 -> 1 and 0 -> 1 -> 2 -> 3 -> 4) are interleaved.
 */
2405bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
2406bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2407bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2408bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
2409bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
2410bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2411bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2412bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
2413bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
2414bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2415bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
2416bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
2417bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2418bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
2419bc3d5698SJohn Baldwin	psllq	$2,%xmm5
2420bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
2421bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
2422bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
2423bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
2424bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2425bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2426bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
2427bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
2428bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2429bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
2430bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2431bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2432bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
2433bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2434bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
2435bc3d5698SJohn Baldwin	decl	%ecx
2436bc3d5698SJohn Baldwin	jz	.L006square_break
/*
 * First pass only: put the saved input limbs into the high lane next to the
 * fresh product in the low lane, so the second pass multiplies both lanes by
 * the broadcast of the new low lane.
 */
2437bc3d5698SJohn Baldwin	punpcklqdq	(%esp),%xmm0
2438bc3d5698SJohn Baldwin	punpcklqdq	16(%esp),%xmm1
2439bc3d5698SJohn Baldwin	punpcklqdq	32(%esp),%xmm2
2440bc3d5698SJohn Baldwin	punpcklqdq	48(%esp),%xmm3
2441bc3d5698SJohn Baldwin	punpcklqdq	64(%esp),%xmm4
2442bc3d5698SJohn Baldwin	jmp	.L005square
2443bc3d5698SJohn Baldwin.L006square_break:
/*
 * Interleave the second-pass products with the second-pass inputs saved at
 * (%esp): shift the products into the odd dwords, OR in the inputs (even
 * dwords), then pshufd $141 reorders the dwords as [1,3,0,2].  Each limb
 * vector ends up as dwords { x^4, x^3, x^2, x } from low to high.
 */
2444bc3d5698SJohn Baldwin	psllq	$32,%xmm0
2445bc3d5698SJohn Baldwin	psllq	$32,%xmm1
2446bc3d5698SJohn Baldwin	psllq	$32,%xmm2
2447bc3d5698SJohn Baldwin	psllq	$32,%xmm3
2448bc3d5698SJohn Baldwin	psllq	$32,%xmm4
2449bc3d5698SJohn Baldwin	por	(%esp),%xmm0
2450bc3d5698SJohn Baldwin	por	16(%esp),%xmm1
2451bc3d5698SJohn Baldwin	por	32(%esp),%xmm2
2452bc3d5698SJohn Baldwin	por	48(%esp),%xmm3
2453bc3d5698SJohn Baldwin	por	64(%esp),%xmm4
2454bc3d5698SJohn Baldwin	pshufd	$141,%xmm0,%xmm0
2455bc3d5698SJohn Baldwin	pshufd	$141,%xmm1,%xmm1
2456bc3d5698SJohn Baldwin	pshufd	$141,%xmm2,%xmm2
2457bc3d5698SJohn Baldwin	pshufd	$141,%xmm3,%xmm3
2458bc3d5698SJohn Baldwin	pshufd	$141,%xmm4,%xmm4
/* Store limb vectors 0..4 (unaligned stores; %edi alignment is not assumed). */
2459bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
2460bc3d5698SJohn Baldwin	movdqu	%xmm1,16(%edi)
2461bc3d5698SJohn Baldwin	movdqu	%xmm2,32(%edi)
2462bc3d5698SJohn Baldwin	movdqu	%xmm3,48(%edi)
2463bc3d5698SJohn Baldwin	movdqu	%xmm4,64(%edi)
/* Store 5*limb1 .. 5*limb4 of the same four values right after them. */
2464bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
2465bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm5
2466bc3d5698SJohn Baldwin	pslld	$2,%xmm6
2467bc3d5698SJohn Baldwin	pslld	$2,%xmm5
2468bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm6
2469bc3d5698SJohn Baldwin	paddd	%xmm2,%xmm5
2470bc3d5698SJohn Baldwin	movdqu	%xmm6,80(%edi)
2471bc3d5698SJohn Baldwin	movdqu	%xmm5,96(%edi)
2472bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm6
2473bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm5
2474bc3d5698SJohn Baldwin	pslld	$2,%xmm6
2475bc3d5698SJohn Baldwin	pslld	$2,%xmm5
2476bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
2477bc3d5698SJohn Baldwin	paddd	%xmm4,%xmm5
2478bc3d5698SJohn Baldwin	movdqu	%xmm6,112(%edi)
2479bc3d5698SJohn Baldwin	movdqu	%xmm5,128(%edi)
/* Release the scratch area and undo the +48 applied to %edi on entry. */
2480bc3d5698SJohn Baldwin	movl	%ebp,%esp
2481bc3d5698SJohn Baldwin	leal	-48(%edi),%edi
2482bc3d5698SJohn Baldwin	ret
2483bc3d5698SJohn Baldwin.size	_poly1305_init_sse2,.-_poly1305_init_sse2
2484bc3d5698SJohn Baldwin.align	32
2485bc3d5698SJohn Baldwin.type	_poly1305_blocks_sse2,@function
2486bc3d5698SJohn Baldwin.align	16
2487bc3d5698SJohn Baldwin_poly1305_blocks_sse2:
2488*c0855eaaSJohn Baldwin	#ifdef __CET__
2489*c0855eaaSJohn Baldwin
2490*c0855eaaSJohn Baldwin.byte	243,15,30,251
2491*c0855eaaSJohn Baldwin	#endif
2492*c0855eaaSJohn Baldwin
2493bc3d5698SJohn Baldwin	pushl	%ebp
2494bc3d5698SJohn Baldwin	pushl	%ebx
2495bc3d5698SJohn Baldwin	pushl	%esi
2496bc3d5698SJohn Baldwin	pushl	%edi
2497bc3d5698SJohn Baldwin	movl	20(%esp),%edi
2498bc3d5698SJohn Baldwin	movl	24(%esp),%esi
2499bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
2500bc3d5698SJohn Baldwin	movl	20(%edi),%eax
2501bc3d5698SJohn Baldwin	andl	$-16,%ecx
2502bc3d5698SJohn Baldwin	jz	.L007nodata
2503bc3d5698SJohn Baldwin	cmpl	$64,%ecx
2504bc3d5698SJohn Baldwin	jae	.L008enter_sse2
2505bc3d5698SJohn Baldwin	testl	%eax,%eax
2506bc3d5698SJohn Baldwin	jz	.Lenter_blocks
2507bc3d5698SJohn Baldwin.align	16
2508bc3d5698SJohn Baldwin.L008enter_sse2:
2509bc3d5698SJohn Baldwin	call	.L009pic_point
2510bc3d5698SJohn Baldwin.L009pic_point:
2511bc3d5698SJohn Baldwin	popl	%ebx
2512bc3d5698SJohn Baldwin	leal	.Lconst_sse2-.L009pic_point(%ebx),%ebx
2513bc3d5698SJohn Baldwin	testl	%eax,%eax
2514bc3d5698SJohn Baldwin	jnz	.L010base2_26
2515bc3d5698SJohn Baldwin	call	_poly1305_init_sse2
2516bc3d5698SJohn Baldwin	movl	(%edi),%eax
2517bc3d5698SJohn Baldwin	movl	3(%edi),%ecx
2518bc3d5698SJohn Baldwin	movl	6(%edi),%edx
2519bc3d5698SJohn Baldwin	movl	9(%edi),%esi
2520bc3d5698SJohn Baldwin	movl	13(%edi),%ebp
2521bc3d5698SJohn Baldwin	movl	$1,20(%edi)
2522bc3d5698SJohn Baldwin	shrl	$2,%ecx
2523bc3d5698SJohn Baldwin	andl	$67108863,%eax
2524bc3d5698SJohn Baldwin	shrl	$4,%edx
2525bc3d5698SJohn Baldwin	andl	$67108863,%ecx
2526bc3d5698SJohn Baldwin	shrl	$6,%esi
2527bc3d5698SJohn Baldwin	andl	$67108863,%edx
2528bc3d5698SJohn Baldwin	movd	%eax,%xmm0
2529bc3d5698SJohn Baldwin	movd	%ecx,%xmm1
2530bc3d5698SJohn Baldwin	movd	%edx,%xmm2
2531bc3d5698SJohn Baldwin	movd	%esi,%xmm3
2532bc3d5698SJohn Baldwin	movd	%ebp,%xmm4
2533bc3d5698SJohn Baldwin	movl	24(%esp),%esi
2534bc3d5698SJohn Baldwin	movl	28(%esp),%ecx
2535bc3d5698SJohn Baldwin	jmp	.L011base2_32
2536bc3d5698SJohn Baldwin.align	16
2537bc3d5698SJohn Baldwin.L010base2_26:
2538bc3d5698SJohn Baldwin	movd	(%edi),%xmm0
2539bc3d5698SJohn Baldwin	movd	4(%edi),%xmm1
2540bc3d5698SJohn Baldwin	movd	8(%edi),%xmm2
2541bc3d5698SJohn Baldwin	movd	12(%edi),%xmm3
2542bc3d5698SJohn Baldwin	movd	16(%edi),%xmm4
2543bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
2544bc3d5698SJohn Baldwin.L011base2_32:
2545bc3d5698SJohn Baldwin	movl	32(%esp),%eax
2546bc3d5698SJohn Baldwin	movl	%esp,%ebp
2547bc3d5698SJohn Baldwin	subl	$528,%esp
2548bc3d5698SJohn Baldwin	andl	$-16,%esp
2549bc3d5698SJohn Baldwin	leal	48(%edi),%edi
2550bc3d5698SJohn Baldwin	shll	$24,%eax
2551bc3d5698SJohn Baldwin	testl	$31,%ecx
2552bc3d5698SJohn Baldwin	jz	.L012even
2553bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm6
2554bc3d5698SJohn Baldwin	leal	16(%esi),%esi
2555bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm5
2556bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
2557bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm0
2558bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2559bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2560bc3d5698SJohn Baldwin	psrldq	$6,%xmm6
2561bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
2562bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
2563bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm5
2564bc3d5698SJohn Baldwin	psrlq	$4,%xmm6
2565bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
2566bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm2
2567bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2568bc3d5698SJohn Baldwin	psrlq	$30,%xmm5
2569bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
2570bc3d5698SJohn Baldwin	psrldq	$7,%xmm6
2571bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm3
2572bc3d5698SJohn Baldwin	movd	%eax,%xmm5
2573bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
2574bc3d5698SJohn Baldwin	movd	12(%edi),%xmm6
2575bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm4
2576bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
2577bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
2578bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
2579bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
2580bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
2581bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm0
2582bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm1
2583bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm2
2584bc3d5698SJohn Baldwin	movd	28(%edi),%xmm5
2585bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm3
2586bc3d5698SJohn Baldwin	pmuludq	%xmm6,%xmm4
2587bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2588bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
2589bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2590bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm6
2591bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
2592bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2593bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
2594bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2595bc3d5698SJohn Baldwin	movd	92(%edi),%xmm6
2596bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
2597bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2598bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
2599bc3d5698SJohn Baldwin	movd	44(%edi),%xmm7
2600bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2601bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2602bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
2603bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
2604bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2605bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm5
2606bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
2607bc3d5698SJohn Baldwin	movd	108(%edi),%xmm7
2608bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm6
2609bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
2610bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2611bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm7
2612bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
2613bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
2614bc3d5698SJohn Baldwin	movd	60(%edi),%xmm6
2615bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
2616bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2617bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm6
2618bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
2619bc3d5698SJohn Baldwin	movd	124(%edi),%xmm5
2620bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm7
2621bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
2622bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2623bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm5
2624bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
2625bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2626bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
2627bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
2628bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
2629bc3d5698SJohn Baldwin	movd	76(%edi),%xmm5
2630bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
2631bc3d5698SJohn Baldwin	movd	140(%edi),%xmm6
2632bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
2633bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2634bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2635bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
2636bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
2637bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2638bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
2639bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2640bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2641bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm5
2642bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2643bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
2644bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
2645bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2646bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
2647bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
2648bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2649bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2650bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
2651bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
2652bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2653bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2654bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
2655bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
2656bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2657bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
2658bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
2659bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2660bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
2661bc3d5698SJohn Baldwin	psllq	$2,%xmm5
2662bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
2663bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
2664bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
2665bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
2666bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2667bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2668bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
2669bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
2670bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2671bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
2672bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2673bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2674bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
2675bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2676bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
2677bc3d5698SJohn Baldwin	subl	$16,%ecx
2678bc3d5698SJohn Baldwin	jz	.L013done
2679bc3d5698SJohn Baldwin.L012even:
2680bc3d5698SJohn Baldwin	leal	384(%esp),%edx
2681bc3d5698SJohn Baldwin	leal	-32(%esi),%eax
2682bc3d5698SJohn Baldwin	subl	$64,%ecx
2683bc3d5698SJohn Baldwin	movdqu	(%edi),%xmm5
2684bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
2685bc3d5698SJohn Baldwin	cmovbl	%eax,%esi
2686bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
2687bc3d5698SJohn Baldwin	movdqa	%xmm6,(%edx)
2688bc3d5698SJohn Baldwin	leal	160(%esp),%eax
2689bc3d5698SJohn Baldwin	movdqu	16(%edi),%xmm6
2690bc3d5698SJohn Baldwin	movdqa	%xmm5,-144(%edx)
2691bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
2692bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
2693bc3d5698SJohn Baldwin	movdqa	%xmm5,16(%edx)
2694bc3d5698SJohn Baldwin	movdqu	32(%edi),%xmm5
2695bc3d5698SJohn Baldwin	movdqa	%xmm6,-128(%edx)
2696bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
2697bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
2698bc3d5698SJohn Baldwin	movdqa	%xmm6,32(%edx)
2699bc3d5698SJohn Baldwin	movdqu	48(%edi),%xmm6
2700bc3d5698SJohn Baldwin	movdqa	%xmm5,-112(%edx)
2701bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
2702bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
2703bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%edx)
2704bc3d5698SJohn Baldwin	movdqu	64(%edi),%xmm5
2705bc3d5698SJohn Baldwin	movdqa	%xmm6,-96(%edx)
2706bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
2707bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
2708bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%edx)
2709bc3d5698SJohn Baldwin	movdqu	80(%edi),%xmm6
2710bc3d5698SJohn Baldwin	movdqa	%xmm5,-80(%edx)
2711bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
2712bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
2713bc3d5698SJohn Baldwin	movdqa	%xmm5,80(%edx)
2714bc3d5698SJohn Baldwin	movdqu	96(%edi),%xmm5
2715bc3d5698SJohn Baldwin	movdqa	%xmm6,-64(%edx)
2716bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
2717bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
2718bc3d5698SJohn Baldwin	movdqa	%xmm6,96(%edx)
2719bc3d5698SJohn Baldwin	movdqu	112(%edi),%xmm6
2720bc3d5698SJohn Baldwin	movdqa	%xmm5,-48(%edx)
2721bc3d5698SJohn Baldwin	pshufd	$68,%xmm6,%xmm5
2722bc3d5698SJohn Baldwin	pshufd	$238,%xmm6,%xmm6
2723bc3d5698SJohn Baldwin	movdqa	%xmm5,112(%edx)
2724bc3d5698SJohn Baldwin	movdqu	128(%edi),%xmm5
2725bc3d5698SJohn Baldwin	movdqa	%xmm6,-32(%edx)
2726bc3d5698SJohn Baldwin	pshufd	$68,%xmm5,%xmm6
2727bc3d5698SJohn Baldwin	pshufd	$238,%xmm5,%xmm5
2728bc3d5698SJohn Baldwin	movdqa	%xmm6,128(%edx)
2729bc3d5698SJohn Baldwin	movdqa	%xmm5,-16(%edx)
2730bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm5
2731bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm6
2732bc3d5698SJohn Baldwin	leal	32(%esi),%esi
2733bc3d5698SJohn Baldwin	movdqa	%xmm2,112(%esp)
2734bc3d5698SJohn Baldwin	movdqa	%xmm3,128(%esp)
2735bc3d5698SJohn Baldwin	movdqa	%xmm4,144(%esp)
2736bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
2737bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
2738bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
2739bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
2740bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
2741bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
2742bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
2743bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
2744bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
2745bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
2746bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
2747bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2748bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
2749bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2750bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
2751bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
2752bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2753bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2754bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
2755bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
2756bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2757bc3d5698SJohn Baldwin	jbe	.L014skip_loop
2758bc3d5698SJohn Baldwin	jmp	.L015loop
2759bc3d5698SJohn Baldwin.align	32
2760bc3d5698SJohn Baldwin.L015loop:
2761bc3d5698SJohn Baldwin	movdqa	-144(%edx),%xmm7
2762bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%eax)
2763bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%eax)
2764bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%eax)
2765bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%eax)
2766bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
2767bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
2768bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm0
2769bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
2770bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
2771bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
2772bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
2773bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm0
2774bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
2775bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm1
2776bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
2777bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2778bc3d5698SJohn Baldwin	pmuludq	-112(%edx),%xmm7
2779bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
2780bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2781bc3d5698SJohn Baldwin	pmuludq	-96(%edx),%xmm5
2782bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2783bc3d5698SJohn Baldwin	movdqa	16(%eax),%xmm7
2784bc3d5698SJohn Baldwin	pmuludq	-80(%edx),%xmm6
2785bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
2786bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2787bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm7
2788bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
2789bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2790bc3d5698SJohn Baldwin	pmuludq	-112(%edx),%xmm5
2791bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2792bc3d5698SJohn Baldwin	movdqa	32(%eax),%xmm7
2793bc3d5698SJohn Baldwin	pmuludq	-96(%edx),%xmm6
2794bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
2795bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2796bc3d5698SJohn Baldwin	pmuludq	-32(%edx),%xmm7
2797bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
2798bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2799bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm5
2800bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2801bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2802bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm6
2803bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2804bc3d5698SJohn Baldwin	movdqa	48(%eax),%xmm5
2805bc3d5698SJohn Baldwin	pmuludq	-112(%edx),%xmm7
2806bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2807bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2808bc3d5698SJohn Baldwin	pmuludq	-48(%edx),%xmm5
2809bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
2810bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2811bc3d5698SJohn Baldwin	pmuludq	-32(%edx),%xmm6
2812bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
2813bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2814bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm7
2815bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
2816bc3d5698SJohn Baldwin	movdqa	64(%eax),%xmm6
2817bc3d5698SJohn Baldwin	pmuludq	-128(%edx),%xmm5
2818bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2819bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2820bc3d5698SJohn Baldwin	pmuludq	-16(%edx),%xmm6
2821bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
2822bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2823bc3d5698SJohn Baldwin	pmuludq	-64(%edx),%xmm7
2824bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2825bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2826bc3d5698SJohn Baldwin	pmuludq	-48(%edx),%xmm5
2827bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2828bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
2829bc3d5698SJohn Baldwin	pmuludq	-32(%edx),%xmm6
2830bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2831bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
2832bc3d5698SJohn Baldwin	movdqu	-32(%esi),%xmm5
2833bc3d5698SJohn Baldwin	movdqu	-16(%esi),%xmm6
2834bc3d5698SJohn Baldwin	leal	32(%esi),%esi
2835bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
2836bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
2837bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
2838bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
2839bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
2840bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
2841bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
2842bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
2843bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
2844bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
2845bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
2846bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
2847bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
2848bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
2849bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2850bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
2851bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2852bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
2853bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
2854bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2855bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2856bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
2857bc3d5698SJohn Baldwin	leal	-32(%esi),%eax
2858bc3d5698SJohn Baldwin	subl	$64,%ecx
2859bc3d5698SJohn Baldwin	paddd	80(%esp),%xmm5
2860bc3d5698SJohn Baldwin	paddd	96(%esp),%xmm6
2861bc3d5698SJohn Baldwin	paddd	112(%esp),%xmm2
2862bc3d5698SJohn Baldwin	paddd	128(%esp),%xmm3
2863bc3d5698SJohn Baldwin	paddd	144(%esp),%xmm4
2864bc3d5698SJohn Baldwin	cmovbl	%eax,%esi
2865bc3d5698SJohn Baldwin	leal	160(%esp),%eax
2866bc3d5698SJohn Baldwin	movdqa	(%edx),%xmm7
2867bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
2868bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%eax)
2869bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%eax)
2870bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%eax)
2871bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%eax)
2872bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
2873bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
2874bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
2875bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm0
2876bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
2877bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
2878bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
2879bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
2880bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm6
2881bc3d5698SJohn Baldwin	paddq	32(%esp),%xmm2
2882bc3d5698SJohn Baldwin	paddq	48(%esp),%xmm3
2883bc3d5698SJohn Baldwin	paddq	64(%esp),%xmm4
2884bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm0
2885bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
2886bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm1
2887bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
2888bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2889bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm7
2890bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
2891bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2892bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm5
2893bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2894bc3d5698SJohn Baldwin	movdqa	16(%eax),%xmm7
2895bc3d5698SJohn Baldwin	pmuludq	64(%edx),%xmm6
2896bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
2897bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2898bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm7
2899bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
2900bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2901bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm5
2902bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2903bc3d5698SJohn Baldwin	movdqa	32(%eax),%xmm7
2904bc3d5698SJohn Baldwin	pmuludq	48(%edx),%xmm6
2905bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
2906bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2907bc3d5698SJohn Baldwin	pmuludq	112(%edx),%xmm7
2908bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
2909bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2910bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm5
2911bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2912bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2913bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm6
2914bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2915bc3d5698SJohn Baldwin	movdqa	48(%eax),%xmm5
2916bc3d5698SJohn Baldwin	pmuludq	32(%edx),%xmm7
2917bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2918bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2919bc3d5698SJohn Baldwin	pmuludq	96(%edx),%xmm5
2920bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
2921bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2922bc3d5698SJohn Baldwin	pmuludq	112(%edx),%xmm6
2923bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
2924bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2925bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm7
2926bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
2927bc3d5698SJohn Baldwin	movdqa	64(%eax),%xmm6
2928bc3d5698SJohn Baldwin	pmuludq	16(%edx),%xmm5
2929bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
2930bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
2931bc3d5698SJohn Baldwin	pmuludq	128(%edx),%xmm6
2932bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
2933bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
2934bc3d5698SJohn Baldwin	pmuludq	80(%edx),%xmm7
2935bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
2936bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2937bc3d5698SJohn Baldwin	pmuludq	96(%edx),%xmm5
2938bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
2939bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
2940bc3d5698SJohn Baldwin	pmuludq	112(%edx),%xmm6
2941bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
2942bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
2943bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
2944bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2945bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2946bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
2947bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
2948bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2949bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2950bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
2951bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
2952bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2953bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
2954bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
2955bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2956bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
2957bc3d5698SJohn Baldwin	psllq	$2,%xmm5
2958bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
2959bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
2960bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
2961bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
2962bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2963bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2964bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
2965bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
2966bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
2967bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
2968bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2969bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
2970bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
2971bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2972bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
2973bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm5
2974bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm6
2975bc3d5698SJohn Baldwin	leal	32(%esi),%esi
2976bc3d5698SJohn Baldwin	movdqa	%xmm2,112(%esp)
2977bc3d5698SJohn Baldwin	movdqa	%xmm3,128(%esp)
2978bc3d5698SJohn Baldwin	movdqa	%xmm4,144(%esp)
2979bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
2980bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
2981bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
2982bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
2983bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
2984bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
2985bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
2986bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
2987bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
2988bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
2989bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
2990bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
2991bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
2992bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
2993bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
2994bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
2995bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
2996bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
2997bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
2998bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
2999bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
3000bc3d5698SJohn Baldwin	ja	.L015loop
3001bc3d5698SJohn Baldwin.L014skip_loop:
3002bc3d5698SJohn Baldwin	pshufd	$16,-144(%edx),%xmm7
3003bc3d5698SJohn Baldwin	addl	$32,%ecx
3004bc3d5698SJohn Baldwin	jnz	.L016long_tail
3005bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm5
3006bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm6
3007bc3d5698SJohn Baldwin	paddd	112(%esp),%xmm2
3008bc3d5698SJohn Baldwin	paddd	128(%esp),%xmm3
3009bc3d5698SJohn Baldwin	paddd	144(%esp),%xmm4
3010bc3d5698SJohn Baldwin.L016long_tail:
3011bc3d5698SJohn Baldwin	movdqa	%xmm5,(%eax)
3012bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%eax)
3013bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%eax)
3014bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%eax)
3015bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%eax)
3016bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
3017bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
3018bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
3019bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
3020bc3d5698SJohn Baldwin	pshufd	$16,-128(%edx),%xmm5
3021bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
3022bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
3023bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
3024bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3025bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm5
3026bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3027bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm6
3028bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
3029bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3030bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm7
3031bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
3032bc3d5698SJohn Baldwin	pshufd	$16,-64(%edx),%xmm6
3033bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm5
3034bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
3035bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm6
3036bc3d5698SJohn Baldwin	pshufd	$16,-112(%edx),%xmm7
3037bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
3038bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3039bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm7
3040bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
3041bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3042bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm5
3043bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
3044bc3d5698SJohn Baldwin	pshufd	$16,-48(%edx),%xmm7
3045bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm6
3046bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
3047bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3048bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm7
3049bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
3050bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm5
3051bc3d5698SJohn Baldwin	pshufd	$16,-96(%edx),%xmm6
3052bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
3053bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3054bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm6
3055bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
3056bc3d5698SJohn Baldwin	pshufd	$16,-32(%edx),%xmm5
3057bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm7
3058bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
3059bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3060bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm5
3061bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
3062bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3063bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm6
3064bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
3065bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm7
3066bc3d5698SJohn Baldwin	pshufd	$16,-80(%edx),%xmm5
3067bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
3068bc3d5698SJohn Baldwin	pshufd	$16,-16(%edx),%xmm6
3069bc3d5698SJohn Baldwin	pmuludq	(%eax),%xmm5
3070bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
3071bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3072bc3d5698SJohn Baldwin	pmuludq	64(%eax),%xmm6
3073bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
3074bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3075bc3d5698SJohn Baldwin	pmuludq	16(%eax),%xmm7
3076bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
3077bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3078bc3d5698SJohn Baldwin	pmuludq	32(%eax),%xmm5
3079bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
3080bc3d5698SJohn Baldwin	pmuludq	48(%eax),%xmm6
3081bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
3082bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
3083bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
3084bc3d5698SJohn Baldwin	jz	.L017short_tail
3085bc3d5698SJohn Baldwin	movdqu	-32(%esi),%xmm5
3086bc3d5698SJohn Baldwin	movdqu	-16(%esi),%xmm6
3087bc3d5698SJohn Baldwin	leal	32(%esi),%esi
3088bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
3089bc3d5698SJohn Baldwin	movdqa	%xmm3,48(%esp)
3090bc3d5698SJohn Baldwin	movdqa	%xmm4,64(%esp)
3091bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
3092bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
3093bc3d5698SJohn Baldwin	psrldq	$6,%xmm2
3094bc3d5698SJohn Baldwin	psrldq	$6,%xmm3
3095bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
3096bc3d5698SJohn Baldwin	punpcklqdq	%xmm3,%xmm2
3097bc3d5698SJohn Baldwin	punpckhqdq	%xmm6,%xmm4
3098bc3d5698SJohn Baldwin	punpcklqdq	%xmm6,%xmm5
3099bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
3100bc3d5698SJohn Baldwin	psrlq	$4,%xmm2
3101bc3d5698SJohn Baldwin	psrlq	$30,%xmm3
3102bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3103bc3d5698SJohn Baldwin	psrlq	$40,%xmm4
3104bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
3105bc3d5698SJohn Baldwin	pand	%xmm7,%xmm5
3106bc3d5698SJohn Baldwin	pand	%xmm7,%xmm6
3107bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
3108bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
3109bc3d5698SJohn Baldwin	por	(%ebx),%xmm4
3110bc3d5698SJohn Baldwin	pshufd	$16,(%edx),%xmm7
3111bc3d5698SJohn Baldwin	paddd	80(%esp),%xmm5
3112bc3d5698SJohn Baldwin	paddd	96(%esp),%xmm6
3113bc3d5698SJohn Baldwin	paddd	112(%esp),%xmm2
3114bc3d5698SJohn Baldwin	paddd	128(%esp),%xmm3
3115bc3d5698SJohn Baldwin	paddd	144(%esp),%xmm4
3116bc3d5698SJohn Baldwin	movdqa	%xmm5,(%esp)
3117bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm5
3118bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%esp)
3119bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm6
3120bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
3121bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm5
3122bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm2
3123bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
3124bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm6
3125bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm3
3126bc3d5698SJohn Baldwin	paddq	32(%esp),%xmm2
3127bc3d5698SJohn Baldwin	movdqa	%xmm5,32(%esp)
3128bc3d5698SJohn Baldwin	pshufd	$16,16(%edx),%xmm5
3129bc3d5698SJohn Baldwin	paddq	48(%esp),%xmm3
3130bc3d5698SJohn Baldwin	movdqa	%xmm6,48(%esp)
3131bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm6
3132bc3d5698SJohn Baldwin	pmuludq	%xmm7,%xmm4
3133bc3d5698SJohn Baldwin	paddq	64(%esp),%xmm4
3134bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
3135bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3136bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
3137bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3138bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm6
3139bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
3140bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3141bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
3142bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
3143bc3d5698SJohn Baldwin	pshufd	$16,80(%edx),%xmm6
3144bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
3145bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm2
3146bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
3147bc3d5698SJohn Baldwin	pshufd	$16,32(%edx),%xmm7
3148bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
3149bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3150bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
3151bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
3152bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3153bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm5
3154bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm4
3155bc3d5698SJohn Baldwin	pshufd	$16,96(%edx),%xmm7
3156bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm6
3157bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
3158bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3159bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm7
3160bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
3161bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm5
3162bc3d5698SJohn Baldwin	pshufd	$16,48(%edx),%xmm6
3163bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm1
3164bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3165bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm6
3166bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm0
3167bc3d5698SJohn Baldwin	pshufd	$16,112(%edx),%xmm5
3168bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm7
3169bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
3170bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3171bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm5
3172bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm3
3173bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3174bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
3175bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm2
3176bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm7
3177bc3d5698SJohn Baldwin	pshufd	$16,64(%edx),%xmm5
3178bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm1
3179bc3d5698SJohn Baldwin	pshufd	$16,128(%edx),%xmm6
3180bc3d5698SJohn Baldwin	pmuludq	(%esp),%xmm5
3181bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
3182bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
3183bc3d5698SJohn Baldwin	pmuludq	64(%esp),%xmm6
3184bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm4
3185bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
3186bc3d5698SJohn Baldwin	pmuludq	16(%esp),%xmm7
3187bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm3
3188bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm6
3189bc3d5698SJohn Baldwin	pmuludq	32(%esp),%xmm5
3190bc3d5698SJohn Baldwin	paddq	%xmm7,%xmm0
3191bc3d5698SJohn Baldwin	pmuludq	48(%esp),%xmm6
3192bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm7
3193bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
3194bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
3195bc3d5698SJohn Baldwin.L017short_tail:
3196bc3d5698SJohn Baldwin	pshufd	$78,%xmm4,%xmm6
3197bc3d5698SJohn Baldwin	pshufd	$78,%xmm3,%xmm5
3198bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm4
3199bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm3
3200bc3d5698SJohn Baldwin	pshufd	$78,%xmm0,%xmm6
3201bc3d5698SJohn Baldwin	pshufd	$78,%xmm1,%xmm5
3202bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm0
3203bc3d5698SJohn Baldwin	paddq	%xmm5,%xmm1
3204bc3d5698SJohn Baldwin	pshufd	$78,%xmm2,%xmm6
3205bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm5
3206bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
3207bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
3208bc3d5698SJohn Baldwin	paddq	%xmm6,%xmm2
3209bc3d5698SJohn Baldwin	paddq	%xmm4,%xmm5
3210bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
3211bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
3212bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
3213bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
3214bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm6
3215bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
3216bc3d5698SJohn Baldwin	pand	%xmm7,%xmm4
3217bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
3218bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
3219bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm0
3220bc3d5698SJohn Baldwin	psllq	$2,%xmm5
3221bc3d5698SJohn Baldwin	paddq	%xmm2,%xmm6
3222bc3d5698SJohn Baldwin	paddq	%xmm0,%xmm5
3223bc3d5698SJohn Baldwin	pand	%xmm7,%xmm1
3224bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
3225bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
3226bc3d5698SJohn Baldwin	pand	%xmm7,%xmm2
3227bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm6
3228bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm0
3229bc3d5698SJohn Baldwin	psrlq	$26,%xmm5
3230bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm3
3231bc3d5698SJohn Baldwin	psrlq	$26,%xmm6
3232bc3d5698SJohn Baldwin	pand	%xmm7,%xmm0
3233bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm1
3234bc3d5698SJohn Baldwin	pand	%xmm7,%xmm3
3235bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm4
3236bc3d5698SJohn Baldwin.L013done:
3237bc3d5698SJohn Baldwin	movd	%xmm0,-48(%edi)
3238bc3d5698SJohn Baldwin	movd	%xmm1,-44(%edi)
3239bc3d5698SJohn Baldwin	movd	%xmm2,-40(%edi)
3240bc3d5698SJohn Baldwin	movd	%xmm3,-36(%edi)
3241bc3d5698SJohn Baldwin	movd	%xmm4,-32(%edi)
3242bc3d5698SJohn Baldwin	movl	%ebp,%esp
3243bc3d5698SJohn Baldwin.L007nodata:
3244bc3d5698SJohn Baldwin	popl	%edi
3245bc3d5698SJohn Baldwin	popl	%esi
3246bc3d5698SJohn Baldwin	popl	%ebx
3247bc3d5698SJohn Baldwin	popl	%ebp
3248bc3d5698SJohn Baldwin	ret
3249bc3d5698SJohn Baldwin.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2
3250bc3d5698SJohn Baldwin.align	32
/*
 * _poly1305_emit_sse2 -- three stack arguments (offsets below are the
 * ones in effect after the four pushes):
 *   20(%esp)  context pointer; the dwords at +0..+16 are read as five
 *             26-bit limbs and the dword at +20 is tested as a flag.
 *   24(%esp)  destination; four dwords are written (the buffer is also
 *             used as scratch before the final store).
 *   28(%esp)  four dwords that are added to the reduced value.
 *             NOTE(review): presumably the Poly1305 nonce words --
 *             confirm against the C prototype.
 * Saves and restores %ebp/%ebx/%esi/%edi; clobbers %eax, %ecx, %edx,
 * %xmm0-%xmm3 and the flags.
 */
3251bc3d5698SJohn Baldwin.type	_poly1305_emit_sse2,@function
3252bc3d5698SJohn Baldwin.align	16
3253bc3d5698SJohn Baldwin_poly1305_emit_sse2:
/* With __CET__ defined the entry point starts with F3 0F 1E FB (endbr32). */
3254*c0855eaaSJohn Baldwin	#ifdef __CET__
3255*c0855eaaSJohn Baldwin
3256*c0855eaaSJohn Baldwin.byte	243,15,30,251
3257*c0855eaaSJohn Baldwin	#endif
3258*c0855eaaSJohn Baldwin
3259bc3d5698SJohn Baldwin	pushl	%ebp
3260bc3d5698SJohn Baldwin	pushl	%ebx
3261bc3d5698SJohn Baldwin	pushl	%esi
3262bc3d5698SJohn Baldwin	pushl	%edi
3263bc3d5698SJohn Baldwin	movl	20(%esp),%ebp
/*
 * Flag at ctx+20 clear: continue at .Lenter_emit, which is defined
 * outside this part of the file. The four registers pushed above are
 * still on the stack at that point (presumably that code pops them).
 * In this file _poly1305_blocks_avx2 sets the flag to 1 after it has
 * rewritten ctx+0..+16 as 26-bit limbs.
 */
3264bc3d5698SJohn Baldwin	cmpl	$0,20(%ebp)
3265bc3d5698SJohn Baldwin	je	.Lenter_emit
/*
 * Pack the five limbs into 32-bit words. Limb i starts at bit 26 times i,
 * so it is split with the shift pairs 26/6, 20/12, 14/18 and 8/24; the
 * adcl $0 after each addl carries into the next word.
 * Result: %eax,%ebx,%ecx,%edx = bits 0..127, %esi = bits 128 and up.
 */
3266bc3d5698SJohn Baldwin	movl	(%ebp),%eax
3267bc3d5698SJohn Baldwin	movl	4(%ebp),%edi
3268bc3d5698SJohn Baldwin	movl	8(%ebp),%ecx
3269bc3d5698SJohn Baldwin	movl	12(%ebp),%edx
3270bc3d5698SJohn Baldwin	movl	16(%ebp),%esi
3271bc3d5698SJohn Baldwin	movl	%edi,%ebx
3272bc3d5698SJohn Baldwin	shll	$26,%edi
3273bc3d5698SJohn Baldwin	shrl	$6,%ebx
3274bc3d5698SJohn Baldwin	addl	%edi,%eax
3275bc3d5698SJohn Baldwin	movl	%ecx,%edi
3276bc3d5698SJohn Baldwin	adcl	$0,%ebx
3277bc3d5698SJohn Baldwin	shll	$20,%edi
3278bc3d5698SJohn Baldwin	shrl	$12,%ecx
3279bc3d5698SJohn Baldwin	addl	%edi,%ebx
3280bc3d5698SJohn Baldwin	movl	%edx,%edi
3281bc3d5698SJohn Baldwin	adcl	$0,%ecx
3282bc3d5698SJohn Baldwin	shll	$14,%edi
3283bc3d5698SJohn Baldwin	shrl	$18,%edx
3284bc3d5698SJohn Baldwin	addl	%edi,%ecx
3285bc3d5698SJohn Baldwin	movl	%esi,%edi
3286bc3d5698SJohn Baldwin	adcl	$0,%edx
3287bc3d5698SJohn Baldwin	shll	$8,%edi
3288bc3d5698SJohn Baldwin	shrl	$24,%esi
3289bc3d5698SJohn Baldwin	addl	%edi,%edx
3290bc3d5698SJohn Baldwin	adcl	$0,%esi
/*
 * Fold everything at bit 130 and above back in: keep bits 128..129 in
 * %esi, multiply the rest by 5 (lea x+4x) and add it at bit 0, which is
 * the identity 2^130 = 5 modulo 2^130-5. The loads of the second and
 * third arguments are interleaved here; movl and leal leave the flags
 * untouched, so the carry chain is not disturbed.
 */
3291bc3d5698SJohn Baldwin	movl	%esi,%edi
3292bc3d5698SJohn Baldwin	andl	$3,%esi
3293bc3d5698SJohn Baldwin	shrl	$2,%edi
3294bc3d5698SJohn Baldwin	leal	(%edi,%edi,4),%ebp
3295bc3d5698SJohn Baldwin	movl	24(%esp),%edi
3296bc3d5698SJohn Baldwin	addl	%ebp,%eax
3297bc3d5698SJohn Baldwin	movl	28(%esp),%ebp
3298bc3d5698SJohn Baldwin	adcl	$0,%ebx
3299bc3d5698SJohn Baldwin	adcl	$0,%ecx
3300bc3d5698SJohn Baldwin	adcl	$0,%edx
3301bc3d5698SJohn Baldwin	adcl	$0,%esi
/*
 * Keep a copy of the value h in %xmm0-%xmm3 and compute h+5 in the
 * integer registers (movd does not modify the flags). A carry out of
 * bit 129 of h+5 means h >= 2^130-5, in which case the low 128 bits of
 * h+5 are the wanted result.
 */
3302bc3d5698SJohn Baldwin	movd	%eax,%xmm0
3303bc3d5698SJohn Baldwin	addl	$5,%eax
3304bc3d5698SJohn Baldwin	movd	%ebx,%xmm1
3305bc3d5698SJohn Baldwin	adcl	$0,%ebx
3306bc3d5698SJohn Baldwin	movd	%ecx,%xmm2
3307bc3d5698SJohn Baldwin	adcl	$0,%ecx
3308bc3d5698SJohn Baldwin	movd	%edx,%xmm3
3309bc3d5698SJohn Baldwin	adcl	$0,%edx
3310bc3d5698SJohn Baldwin	adcl	$0,%esi
/* %esi becomes 0 or 1 (bit 130 of h+5), then an all-zero or all-one mask. */
3311bc3d5698SJohn Baldwin	shrl	$2,%esi
3312bc3d5698SJohn Baldwin	negl	%esi
/*
 * Branch-free select: park (h+5) & mask in the destination buffer,
 * reload h from the xmm copies, and OR in h & ~mask.
 */
3313bc3d5698SJohn Baldwin	andl	%esi,%eax
3314bc3d5698SJohn Baldwin	andl	%esi,%ebx
3315bc3d5698SJohn Baldwin	andl	%esi,%ecx
3316bc3d5698SJohn Baldwin	andl	%esi,%edx
3317bc3d5698SJohn Baldwin	movl	%eax,(%edi)
3318bc3d5698SJohn Baldwin	movd	%xmm0,%eax
3319bc3d5698SJohn Baldwin	movl	%ebx,4(%edi)
3320bc3d5698SJohn Baldwin	movd	%xmm1,%ebx
3321bc3d5698SJohn Baldwin	movl	%ecx,8(%edi)
3322bc3d5698SJohn Baldwin	movd	%xmm2,%ecx
3323bc3d5698SJohn Baldwin	movl	%edx,12(%edi)
3324bc3d5698SJohn Baldwin	movd	%xmm3,%edx
3325bc3d5698SJohn Baldwin	notl	%esi
3326bc3d5698SJohn Baldwin	andl	%esi,%eax
3327bc3d5698SJohn Baldwin	andl	%esi,%ebx
3328bc3d5698SJohn Baldwin	orl	(%edi),%eax
3329bc3d5698SJohn Baldwin	andl	%esi,%ecx
3330bc3d5698SJohn Baldwin	orl	4(%edi),%ebx
3331bc3d5698SJohn Baldwin	andl	%esi,%edx
3332bc3d5698SJohn Baldwin	orl	8(%edi),%ecx
3333bc3d5698SJohn Baldwin	orl	12(%edi),%edx
/*
 * Add the four dwords of the third argument as one 128-bit addition
 * and store the sum; the carry out of the top word is discarded.
 */
3334bc3d5698SJohn Baldwin	addl	(%ebp),%eax
3335bc3d5698SJohn Baldwin	adcl	4(%ebp),%ebx
3336bc3d5698SJohn Baldwin	movl	%eax,(%edi)
3337bc3d5698SJohn Baldwin	adcl	8(%ebp),%ecx
3338bc3d5698SJohn Baldwin	movl	%ebx,4(%edi)
3339bc3d5698SJohn Baldwin	adcl	12(%ebp),%edx
3340bc3d5698SJohn Baldwin	movl	%ecx,8(%edi)
3341bc3d5698SJohn Baldwin	movl	%edx,12(%edi)
3342bc3d5698SJohn Baldwin	popl	%edi
3343bc3d5698SJohn Baldwin	popl	%esi
3344bc3d5698SJohn Baldwin	popl	%ebx
3345bc3d5698SJohn Baldwin	popl	%ebp
3346bc3d5698SJohn Baldwin	ret
3347bc3d5698SJohn Baldwin.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2
3348bc3d5698SJohn Baldwin.align	32
/*
 * _poly1305_init_avx2 -- helper with a register-based interface (it
 * reads no stack arguments and pushes nothing).
 *
 * In:   %edi = context pointer.
 *       %ebx = constant table; the caller in this file passes the
 *              address of .Lconst_sse2. 64(%ebx) is ANDed after every
 *              26-bit shift (NOTE(review): presumably 0x3ffffff per
 *              qword lane -- the table itself is not in this part of
 *              the file).
 * Out:  ctx+48..ctx+127   five 16-byte vectors, one per 26-bit limb.
 *       ctx+128..ctx+191  the vectors for limbs 1..4 multiplied by 5.
 *       %edi is returned unchanged (advanced by 48, then restored).
 * Clobbers: %ecx, %edx, %ebp (holds the caller's %esp and is not
 *       restored), %xmm0-%xmm7, flags.
 *
 * With x the 128-bit value loaded from ctx+24, tracing the lanes gives
 * the dword order {x^4, x^3, x^2, x} in every stored vector, each power
 * reduced as described at the carry chain below.
 * NOTE(review): x is presumably the clamped key r -- confirm.
 *
 * Stack frame (224 bytes, 16-byte aligned so vmovdqa may be used):
 *     0.. 64(%esp)  limbs 0..4 of the current multiplicand a
 *    80..128(%esp)  5 times limbs 1..4 of a
 *   144..208(%esp)  addressed as 0..64(%edx): the low qword of each
 *                   limb duplicated into both qword lanes (operand b)
 */
3349bc3d5698SJohn Baldwin.type	_poly1305_init_avx2,@function
3350bc3d5698SJohn Baldwin.align	16
3351bc3d5698SJohn Baldwin_poly1305_init_avx2:
/* With __CET__ defined the entry point starts with F3 0F 1E FB (endbr32). */
3352*c0855eaaSJohn Baldwin	#ifdef __CET__
3353*c0855eaaSJohn Baldwin
3354*c0855eaaSJohn Baldwin.byte	243,15,30,251
3355*c0855eaaSJohn Baldwin	#endif
3356*c0855eaaSJohn Baldwin
3357bc3d5698SJohn Baldwin	vmovdqu	24(%edi),%xmm4
3358bc3d5698SJohn Baldwin	leal	48(%edi),%edi
3359bc3d5698SJohn Baldwin	movl	%esp,%ebp
3360bc3d5698SJohn Baldwin	subl	$224,%esp
3361bc3d5698SJohn Baldwin	andl	$-16,%esp
/*
 * Split x into five 26-bit limbs held in the low qword lanes:
 * bits 0.., 26.., 52.. (48+4), 78.. (48+30) and 104.. (13-byte shift).
 * The last limb has only 24 bits, so it needs no mask.
 */
3362bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm7
3363bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm4,%xmm0
3364bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm4,%xmm1
3365bc3d5698SJohn Baldwin	vpsrldq	$6,%xmm4,%xmm3
3366bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm1,%xmm1
3367bc3d5698SJohn Baldwin	vpsrlq	$4,%xmm3,%xmm2
3368bc3d5698SJohn Baldwin	vpsrlq	$30,%xmm3,%xmm3
3369bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm2,%xmm2
3370bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm3,%xmm3
3371bc3d5698SJohn Baldwin	vpsrldq	$13,%xmm4,%xmm4
3372bc3d5698SJohn Baldwin	leal	144(%esp),%edx
/* The loop body below is executed exactly twice. */
3373bc3d5698SJohn Baldwin	movl	$2,%ecx
3374bc3d5698SJohn Baldwin.L018square:
/* Save the current limbs a0..a4 and the precomputed 5*a1..5*a4. */
3375bc3d5698SJohn Baldwin	vmovdqa	%xmm0,(%esp)
3376bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%esp)
3377bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%esp)
3378bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%esp)
3379bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%esp)
3380bc3d5698SJohn Baldwin	vpslld	$2,%xmm1,%xmm6
3381bc3d5698SJohn Baldwin	vpslld	$2,%xmm2,%xmm5
3382bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm6,%xmm6
3383bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm5,%xmm5
3384bc3d5698SJohn Baldwin	vmovdqa	%xmm6,80(%esp)
3385bc3d5698SJohn Baldwin	vmovdqa	%xmm5,96(%esp)
3386bc3d5698SJohn Baldwin	vpslld	$2,%xmm3,%xmm6
3387bc3d5698SJohn Baldwin	vpslld	$2,%xmm4,%xmm5
3388bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm6,%xmm6
3389bc3d5698SJohn Baldwin	vpaddd	%xmm4,%xmm5,%xmm5
3390bc3d5698SJohn Baldwin	vmovdqa	%xmm6,112(%esp)
3391bc3d5698SJohn Baldwin	vmovdqa	%xmm5,128(%esp)
/*
 * Build operand b: shuffle 68 copies the low qword into both qword
 * lanes. %xmm6 keeps the unshuffled a1 for the products below.
 */
3392bc3d5698SJohn Baldwin	vpshufd	$68,%xmm0,%xmm5
3393bc3d5698SJohn Baldwin	vmovdqa	%xmm1,%xmm6
3394bc3d5698SJohn Baldwin	vpshufd	$68,%xmm1,%xmm1
3395bc3d5698SJohn Baldwin	vpshufd	$68,%xmm2,%xmm2
3396bc3d5698SJohn Baldwin	vpshufd	$68,%xmm3,%xmm3
3397bc3d5698SJohn Baldwin	vpshufd	$68,%xmm4,%xmm4
3398bc3d5698SJohn Baldwin	vmovdqa	%xmm5,(%edx)
3399bc3d5698SJohn Baldwin	vmovdqa	%xmm1,16(%edx)
3400bc3d5698SJohn Baldwin	vmovdqa	%xmm2,32(%edx)
3401bc3d5698SJohn Baldwin	vmovdqa	%xmm3,48(%edx)
3402bc3d5698SJohn Baldwin	vmovdqa	%xmm4,64(%edx)
/*
 * Schoolbook product d = a times b accumulated in %xmm0..%xmm4, with
 * the terms that wrap past limb 4 taken from the 5-times copies:
 *   d0 = a0 b0 + 5 (a1 b4 + a2 b3 + a3 b2 + a4 b1)
 *   d1 = a0 b1 + a1 b0 + 5 (a2 b4 + a3 b3 + a4 b2)
 *   d2 = a0 b2 + a1 b1 + a2 b0 + 5 (a3 b4 + a4 b3)
 *   d3 = a0 b3 + a1 b2 + a2 b1 + a3 b0 + 5 a4 b4
 *   d4 = a0 b4 + a1 b3 + a2 b2 + a3 b1 + a4 b0
 * vpmuludq multiplies the low dword of each qword lane, so both lanes
 * are processed independently.
 */
3403bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm4,%xmm4
3404bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm3,%xmm3
3405bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm2,%xmm2
3406bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm1,%xmm1
3407bc3d5698SJohn Baldwin	vpmuludq	%xmm0,%xmm5,%xmm0
3408bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm6,%xmm5
3409bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm4,%xmm4
3410bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm6,%xmm7
3411bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm3,%xmm3
3412bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm6,%xmm5
3413bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm2,%xmm2
3414bc3d5698SJohn Baldwin	vmovdqa	80(%esp),%xmm7
3415bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm6,%xmm6
3416bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm1,%xmm1
3417bc3d5698SJohn Baldwin	vmovdqa	32(%esp),%xmm5
3418bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm7,%xmm7
3419bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm0,%xmm0
3420bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm5,%xmm6
3421bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm4,%xmm4
3422bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm5,%xmm7
3423bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm3,%xmm3
3424bc3d5698SJohn Baldwin	vmovdqa	96(%esp),%xmm6
3425bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm5,%xmm5
3426bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm2,%xmm2
3427bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm6,%xmm7
3428bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm1,%xmm1
3429bc3d5698SJohn Baldwin	vmovdqa	48(%esp),%xmm5
3430bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm6,%xmm6
3431bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm0,%xmm0
3432bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm5,%xmm7
3433bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm4,%xmm4
3434bc3d5698SJohn Baldwin	vmovdqa	112(%esp),%xmm6
3435bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm5,%xmm5
3436bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm3,%xmm3
3437bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm6,%xmm7
3438bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm2,%xmm2
3439bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm6,%xmm5
3440bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm1,%xmm1
3441bc3d5698SJohn Baldwin	vmovdqa	64(%esp),%xmm7
3442bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm6,%xmm6
3443bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm0,%xmm0
3444bc3d5698SJohn Baldwin	vmovdqa	128(%esp),%xmm5
3445bc3d5698SJohn Baldwin	vpmuludq	(%edx),%xmm7,%xmm7
3446bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm4,%xmm4
3447bc3d5698SJohn Baldwin	vpmuludq	64(%edx),%xmm5,%xmm6
3448bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm3,%xmm3
3449bc3d5698SJohn Baldwin	vpmuludq	16(%edx),%xmm5,%xmm7
3450bc3d5698SJohn Baldwin	vpaddq	%xmm7,%xmm0,%xmm0
3451bc3d5698SJohn Baldwin	vpmuludq	32(%edx),%xmm5,%xmm6
3452bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm1,%xmm1
3453bc3d5698SJohn Baldwin	vmovdqa	64(%ebx),%xmm7
3454bc3d5698SJohn Baldwin	vpmuludq	48(%edx),%xmm5,%xmm5
3455bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm2,%xmm2
/*
 * Carry propagation back to 26-bit limbs, two interleaved chains:
 * d3 -> d4 and d0 -> d1, then d4 -> d0 (the carry is added once and
 * again shifted left by 2, i.e. times 5) and d1 -> d2, then d2 -> d3,
 * and finally d3 -> d4 and d0 -> d1. No further normalisation is done
 * after that last step.
 */
3456bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm3,%xmm5
3457bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm3,%xmm3
3458bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm0,%xmm6
3459bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm0,%xmm0
3460bc3d5698SJohn Baldwin	vpaddq	%xmm5,%xmm4,%xmm4
3461bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm1,%xmm1
3462bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm4,%xmm5
3463bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm4,%xmm4
3464bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm1,%xmm6
3465bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm1,%xmm1
3466bc3d5698SJohn Baldwin	vpaddq	%xmm6,%xmm2,%xmm2
3467bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm0,%xmm0
3468bc3d5698SJohn Baldwin	vpsllq	$2,%xmm5,%xmm5
3469bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm2,%xmm6
3470bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm2,%xmm2
3471bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm0,%xmm0
3472bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm3,%xmm3
3473bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm3,%xmm6
3474bc3d5698SJohn Baldwin	vpsrlq	$26,%xmm0,%xmm5
3475bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm0,%xmm0
3476bc3d5698SJohn Baldwin	vpand	%xmm7,%xmm3,%xmm3
3477bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm1,%xmm1
3478bc3d5698SJohn Baldwin	vpaddd	%xmm6,%xmm4,%xmm4
3479bc3d5698SJohn Baldwin	decl	%ecx
3480bc3d5698SJohn Baldwin	jz	.L019square_break
/*
 * End of the first pass: keep the new low-lane product in qword 0 and
 * put the previous low-lane value (saved at 0..64(%esp)) in qword 1,
 * giving the pair {x^2, x}. The second pass multiplies that pair by
 * the broadcast x^2, giving {x^4, x^3}.
 */
3481bc3d5698SJohn Baldwin	vpunpcklqdq	(%esp),%xmm0,%xmm0
3482bc3d5698SJohn Baldwin	vpunpcklqdq	16(%esp),%xmm1,%xmm1
3483bc3d5698SJohn Baldwin	vpunpcklqdq	32(%esp),%xmm2,%xmm2
3484bc3d5698SJohn Baldwin	vpunpcklqdq	48(%esp),%xmm3,%xmm3
3485bc3d5698SJohn Baldwin	vpunpcklqdq	64(%esp),%xmm4,%xmm4
3486bc3d5698SJohn Baldwin	jmp	.L018square
3487bc3d5698SJohn Baldwin.L019square_break:
/*
 * Move the second-pass results into the odd dwords and merge them with
 * the second-pass inputs still stored at 0..64(%esp), so each register
 * holds the dwords {x^2, x^4, x, x^3}.
 */
3488bc3d5698SJohn Baldwin	vpsllq	$32,%xmm0,%xmm0
3489bc3d5698SJohn Baldwin	vpsllq	$32,%xmm1,%xmm1
3490bc3d5698SJohn Baldwin	vpsllq	$32,%xmm2,%xmm2
3491bc3d5698SJohn Baldwin	vpsllq	$32,%xmm3,%xmm3
3492bc3d5698SJohn Baldwin	vpsllq	$32,%xmm4,%xmm4
3493bc3d5698SJohn Baldwin	vpor	(%esp),%xmm0,%xmm0
3494bc3d5698SJohn Baldwin	vpor	16(%esp),%xmm1,%xmm1
3495bc3d5698SJohn Baldwin	vpor	32(%esp),%xmm2,%xmm2
3496bc3d5698SJohn Baldwin	vpor	48(%esp),%xmm3,%xmm3
3497bc3d5698SJohn Baldwin	vpor	64(%esp),%xmm4,%xmm4
/* Shuffle 141 picks source dwords 1,3,0,2: order {x^4, x^3, x^2, x}. */
3498bc3d5698SJohn Baldwin	vpshufd	$141,%xmm0,%xmm0
3499bc3d5698SJohn Baldwin	vpshufd	$141,%xmm1,%xmm1
3500bc3d5698SJohn Baldwin	vpshufd	$141,%xmm2,%xmm2
3501bc3d5698SJohn Baldwin	vpshufd	$141,%xmm3,%xmm3
3502bc3d5698SJohn Baldwin	vpshufd	$141,%xmm4,%xmm4
/* Store the limb vectors at ctx+48 (unaligned stores). */
3503bc3d5698SJohn Baldwin	vmovdqu	%xmm0,(%edi)
3504bc3d5698SJohn Baldwin	vmovdqu	%xmm1,16(%edi)
3505bc3d5698SJohn Baldwin	vmovdqu	%xmm2,32(%edi)
3506bc3d5698SJohn Baldwin	vmovdqu	%xmm3,48(%edi)
3507bc3d5698SJohn Baldwin	vmovdqu	%xmm4,64(%edi)
/* Store 5 times limbs 1..4 (value + value shifted left by 2) after them. */
3508bc3d5698SJohn Baldwin	vpslld	$2,%xmm1,%xmm6
3509bc3d5698SJohn Baldwin	vpslld	$2,%xmm2,%xmm5
3510bc3d5698SJohn Baldwin	vpaddd	%xmm1,%xmm6,%xmm6
3511bc3d5698SJohn Baldwin	vpaddd	%xmm2,%xmm5,%xmm5
3512bc3d5698SJohn Baldwin	vmovdqu	%xmm6,80(%edi)
3513bc3d5698SJohn Baldwin	vmovdqu	%xmm5,96(%edi)
3514bc3d5698SJohn Baldwin	vpslld	$2,%xmm3,%xmm6
3515bc3d5698SJohn Baldwin	vpslld	$2,%xmm4,%xmm5
3516bc3d5698SJohn Baldwin	vpaddd	%xmm3,%xmm6,%xmm6
3517bc3d5698SJohn Baldwin	vpaddd	%xmm4,%xmm5,%xmm5
3518bc3d5698SJohn Baldwin	vmovdqu	%xmm6,112(%edi)
3519bc3d5698SJohn Baldwin	vmovdqu	%xmm5,128(%edi)
/* Drop the scratch frame and hand %edi back pointing at the context. */
3520bc3d5698SJohn Baldwin	movl	%ebp,%esp
3521bc3d5698SJohn Baldwin	leal	-48(%edi),%edi
3522bc3d5698SJohn Baldwin	ret
3523bc3d5698SJohn Baldwin.size	_poly1305_init_avx2,.-_poly1305_init_avx2
3524bc3d5698SJohn Baldwin.align	32
/*
 * _poly1305_blocks_avx2 -- AVX2 bulk path of the Poly1305 block function.
 *
 * i386 cdecl; after the four pushes below the stack arguments are at
 *   20(%esp)  context pointer                       -> %edi
 *   24(%esp)  input pointer                         -> %esi
 *   28(%esp)  input length in bytes                 -> %ecx
 *             (rounded down to a multiple of 16 before use)
 *   32(%esp)  fourth argument                       -> %eax
 *             (presumably the pad bit, 0 or 1; it is negated and used
 *             as an index only on the single-block path)
 *
 * Context fields touched here:
 *    0..16(ctx)   hash value
 *    20(ctx)      zero while the hash is still packed; set to 1 once it
 *                 has been repacked into five 26-bit limbs
 *    48..191(ctx) nine 16-byte multiplier rows written by
 *                 _poly1305_init_avx2 (rows 5..8 are 5x rows 1..4)
 *
 * Register roles in the vector code:
 *   ymm0..ymm4  hash limbs h0..h4 / product sums d0..d4, one block per
 *               64-bit lane
 *   ymm5, ymm6  input blocks, then temporaries
 *   ymm7        26-bit limb mask (temporarily reused inside the multiply)
 *   %ebx        pointer into .Lconst_sse2 (selects the padbit row)
 *   %edx        pointer into the on-stack multiplier table
 *   %ebp        caller's %esp while the aligned frame is live
 *
 * Saves/restores %ebp, %ebx, %esi, %edi.
 * Clobbers %eax, %ecx, %edx, flags and ymm0..ymm7.
 */
.type	_poly1305_blocks_avx2,@function
.align	16
_poly1305_blocks_avx2:
	#ifdef __CET__

	/* endbr32 (F3 0F 1E FB), spelled as raw bytes */
.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	/* Only whole 16-byte blocks are processed. */
	andl	$-16,%ecx
	jz	.L020nodata
	/*
	 * Fewer than 64 bytes and hash not yet in 26-bit limbs: leave through
	 * .Lenter_blocks (defined elsewhere in this file; presumably the
	 * scalar block routine).
	 */
	cmpl	$64,%ecx
	jae	.L021enter_avx2
	testl	%eax,%eax
	jz	.Lenter_blocks
.L021enter_avx2:
	vzeroupper
	/* call/pop computes the constant table address position-independently */
	call	.L022pic_point
.L022pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L022pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L023base2_26
	/* First vector call for this context: build the multiplier rows. */
	call	_poly1305_init_avx2
	/*
	 * Repack the hash into 26-bit limbs.  Overlapping dword loads at
	 * byte offsets 0,3,6,9,13 shifted right by 0,2,4,6,0 start at bit
	 * positions 0,26,52,78,104.  The fourth value needs no mask
	 * (32-6 = 26 bits); the fifth is stored as loaded.
	 */
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	shrl	$2,%ecx
	andl	$67108863,%eax
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movl	%eax,(%edi)
	movl	%ecx,4(%edi)
	movl	%edx,8(%edi)
	movl	%esi,12(%edi)
	movl	%ebp,16(%edi)
	movl	$1,20(%edi)
	/* %esi/%ecx were used as scratch above: reload the arguments. */
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	/*
	 * Re-apply the rounding done on entry so this path and the
	 * already-converted path see the same length; without it a length
	 * that is not a multiple of 16 would be mis-dispatched below.
	 * Flags are dead here.
	 */
	andl	$-16,%ecx
.L023base2_26:
	movl	32(%esp),%eax
	/*
	 * Frame: at least 448 bytes, 512-byte aligned.
	 *   0..159(%esp)    five 32-byte scratch slots
	 *   160..447(%esp)  nine 32-byte multiplier rows, addressed as
	 *                   -128..128(%edx) with %edx = %esp+288
	 */
	movl	%esp,%ebp
	subl	$448,%esp
	andl	$-512,%esp
	vmovdqu	48(%edi),%xmm0
	leal	288(%esp),%edx
	vmovdqu	64(%edi),%xmm1
	vmovdqu	80(%edi),%xmm2
	vmovdqu	96(%edi),%xmm3
	vmovdqu	112(%edi),%xmm4
	leal	48(%edi),%edi
	/*
	 * Widen each 16-byte row {a0,a1,a2,a3} to 32 bytes.  After
	 * vpermq $64 + vpshufd $200 the low dword of every 64-bit lane is
	 * a0 and the high dwords are a0,a1,a2,a3.  vpmuludq reads low
	 * dwords only, so the loop (row+0) multiplies every lane by a0,
	 * while the tail (row+4) multiplies lane i by a_i.
	 */
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpermq	$64,%ymm4,%ymm4
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	vpshufd	$200,%ymm4,%ymm4
	/* rows 0..4 -> -128..0(%edx); rows 5..8 are fetched meanwhile */
	vmovdqa	%ymm0,-128(%edx)
	vmovdqu	80(%edi),%xmm0
	vmovdqa	%ymm1,-96(%edx)
	vmovdqu	96(%edi),%xmm1
	vmovdqa	%ymm2,-64(%edx)
	vmovdqu	112(%edi),%xmm2
	vmovdqa	%ymm3,-32(%edx)
	vmovdqu	128(%edi),%xmm3
	vmovdqa	%ymm4,(%edx)
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	/* rows 5..8 -> 32..128(%edx); hash limbs h0..h4 -> lane 0 of ymm0..4 */
	vmovdqa	%ymm0,32(%edx)
	vmovd	-48(%edi),%xmm0
	vmovdqa	%ymm1,64(%edx)
	vmovd	-44(%edi),%xmm1
	vmovdqa	%ymm2,96(%edx)
	vmovd	-40(%edi),%xmm2
	vmovdqa	%ymm3,128(%edx)
	vmovd	-36(%edi),%xmm3
	vmovd	-32(%edi),%xmm4
	/* ymm7 = 0x3ffffff in every 64-bit lane */
	vmovdqa	64(%ebx),%ymm7
	negl	%eax
	/*
	 * A length that is not a multiple of 64 starts with a short pass of
	 * one, two or three blocks through the tail code.  %ebx is advanced
	 * into the padbit row so that only occupied lanes receive 2^24, and
	 * %edx is advanced so that the occupied lanes read the last
	 * elements of each multiplier row.  Lanes that thereby read dwords
	 * of the neighbouring row hold zero input on these paths.
	 */
	testl	$63,%ecx
	jz	.L024even
	movl	%ecx,%edx
	andl	$-64,%ecx
	andl	$63,%edx
	vmovdqu	(%esi),%xmm5
	cmpl	$32,%edx
	jb	.L025one
	vmovdqu	16(%esi),%xmm6
	/* flags are still those of the cmpl: vmovdqu does not alter them */
	je	.L026two
	/* three blocks */
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	leal	48(%esi),%esi
	leal	8(%ebx),%ebx
	leal	296(%esp),%edx
	jmp	.L027tail
.L026two:
	leal	32(%esi),%esi
	leal	16(%ebx),%ebx
	leal	304(%esp),%edx
	jmp	.L027tail
.L025one:
	leal	16(%esi),%esi
	vpxor	%ymm6,%ymm6,%ymm6
	/* %eax is 0 or -1 for a 0/1 argument: selects +32 (all zero) or +24 (2^24 in lane 0 only) */
	leal	32(%ebx,%eax,8),%ebx
	leal	312(%esp),%edx
	jmp	.L027tail
.align	32
.L024even:
	/* ymm5 = {block0, block2}, ymm6 = {block1, block3} */
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jz	.L027tail
.L028loop:
	/*
	 * Park h2,h0,h1 in scratch and split the four blocks into 26-bit
	 * limbs, one block per 64-bit lane:
	 *   ymm5 = bits 0..63, ymm1 = bits 64..127, ymm2 = bits 48..111
	 */
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	/* limb3 = bits 78.., limb2 = bits 52.., limb1 = bits 26.., limb4 = bits 104.. */
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	/* set bit 24 of limb 4 (bit 128 of the block) in every lane */
	vpor	(%ebx),%ymm1,%ymm1
	/* accumulate: ymm5=h0, ymm6=h1, ymm2=h2, ymm0=h3, ymm1=h4 */
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	/*
	 * Schoolbook 5x5 limb product with the rows at %edx
	 * (r0..r4 = -128..0, s1..s4 = 32..128, s = 5*r):
	 *   d0 = h0*r0 + h1*s4 + h2*s3 + h3*s2 + h4*s1   (ymm0)
	 *   d1 = h0*r1 + h1*r0 + h2*s4 + h3*s3 + h4*s2   (ymm1)
	 *   d2 = h0*r2 + h1*r1 + h2*r0 + h3*s4 + h4*s3   (ymm2)
	 *   d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s4   (ymm3)
	 *   d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0   (ymm4)
	 * h1, h3, h4 are spilled to 32/96/128(%esp) to free registers.
	 */
	vpmuludq	-96(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-64(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	96(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	128(%edx),%ymm2,%ymm1
	vpmuludq	-128(%edx),%ymm2,%ymm2
	/* h0 terms */
	vpmuludq	-32(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-96(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-64(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	/* h1 terms (ymm7 = h1) */
	vpmuludq	-64(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-32(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	128(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-128(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-96(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	/* h3 terms (ymm6 = h3) */
	vpmuludq	-128(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	64(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	128(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	/* h4 terms (ymm5 = h4); ymm7 is reloaded with the limb mask midway */
	vpmuludq	128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	32(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	64(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	96(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	/*
	 * Partial carry propagation, two interleaved chains:
	 *   d3->d4, d0->d1, d4->d0 (carry*5 = carry + carry<<2),
	 *   d1->d2, d2->d3, d0->d1, d3->d4
	 */
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	/* fetch the next four blocks */
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jnz	.L028loop
.L027tail:
	/*
	 * Same limb split and accumulate as the loop body, but the
	 * multipliers are read at row+4, giving each lane its own row
	 * element, and the four lanes are summed into lane 0 afterwards.
	 */
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	/* back to the start of .Lconst_sse2 (relies on its 64-byte alignment) */
	andl	$-64,%ebx
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	/* h2 terms; h1, h3, h4 spilled to 32/96/128(%esp) */
	vpmuludq	-92(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-60(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	100(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	132(%edx),%ymm2,%ymm1
	vpmuludq	-124(%edx),%ymm2,%ymm2
	/* h0 terms */
	vpmuludq	-28(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	4(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-92(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-60(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	/* h1 terms */
	vpmuludq	-60(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-28(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	132(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-124(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	/* h3 terms */
	vpmuludq	-124(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-92(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	68(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	100(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	132(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	/* h4 terms; ymm7 reloaded with the limb mask midway */
	vpmuludq	132(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	36(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	68(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	100(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	/*
	 * Horizontal sum: vpsrldq $8 folds the odd 64-bit lane onto the even
	 * one within each 128-bit half, vpermq $2 then brings lane 2 down to
	 * lane 0.  Lane 0 of d0..d4 ends up holding the sum of all four lanes.
	 */
	vpsrldq	$8,%ymm4,%ymm5
	vpsrldq	$8,%ymm3,%ymm6
	vpaddq	%ymm5,%ymm4,%ymm4
	vpsrldq	$8,%ymm0,%ymm5
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrldq	$8,%ymm1,%ymm6
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsrldq	$8,%ymm2,%ymm5
	vpaddq	%ymm6,%ymm1,%ymm1
	vpermq	$2,%ymm4,%ymm6
	vpaddq	%ymm5,%ymm2,%ymm2
	vpermq	$2,%ymm3,%ymm5
	vpaddq	%ymm6,%ymm4,%ymm4
	vpermq	$2,%ymm0,%ymm6
	vpaddq	%ymm5,%ymm3,%ymm3
	vpermq	$2,%ymm1,%ymm5
	vpaddq	%ymm6,%ymm0,%ymm0
	vpermq	$2,%ymm2,%ymm6
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	/* same carry propagation as in the loop */
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	/* %ecx is non-zero only after a short leading pass with data left */
	cmpl	$0,%ecx
	je	.L029done
	/*
	 * Keep dword 0 of each limb and overwrite dwords 1..3 with dword 3
	 * (expected to be zero after the reduction); the 128-bit VEX form
	 * also clears bits 255:128.  Then continue with whole 64-byte groups
	 * using the unshifted table pointer.
	 */
	vpshufd	$252,%xmm0,%xmm0
	leal	288(%esp),%edx
	vpshufd	$252,%xmm1,%xmm1
	vpshufd	$252,%xmm2,%xmm2
	vpshufd	$252,%xmm3,%xmm3
	vpshufd	$252,%xmm4,%xmm4
	jmp	.L024even
.align	16
.L029done:
	/* write h0..h4 back to 0..16(ctx); %edi is still ctx+48 */
	vmovd	%xmm0,-48(%edi)
	vmovd	%xmm1,-44(%edi)
	vmovd	%xmm2,-40(%edi)
	vmovd	%xmm3,-36(%edi)
	vmovd	%xmm4,-32(%edi)
	vzeroupper
	/* drop the aligned frame */
	movl	%ebp,%esp
.L020nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_avx2,.-_poly1305_blocks_avx2
/*
 * .Lconst_sse2 -- constant rows shared by the vector Poly1305 code.
 *
 * The 64-byte alignment is load-bearing: _poly1305_blocks_avx2 offsets
 * %ebx into the first rows and later recovers the table base with
 * "andl $-64,%ebx".
 *
 *   +0    2^24 in each of four 64-bit lanes.  OR-ed into the top limb
 *         (input bits 104..127), where bit 24 is bit 128 of the block.
 *         Loading 32 bytes from +8/+16/+24 yields 3/2/1 set lanes
 *         followed by zero lanes taken from the next row.
 *   +32   all zero (no pad bit; also the spill-over for the loads above)
 *   +64   0x3ffffff in each 64-bit lane: 26-bit limb mask
 *   +96   0x0fffffff,0x0ffffffc,0x0ffffffc,0x0ffffffc.  Not referenced
 *         by _poly1305_blocks_avx2; the pattern matches the Poly1305
 *         clamp of r -- presumably used by the key setup code.
 */
.align	64
.Lconst_sse2:
.long	16777216,0,16777216,0,16777216,0,16777216,0
.long	0,0,0,0,0,0,0,0
.long	67108863,0,67108863,0,67108863,0,67108863,0
.long	268435455,268435452,268435452,268435452
/* ASCII, NUL-terminated: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.align	4
/* common symbol: 16 bytes, 4-byte aligned */
.comm	OPENSSL_ia32cap_P,16,4
3921*c0855eaaSJohn Baldwin
3922*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
3923*c0855eaaSJohn Baldwin	.p2align 2
3924*c0855eaaSJohn Baldwin	.long 1f - 0f
3925*c0855eaaSJohn Baldwin	.long 4f - 1f
3926*c0855eaaSJohn Baldwin	.long 5
3927*c0855eaaSJohn Baldwin0:
3928*c0855eaaSJohn Baldwin	.asciz "GNU"
3929*c0855eaaSJohn Baldwin1:
3930*c0855eaaSJohn Baldwin	.p2align 2
3931*c0855eaaSJohn Baldwin	.long 0xc0000002
3932*c0855eaaSJohn Baldwin	.long 3f - 2f
3933*c0855eaaSJohn Baldwin2:
3934*c0855eaaSJohn Baldwin	.long 3
3935*c0855eaaSJohn Baldwin3:
3936*c0855eaaSJohn Baldwin	.p2align 2
3937*c0855eaaSJohn Baldwin4:
3938bc3d5698SJohn Baldwin#endif
3939