/* Do not modify. This file is auto-generated from poly1305-x86.pl. */
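/*
 * Poly1305 MAC for 32-bit x86 (CRYPTOGAMS perlasm output, see the
 * identification string near .Lconst_sse2).  Three code paths are
 * emitted: a scalar base-2^32 path (poly1305_init/_blocks/_emit), an
 * SSE2 base-2^26 path packing two blocks per 128-bit vector, and an
 * AVX2 path packing four blocks per 256-bit vector.  poly1305_init
 * probes OPENSSL_ia32cap_P at run time and publishes the selected
 * _blocks/_emit pair through the function table passed as its third
 * argument.  The same routines are assembled twice, for PIC and
 * non-PIC builds (the #else half of the file).
 */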
#ifdef PIC
.text
.align	64
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ebp
	xorl	%eax,%eax
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)
	cmpl	$0,%esi
	je	.L000nokey
	call	.L001pic_point
.L001pic_point:
	popl	%ebx
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	leal	OPENSSL_ia32cap_P-.L001pic_point(%ebx),%edi
	movl	(%edi),%ecx
	andl	$83886080,%ecx
	cmpl	$83886080,%ecx
	jne	.L002no_sse2
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
	movl	8(%edi),%ecx
	testl	$32,%ecx
	jz	.L002no_sse2
	leal	_poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
	movl	20(%esp),%edi
	movl	%eax,(%ebp)
	movl	%edx,4(%ebp)
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax
	andl	$268435452,%ebx
	andl	$268435452,%ecx
	andl	$268435452,%edx
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin
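/*
 * poly1305_blocks(ctx, inp, len, padbit) -- scalar path (argument names
 * are descriptive labels for 20..32(%esp)).  len is rounded down to a
 * multiple of 16; for each block the five-limb accumulator h at
 * ctx+0..16 is updated as h = (h + block + padbit*2^128) * r mod
 * 2^130-5, using 32x32->64 mull/imull with r[0..3] kept at 36..48(%esp)
 * and the folded values r[i] + (r[i]>>2) (i.e. 5*r[i]/4) at 52..60(%esp)
 * for the modular reduction.
 */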
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.Lenter_blocks:
	andl	$-15,%ecx
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax
	movl	28(%edi),%ebx
	leal	(%esi,%ecx,1),%ebp
	movl	32(%edi),%ecx
	movl	36(%edi),%edx
	movl	%ebp,92(%esp)
	movl	%esi,%ebp
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%esi
	movl	16(%edi),%edi
	jmp	.L004loop
.align	32
.L004loop:
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax
	adcl	$0,%edi
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp
	jne	.L004loop
	movl	84(%esp),%edx
	addl	$64,%esp
	movl	%eax,(%edx)
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin
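/*
 * poly1305_emit(ctx, mac, nonce) -- scalar finalization.  Computes h+5,
 * turns bit 130 of the sum into an all-ones/all-zeros mask (shrl $2 of
 * the top limb, then negl), selects either h or h-(2^130-5) with that
 * mask, adds the 128-bit nonce and stores the 16-byte tag.
 */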
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
.Lenter_emit:
	movl	24(%esp),%edi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	addl	$5,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	notl	%esi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin
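/*
 * _poly1305_init_sse2: converts the clamped key r (ctx+24..36, base
 * 2^32) into five 26-bit limbs, then runs the squaring loop below twice
 * to build the small table of powers of r used by the vector path,
 * storing them from ctx+48 together with their 5*r counterparts
 * (pslld $2 + paddd).  %ebx points at .Lconst_sse2; 64(%ebx) is the
 * per-lane 2^26-1 mask.
 */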
.align	32
.type	_poly1305_init_sse2,@function
.align	16
_poly1305_init_sse2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movdqu	24(%edi),%xmm4
	leal	48(%edi),%edi
	movl	%esp,%ebp
	subl	$224,%esp
	andl	$-16,%esp
	movq	64(%ebx),%xmm7
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx
.L005square:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	pshufd	$68,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$68,%xmm1,%xmm1
	pshufd	$68,%xmm2,%xmm2
	pshufd	$68,%xmm3,%xmm3
	pshufd	$68,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.L006square_break
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.L005square
.L006square_break:
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$141,%xmm0,%xmm0
	pshufd	$141,%xmm1,%xmm1
	pshufd	$141,%xmm2,%xmm2
	pshufd	$141,%xmm3,%xmm3
	pshufd	$141,%xmm4,%xmm4
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	movl	%ebp,%esp
	leal	-48(%edi),%edi
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2
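/*
 * _poly1305_blocks_sse2: vectorized block loop.  Inputs shorter than 64
 * bytes while h is still in base 2^32 (ctx+20 == 0) are routed back to
 * the scalar .Lenter_blocks.  On first use it calls _poly1305_init_sse2
 * and re-reads h with the unaligned loads at offsets 0,3,6,9,13 to
 * split it into 26-bit limbs, setting the base-2^26 flag at ctx+20.
 * The main loop multiplies interleaved blocks by powers of r with
 * pmuludq and carries lazily under the 64(%ebx) = 2^26-1 mask; the
 * 2^128 pad bit is OR-ed in from (%ebx).
 */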
.align	32
.type	_poly1305_blocks_sse2,@function
.align	16
_poly1305_blocks_sse2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	andl	$-16,%ecx
	jz	.L007nodata
	cmpl	$64,%ecx
	jae	.L008enter_sse2
	testl	%eax,%eax
	jz	.Lenter_blocks
.align	16
.L008enter_sse2:
	call	.L009pic_point
.L009pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L009pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L010base2_26
	call	_poly1305_init_sse2
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	movl	$1,20(%edi)
	shrl	$2,%ecx
	andl	$67108863,%eax
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movd	%eax,%xmm0
	movd	%ecx,%xmm1
	movd	%edx,%xmm2
	movd	%esi,%xmm3
	movd	%ebp,%xmm4
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	jmp	.L011base2_32
.align	16
.L010base2_26:
	movd	(%edi),%xmm0
	movd	4(%edi),%xmm1
	movd	8(%edi),%xmm2
	movd	12(%edi),%xmm3
	movd	16(%edi),%xmm4
	movdqa	64(%ebx),%xmm7
.L011base2_32:
	movl	32(%esp),%eax
	movl	%esp,%ebp
	subl	$528,%esp
	andl	$-16,%esp
	leal	48(%edi),%edi
	shll	$24,%eax
	testl	$31,%ecx
	jz	.L012even
	movdqu	(%esi),%xmm6
	leal	16(%esi),%esi
	movdqa	%xmm6,%xmm5
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	psrlq	$26,%xmm5
	psrldq	$6,%xmm6
	pand	%xmm7,%xmm5
	paddd	%xmm5,%xmm1
	movdqa	%xmm6,%xmm5
	psrlq	$4,%xmm6
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm2
	movdqa	%xmm5,%xmm6
	psrlq	$30,%xmm5
	pand	%xmm7,%xmm5
	psrldq	$7,%xmm6
	paddd	%xmm5,%xmm3
	movd	%eax,%xmm5
	paddd	%xmm6,%xmm4
	movd	12(%edi),%xmm6
	paddd	%xmm5,%xmm4
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	pmuludq	%xmm6,%xmm0
	pmuludq	%xmm6,%xmm1
	pmuludq	%xmm6,%xmm2
	movd	28(%edi),%xmm5
	pmuludq	%xmm6,%xmm3
	pmuludq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movd	92(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	movd	44(%edi),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	movd	108(%edi),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	movd	60(%edi),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	movd	124(%edi),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	movd	76(%edi),%xmm5
	paddq	%xmm6,%xmm1
	movd	140(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	subl	$16,%ecx
	jz	.L013done
.L012even:
	leal	384(%esp),%edx
	leal	-32(%esi),%eax
	subl	$64,%ecx
	movdqu	(%edi),%xmm5
	pshufd	$68,%xmm5,%xmm6
	cmovbl	%eax,%esi
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,(%edx)
	leal	160(%esp),%eax
	movdqu	16(%edi),%xmm6
	movdqa	%xmm5,-144(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,16(%edx)
	movdqu	32(%edi),%xmm5
	movdqa	%xmm6,-128(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,32(%edx)
	movdqu	48(%edi),%xmm6
	movdqa	%xmm5,-112(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,48(%edx)
	movdqu	64(%edi),%xmm5
	movdqa	%xmm6,-96(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,64(%edx)
	movdqu	80(%edi),%xmm6
	movdqa	%xmm5,-80(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,80(%edx)
	movdqu	96(%edi),%xmm5
	movdqa	%xmm6,-64(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,96(%edx)
	movdqu	112(%edi),%xmm6
	movdqa	%xmm5,-48(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,112(%edx)
	movdqu	128(%edi),%xmm5
	movdqa	%xmm6,-32(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,128(%edx)
	movdqa	%xmm5,-16(%edx)
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	jbe	.L014skip_loop
	jmp	.L015loop
.align	32
.L015loop:
	movdqa	-144(%edx),%xmm7
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	pmuludq	-16(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	-128(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	-96(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	-80(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-128(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-112(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	-96(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-32(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-16(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	-128(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-16(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	-128(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	-16(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	-64(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	leal	-32(%esi),%eax
	subl	$64,%ecx
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	cmovbl	%eax,%esi
	leal	160(%esp),%eax
	movdqa	(%edx),%xmm7
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	paddq	%xmm0,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	paddq	16(%esp),%xmm6
	paddq	32(%esp),%xmm2
	paddq	48(%esp),%xmm3
	paddq	64(%esp),%xmm4
	pmuludq	128(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	16(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	112(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	128(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	128(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	128(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	80(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	ja	.L015loop
.L014skip_loop:
	pshufd	$16,-144(%edx),%xmm7
	addl	$32,%ecx
	jnz	.L016long_tail
	paddd	%xmm0,%xmm5
	paddd	%xmm1,%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
.L016long_tail:
	movdqa	%xmm5,(%eax)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	pmuludq	%xmm7,%xmm5
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	movdqa	%xmm5,%xmm0
	pshufd	$16,-128(%edx),%xmm5
	pmuludq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm1
	pmuludq	%xmm7,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%eax),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,-64(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%eax),%xmm6
	pshufd	$16,-112(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%eax),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%eax),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,-48(%edx),%xmm7
	pmuludq	(%eax),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%eax),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%eax),%xmm5
	pshufd	$16,-96(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%eax),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,-32(%edx),%xmm5
	pmuludq	(%eax),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%eax),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%eax),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%eax),%xmm7
	pshufd	$16,-80(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,-16(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%eax),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%eax),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	jz	.L017short_tail
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	pshufd	$16,(%edx),%xmm7
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	movdqa	%xmm5,(%esp)
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,16(%esp)
	pmuludq	%xmm7,%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm2,%xmm5
	pmuludq	%xmm7,%xmm2
	paddq	%xmm6,%xmm1
	movdqa	%xmm3,%xmm6
	pmuludq	%xmm7,%xmm3
	paddq	32(%esp),%xmm2
	movdqa	%xmm5,32(%esp)
	pshufd	$16,16(%edx),%xmm5
	paddq	48(%esp),%xmm3
	movdqa	%xmm6,48(%esp)
	movdqa	%xmm4,%xmm6
	pmuludq	%xmm7,%xmm4
	paddq	64(%esp),%xmm4
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,80(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	pshufd	$16,32(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,96(%edx),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	pshufd	$16,48(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,112(%edx),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	pshufd	$16,64(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,128(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
.L017short_tail:
	pshufd	$78,%xmm4,%xmm6
	pshufd	$78,%xmm3,%xmm5
	paddq	%xmm6,%xmm4
	paddq	%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm6
	pshufd	$78,%xmm1,%xmm5
	paddq	%xmm6,%xmm0
	paddq	%xmm5,%xmm1
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm6,%xmm2
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
.L013done:
	movd	%xmm0,-48(%edi)
	movd	%xmm1,-44(%edi)
	movd	%xmm2,-40(%edi)
	movd	%xmm3,-36(%edi)
	movd	%xmm4,-32(%edi)
	movl	%ebp,%esp
.L007nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2
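/*
 * _poly1305_emit_sse2: if h is still in base 2^32 (ctx+20 == 0) this
 * simply falls through to the scalar .Lenter_emit.  Otherwise the
 * shll/shrl chain below recombines the five 26-bit limbs into four
 * 32-bit words, folds the part above 2^130 back in (leal (%edi,%edi,4)
 * is the *5), and then performs the same h+5 / conditional-select /
 * add-nonce finalization as poly1305_emit.
 */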
.align	32
.type	_poly1305_emit_sse2,@function
.align	16
_poly1305_emit_sse2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
	cmpl	$0,20(%ebp)
	je	.Lenter_emit
	movl	(%ebp),%eax
	movl	4(%ebp),%edi
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	movl	%edi,%ebx
	shll	$26,%edi
	shrl	$6,%ebx
	addl	%edi,%eax
	movl	%ecx,%edi
	adcl	$0,%ebx
	shll	$20,%edi
	shrl	$12,%ecx
	addl	%edi,%ebx
	movl	%edx,%edi
	adcl	$0,%ecx
	shll	$14,%edi
	shrl	$18,%edx
	addl	%edi,%ecx
	movl	%esi,%edi
	adcl	$0,%edx
	shll	$8,%edi
	shrl	$24,%esi
	addl	%edi,%edx
	adcl	$0,%esi
	movl	%esi,%edi
	andl	$3,%esi
	shrl	$2,%edi
	leal	(%edi,%edi,4),%ebp
	movl	24(%esp),%edi
	addl	%ebp,%eax
	movl	28(%esp),%ebp
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	movd	%eax,%xmm0
	addl	$5,%eax
	movd	%ebx,%xmm1
	adcl	$0,%ebx
	movd	%ecx,%xmm2
	adcl	$0,%ecx
	movd	%edx,%xmm3
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movd	%xmm0,%eax
	movl	%ebx,4(%edi)
	movd	%xmm1,%ebx
	movl	%ecx,8(%edi)
	movd	%xmm2,%ecx
	movl	%edx,12(%edi)
	movd	%xmm3,%edx
	notl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	orl	(%edi),%eax
	andl	%esi,%ecx
	orl	4(%edi),%ebx
	andl	%esi,%edx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	movl	%eax,(%edi)
	adcl	8(%ebp),%ecx
	movl	%ebx,4(%edi)
	adcl	12(%ebp),%edx
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2
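/*
 * _poly1305_init_avx2: AVX-encoded equivalent of _poly1305_init_sse2 --
 * the same base-2^26 split, squaring loop and 5*r precomputation, using
 * three-operand v* instructions.
 */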
.align	32
.type	_poly1305_init_avx2,@function
.align	16
_poly1305_init_avx2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	vmovdqu	24(%edi),%xmm4
	leal	48(%edi),%edi
	movl	%esp,%ebp
	subl	$224,%esp
	andl	$-16,%esp
	vmovdqa	64(%ebx),%xmm7
	vpand	%xmm7,%xmm4,%xmm0
	vpsrlq	$26,%xmm4,%xmm1
	vpsrldq	$6,%xmm4,%xmm3
	vpand	%xmm7,%xmm1,%xmm1
	vpsrlq	$4,%xmm3,%xmm2
	vpsrlq	$30,%xmm3,%xmm3
	vpand	%xmm7,%xmm2,%xmm2
	vpand	%xmm7,%xmm3,%xmm3
	vpsrldq	$13,%xmm4,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx
.L018square:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	vmovdqa	%xmm4,64(%esp)
	vpslld	$2,%xmm1,%xmm6
	vpslld	$2,%xmm2,%xmm5
	vpaddd	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm2,%xmm5,%xmm5
	vmovdqa	%xmm6,80(%esp)
	vmovdqa	%xmm5,96(%esp)
	vpslld	$2,%xmm3,%xmm6
	vpslld	$2,%xmm4,%xmm5
	vpaddd	%xmm3,%xmm6,%xmm6
	vpaddd	%xmm4,%xmm5,%xmm5
	vmovdqa	%xmm6,112(%esp)
	vmovdqa	%xmm5,128(%esp)
	vpshufd	$68,%xmm0,%xmm5
	vmovdqa	%xmm1,%xmm6
	vpshufd	$68,%xmm1,%xmm1
	vpshufd	$68,%xmm2,%xmm2
	vpshufd	$68,%xmm3,%xmm3
	vpshufd	$68,%xmm4,%xmm4
	vmovdqa	%xmm5,(%edx)
	vmovdqa	%xmm1,16(%edx)
	vmovdqa	%xmm2,32(%edx)
	vmovdqa	%xmm3,48(%edx)
	vmovdqa	%xmm4,64(%edx)
	vpmuludq	%xmm0,%xmm4,%xmm4
	vpmuludq	%xmm0,%xmm3,%xmm3
	vpmuludq	%xmm0,%xmm2,%xmm2
	vpmuludq	%xmm0,%xmm1,%xmm1
	vpmuludq	%xmm0,%xmm5,%xmm0
	vpmuludq	48(%edx),%xmm6,%xmm5
	vpaddq	%xmm5,%xmm4,%xmm4
	vpmuludq	32(%edx),%xmm6,%xmm7
	vpaddq	%xmm7,%xmm3,%xmm3
	vpmuludq	16(%edx),%xmm6,%xmm5
	vpaddq	%xmm5,%xmm2,%xmm2
	vmovdqa	80(%esp),%xmm7
	vpmuludq	(%edx),%xmm6,%xmm6
	vpaddq	%xmm6,%xmm1,%xmm1
	vmovdqa	32(%esp),%xmm5
	vpmuludq	64(%edx),%xmm7,%xmm7
	vpaddq	%xmm7,%xmm0,%xmm0
	vpmuludq	32(%edx),%xmm5,%xmm6
	vpaddq	%xmm6,%xmm4,%xmm4
	vpmuludq	16(%edx),%xmm5,%xmm7
	vpaddq	%xmm7,%xmm3,%xmm3
	vmovdqa	96(%esp),%xmm6
	vpmuludq	(%edx),%xmm5,%xmm5
	vpaddq	%xmm5,%xmm2,%xmm2
	vpmuludq	64(%edx),%xmm6,%xmm7
	vpaddq	%xmm7,%xmm1,%xmm1
	vmovdqa	48(%esp),%xmm5
	vpmuludq	48(%edx),%xmm6,%xmm6
	vpaddq	%xmm6,%xmm0,%xmm0
	vpmuludq	16(%edx),%xmm5,%xmm7
	vpaddq	%xmm7,%xmm4,%xmm4
	vmovdqa	112(%esp),%xmm6
	vpmuludq	(%edx),%xmm5,%xmm5
	vpaddq	%xmm5,%xmm3,%xmm3
	vpmuludq	64(%edx),%xmm6,%xmm7
	vpaddq	%xmm7,%xmm2,%xmm2
	vpmuludq	48(%edx),%xmm6,%xmm5
	vpaddq	%xmm5,%xmm1,%xmm1
	vmovdqa	64(%esp),%xmm7
	vpmuludq	32(%edx),%xmm6,%xmm6
	vpaddq	%xmm6,%xmm0,%xmm0
	vmovdqa	128(%esp),%xmm5
	vpmuludq	(%edx),%xmm7,%xmm7
	vpaddq	%xmm7,%xmm4,%xmm4
	vpmuludq	64(%edx),%xmm5,%xmm6
	vpaddq	%xmm6,%xmm3,%xmm3
	vpmuludq	16(%edx),%xmm5,%xmm7
	vpaddq	%xmm7,%xmm0,%xmm0
	vpmuludq	32(%edx),%xmm5,%xmm6
	vpaddq	%xmm6,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm7
	vpmuludq	48(%edx),%xmm5,%xmm5
	vpaddq	%xmm5,%xmm2,%xmm2
	vpsrlq	$26,%xmm3,%xmm5
	vpand	%xmm7,%xmm3,%xmm3
	vpsrlq	$26,%xmm0,%xmm6
	vpand	%xmm7,%xmm0,%xmm0
	vpaddq	%xmm5,%xmm4,%xmm4
	vpaddq	%xmm6,%xmm1,%xmm1
	vpsrlq	$26,%xmm4,%xmm5
	vpand	%xmm7,%xmm4,%xmm4
	vpsrlq	$26,%xmm1,%xmm6
	vpand	%xmm7,%xmm1,%xmm1
	vpaddq	%xmm6,%xmm2,%xmm2
	vpaddd	%xmm5,%xmm0,%xmm0
	vpsllq	$2,%xmm5,%xmm5
	vpsrlq	$26,%xmm2,%xmm6
	vpand	%xmm7,%xmm2,%xmm2
	vpaddd	%xmm5,%xmm0,%xmm0
	vpaddd	%xmm6,%xmm3,%xmm3
	vpsrlq	$26,%xmm3,%xmm6
	vpsrlq	$26,%xmm0,%xmm5
	vpand	%xmm7,%xmm0,%xmm0
	vpand	%xmm7,%xmm3,%xmm3
	vpaddd	%xmm5,%xmm1,%xmm1
	vpaddd	%xmm6,%xmm4,%xmm4
	decl	%ecx
	jz	.L019square_break
	vpunpcklqdq	(%esp),%xmm0,%xmm0
	vpunpcklqdq	16(%esp),%xmm1,%xmm1
	vpunpcklqdq	32(%esp),%xmm2,%xmm2
	vpunpcklqdq	48(%esp),%xmm3,%xmm3
	vpunpcklqdq	64(%esp),%xmm4,%xmm4
	jmp	.L018square
.L019square_break:
	vpsllq	$32,%xmm0,%xmm0
	vpsllq	$32,%xmm1,%xmm1
	vpsllq	$32,%xmm2,%xmm2
	vpsllq	$32,%xmm3,%xmm3
	vpsllq	$32,%xmm4,%xmm4
	vpor	(%esp),%xmm0,%xmm0
	vpor	16(%esp),%xmm1,%xmm1
	vpor	32(%esp),%xmm2,%xmm2
	vpor	48(%esp),%xmm3,%xmm3
	vpor	64(%esp),%xmm4,%xmm4
	vpshufd	$141,%xmm0,%xmm0
	vpshufd	$141,%xmm1,%xmm1
	vpshufd	$141,%xmm2,%xmm2
	vpshufd	$141,%xmm3,%xmm3
	vpshufd	$141,%xmm4,%xmm4
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	vmovdqu	%xmm4,64(%edi)
	vpslld	$2,%xmm1,%xmm6
	vpslld	$2,%xmm2,%xmm5
	vpaddd	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm2,%xmm5,%xmm5
	vmovdqu	%xmm6,80(%edi)
	vmovdqu	%xmm5,96(%edi)
	vpslld	$2,%xmm3,%xmm6
	vpslld	$2,%xmm4,%xmm5
	vpaddd	%xmm3,%xmm6,%xmm6
	vpaddd	%xmm4,%xmm5,%xmm5
	vmovdqu	%xmm6,112(%edi)
	vmovdqu	%xmm5,128(%edi)
	movl	%ebp,%esp
	leal	-48(%edi),%edi
	ret
.size	_poly1305_init_avx2,.-_poly1305_init_avx2
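/*
 * _poly1305_blocks_avx2: 256-bit variant processing four blocks per
 * iteration (two 16-byte loads plus vinserti128 per ymm register).  It
 * expands the power table built by the init routines into ymm form
 * around 288(%esp), keeps h in base 2^26 with the same lazy-reduction
 * carry chain as the SSE2 path, and folds the four lanes together
 * (vpsrldq $8 / vpermq $2) before the final carries in the tail.
 */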
.align	32
.type	_poly1305_blocks_avx2,@function
.align	16
_poly1305_blocks_avx2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	andl	$-16,%ecx
	jz	.L020nodata
	cmpl	$64,%ecx
	jae	.L021enter_avx2
	testl	%eax,%eax
	jz	.Lenter_blocks
.L021enter_avx2:
	vzeroupper
	call	.L022pic_point
.L022pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L022pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L023base2_26
	call	_poly1305_init_avx2
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	shrl	$2,%ecx
	andl	$67108863,%eax
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movl	%eax,(%edi)
	movl	%ecx,4(%edi)
	movl	%edx,8(%edi)
	movl	%esi,12(%edi)
	movl	%ebp,16(%edi)
	movl	$1,20(%edi)
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.L023base2_26:
	movl	32(%esp),%eax
	movl	%esp,%ebp
	subl	$448,%esp
	andl	$-512,%esp
	vmovdqu	48(%edi),%xmm0
	leal	288(%esp),%edx
	vmovdqu	64(%edi),%xmm1
	vmovdqu	80(%edi),%xmm2
	vmovdqu	96(%edi),%xmm3
	vmovdqu	112(%edi),%xmm4
	leal	48(%edi),%edi
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpermq	$64,%ymm4,%ymm4
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	vpshufd	$200,%ymm4,%ymm4
	vmovdqa	%ymm0,-128(%edx)
	vmovdqu	80(%edi),%xmm0
	vmovdqa	%ymm1,-96(%edx)
	vmovdqu	96(%edi),%xmm1
	vmovdqa	%ymm2,-64(%edx)
	vmovdqu	112(%edi),%xmm2
	vmovdqa	%ymm3,-32(%edx)
	vmovdqu	128(%edi),%xmm3
	vmovdqa	%ymm4,(%edx)
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	vmovdqa	%ymm0,32(%edx)
	vmovd	-48(%edi),%xmm0
	vmovdqa	%ymm1,64(%edx)
	vmovd	-44(%edi),%xmm1
	vmovdqa	%ymm2,96(%edx)
	vmovd	-40(%edi),%xmm2
	vmovdqa	%ymm3,128(%edx)
	vmovd	-36(%edi),%xmm3
	vmovd	-32(%edi),%xmm4
	vmovdqa	64(%ebx),%ymm7
	negl	%eax
	testl	$63,%ecx
	jz	.L024even
	movl	%ecx,%edx
	andl	$-64,%ecx
	andl	$63,%edx
	vmovdqu	(%esi),%xmm5
	cmpl	$32,%edx
	jb	.L025one
	vmovdqu	16(%esi),%xmm6
	je	.L026two
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	leal	48(%esi),%esi
	leal	8(%ebx),%ebx
	leal	296(%esp),%edx
	jmp	.L027tail
.L026two:
	leal	32(%esi),%esi
	leal	16(%ebx),%ebx
	leal	304(%esp),%edx
	jmp	.L027tail
.L025one:
	leal	16(%esi),%esi
	vpxor	%ymm6,%ymm6,%ymm6
	leal	32(%ebx,%eax,8),%ebx
	leal	312(%esp),%edx
	jmp	.L027tail
.align	32
.L024even:
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jz	.L027tail
.L028loop:
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	vpmuludq	-96(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-64(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	96(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	128(%edx),%ymm2,%ymm1
	vpmuludq	-128(%edx),%ymm2,%ymm2
	vpmuludq	-32(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-96(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-64(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpmuludq	-64(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-32(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	128(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-128(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-96(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	vpmuludq	-128(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	64(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	128(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	vpmuludq	128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	32(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	64(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	96(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jnz	.L028loop
.L027tail:
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	andl	$-64,%ebx
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-60(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	100(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	132(%edx),%ymm2,%ymm1
	vpmuludq	-124(%edx),%ymm2,%ymm2
	vpmuludq	-28(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	4(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-92(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-60(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpmuludq	-60(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-28(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	132(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-124(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	vpmuludq	-124(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-92(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	68(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	100(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	132(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	vpmuludq	132(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	36(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	68(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	100(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpsrldq	$8,%ymm4,%ymm5
	vpsrldq	$8,%ymm3,%ymm6
	vpaddq	%ymm5,%ymm4,%ymm4
	vpsrldq	$8,%ymm0,%ymm5
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrldq	$8,%ymm1,%ymm6
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsrldq	$8,%ymm2,%ymm5
	vpaddq	%ymm6,%ymm1,%ymm1
	vpermq	$2,%ymm4,%ymm6
	vpaddq	%ymm5,%ymm2,%ymm2
	vpermq	$2,%ymm3,%ymm5
	vpaddq	%ymm6,%ymm4,%ymm4
	vpermq	$2,%ymm0,%ymm6
	vpaddq	%ymm5,%ymm3,%ymm3
	vpermq	$2,%ymm1,%ymm5
	vpaddq	%ymm6,%ymm0,%ymm0
	vpermq	$2,%ymm2,%ymm6
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	cmpl	$0,%ecx
	je	.L029done
	vpshufd	$252,%xmm0,%xmm0
	leal	288(%esp),%edx
	vpshufd	$252,%xmm1,%xmm1
	vpshufd	$252,%xmm2,%xmm2
	vpshufd	$252,%xmm3,%xmm3
	vpshufd	$252,%xmm4,%xmm4
	jmp	.L024even
.align	16
.L029done:
	vmovd	%xmm0,-48(%edi)
	vmovd	%xmm1,-44(%edi)
	vmovd	%xmm2,-40(%edi)
	vmovd	%xmm3,-36(%edi)
	vmovd	%xmm4,-32(%edi)
	vzeroupper
	movl	%ebp,%esp
.L020nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_avx2,.-_poly1305_blocks_avx2
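/*
 * Constant pool shared by the SSE2 and AVX2 paths: +0 holds the 2^128
 * pad bit expressed in base 2^26 (1<<24 in the top limb), +32 a run of
 * zeros that the AVX2 tail indexes into so only lanes actually holding
 * message blocks receive the pad bit, +64 the 26-bit lane mask 2^26-1,
 * +96 the r-clamp mask; the CRYPTOGAMS identification string follows.
 */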
.align	64
.Lconst_sse2:
.long	16777216,0,16777216,0,16777216,0,16777216,0
.long	0,0,0,0,0,0,0,0
.long	67108863,0,67108863,0,67108863,0,67108863,0
.long	268435455,268435452,268435452,268435452
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.align	4
.comm	OPENSSL_ia32cap_P,16,4
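/*
 * GNU property note advertising CET support: 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND with value 3 (IBT|SHSTK), matching the
 * endbr32 (.byte 243,15,30,251) emitted at each entry point when
 * __CET__ is defined.
 */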

	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#else
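/*
 * Non-PIC build of the same routines.  The visible difference is in
 * poly1305_init, which loads the address of OPENSSL_ia32cap_P directly
 * instead of relative to the .L001pic_point anchor.
 */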
.text
.align	64
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ebp
	xorl	%eax,%eax
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)
	cmpl	$0,%esi
	je	.L000nokey
	call	.L001pic_point
.L001pic_point:
	popl	%ebx
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	leal	OPENSSL_ia32cap_P,%edi
	movl	(%edi),%ecx
	andl	$83886080,%ecx
	cmpl	$83886080,%ecx
	jne	.L002no_sse2
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
	movl	8(%edi),%ecx
	testl	$32,%ecx
	jz	.L002no_sse2
	leal	_poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
	movl	20(%esp),%edi
	movl	%eax,(%ebp)
	movl	%edx,4(%ebp)
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax
	andl	$268435452,%ebx
	andl	$268435452,%ecx
	andl	$268435452,%edx
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.Lenter_blocks:
	andl	$-15,%ecx
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax
	movl	28(%edi),%ebx
	leal	(%esi,%ecx,1),%ebp
	movl	32(%edi),%ecx
	movl	36(%edi),%edx
	movl	%ebp,92(%esp)
	movl	%esi,%ebp
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%esi
	movl	16(%edi),%edi
	jmp	.L004loop
.align	32
.L004loop:
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax
	adcl	$0,%edi
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp
	jne	.L004loop
	movl	84(%esp),%edx
	addl	$64,%esp
	movl	%eax,(%edx)
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
.Lenter_emit:
	movl	24(%esp),%edi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	addl	$5,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	notl	%esi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin
.align	32
.type	_poly1305_init_sse2,@function
.align	16
_poly1305_init_sse2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movdqu	24(%edi),%xmm4
	leal	48(%edi),%edi
	movl	%esp,%ebp
	subl	$224,%esp
	andl	$-16,%esp
	movq	64(%ebx),%xmm7
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx
.L005square:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	pshufd	$68,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$68,%xmm1,%xmm1
	pshufd	$68,%xmm2,%xmm2
	pshufd	$68,%xmm3,%xmm3
	pshufd	$68,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.L006square_break
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.L005square
.L006square_break:
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$141,%xmm0,%xmm0
	pshufd	$141,%xmm1,%xmm1
	pshufd	$141,%xmm2,%xmm2
	pshufd	$141,%xmm3,%xmm3
	pshufd	$141,%xmm4,%xmm4
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	movl	%ebp,%esp
	leal	-48(%edi),%edi
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2
.align	32
.type	_poly1305_blocks_sse2,@function
.align	16
_poly1305_blocks_sse2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	andl	$-16,%ecx
	jz	.L007nodata
	cmpl	$64,%ecx
	jae	.L008enter_sse2
	testl	%eax,%eax
	jz	.Lenter_blocks
.align	16
.L008enter_sse2:
	call	.L009pic_point
.L009pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L009pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L010base2_26
	call	_poly1305_init_sse2
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	movl	$1,20(%edi)
	shrl	$2,%ecx
	andl	$67108863,%eax
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movd	%eax,%xmm0
	movd	%ecx,%xmm1
	movd	%edx,%xmm2
	movd	%esi,%xmm3
	movd	%ebp,%xmm4
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	jmp	.L011base2_32
.align	16
.L010base2_26:
	movd	(%edi),%xmm0
	movd	4(%edi),%xmm1
	movd	8(%edi),%xmm2
	movd	12(%edi),%xmm3
	movd	16(%edi),%xmm4
	movdqa	64(%ebx),%xmm7
.L011base2_32:
	movl	32(%esp),%eax
	movl	%esp,%ebp
	subl	$528,%esp
	andl	$-16,%esp
	leal	48(%edi),%edi
	shll	$24,%eax
	testl	$31,%ecx
	jz	.L012even
	movdqu	(%esi),%xmm6
	leal	16(%esi),%esi
	movdqa	%xmm6,%xmm5
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	psrlq	$26,%xmm5
	psrldq	$6,%xmm6
	pand	%xmm7,%xmm5
	paddd	%xmm5,%xmm1
	movdqa	%xmm6,%xmm5
	psrlq	$4,%xmm6
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm2
	movdqa	%xmm5,%xmm6
	psrlq	$30,%xmm5
	pand	%xmm7,%xmm5
	psrldq	$7,%xmm6
	paddd	%xmm5,%xmm3
	movd	%eax,%xmm5
	paddd	%xmm6,%xmm4
	movd	12(%edi),%xmm6
	paddd	%xmm5,%xmm4
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	pmuludq	%xmm6,%xmm0
	pmuludq	%xmm6,%xmm1
	pmuludq	%xmm6,%xmm2
	movd	28(%edi),%xmm5
	pmuludq	%xmm6,%xmm3
	pmuludq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movd	92(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	movd	44(%edi),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	movd	108(%edi),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	movd	60(%edi),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	movd	124(%edi),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	movd	76(%edi),%xmm5
	paddq	%xmm6,%xmm1
	movd	140(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	subl	$16,%ecx
	jz	.L013done
.L012even:
	leal	384(%esp),%edx
	leal	-32(%esi),%eax
	subl	$64,%ecx
	movdqu	(%edi),%xmm5
	pshufd	$68,%xmm5,%xmm6
	cmovbl	%eax,%esi
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,(%edx)
	leal	160(%esp),%eax
	movdqu	16(%edi),%xmm6
	movdqa	%xmm5,-144(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,16(%edx)
	movdqu	32(%edi),%xmm5
	movdqa	%xmm6,-128(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,32(%edx)
	movdqu	48(%edi),%xmm6
	movdqa	%xmm5,-112(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,48(%edx)
	movdqu	64(%edi),%xmm5
	movdqa	%xmm6,-96(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,64(%edx)
	movdqu	80(%edi),%xmm6
	movdqa	%xmm5,-80(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,80(%edx)
	movdqu	96(%edi),%xmm5
	movdqa	%xmm6,-64(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,96(%edx)
	movdqu	112(%edi),%xmm6
	movdqa	%xmm5,-48(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,112(%edx)
	movdqu	128(%edi),%xmm5
	movdqa	%xmm6,-32(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,128(%edx)
	movdqa	%xmm5,-16(%edx)
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	jbe	.L014skip_loop
	jmp	.L015loop
.align	32
.L015loop:
	movdqa	-144(%edx),%xmm7
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	pmuludq	-16(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	-128(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	-96(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	-80(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-128(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-112(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	-96(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-32(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-16(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	-128(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-16(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	-128(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	-16(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	-64(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	leal	-32(%esi),%eax
	subl	$64,%ecx
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	cmovbl	%eax,%esi
	leal	160(%esp),%eax
	movdqa	(%edx),%xmm7
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	paddq	%xmm0,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	paddq	16(%esp),%xmm6
	paddq	32(%esp),%xmm2
	paddq	48(%esp),%xmm3
	paddq	64(%esp),%xmm4
	pmuludq	128(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	16(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	112(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	128(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	128(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	128(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	80(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	ja	.L015loop
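/*
 * Tail of the SSE2 path.  pshufd $16 appears to spread the interleaved
 * 32-bit power pairs stored in the context into the two 64-bit pmuludq
 * lanes, so each remaining block pair is multiplied by the power of r that
 * matches its distance from the end of the input; .L016long_tail also
 * handles the pair that was read ahead when more than 32 bytes were left.
 */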
.L014skip_loop:
	pshufd	$16,-144(%edx),%xmm7
	addl	$32,%ecx
	jnz	.L016long_tail
	paddd	%xmm0,%xmm5
	paddd	%xmm1,%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
.L016long_tail:
	movdqa	%xmm5,(%eax)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	pmuludq	%xmm7,%xmm5
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	movdqa	%xmm5,%xmm0
	pshufd	$16,-128(%edx),%xmm5
	pmuludq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm1
	pmuludq	%xmm7,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%eax),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,-64(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%eax),%xmm6
	pshufd	$16,-112(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%eax),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%eax),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,-48(%edx),%xmm7
	pmuludq	(%eax),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%eax),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%eax),%xmm5
	pshufd	$16,-96(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%eax),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,-32(%edx),%xmm5
	pmuludq	(%eax),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%eax),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%eax),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%eax),%xmm7
	pshufd	$16,-80(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,-16(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%eax),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%eax),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	jz	.L017short_tail
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	pshufd	$16,(%edx),%xmm7
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	movdqa	%xmm5,(%esp)
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,16(%esp)
	pmuludq	%xmm7,%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm2,%xmm5
	pmuludq	%xmm7,%xmm2
	paddq	%xmm6,%xmm1
	movdqa	%xmm3,%xmm6
	pmuludq	%xmm7,%xmm3
	paddq	32(%esp),%xmm2
	movdqa	%xmm5,32(%esp)
	pshufd	$16,16(%edx),%xmm5
	paddq	48(%esp),%xmm3
	movdqa	%xmm6,48(%esp)
	movdqa	%xmm4,%xmm6
	pmuludq	%xmm7,%xmm4
	paddq	64(%esp),%xmm4
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,80(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	pshufd	$16,32(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,96(%edx),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	pshufd	$16,48(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,112(%edx),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	pshufd	$16,64(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,128(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
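/*
 * .L017short_tail: the two parallel 64-bit lanes are folded together
 * (pshufd $78 swaps the qword halves before the adds) and the result goes
 * through a final lazy reduction so that .L013done can store plain 26-bit
 * limbs back into the context.
 */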
.L017short_tail:
	pshufd	$78,%xmm4,%xmm6
	pshufd	$78,%xmm3,%xmm5
	paddq	%xmm6,%xmm4
	paddq	%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm6
	pshufd	$78,%xmm1,%xmm5
	paddq	%xmm6,%xmm0
	paddq	%xmm5,%xmm1
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm6,%xmm2
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
.L013done:
	movd	%xmm0,-48(%edi)
	movd	%xmm1,-44(%edi)
	movd	%xmm2,-40(%edi)
	movd	%xmm3,-36(%edi)
	movd	%xmm4,-32(%edi)
	movl	%ebp,%esp
.L007nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2
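/*
 * _poly1305_emit_sse2: if the accumulator is still in base 2^32 (flag at
 * 20(%ebp) clear) control jumps to the scalar .Lenter_emit path; otherwise
 * the five 26-bit limbs are recombined into four 32-bit words, the bits
 * above 2^130 are folded back times 5, and the final reduction selects
 * either h or h+5 (mod 2^128) by mask before the 128-bit nonce at 28(%esp)
 * is added.  Roughly:
 *     h   = recombined limbs, with carry*5 folded in;
 *     g   = h + 5;                      // carries propagated up to bit 130
 *     h   = (g >> 130) ? g : h;         // constant-time select via mask
 *     tag = (h + nonce) mod 2^128
 */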
.align	32
.type	_poly1305_emit_sse2,@function
.align	16
_poly1305_emit_sse2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
	cmpl	$0,20(%ebp)
	je	.Lenter_emit
	movl	(%ebp),%eax
	movl	4(%ebp),%edi
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	movl	%edi,%ebx
	shll	$26,%edi
	shrl	$6,%ebx
	addl	%edi,%eax
	movl	%ecx,%edi
	adcl	$0,%ebx
	shll	$20,%edi
	shrl	$12,%ecx
	addl	%edi,%ebx
	movl	%edx,%edi
	adcl	$0,%ecx
	shll	$14,%edi
	shrl	$18,%edx
	addl	%edi,%ecx
	movl	%esi,%edi
	adcl	$0,%edx
	shll	$8,%edi
	shrl	$24,%esi
	addl	%edi,%edx
	adcl	$0,%esi
	movl	%esi,%edi
	andl	$3,%esi
	shrl	$2,%edi
	leal	(%edi,%edi,4),%ebp
	movl	24(%esp),%edi
	addl	%ebp,%eax
	movl	28(%esp),%ebp
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	movd	%eax,%xmm0
	addl	$5,%eax
	movd	%ebx,%xmm1
	adcl	$0,%ebx
	movd	%ecx,%xmm2
	adcl	$0,%ecx
	movd	%edx,%xmm3
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movd	%xmm0,%eax
	movl	%ebx,4(%edi)
	movd	%xmm1,%ebx
	movl	%ecx,8(%edi)
	movd	%xmm2,%ecx
	movl	%edx,12(%edi)
	movd	%xmm3,%edx
	notl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	orl	(%edi),%eax
	andl	%esi,%ecx
	orl	4(%edi),%ebx
	andl	%esi,%edx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	movl	%eax,(%edi)
	adcl	8(%ebp),%ecx
	movl	%ebx,4(%edi)
	adcl	12(%ebp),%edx
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2
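/*
 * _poly1305_init_avx2: the clamped r loaded from 24(%edi) is split into five
 * 26-bit limbs and squared twice in the .L018square loop (%ecx = 2), so the
 * context ends up holding an interleaved table of powers of r together with
 * the precomputed 5*r_i limbs (vpslld $2 + vpaddd) that the AVX2 blocks
 * routine consumes.  %esp is restored from %ebp on exit.
 */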
.align	32
.type	_poly1305_init_avx2,@function
.align	16
_poly1305_init_avx2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	vmovdqu	24(%edi),%xmm4
	leal	48(%edi),%edi
	movl	%esp,%ebp
	subl	$224,%esp
	andl	$-16,%esp
	vmovdqa	64(%ebx),%xmm7
	vpand	%xmm7,%xmm4,%xmm0
	vpsrlq	$26,%xmm4,%xmm1
	vpsrldq	$6,%xmm4,%xmm3
	vpand	%xmm7,%xmm1,%xmm1
	vpsrlq	$4,%xmm3,%xmm2
	vpsrlq	$30,%xmm3,%xmm3
	vpand	%xmm7,%xmm2,%xmm2
	vpand	%xmm7,%xmm3,%xmm3
	vpsrldq	$13,%xmm4,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx
.L018square:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	vmovdqa	%xmm4,64(%esp)
	vpslld	$2,%xmm1,%xmm6
	vpslld	$2,%xmm2,%xmm5
	vpaddd	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm2,%xmm5,%xmm5
	vmovdqa	%xmm6,80(%esp)
	vmovdqa	%xmm5,96(%esp)
	vpslld	$2,%xmm3,%xmm6
	vpslld	$2,%xmm4,%xmm5
	vpaddd	%xmm3,%xmm6,%xmm6
	vpaddd	%xmm4,%xmm5,%xmm5
	vmovdqa	%xmm6,112(%esp)
	vmovdqa	%xmm5,128(%esp)
	vpshufd	$68,%xmm0,%xmm5
	vmovdqa	%xmm1,%xmm6
	vpshufd	$68,%xmm1,%xmm1
	vpshufd	$68,%xmm2,%xmm2
	vpshufd	$68,%xmm3,%xmm3
	vpshufd	$68,%xmm4,%xmm4
	vmovdqa	%xmm5,(%edx)
	vmovdqa	%xmm1,16(%edx)
	vmovdqa	%xmm2,32(%edx)
	vmovdqa	%xmm3,48(%edx)
	vmovdqa	%xmm4,64(%edx)
	vpmuludq	%xmm0,%xmm4,%xmm4
	vpmuludq	%xmm0,%xmm3,%xmm3
	vpmuludq	%xmm0,%xmm2,%xmm2
	vpmuludq	%xmm0,%xmm1,%xmm1
	vpmuludq	%xmm0,%xmm5,%xmm0
	vpmuludq	48(%edx),%xmm6,%xmm5
	vpaddq	%xmm5,%xmm4,%xmm4
	vpmuludq	32(%edx),%xmm6,%xmm7
	vpaddq	%xmm7,%xmm3,%xmm3
	vpmuludq	16(%edx),%xmm6,%xmm5
	vpaddq	%xmm5,%xmm2,%xmm2
	vmovdqa	80(%esp),%xmm7
	vpmuludq	(%edx),%xmm6,%xmm6
	vpaddq	%xmm6,%xmm1,%xmm1
	vmovdqa	32(%esp),%xmm5
	vpmuludq	64(%edx),%xmm7,%xmm7
	vpaddq	%xmm7,%xmm0,%xmm0
	vpmuludq	32(%edx),%xmm5,%xmm6
	vpaddq	%xmm6,%xmm4,%xmm4
	vpmuludq	16(%edx),%xmm5,%xmm7
	vpaddq	%xmm7,%xmm3,%xmm3
	vmovdqa	96(%esp),%xmm6
	vpmuludq	(%edx),%xmm5,%xmm5
	vpaddq	%xmm5,%xmm2,%xmm2
	vpmuludq	64(%edx),%xmm6,%xmm7
	vpaddq	%xmm7,%xmm1,%xmm1
	vmovdqa	48(%esp),%xmm5
	vpmuludq	48(%edx),%xmm6,%xmm6
	vpaddq	%xmm6,%xmm0,%xmm0
	vpmuludq	16(%edx),%xmm5,%xmm7
	vpaddq	%xmm7,%xmm4,%xmm4
	vmovdqa	112(%esp),%xmm6
	vpmuludq	(%edx),%xmm5,%xmm5
	vpaddq	%xmm5,%xmm3,%xmm3
	vpmuludq	64(%edx),%xmm6,%xmm7
	vpaddq	%xmm7,%xmm2,%xmm2
	vpmuludq	48(%edx),%xmm6,%xmm5
	vpaddq	%xmm5,%xmm1,%xmm1
	vmovdqa	64(%esp),%xmm7
	vpmuludq	32(%edx),%xmm6,%xmm6
	vpaddq	%xmm6,%xmm0,%xmm0
	vmovdqa	128(%esp),%xmm5
	vpmuludq	(%edx),%xmm7,%xmm7
	vpaddq	%xmm7,%xmm4,%xmm4
	vpmuludq	64(%edx),%xmm5,%xmm6
	vpaddq	%xmm6,%xmm3,%xmm3
	vpmuludq	16(%edx),%xmm5,%xmm7
	vpaddq	%xmm7,%xmm0,%xmm0
	vpmuludq	32(%edx),%xmm5,%xmm6
	vpaddq	%xmm6,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm7
	vpmuludq	48(%edx),%xmm5,%xmm5
	vpaddq	%xmm5,%xmm2,%xmm2
	vpsrlq	$26,%xmm3,%xmm5
	vpand	%xmm7,%xmm3,%xmm3
	vpsrlq	$26,%xmm0,%xmm6
	vpand	%xmm7,%xmm0,%xmm0
	vpaddq	%xmm5,%xmm4,%xmm4
	vpaddq	%xmm6,%xmm1,%xmm1
	vpsrlq	$26,%xmm4,%xmm5
	vpand	%xmm7,%xmm4,%xmm4
	vpsrlq	$26,%xmm1,%xmm6
	vpand	%xmm7,%xmm1,%xmm1
	vpaddq	%xmm6,%xmm2,%xmm2
	vpaddd	%xmm5,%xmm0,%xmm0
	vpsllq	$2,%xmm5,%xmm5
	vpsrlq	$26,%xmm2,%xmm6
	vpand	%xmm7,%xmm2,%xmm2
	vpaddd	%xmm5,%xmm0,%xmm0
	vpaddd	%xmm6,%xmm3,%xmm3
	vpsrlq	$26,%xmm3,%xmm6
	vpsrlq	$26,%xmm0,%xmm5
	vpand	%xmm7,%xmm0,%xmm0
	vpand	%xmm7,%xmm3,%xmm3
	vpaddd	%xmm5,%xmm1,%xmm1
	vpaddd	%xmm6,%xmm4,%xmm4
	decl	%ecx
	jz	.L019square_break
	vpunpcklqdq	(%esp),%xmm0,%xmm0
	vpunpcklqdq	16(%esp),%xmm1,%xmm1
	vpunpcklqdq	32(%esp),%xmm2,%xmm2
	vpunpcklqdq	48(%esp),%xmm3,%xmm3
	vpunpcklqdq	64(%esp),%xmm4,%xmm4
	jmp	.L018square
.L019square_break:
	vpsllq	$32,%xmm0,%xmm0
	vpsllq	$32,%xmm1,%xmm1
	vpsllq	$32,%xmm2,%xmm2
	vpsllq	$32,%xmm3,%xmm3
	vpsllq	$32,%xmm4,%xmm4
	vpor	(%esp),%xmm0,%xmm0
	vpor	16(%esp),%xmm1,%xmm1
	vpor	32(%esp),%xmm2,%xmm2
	vpor	48(%esp),%xmm3,%xmm3
	vpor	64(%esp),%xmm4,%xmm4
	vpshufd	$141,%xmm0,%xmm0
	vpshufd	$141,%xmm1,%xmm1
	vpshufd	$141,%xmm2,%xmm2
	vpshufd	$141,%xmm3,%xmm3
	vpshufd	$141,%xmm4,%xmm4
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	vmovdqu	%xmm4,64(%edi)
	vpslld	$2,%xmm1,%xmm6
	vpslld	$2,%xmm2,%xmm5
	vpaddd	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm2,%xmm5,%xmm5
	vmovdqu	%xmm6,80(%edi)
	vmovdqu	%xmm5,96(%edi)
	vpslld	$2,%xmm3,%xmm6
	vpslld	$2,%xmm4,%xmm5
	vpaddd	%xmm3,%xmm6,%xmm6
	vpaddd	%xmm4,%xmm5,%xmm5
	vmovdqu	%xmm6,112(%edi)
	vmovdqu	%xmm5,128(%edi)
	movl	%ebp,%esp
	leal	-48(%edi),%edi
	ret
.size	_poly1305_init_avx2,.-_poly1305_init_avx2
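/*
 * _poly1305_blocks_avx2: processes the input four 16-byte blocks at a time.
 * On first use (flag at 20(%edi) clear) it calls _poly1305_init_avx2 and
 * converts the base 2^32 accumulator into five 26-bit limbs; short inputs
 * (< 64 bytes) with a base 2^32 accumulator are punted to the integer
 * .Lenter_blocks path instead.  The power table written by init is splatted
 * across the 256-bit lanes at .L023base2_26 before the main loop.
 */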
.align	32
.type	_poly1305_blocks_avx2,@function
.align	16
_poly1305_blocks_avx2:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	andl	$-16,%ecx
	jz	.L020nodata
	cmpl	$64,%ecx
	jae	.L021enter_avx2
	testl	%eax,%eax
	jz	.Lenter_blocks
.L021enter_avx2:
	vzeroupper
	call	.L022pic_point
.L022pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L022pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L023base2_26
	call	_poly1305_init_avx2
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	shrl	$2,%ecx
	andl	$67108863,%eax
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movl	%eax,(%edi)
	movl	%ecx,4(%edi)
	movl	%edx,8(%edi)
	movl	%esi,12(%edi)
	movl	%ebp,16(%edi)
	movl	$1,20(%edi)
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.L023base2_26:
	movl	32(%esp),%eax
	movl	%esp,%ebp
	subl	$448,%esp
	andl	$-512,%esp
	vmovdqu	48(%edi),%xmm0
	leal	288(%esp),%edx
	vmovdqu	64(%edi),%xmm1
	vmovdqu	80(%edi),%xmm2
	vmovdqu	96(%edi),%xmm3
	vmovdqu	112(%edi),%xmm4
	leal	48(%edi),%edi
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpermq	$64,%ymm4,%ymm4
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	vpshufd	$200,%ymm4,%ymm4
	vmovdqa	%ymm0,-128(%edx)
	vmovdqu	80(%edi),%xmm0
	vmovdqa	%ymm1,-96(%edx)
	vmovdqu	96(%edi),%xmm1
	vmovdqa	%ymm2,-64(%edx)
	vmovdqu	112(%edi),%xmm2
	vmovdqa	%ymm3,-32(%edx)
	vmovdqu	128(%edi),%xmm3
	vmovdqa	%ymm4,(%edx)
	vpermq	$64,%ymm0,%ymm0
	vpermq	$64,%ymm1,%ymm1
	vpermq	$64,%ymm2,%ymm2
	vpermq	$64,%ymm3,%ymm3
	vpshufd	$200,%ymm0,%ymm0
	vpshufd	$200,%ymm1,%ymm1
	vpshufd	$200,%ymm2,%ymm2
	vpshufd	$200,%ymm3,%ymm3
	vmovdqa	%ymm0,32(%edx)
	vmovd	-48(%edi),%xmm0
	vmovdqa	%ymm1,64(%edx)
	vmovd	-44(%edi),%xmm1
	vmovdqa	%ymm2,96(%edx)
	vmovd	-40(%edi),%xmm2
	vmovdqa	%ymm3,128(%edx)
	vmovd	-36(%edi),%xmm3
	vmovd	-32(%edi),%xmm4
	vmovdqa	64(%ebx),%ymm7
	negl	%eax
	testl	$63,%ecx
	jz	.L024even
	movl	%ecx,%edx
	andl	$-64,%ecx
	andl	$63,%edx
	vmovdqu	(%esi),%xmm5
	cmpl	$32,%edx
	jb	.L025one
	vmovdqu	16(%esi),%xmm6
	je	.L026two
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	leal	48(%esi),%esi
	leal	8(%ebx),%ebx
	leal	296(%esp),%edx
	jmp	.L027tail
.L026two:
	leal	32(%esi),%esi
	leal	16(%ebx),%ebx
	leal	304(%esp),%edx
	jmp	.L027tail
.L025one:
	leal	16(%esi),%esi
	vpxor	%ymm6,%ymm6,%ymm6
	leal	32(%ebx,%eax,8),%ebx
	leal	312(%esp),%edx
	jmp	.L027tail
.align	32
.L024even:
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jz	.L027tail
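/*
 * Main AVX2 loop: the 64 bytes gathered above are split into five 256-bit
 * vectors of 26-bit limbs (with the padbit from (%ebx) OR-ed into the top
 * limb), added to the accumulator, multiplied against the splatted power
 * table at %edx, and lazily reduced before the next 64 bytes are fetched.
 */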
.L028loop:
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	vpmuludq	-96(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-64(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	96(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	128(%edx),%ymm2,%ymm1
	vpmuludq	-128(%edx),%ymm2,%ymm2
	vpmuludq	-32(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-96(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-64(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpmuludq	-64(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-32(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	128(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-128(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-96(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	vpmuludq	-128(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	64(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	96(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	128(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	vpmuludq	128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	32(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-128(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	64(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	96(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	vmovdqu	(%esi),%xmm5
	vmovdqu	16(%esi),%xmm6
	vinserti128	$1,32(%esi),%ymm5,%ymm5
	vinserti128	$1,48(%esi),%ymm6,%ymm6
	leal	64(%esi),%esi
	subl	$64,%ecx
	jnz	.L028loop
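/*
 * Tail: same multiply as the loop, but the %edx bias chosen at
 * .L025one/.L026two (and the %ebx adjustment for the padbit) appears to
 * select the powers of r matching how many blocks remain in each lane.
 * The four 64-bit lanes are then folded together (vpsrldq $8 and vpermq $2)
 * and reduced; if more input is queued, the limbs are repacked and control
 * returns to .L024even.
 */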
.L027tail:
	vmovdqa	%ymm2,64(%esp)
	vpsrldq	$6,%ymm5,%ymm2
	vmovdqa	%ymm0,(%esp)
	vpsrldq	$6,%ymm6,%ymm0
	vmovdqa	%ymm1,32(%esp)
	vpunpckhqdq	%ymm6,%ymm5,%ymm1
	vpunpcklqdq	%ymm6,%ymm5,%ymm5
	vpunpcklqdq	%ymm0,%ymm2,%ymm2
	vpsrlq	$30,%ymm2,%ymm0
	vpsrlq	$4,%ymm2,%ymm2
	vpsrlq	$26,%ymm5,%ymm6
	vpsrlq	$40,%ymm1,%ymm1
	vpand	%ymm7,%ymm2,%ymm2
	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpor	(%ebx),%ymm1,%ymm1
	andl	$-64,%ebx
	vpaddq	64(%esp),%ymm2,%ymm2
	vpaddq	(%esp),%ymm5,%ymm5
	vpaddq	32(%esp),%ymm6,%ymm6
	vpaddq	%ymm3,%ymm0,%ymm0
	vpaddq	%ymm4,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm2,%ymm3
	vmovdqa	%ymm6,32(%esp)
	vpmuludq	-60(%edx),%ymm2,%ymm4
	vmovdqa	%ymm0,96(%esp)
	vpmuludq	100(%edx),%ymm2,%ymm0
	vmovdqa	%ymm1,128(%esp)
	vpmuludq	132(%edx),%ymm2,%ymm1
	vpmuludq	-124(%edx),%ymm2,%ymm2
	vpmuludq	-28(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	4(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm4,%ymm4
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm0,%ymm0
	vmovdqa	32(%esp),%ymm7
	vpmuludq	-92(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	-60(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpmuludq	-60(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm3,%ymm3
	vpmuludq	-28(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm4,%ymm4
	vpmuludq	132(%edx),%ymm7,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vmovdqa	96(%esp),%ymm6
	vpmuludq	-124(%edx),%ymm7,%ymm5
	vpaddq	%ymm5,%ymm1,%ymm1
	vpmuludq	-92(%edx),%ymm7,%ymm7
	vpaddq	%ymm7,%ymm2,%ymm2
	vpmuludq	-124(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm3,%ymm3
	vpmuludq	-92(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vpmuludq	68(%edx),%ymm6,%ymm5
	vpaddq	%ymm5,%ymm0,%ymm0
	vmovdqa	128(%esp),%ymm5
	vpmuludq	100(%edx),%ymm6,%ymm7
	vpaddq	%ymm7,%ymm1,%ymm1
	vpmuludq	132(%edx),%ymm6,%ymm6
	vpaddq	%ymm6,%ymm2,%ymm2
	vpmuludq	132(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm3,%ymm3
	vpmuludq	36(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm0,%ymm0
	vpmuludq	-124(%edx),%ymm5,%ymm7
	vpaddq	%ymm7,%ymm4,%ymm4
	vmovdqa	64(%ebx),%ymm7
	vpmuludq	68(%edx),%ymm5,%ymm6
	vpaddq	%ymm6,%ymm1,%ymm1
	vpmuludq	100(%edx),%ymm5,%ymm5
	vpaddq	%ymm5,%ymm2,%ymm2
	vpsrldq	$8,%ymm4,%ymm5
	vpsrldq	$8,%ymm3,%ymm6
	vpaddq	%ymm5,%ymm4,%ymm4
	vpsrldq	$8,%ymm0,%ymm5
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrldq	$8,%ymm1,%ymm6
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsrldq	$8,%ymm2,%ymm5
	vpaddq	%ymm6,%ymm1,%ymm1
	vpermq	$2,%ymm4,%ymm6
	vpaddq	%ymm5,%ymm2,%ymm2
	vpermq	$2,%ymm3,%ymm5
	vpaddq	%ymm6,%ymm4,%ymm4
	vpermq	$2,%ymm0,%ymm6
	vpaddq	%ymm5,%ymm3,%ymm3
	vpermq	$2,%ymm1,%ymm5
	vpaddq	%ymm6,%ymm0,%ymm0
	vpermq	$2,%ymm2,%ymm6
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpsrlq	$26,%ymm3,%ymm5
	vpand	%ymm7,%ymm3,%ymm3
	vpsrlq	$26,%ymm0,%ymm6
	vpand	%ymm7,%ymm0,%ymm0
	vpaddq	%ymm5,%ymm4,%ymm4
	vpaddq	%ymm6,%ymm1,%ymm1
	vpsrlq	$26,%ymm4,%ymm5
	vpand	%ymm7,%ymm4,%ymm4
	vpsrlq	$26,%ymm1,%ymm6
	vpand	%ymm7,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpsllq	$2,%ymm5,%ymm5
	vpsrlq	$26,%ymm2,%ymm6
	vpand	%ymm7,%ymm2,%ymm2
	vpaddq	%ymm5,%ymm0,%ymm0
	vpaddq	%ymm6,%ymm3,%ymm3
	vpsrlq	$26,%ymm3,%ymm6
	vpsrlq	$26,%ymm0,%ymm5
	vpand	%ymm7,%ymm0,%ymm0
	vpand	%ymm7,%ymm3,%ymm3
	vpaddq	%ymm5,%ymm1,%ymm1
	vpaddq	%ymm6,%ymm4,%ymm4
	cmpl	$0,%ecx
	je	.L029done
	vpshufd	$252,%xmm0,%xmm0
	leal	288(%esp),%edx
	vpshufd	$252,%xmm1,%xmm1
	vpshufd	$252,%xmm2,%xmm2
	vpshufd	$252,%xmm3,%xmm3
	vpshufd	$252,%xmm4,%xmm4
	jmp	.L024even
.align	16
.L029done:
	vmovd	%xmm0,-48(%edi)
	vmovd	%xmm1,-44(%edi)
	vmovd	%xmm2,-40(%edi)
	vmovd	%xmm3,-36(%edi)
	vmovd	%xmm4,-32(%edi)
	vzeroupper
	movl	%ebp,%esp
.L020nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_avx2,.-_poly1305_blocks_avx2
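/*
 * .Lconst_sse2 layout (addressed via %ebx):
 *    0(%ebx)  1<<24 per 64-bit lane - the 2^128 padbit expressed in the top
 *                                     26-bit limb
 *   32(%ebx)  zeros                 - used in place of the padbit for lanes
 *                                     past the end of the input
 *   64(%ebx)  2^26-1 per lane       - the limb mask
 *   96(%ebx)  0x0fffffff/0x0ffffffc - the r clamping mask
 * followed by the CRYPTOGAMS attribution string.
 */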
.align	64
.Lconst_sse2:
.long	16777216,0,16777216,0,16777216,0,16777216,0
.long	0,0,0,0,0,0,0,0
.long	67108863,0,67108863,0,67108863,0,67108863,0
.long	268435455,268435452,268435452,268435452
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.align	4
.comm	OPENSSL_ia32cap_P,16,4
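/*
 * GNU property note marking the object as IBT/SHSTK (CET) compatible,
 * matching the endbr32 byte sequences (243,15,30,251) emitted at the
 * function entries above; it is part of the #ifdef PIC build of this file.
 */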

	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#endif