xref: /freebsd/sys/crypto/openssl/amd64/aesni-gcm-x86_64.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
2bc3d5698SJohn Baldwin.text
3bc3d5698SJohn Baldwin
4bc3d5698SJohn Baldwin.type	_aesni_ctr32_ghash_6x,@function
5bc3d5698SJohn Baldwin.align	32
6bc3d5698SJohn Baldwin_aesni_ctr32_ghash_6x:
7bc3d5698SJohn Baldwin.cfi_startproc
8bc3d5698SJohn Baldwin	vmovdqu	32(%r11),%xmm2
9bc3d5698SJohn Baldwin	subq	$6,%rdx
10bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm4,%xmm4
11bc3d5698SJohn Baldwin	vmovdqu	0-128(%rcx),%xmm15
12bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm1,%xmm10
13bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm10,%xmm11
14bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm11,%xmm12
15bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm12,%xmm13
16bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm13,%xmm14
17bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm1,%xmm9
18bc3d5698SJohn Baldwin	vmovdqu	%xmm4,16+8(%rsp)
19bc3d5698SJohn Baldwin	jmp	.Loop6x
20bc3d5698SJohn Baldwin
21bc3d5698SJohn Baldwin.align	32
22bc3d5698SJohn Baldwin.Loop6x:
23bc3d5698SJohn Baldwin	addl	$100663296,%ebx
24bc3d5698SJohn Baldwin	jc	.Lhandle_ctr32
25bc3d5698SJohn Baldwin	vmovdqu	0-32(%r9),%xmm3
26bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm14,%xmm1
27bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm10,%xmm10
28bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm11,%xmm11
29bc3d5698SJohn Baldwin
30bc3d5698SJohn Baldwin.Lresume_ctr32:
31bc3d5698SJohn Baldwin	vmovdqu	%xmm1,(%r8)
32bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm7,%xmm5
33bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm12,%xmm12
34bc3d5698SJohn Baldwin	vmovups	16-128(%rcx),%xmm2
35bc3d5698SJohn Baldwin	vpclmulqdq	$0x01,%xmm3,%xmm7,%xmm6
36bc3d5698SJohn Baldwin	xorq	%r12,%r12
37bc3d5698SJohn Baldwin	cmpq	%r14,%r15
38bc3d5698SJohn Baldwin
39bc3d5698SJohn Baldwin	vaesenc	%xmm2,%xmm9,%xmm9
40bc3d5698SJohn Baldwin	vmovdqu	48+8(%rsp),%xmm0
41bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm13,%xmm13
42bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm7,%xmm1
43bc3d5698SJohn Baldwin	vaesenc	%xmm2,%xmm10,%xmm10
44bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm14,%xmm14
45bc3d5698SJohn Baldwin	setnc	%r12b
46bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm7,%xmm7
47bc3d5698SJohn Baldwin	vaesenc	%xmm2,%xmm11,%xmm11
48bc3d5698SJohn Baldwin	vmovdqu	16-32(%r9),%xmm3
49bc3d5698SJohn Baldwin	negq	%r12
50bc3d5698SJohn Baldwin	vaesenc	%xmm2,%xmm12,%xmm12
51bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm6,%xmm6
52bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm5
53bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm8,%xmm8
54bc3d5698SJohn Baldwin	vaesenc	%xmm2,%xmm13,%xmm13
55bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm1,%xmm4
56bc3d5698SJohn Baldwin	andq	$0x60,%r12
57bc3d5698SJohn Baldwin	vmovups	32-128(%rcx),%xmm15
58bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm1
59bc3d5698SJohn Baldwin	vaesenc	%xmm2,%xmm14,%xmm14
60bc3d5698SJohn Baldwin
61bc3d5698SJohn Baldwin	vpclmulqdq	$0x01,%xmm3,%xmm0,%xmm2
62bc3d5698SJohn Baldwin	leaq	(%r14,%r12,1),%r14
63bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
64bc3d5698SJohn Baldwin	vpxor	16+8(%rsp),%xmm8,%xmm8
65bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm0,%xmm3
66bc3d5698SJohn Baldwin	vmovdqu	64+8(%rsp),%xmm0
67bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
68bc3d5698SJohn Baldwin	movbeq	88(%r14),%r13
69bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
70bc3d5698SJohn Baldwin	movbeq	80(%r14),%r12
71bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
72bc3d5698SJohn Baldwin	movq	%r13,32+8(%rsp)
73bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
74bc3d5698SJohn Baldwin	movq	%r12,40+8(%rsp)
75bc3d5698SJohn Baldwin	vmovdqu	48-32(%r9),%xmm5
76bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
77bc3d5698SJohn Baldwin
78bc3d5698SJohn Baldwin	vmovups	48-128(%rcx),%xmm15
79bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm6,%xmm6
80bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm5,%xmm0,%xmm1
81bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
82bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm6,%xmm6
83bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm5,%xmm0,%xmm2
84bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
85bc3d5698SJohn Baldwin	vpxor	%xmm3,%xmm7,%xmm7
86bc3d5698SJohn Baldwin	vpclmulqdq	$0x01,%xmm5,%xmm0,%xmm3
87bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
88bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm5,%xmm0,%xmm5
89bc3d5698SJohn Baldwin	vmovdqu	80+8(%rsp),%xmm0
90bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
91bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
92bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm4,%xmm4
93bc3d5698SJohn Baldwin	vmovdqu	64-32(%r9),%xmm1
94bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
95bc3d5698SJohn Baldwin
96bc3d5698SJohn Baldwin	vmovups	64-128(%rcx),%xmm15
97bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm6,%xmm6
98bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm1,%xmm0,%xmm2
99bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
100bc3d5698SJohn Baldwin	vpxor	%xmm3,%xmm6,%xmm6
101bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm1,%xmm0,%xmm3
102bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
103bc3d5698SJohn Baldwin	movbeq	72(%r14),%r13
104bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm7,%xmm7
105bc3d5698SJohn Baldwin	vpclmulqdq	$0x01,%xmm1,%xmm0,%xmm5
106bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
107bc3d5698SJohn Baldwin	movbeq	64(%r14),%r12
108bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm1,%xmm0,%xmm1
109bc3d5698SJohn Baldwin	vmovdqu	96+8(%rsp),%xmm0
110bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
111bc3d5698SJohn Baldwin	movq	%r13,48+8(%rsp)
112bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
113bc3d5698SJohn Baldwin	movq	%r12,56+8(%rsp)
114bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm4,%xmm4
115bc3d5698SJohn Baldwin	vmovdqu	96-32(%r9),%xmm2
116bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
117bc3d5698SJohn Baldwin
118bc3d5698SJohn Baldwin	vmovups	80-128(%rcx),%xmm15
119bc3d5698SJohn Baldwin	vpxor	%xmm3,%xmm6,%xmm6
120bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm3
121bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
122bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm6,%xmm6
123bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm2,%xmm0,%xmm5
124bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
125bc3d5698SJohn Baldwin	movbeq	56(%r14),%r13
126bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm7,%xmm7
127bc3d5698SJohn Baldwin	vpclmulqdq	$0x01,%xmm2,%xmm0,%xmm1
128bc3d5698SJohn Baldwin	vpxor	112+8(%rsp),%xmm8,%xmm8
129bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
130bc3d5698SJohn Baldwin	movbeq	48(%r14),%r12
131bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm2
132bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
133bc3d5698SJohn Baldwin	movq	%r13,64+8(%rsp)
134bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
135bc3d5698SJohn Baldwin	movq	%r12,72+8(%rsp)
136bc3d5698SJohn Baldwin	vpxor	%xmm3,%xmm4,%xmm4
137bc3d5698SJohn Baldwin	vmovdqu	112-32(%r9),%xmm3
138bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
139bc3d5698SJohn Baldwin
140bc3d5698SJohn Baldwin	vmovups	96-128(%rcx),%xmm15
141bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm6,%xmm6
142bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm5
143bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
144bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm6,%xmm6
145bc3d5698SJohn Baldwin	vpclmulqdq	$0x01,%xmm3,%xmm8,%xmm1
146bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
147bc3d5698SJohn Baldwin	movbeq	40(%r14),%r13
148bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm7,%xmm7
149bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm8,%xmm2
150bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
151bc3d5698SJohn Baldwin	movbeq	32(%r14),%r12
152bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm8,%xmm8
153bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
154bc3d5698SJohn Baldwin	movq	%r13,80+8(%rsp)
155bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
156bc3d5698SJohn Baldwin	movq	%r12,88+8(%rsp)
157bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm6,%xmm6
158bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
159bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm6,%xmm6
160bc3d5698SJohn Baldwin
161bc3d5698SJohn Baldwin	vmovups	112-128(%rcx),%xmm15
162bc3d5698SJohn Baldwin	vpslldq	$8,%xmm6,%xmm5
163bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm4,%xmm4
164bc3d5698SJohn Baldwin	vmovdqu	16(%r11),%xmm3
165bc3d5698SJohn Baldwin
166bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
167bc3d5698SJohn Baldwin	vpxor	%xmm8,%xmm7,%xmm7
168bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
169bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm4,%xmm4
170bc3d5698SJohn Baldwin	movbeq	24(%r14),%r13
171bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
172bc3d5698SJohn Baldwin	movbeq	16(%r14),%r12
173bc3d5698SJohn Baldwin	vpalignr	$8,%xmm4,%xmm4,%xmm0
174bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm4,%xmm4
175bc3d5698SJohn Baldwin	movq	%r13,96+8(%rsp)
176bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
177bc3d5698SJohn Baldwin	movq	%r12,104+8(%rsp)
178bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
179bc3d5698SJohn Baldwin	vmovups	128-128(%rcx),%xmm1
180bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
181bc3d5698SJohn Baldwin
182bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm9,%xmm9
183bc3d5698SJohn Baldwin	vmovups	144-128(%rcx),%xmm15
184bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm10,%xmm10
185bc3d5698SJohn Baldwin	vpsrldq	$8,%xmm6,%xmm6
186bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm11,%xmm11
187bc3d5698SJohn Baldwin	vpxor	%xmm6,%xmm7,%xmm7
188bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm12,%xmm12
189bc3d5698SJohn Baldwin	vpxor	%xmm0,%xmm4,%xmm4
190bc3d5698SJohn Baldwin	movbeq	8(%r14),%r13
191bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm13,%xmm13
192bc3d5698SJohn Baldwin	movbeq	0(%r14),%r12
193bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm14,%xmm14
194bc3d5698SJohn Baldwin	vmovups	160-128(%rcx),%xmm1
195bc3d5698SJohn Baldwin	cmpl	$11,%ebp
196bc3d5698SJohn Baldwin	jb	.Lenc_tail
197bc3d5698SJohn Baldwin
198bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
199bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
200bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
201bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
202bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
203bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
204bc3d5698SJohn Baldwin
205bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm9,%xmm9
206bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm10,%xmm10
207bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm11,%xmm11
208bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm12,%xmm12
209bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm13,%xmm13
210bc3d5698SJohn Baldwin	vmovups	176-128(%rcx),%xmm15
211bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm14,%xmm14
212bc3d5698SJohn Baldwin	vmovups	192-128(%rcx),%xmm1
213bc3d5698SJohn Baldwin	je	.Lenc_tail
214bc3d5698SJohn Baldwin
215bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
216bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
217bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
218bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
219bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
220bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
221bc3d5698SJohn Baldwin
222bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm9,%xmm9
223bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm10,%xmm10
224bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm11,%xmm11
225bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm12,%xmm12
226bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm13,%xmm13
227bc3d5698SJohn Baldwin	vmovups	208-128(%rcx),%xmm15
228bc3d5698SJohn Baldwin	vaesenc	%xmm1,%xmm14,%xmm14
229bc3d5698SJohn Baldwin	vmovups	224-128(%rcx),%xmm1
230bc3d5698SJohn Baldwin	jmp	.Lenc_tail
231bc3d5698SJohn Baldwin
232bc3d5698SJohn Baldwin.align	32
233bc3d5698SJohn Baldwin.Lhandle_ctr32:
234bc3d5698SJohn Baldwin	vmovdqu	(%r11),%xmm0
235bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm1,%xmm6
236bc3d5698SJohn Baldwin	vmovdqu	48(%r11),%xmm5
237bc3d5698SJohn Baldwin	vpaddd	64(%r11),%xmm6,%xmm10
238bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm6,%xmm11
239bc3d5698SJohn Baldwin	vmovdqu	0-32(%r9),%xmm3
240bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm10,%xmm12
241bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm10,%xmm10
242bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm11,%xmm13
243bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm11,%xmm11
244bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm10,%xmm10
245bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm12,%xmm14
246bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm12,%xmm12
247bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm11,%xmm11
248bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm13,%xmm1
249bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm13,%xmm13
250bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm14,%xmm14
251bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm1,%xmm1
252bc3d5698SJohn Baldwin	jmp	.Lresume_ctr32
253bc3d5698SJohn Baldwin
254bc3d5698SJohn Baldwin.align	32
255bc3d5698SJohn Baldwin.Lenc_tail:
256bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
257bc3d5698SJohn Baldwin	vmovdqu	%xmm7,16+8(%rsp)
258bc3d5698SJohn Baldwin	vpalignr	$8,%xmm4,%xmm4,%xmm8
259bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
260bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm4,%xmm4
261bc3d5698SJohn Baldwin	vpxor	0(%rdi),%xmm1,%xmm2
262bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
263bc3d5698SJohn Baldwin	vpxor	16(%rdi),%xmm1,%xmm0
264bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
265bc3d5698SJohn Baldwin	vpxor	32(%rdi),%xmm1,%xmm5
266bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
267bc3d5698SJohn Baldwin	vpxor	48(%rdi),%xmm1,%xmm6
268bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
269bc3d5698SJohn Baldwin	vpxor	64(%rdi),%xmm1,%xmm7
270bc3d5698SJohn Baldwin	vpxor	80(%rdi),%xmm1,%xmm3
271bc3d5698SJohn Baldwin	vmovdqu	(%r8),%xmm1
272bc3d5698SJohn Baldwin
273bc3d5698SJohn Baldwin	vaesenclast	%xmm2,%xmm9,%xmm9
274bc3d5698SJohn Baldwin	vmovdqu	32(%r11),%xmm2
275bc3d5698SJohn Baldwin	vaesenclast	%xmm0,%xmm10,%xmm10
276bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm1,%xmm0
277bc3d5698SJohn Baldwin	movq	%r13,112+8(%rsp)
278bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
279bc3d5698SJohn Baldwin	vaesenclast	%xmm5,%xmm11,%xmm11
280bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm0,%xmm5
281bc3d5698SJohn Baldwin	movq	%r12,120+8(%rsp)
282bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
283bc3d5698SJohn Baldwin	vmovdqu	0-128(%rcx),%xmm15
284bc3d5698SJohn Baldwin	vaesenclast	%xmm6,%xmm12,%xmm12
285bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm5,%xmm6
286bc3d5698SJohn Baldwin	vaesenclast	%xmm7,%xmm13,%xmm13
287bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm6,%xmm7
288bc3d5698SJohn Baldwin	vaesenclast	%xmm3,%xmm14,%xmm14
289bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm7,%xmm3
290bc3d5698SJohn Baldwin
291bc3d5698SJohn Baldwin	addq	$0x60,%r10
292bc3d5698SJohn Baldwin	subq	$0x6,%rdx
293bc3d5698SJohn Baldwin	jc	.L6x_done
294bc3d5698SJohn Baldwin
295bc3d5698SJohn Baldwin	vmovups	%xmm9,-96(%rsi)
296bc3d5698SJohn Baldwin	vpxor	%xmm15,%xmm1,%xmm9
297bc3d5698SJohn Baldwin	vmovups	%xmm10,-80(%rsi)
298bc3d5698SJohn Baldwin	vmovdqa	%xmm0,%xmm10
299bc3d5698SJohn Baldwin	vmovups	%xmm11,-64(%rsi)
300bc3d5698SJohn Baldwin	vmovdqa	%xmm5,%xmm11
301bc3d5698SJohn Baldwin	vmovups	%xmm12,-48(%rsi)
302bc3d5698SJohn Baldwin	vmovdqa	%xmm6,%xmm12
303bc3d5698SJohn Baldwin	vmovups	%xmm13,-32(%rsi)
304bc3d5698SJohn Baldwin	vmovdqa	%xmm7,%xmm13
305bc3d5698SJohn Baldwin	vmovups	%xmm14,-16(%rsi)
306bc3d5698SJohn Baldwin	vmovdqa	%xmm3,%xmm14
307bc3d5698SJohn Baldwin	vmovdqu	32+8(%rsp),%xmm7
308bc3d5698SJohn Baldwin	jmp	.Loop6x
309bc3d5698SJohn Baldwin
310bc3d5698SJohn Baldwin.L6x_done:
311bc3d5698SJohn Baldwin	vpxor	16+8(%rsp),%xmm8,%xmm8
312bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm8,%xmm8
313bc3d5698SJohn Baldwin
314bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
315bc3d5698SJohn Baldwin.cfi_endproc
316bc3d5698SJohn Baldwin.size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
# aesni_gcm_decrypt -- public entry point (SysV AMD64 ABI).
# Presumably matches OpenSSL's prototype
#   size_t aesni_gcm_decrypt(const void *in, void *out, size_t len,
#                            const void *key, unsigned char ivec[16], u64 *Xi);
# i.e. %rdi=in, %rsi=out, %rdx=len, %rcx=key schedule, %r8=counter/IV,
# %r9=Xi/Htable -- TODO confirm against aesni-gcm-x86_64.pl.
# Returns in %rax the number of bytes processed (%r10 accumulator); returns 0
# without touching anything when len < 0x60 (fewer than six 16-byte blocks).
317bc3d5698SJohn Baldwin.globl	aesni_gcm_decrypt
318bc3d5698SJohn Baldwin.type	aesni_gcm_decrypt,@function
319bc3d5698SJohn Baldwin.align	32
320bc3d5698SJohn Baldwinaesni_gcm_decrypt:
321bc3d5698SJohn Baldwin.cfi_startproc
322bc3d5698SJohn Baldwin	xorq	%r10,%r10
323bc3d5698SJohn Baldwin	cmpq	$0x60,%rdx
324bc3d5698SJohn Baldwin	jb	.Lgcm_dec_abort
325bc3d5698SJohn Baldwin
# Save %rsp in %rax (CFA register) and all callee-saved GPRs.
326bc3d5698SJohn Baldwin	leaq	(%rsp),%rax
327bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rax
328bc3d5698SJohn Baldwin	pushq	%rbx
329bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
330bc3d5698SJohn Baldwin	pushq	%rbp
331bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
332bc3d5698SJohn Baldwin	pushq	%r12
333bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
334bc3d5698SJohn Baldwin	pushq	%r13
335bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
336bc3d5698SJohn Baldwin	pushq	%r14
337bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
338bc3d5698SJohn Baldwin	pushq	%r15
339bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
340bc3d5698SJohn Baldwin	vzeroupper
341bc3d5698SJohn Baldwin
# Load IV, carve out a 128-byte-aligned scratch frame, and bias the key
# (%rcx += 128) and Htable (%r9 += 64) pointers for shorter displacements.
342bc3d5698SJohn Baldwin	vmovdqu	(%r8),%xmm1
343bc3d5698SJohn Baldwin	addq	$-128,%rsp
344bc3d5698SJohn Baldwin	movl	12(%r8),%ebx
345bc3d5698SJohn Baldwin	leaq	.Lbswap_mask(%rip),%r11
346bc3d5698SJohn Baldwin	leaq	-128(%rcx),%r14
347bc3d5698SJohn Baldwin	movq	$0xf80,%r15
348bc3d5698SJohn Baldwin	vmovdqu	(%r9),%xmm8
349bc3d5698SJohn Baldwin	andq	$-128,%rsp
350bc3d5698SJohn Baldwin	vmovdqu	(%r11),%xmm0
351bc3d5698SJohn Baldwin	leaq	128(%rcx),%rcx
352bc3d5698SJohn Baldwin	leaq	32+32(%r9),%r9
353bc3d5698SJohn Baldwin	movl	240-128(%rcx),%ebp
354bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm8,%xmm8
355bc3d5698SJohn Baldwin
# If the key schedule and the stack frame fall into overlapping 0xf80-masked
# address ranges (cache-set aliasing), push the stack further down to avoid it.
356bc3d5698SJohn Baldwin	andq	%r15,%r14
357bc3d5698SJohn Baldwin	andq	%rsp,%r15
358bc3d5698SJohn Baldwin	subq	%r14,%r15
359bc3d5698SJohn Baldwin	jc	.Ldec_no_key_aliasing
360bc3d5698SJohn Baldwin	cmpq	$768,%r15
361bc3d5698SJohn Baldwin	jnc	.Ldec_no_key_aliasing
362bc3d5698SJohn Baldwin	subq	%r15,%rsp
363bc3d5698SJohn Baldwin.Ldec_no_key_aliasing:
364bc3d5698SJohn Baldwin
# Preload the first 6 ciphertext blocks (byte-swapped) into the stack buffer
# so _aesni_ctr32_ghash_6x can GHASH them while decrypting; %r14/%r15 bound
# the ciphertext region it walks.
365bc3d5698SJohn Baldwin	vmovdqu	80(%rdi),%xmm7
366bc3d5698SJohn Baldwin	leaq	(%rdi),%r14
367bc3d5698SJohn Baldwin	vmovdqu	64(%rdi),%xmm4
368bc3d5698SJohn Baldwin	leaq	-192(%rdi,%rdx,1),%r15
369bc3d5698SJohn Baldwin	vmovdqu	48(%rdi),%xmm5
370bc3d5698SJohn Baldwin	shrq	$4,%rdx
371bc3d5698SJohn Baldwin	xorq	%r10,%r10
372bc3d5698SJohn Baldwin	vmovdqu	32(%rdi),%xmm6
373bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm7,%xmm7
374bc3d5698SJohn Baldwin	vmovdqu	16(%rdi),%xmm2
375bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm4,%xmm4
376bc3d5698SJohn Baldwin	vmovdqu	(%rdi),%xmm3
377bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm5,%xmm5
378bc3d5698SJohn Baldwin	vmovdqu	%xmm4,48(%rsp)
379bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm6,%xmm6
380bc3d5698SJohn Baldwin	vmovdqu	%xmm5,64(%rsp)
381bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm2,%xmm2
382bc3d5698SJohn Baldwin	vmovdqu	%xmm6,80(%rsp)
383bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm3,%xmm3
384bc3d5698SJohn Baldwin	vmovdqu	%xmm2,96(%rsp)
385bc3d5698SJohn Baldwin	vmovdqu	%xmm3,112(%rsp)
386bc3d5698SJohn Baldwin
387bc3d5698SJohn Baldwin	call	_aesni_ctr32_ghash_6x
388bc3d5698SJohn Baldwin
# Flush the last 6 plaintext blocks and write back the updated GHASH state
# (byte-swapped) to -64(%r9), i.e. the caller's Xi.
389bc3d5698SJohn Baldwin	vmovups	%xmm9,-96(%rsi)
390bc3d5698SJohn Baldwin	vmovups	%xmm10,-80(%rsi)
391bc3d5698SJohn Baldwin	vmovups	%xmm11,-64(%rsi)
392bc3d5698SJohn Baldwin	vmovups	%xmm12,-48(%rsi)
393bc3d5698SJohn Baldwin	vmovups	%xmm13,-32(%rsi)
394bc3d5698SJohn Baldwin	vmovups	%xmm14,-16(%rsi)
395bc3d5698SJohn Baldwin
396bc3d5698SJohn Baldwin	vpshufb	(%r11),%xmm8,%xmm8
397bc3d5698SJohn Baldwin	vmovdqu	%xmm8,-64(%r9)
398bc3d5698SJohn Baldwin
# Epilogue: clear upper YMM state, restore callee-saved registers relative to
# the saved frame base in %rax, and return byte count in %rax.
399bc3d5698SJohn Baldwin	vzeroupper
400bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
401bc3d5698SJohn Baldwin.cfi_restore	%r15
402bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
403bc3d5698SJohn Baldwin.cfi_restore	%r14
404bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
405bc3d5698SJohn Baldwin.cfi_restore	%r13
406bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
407bc3d5698SJohn Baldwin.cfi_restore	%r12
408bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
409bc3d5698SJohn Baldwin.cfi_restore	%rbp
410bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
411bc3d5698SJohn Baldwin.cfi_restore	%rbx
412bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
413bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
414bc3d5698SJohn Baldwin.Lgcm_dec_abort:
415bc3d5698SJohn Baldwin	movq	%r10,%rax
# 0xf3,0xc3 = "rep ret".
416bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
417bc3d5698SJohn Baldwin.cfi_endproc
418bc3d5698SJohn Baldwin.size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
# _aesni_ctr32_6x -- auto-generated helper (no GHASH): encrypts six AES-CTR
# blocks and XORs them with 96 bytes at (%rdi), writing 96 bytes to (%rsi).
# Used by aesni_gcm_encrypt (below) to prime the ciphertext pipeline before
# _aesni_ctr32_ghash_6x takes over. Register contract is inherited from the
# caller: %rcx=key(+128 bias), %rbp=rounds, %r11=constants, %r8 not used here,
# %xmm1=current counter, %ebx=counter low word; advances %rdi/%rsi by 96.
419bc3d5698SJohn Baldwin.type	_aesni_ctr32_6x,@function
420bc3d5698SJohn Baldwin.align	32
421bc3d5698SJohn Baldwin_aesni_ctr32_6x:
422bc3d5698SJohn Baldwin.cfi_startproc
423bc3d5698SJohn Baldwin	vmovdqu	0-128(%rcx),%xmm4
424bc3d5698SJohn Baldwin	vmovdqu	32(%r11),%xmm2
425bc3d5698SJohn Baldwin	leaq	-1(%rbp),%r13
426bc3d5698SJohn Baldwin	vmovups	16-128(%rcx),%xmm15
427bc3d5698SJohn Baldwin	leaq	32-128(%rcx),%r12
428bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm1,%xmm9
# 100663296 = 0x06000000: same wrap test as in _aesni_ctr32_ghash_6x -- carry
# means the byte counter overflows within these six blocks.
429bc3d5698SJohn Baldwin	addl	$100663296,%ebx
430bc3d5698SJohn Baldwin	jc	.Lhandle_ctr32_2
431bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm1,%xmm10
432bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm10,%xmm11
433bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm10,%xmm10
434bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm11,%xmm12
435bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm11,%xmm11
436bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm12,%xmm13
437bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm12,%xmm12
438bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm13,%xmm14
439bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm13,%xmm13
440bc3d5698SJohn Baldwin	vpaddb	%xmm2,%xmm14,%xmm1
441bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm14,%xmm14
442bc3d5698SJohn Baldwin	jmp	.Loop_ctr32
443bc3d5698SJohn Baldwin
# Round loop: apply round keys from %r12 (= key+32-128) to all six blocks;
# %r13d counts rounds-1, the last round key is handled by vaesenclast below.
444bc3d5698SJohn Baldwin.align	16
445bc3d5698SJohn Baldwin.Loop_ctr32:
446bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
447bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
448bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
449bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
450bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
451bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
452bc3d5698SJohn Baldwin	vmovups	(%r12),%xmm15
453bc3d5698SJohn Baldwin	leaq	16(%r12),%r12
454bc3d5698SJohn Baldwin	decl	%r13d
455bc3d5698SJohn Baldwin	jnz	.Loop_ctr32
456bc3d5698SJohn Baldwin
# Last round: XOR the final round key (%xmm3) into 96 input bytes, finish
# with vaesenclast, and store 96 output bytes.
457bc3d5698SJohn Baldwin	vmovdqu	(%r12),%xmm3
458bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm9,%xmm9
459bc3d5698SJohn Baldwin	vpxor	0(%rdi),%xmm3,%xmm4
460bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm10,%xmm10
461bc3d5698SJohn Baldwin	vpxor	16(%rdi),%xmm3,%xmm5
462bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm11,%xmm11
463bc3d5698SJohn Baldwin	vpxor	32(%rdi),%xmm3,%xmm6
464bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm12,%xmm12
465bc3d5698SJohn Baldwin	vpxor	48(%rdi),%xmm3,%xmm8
466bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm13,%xmm13
467bc3d5698SJohn Baldwin	vpxor	64(%rdi),%xmm3,%xmm2
468bc3d5698SJohn Baldwin	vaesenc	%xmm15,%xmm14,%xmm14
469bc3d5698SJohn Baldwin	vpxor	80(%rdi),%xmm3,%xmm3
470bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
471bc3d5698SJohn Baldwin
472bc3d5698SJohn Baldwin	vaesenclast	%xmm4,%xmm9,%xmm9
473bc3d5698SJohn Baldwin	vaesenclast	%xmm5,%xmm10,%xmm10
474bc3d5698SJohn Baldwin	vaesenclast	%xmm6,%xmm11,%xmm11
475bc3d5698SJohn Baldwin	vaesenclast	%xmm8,%xmm12,%xmm12
476bc3d5698SJohn Baldwin	vaesenclast	%xmm2,%xmm13,%xmm13
477bc3d5698SJohn Baldwin	vaesenclast	%xmm3,%xmm14,%xmm14
478bc3d5698SJohn Baldwin	vmovups	%xmm9,0(%rsi)
479bc3d5698SJohn Baldwin	vmovups	%xmm10,16(%rsi)
480bc3d5698SJohn Baldwin	vmovups	%xmm11,32(%rsi)
481bc3d5698SJohn Baldwin	vmovups	%xmm12,48(%rsi)
482bc3d5698SJohn Baldwin	vmovups	%xmm13,64(%rsi)
483bc3d5698SJohn Baldwin	vmovups	%xmm14,80(%rsi)
484bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
485bc3d5698SJohn Baldwin
# 0xf3,0xc3 = "rep ret". Note the counter-wrap path below jumps back into
# .Loop_ctr32 and returns through here as well.
486bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
# Slow path: counter wraps within the batch; rebuild all six counters with
# 32-bit dword adds (vpaddd) after byte-swapping, then rejoin the round loop.
487bc3d5698SJohn Baldwin.align	32
488bc3d5698SJohn Baldwin.Lhandle_ctr32_2:
489bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm1,%xmm6
490bc3d5698SJohn Baldwin	vmovdqu	48(%r11),%xmm5
491bc3d5698SJohn Baldwin	vpaddd	64(%r11),%xmm6,%xmm10
492bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm6,%xmm11
493bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm10,%xmm12
494bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm10,%xmm10
495bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm11,%xmm13
496bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm11,%xmm11
497bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm10,%xmm10
498bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm12,%xmm14
499bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm12,%xmm12
500bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm11,%xmm11
501bc3d5698SJohn Baldwin	vpaddd	%xmm5,%xmm13,%xmm1
502bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm13,%xmm13
503bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm12,%xmm12
504bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm14,%xmm14
505bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm13,%xmm13
506bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm1,%xmm1
507bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm14,%xmm14
508bc3d5698SJohn Baldwin	jmp	.Loop_ctr32
509bc3d5698SJohn Baldwin.cfi_endproc
510bc3d5698SJohn Baldwin.size	_aesni_ctr32_6x,.-_aesni_ctr32_6x
511bc3d5698SJohn Baldwin
512bc3d5698SJohn Baldwin.globl	aesni_gcm_encrypt
513bc3d5698SJohn Baldwin.type	aesni_gcm_encrypt,@function
514bc3d5698SJohn Baldwin.align	32
515bc3d5698SJohn Baldwinaesni_gcm_encrypt:
516bc3d5698SJohn Baldwin.cfi_startproc
517bc3d5698SJohn Baldwin	xorq	%r10,%r10
518bc3d5698SJohn Baldwin	cmpq	$288,%rdx
519bc3d5698SJohn Baldwin	jb	.Lgcm_enc_abort
520bc3d5698SJohn Baldwin
521bc3d5698SJohn Baldwin	leaq	(%rsp),%rax
522bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rax
523bc3d5698SJohn Baldwin	pushq	%rbx
524bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
525bc3d5698SJohn Baldwin	pushq	%rbp
526bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
527bc3d5698SJohn Baldwin	pushq	%r12
528bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
529bc3d5698SJohn Baldwin	pushq	%r13
530bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
531bc3d5698SJohn Baldwin	pushq	%r14
532bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
533bc3d5698SJohn Baldwin	pushq	%r15
534bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
535bc3d5698SJohn Baldwin	vzeroupper
536bc3d5698SJohn Baldwin
537bc3d5698SJohn Baldwin	vmovdqu	(%r8),%xmm1
538bc3d5698SJohn Baldwin	addq	$-128,%rsp
539bc3d5698SJohn Baldwin	movl	12(%r8),%ebx
540bc3d5698SJohn Baldwin	leaq	.Lbswap_mask(%rip),%r11
541bc3d5698SJohn Baldwin	leaq	-128(%rcx),%r14
542bc3d5698SJohn Baldwin	movq	$0xf80,%r15
543bc3d5698SJohn Baldwin	leaq	128(%rcx),%rcx
544bc3d5698SJohn Baldwin	vmovdqu	(%r11),%xmm0
545bc3d5698SJohn Baldwin	andq	$-128,%rsp
546bc3d5698SJohn Baldwin	movl	240-128(%rcx),%ebp
547bc3d5698SJohn Baldwin
548bc3d5698SJohn Baldwin	andq	%r15,%r14
549bc3d5698SJohn Baldwin	andq	%rsp,%r15
550bc3d5698SJohn Baldwin	subq	%r14,%r15
551bc3d5698SJohn Baldwin	jc	.Lenc_no_key_aliasing
552bc3d5698SJohn Baldwin	cmpq	$768,%r15
553bc3d5698SJohn Baldwin	jnc	.Lenc_no_key_aliasing
554bc3d5698SJohn Baldwin	subq	%r15,%rsp
555bc3d5698SJohn Baldwin.Lenc_no_key_aliasing:
556bc3d5698SJohn Baldwin
557bc3d5698SJohn Baldwin	leaq	(%rsi),%r14
558bc3d5698SJohn Baldwin	leaq	-192(%rsi,%rdx,1),%r15
559bc3d5698SJohn Baldwin	shrq	$4,%rdx
560bc3d5698SJohn Baldwin
561bc3d5698SJohn Baldwin	call	_aesni_ctr32_6x
562bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm9,%xmm8
563bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm10,%xmm2
564bc3d5698SJohn Baldwin	vmovdqu	%xmm8,112(%rsp)
565bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm11,%xmm4
566bc3d5698SJohn Baldwin	vmovdqu	%xmm2,96(%rsp)
567bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm12,%xmm5
568bc3d5698SJohn Baldwin	vmovdqu	%xmm4,80(%rsp)
569bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm13,%xmm6
570bc3d5698SJohn Baldwin	vmovdqu	%xmm5,64(%rsp)
571bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm14,%xmm7
572bc3d5698SJohn Baldwin	vmovdqu	%xmm6,48(%rsp)
573bc3d5698SJohn Baldwin
574bc3d5698SJohn Baldwin	call	_aesni_ctr32_6x
575bc3d5698SJohn Baldwin
576bc3d5698SJohn Baldwin	vmovdqu	(%r9),%xmm8
577bc3d5698SJohn Baldwin	leaq	32+32(%r9),%r9
578bc3d5698SJohn Baldwin	subq	$12,%rdx
579bc3d5698SJohn Baldwin	movq	$192,%r10
580bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm8,%xmm8
581bc3d5698SJohn Baldwin
582bc3d5698SJohn Baldwin	call	_aesni_ctr32_ghash_6x
583bc3d5698SJohn Baldwin	vmovdqu	32(%rsp),%xmm7
584bc3d5698SJohn Baldwin	vmovdqu	(%r11),%xmm0
585bc3d5698SJohn Baldwin	vmovdqu	0-32(%r9),%xmm3
586bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm7,%xmm7,%xmm1
587bc3d5698SJohn Baldwin	vmovdqu	32-32(%r9),%xmm15
588bc3d5698SJohn Baldwin	vmovups	%xmm9,-96(%rsi)
589bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm9,%xmm9
590bc3d5698SJohn Baldwin	vpxor	%xmm7,%xmm1,%xmm1
591bc3d5698SJohn Baldwin	vmovups	%xmm10,-80(%rsi)
592bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm10,%xmm10
593bc3d5698SJohn Baldwin	vmovups	%xmm11,-64(%rsi)
594bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm11,%xmm11
595bc3d5698SJohn Baldwin	vmovups	%xmm12,-48(%rsi)
596bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm12,%xmm12
597bc3d5698SJohn Baldwin	vmovups	%xmm13,-32(%rsi)
598bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm13,%xmm13
599bc3d5698SJohn Baldwin	vmovups	%xmm14,-16(%rsi)
600bc3d5698SJohn Baldwin	vpshufb	%xmm0,%xmm14,%xmm14
601bc3d5698SJohn Baldwin	vmovdqu	%xmm9,16(%rsp)
602bc3d5698SJohn Baldwin	vmovdqu	48(%rsp),%xmm6
603bc3d5698SJohn Baldwin	vmovdqu	16-32(%r9),%xmm0
604bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm6,%xmm6,%xmm2
605bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm7,%xmm5
606bc3d5698SJohn Baldwin	vpxor	%xmm6,%xmm2,%xmm2
607bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm7,%xmm7
608bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm15,%xmm1,%xmm1
609bc3d5698SJohn Baldwin
610bc3d5698SJohn Baldwin	vmovdqu	64(%rsp),%xmm9
611bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm0,%xmm6,%xmm4
612bc3d5698SJohn Baldwin	vmovdqu	48-32(%r9),%xmm3
613bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm4,%xmm4
614bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm9,%xmm9,%xmm5
615bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm0,%xmm6,%xmm6
616bc3d5698SJohn Baldwin	vpxor	%xmm9,%xmm5,%xmm5
617bc3d5698SJohn Baldwin	vpxor	%xmm7,%xmm6,%xmm6
618bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm15,%xmm2,%xmm2
619bc3d5698SJohn Baldwin	vmovdqu	80-32(%r9),%xmm15
620bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm2,%xmm2
621bc3d5698SJohn Baldwin
622bc3d5698SJohn Baldwin	vmovdqu	80(%rsp),%xmm1
623bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm9,%xmm7
624bc3d5698SJohn Baldwin	vmovdqu	64-32(%r9),%xmm0
625bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm7,%xmm7
626bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm1,%xmm1,%xmm4
627bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm9,%xmm9
628bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm4,%xmm4
629bc3d5698SJohn Baldwin	vpxor	%xmm6,%xmm9,%xmm9
630bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm5
631bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm5,%xmm5
632bc3d5698SJohn Baldwin
633bc3d5698SJohn Baldwin	vmovdqu	96(%rsp),%xmm2
634bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm0,%xmm1,%xmm6
635bc3d5698SJohn Baldwin	vmovdqu	96-32(%r9),%xmm3
636bc3d5698SJohn Baldwin	vpxor	%xmm7,%xmm6,%xmm6
637bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm2,%xmm2,%xmm7
638bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm0,%xmm1,%xmm1
639bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm7,%xmm7
640bc3d5698SJohn Baldwin	vpxor	%xmm9,%xmm1,%xmm1
641bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm4
642bc3d5698SJohn Baldwin	vmovdqu	128-32(%r9),%xmm15
643bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm4,%xmm4
644bc3d5698SJohn Baldwin
645bc3d5698SJohn Baldwin	vpxor	112(%rsp),%xmm8,%xmm8
646bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm2,%xmm5
647bc3d5698SJohn Baldwin	vmovdqu	112-32(%r9),%xmm0
648bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm8,%xmm8,%xmm9
649bc3d5698SJohn Baldwin	vpxor	%xmm6,%xmm5,%xmm5
650bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm2,%xmm2
651bc3d5698SJohn Baldwin	vpxor	%xmm8,%xmm9,%xmm9
652bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm2,%xmm2
653bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm15,%xmm7,%xmm7
654bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm7,%xmm4
655bc3d5698SJohn Baldwin
656bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm0,%xmm8,%xmm6
657bc3d5698SJohn Baldwin	vmovdqu	0-32(%r9),%xmm3
658bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm14,%xmm14,%xmm1
659bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm0,%xmm8,%xmm8
660bc3d5698SJohn Baldwin	vpxor	%xmm14,%xmm1,%xmm1
661bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm6,%xmm5
662bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm15,%xmm9,%xmm9
663bc3d5698SJohn Baldwin	vmovdqu	32-32(%r9),%xmm15
664bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm8,%xmm7
665bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm9,%xmm6
666bc3d5698SJohn Baldwin
667bc3d5698SJohn Baldwin	vmovdqu	16-32(%r9),%xmm0
668bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm7,%xmm9
669bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm14,%xmm4
670bc3d5698SJohn Baldwin	vpxor	%xmm9,%xmm6,%xmm6
671bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm13,%xmm13,%xmm2
672bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm14,%xmm14
673bc3d5698SJohn Baldwin	vpxor	%xmm13,%xmm2,%xmm2
674bc3d5698SJohn Baldwin	vpslldq	$8,%xmm6,%xmm9
675bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm15,%xmm1,%xmm1
676bc3d5698SJohn Baldwin	vpxor	%xmm9,%xmm5,%xmm8
677bc3d5698SJohn Baldwin	vpsrldq	$8,%xmm6,%xmm6
678bc3d5698SJohn Baldwin	vpxor	%xmm6,%xmm7,%xmm7
679bc3d5698SJohn Baldwin
680bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm0,%xmm13,%xmm5
681bc3d5698SJohn Baldwin	vmovdqu	48-32(%r9),%xmm3
682bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm5,%xmm5
683bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm12,%xmm12,%xmm9
684bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm0,%xmm13,%xmm13
685bc3d5698SJohn Baldwin	vpxor	%xmm12,%xmm9,%xmm9
686bc3d5698SJohn Baldwin	vpxor	%xmm14,%xmm13,%xmm13
687bc3d5698SJohn Baldwin	vpalignr	$8,%xmm8,%xmm8,%xmm14
688bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm15,%xmm2,%xmm2
689bc3d5698SJohn Baldwin	vmovdqu	80-32(%r9),%xmm15
690bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm2,%xmm2
691bc3d5698SJohn Baldwin
692bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm12,%xmm4
693bc3d5698SJohn Baldwin	vmovdqu	64-32(%r9),%xmm0
694bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm4,%xmm4
695bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm11,%xmm11,%xmm1
696bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm12,%xmm12
697bc3d5698SJohn Baldwin	vpxor	%xmm11,%xmm1,%xmm1
698bc3d5698SJohn Baldwin	vpxor	%xmm13,%xmm12,%xmm12
699bc3d5698SJohn Baldwin	vxorps	16(%rsp),%xmm7,%xmm7
700bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm15,%xmm9,%xmm9
701bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm9,%xmm9
702bc3d5698SJohn Baldwin
703bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,16(%r11),%xmm8,%xmm8
704bc3d5698SJohn Baldwin	vxorps	%xmm14,%xmm8,%xmm8
705bc3d5698SJohn Baldwin
706bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm0,%xmm11,%xmm5
707bc3d5698SJohn Baldwin	vmovdqu	96-32(%r9),%xmm3
708bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm5,%xmm5
709bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm10,%xmm10,%xmm2
710bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm0,%xmm11,%xmm11
711bc3d5698SJohn Baldwin	vpxor	%xmm10,%xmm2,%xmm2
712bc3d5698SJohn Baldwin	vpalignr	$8,%xmm8,%xmm8,%xmm14
713bc3d5698SJohn Baldwin	vpxor	%xmm12,%xmm11,%xmm11
714bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm15,%xmm1,%xmm1
715bc3d5698SJohn Baldwin	vmovdqu	128-32(%r9),%xmm15
716bc3d5698SJohn Baldwin	vpxor	%xmm9,%xmm1,%xmm1
717bc3d5698SJohn Baldwin
718bc3d5698SJohn Baldwin	vxorps	%xmm7,%xmm14,%xmm14
719bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,16(%r11),%xmm8,%xmm8
720bc3d5698SJohn Baldwin	vxorps	%xmm14,%xmm8,%xmm8
721bc3d5698SJohn Baldwin
722bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm3,%xmm10,%xmm4
723bc3d5698SJohn Baldwin	vmovdqu	112-32(%r9),%xmm0
724bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm4,%xmm4
725bc3d5698SJohn Baldwin	vpunpckhqdq	%xmm8,%xmm8,%xmm9
726bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm3,%xmm10,%xmm10
727bc3d5698SJohn Baldwin	vpxor	%xmm8,%xmm9,%xmm9
728bc3d5698SJohn Baldwin	vpxor	%xmm11,%xmm10,%xmm10
729bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm15,%xmm2,%xmm2
730bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm2,%xmm2
731bc3d5698SJohn Baldwin
732bc3d5698SJohn Baldwin	vpclmulqdq	$0x00,%xmm0,%xmm8,%xmm5
733bc3d5698SJohn Baldwin	vpclmulqdq	$0x11,%xmm0,%xmm8,%xmm7
734bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm5,%xmm5
735bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm15,%xmm9,%xmm6
736bc3d5698SJohn Baldwin	vpxor	%xmm10,%xmm7,%xmm7
737bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm6,%xmm6
738bc3d5698SJohn Baldwin
739bc3d5698SJohn Baldwin	vpxor	%xmm5,%xmm7,%xmm4
740bc3d5698SJohn Baldwin	vpxor	%xmm4,%xmm6,%xmm6
741bc3d5698SJohn Baldwin	vpslldq	$8,%xmm6,%xmm1
742bc3d5698SJohn Baldwin	vmovdqu	16(%r11),%xmm3
743bc3d5698SJohn Baldwin	vpsrldq	$8,%xmm6,%xmm6
744bc3d5698SJohn Baldwin	vpxor	%xmm1,%xmm5,%xmm8
745bc3d5698SJohn Baldwin	vpxor	%xmm6,%xmm7,%xmm7
746bc3d5698SJohn Baldwin
747bc3d5698SJohn Baldwin	vpalignr	$8,%xmm8,%xmm8,%xmm2
748bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm8
749bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm8,%xmm8
750bc3d5698SJohn Baldwin
751bc3d5698SJohn Baldwin	vpalignr	$8,%xmm8,%xmm8,%xmm2
752bc3d5698SJohn Baldwin	vpclmulqdq	$0x10,%xmm3,%xmm8,%xmm8
753bc3d5698SJohn Baldwin	vpxor	%xmm7,%xmm2,%xmm2
754bc3d5698SJohn Baldwin	vpxor	%xmm2,%xmm8,%xmm8
755bc3d5698SJohn Baldwin	vpshufb	(%r11),%xmm8,%xmm8
756bc3d5698SJohn Baldwin	vmovdqu	%xmm8,-64(%r9)
757bc3d5698SJohn Baldwin
758bc3d5698SJohn Baldwin	vzeroupper
759bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
760bc3d5698SJohn Baldwin.cfi_restore	%r15
761bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
762bc3d5698SJohn Baldwin.cfi_restore	%r14
763bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
764bc3d5698SJohn Baldwin.cfi_restore	%r13
765bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
766bc3d5698SJohn Baldwin.cfi_restore	%r12
767bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
768bc3d5698SJohn Baldwin.cfi_restore	%rbp
769bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
770bc3d5698SJohn Baldwin.cfi_restore	%rbx
771bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
772bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
773bc3d5698SJohn Baldwin.Lgcm_enc_abort:
774bc3d5698SJohn Baldwin	movq	%r10,%rax
775bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
776bc3d5698SJohn Baldwin.cfi_endproc
777bc3d5698SJohn Baldwin.size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
778bc3d5698SJohn Baldwin.align	64
779bc3d5698SJohn Baldwin.Lbswap_mask:
780bc3d5698SJohn Baldwin.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
781bc3d5698SJohn Baldwin.Lpoly:
782bc3d5698SJohn Baldwin.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
783bc3d5698SJohn Baldwin.Lone_msb:
784bc3d5698SJohn Baldwin.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
785bc3d5698SJohn Baldwin.Ltwo_lsb:
786bc3d5698SJohn Baldwin.byte	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
787bc3d5698SJohn Baldwin.Lone_lsb:
788bc3d5698SJohn Baldwin.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
789bc3d5698SJohn Baldwin.byte	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
790bc3d5698SJohn Baldwin.align	64
791*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
792*c0855eaaSJohn Baldwin	.p2align 3
793*c0855eaaSJohn Baldwin	.long 1f - 0f
794*c0855eaaSJohn Baldwin	.long 4f - 1f
795*c0855eaaSJohn Baldwin	.long 5
796*c0855eaaSJohn Baldwin0:
797*c0855eaaSJohn Baldwin	# "GNU" encoded with .byte, since .asciz isn't supported
798*c0855eaaSJohn Baldwin	# on Solaris.
799*c0855eaaSJohn Baldwin	.byte 0x47
800*c0855eaaSJohn Baldwin	.byte 0x4e
801*c0855eaaSJohn Baldwin	.byte 0x55
802*c0855eaaSJohn Baldwin	.byte 0
803*c0855eaaSJohn Baldwin1:
804*c0855eaaSJohn Baldwin	.p2align 3
805*c0855eaaSJohn Baldwin	.long 0xc0000002
806*c0855eaaSJohn Baldwin	.long 3f - 2f
807*c0855eaaSJohn Baldwin2:
808*c0855eaaSJohn Baldwin	.long 3
809*c0855eaaSJohn Baldwin3:
810*c0855eaaSJohn Baldwin	.p2align 3
811*c0855eaaSJohn Baldwin4:
812