xref: /freebsd/sys/crypto/openssl/amd64/aesni-x86_64.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
.text					/* switch to the code section */
3bc3d5698SJohn Baldwin
/*
 * aesni_encrypt -- encrypt a single 16-byte block with AES-NI.
 *
 * Arguments are taken from the first three System V AMD64 integer
 * argument registers:
 *   %rdi = address of the 16-byte input block  (unaligned load)
 *   %rsi = address of the 16-byte output block (unaligned store)
 *   %rdx = address of the key schedule: 16-byte round keys from offset 0,
 *          and a 32-bit loop count at offset 240
 * NOTE(review): upstream OpenSSL declares this as
 *   void aesni_encrypt(const unsigned char *in, unsigned char *out,
 *                      const AES_KEY *key);
 * confirm against the C headers.
 *
 * Result:   16 bytes written to (%rsi).
 * Clobbers: %rax, %rdx, flags; %xmm0, %xmm1 and %xmm2 are zeroed before
 *           returning (presumably so key and data do not linger in
 *           registers).
 *
 * The loop performs <count> aesenc steps and is followed by one
 * aesenclast.  AES-NI opcodes are emitted as raw .byte sequences; the
 * comment beside each one gives the decoded instruction.
 */
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	movups	(%rdi),%xmm2		/* xmm2 = input block */
	movl	240(%rdx),%eax		/* eax = loop count from key+240 */
	movups	(%rdx),%xmm0		/* xmm0 = round key 0 */
	movups	16(%rdx),%xmm1		/* xmm1 = round key 1 */
	leaq	32(%rdx),%rdx		/* rdx -> round key 2 */
	xorps	%xmm0,%xmm2		/* whitening: block ^= round key 0 */
.Loop_enc1_1:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax			/* sets ZF consumed by jnz below */
	movups	(%rdx),%xmm1		/* next round key; flags untouched */
	leaq	16(%rdx),%rdx		/* advance key pointer; flags untouched */
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0		/* wipe round key 0 */
	pxor	%xmm1,%xmm1		/* wipe last round key */
	movups	%xmm2,(%rsi)		/* store the result */
	pxor	%xmm2,%xmm2		/* wipe the result register */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_encrypt,.-aesni_encrypt
30bc3d5698SJohn Baldwin
/*
 * aesni_decrypt -- decrypt a single 16-byte block with AES-NI.
 *
 * Arguments are taken from the first three System V AMD64 integer
 * argument registers:
 *   %rdi = address of the 16-byte input block  (unaligned load)
 *   %rsi = address of the 16-byte output block (unaligned store)
 *   %rdx = address of the key schedule: 16-byte round keys from offset 0,
 *          and a 32-bit loop count at offset 240
 * NOTE(review): the schedule is presumably one prepared for decryption
 * (aesdec expects inverse-transformed round keys) -- confirm with the
 * key-setup routine.
 *
 * Result:   16 bytes written to (%rsi).
 * Clobbers: %rax, %rdx, flags; %xmm0, %xmm1 and %xmm2 are zeroed before
 *           returning.
 *
 * Same shape as the single-block encrypt routine, with aesdec and
 * aesdeclast in place of aesenc and aesenclast.
 */
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	movups	(%rdi),%xmm2		/* xmm2 = input block */
	movl	240(%rdx),%eax		/* eax = loop count from key+240 */
	movups	(%rdx),%xmm0		/* xmm0 = round key 0 */
	movups	16(%rdx),%xmm1		/* xmm1 = round key 1 */
	leaq	32(%rdx),%rdx		/* rdx -> round key 2 */
	xorps	%xmm0,%xmm2		/* whitening: block ^= round key 0 */
.Loop_dec1_2:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%eax			/* sets ZF consumed by jnz below */
	movups	(%rdx),%xmm1		/* next round key; flags untouched */
	leaq	16(%rdx),%rdx		/* advance key pointer; flags untouched */
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0		/* wipe round key 0 */
	pxor	%xmm1,%xmm1		/* wipe last round key */
	movups	%xmm2,(%rsi)		/* store the result */
	pxor	%xmm2,%xmm2		/* wipe the result register */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2 -- file-local helper: encrypt two blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2, %xmm3 = the two blocks
 *        %rcx         = address of the key schedule
 *        %eax         = loop count
 *   Out: %xmm2, %xmm3 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * Round keys are fetched alternately into %xmm1 and %xmm0.  %rcx is moved
 * to key + 32 + 16*count and %rax becomes a negative byte offset that is
 * stepped by 32 (two round keys) until it reaches zero, so the addq also
 * provides the loop condition.  The offset starts at 16 - 16*count and
 * therefore only reaches zero when count is odd.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		/* xmm0 = round key 2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Lenc_loop2:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Lenc_loop2

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2 -- file-local helper: decrypt two blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2, %xmm3 = the two blocks
 *        %rcx         = address of the key schedule
 *        %eax         = loop count
 *   Out: %xmm2, %xmm3 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * Round keys are fetched alternately into %xmm1 and %xmm0.  %rcx is moved
 * to key + 32 + 16*count and %rax becomes a negative byte offset that is
 * stepped by 32 (two round keys) until it reaches zero, so the addq also
 * provides the loop condition.  The offset starts at 16 - 16*count and
 * therefore only reaches zero when count is odd.
 */
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		/* xmm0 = round key 2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Ldec_loop2:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Ldec_loop2

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3 -- file-local helper: encrypt three blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm4 = the three blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm4 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
 * offset from key + 32 + 16*count that is stepped by 32 until it reaches
 * zero, which only happens when count is odd.
 */
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0		/* xmm0 = round key 2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Lenc_loop3:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Lenc_loop3

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3 -- file-local helper: decrypt three blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm4 = the three blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm4 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
 * offset from key + 32 + 16*count that is stepped by 32 until it reaches
 * zero, which only happens when count is odd.
 */
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0		/* xmm0 = round key 2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Ldec_loop3:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Ldec_loop3

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4 -- file-local helper: encrypt four blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm5 = the four blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm5 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
 * offset from key + 32 + 16*count that is stepped by 32 until it reaches
 * zero, which only happens when count is odd.
 */
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0		/* xmm0 = round key 2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax
.byte	0x0f,0x1f,0x00			/* nopl (%rax): 3-byte no-op, presumably padding */
	addq	$16,%rax		/* rax = 16 - 16*count */

.Lenc_loop4:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Lenc_loop4

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4 -- file-local helper: decrypt four blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm5 = the four blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm5 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
 * offset from key + 32 + 16*count that is stepped by 32 until it reaches
 * zero, which only happens when count is odd.
 */
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0		/* xmm0 = round key 2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax
.byte	0x0f,0x1f,0x00			/* nopl (%rax): 3-byte no-op, presumably padding */
	addq	$16,%rax		/* rax = 16 - 16*count */

.Ldec_loop4:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Ldec_loop4

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6 -- file-local helper: encrypt six blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm7 = the six blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm7 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue interleaves the round-key-0 whitening with the first
 * aesenc (round key 1) on %xmm2-%xmm4, then jumps to .Lenc_loop6_enter so
 * that only %xmm5-%xmm7 still receive round key 1 on the first pass.
 * Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
 * offset from key + 32 + 16*count that is stepped by 32 until it reaches
 * zero, which only happens when count is odd.
 */
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	/* xmm0 = round key 2 (at key+32) */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.Lenc_loop6_enter:
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
.byte	102,15,56,220,240		/* aesenc %xmm0,%xmm6 */
.byte	102,15,56,220,248		/* aesenc %xmm0,%xmm7 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Lenc_loop6

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
.byte	102,15,56,221,240		/* aesenclast %xmm0,%xmm6 */
.byte	102,15,56,221,248		/* aesenclast %xmm0,%xmm7 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6 -- file-local helper: decrypt six blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm7 = the six blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm7 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue interleaves the round-key-0 whitening with the first
 * aesdec (round key 1) on %xmm2-%xmm4, then jumps to .Ldec_loop6_enter so
 * that only %xmm5-%xmm7 still receive round key 1 on the first pass.
 * Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
 * offset from key + 32 + 16*count that is stepped by 32 until it reaches
 * zero, which only happens when count is odd.
 */
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	/* xmm0 = round key 2 (at key+32) */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.Ldec_loop6_enter:
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
.byte	102,15,56,222,240		/* aesdec %xmm0,%xmm6 */
.byte	102,15,56,222,248		/* aesdec %xmm0,%xmm7 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Ldec_loop6

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
.byte	102,15,56,223,240		/* aesdeclast %xmm0,%xmm6 */
.byte	102,15,56,223,248		/* aesdeclast %xmm0,%xmm7 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8 -- file-local helper: encrypt eight blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm9 = the eight blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm9 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue interleaves the round-key-0 whitening with the first
 * aesenc (round key 1) on %xmm2 and %xmm3, then jumps to
 * .Lenc_loop8_inner so that only %xmm4-%xmm9 still receive round key 1 on
 * the first pass.  Round keys alternate between %xmm1 and %xmm0; %rax is
 * a negative byte offset from key + 32 + 16*count that is stepped by 32
 * until it reaches zero, which only happens when count is odd.
 *
 * .Lenc_loop8_enter is not branched to from inside this routine.
 * NOTE(review): it is presumably an entry point for other code in this
 * file -- confirm before removing or moving it.
 */
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	/* xmm0 = round key 2 (at key+32) */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.Lenc_loop8_inner:
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
.byte	102,68,15,56,220,193		/* aesenc %xmm1,%xmm8 */
.byte	102,68,15,56,220,201		/* aesenc %xmm1,%xmm9 */
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
.byte	102,15,56,220,240		/* aesenc %xmm0,%xmm6 */
.byte	102,15,56,220,248		/* aesenc %xmm0,%xmm7 */
.byte	102,68,15,56,220,192		/* aesenc %xmm0,%xmm8 */
.byte	102,68,15,56,220,200		/* aesenc %xmm0,%xmm9 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Lenc_loop8

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
.byte	102,68,15,56,220,193		/* aesenc %xmm1,%xmm8 */
.byte	102,68,15,56,220,201		/* aesenc %xmm1,%xmm9 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
.byte	102,15,56,221,240		/* aesenclast %xmm0,%xmm6 */
.byte	102,15,56,221,248		/* aesenclast %xmm0,%xmm7 */
.byte	102,68,15,56,221,192		/* aesenclast %xmm0,%xmm8 */
.byte	102,68,15,56,221,200		/* aesenclast %xmm0,%xmm9 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8 -- file-local helper: decrypt eight blocks in parallel.
 *
 * Private register convention (not the C ABI):
 *   In:  %xmm2-%xmm9 = the eight blocks
 *        %rcx        = address of the key schedule
 *        %eax        = loop count
 *   Out: %xmm2-%xmm9 = processed blocks, in place
 *   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue interleaves the round-key-0 whitening with the first
 * aesdec (round key 1) on %xmm2 and %xmm3, then jumps to
 * .Ldec_loop8_inner so that only %xmm4-%xmm9 still receive round key 1 on
 * the first pass.  Round keys alternate between %xmm1 and %xmm0; %rax is
 * a negative byte offset from key + 32 + 16*count that is stepped by 32
 * until it reaches zero, which only happens when count is odd.
 *
 * .Ldec_loop8_enter is not branched to from inside this routine.
 * NOTE(review): it is presumably an entry point for other code in this
 * file -- confirm before removing or moving it.
 */
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0		/* xmm0 = round key 0 */
	shll	$4,%eax			/* rax = 16 * count */
	movups	16(%rcx),%xmm1		/* xmm1 = round key 1 */
	xorps	%xmm0,%xmm2		/* whitening with round key 0 */
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx	/* rcx = key + 32 + 16*count */
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	/* xmm0 = round key 2 (at key+32) */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.Ldec_loop8_inner:
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
.byte	102,68,15,56,222,193		/* aesdec %xmm1,%xmm8 */
.byte	102,68,15,56,222,201		/* aesdec %xmm1,%xmm9 */
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1	/* next odd-slot round key */
	addq	$32,%rax		/* sets ZF consumed by jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
.byte	102,15,56,222,240		/* aesdec %xmm0,%xmm6 */
.byte	102,15,56,222,248		/* aesdec %xmm0,%xmm7 */
.byte	102,68,15,56,222,192		/* aesdec %xmm0,%xmm8 */
.byte	102,68,15,56,222,200		/* aesdec %xmm0,%xmm9 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-slot round key */
	jnz	.Ldec_loop8

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
.byte	102,68,15,56,222,193		/* aesdec %xmm1,%xmm8 */
.byte	102,68,15,56,222,201		/* aesdec %xmm1,%xmm9 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
.byte	102,15,56,223,240		/* aesdeclast %xmm0,%xmm6 */
.byte	102,15,56,223,248		/* aesdeclast %xmm0,%xmm7 */
.byte	102,68,15,56,223,192		/* aesdeclast %xmm0,%xmm8 */
.byte	102,68,15,56,223,200		/* aesdeclast %xmm0,%xmm9 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt8,.-_aesni_decrypt8
519bc3d5698SJohn Baldwin.globl	aesni_ecb_encrypt
520bc3d5698SJohn Baldwin.type	aesni_ecb_encrypt,@function
521bc3d5698SJohn Baldwin.align	16
522bc3d5698SJohn Baldwinaesni_ecb_encrypt:
523bc3d5698SJohn Baldwin.cfi_startproc
524*c0855eaaSJohn Baldwin.byte	243,15,30,250
525bc3d5698SJohn Baldwin	andq	$-16,%rdx
526bc3d5698SJohn Baldwin	jz	.Lecb_ret
527bc3d5698SJohn Baldwin
528bc3d5698SJohn Baldwin	movl	240(%rcx),%eax
529bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
530bc3d5698SJohn Baldwin	movq	%rcx,%r11
531bc3d5698SJohn Baldwin	movl	%eax,%r10d
532bc3d5698SJohn Baldwin	testl	%r8d,%r8d
533bc3d5698SJohn Baldwin	jz	.Lecb_decrypt
534bc3d5698SJohn Baldwin
535bc3d5698SJohn Baldwin	cmpq	$0x80,%rdx
536bc3d5698SJohn Baldwin	jb	.Lecb_enc_tail
537bc3d5698SJohn Baldwin
538bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
539bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
540bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
541bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
542bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
543bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
544bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm8
545bc3d5698SJohn Baldwin	movdqu	112(%rdi),%xmm9
546bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
547bc3d5698SJohn Baldwin	subq	$0x80,%rdx
548bc3d5698SJohn Baldwin	jmp	.Lecb_enc_loop8_enter
549bc3d5698SJohn Baldwin.align	16
550bc3d5698SJohn Baldwin.Lecb_enc_loop8:
551bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
552bc3d5698SJohn Baldwin	movq	%r11,%rcx
553bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
554bc3d5698SJohn Baldwin	movl	%r10d,%eax
555bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
556bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
557bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
558bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
559bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
560bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
561bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
562bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
563bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
564bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
565bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
566bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm8
567bc3d5698SJohn Baldwin	movups	%xmm9,112(%rsi)
568bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
569bc3d5698SJohn Baldwin	movdqu	112(%rdi),%xmm9
570bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
571bc3d5698SJohn Baldwin.Lecb_enc_loop8_enter:
572bc3d5698SJohn Baldwin
573bc3d5698SJohn Baldwin	call	_aesni_encrypt8
574bc3d5698SJohn Baldwin
575bc3d5698SJohn Baldwin	subq	$0x80,%rdx
576bc3d5698SJohn Baldwin	jnc	.Lecb_enc_loop8
577bc3d5698SJohn Baldwin
578bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
579bc3d5698SJohn Baldwin	movq	%r11,%rcx
580bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
581bc3d5698SJohn Baldwin	movl	%r10d,%eax
582bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
583bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
584bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
585bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
586bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
587bc3d5698SJohn Baldwin	movups	%xmm9,112(%rsi)
588bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
589bc3d5698SJohn Baldwin	addq	$0x80,%rdx
590bc3d5698SJohn Baldwin	jz	.Lecb_ret
591bc3d5698SJohn Baldwin
592bc3d5698SJohn Baldwin.Lecb_enc_tail:
593bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
594bc3d5698SJohn Baldwin	cmpq	$0x20,%rdx
595bc3d5698SJohn Baldwin	jb	.Lecb_enc_one
596bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
597bc3d5698SJohn Baldwin	je	.Lecb_enc_two
598bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
599bc3d5698SJohn Baldwin	cmpq	$0x40,%rdx
600bc3d5698SJohn Baldwin	jb	.Lecb_enc_three
601bc3d5698SJohn Baldwin	movups	48(%rdi),%xmm5
602bc3d5698SJohn Baldwin	je	.Lecb_enc_four
603bc3d5698SJohn Baldwin	movups	64(%rdi),%xmm6
604bc3d5698SJohn Baldwin	cmpq	$0x60,%rdx
605bc3d5698SJohn Baldwin	jb	.Lecb_enc_five
606bc3d5698SJohn Baldwin	movups	80(%rdi),%xmm7
607bc3d5698SJohn Baldwin	je	.Lecb_enc_six
608bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm8
609bc3d5698SJohn Baldwin	xorps	%xmm9,%xmm9
610bc3d5698SJohn Baldwin	call	_aesni_encrypt8
611bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
612bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
613bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
614bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
615bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
616bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
617bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
618bc3d5698SJohn Baldwin	jmp	.Lecb_ret
619bc3d5698SJohn Baldwin.align	16
620bc3d5698SJohn Baldwin.Lecb_enc_one:
621bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
622bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
623bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
624bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
625bc3d5698SJohn Baldwin.Loop_enc1_3:
626bc3d5698SJohn Baldwin.byte	102,15,56,220,209
627bc3d5698SJohn Baldwin	decl	%eax
628bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
629bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
630bc3d5698SJohn Baldwin	jnz	.Loop_enc1_3
631bc3d5698SJohn Baldwin.byte	102,15,56,221,209
632bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
633bc3d5698SJohn Baldwin	jmp	.Lecb_ret
634bc3d5698SJohn Baldwin.align	16
635bc3d5698SJohn Baldwin.Lecb_enc_two:
636bc3d5698SJohn Baldwin	call	_aesni_encrypt2
637bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
638bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
639bc3d5698SJohn Baldwin	jmp	.Lecb_ret
640bc3d5698SJohn Baldwin.align	16
641bc3d5698SJohn Baldwin.Lecb_enc_three:
642bc3d5698SJohn Baldwin	call	_aesni_encrypt3
643bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
644bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
645bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
646bc3d5698SJohn Baldwin	jmp	.Lecb_ret
647bc3d5698SJohn Baldwin.align	16
648bc3d5698SJohn Baldwin.Lecb_enc_four:
649bc3d5698SJohn Baldwin	call	_aesni_encrypt4
650bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
651bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
652bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
653bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
654bc3d5698SJohn Baldwin	jmp	.Lecb_ret
655bc3d5698SJohn Baldwin.align	16
656bc3d5698SJohn Baldwin.Lecb_enc_five:
657bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
658bc3d5698SJohn Baldwin	call	_aesni_encrypt6
659bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
660bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
661bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
662bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
663bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
664bc3d5698SJohn Baldwin	jmp	.Lecb_ret
665bc3d5698SJohn Baldwin.align	16
666bc3d5698SJohn Baldwin.Lecb_enc_six:
667bc3d5698SJohn Baldwin	call	_aesni_encrypt6
668bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
669bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
670bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
671bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
672bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
673bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
674bc3d5698SJohn Baldwin	jmp	.Lecb_ret
675bc3d5698SJohn Baldwin
676bc3d5698SJohn Baldwin.align	16
677bc3d5698SJohn Baldwin.Lecb_decrypt:
678bc3d5698SJohn Baldwin	cmpq	$0x80,%rdx
679bc3d5698SJohn Baldwin	jb	.Lecb_dec_tail
680bc3d5698SJohn Baldwin
681bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
682bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
683bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
684bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
685bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
686bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
687bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm8
688bc3d5698SJohn Baldwin	movdqu	112(%rdi),%xmm9
689bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
690bc3d5698SJohn Baldwin	subq	$0x80,%rdx
691bc3d5698SJohn Baldwin	jmp	.Lecb_dec_loop8_enter
692bc3d5698SJohn Baldwin.align	16
693bc3d5698SJohn Baldwin.Lecb_dec_loop8:
694bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
695bc3d5698SJohn Baldwin	movq	%r11,%rcx
696bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
697bc3d5698SJohn Baldwin	movl	%r10d,%eax
698bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
699bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
700bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
701bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
702bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
703bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
704bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
705bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
706bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
707bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
708bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
709bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm8
710bc3d5698SJohn Baldwin	movups	%xmm9,112(%rsi)
711bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
712bc3d5698SJohn Baldwin	movdqu	112(%rdi),%xmm9
713bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
714bc3d5698SJohn Baldwin.Lecb_dec_loop8_enter:
715bc3d5698SJohn Baldwin
716bc3d5698SJohn Baldwin	call	_aesni_decrypt8
717bc3d5698SJohn Baldwin
718bc3d5698SJohn Baldwin	movups	(%r11),%xmm0
719bc3d5698SJohn Baldwin	subq	$0x80,%rdx
720bc3d5698SJohn Baldwin	jnc	.Lecb_dec_loop8
721bc3d5698SJohn Baldwin
722bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
723bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
724bc3d5698SJohn Baldwin	movq	%r11,%rcx
725bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
726bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
727bc3d5698SJohn Baldwin	movl	%r10d,%eax
728bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
729bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
730bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
731bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
732bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
733bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
734bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
735bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
736bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
737bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
738bc3d5698SJohn Baldwin	movups	%xmm9,112(%rsi)
739bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
740bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
741bc3d5698SJohn Baldwin	addq	$0x80,%rdx
742bc3d5698SJohn Baldwin	jz	.Lecb_ret
743bc3d5698SJohn Baldwin
744bc3d5698SJohn Baldwin.Lecb_dec_tail:
745bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
746bc3d5698SJohn Baldwin	cmpq	$0x20,%rdx
747bc3d5698SJohn Baldwin	jb	.Lecb_dec_one
748bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
749bc3d5698SJohn Baldwin	je	.Lecb_dec_two
750bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
751bc3d5698SJohn Baldwin	cmpq	$0x40,%rdx
752bc3d5698SJohn Baldwin	jb	.Lecb_dec_three
753bc3d5698SJohn Baldwin	movups	48(%rdi),%xmm5
754bc3d5698SJohn Baldwin	je	.Lecb_dec_four
755bc3d5698SJohn Baldwin	movups	64(%rdi),%xmm6
756bc3d5698SJohn Baldwin	cmpq	$0x60,%rdx
757bc3d5698SJohn Baldwin	jb	.Lecb_dec_five
758bc3d5698SJohn Baldwin	movups	80(%rdi),%xmm7
759bc3d5698SJohn Baldwin	je	.Lecb_dec_six
760bc3d5698SJohn Baldwin	movups	96(%rdi),%xmm8
761bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
762bc3d5698SJohn Baldwin	xorps	%xmm9,%xmm9
763bc3d5698SJohn Baldwin	call	_aesni_decrypt8
764bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
765bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
766bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
767bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
768bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
769bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
770bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
771bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
772bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
773bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
774bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
775bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
776bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
777bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
778bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
779bc3d5698SJohn Baldwin	jmp	.Lecb_ret
780bc3d5698SJohn Baldwin.align	16
781bc3d5698SJohn Baldwin.Lecb_dec_one:
782bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
783bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
784bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
785bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
786bc3d5698SJohn Baldwin.Loop_dec1_4:
787bc3d5698SJohn Baldwin.byte	102,15,56,222,209
788bc3d5698SJohn Baldwin	decl	%eax
789bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
790bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
791bc3d5698SJohn Baldwin	jnz	.Loop_dec1_4
792bc3d5698SJohn Baldwin.byte	102,15,56,223,209
793bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
794bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
795bc3d5698SJohn Baldwin	jmp	.Lecb_ret
796bc3d5698SJohn Baldwin.align	16
797bc3d5698SJohn Baldwin.Lecb_dec_two:
798bc3d5698SJohn Baldwin	call	_aesni_decrypt2
799bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
800bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
801bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
802bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
803bc3d5698SJohn Baldwin	jmp	.Lecb_ret
804bc3d5698SJohn Baldwin.align	16
805bc3d5698SJohn Baldwin.Lecb_dec_three:
806bc3d5698SJohn Baldwin	call	_aesni_decrypt3
807bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
808bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
809bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
810bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
811bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
812bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
813bc3d5698SJohn Baldwin	jmp	.Lecb_ret
814bc3d5698SJohn Baldwin.align	16
815bc3d5698SJohn Baldwin.Lecb_dec_four:
816bc3d5698SJohn Baldwin	call	_aesni_decrypt4
817bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
818bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
819bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
820bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
821bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
822bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
823bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
824bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
825bc3d5698SJohn Baldwin	jmp	.Lecb_ret
826bc3d5698SJohn Baldwin.align	16
827bc3d5698SJohn Baldwin.Lecb_dec_five:
828bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
829bc3d5698SJohn Baldwin	call	_aesni_decrypt6
830bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
831bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
832bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
833bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
834bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
835bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
836bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
837bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
838bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
839bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
840bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
841bc3d5698SJohn Baldwin	jmp	.Lecb_ret
842bc3d5698SJohn Baldwin.align	16
843bc3d5698SJohn Baldwin.Lecb_dec_six:
844bc3d5698SJohn Baldwin	call	_aesni_decrypt6
845bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
846bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
847bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
848bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
849bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
850bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
851bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
852bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
853bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
854bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
855bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
856bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
857bc3d5698SJohn Baldwin
858bc3d5698SJohn Baldwin.Lecb_ret:
859bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
860bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
861bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
862bc3d5698SJohn Baldwin.cfi_endproc
863bc3d5698SJohn Baldwin.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks
 *
 * Processes 16-byte blocks one at a time.  For every block it runs two AES
 * encryptions in lock-step: one on a counter block (%xmm2) whose result is
 * XORed with the input to form the output, and one on a running value
 * (%xmm3) into which the input block has been XORed first.
 *
 * Registers as used by the code below:
 *   %rdi  input pointer, advanced by 16 per block
 *   %rsi  output pointer, advanced by 16 per block
 *   %rdx  block count, decremented once per block
 *   %rcx  key schedule base; the 32-bit value at offset 240 is scaled by 16
 *         to bound the walk over the round keys (presumably the AES_KEY
 *         round count; confirm against aesni-x86_64.pl)
 *   %r8   16-byte counter block, read once at entry and never written back
 *   %r9   16-byte running value, read at entry and stored back at exit
 *
 * NOTE(review): this looks like the SysV AMD64 mapping of OpenSSL's
 * (in, out, blocks, key, ivec, cmac) prototype; confirm with the C caller.
 * NOTE(review): %rdx is not tested before the first block, so the loop
 * always processes at least one block.
 *
 * The instruction bytes emitted with .byte are decoded in the comments.
 */
864bc3d5698SJohn Baldwin.globl	aesni_ccm64_encrypt_blocks
865bc3d5698SJohn Baldwin.type	aesni_ccm64_encrypt_blocks,@function
866bc3d5698SJohn Baldwin.align	16
867bc3d5698SJohn Baldwinaesni_ccm64_encrypt_blocks:
868bc3d5698SJohn Baldwin.cfi_startproc
/* endbr64 (F3 0F 1E FA) */
869*c0855eaaSJohn Baldwin.byte	243,15,30,250
/* eax = 32-bit value at key+240; xmm6 = counter block from (%r8) */
870bc3d5698SJohn Baldwin	movl	240(%rcx),%eax
871bc3d5698SJohn Baldwin	movdqu	(%r8),%xmm6
/* Constants defined elsewhere in the file: xmm9 = counter increment, xmm7 = pshufb mask */
872bc3d5698SJohn Baldwin	movdqa	.Lincrement64(%rip),%xmm9
873bc3d5698SJohn Baldwin	movdqa	.Lbswap_mask(%rip),%xmm7
874bc3d5698SJohn Baldwin
/*
 * Set up the negative-index walk over the key schedule:
 *   rax = 16 * value at key+240
 *   r11 = key base (kept for reloading round keys 0..2 each block)
 *   rcx = key + 32 + rax
 *   r10 = 16 - rax   (start index; the inner loop counts it up to zero)
 * Also: xmm3 = running value from (%r9), xmm2 = counter as loaded.
 */
875bc3d5698SJohn Baldwin	shll	$4,%eax
876bc3d5698SJohn Baldwin	movl	$16,%r10d
877bc3d5698SJohn Baldwin	leaq	0(%rcx),%r11
878bc3d5698SJohn Baldwin	movdqu	(%r9),%xmm3
879bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
880bc3d5698SJohn Baldwin	leaq	32(%rcx,%rax,1),%rcx
/* pshufb %xmm7,%xmm6: byte-shuffle the counter with the mask so paddq can step it */
881bc3d5698SJohn Baldwin.byte	102,15,56,0,247
882bc3d5698SJohn Baldwin	subq	%rax,%r10
883bc3d5698SJohn Baldwin	jmp	.Lccm64_enc_outer
884bc3d5698SJohn Baldwin.align	16
/* Per-block loop. On entry xmm2 holds the counter block to encrypt. */
885bc3d5698SJohn Baldwin.Lccm64_enc_outer:
/* xmm0 = round key 0, rax = start index, xmm8 = input block */
886bc3d5698SJohn Baldwin	movups	(%r11),%xmm0
887bc3d5698SJohn Baldwin	movq	%r10,%rax
888bc3d5698SJohn Baldwin	movups	(%rdi),%xmm8
889bc3d5698SJohn Baldwin
/*
 * xmm2 ^= rk0                (counter whitening)
 * xmm3 ^= rk0 ^ input block  (fold the input into the running value and whiten)
 * xmm1 = round key 1, xmm0 = round key 2
 */
890bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
891bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
892bc3d5698SJohn Baldwin	xorps	%xmm8,%xmm0
893bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
894bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
895bc3d5698SJohn Baldwin
/*
 * Two rounds per iteration on both states, alternating xmm1/xmm0 as the
 * round key and loading the next key while the current round executes.
 */
896bc3d5698SJohn Baldwin.Lccm64_enc2_loop:
/* aesenc %xmm1,%xmm2 ; aesenc %xmm1,%xmm3 */
897bc3d5698SJohn Baldwin.byte	102,15,56,220,209
898bc3d5698SJohn Baldwin.byte	102,15,56,220,217
899bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
/* Sets ZF for the jnz below; the aesenc and movups in between leave flags alone */
900bc3d5698SJohn Baldwin	addq	$32,%rax
/* aesenc %xmm0,%xmm2 ; aesenc %xmm0,%xmm3 */
901bc3d5698SJohn Baldwin.byte	102,15,56,220,208
902bc3d5698SJohn Baldwin.byte	102,15,56,220,216
903bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
904bc3d5698SJohn Baldwin	jnz	.Lccm64_enc2_loop
/* Last regular round: aesenc %xmm1,%xmm2 ; aesenc %xmm1,%xmm3 */
905bc3d5698SJohn Baldwin.byte	102,15,56,220,209
906bc3d5698SJohn Baldwin.byte	102,15,56,220,217
/* Step the shuffled counter; decq sets ZF consumed by the jnz at the loop bottom */
907bc3d5698SJohn Baldwin	paddq	%xmm9,%xmm6
908bc3d5698SJohn Baldwin	decq	%rdx
/* Final round: aesenclast %xmm0,%xmm2 ; aesenclast %xmm0,%xmm3 */
909bc3d5698SJohn Baldwin.byte	102,15,56,221,208
910bc3d5698SJohn Baldwin.byte	102,15,56,221,216
911bc3d5698SJohn Baldwin
/*
 * output block = input block ^ encrypted counter; store it and advance both
 * pointers.  xmm2 is reloaded with the next counter.  None of the
 * instructions from here to the jnz modify flags, so ZF is still from decq.
 */
912bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
913bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm8
914bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
915bc3d5698SJohn Baldwin	movups	%xmm8,(%rsi)
/* pshufb %xmm7,%xmm2: shuffle the stepped counter back into block byte order */
916bc3d5698SJohn Baldwin.byte	102,15,56,0,215
917bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
918bc3d5698SJohn Baldwin	jnz	.Lccm64_enc_outer
919bc3d5698SJohn Baldwin
/* Store the updated running value to (%r9) and zero the working xmm registers */
920bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
921bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
922bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
923bc3d5698SJohn Baldwin	movups	%xmm3,(%r9)
924bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
925bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
926bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
/* rep ret (F3 C3) */
927bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
928bc3d5698SJohn Baldwin.cfi_endproc
929bc3d5698SJohn Baldwin.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks
 *
 * Counterpart of aesni_ccm64_encrypt_blocks with the work software-pipelined:
 *   1. The first counter block is encrypted on its own (.Loop_enc1_5).
 *   2. Each pass of .Lccm64_dec_outer XORs the ready keystream with the
 *      input block, stores the result, then (if blocks remain) encrypts the
 *      next counter and the running value, with the just-produced output
 *      block folded in, side by side in .Lccm64_dec2_loop.
 *   3. After the last block, .Lccm64_dec_break folds the final output block
 *      into the running value and encrypts it alone (.Loop_enc1_6).
 *
 * Registers as used by the code below:
 *   %rdi  input pointer, advanced by 16 per block
 *   %rsi  output pointer, advanced by 16 per block
 *   %rdx  block count, decremented once per block
 *   %rcx  key schedule base; the 32-bit value at offset 240 is used as the
 *         iteration count of the single-block loops (presumably the AES_KEY
 *         round count; confirm against aesni-x86_64.pl)
 *   %r8   16-byte counter block, read once at entry and never written back
 *   %r9   16-byte running value, read at entry and stored back at exit
 *
 * NOTE(review): %rdx is not tested before the first block is written, so at
 * least one block is always processed.
 *
 * The instruction bytes emitted with .byte are decoded in the comments.
 */
930bc3d5698SJohn Baldwin.globl	aesni_ccm64_decrypt_blocks
931bc3d5698SJohn Baldwin.type	aesni_ccm64_decrypt_blocks,@function
932bc3d5698SJohn Baldwin.align	16
933bc3d5698SJohn Baldwinaesni_ccm64_decrypt_blocks:
934bc3d5698SJohn Baldwin.cfi_startproc
/* endbr64 (F3 0F 1E FA) */
935*c0855eaaSJohn Baldwin.byte	243,15,30,250
/* eax = 32-bit value at key+240, xmm6 = counter block, xmm3 = running value */
936bc3d5698SJohn Baldwin	movl	240(%rcx),%eax
937bc3d5698SJohn Baldwin	movups	(%r8),%xmm6
938bc3d5698SJohn Baldwin	movdqu	(%r9),%xmm3
/* Constants defined elsewhere in the file: xmm9 = counter increment, xmm7 = pshufb mask */
939bc3d5698SJohn Baldwin	movdqa	.Lincrement64(%rip),%xmm9
940bc3d5698SJohn Baldwin	movdqa	.Lbswap_mask(%rip),%xmm7
941bc3d5698SJohn Baldwin
/* xmm2 = counter as loaded; r10d and r11 keep the count and key base for later */
942bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm2
943bc3d5698SJohn Baldwin	movl	%eax,%r10d
944bc3d5698SJohn Baldwin	movq	%rcx,%r11
/* pshufb %xmm7,%xmm6: byte-shuffle the counter with the mask so paddq can step it */
945bc3d5698SJohn Baldwin.byte	102,15,56,0,247
/* Encrypt the first counter block alone: whiten with rk0, then one round per pass */
946bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
947bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
948bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
949bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
950bc3d5698SJohn Baldwin.Loop_enc1_5:
/* aesenc %xmm1,%xmm2 */
951bc3d5698SJohn Baldwin.byte	102,15,56,220,209
/* decl sets ZF for the jnz; movups and leaq do not touch flags */
952bc3d5698SJohn Baldwin	decl	%eax
953bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
954bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
955bc3d5698SJohn Baldwin	jnz	.Loop_enc1_5
/* aesenclast %xmm1,%xmm2: xmm2 now holds the keystream for the first block */
956bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/*
 * Prepare the negative-index walk used by .Lccm64_dec2_loop:
 *   r10 = 16 * saved count, rcx = key + 32 + r10, then r10 = rax = 16 - r10.
 * Meanwhile load the first input block into xmm8 and step the counter.
 */
957bc3d5698SJohn Baldwin	shll	$4,%r10d
958bc3d5698SJohn Baldwin	movl	$16,%eax
959bc3d5698SJohn Baldwin	movups	(%rdi),%xmm8
960bc3d5698SJohn Baldwin	paddq	%xmm9,%xmm6
961bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
962bc3d5698SJohn Baldwin	subq	%r10,%rax
963bc3d5698SJohn Baldwin	leaq	32(%r11,%r10,1),%rcx
964bc3d5698SJohn Baldwin	movq	%rax,%r10
965bc3d5698SJohn Baldwin	jmp	.Lccm64_dec_outer
966bc3d5698SJohn Baldwin.align	16
/* Per-block loop. On entry xmm2 = keystream, xmm8 = input block. */
967bc3d5698SJohn Baldwin.Lccm64_dec_outer:
/* output block = input ^ keystream; store it; xmm2 = next (stepped) counter */
968bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm8
969bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
970bc3d5698SJohn Baldwin	movups	%xmm8,(%rsi)
971bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
/* pshufb %xmm7,%xmm2: shuffle the stepped counter back into block byte order */
972bc3d5698SJohn Baldwin.byte	102,15,56,0,215
973bc3d5698SJohn Baldwin
/* Leave the loop once the block just stored was the last one */
974bc3d5698SJohn Baldwin	subq	$1,%rdx
975bc3d5698SJohn Baldwin	jz	.Lccm64_dec_break
976bc3d5698SJohn Baldwin
/*
 * xmm2 ^= rk0                 (next counter, whitened)
 * xmm3 ^= rk0 ^ output block  (fold the output just produced into the running value)
 * xmm1 = round key 1, xmm0 = round key 2, rax = start index
 */
977bc3d5698SJohn Baldwin	movups	(%r11),%xmm0
978bc3d5698SJohn Baldwin	movq	%r10,%rax
979bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
980bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm8
981bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
982bc3d5698SJohn Baldwin	xorps	%xmm8,%xmm3
983bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
984bc3d5698SJohn Baldwin	jmp	.Lccm64_dec2_loop
985bc3d5698SJohn Baldwin.align	16
/* Two rounds per iteration on both states, alternating xmm1/xmm0 as the round key */
986bc3d5698SJohn Baldwin.Lccm64_dec2_loop:
/* aesenc %xmm1,%xmm2 ; aesenc %xmm1,%xmm3 */
987bc3d5698SJohn Baldwin.byte	102,15,56,220,209
988bc3d5698SJohn Baldwin.byte	102,15,56,220,217
989bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
/* Sets ZF for the jnz below; the aesenc and movups in between leave flags alone */
990bc3d5698SJohn Baldwin	addq	$32,%rax
/* aesenc %xmm0,%xmm2 ; aesenc %xmm0,%xmm3 */
991bc3d5698SJohn Baldwin.byte	102,15,56,220,208
992bc3d5698SJohn Baldwin.byte	102,15,56,220,216
993bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
994bc3d5698SJohn Baldwin	jnz	.Lccm64_dec2_loop
/* Fetch the next input block and step the counter while the last rounds finish */
995bc3d5698SJohn Baldwin	movups	(%rdi),%xmm8
996bc3d5698SJohn Baldwin	paddq	%xmm9,%xmm6
/* aesenc %xmm1,%xmm2 ; aesenc %xmm1,%xmm3 */
997bc3d5698SJohn Baldwin.byte	102,15,56,220,209
998bc3d5698SJohn Baldwin.byte	102,15,56,220,217
/* aesenclast %xmm0,%xmm2 ; aesenclast %xmm0,%xmm3 */
999bc3d5698SJohn Baldwin.byte	102,15,56,221,208
1000bc3d5698SJohn Baldwin.byte	102,15,56,221,216
1001bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
1002bc3d5698SJohn Baldwin	jmp	.Lccm64_dec_outer
1003bc3d5698SJohn Baldwin
1004bc3d5698SJohn Baldwin.align	16
/*
 * Final step: fold the last output block (xmm8) into the running value and
 * encrypt it with the single-block loop.  r11 is consumed as the round-key
 * cursor here because the key base is no longer needed.
 */
1005bc3d5698SJohn Baldwin.Lccm64_dec_break:
1006bc3d5698SJohn Baldwin
1007bc3d5698SJohn Baldwin	movl	240(%r11),%eax
1008bc3d5698SJohn Baldwin	movups	(%r11),%xmm0
1009bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
1010bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm8
1011bc3d5698SJohn Baldwin	leaq	32(%r11),%r11
1012bc3d5698SJohn Baldwin	xorps	%xmm8,%xmm3
1013bc3d5698SJohn Baldwin.Loop_enc1_6:
/* aesenc %xmm1,%xmm3 */
1014bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1015bc3d5698SJohn Baldwin	decl	%eax
1016bc3d5698SJohn Baldwin	movups	(%r11),%xmm1
1017bc3d5698SJohn Baldwin	leaq	16(%r11),%r11
1018bc3d5698SJohn Baldwin	jnz	.Loop_enc1_6
/* aesenclast %xmm1,%xmm3 */
1019bc3d5698SJohn Baldwin.byte	102,15,56,221,217
/* Store the updated running value to (%r9) and zero the working xmm registers */
1020bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1021bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1022bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
1023bc3d5698SJohn Baldwin	movups	%xmm3,(%r9)
1024bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1025bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
1026bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
/* rep ret (F3 C3) */
1027bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1028bc3d5698SJohn Baldwin.cfi_endproc
1029bc3d5698SJohn Baldwin.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1030bc3d5698SJohn Baldwin.globl	aesni_ctr32_encrypt_blocks
1031bc3d5698SJohn Baldwin.type	aesni_ctr32_encrypt_blocks,@function
1032bc3d5698SJohn Baldwin.align	16
1033bc3d5698SJohn Baldwinaesni_ctr32_encrypt_blocks:
1034bc3d5698SJohn Baldwin.cfi_startproc
1035*c0855eaaSJohn Baldwin.byte	243,15,30,250
1036bc3d5698SJohn Baldwin	cmpq	$1,%rdx
1037bc3d5698SJohn Baldwin	jne	.Lctr32_bulk
1038bc3d5698SJohn Baldwin
1039bc3d5698SJohn Baldwin
1040bc3d5698SJohn Baldwin
1041bc3d5698SJohn Baldwin	movups	(%r8),%xmm2
1042bc3d5698SJohn Baldwin	movups	(%rdi),%xmm3
1043bc3d5698SJohn Baldwin	movl	240(%rcx),%edx
1044bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
1045bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
1046bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
1047bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1048bc3d5698SJohn Baldwin.Loop_enc1_7:
1049bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1050bc3d5698SJohn Baldwin	decl	%edx
1051bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
1052bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
1053bc3d5698SJohn Baldwin	jnz	.Loop_enc1_7
1054bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1055bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1056bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1057bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm2
1058bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1059bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1060bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm2
1061bc3d5698SJohn Baldwin	jmp	.Lctr32_epilogue
1062bc3d5698SJohn Baldwin
1063bc3d5698SJohn Baldwin.align	16
1064bc3d5698SJohn Baldwin.Lctr32_bulk:
1065bc3d5698SJohn Baldwin	leaq	(%rsp),%r11
1066bc3d5698SJohn Baldwin.cfi_def_cfa_register	%r11
1067bc3d5698SJohn Baldwin	pushq	%rbp
1068bc3d5698SJohn Baldwin.cfi_offset	%rbp,-16
1069bc3d5698SJohn Baldwin	subq	$128,%rsp
1070bc3d5698SJohn Baldwin	andq	$-16,%rsp
1071bc3d5698SJohn Baldwin
1072bc3d5698SJohn Baldwin
1073bc3d5698SJohn Baldwin
1074bc3d5698SJohn Baldwin
1075bc3d5698SJohn Baldwin	movdqu	(%r8),%xmm2
1076bc3d5698SJohn Baldwin	movdqu	(%rcx),%xmm0
1077bc3d5698SJohn Baldwin	movl	12(%r8),%r8d
1078bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
1079bc3d5698SJohn Baldwin	movl	12(%rcx),%ebp
1080bc3d5698SJohn Baldwin	movdqa	%xmm2,0(%rsp)
1081bc3d5698SJohn Baldwin	bswapl	%r8d
1082bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
1083bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
1084bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm5
1085bc3d5698SJohn Baldwin	movdqa	%xmm2,64(%rsp)
1086bc3d5698SJohn Baldwin	movdqa	%xmm2,80(%rsp)
1087bc3d5698SJohn Baldwin	movdqa	%xmm2,96(%rsp)
1088bc3d5698SJohn Baldwin	movq	%rdx,%r10
1089bc3d5698SJohn Baldwin	movdqa	%xmm2,112(%rsp)
1090bc3d5698SJohn Baldwin
1091bc3d5698SJohn Baldwin	leaq	1(%r8),%rax
1092bc3d5698SJohn Baldwin	leaq	2(%r8),%rdx
1093bc3d5698SJohn Baldwin	bswapl	%eax
1094bc3d5698SJohn Baldwin	bswapl	%edx
1095bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1096bc3d5698SJohn Baldwin	xorl	%ebp,%edx
1097bc3d5698SJohn Baldwin.byte	102,15,58,34,216,3
1098bc3d5698SJohn Baldwin	leaq	3(%r8),%rax
1099bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%rsp)
1100bc3d5698SJohn Baldwin.byte	102,15,58,34,226,3
1101bc3d5698SJohn Baldwin	bswapl	%eax
1102bc3d5698SJohn Baldwin	movq	%r10,%rdx
1103bc3d5698SJohn Baldwin	leaq	4(%r8),%r10
1104bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%rsp)
1105bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1106bc3d5698SJohn Baldwin	bswapl	%r10d
1107bc3d5698SJohn Baldwin.byte	102,15,58,34,232,3
1108bc3d5698SJohn Baldwin	xorl	%ebp,%r10d
1109bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%rsp)
1110bc3d5698SJohn Baldwin	leaq	5(%r8),%r9
1111bc3d5698SJohn Baldwin	movl	%r10d,64+12(%rsp)
1112bc3d5698SJohn Baldwin	bswapl	%r9d
1113bc3d5698SJohn Baldwin	leaq	6(%r8),%r10
1114bc3d5698SJohn Baldwin	movl	240(%rcx),%eax
1115bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1116bc3d5698SJohn Baldwin	bswapl	%r10d
1117bc3d5698SJohn Baldwin	movl	%r9d,80+12(%rsp)
1118bc3d5698SJohn Baldwin	xorl	%ebp,%r10d
1119bc3d5698SJohn Baldwin	leaq	7(%r8),%r9
1120bc3d5698SJohn Baldwin	movl	%r10d,96+12(%rsp)
1121bc3d5698SJohn Baldwin	bswapl	%r9d
1122bc3d5698SJohn Baldwin	movl	OPENSSL_ia32cap_P+4(%rip),%r10d
1123bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1124bc3d5698SJohn Baldwin	andl	$71303168,%r10d
1125bc3d5698SJohn Baldwin	movl	%r9d,112+12(%rsp)
1126bc3d5698SJohn Baldwin
1127bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
1128bc3d5698SJohn Baldwin
1129bc3d5698SJohn Baldwin	movdqa	64(%rsp),%xmm6
1130bc3d5698SJohn Baldwin	movdqa	80(%rsp),%xmm7
1131bc3d5698SJohn Baldwin
1132bc3d5698SJohn Baldwin	cmpq	$8,%rdx
1133bc3d5698SJohn Baldwin	jb	.Lctr32_tail
1134bc3d5698SJohn Baldwin
1135bc3d5698SJohn Baldwin	subq	$6,%rdx
1136bc3d5698SJohn Baldwin	cmpl	$4194304,%r10d
1137bc3d5698SJohn Baldwin	je	.Lctr32_6x
1138bc3d5698SJohn Baldwin
1139bc3d5698SJohn Baldwin	leaq	128(%rcx),%rcx
1140bc3d5698SJohn Baldwin	subq	$2,%rdx
1141bc3d5698SJohn Baldwin	jmp	.Lctr32_loop8
1142bc3d5698SJohn Baldwin
1143bc3d5698SJohn Baldwin.align	16
1144bc3d5698SJohn Baldwin.Lctr32_6x:
1145bc3d5698SJohn Baldwin	shll	$4,%eax
1146bc3d5698SJohn Baldwin	movl	$48,%r10d
1147bc3d5698SJohn Baldwin	bswapl	%ebp
1148bc3d5698SJohn Baldwin	leaq	32(%rcx,%rax,1),%rcx
1149bc3d5698SJohn Baldwin	subq	%rax,%r10
1150bc3d5698SJohn Baldwin	jmp	.Lctr32_loop6
1151bc3d5698SJohn Baldwin
1152bc3d5698SJohn Baldwin.align	16
1153bc3d5698SJohn Baldwin.Lctr32_loop6:
1154bc3d5698SJohn Baldwin	addl	$6,%r8d
1155bc3d5698SJohn Baldwin	movups	-48(%rcx,%r10,1),%xmm0
1156bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1157bc3d5698SJohn Baldwin	movl	%r8d,%eax
1158bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1159bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1160bc3d5698SJohn Baldwin.byte	0x0f,0x38,0xf1,0x44,0x24,12
1161bc3d5698SJohn Baldwin	leal	1(%r8),%eax
1162bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1163bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1164bc3d5698SJohn Baldwin.byte	0x0f,0x38,0xf1,0x44,0x24,28
1165bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1166bc3d5698SJohn Baldwin	leal	2(%r8),%eax
1167bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1168bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1169bc3d5698SJohn Baldwin.byte	0x0f,0x38,0xf1,0x44,0x24,44
1170bc3d5698SJohn Baldwin	leal	3(%r8),%eax
1171bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1172bc3d5698SJohn Baldwin	movups	-32(%rcx,%r10,1),%xmm1
1173bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1174bc3d5698SJohn Baldwin
1175bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1176bc3d5698SJohn Baldwin.byte	0x0f,0x38,0xf1,0x44,0x24,60
1177bc3d5698SJohn Baldwin	leal	4(%r8),%eax
1178bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1179bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1180bc3d5698SJohn Baldwin.byte	0x0f,0x38,0xf1,0x44,0x24,76
1181bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1182bc3d5698SJohn Baldwin	leal	5(%r8),%eax
1183bc3d5698SJohn Baldwin	xorl	%ebp,%eax
1184bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1185bc3d5698SJohn Baldwin.byte	0x0f,0x38,0xf1,0x44,0x24,92
1186bc3d5698SJohn Baldwin	movq	%r10,%rax
1187bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1188bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1189bc3d5698SJohn Baldwin	movups	-16(%rcx,%r10,1),%xmm0
1190bc3d5698SJohn Baldwin
1191bc3d5698SJohn Baldwin	call	.Lenc_loop6
1192bc3d5698SJohn Baldwin
1193bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm8
1194bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm9
1195bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm10
1196bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm11
1197bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm12
1198bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm13
1199bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
1200bc3d5698SJohn Baldwin	movups	-64(%rcx,%r10,1),%xmm1
1201bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
1202bc3d5698SJohn Baldwin	movaps	0(%rsp),%xmm2
1203bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm9
1204bc3d5698SJohn Baldwin	movaps	16(%rsp),%xmm3
1205bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm10
1206bc3d5698SJohn Baldwin	movaps	32(%rsp),%xmm4
1207bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm11
1208bc3d5698SJohn Baldwin	movaps	48(%rsp),%xmm5
1209bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm12
1210bc3d5698SJohn Baldwin	movaps	64(%rsp),%xmm6
1211bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm13
1212bc3d5698SJohn Baldwin	movaps	80(%rsp),%xmm7
1213bc3d5698SJohn Baldwin	movdqu	%xmm8,(%rsi)
1214bc3d5698SJohn Baldwin	movdqu	%xmm9,16(%rsi)
1215bc3d5698SJohn Baldwin	movdqu	%xmm10,32(%rsi)
1216bc3d5698SJohn Baldwin	movdqu	%xmm11,48(%rsi)
1217bc3d5698SJohn Baldwin	movdqu	%xmm12,64(%rsi)
1218bc3d5698SJohn Baldwin	movdqu	%xmm13,80(%rsi)
1219bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
1220bc3d5698SJohn Baldwin
1221bc3d5698SJohn Baldwin	subq	$6,%rdx
1222bc3d5698SJohn Baldwin	jnc	.Lctr32_loop6
1223bc3d5698SJohn Baldwin
1224bc3d5698SJohn Baldwin	addq	$6,%rdx
1225bc3d5698SJohn Baldwin	jz	.Lctr32_done
1226bc3d5698SJohn Baldwin
1227bc3d5698SJohn Baldwin	leal	-48(%r10),%eax
1228bc3d5698SJohn Baldwin	leaq	-80(%rcx,%r10,1),%rcx
1229bc3d5698SJohn Baldwin	negl	%eax
1230bc3d5698SJohn Baldwin	shrl	$4,%eax
1231bc3d5698SJohn Baldwin	jmp	.Lctr32_tail
1232bc3d5698SJohn Baldwin
1233bc3d5698SJohn Baldwin.align	32
1234bc3d5698SJohn Baldwin.Lctr32_loop8:
1235bc3d5698SJohn Baldwin	addl	$8,%r8d
1236bc3d5698SJohn Baldwin	movdqa	96(%rsp),%xmm8
1237bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1238bc3d5698SJohn Baldwin	movl	%r8d,%r9d
1239bc3d5698SJohn Baldwin	movdqa	112(%rsp),%xmm9
1240bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1241bc3d5698SJohn Baldwin	bswapl	%r9d
1242bc3d5698SJohn Baldwin	movups	32-128(%rcx),%xmm0
1243bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1244bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1245bc3d5698SJohn Baldwin	nop
1246bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1247bc3d5698SJohn Baldwin	movl	%r9d,0+12(%rsp)
1248bc3d5698SJohn Baldwin	leaq	1(%r8),%r9
1249bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1250bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1251bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1252bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1253bc3d5698SJohn Baldwin	movups	48-128(%rcx),%xmm1
1254bc3d5698SJohn Baldwin	bswapl	%r9d
1255bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1256bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1257bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1258bc3d5698SJohn Baldwin.byte	0x66,0x90
1259bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1260bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1261bc3d5698SJohn Baldwin	movl	%r9d,16+12(%rsp)
1262bc3d5698SJohn Baldwin	leaq	2(%r8),%r9
1263bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1264bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1265bc3d5698SJohn Baldwin.byte	102,68,15,56,220,192
1266bc3d5698SJohn Baldwin.byte	102,68,15,56,220,200
1267bc3d5698SJohn Baldwin	movups	64-128(%rcx),%xmm0
1268bc3d5698SJohn Baldwin	bswapl	%r9d
1269bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1270bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1271bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1272bc3d5698SJohn Baldwin.byte	0x66,0x90
1273bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1274bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1275bc3d5698SJohn Baldwin	movl	%r9d,32+12(%rsp)
1276bc3d5698SJohn Baldwin	leaq	3(%r8),%r9
1277bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1278bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1279bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1280bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1281bc3d5698SJohn Baldwin	movups	80-128(%rcx),%xmm1
1282bc3d5698SJohn Baldwin	bswapl	%r9d
1283bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1284bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1285bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1286bc3d5698SJohn Baldwin.byte	0x66,0x90
1287bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1288bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1289bc3d5698SJohn Baldwin	movl	%r9d,48+12(%rsp)
1290bc3d5698SJohn Baldwin	leaq	4(%r8),%r9
1291bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1292bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1293bc3d5698SJohn Baldwin.byte	102,68,15,56,220,192
1294bc3d5698SJohn Baldwin.byte	102,68,15,56,220,200
1295bc3d5698SJohn Baldwin	movups	96-128(%rcx),%xmm0
1296bc3d5698SJohn Baldwin	bswapl	%r9d
1297bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1298bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1299bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1300bc3d5698SJohn Baldwin.byte	0x66,0x90
1301bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1302bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1303bc3d5698SJohn Baldwin	movl	%r9d,64+12(%rsp)
1304bc3d5698SJohn Baldwin	leaq	5(%r8),%r9
1305bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1306bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1307bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1308bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1309bc3d5698SJohn Baldwin	movups	112-128(%rcx),%xmm1
1310bc3d5698SJohn Baldwin	bswapl	%r9d
1311bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1312bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1313bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1314bc3d5698SJohn Baldwin.byte	0x66,0x90
1315bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1316bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1317bc3d5698SJohn Baldwin	movl	%r9d,80+12(%rsp)
1318bc3d5698SJohn Baldwin	leaq	6(%r8),%r9
1319bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1320bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1321bc3d5698SJohn Baldwin.byte	102,68,15,56,220,192
1322bc3d5698SJohn Baldwin.byte	102,68,15,56,220,200
1323bc3d5698SJohn Baldwin	movups	128-128(%rcx),%xmm0
1324bc3d5698SJohn Baldwin	bswapl	%r9d
1325bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1326bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1327bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1328bc3d5698SJohn Baldwin.byte	0x66,0x90
1329bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1330bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1331bc3d5698SJohn Baldwin	movl	%r9d,96+12(%rsp)
1332bc3d5698SJohn Baldwin	leaq	7(%r8),%r9
1333bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1334bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1335bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1336bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1337bc3d5698SJohn Baldwin	movups	144-128(%rcx),%xmm1
1338bc3d5698SJohn Baldwin	bswapl	%r9d
1339bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1340bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1341bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1342bc3d5698SJohn Baldwin	xorl	%ebp,%r9d
1343bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm10
1344bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1345bc3d5698SJohn Baldwin	movl	%r9d,112+12(%rsp)
1346bc3d5698SJohn Baldwin	cmpl	$11,%eax
1347bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1348bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1349bc3d5698SJohn Baldwin.byte	102,68,15,56,220,192
1350bc3d5698SJohn Baldwin.byte	102,68,15,56,220,200
1351bc3d5698SJohn Baldwin	movups	160-128(%rcx),%xmm0
1352bc3d5698SJohn Baldwin
1353bc3d5698SJohn Baldwin	jb	.Lctr32_enc_done
1354bc3d5698SJohn Baldwin
1355bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1356bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1357bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1358bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1359bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1360bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1361bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1362bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1363bc3d5698SJohn Baldwin	movups	176-128(%rcx),%xmm1
1364bc3d5698SJohn Baldwin
1365bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1366bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1367bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1368bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1369bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1370bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1371bc3d5698SJohn Baldwin.byte	102,68,15,56,220,192
1372bc3d5698SJohn Baldwin.byte	102,68,15,56,220,200
1373bc3d5698SJohn Baldwin	movups	192-128(%rcx),%xmm0
1374bc3d5698SJohn Baldwin	je	.Lctr32_enc_done
1375bc3d5698SJohn Baldwin
1376bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1377bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1378bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1379bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1380bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1381bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1382bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1383bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1384bc3d5698SJohn Baldwin	movups	208-128(%rcx),%xmm1
1385bc3d5698SJohn Baldwin
1386bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1387bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1388bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1389bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1390bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1391bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1392bc3d5698SJohn Baldwin.byte	102,68,15,56,220,192
1393bc3d5698SJohn Baldwin.byte	102,68,15,56,220,200
1394bc3d5698SJohn Baldwin	movups	224-128(%rcx),%xmm0
1395bc3d5698SJohn Baldwin	jmp	.Lctr32_enc_done
1396bc3d5698SJohn Baldwin
1397bc3d5698SJohn Baldwin.align	16
1398bc3d5698SJohn Baldwin.Lctr32_enc_done:
1399bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm11
1400bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
1401bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm12
1402bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm11
1403bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm13
1404bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm12
1405bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm14
1406bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm13
1407bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm15
1408bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm14
1409bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm15
1410bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1411bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1412bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1413bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1414bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1415bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1416bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1417bc3d5698SJohn Baldwin.byte	102,68,15,56,220,201
1418bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm1
1419bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
1420bc3d5698SJohn Baldwin
1421bc3d5698SJohn Baldwin.byte	102,65,15,56,221,210
1422bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm1
1423bc3d5698SJohn Baldwin	movdqu	112-128(%rdi),%xmm10
1424bc3d5698SJohn Baldwin.byte	102,65,15,56,221,219
1425bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
1426bc3d5698SJohn Baldwin	movdqa	0(%rsp),%xmm11
1427bc3d5698SJohn Baldwin.byte	102,65,15,56,221,228
1428bc3d5698SJohn Baldwin.byte	102,65,15,56,221,237
1429bc3d5698SJohn Baldwin	movdqa	16(%rsp),%xmm12
1430bc3d5698SJohn Baldwin	movdqa	32(%rsp),%xmm13
1431bc3d5698SJohn Baldwin.byte	102,65,15,56,221,246
1432bc3d5698SJohn Baldwin.byte	102,65,15,56,221,255
1433bc3d5698SJohn Baldwin	movdqa	48(%rsp),%xmm14
1434bc3d5698SJohn Baldwin	movdqa	64(%rsp),%xmm15
1435bc3d5698SJohn Baldwin.byte	102,68,15,56,221,193
1436bc3d5698SJohn Baldwin	movdqa	80(%rsp),%xmm0
1437bc3d5698SJohn Baldwin	movups	16-128(%rcx),%xmm1
1438bc3d5698SJohn Baldwin.byte	102,69,15,56,221,202
1439bc3d5698SJohn Baldwin
1440bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1441bc3d5698SJohn Baldwin	movdqa	%xmm11,%xmm2
1442bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
1443bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm3
1444bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
1445bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm4
1446bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
1447bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm5
1448bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
1449bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm6
1450bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
1451bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm7
1452bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
1453bc3d5698SJohn Baldwin	movups	%xmm9,112(%rsi)
1454bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
1455bc3d5698SJohn Baldwin
1456bc3d5698SJohn Baldwin	subq	$8,%rdx
1457bc3d5698SJohn Baldwin	jnc	.Lctr32_loop8
1458bc3d5698SJohn Baldwin
1459bc3d5698SJohn Baldwin	addq	$8,%rdx
1460bc3d5698SJohn Baldwin	jz	.Lctr32_done
1461bc3d5698SJohn Baldwin	leaq	-128(%rcx),%rcx
1462bc3d5698SJohn Baldwin
1463bc3d5698SJohn Baldwin.Lctr32_tail:
1464bc3d5698SJohn Baldwin
1465bc3d5698SJohn Baldwin
1466bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
1467bc3d5698SJohn Baldwin	cmpq	$4,%rdx
1468bc3d5698SJohn Baldwin	jb	.Lctr32_loop3
1469bc3d5698SJohn Baldwin	je	.Lctr32_loop4
1470bc3d5698SJohn Baldwin
1471bc3d5698SJohn Baldwin
1472bc3d5698SJohn Baldwin	shll	$4,%eax
1473bc3d5698SJohn Baldwin	movdqa	96(%rsp),%xmm8
1474bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
1475bc3d5698SJohn Baldwin
1476bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm0
1477bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1478bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1479bc3d5698SJohn Baldwin	leaq	32-16(%rcx,%rax,1),%rcx
1480bc3d5698SJohn Baldwin	negq	%rax
1481bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1482bc3d5698SJohn Baldwin	addq	$16,%rax
1483bc3d5698SJohn Baldwin	movups	(%rdi),%xmm10
1484bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1485bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1486bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm11
1487bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm12
1488bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1489bc3d5698SJohn Baldwin.byte	102,68,15,56,220,193
1490bc3d5698SJohn Baldwin
1491bc3d5698SJohn Baldwin	call	.Lenc_loop8_enter
1492bc3d5698SJohn Baldwin
1493bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm13
1494bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
1495bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm10
1496bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
1497bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
1498bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
1499bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
1500bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
1501bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
1502bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm6
1503bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
1504bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%rsi)
1505bc3d5698SJohn Baldwin	cmpq	$6,%rdx
1506bc3d5698SJohn Baldwin	jb	.Lctr32_done
1507bc3d5698SJohn Baldwin
1508bc3d5698SJohn Baldwin	movups	80(%rdi),%xmm11
1509bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm7
1510bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
1511bc3d5698SJohn Baldwin	je	.Lctr32_done
1512bc3d5698SJohn Baldwin
1513bc3d5698SJohn Baldwin	movups	96(%rdi),%xmm12
1514bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm8
1515bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
1516bc3d5698SJohn Baldwin	jmp	.Lctr32_done
1517bc3d5698SJohn Baldwin
1518bc3d5698SJohn Baldwin.align	32
1519bc3d5698SJohn Baldwin.Lctr32_loop4:
1520bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1521bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
1522bc3d5698SJohn Baldwin	decl	%eax
1523bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1524bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1525bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1526bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
1527bc3d5698SJohn Baldwin	jnz	.Lctr32_loop4
1528bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1529bc3d5698SJohn Baldwin.byte	102,15,56,221,217
1530bc3d5698SJohn Baldwin	movups	(%rdi),%xmm10
1531bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm11
1532bc3d5698SJohn Baldwin.byte	102,15,56,221,225
1533bc3d5698SJohn Baldwin.byte	102,15,56,221,233
1534bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm12
1535bc3d5698SJohn Baldwin	movups	48(%rdi),%xmm13
1536bc3d5698SJohn Baldwin
1537bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1538bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1539bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1540bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
1541bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
1542bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
1543bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
1544bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
1545bc3d5698SJohn Baldwin	jmp	.Lctr32_done
1546bc3d5698SJohn Baldwin
1547bc3d5698SJohn Baldwin.align	32
1548bc3d5698SJohn Baldwin.Lctr32_loop3:
1549bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1550bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
1551bc3d5698SJohn Baldwin	decl	%eax
1552bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1553bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1554bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
1555bc3d5698SJohn Baldwin	jnz	.Lctr32_loop3
1556bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1557bc3d5698SJohn Baldwin.byte	102,15,56,221,217
1558bc3d5698SJohn Baldwin.byte	102,15,56,221,225
1559bc3d5698SJohn Baldwin
1560bc3d5698SJohn Baldwin	movups	(%rdi),%xmm10
1561bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1562bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1563bc3d5698SJohn Baldwin	cmpq	$2,%rdx
1564bc3d5698SJohn Baldwin	jb	.Lctr32_done
1565bc3d5698SJohn Baldwin
1566bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm11
1567bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1568bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
1569bc3d5698SJohn Baldwin	je	.Lctr32_done
1570bc3d5698SJohn Baldwin
1571bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm12
1572bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
1573bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
1574bc3d5698SJohn Baldwin
1575bc3d5698SJohn Baldwin.Lctr32_done:
1576bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
1577bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
1578bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1579bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
1580bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1581bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
1582bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
1583bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
1584bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
1585bc3d5698SJohn Baldwin	movaps	%xmm0,0(%rsp)
1586bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
1587bc3d5698SJohn Baldwin	movaps	%xmm0,16(%rsp)
1588bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
1589bc3d5698SJohn Baldwin	movaps	%xmm0,32(%rsp)
1590bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm10
1591bc3d5698SJohn Baldwin	movaps	%xmm0,48(%rsp)
1592bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm11
1593bc3d5698SJohn Baldwin	movaps	%xmm0,64(%rsp)
1594bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm12
1595bc3d5698SJohn Baldwin	movaps	%xmm0,80(%rsp)
1596bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm13
1597bc3d5698SJohn Baldwin	movaps	%xmm0,96(%rsp)
1598bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm14
1599bc3d5698SJohn Baldwin	movaps	%xmm0,112(%rsp)
1600bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm15
1601bc3d5698SJohn Baldwin	movq	-8(%r11),%rbp
1602bc3d5698SJohn Baldwin.cfi_restore	%rbp
1603bc3d5698SJohn Baldwin	leaq	(%r11),%rsp
1604bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1605bc3d5698SJohn Baldwin.Lctr32_epilogue:
1606bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1607bc3d5698SJohn Baldwin.cfi_endproc
1608bc3d5698SJohn Baldwin.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt -- XTS-style AES encryption using AES-NI.
 * This file is generated output (see the header line of the file); lasting
 * changes belong in the generator, not here.
 *
 * Register use on entry, as read by the code below:
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  length in bytes (low 4 bits = tail handled by ciphertext stealing)
 *   %rcx  data key schedule; 240(%rcx) is read as its aesenc loop count
 *   %r8   tweak key schedule; 240(%r8) is read as its aesenc loop count
 *   %r9   pointer to the 16 bytes that are encrypted to form the first tweak
 * NOTE(review): presumably (in, out, len, key1, key2, iv) under the SysV
 * AMD64 calling convention -- confirm against the C prototype.
 *
 * AES instructions are emitted as raw bytes:
 *   .byte 102,15,56,220,modrm   aesenc
 *   .byte 102,15,56,221,modrm   aesenclast
 * modrm 209..249 selects %xmm1 as source and %xmm2..%xmm7 as destination,
 * modrm 208..248 selects %xmm0 as source and %xmm2..%xmm7 as destination.
 *
 * Stack scratch (16-byte aligned, at least 112 bytes):
 *   0..80(%rsp)  per-block (tweak ^ last round key) for the six-block loop
 *   96(%rsp)     round key 0 ^ last round key
 * All of it, and %xmm0..%xmm15, are zeroed before returning.
 */
1609bc3d5698SJohn Baldwin.globl	aesni_xts_encrypt
1610bc3d5698SJohn Baldwin.type	aesni_xts_encrypt,@function
1611bc3d5698SJohn Baldwin.align	16
1612bc3d5698SJohn Baldwinaesni_xts_encrypt:
1613bc3d5698SJohn Baldwin.cfi_startproc
/* endbr64, spelled as raw bytes */
1614*c0855eaaSJohn Baldwin.byte	243,15,30,250
/* Keep the entry %rsp in %r11, save %rbp, then carve out aligned scratch. */
1615bc3d5698SJohn Baldwin	leaq	(%rsp),%r11
1616bc3d5698SJohn Baldwin.cfi_def_cfa_register	%r11
1617bc3d5698SJohn Baldwin	pushq	%rbp
1618bc3d5698SJohn Baldwin.cfi_offset	%rbp,-16
1619bc3d5698SJohn Baldwin	subq	$112,%rsp
1620bc3d5698SJohn Baldwin	andq	$-16,%rsp
/*
 * Encrypt the 16 bytes at (%r9) with the %r8 key schedule; the result in
 * %xmm2 is the first tweak. %eax counts aesenc rounds for this key and
 * %r10d picks up the loop count of the data key for later.
 */
1621bc3d5698SJohn Baldwin	movups	(%r9),%xmm2
1622bc3d5698SJohn Baldwin	movl	240(%r8),%eax
1623bc3d5698SJohn Baldwin	movl	240(%rcx),%r10d
1624bc3d5698SJohn Baldwin	movups	(%r8),%xmm0
1625bc3d5698SJohn Baldwin	movups	16(%r8),%xmm1
1626bc3d5698SJohn Baldwin	leaq	32(%r8),%r8
1627bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1628bc3d5698SJohn Baldwin.Loop_enc1_8:
/* aesenc %xmm1,%xmm2 */
1629bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1630bc3d5698SJohn Baldwin	decl	%eax
1631bc3d5698SJohn Baldwin	movups	(%r8),%xmm1
1632bc3d5698SJohn Baldwin	leaq	16(%r8),%r8
1633bc3d5698SJohn Baldwin	jnz	.Loop_enc1_8
/* aesenclast %xmm1,%xmm2 */
1634bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/*
 * %xmm0 = round key 0 of the data key, %rbp = data key schedule base,
 * %eax = loop count, %r10 = 16 * loop count, %r9 = original length,
 * %rdx = length rounded down to a multiple of 16.
 */
1635bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
1636bc3d5698SJohn Baldwin	movq	%rcx,%rbp
1637bc3d5698SJohn Baldwin	movl	%r10d,%eax
1638bc3d5698SJohn Baldwin	shll	$4,%r10d
1639bc3d5698SJohn Baldwin	movq	%rdx,%r9
1640bc3d5698SJohn Baldwin	andq	$-16,%rdx
1641bc3d5698SJohn Baldwin
/* %xmm1 = last round key; XORed with round key 0 below, parked at 96(%rsp). */
1642bc3d5698SJohn Baldwin	movups	16(%rcx,%r10,1),%xmm1
1643bc3d5698SJohn Baldwin
/*
 * Build the tweaks for blocks 0..4 in %xmm10..%xmm14, each pre-XORed with
 * round key 0, and leave the tweak for block 5 (not XORed) in %xmm15.
 * Each step snapshots %xmm15, doubles both 64-bit halves with paddq, and
 * XORs in a correction term: sign bits of the pshufd $0x5f copy in %xmm9
 * (psrad $31) masked with .Lxts_magic, a constant defined outside this block.
 * NOTE(review): this looks like the usual XTS tweak doubling; the exact
 * reduction depends on the .Lxts_magic value -- confirm there.
 */
1644bc3d5698SJohn Baldwin	movdqa	.Lxts_magic(%rip),%xmm8
1645bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm15
1646bc3d5698SJohn Baldwin	pshufd	$0x5f,%xmm2,%xmm9
1647bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm1
1648bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1649bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1650bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm10
1651bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1652bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1653bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1654bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
1655bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1656bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1657bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1658bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm11
1659bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1660bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1661bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1662bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm11
1663bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1664bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1665bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1666bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm12
1667bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1668bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1669bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1670bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm12
1671bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1672bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1673bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1674bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm13
1675bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1676bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1677bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1678bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm13
1679bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1680bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm14
1681bc3d5698SJohn Baldwin	psrad	$31,%xmm9
1682bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1683bc3d5698SJohn Baldwin	pand	%xmm8,%xmm9
1684bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm14
1685bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
1686bc3d5698SJohn Baldwin	movaps	%xmm1,96(%rsp)
1687bc3d5698SJohn Baldwin
/* Fewer than 96 bytes of whole blocks: skip the six-block loop. */
1688bc3d5698SJohn Baldwin	subq	$96,%rdx
1689bc3d5698SJohn Baldwin	jc	.Lxts_enc_short
1690bc3d5698SJohn Baldwin
/*
 * Six-block loop setup: %rcx = key base + 32 + 16 * loop count,
 * %rax = %r10 = 112 - 16 * loop count (index that .Lxts_enc_loop6 steps by
 * 32 until it reaches zero), %xmm1 = round key 1, %r8 = &.Lxts_magic.
 */
1691bc3d5698SJohn Baldwin	movl	$16+96,%eax
1692bc3d5698SJohn Baldwin	leaq	32(%rbp,%r10,1),%rcx
1693bc3d5698SJohn Baldwin	subq	%r10,%rax
1694bc3d5698SJohn Baldwin	movups	16(%rbp),%xmm1
1695bc3d5698SJohn Baldwin	movq	%rax,%r10
1696bc3d5698SJohn Baldwin	leaq	.Lxts_magic(%rip),%r8
1697bc3d5698SJohn Baldwin	jmp	.Lxts_enc_grandloop
1698bc3d5698SJohn Baldwin
1699bc3d5698SJohn Baldwin.align	32
/*
 * Main loop, six blocks per pass. Input blocks go to %xmm2..%xmm7 and are
 * XORed with (tweak ^ round key 0); %xmm8 is built as round key 0 ^ %xmm15
 * for the sixth block. While the first rounds run, each tweak register is
 * XORed with 96(%rsp) (round key 0 ^ last round key), which turns it into
 * (tweak ^ last round key), and is spilled to the stack scratch so the final
 * aesenclast can take it as a memory operand. The 48(%rsp) slot is written
 * later, after .Lxts_enc_loop6.
 */
1700bc3d5698SJohn Baldwin.Lxts_enc_grandloop:
1701bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
1702bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm8
1703bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
1704bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
1705bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
1706bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
1707bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1708bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
1709bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
1710bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1711bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
1712bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
1713bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1714bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
1715bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm8
1716bc3d5698SJohn Baldwin	movdqa	96(%rsp),%xmm9
1717bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
1718bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1719bc3d5698SJohn Baldwin	movups	32(%rbp),%xmm0
1720bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
1721bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm7
1722bc3d5698SJohn Baldwin
1723bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm10
1724bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1725bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm11
1726bc3d5698SJohn Baldwin	movdqa	%xmm10,0(%rsp)
1727bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1728bc3d5698SJohn Baldwin	movups	48(%rbp),%xmm1
1729bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm12
1730bc3d5698SJohn Baldwin
1731bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1732bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm13
1733bc3d5698SJohn Baldwin	movdqa	%xmm11,16(%rsp)
1734bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1735bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm14
1736bc3d5698SJohn Baldwin	movdqa	%xmm12,32(%rsp)
1737bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1738bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1739bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm8
1740bc3d5698SJohn Baldwin	movdqa	%xmm14,64(%rsp)
1741bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1742bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1743bc3d5698SJohn Baldwin	movups	64(%rbp),%xmm0
1744bc3d5698SJohn Baldwin	movdqa	%xmm8,80(%rsp)
/* Re-seed the sign-bit helper in %xmm9 from the current %xmm15. */
1745bc3d5698SJohn Baldwin	pshufd	$0x5f,%xmm15,%xmm9
1746bc3d5698SJohn Baldwin	jmp	.Lxts_enc_loop6
1747bc3d5698SJohn Baldwin.align	32
/*
 * Middle rounds: two aesenc rounds per pass over all six blocks, using
 * %xmm1 then %xmm0 as the round key. The jnz tests the flags left by the
 * addq on %rax; the instructions in between do not modify flags.
 */
1748bc3d5698SJohn Baldwin.Lxts_enc_loop6:
1749bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1750bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1751bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1752bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1753bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1754bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1755bc3d5698SJohn Baldwin	movups	-64(%rcx,%rax,1),%xmm1
1756bc3d5698SJohn Baldwin	addq	$32,%rax
1757bc3d5698SJohn Baldwin
1758bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1759bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1760bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1761bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1762bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1763bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1764bc3d5698SJohn Baldwin	movups	-80(%rcx,%rax,1),%xmm0
1765bc3d5698SJohn Baldwin	jnz	.Lxts_enc_loop6
1766bc3d5698SJohn Baldwin
/*
 * Remaining rounds, interleaved with the tweaks for the next six blocks:
 * round key 0 is reloaded from (%rbp) and copied down the chain
 * %xmm10 -> %xmm11 -> ... -> %xmm14, each copy then XORed with the
 * freshly doubled %xmm15. The old %xmm13 (tweak ^ last round key) is
 * spilled to 48(%rsp) before %xmm13 is reused.
 */
1767bc3d5698SJohn Baldwin	movdqa	(%r8),%xmm8
1768bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1769bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1770bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1771bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1772bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1773bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1774bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1775bc3d5698SJohn Baldwin	movups	(%rbp),%xmm10
1776bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1777bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1778bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1779bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1780bc3d5698SJohn Baldwin	movaps	%xmm10,%xmm11
1781bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1782bc3d5698SJohn Baldwin	movups	-64(%rcx),%xmm1
1783bc3d5698SJohn Baldwin
1784bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1785bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1786bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1787bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm10
1788bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1789bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1790bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1791bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1792bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1793bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1794bc3d5698SJohn Baldwin	movaps	%xmm11,%xmm12
1795bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1796bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1797bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1798bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1799bc3d5698SJohn Baldwin	movups	-48(%rcx),%xmm0
1800bc3d5698SJohn Baldwin
1801bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1802bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1803bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm11
1804bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1805bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1806bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1807bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1808bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1809bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1810bc3d5698SJohn Baldwin	movdqa	%xmm13,48(%rsp)
1811bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1812bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1813bc3d5698SJohn Baldwin	movaps	%xmm12,%xmm13
1814bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
1815bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1816bc3d5698SJohn Baldwin	movups	-32(%rcx),%xmm1
1817bc3d5698SJohn Baldwin
1818bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1819bc3d5698SJohn Baldwin.byte	102,15,56,220,208
1820bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm12
1821bc3d5698SJohn Baldwin	psrad	$31,%xmm14
1822bc3d5698SJohn Baldwin.byte	102,15,56,220,216
1823bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1824bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
1825bc3d5698SJohn Baldwin.byte	102,15,56,220,224
1826bc3d5698SJohn Baldwin.byte	102,15,56,220,232
1827bc3d5698SJohn Baldwin.byte	102,15,56,220,240
1828bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
1829bc3d5698SJohn Baldwin	movaps	%xmm13,%xmm14
1830bc3d5698SJohn Baldwin.byte	102,15,56,220,248
1831bc3d5698SJohn Baldwin
/* %xmm0 doubles as a temporary here; it is reloaded with round key 0 below. */
1832bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm0
1833bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
1834bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1835bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm13
1836bc3d5698SJohn Baldwin	psrad	$31,%xmm0
1837bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1838bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1839bc3d5698SJohn Baldwin	pand	%xmm8,%xmm0
1840bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1841bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1842bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm15
1843bc3d5698SJohn Baldwin	movups	(%rbp),%xmm0
1844bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1845bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1846bc3d5698SJohn Baldwin	movups	16(%rbp),%xmm1
1847bc3d5698SJohn Baldwin
/*
 * Final round: aesenclast N(%rsp),%xmmK for K = 2..7 and N = 0,16,..,80.
 * The memory operand is the spilled (tweak ^ last round key), so the last
 * round key and the output tweak XOR are applied in one instruction.
 * %rax is reloaded from %r10 for the next pass of .Lxts_enc_loop6.
 */
1848bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm14
1849bc3d5698SJohn Baldwin.byte	102,15,56,221,84,36,0
1850bc3d5698SJohn Baldwin	psrad	$31,%xmm9
1851bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
1852bc3d5698SJohn Baldwin.byte	102,15,56,221,92,36,16
1853bc3d5698SJohn Baldwin.byte	102,15,56,221,100,36,32
1854bc3d5698SJohn Baldwin	pand	%xmm8,%xmm9
1855bc3d5698SJohn Baldwin	movq	%r10,%rax
1856bc3d5698SJohn Baldwin.byte	102,15,56,221,108,36,48
1857bc3d5698SJohn Baldwin.byte	102,15,56,221,116,36,64
1858bc3d5698SJohn Baldwin.byte	102,15,56,221,124,36,80
1859bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
1860bc3d5698SJohn Baldwin
/* Store six output blocks; loop while at least 96 more bytes remain. */
1861bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
1862bc3d5698SJohn Baldwin	movups	%xmm2,-96(%rsi)
1863bc3d5698SJohn Baldwin	movups	%xmm3,-80(%rsi)
1864bc3d5698SJohn Baldwin	movups	%xmm4,-64(%rsi)
1865bc3d5698SJohn Baldwin	movups	%xmm5,-48(%rsi)
1866bc3d5698SJohn Baldwin	movups	%xmm6,-32(%rsi)
1867bc3d5698SJohn Baldwin	movups	%xmm7,-16(%rsi)
1868bc3d5698SJohn Baldwin	subq	$96,%rdx
1869bc3d5698SJohn Baldwin	jnc	.Lxts_enc_grandloop
1870bc3d5698SJohn Baldwin
/*
 * Leaving the main loop: undo the index encoding so that %eax is the plain
 * loop count again ((112 - %r10d) >> 4) and %rcx is the key schedule base.
 */
1871bc3d5698SJohn Baldwin	movl	$16+96,%eax
1872bc3d5698SJohn Baldwin	subl	%r10d,%eax
1873bc3d5698SJohn Baldwin	movq	%rbp,%rcx
1874bc3d5698SJohn Baldwin	shrl	$4,%eax
1875bc3d5698SJohn Baldwin
/*
 * Zero to five whole blocks remain (%rdx + 96 bytes). %r10d keeps the loop
 * count for the stealing step. Round key 0 is XORed back out of each tweak
 * register just before the branch that may need it, then the code
 * dispatches on the remaining byte count: 0x10 one, 0x20 two, 0x30 three,
 * 0x40 four, otherwise the five-block code that follows.
 */
1876bc3d5698SJohn Baldwin.Lxts_enc_short:
1877bc3d5698SJohn Baldwin
1878bc3d5698SJohn Baldwin	movl	%eax,%r10d
1879bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
1880bc3d5698SJohn Baldwin	addq	$96,%rdx
1881bc3d5698SJohn Baldwin	jz	.Lxts_enc_done
1882bc3d5698SJohn Baldwin
1883bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm11
1884bc3d5698SJohn Baldwin	cmpq	$0x20,%rdx
1885bc3d5698SJohn Baldwin	jb	.Lxts_enc_one
1886bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm12
1887bc3d5698SJohn Baldwin	je	.Lxts_enc_two
1888bc3d5698SJohn Baldwin
1889bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm13
1890bc3d5698SJohn Baldwin	cmpq	$0x40,%rdx
1891bc3d5698SJohn Baldwin	jb	.Lxts_enc_three
1892bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm14
1893bc3d5698SJohn Baldwin	je	.Lxts_enc_four
1894bc3d5698SJohn Baldwin
/*
 * Five blocks: XOR inputs with tweaks, zero %xmm7, call _aesni_encrypt6
 * (defined elsewhere in this file; presumably encrypts %xmm2..%xmm7 --
 * TODO confirm), XOR the tweaks back in and store. %xmm10 then takes the
 * next tweak from %xmm15 for a possible stolen tail block.
 */
1895bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
1896bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
1897bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
1898bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
1899bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
1900bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
1901bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
1902bc3d5698SJohn Baldwin	leaq	80(%rdi),%rdi
1903bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
1904bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
1905bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
1906bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
1907bc3d5698SJohn Baldwin
1908bc3d5698SJohn Baldwin	call	_aesni_encrypt6
1909bc3d5698SJohn Baldwin
1910bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1911bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm10
1912bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1913bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
1914bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
1915bc3d5698SJohn Baldwin	xorps	%xmm13,%xmm5
1916bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
1917bc3d5698SJohn Baldwin	xorps	%xmm14,%xmm6
1918bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
1919bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
1920bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%rsi)
1921bc3d5698SJohn Baldwin	leaq	80(%rsi),%rsi
1922bc3d5698SJohn Baldwin	jmp	.Lxts_enc_done
1923bc3d5698SJohn Baldwin
1924bc3d5698SJohn Baldwin.align	16
/*
 * One block: inline single-block encryption (%eax rounds of aesenc, then
 * aesenclast) with the tweak in %xmm10 XORed before and after. %xmm11
 * becomes the tweak kept in %xmm10 for the stealing step.
 */
1925bc3d5698SJohn Baldwin.Lxts_enc_one:
1926bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
1927bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
1928bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1929bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
1930bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
1931bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
1932bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1933bc3d5698SJohn Baldwin.Loop_enc1_9:
1934bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1935bc3d5698SJohn Baldwin	decl	%eax
1936bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
1937bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
1938bc3d5698SJohn Baldwin	jnz	.Loop_enc1_9
1939bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1940bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1941bc3d5698SJohn Baldwin	movdqa	%xmm11,%xmm10
1942bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1943bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
1944bc3d5698SJohn Baldwin	jmp	.Lxts_enc_done
1945bc3d5698SJohn Baldwin
1946bc3d5698SJohn Baldwin.align	16
/*
 * Two blocks via _aesni_encrypt2 (defined elsewhere in this file);
 * %xmm12 becomes the tweak kept in %xmm10 for the stealing step.
 */
1947bc3d5698SJohn Baldwin.Lxts_enc_two:
1948bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
1949bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
1950bc3d5698SJohn Baldwin	leaq	32(%rdi),%rdi
1951bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1952bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1953bc3d5698SJohn Baldwin
1954bc3d5698SJohn Baldwin	call	_aesni_encrypt2
1955bc3d5698SJohn Baldwin
1956bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1957bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm10
1958bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1959bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1960bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
1961bc3d5698SJohn Baldwin	leaq	32(%rsi),%rsi
1962bc3d5698SJohn Baldwin	jmp	.Lxts_enc_done
1963bc3d5698SJohn Baldwin
1964bc3d5698SJohn Baldwin.align	16
/*
 * Three blocks via _aesni_encrypt3 (defined elsewhere in this file);
 * %xmm13 becomes the tweak kept in %xmm10 for the stealing step.
 */
1965bc3d5698SJohn Baldwin.Lxts_enc_three:
1966bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
1967bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
1968bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
1969bc3d5698SJohn Baldwin	leaq	48(%rdi),%rdi
1970bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1971bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1972bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
1973bc3d5698SJohn Baldwin
1974bc3d5698SJohn Baldwin	call	_aesni_encrypt3
1975bc3d5698SJohn Baldwin
1976bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1977bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm10
1978bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1979bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
1980bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
1981bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
1982bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
1983bc3d5698SJohn Baldwin	leaq	48(%rsi),%rsi
1984bc3d5698SJohn Baldwin	jmp	.Lxts_enc_done
1985bc3d5698SJohn Baldwin
1986bc3d5698SJohn Baldwin.align	16
/*
 * Four blocks via _aesni_encrypt4 (defined elsewhere in this file);
 * %xmm14 becomes the tweak kept in %xmm10 for the stealing step.
 */
1987bc3d5698SJohn Baldwin.Lxts_enc_four:
1988bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
1989bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
1990bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
1991bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
1992bc3d5698SJohn Baldwin	movups	48(%rdi),%xmm5
1993bc3d5698SJohn Baldwin	leaq	64(%rdi),%rdi
1994bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
1995bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
1996bc3d5698SJohn Baldwin	xorps	%xmm13,%xmm5
1997bc3d5698SJohn Baldwin
1998bc3d5698SJohn Baldwin	call	_aesni_encrypt4
1999bc3d5698SJohn Baldwin
2000bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
2001bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm10
2002bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
2003bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
2004bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
2005bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
2006bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
2007bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
2008bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
2009bc3d5698SJohn Baldwin	leaq	64(%rsi),%rsi
2010bc3d5698SJohn Baldwin	jmp	.Lxts_enc_done
2011bc3d5698SJohn Baldwin
2012bc3d5698SJohn Baldwin.align	16
/*
 * Tail handling: %r9 = original length mod 16. If it is zero there is
 * nothing to steal and the function goes straight to cleanup.
 */
2013bc3d5698SJohn Baldwin.Lxts_enc_done:
2014bc3d5698SJohn Baldwin	andq	$15,%r9
2015bc3d5698SJohn Baldwin	jz	.Lxts_enc_ret
2016bc3d5698SJohn Baldwin	movq	%r9,%rdx
2017bc3d5698SJohn Baldwin
/*
 * Ciphertext stealing, one byte per pass: the leading bytes of the last
 * full output block (at -16(%rsi)) move out to become the partial final
 * output, and the remaining input bytes take their place in that block.
 */
2018bc3d5698SJohn Baldwin.Lxts_enc_steal:
2019bc3d5698SJohn Baldwin	movzbl	(%rdi),%eax
2020bc3d5698SJohn Baldwin	movzbl	-16(%rsi),%ecx
2021bc3d5698SJohn Baldwin	leaq	1(%rdi),%rdi
2022bc3d5698SJohn Baldwin	movb	%al,-16(%rsi)
2023bc3d5698SJohn Baldwin	movb	%cl,0(%rsi)
2024bc3d5698SJohn Baldwin	leaq	1(%rsi),%rsi
2025bc3d5698SJohn Baldwin	subq	$1,%rdx
2026bc3d5698SJohn Baldwin	jnz	.Lxts_enc_steal
2027bc3d5698SJohn Baldwin
/*
 * Rewind %rsi, restore the key base and loop count, and re-encrypt the
 * patched block at -16(%rsi) in place with the tweak held in %xmm10.
 */
2028bc3d5698SJohn Baldwin	subq	%r9,%rsi
2029bc3d5698SJohn Baldwin	movq	%rbp,%rcx
2030bc3d5698SJohn Baldwin	movl	%r10d,%eax
2031bc3d5698SJohn Baldwin
2032bc3d5698SJohn Baldwin	movups	-16(%rsi),%xmm2
2033bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2034bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
2035bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
2036bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
2037bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2038bc3d5698SJohn Baldwin.Loop_enc1_10:
2039bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2040bc3d5698SJohn Baldwin	decl	%eax
2041bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
2042bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
2043bc3d5698SJohn Baldwin	jnz	.Loop_enc1_10
2044bc3d5698SJohn Baldwin.byte	102,15,56,221,209
2045bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2046bc3d5698SJohn Baldwin	movups	%xmm2,-16(%rsi)
2047bc3d5698SJohn Baldwin
/*
 * Cleanup: zero %xmm0..%xmm15 and the 0..96(%rsp) scratch slots so that no
 * round keys, tweaks or data are left behind, then restore %rbp from the
 * slot below the entry %rsp and restore %rsp itself from %r11.
 */
2048bc3d5698SJohn Baldwin.Lxts_enc_ret:
2049bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
2050bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
2051bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2052bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2053bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2054bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2055bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2056bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2057bc3d5698SJohn Baldwin	movaps	%xmm0,0(%rsp)
2058bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
2059bc3d5698SJohn Baldwin	movaps	%xmm0,16(%rsp)
2060bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
2061bc3d5698SJohn Baldwin	movaps	%xmm0,32(%rsp)
2062bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm10
2063bc3d5698SJohn Baldwin	movaps	%xmm0,48(%rsp)
2064bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm11
2065bc3d5698SJohn Baldwin	movaps	%xmm0,64(%rsp)
2066bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm12
2067bc3d5698SJohn Baldwin	movaps	%xmm0,80(%rsp)
2068bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm13
2069bc3d5698SJohn Baldwin	movaps	%xmm0,96(%rsp)
2070bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm14
2071bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm15
2072bc3d5698SJohn Baldwin	movq	-8(%r11),%rbp
2073bc3d5698SJohn Baldwin.cfi_restore	%rbp
2074bc3d5698SJohn Baldwin	leaq	(%r11),%rsp
2075bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
2076bc3d5698SJohn Baldwin.Lxts_enc_epilogue:
/* rep ret, spelled as raw bytes */
2077bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
2078bc3d5698SJohn Baldwin.cfi_endproc
2079bc3d5698SJohn Baldwin.size	aesni_xts_encrypt,.-aesni_xts_encrypt
2080bc3d5698SJohn Baldwin.globl	aesni_xts_decrypt
2081bc3d5698SJohn Baldwin.type	aesni_xts_decrypt,@function
2082bc3d5698SJohn Baldwin.align	16
2083bc3d5698SJohn Baldwinaesni_xts_decrypt:
2084bc3d5698SJohn Baldwin.cfi_startproc
2085*c0855eaaSJohn Baldwin.byte	243,15,30,250
2086bc3d5698SJohn Baldwin	leaq	(%rsp),%r11
2087bc3d5698SJohn Baldwin.cfi_def_cfa_register	%r11
2088bc3d5698SJohn Baldwin	pushq	%rbp
2089bc3d5698SJohn Baldwin.cfi_offset	%rbp,-16
2090bc3d5698SJohn Baldwin	subq	$112,%rsp
2091bc3d5698SJohn Baldwin	andq	$-16,%rsp
2092bc3d5698SJohn Baldwin	movups	(%r9),%xmm2
2093bc3d5698SJohn Baldwin	movl	240(%r8),%eax
2094bc3d5698SJohn Baldwin	movl	240(%rcx),%r10d
2095bc3d5698SJohn Baldwin	movups	(%r8),%xmm0
2096bc3d5698SJohn Baldwin	movups	16(%r8),%xmm1
2097bc3d5698SJohn Baldwin	leaq	32(%r8),%r8
2098bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2099bc3d5698SJohn Baldwin.Loop_enc1_11:
2100bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2101bc3d5698SJohn Baldwin	decl	%eax
2102bc3d5698SJohn Baldwin	movups	(%r8),%xmm1
2103bc3d5698SJohn Baldwin	leaq	16(%r8),%r8
2104bc3d5698SJohn Baldwin	jnz	.Loop_enc1_11
2105bc3d5698SJohn Baldwin.byte	102,15,56,221,209
2106bc3d5698SJohn Baldwin	xorl	%eax,%eax
2107bc3d5698SJohn Baldwin	testq	$15,%rdx
2108bc3d5698SJohn Baldwin	setnz	%al
2109bc3d5698SJohn Baldwin	shlq	$4,%rax
2110bc3d5698SJohn Baldwin	subq	%rax,%rdx
2111bc3d5698SJohn Baldwin
2112bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
2113bc3d5698SJohn Baldwin	movq	%rcx,%rbp
2114bc3d5698SJohn Baldwin	movl	%r10d,%eax
2115bc3d5698SJohn Baldwin	shll	$4,%r10d
2116bc3d5698SJohn Baldwin	movq	%rdx,%r9
2117bc3d5698SJohn Baldwin	andq	$-16,%rdx
2118bc3d5698SJohn Baldwin
2119bc3d5698SJohn Baldwin	movups	16(%rcx,%r10,1),%xmm1
2120bc3d5698SJohn Baldwin
2121bc3d5698SJohn Baldwin	movdqa	.Lxts_magic(%rip),%xmm8
2122bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm15
2123bc3d5698SJohn Baldwin	pshufd	$0x5f,%xmm2,%xmm9
2124bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm1
2125bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2126bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2127bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm10
2128bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2129bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2130bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2131bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
2132bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2133bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2134bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2135bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm11
2136bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2137bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2138bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2139bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm11
2140bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2141bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2142bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2143bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm12
2144bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2145bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2146bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2147bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm12
2148bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2149bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2150bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2151bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm13
2152bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2153bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2154bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2155bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm13
2156bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2157bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm14
2158bc3d5698SJohn Baldwin	psrad	$31,%xmm9
2159bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2160bc3d5698SJohn Baldwin	pand	%xmm8,%xmm9
2161bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm14
2162bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
2163bc3d5698SJohn Baldwin	movaps	%xmm1,96(%rsp)
2164bc3d5698SJohn Baldwin
2165bc3d5698SJohn Baldwin	subq	$96,%rdx
2166bc3d5698SJohn Baldwin	jc	.Lxts_dec_short
2167bc3d5698SJohn Baldwin
2168bc3d5698SJohn Baldwin	movl	$16+96,%eax
2169bc3d5698SJohn Baldwin	leaq	32(%rbp,%r10,1),%rcx
2170bc3d5698SJohn Baldwin	subq	%r10,%rax
2171bc3d5698SJohn Baldwin	movups	16(%rbp),%xmm1
2172bc3d5698SJohn Baldwin	movq	%rax,%r10
2173bc3d5698SJohn Baldwin	leaq	.Lxts_magic(%rip),%r8
2174bc3d5698SJohn Baldwin	jmp	.Lxts_dec_grandloop
2175bc3d5698SJohn Baldwin
2176bc3d5698SJohn Baldwin.align	32
2177bc3d5698SJohn Baldwin.Lxts_dec_grandloop:
2178bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
2179bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm8
2180bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
2181bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
2182bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
2183bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
2184bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2185bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
2186bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
2187bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2188bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
2189bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
2190bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2191bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
2192bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm8
2193bc3d5698SJohn Baldwin	movdqa	96(%rsp),%xmm9
2194bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
2195bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2196bc3d5698SJohn Baldwin	movups	32(%rbp),%xmm0
2197bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
2198bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm7
2199bc3d5698SJohn Baldwin
2200bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm10
2201bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2202bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm11
2203bc3d5698SJohn Baldwin	movdqa	%xmm10,0(%rsp)
2204bc3d5698SJohn Baldwin.byte	102,15,56,222,249
2205bc3d5698SJohn Baldwin	movups	48(%rbp),%xmm1
2206bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm12
2207bc3d5698SJohn Baldwin
2208bc3d5698SJohn Baldwin.byte	102,15,56,222,208
2209bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm13
2210bc3d5698SJohn Baldwin	movdqa	%xmm11,16(%rsp)
2211bc3d5698SJohn Baldwin.byte	102,15,56,222,216
2212bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm14
2213bc3d5698SJohn Baldwin	movdqa	%xmm12,32(%rsp)
2214bc3d5698SJohn Baldwin.byte	102,15,56,222,224
2215bc3d5698SJohn Baldwin.byte	102,15,56,222,232
2216bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm8
2217bc3d5698SJohn Baldwin	movdqa	%xmm14,64(%rsp)
2218bc3d5698SJohn Baldwin.byte	102,15,56,222,240
2219bc3d5698SJohn Baldwin.byte	102,15,56,222,248
2220bc3d5698SJohn Baldwin	movups	64(%rbp),%xmm0
2221bc3d5698SJohn Baldwin	movdqa	%xmm8,80(%rsp)
2222bc3d5698SJohn Baldwin	pshufd	$0x5f,%xmm15,%xmm9
2223bc3d5698SJohn Baldwin	jmp	.Lxts_dec_loop6
2224bc3d5698SJohn Baldwin.align	32
2225bc3d5698SJohn Baldwin.Lxts_dec_loop6:
2226bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2227bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2228bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2229bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2230bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2231bc3d5698SJohn Baldwin.byte	102,15,56,222,249
2232bc3d5698SJohn Baldwin	movups	-64(%rcx,%rax,1),%xmm1
2233bc3d5698SJohn Baldwin	addq	$32,%rax
2234bc3d5698SJohn Baldwin
2235bc3d5698SJohn Baldwin.byte	102,15,56,222,208
2236bc3d5698SJohn Baldwin.byte	102,15,56,222,216
2237bc3d5698SJohn Baldwin.byte	102,15,56,222,224
2238bc3d5698SJohn Baldwin.byte	102,15,56,222,232
2239bc3d5698SJohn Baldwin.byte	102,15,56,222,240
2240bc3d5698SJohn Baldwin.byte	102,15,56,222,248
2241bc3d5698SJohn Baldwin	movups	-80(%rcx,%rax,1),%xmm0
2242bc3d5698SJohn Baldwin	jnz	.Lxts_dec_loop6
2243bc3d5698SJohn Baldwin
2244bc3d5698SJohn Baldwin	movdqa	(%r8),%xmm8
2245bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2246bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2247bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2248bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2249bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2250bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2251bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2252bc3d5698SJohn Baldwin	movups	(%rbp),%xmm10
2253bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2254bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2255bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2256bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2257bc3d5698SJohn Baldwin	movaps	%xmm10,%xmm11
2258bc3d5698SJohn Baldwin.byte	102,15,56,222,249
2259bc3d5698SJohn Baldwin	movups	-64(%rcx),%xmm1
2260bc3d5698SJohn Baldwin
2261bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2262bc3d5698SJohn Baldwin.byte	102,15,56,222,208
2263bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2264bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm10
2265bc3d5698SJohn Baldwin.byte	102,15,56,222,216
2266bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2267bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2268bc3d5698SJohn Baldwin.byte	102,15,56,222,224
2269bc3d5698SJohn Baldwin.byte	102,15,56,222,232
2270bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2271bc3d5698SJohn Baldwin	movaps	%xmm11,%xmm12
2272bc3d5698SJohn Baldwin.byte	102,15,56,222,240
2273bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2274bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2275bc3d5698SJohn Baldwin.byte	102,15,56,222,248
2276bc3d5698SJohn Baldwin	movups	-48(%rcx),%xmm0
2277bc3d5698SJohn Baldwin
2278bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2279bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2280bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm11
2281bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2282bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2283bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2284bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2285bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2286bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2287bc3d5698SJohn Baldwin	movdqa	%xmm13,48(%rsp)
2288bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2289bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2290bc3d5698SJohn Baldwin	movaps	%xmm12,%xmm13
2291bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm14
2292bc3d5698SJohn Baldwin.byte	102,15,56,222,249
2293bc3d5698SJohn Baldwin	movups	-32(%rcx),%xmm1
2294bc3d5698SJohn Baldwin
2295bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2296bc3d5698SJohn Baldwin.byte	102,15,56,222,208
2297bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm12
2298bc3d5698SJohn Baldwin	psrad	$31,%xmm14
2299bc3d5698SJohn Baldwin.byte	102,15,56,222,216
2300bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2301bc3d5698SJohn Baldwin	pand	%xmm8,%xmm14
2302bc3d5698SJohn Baldwin.byte	102,15,56,222,224
2303bc3d5698SJohn Baldwin.byte	102,15,56,222,232
2304bc3d5698SJohn Baldwin.byte	102,15,56,222,240
2305bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2306bc3d5698SJohn Baldwin	movaps	%xmm13,%xmm14
2307bc3d5698SJohn Baldwin.byte	102,15,56,222,248
2308bc3d5698SJohn Baldwin
2309bc3d5698SJohn Baldwin	movdqa	%xmm9,%xmm0
2310bc3d5698SJohn Baldwin	paddd	%xmm9,%xmm9
2311bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2312bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm13
2313bc3d5698SJohn Baldwin	psrad	$31,%xmm0
2314bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2315bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2316bc3d5698SJohn Baldwin	pand	%xmm8,%xmm0
2317bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2318bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2319bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm15
2320bc3d5698SJohn Baldwin	movups	(%rbp),%xmm0
2321bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2322bc3d5698SJohn Baldwin.byte	102,15,56,222,249
2323bc3d5698SJohn Baldwin	movups	16(%rbp),%xmm1
2324bc3d5698SJohn Baldwin
2325bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm14
2326bc3d5698SJohn Baldwin.byte	102,15,56,223,84,36,0
2327bc3d5698SJohn Baldwin	psrad	$31,%xmm9
2328bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2329bc3d5698SJohn Baldwin.byte	102,15,56,223,92,36,16
2330bc3d5698SJohn Baldwin.byte	102,15,56,223,100,36,32
2331bc3d5698SJohn Baldwin	pand	%xmm8,%xmm9
2332bc3d5698SJohn Baldwin	movq	%r10,%rax
2333bc3d5698SJohn Baldwin.byte	102,15,56,223,108,36,48
2334bc3d5698SJohn Baldwin.byte	102,15,56,223,116,36,64
2335bc3d5698SJohn Baldwin.byte	102,15,56,223,124,36,80
2336bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
2337bc3d5698SJohn Baldwin
2338bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
2339bc3d5698SJohn Baldwin	movups	%xmm2,-96(%rsi)
2340bc3d5698SJohn Baldwin	movups	%xmm3,-80(%rsi)
2341bc3d5698SJohn Baldwin	movups	%xmm4,-64(%rsi)
2342bc3d5698SJohn Baldwin	movups	%xmm5,-48(%rsi)
2343bc3d5698SJohn Baldwin	movups	%xmm6,-32(%rsi)
2344bc3d5698SJohn Baldwin	movups	%xmm7,-16(%rsi)
2345bc3d5698SJohn Baldwin	subq	$96,%rdx
2346bc3d5698SJohn Baldwin	jnc	.Lxts_dec_grandloop
2347bc3d5698SJohn Baldwin
2348bc3d5698SJohn Baldwin	movl	$16+96,%eax
2349bc3d5698SJohn Baldwin	subl	%r10d,%eax
2350bc3d5698SJohn Baldwin	movq	%rbp,%rcx
2351bc3d5698SJohn Baldwin	shrl	$4,%eax
2352bc3d5698SJohn Baldwin
2353bc3d5698SJohn Baldwin.Lxts_dec_short:
2354bc3d5698SJohn Baldwin
2355bc3d5698SJohn Baldwin	movl	%eax,%r10d
2356bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
2357bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm11
2358bc3d5698SJohn Baldwin	addq	$96,%rdx
2359bc3d5698SJohn Baldwin	jz	.Lxts_dec_done
2360bc3d5698SJohn Baldwin
2361bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm12
2362bc3d5698SJohn Baldwin	cmpq	$0x20,%rdx
2363bc3d5698SJohn Baldwin	jb	.Lxts_dec_one
2364bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm13
2365bc3d5698SJohn Baldwin	je	.Lxts_dec_two
2366bc3d5698SJohn Baldwin
2367bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm14
2368bc3d5698SJohn Baldwin	cmpq	$0x40,%rdx
2369bc3d5698SJohn Baldwin	jb	.Lxts_dec_three
2370bc3d5698SJohn Baldwin	je	.Lxts_dec_four
2371bc3d5698SJohn Baldwin
2372bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
2373bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
2374bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
2375bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
2376bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
2377bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
2378bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
2379bc3d5698SJohn Baldwin	leaq	80(%rdi),%rdi
2380bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
2381bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
2382bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
2383bc3d5698SJohn Baldwin
2384bc3d5698SJohn Baldwin	call	_aesni_decrypt6
2385bc3d5698SJohn Baldwin
2386bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2387bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
2388bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
2389bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
2390bc3d5698SJohn Baldwin	xorps	%xmm13,%xmm5
2391bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
2392bc3d5698SJohn Baldwin	xorps	%xmm14,%xmm6
2393bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
2394bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm14
2395bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
2396bc3d5698SJohn Baldwin	pcmpgtd	%xmm15,%xmm14
2397bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%rsi)
2398bc3d5698SJohn Baldwin	leaq	80(%rsi),%rsi
2399bc3d5698SJohn Baldwin	pshufd	$0x13,%xmm14,%xmm11
2400bc3d5698SJohn Baldwin	andq	$15,%r9
2401bc3d5698SJohn Baldwin	jz	.Lxts_dec_ret
2402bc3d5698SJohn Baldwin
2403bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm10
2404bc3d5698SJohn Baldwin	paddq	%xmm15,%xmm15
2405bc3d5698SJohn Baldwin	pand	%xmm8,%xmm11
2406bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm11
2407bc3d5698SJohn Baldwin	jmp	.Lxts_dec_done2
2408bc3d5698SJohn Baldwin
2409bc3d5698SJohn Baldwin.align	16
2410bc3d5698SJohn Baldwin.Lxts_dec_one:
2411bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
2412bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
2413bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2414bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
2415bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
2416bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
2417bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2418bc3d5698SJohn Baldwin.Loop_dec1_12:
2419bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2420bc3d5698SJohn Baldwin	decl	%eax
2421bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
2422bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
2423bc3d5698SJohn Baldwin	jnz	.Loop_dec1_12
2424bc3d5698SJohn Baldwin.byte	102,15,56,223,209
2425bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2426bc3d5698SJohn Baldwin	movdqa	%xmm11,%xmm10
2427bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
2428bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm11
2429bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
2430bc3d5698SJohn Baldwin	jmp	.Lxts_dec_done
2431bc3d5698SJohn Baldwin
2432bc3d5698SJohn Baldwin.align	16
2433bc3d5698SJohn Baldwin.Lxts_dec_two:
2434bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
2435bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
2436bc3d5698SJohn Baldwin	leaq	32(%rdi),%rdi
2437bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2438bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
2439bc3d5698SJohn Baldwin
2440bc3d5698SJohn Baldwin	call	_aesni_decrypt2
2441bc3d5698SJohn Baldwin
2442bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2443bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm10
2444bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
2445bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm11
2446bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
2447bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2448bc3d5698SJohn Baldwin	leaq	32(%rsi),%rsi
2449bc3d5698SJohn Baldwin	jmp	.Lxts_dec_done
2450bc3d5698SJohn Baldwin
2451bc3d5698SJohn Baldwin.align	16
2452bc3d5698SJohn Baldwin.Lxts_dec_three:
2453bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
2454bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
2455bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
2456bc3d5698SJohn Baldwin	leaq	48(%rdi),%rdi
2457bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2458bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
2459bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
2460bc3d5698SJohn Baldwin
2461bc3d5698SJohn Baldwin	call	_aesni_decrypt3
2462bc3d5698SJohn Baldwin
2463bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2464bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm10
2465bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
2466bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm11
2467bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
2468bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
2469bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2470bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
2471bc3d5698SJohn Baldwin	leaq	48(%rsi),%rsi
2472bc3d5698SJohn Baldwin	jmp	.Lxts_dec_done
2473bc3d5698SJohn Baldwin
2474bc3d5698SJohn Baldwin.align	16
2475bc3d5698SJohn Baldwin.Lxts_dec_four:
2476bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
2477bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
2478bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
2479bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2480bc3d5698SJohn Baldwin	movups	48(%rdi),%xmm5
2481bc3d5698SJohn Baldwin	leaq	64(%rdi),%rdi
2482bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm3
2483bc3d5698SJohn Baldwin	xorps	%xmm12,%xmm4
2484bc3d5698SJohn Baldwin	xorps	%xmm13,%xmm5
2485bc3d5698SJohn Baldwin
2486bc3d5698SJohn Baldwin	call	_aesni_decrypt4
2487bc3d5698SJohn Baldwin
2488bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
2489bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm10
2490bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
2491bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm11
2492bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
2493bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
2494bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
2495bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
2496bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
2497bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
2498bc3d5698SJohn Baldwin	leaq	64(%rsi),%rsi
2499bc3d5698SJohn Baldwin	jmp	.Lxts_dec_done
2500bc3d5698SJohn Baldwin
2501bc3d5698SJohn Baldwin.align	16
2502bc3d5698SJohn Baldwin.Lxts_dec_done:
2503bc3d5698SJohn Baldwin	andq	$15,%r9
2504bc3d5698SJohn Baldwin	jz	.Lxts_dec_ret
2505bc3d5698SJohn Baldwin.Lxts_dec_done2:
2506bc3d5698SJohn Baldwin	movq	%r9,%rdx
2507bc3d5698SJohn Baldwin	movq	%rbp,%rcx
2508bc3d5698SJohn Baldwin	movl	%r10d,%eax
2509bc3d5698SJohn Baldwin
2510bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
2511bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm2
2512bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
2513bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
2514bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
2515bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2516bc3d5698SJohn Baldwin.Loop_dec1_13:
2517bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2518bc3d5698SJohn Baldwin	decl	%eax
2519bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
2520bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
2521bc3d5698SJohn Baldwin	jnz	.Loop_dec1_13
2522bc3d5698SJohn Baldwin.byte	102,15,56,223,209
2523bc3d5698SJohn Baldwin	xorps	%xmm11,%xmm2
2524bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
2525bc3d5698SJohn Baldwin
2526bc3d5698SJohn Baldwin.Lxts_dec_steal:
2527bc3d5698SJohn Baldwin	movzbl	16(%rdi),%eax
2528bc3d5698SJohn Baldwin	movzbl	(%rsi),%ecx
2529bc3d5698SJohn Baldwin	leaq	1(%rdi),%rdi
2530bc3d5698SJohn Baldwin	movb	%al,(%rsi)
2531bc3d5698SJohn Baldwin	movb	%cl,16(%rsi)
2532bc3d5698SJohn Baldwin	leaq	1(%rsi),%rsi
2533bc3d5698SJohn Baldwin	subq	$1,%rdx
2534bc3d5698SJohn Baldwin	jnz	.Lxts_dec_steal
2535bc3d5698SJohn Baldwin
2536bc3d5698SJohn Baldwin	subq	%r9,%rsi
2537bc3d5698SJohn Baldwin	movq	%rbp,%rcx
2538bc3d5698SJohn Baldwin	movl	%r10d,%eax
2539bc3d5698SJohn Baldwin
2540bc3d5698SJohn Baldwin	movups	(%rsi),%xmm2
2541bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2542bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
2543bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
2544bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
2545bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2546bc3d5698SJohn Baldwin.Loop_dec1_14:
2547bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2548bc3d5698SJohn Baldwin	decl	%eax
2549bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
2550bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
2551bc3d5698SJohn Baldwin	jnz	.Loop_dec1_14
2552bc3d5698SJohn Baldwin.byte	102,15,56,223,209
2553bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
2554bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
2555bc3d5698SJohn Baldwin
2556bc3d5698SJohn Baldwin.Lxts_dec_ret:
2557bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
2558bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
2559bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2560bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2561bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2562bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2563bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2564bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2565bc3d5698SJohn Baldwin	movaps	%xmm0,0(%rsp)
2566bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
2567bc3d5698SJohn Baldwin	movaps	%xmm0,16(%rsp)
2568bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
2569bc3d5698SJohn Baldwin	movaps	%xmm0,32(%rsp)
2570bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm10
2571bc3d5698SJohn Baldwin	movaps	%xmm0,48(%rsp)
2572bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm11
2573bc3d5698SJohn Baldwin	movaps	%xmm0,64(%rsp)
2574bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm12
2575bc3d5698SJohn Baldwin	movaps	%xmm0,80(%rsp)
2576bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm13
2577bc3d5698SJohn Baldwin	movaps	%xmm0,96(%rsp)
2578bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm14
2579bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm15
2580bc3d5698SJohn Baldwin	movq	-8(%r11),%rbp
2581bc3d5698SJohn Baldwin.cfi_restore	%rbp
2582bc3d5698SJohn Baldwin	leaq	(%r11),%rsp
2583bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
2584bc3d5698SJohn Baldwin.Lxts_dec_epilogue:
2585bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
2586bc3d5698SJohn Baldwin.cfi_endproc
2587bc3d5698SJohn Baldwin.size	aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_ocb_encrypt -- OCB-mode bulk encryption of whole 16-byte blocks
 * with AES-NI.  Register roles, as read from the code below:
 *   %rdi      input pointer, advanced by 16 bytes per block consumed
 *   %rsi      output pointer, advanced by 16 bytes per block produced
 *   %rdx      number of 16-byte blocks to process
 *   %rcx      key schedule; the round count is read from offset 240
 *   %r8       index of the first block; bsf() of block indices selects
 *             entries of the table below
 *   %r9       16-byte running offset: loaded at entry, stored at exit
 *   8(%rsp)   (at entry) base of a table of 16-byte entries, kept in %rbx
 *   16(%rsp)  (at entry) 16-byte checksum: loaded into %xmm8 at entry,
 *             stored back at exit, pointer kept in %rbp
 * NOTE(review): presumably this matches the C prototype declared by the
 * OpenSSL OCB glue code -- confirm against aesni-x86_64.pl.
 * NOTE(review): there is no visible guard for %rdx == 0; callers
 * presumably always pass at least one block -- confirm.
 * All sixteen XMM registers are zeroed before returning.
 */
2588bc3d5698SJohn Baldwin.globl	aesni_ocb_encrypt
2589bc3d5698SJohn Baldwin.type	aesni_ocb_encrypt,@function
2590bc3d5698SJohn Baldwin.align	32
2591bc3d5698SJohn Baldwinaesni_ocb_encrypt:
2592bc3d5698SJohn Baldwin.cfi_startproc
/* F3 0F 1E FA = endbr64, emitted as raw bytes. */
2593*c0855eaaSJohn Baldwin.byte	243,15,30,250
/* Remember the entry %rsp in %rax, then save the callee-saved registers
 * this function uses (%rbx, %rbp, %r12-%r14). */
2594bc3d5698SJohn Baldwin	leaq	(%rsp),%rax
2595bc3d5698SJohn Baldwin	pushq	%rbx
2596bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
2597bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
2598bc3d5698SJohn Baldwin	pushq	%rbp
2599bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
2600bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
2601bc3d5698SJohn Baldwin	pushq	%r12
2602bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
2603bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
2604bc3d5698SJohn Baldwin	pushq	%r13
2605bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
2606bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
2607bc3d5698SJohn Baldwin	pushq	%r14
2608bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
2609bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
/* Fetch the two stack-passed arguments relative to the entry %rsp:
 * %rbx = table base, %rbp = checksum pointer. */
2610bc3d5698SJohn Baldwin	movq	8(%rax),%rbx
2611bc3d5698SJohn Baldwin	movq	8+8(%rax),%rbp
2612bc3d5698SJohn Baldwin
/* %r10 = 16 * rounds, %r11 = key schedule base.
 * %xmm1 = round key at offset 16 + 16*rounds (presumably the final round
 * key -- confirm against the key-schedule layout). */
2613bc3d5698SJohn Baldwin	movl	240(%rcx),%r10d
2614bc3d5698SJohn Baldwin	movq	%rcx,%r11
2615bc3d5698SJohn Baldwin	shll	$4,%r10d
2616bc3d5698SJohn Baldwin	movups	(%rcx),%xmm9
2617bc3d5698SJohn Baldwin	movups	16(%rcx,%r10,1),%xmm1
2618bc3d5698SJohn Baldwin
/* %xmm9 = (round key at offset 0) ^ %xmm1.  The running offset in %xmm15
 * is kept XORed with %xmm1 from here until .Locb_enc_done. */
2619bc3d5698SJohn Baldwin	movdqu	(%r9),%xmm15
2620bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm9
2621bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm15
2622bc3d5698SJohn Baldwin
/* Round-key addressing used by the helpers:
 *   %rcx = key + 32 + 16*rounds
 *   %rax = %r10 = 48 - 16*rounds (negative index counted up towards zero;
 *          %r10 keeps the start value so %rax can be reloaded)
 *   %xmm1 = round key at offset 16 */
2623bc3d5698SJohn Baldwin	movl	$16+32,%eax
2624bc3d5698SJohn Baldwin	leaq	32(%r11,%r10,1),%rcx
2625bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
2626bc3d5698SJohn Baldwin	subq	%r10,%rax
2627bc3d5698SJohn Baldwin	movq	%rax,%r10
2628bc3d5698SJohn Baldwin
/* %xmm10 = first table entry, %xmm8 = checksum accumulator. */
2629bc3d5698SJohn Baldwin	movdqu	(%rbx),%xmm10
2630bc3d5698SJohn Baldwin	movdqu	(%rbp),%xmm8
2631bc3d5698SJohn Baldwin
/* If the starting block index is even, encrypt one block on its own so
 * that the batched code at .Locb_enc_odd always starts on an odd index. */
2632bc3d5698SJohn Baldwin	testq	$1,%r8
2633bc3d5698SJohn Baldwin	jnz	.Locb_enc_odd
2634bc3d5698SJohn Baldwin
/* %xmm7 = table entry number bsf(%r8); %xmm2 = the input block. */
2635bc3d5698SJohn Baldwin	bsfq	%r8,%r12
2636bc3d5698SJohn Baldwin	addq	$1,%r8
2637bc3d5698SJohn Baldwin	shlq	$4,%r12
2638bc3d5698SJohn Baldwin	movdqu	(%rbx,%r12,1),%xmm7
2639bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
2640bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
2641bc3d5698SJohn Baldwin
/* __ocb_encrypt1 is defined elsewhere in this file. */
2642bc3d5698SJohn Baldwin	call	__ocb_encrypt1
2643bc3d5698SJohn Baldwin
/* Presumably the helper returns the updated running offset in %xmm7 and
 * the ciphertext in %xmm2 -- confirm in __ocb_encrypt1. */
2644bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm15
2645bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
2646bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
2647bc3d5698SJohn Baldwin	subq	$1,%rdx
2648bc3d5698SJohn Baldwin	jz	.Locb_enc_done
2649bc3d5698SJohn Baldwin
2650bc3d5698SJohn Baldwin.Locb_enc_odd:
/* Precompute table byte offsets 16*bsf(i) for block indices %r8+1, %r8+3
 * and %r8+5 (the even indices of the next six blocks, since %r8 is odd
 * here), then advance %r8 by six. */
2651bc3d5698SJohn Baldwin	leaq	1(%r8),%r12
2652bc3d5698SJohn Baldwin	leaq	3(%r8),%r13
2653bc3d5698SJohn Baldwin	leaq	5(%r8),%r14
2654bc3d5698SJohn Baldwin	leaq	6(%r8),%r8
2655bc3d5698SJohn Baldwin	bsfq	%r12,%r12
2656bc3d5698SJohn Baldwin	bsfq	%r13,%r13
2657bc3d5698SJohn Baldwin	bsfq	%r14,%r14
2658bc3d5698SJohn Baldwin	shlq	$4,%r12
2659bc3d5698SJohn Baldwin	shlq	$4,%r13
2660bc3d5698SJohn Baldwin	shlq	$4,%r14
2661bc3d5698SJohn Baldwin
/* %rdx is biased by -6 while in the main loop; a borrow means fewer than
 * six blocks remain. */
2662bc3d5698SJohn Baldwin	subq	$6,%rdx
2663bc3d5698SJohn Baldwin	jc	.Locb_enc_short
2664bc3d5698SJohn Baldwin	jmp	.Locb_enc_grandloop
2665bc3d5698SJohn Baldwin
2666bc3d5698SJohn Baldwin.align	32
2667bc3d5698SJohn Baldwin.Locb_enc_grandloop:
/* Main loop: six blocks per iteration.  __ocb_encrypt6 also recomputes
 * %r12-%r14 and advances %r8 for the following batch. */
2668bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
2669bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
2670bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
2671bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
2672bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
2673bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
2674bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
2675bc3d5698SJohn Baldwin
2676bc3d5698SJohn Baldwin	call	__ocb_encrypt6
2677bc3d5698SJohn Baldwin
2678bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
2679bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2680bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
2681bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
2682bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
2683bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
2684bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
2685bc3d5698SJohn Baldwin	subq	$6,%rdx
2686bc3d5698SJohn Baldwin	jnc	.Locb_enc_grandloop
2687bc3d5698SJohn Baldwin
2688bc3d5698SJohn Baldwin.Locb_enc_short:
/* Undo the -6 bias: %rdx = number of remaining blocks (0..5). */
2689bc3d5698SJohn Baldwin	addq	$6,%rdx
2690bc3d5698SJohn Baldwin	jz	.Locb_enc_done
2691bc3d5698SJohn Baldwin
/* Dispatch on the remaining count.  movdqu does not modify flags, so each
 * je below still tests the result of the preceding cmpq. */
2692bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
2693bc3d5698SJohn Baldwin	cmpq	$2,%rdx
2694bc3d5698SJohn Baldwin	jb	.Locb_enc_one
2695bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
2696bc3d5698SJohn Baldwin	je	.Locb_enc_two
2697bc3d5698SJohn Baldwin
2698bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
2699bc3d5698SJohn Baldwin	cmpq	$4,%rdx
2700bc3d5698SJohn Baldwin	jb	.Locb_enc_three
2701bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
2702bc3d5698SJohn Baldwin	je	.Locb_enc_four
2703bc3d5698SJohn Baldwin
/* Five blocks left: run the six-block helper with a zero sixth lane (a
 * zero block leaves the checksum XOR unchanged) and store only five
 * results.  The running offset is taken from %xmm14, the fifth block's
 * offset as left by __ocb_encrypt6. */
2704bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
2705bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2706bc3d5698SJohn Baldwin
2707bc3d5698SJohn Baldwin	call	__ocb_encrypt6
2708bc3d5698SJohn Baldwin
2709bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm15
2710bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
2711bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2712bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
2713bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
2714bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
2715bc3d5698SJohn Baldwin
2716bc3d5698SJohn Baldwin	jmp	.Locb_enc_done
2717bc3d5698SJohn Baldwin
2718bc3d5698SJohn Baldwin.align	16
2719bc3d5698SJohn Baldwin.Locb_enc_one:
/* One block left: pass the first table entry (%xmm10) in %xmm7. */
2720bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm7
2721bc3d5698SJohn Baldwin
2722bc3d5698SJohn Baldwin	call	__ocb_encrypt1
2723bc3d5698SJohn Baldwin
2724bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm15
2725bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
2726bc3d5698SJohn Baldwin	jmp	.Locb_enc_done
2727bc3d5698SJohn Baldwin
2728bc3d5698SJohn Baldwin.align	16
2729bc3d5698SJohn Baldwin.Locb_enc_two:
/* Two blocks left: four-block helper with lanes 3 and 4 zeroed; the
 * running offset is taken from %xmm11 (presumably the second block's
 * offset -- confirm in __ocb_encrypt4). */
2730bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2731bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2732bc3d5698SJohn Baldwin
2733bc3d5698SJohn Baldwin	call	__ocb_encrypt4
2734bc3d5698SJohn Baldwin
2735bc3d5698SJohn Baldwin	movdqa	%xmm11,%xmm15
2736bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
2737bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2738bc3d5698SJohn Baldwin
2739bc3d5698SJohn Baldwin	jmp	.Locb_enc_done
2740bc3d5698SJohn Baldwin
2741bc3d5698SJohn Baldwin.align	16
2742bc3d5698SJohn Baldwin.Locb_enc_three:
/* Three blocks left: four-block helper with lane 4 zeroed; the running
 * offset is taken from %xmm12. */
2743bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2744bc3d5698SJohn Baldwin
2745bc3d5698SJohn Baldwin	call	__ocb_encrypt4
2746bc3d5698SJohn Baldwin
2747bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm15
2748bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
2749bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2750bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
2751bc3d5698SJohn Baldwin
2752bc3d5698SJohn Baldwin	jmp	.Locb_enc_done
2753bc3d5698SJohn Baldwin
2754bc3d5698SJohn Baldwin.align	16
2755bc3d5698SJohn Baldwin.Locb_enc_four:
/* Four blocks left: the running offset is taken from %xmm13; falls
 * through into .Locb_enc_done. */
2756bc3d5698SJohn Baldwin	call	__ocb_encrypt4
2757bc3d5698SJohn Baldwin
2758bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm15
2759bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
2760bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
2761bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
2762bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
2763bc3d5698SJohn Baldwin
2764bc3d5698SJohn Baldwin.Locb_enc_done:
/* %xmm0 is whatever the last helper left in it.  __ocb_encrypt6 exits
 * with the round key at offset 16 + 16*rounds in %xmm0, which cancels the
 * key XORed into the offset at entry; the other helpers presumably do the
 * same -- confirm.  Then write back the checksum and the running offset. */
2765bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm15
2766bc3d5698SJohn Baldwin	movdqu	%xmm8,(%rbp)
2767bc3d5698SJohn Baldwin	movdqu	%xmm15,(%r9)
2768bc3d5698SJohn Baldwin
/* Zero every XMM register before returning (presumably so that no key- or
 * data-dependent values are left behind). */
2769bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
2770bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
2771bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2772bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2773bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2774bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2775bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2776bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2777bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
2778bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
2779bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm10
2780bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm11
2781bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm12
2782bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm13
2783bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm14
2784bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm15
/* %rax = %rsp + 40 = entry %rsp (five 8-byte pushes); reload the saved
 * registers from below it and restore %rsp. */
2785bc3d5698SJohn Baldwin	leaq	40(%rsp),%rax
2786bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
2787bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
2788bc3d5698SJohn Baldwin.cfi_restore	%r14
2789bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
2790bc3d5698SJohn Baldwin.cfi_restore	%r13
2791bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
2792bc3d5698SJohn Baldwin.cfi_restore	%r12
2793bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
2794bc3d5698SJohn Baldwin.cfi_restore	%rbp
2795bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
2796bc3d5698SJohn Baldwin.cfi_restore	%rbx
2797bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
2798bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
2799bc3d5698SJohn Baldwin.Locb_enc_epilogue:
/* F3 C3 = rep ret. */
2800bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
2801bc3d5698SJohn Baldwin.cfi_endproc
2802bc3d5698SJohn Baldwin.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
2803bc3d5698SJohn Baldwin
/*
 * __ocb_encrypt6 -- file-local helper (no .globl): OCB-encrypt the six
 * blocks in %xmm2-%xmm7 in place.  Register contract as read from this
 * code and from its call sites in aesni_ocb_encrypt:
 *   in:   %xmm2-%xmm7  input blocks
 *         %xmm8        checksum accumulator (each input block is XORed in)
 *         %xmm9        (round key at offset 0) ^ (round key at offset
 *                      16 + 16*rounds), as set up by the caller
 *         %xmm10       first table entry, i.e. (%rbx)
 *         %xmm15       running offset, XORed with the round key at
 *                      offset 16 + 16*rounds
 *         %xmm1        round key at offset 16
 *         %rbx         table base; %r12/%r13/%r14 = table byte offsets
 *                      for blocks 2, 4 and 6
 *         %r11         key schedule; %rcx/%rax/%r10 = round-key index
 *                      registers prepared by the caller
 *         %r8          block index following this batch of six
 *   out:  %xmm2-%xmm7  output blocks
 *         %xmm10       reloaded from (%rbx)
 *         %xmm11-%xmm15 offsets of blocks 2..6 (same key-masked form as
 *                      the incoming %xmm15); %xmm15 is the new running
 *                      offset
 *         %xmm0        round key at offset 16 + 16*rounds
 *         %xmm1        round key at offset 16
 *         %r12-%r14, %r8 advanced for the next batch; %rax = %r10
 * The AES-NI instructions are emitted as raw .byte sequences:
 *   102,15,56,220,modrm     = aesenc     (66 0F 38 DC /r)
 *   102,65,15,56,221,modrm  = aesenclast (66 41 0F 38 DD /r), with the
 *                             source register in %xmm8-%xmm15
 */
2804bc3d5698SJohn Baldwin.type	__ocb_encrypt6,@function
2805bc3d5698SJohn Baldwin.align	32
2806bc3d5698SJohn Baldwin__ocb_encrypt6:
2807bc3d5698SJohn Baldwin.cfi_startproc
/* XORing %xmm9 swaps the key folded into the running offset: the round
 * key at offset 16 + 16*rounds is replaced by the round key at offset 0,
 * so XORing an offset into a block also applies that round key. */
2808bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
/* Build the six per-block offsets as a chain: each is the previous one
 * XORed with a table entry (the first entry for blocks 1, 3 and 5; the
 * entries at %r12/%r13/%r14 for blocks 2, 4 and 6).  Interleaved with
 * that, each input block is XORed into the checksum (%xmm8) and then
 * XORed with its offset. */
2809bc3d5698SJohn Baldwin	movdqu	(%rbx,%r12,1),%xmm11
2810bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm12
2811bc3d5698SJohn Baldwin	movdqu	(%rbx,%r13,1),%xmm13
2812bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm14
2813bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm10
2814bc3d5698SJohn Baldwin	movdqu	(%rbx,%r14,1),%xmm15
2815bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm11
2816bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
2817bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
2818bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm12
2819bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm8
2820bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
2821bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm13
2822bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm8
2823bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
2824bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm14
2825bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm8
2826bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
2827bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
2828bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm8
2829bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
2830bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm8
2831bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
2832bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
2833bc3d5698SJohn Baldwin
/* Start computing the table offsets for the NEXT batch (block indices
 * %r8+1, %r8+3, %r8+5) and advance %r8 by six.  The shifts by 4 that turn
 * the bsf results into byte offsets come later, between the AES rounds. */
2834bc3d5698SJohn Baldwin	leaq	1(%r8),%r12
2835bc3d5698SJohn Baldwin	leaq	3(%r8),%r13
2836bc3d5698SJohn Baldwin	leaq	5(%r8),%r14
2837bc3d5698SJohn Baldwin	addq	$6,%r8
/* This pxor and the ones mixed into the round below XOR %xmm9 into
 * %xmm10-%xmm15 again, putting the offsets back into the form masked by
 * the round key at offset 16 + 16*rounds, for use by aesenclast. */
2838bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm10
2839bc3d5698SJohn Baldwin	bsfq	%r12,%r12
2840bc3d5698SJohn Baldwin	bsfq	%r13,%r13
2841bc3d5698SJohn Baldwin	bsfq	%r14,%r14
2842bc3d5698SJohn Baldwin
/* aesenc %xmm1,%xmm2 .. %xmm7 (round key at offset 16). */
2843bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2844bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2845bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2846bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2847bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm11
2848bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm12
2849bc3d5698SJohn Baldwin.byte	102,15,56,220,241
2850bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm13
2851bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm14
2852bc3d5698SJohn Baldwin.byte	102,15,56,220,249
2853bc3d5698SJohn Baldwin	movups	48(%r11),%xmm1
2854bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
2855bc3d5698SJohn Baldwin
/* aesenc %xmm0,%xmm2 .. %xmm7 (round key at offset 32). */
2856bc3d5698SJohn Baldwin.byte	102,15,56,220,208
2857bc3d5698SJohn Baldwin.byte	102,15,56,220,216
2858bc3d5698SJohn Baldwin.byte	102,15,56,220,224
2859bc3d5698SJohn Baldwin.byte	102,15,56,220,232
2860bc3d5698SJohn Baldwin.byte	102,15,56,220,240
2861bc3d5698SJohn Baldwin.byte	102,15,56,220,248
2862bc3d5698SJohn Baldwin	movups	64(%r11),%xmm0
2863bc3d5698SJohn Baldwin	shlq	$4,%r12
2864bc3d5698SJohn Baldwin	shlq	$4,%r13
2865bc3d5698SJohn Baldwin	jmp	.Locb_enc_loop6
2866bc3d5698SJohn Baldwin
2867bc3d5698SJohn Baldwin.align	32
2868bc3d5698SJohn Baldwin.Locb_enc_loop6:
/* Two rounds per iteration, alternating %xmm1 and %xmm0 as the round key
 * while the next keys are loaded via (%rcx,%rax).  %rax counts up by 32
 * from its negative start value; the loop ends when it reaches zero. */
2869bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2870bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2871bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2872bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2873bc3d5698SJohn Baldwin.byte	102,15,56,220,241
2874bc3d5698SJohn Baldwin.byte	102,15,56,220,249
2875bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
2876bc3d5698SJohn Baldwin	addq	$32,%rax
2877bc3d5698SJohn Baldwin
2878bc3d5698SJohn Baldwin.byte	102,15,56,220,208
2879bc3d5698SJohn Baldwin.byte	102,15,56,220,216
2880bc3d5698SJohn Baldwin.byte	102,15,56,220,224
2881bc3d5698SJohn Baldwin.byte	102,15,56,220,232
2882bc3d5698SJohn Baldwin.byte	102,15,56,220,240
2883bc3d5698SJohn Baldwin.byte	102,15,56,220,248
/* movups does not modify flags: the jnz tests the addq above. */
2884bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
2885bc3d5698SJohn Baldwin	jnz	.Locb_enc_loop6
2886bc3d5698SJohn Baldwin
/* Last aesenc round with %xmm1; then reload %xmm1 with the round key at
 * offset 16 for the next call and finish the %r14 byte offset. */
2887bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2888bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2889bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2890bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2891bc3d5698SJohn Baldwin.byte	102,15,56,220,241
2892bc3d5698SJohn Baldwin.byte	102,15,56,220,249
2893bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
2894bc3d5698SJohn Baldwin	shlq	$4,%r14
2895bc3d5698SJohn Baldwin
/* aesenclast %xmm10,%xmm2 .. aesenclast %xmm15,%xmm7: the final round
 * uses each block's key-masked offset as its round key.  In between,
 * %xmm10 is reloaded with the first table entry (after its last use) and
 * %rax is reset to its start value from %r10. */
2896bc3d5698SJohn Baldwin.byte	102,65,15,56,221,210
2897bc3d5698SJohn Baldwin	movdqu	(%rbx),%xmm10
2898bc3d5698SJohn Baldwin	movq	%r10,%rax
2899bc3d5698SJohn Baldwin.byte	102,65,15,56,221,219
2900bc3d5698SJohn Baldwin.byte	102,65,15,56,221,228
2901bc3d5698SJohn Baldwin.byte	102,65,15,56,221,237
2902bc3d5698SJohn Baldwin.byte	102,65,15,56,221,246
2903bc3d5698SJohn Baldwin.byte	102,65,15,56,221,255
/* F3 C3 = rep ret. */
2904bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
2905bc3d5698SJohn Baldwin.cfi_endproc
2906bc3d5698SJohn Baldwin.size	__ocb_encrypt6,.-__ocb_encrypt6
2907bc3d5698SJohn Baldwin
/*
 * __ocb_encrypt4: file-local helper that pushes four 16-byte blocks
 * (%xmm2..%xmm5) through the AES encryption rounds in parallel.
 *
 * Registers read on entry (the caller that sets them up is not part of
 * this function):
 *   %xmm2..%xmm5  input blocks
 *   %xmm8         running XOR accumulator; the four inputs are XORed in
 *   %xmm9,%xmm15  mask values folded into the per-block masks
 *   %xmm10        16-byte value used as the base of the mask chain
 *   %xmm1         round key already loaded (used by the first AESENC)
 *   %rbx,%r12,%r13 table base and two byte offsets into it
 *   %r11          key schedule base (32/48/64(%r11) are loaded here)
 *   %rcx,%rax     base pointer and index for the remaining round keys;
 *                 %rax is stepped by 32 until it reaches zero
 *   %r10          value copied back into %rax before returning
 *
 * On return %xmm2..%xmm5 hold the results, %xmm10..%xmm13 hold the values
 * that were used as the final-round operand, %xmm1 is reloaded from
 * 16(%r11) and %rax equals %r10.
 *
 * The .byte groups are hand-encoded AES-NI instructions:
 *   102,15,56,220,ModRM     AESENC     (ModRM 209.. = %xmm1 source,
 *                                       208.. = %xmm0 source)
 *   102,65,15,56,221,ModRM  AESENCLAST with %xmm10..%xmm13 as source
 */
2908bc3d5698SJohn Baldwin.type	__ocb_encrypt4,@function
2909bc3d5698SJohn Baldwin.align	32
2910bc3d5698SJohn Baldwin__ocb_encrypt4:
2911bc3d5698SJohn Baldwin.cfi_startproc
/*
 * Build four chained masks in %xmm10..%xmm13 (each one is the previous
 * mask XOR a table entry), XOR each input block into %xmm8, then XOR the
 * block with its mask.
 */
2912bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
2913bc3d5698SJohn Baldwin	movdqu	(%rbx,%r12,1),%xmm11
2914bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm12
2915bc3d5698SJohn Baldwin	movdqu	(%rbx,%r13,1),%xmm13
2916bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm10
2917bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm11
2918bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
2919bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
2920bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm12
2921bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm8
2922bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
2923bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm13
2924bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm8
2925bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
2926bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm8
2927bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
2928bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
2929bc3d5698SJohn Baldwin
/* XOR %xmm9 into every mask; the masks feed AESENCLAST at the end. */
2930bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm10
2931bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm11
2932bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm12
2933bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm13
2934bc3d5698SJohn Baldwin
/* aesenc %xmm1 -> %xmm2..%xmm5 (round key preloaded by the caller) */
2935bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2936bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2937bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2938bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2939bc3d5698SJohn Baldwin	movups	48(%r11),%xmm1
2940bc3d5698SJohn Baldwin
/* aesenc %xmm0 -> %xmm2..%xmm5 (key loaded from 32(%r11)) */
2941bc3d5698SJohn Baldwin.byte	102,15,56,220,208
2942bc3d5698SJohn Baldwin.byte	102,15,56,220,216
2943bc3d5698SJohn Baldwin.byte	102,15,56,220,224
2944bc3d5698SJohn Baldwin.byte	102,15,56,220,232
2945bc3d5698SJohn Baldwin	movups	64(%r11),%xmm0
2946bc3d5698SJohn Baldwin	jmp	.Locb_enc_loop4
2947bc3d5698SJohn Baldwin
/*
 * Two rounds per iteration, alternating %xmm1 and %xmm0 as the key.
 * The jnz tests the zero flag left by the addq; the movups and AESENC
 * instructions in between do not modify flags.
 */
2948bc3d5698SJohn Baldwin.align	32
2949bc3d5698SJohn Baldwin.Locb_enc_loop4:
2950bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2951bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2952bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2953bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2954bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
2955bc3d5698SJohn Baldwin	addq	$32,%rax
2956bc3d5698SJohn Baldwin
2957bc3d5698SJohn Baldwin.byte	102,15,56,220,208
2958bc3d5698SJohn Baldwin.byte	102,15,56,220,216
2959bc3d5698SJohn Baldwin.byte	102,15,56,220,224
2960bc3d5698SJohn Baldwin.byte	102,15,56,220,232
2961bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
2962bc3d5698SJohn Baldwin	jnz	.Locb_enc_loop4
2963bc3d5698SJohn Baldwin
/*
 * Last AESENC with %xmm1, then re-prime %xmm1 from 16(%r11) and reset the
 * key index in %rax from %r10 (presumably so the next helper call can
 * start immediately).
 */
2964bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2965bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2966bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2967bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2968bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
2969bc3d5698SJohn Baldwin	movq	%r10,%rax
2970bc3d5698SJohn Baldwin
/* aesenclast %xmm10..%xmm13 -> %xmm2..%xmm5 (per-block mask as operand) */
2971bc3d5698SJohn Baldwin.byte	102,65,15,56,221,210
2972bc3d5698SJohn Baldwin.byte	102,65,15,56,221,219
2973bc3d5698SJohn Baldwin.byte	102,65,15,56,221,228
2974bc3d5698SJohn Baldwin.byte	102,65,15,56,221,237
/* 0xf3,0xc3 encodes "rep ret" */
2975bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
2976bc3d5698SJohn Baldwin.cfi_endproc
2977bc3d5698SJohn Baldwin.size	__ocb_encrypt4,.-__ocb_encrypt4
2978bc3d5698SJohn Baldwin
/*
 * __ocb_encrypt1: file-local helper that runs a single 16-byte block
 * (%xmm2) through the AES encryption rounds.
 *
 * Registers read on entry (set up by a caller outside this function):
 *   %xmm2        input block
 *   %xmm7        16-byte value that is turned into the block mask
 *   %xmm8        running XOR accumulator; the input block is XORed in
 *   %xmm9,%xmm15 mask values folded into %xmm7
 *   %xmm1        round key already loaded (used by the first AESENC)
 *   %r11         key schedule base
 *   %rcx,%rax    base pointer and index for the remaining round keys;
 *                %rax is stepped by 32 until it reaches zero
 *   %r10         value copied back into %rax before returning
 *
 * On return %xmm2 holds the result, %xmm7 holds the value used as the
 * final-round operand, %xmm1 is reloaded from 16(%r11), %rax equals %r10.
 *
 * .byte 102,15,56,220,209 / ...,208 encode aesenc %xmm1,%xmm2 and
 * aesenc %xmm0,%xmm2; .byte 102,15,56,221,215 is aesenclast %xmm7,%xmm2.
 */
2979bc3d5698SJohn Baldwin.type	__ocb_encrypt1,@function
2980bc3d5698SJohn Baldwin.align	32
2981bc3d5698SJohn Baldwin__ocb_encrypt1:
2982bc3d5698SJohn Baldwin.cfi_startproc
/* %xmm7 ^= %xmm15 ^ %xmm9; accumulate the input in %xmm8; mask the input */
2983bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
2984bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm7
2985bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
2986bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
2987bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
2988bc3d5698SJohn Baldwin
/* aesenc %xmm1,%xmm2 */
2989bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2990bc3d5698SJohn Baldwin	movups	48(%r11),%xmm1
/* second XOR with %xmm9: %xmm7 now differs from its entry value by %xmm15 only */
2991bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm7
2992bc3d5698SJohn Baldwin
/* aesenc %xmm0,%xmm2 */
2993bc3d5698SJohn Baldwin.byte	102,15,56,220,208
2994bc3d5698SJohn Baldwin	movups	64(%r11),%xmm0
2995bc3d5698SJohn Baldwin	jmp	.Locb_enc_loop1
2996bc3d5698SJohn Baldwin
/*
 * Two rounds per iteration. The jnz tests the zero flag left by the addq;
 * the movups and AESENC in between do not modify flags.
 */
2997bc3d5698SJohn Baldwin.align	32
2998bc3d5698SJohn Baldwin.Locb_enc_loop1:
2999bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3000bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
3001bc3d5698SJohn Baldwin	addq	$32,%rax
3002bc3d5698SJohn Baldwin
3003bc3d5698SJohn Baldwin.byte	102,15,56,220,208
3004bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
3005bc3d5698SJohn Baldwin	jnz	.Locb_enc_loop1
3006bc3d5698SJohn Baldwin
/* last aesenc, then re-prime %xmm1 and reset the key index in %rax */
3007bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3008bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
3009bc3d5698SJohn Baldwin	movq	%r10,%rax
3010bc3d5698SJohn Baldwin
/* aesenclast %xmm7,%xmm2 */
3011bc3d5698SJohn Baldwin.byte	102,15,56,221,215
/* 0xf3,0xc3 encodes "rep ret" */
3012bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
3013bc3d5698SJohn Baldwin.cfi_endproc
3014bc3d5698SJohn Baldwin.size	__ocb_encrypt1,.-__ocb_encrypt1
3015bc3d5698SJohn Baldwin
/*
 * aesni_ocb_decrypt: global entry point. Decrypts whole 16-byte blocks
 * using the __ocb_decrypt1/4/6 helpers, keeping a running offset in
 * %xmm15 and a running XOR of the produced output blocks in %xmm8.
 *
 * Arguments as the code below uses them (System V AMD64 register order):
 *   %rdi      input pointer; 16 bytes are read per block
 *   %rsi      output pointer; 16 bytes are written per block
 *   %rdx      number of 16-byte blocks
 *   %rcx      key schedule; a 32-bit round count is read from 240(%rcx)
 *   %r8       block counter; bsf of (counter + k) selects table entries
 *   %r9       16-byte offset block, loaded at entry, stored back at exit
 *   8(%rsp)   pointer to a table of 16-byte entries (held in %rbx)
 *   16(%rsp)  pointer to a 16-byte accumulator block (held in %rbp),
 *             loaded at entry and stored back at exit
 *
 * Callee-saved %rbx, %rbp, %r12, %r13, %r14 are pushed and restored.
 * All sixteen xmm registers are zeroed before returning.
 *
 * NOTE(review): the argument layout looks like an OCB-mode bulk routine
 * (offset, L table, checksum) -- confirm names against the C prototype.
 * NOTE(review): on the even-counter path one block is processed before
 * %rdx is examined, so callers presumably guarantee at least one block.
 */
3016bc3d5698SJohn Baldwin.globl	aesni_ocb_decrypt
3017bc3d5698SJohn Baldwin.type	aesni_ocb_decrypt,@function
3018bc3d5698SJohn Baldwin.align	32
3019bc3d5698SJohn Baldwinaesni_ocb_decrypt:
3020bc3d5698SJohn Baldwin.cfi_startproc
/* 243,15,30,250 encodes endbr64 */
3021*c0855eaaSJohn Baldwin.byte	243,15,30,250
/* %rax = entry %rsp, used below to reach the two stack-passed arguments */
3022bc3d5698SJohn Baldwin	leaq	(%rsp),%rax
3023bc3d5698SJohn Baldwin	pushq	%rbx
3024bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
3025bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
3026bc3d5698SJohn Baldwin	pushq	%rbp
3027bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
3028bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
3029bc3d5698SJohn Baldwin	pushq	%r12
3030bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
3031bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
3032bc3d5698SJohn Baldwin	pushq	%r13
3033bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
3034bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
3035bc3d5698SJohn Baldwin	pushq	%r14
3036bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
3037bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
/* seventh and eighth arguments: table pointer and accumulator pointer */
3038bc3d5698SJohn Baldwin	movq	8(%rax),%rbx
3039bc3d5698SJohn Baldwin	movq	8+8(%rax),%rbp
3040bc3d5698SJohn Baldwin
/*
 * %r10d = rounds * 16, %r11 = key schedule base.
 * %xmm9 = key[0] XOR the key block at 16 + rounds*16 (presumably the last
 * round key); the same block is XORed into the offset in %xmm15.
 */
3041bc3d5698SJohn Baldwin	movl	240(%rcx),%r10d
3042bc3d5698SJohn Baldwin	movq	%rcx,%r11
3043bc3d5698SJohn Baldwin	shll	$4,%r10d
3044bc3d5698SJohn Baldwin	movups	(%rcx),%xmm9
3045bc3d5698SJohn Baldwin	movups	16(%rcx,%r10,1),%xmm1
3046bc3d5698SJohn Baldwin
3047bc3d5698SJohn Baldwin	movdqu	(%r9),%xmm15
3048bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm9
3049bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm15
3050bc3d5698SJohn Baldwin
/*
 * Set up what the helpers expect: %rcx = key + 32 + rounds*16,
 * %rax = %r10 = 48 - rounds*16 (index stepped by 32 up to zero),
 * %xmm1 = key block at 16(%r11).
 */
3051bc3d5698SJohn Baldwin	movl	$16+32,%eax
3052bc3d5698SJohn Baldwin	leaq	32(%r11,%r10,1),%rcx
3053bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
3054bc3d5698SJohn Baldwin	subq	%r10,%rax
3055bc3d5698SJohn Baldwin	movq	%rax,%r10
3056bc3d5698SJohn Baldwin
/* %xmm10 = first table entry, %xmm8 = accumulator block */
3057bc3d5698SJohn Baldwin	movdqu	(%rbx),%xmm10
3058bc3d5698SJohn Baldwin	movdqu	(%rbp),%xmm8
3059bc3d5698SJohn Baldwin
/* If the block counter is even, handle one block first so it becomes odd. */
3060bc3d5698SJohn Baldwin	testq	$1,%r8
3061bc3d5698SJohn Baldwin	jnz	.Locb_dec_odd
3062bc3d5698SJohn Baldwin
/* table index = bsf(counter), scaled by 16 bytes per entry */
3063bc3d5698SJohn Baldwin	bsfq	%r8,%r12
3064bc3d5698SJohn Baldwin	addq	$1,%r8
3065bc3d5698SJohn Baldwin	shlq	$4,%r12
3066bc3d5698SJohn Baldwin	movdqu	(%rbx,%r12,1),%xmm7
3067bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
3068bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
3069bc3d5698SJohn Baldwin
3070bc3d5698SJohn Baldwin	call	__ocb_decrypt1
3071bc3d5698SJohn Baldwin
/* new running offset comes back in %xmm7; fold the output into %xmm8 */
3072bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm15
3073bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
3074bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm8
3075bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3076bc3d5698SJohn Baldwin	subq	$1,%rdx
3077bc3d5698SJohn Baldwin	jz	.Locb_dec_done
3078bc3d5698SJohn Baldwin
/*
 * Counter is odd here. Precompute scaled table offsets for counter+1,
 * counter+3 and counter+5, and advance the counter by 6.
 */
3079bc3d5698SJohn Baldwin.Locb_dec_odd:
3080bc3d5698SJohn Baldwin	leaq	1(%r8),%r12
3081bc3d5698SJohn Baldwin	leaq	3(%r8),%r13
3082bc3d5698SJohn Baldwin	leaq	5(%r8),%r14
3083bc3d5698SJohn Baldwin	leaq	6(%r8),%r8
3084bc3d5698SJohn Baldwin	bsfq	%r12,%r12
3085bc3d5698SJohn Baldwin	bsfq	%r13,%r13
3086bc3d5698SJohn Baldwin	bsfq	%r14,%r14
3087bc3d5698SJohn Baldwin	shlq	$4,%r12
3088bc3d5698SJohn Baldwin	shlq	$4,%r13
3089bc3d5698SJohn Baldwin	shlq	$4,%r14
3090bc3d5698SJohn Baldwin
/* fewer than six blocks left -> carry set -> take the short path */
3091bc3d5698SJohn Baldwin	subq	$6,%rdx
3092bc3d5698SJohn Baldwin	jc	.Locb_dec_short
3093bc3d5698SJohn Baldwin	jmp	.Locb_dec_grandloop
3094bc3d5698SJohn Baldwin
/* Main loop: six blocks per iteration. */
3095bc3d5698SJohn Baldwin.align	32
3096bc3d5698SJohn Baldwin.Locb_dec_grandloop:
3097bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
3098bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
3099bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
3100bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
3101bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
3102bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
3103bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
3104bc3d5698SJohn Baldwin
3105bc3d5698SJohn Baldwin	call	__ocb_decrypt6
3106bc3d5698SJohn Baldwin
/* store each output block and XOR it into the accumulator %xmm8 */
3107bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3108bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
3109bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
3110bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm8
3111bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
3112bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm8
3113bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
3114bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm8
3115bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
3116bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm8
3117bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
3118bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm8
3119bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
3120bc3d5698SJohn Baldwin	subq	$6,%rdx
3121bc3d5698SJohn Baldwin	jnc	.Locb_dec_grandloop
3122bc3d5698SJohn Baldwin
/*
 * Tail: undo the last subtraction so %rdx is the remaining count (0..5)
 * and dispatch. The je branches reuse the flags of the preceding cmpq;
 * the movdqu between them does not modify flags.
 */
3123bc3d5698SJohn Baldwin.Locb_dec_short:
3124bc3d5698SJohn Baldwin	addq	$6,%rdx
3125bc3d5698SJohn Baldwin	jz	.Locb_dec_done
3126bc3d5698SJohn Baldwin
3127bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
3128bc3d5698SJohn Baldwin	cmpq	$2,%rdx
3129bc3d5698SJohn Baldwin	jb	.Locb_dec_one
3130bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
3131bc3d5698SJohn Baldwin	je	.Locb_dec_two
3132bc3d5698SJohn Baldwin
3133bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
3134bc3d5698SJohn Baldwin	cmpq	$4,%rdx
3135bc3d5698SJohn Baldwin	jb	.Locb_dec_three
3136bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
3137bc3d5698SJohn Baldwin	je	.Locb_dec_four
3138bc3d5698SJohn Baldwin
/* five blocks: use the six-block helper with a zeroed sixth lane */
3139bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
3140bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
3141bc3d5698SJohn Baldwin
3142bc3d5698SJohn Baldwin	call	__ocb_decrypt6
3143bc3d5698SJohn Baldwin
/* running offset = mask of the fifth block; only five results are stored */
3144bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm15
3145bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3146bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
3147bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
3148bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm8
3149bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
3150bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm8
3151bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
3152bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm8
3153bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
3154bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm8
3155bc3d5698SJohn Baldwin
3156bc3d5698SJohn Baldwin	jmp	.Locb_dec_done
3157bc3d5698SJohn Baldwin
/* one block: the first table entry (%xmm10) is the mask input */
3158bc3d5698SJohn Baldwin.align	16
3159bc3d5698SJohn Baldwin.Locb_dec_one:
3160bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm7
3161bc3d5698SJohn Baldwin
3162bc3d5698SJohn Baldwin	call	__ocb_decrypt1
3163bc3d5698SJohn Baldwin
3164bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm15
3165bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3166bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm8
3167bc3d5698SJohn Baldwin	jmp	.Locb_dec_done
3168bc3d5698SJohn Baldwin
/* two blocks: four-block helper with lanes three and four zeroed */
3169bc3d5698SJohn Baldwin.align	16
3170bc3d5698SJohn Baldwin.Locb_dec_two:
3171bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3172bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3173bc3d5698SJohn Baldwin
3174bc3d5698SJohn Baldwin	call	__ocb_decrypt4
3175bc3d5698SJohn Baldwin
/* running offset = mask of the second block */
3176bc3d5698SJohn Baldwin	movdqa	%xmm11,%xmm15
3177bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3178bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm8
3179bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
3180bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm8
3181bc3d5698SJohn Baldwin
3182bc3d5698SJohn Baldwin	jmp	.Locb_dec_done
3183bc3d5698SJohn Baldwin
/* three blocks: four-block helper with lane four zeroed */
3184bc3d5698SJohn Baldwin.align	16
3185bc3d5698SJohn Baldwin.Locb_dec_three:
3186bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3187bc3d5698SJohn Baldwin
3188bc3d5698SJohn Baldwin	call	__ocb_decrypt4
3189bc3d5698SJohn Baldwin
/* running offset = mask of the third block */
3190bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm15
3191bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3192bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm8
3193bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
3194bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm8
3195bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
3196bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm8
3197bc3d5698SJohn Baldwin
3198bc3d5698SJohn Baldwin	jmp	.Locb_dec_done
3199bc3d5698SJohn Baldwin
/* four blocks; falls through into the common exit */
3200bc3d5698SJohn Baldwin.align	16
3201bc3d5698SJohn Baldwin.Locb_dec_four:
3202bc3d5698SJohn Baldwin	call	__ocb_decrypt4
3203bc3d5698SJohn Baldwin
/* running offset = mask of the fourth block */
3204bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm15
3205bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3206bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm8
3207bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
3208bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm8
3209bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
3210bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm8
3211bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
3212bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm8
3213bc3d5698SJohn Baldwin
/*
 * Common exit. Each helper leaves in %xmm0 the key block at -16(%rcx),
 * the same address whose contents were XORed into %xmm15 at entry, so
 * this pxor strips that mask before the offset is written back to (%r9).
 * The accumulator goes back to (%rbp).
 */
3214bc3d5698SJohn Baldwin.Locb_dec_done:
3215bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm15
3216bc3d5698SJohn Baldwin	movdqu	%xmm8,(%rbp)
3217bc3d5698SJohn Baldwin	movdqu	%xmm15,(%r9)
3218bc3d5698SJohn Baldwin
/* zero every xmm register (presumably to avoid leaving key or data behind) */
3219bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
3220bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3221bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
3222bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3223bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3224bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3225bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
3226bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
3227bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
3228bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
3229bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm10
3230bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm11
3231bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm12
3232bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm13
3233bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm14
3234bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm15
/* five 8-byte pushes = 40 bytes: %rax = entry %rsp; restore saved registers */
3235bc3d5698SJohn Baldwin	leaq	40(%rsp),%rax
3236bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
3237bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
3238bc3d5698SJohn Baldwin.cfi_restore	%r14
3239bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
3240bc3d5698SJohn Baldwin.cfi_restore	%r13
3241bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
3242bc3d5698SJohn Baldwin.cfi_restore	%r12
3243bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
3244bc3d5698SJohn Baldwin.cfi_restore	%rbp
3245bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
3246bc3d5698SJohn Baldwin.cfi_restore	%rbx
3247bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
3248bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
3249bc3d5698SJohn Baldwin.Locb_dec_epilogue:
/* 0xf3,0xc3 encodes "rep ret" */
3250bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
3251bc3d5698SJohn Baldwin.cfi_endproc
3252bc3d5698SJohn Baldwin.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
3253bc3d5698SJohn Baldwin
/*
 * __ocb_decrypt6: file-local helper called from aesni_ocb_decrypt. Runs
 * six 16-byte blocks (%xmm2..%xmm7) through the AES decryption rounds in
 * parallel and precomputes the table offsets for the next group of six.
 *
 * Registers read on entry:
 *   %xmm2..%xmm7   input blocks
 *   %xmm9,%xmm15   mask values folded into the per-block masks
 *   %xmm10         first table entry (loaded from (%rbx) by the caller)
 *   %xmm1          key block from 16(%r11), already loaded
 *   %rbx           table base; %r12,%r13,%r14 byte offsets into it
 *   %r8            block counter
 *   %r11           key schedule base
 *   %rcx,%rax      base pointer and index for the remaining round keys;
 *                  %rax is stepped by 32 until it reaches zero
 *   %r10           value copied back into %rax before returning
 *
 * On return: %xmm2..%xmm7 hold the results; %xmm11..%xmm15 hold the
 * values used as final-round operands for blocks 2..6 (the caller takes
 * its running offset from these); %xmm10 is reloaded from (%rbx);
 * %r12,%r13,%r14 hold 16*bsf(counter+1/+3/+5) for the counter value seen
 * on entry; %r8 is advanced by 6; %xmm1 is reloaded from 16(%r11);
 * %rax equals %r10.
 *
 * The .byte groups are hand-encoded AES-NI instructions:
 *   102,15,56,222,ModRM     AESDEC     (ModRM 209.. = %xmm1 source,
 *                                       208.. = %xmm0 source)
 *   102,65,15,56,223,ModRM  AESDECLAST with %xmm10..%xmm15 as source
 */
3254bc3d5698SJohn Baldwin.type	__ocb_decrypt6,@function
3255bc3d5698SJohn Baldwin.align	32
3256bc3d5698SJohn Baldwin__ocb_decrypt6:
3257bc3d5698SJohn Baldwin.cfi_startproc
/*
 * Build six chained masks in %xmm10..%xmm15: each is the previous mask
 * XOR a table entry (entry 0 for the 1st/3rd/5th, entries at %r12/%r13/
 * %r14 for the 2nd/4th/6th), and XOR each input block with its mask.
 */
3258bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
3259bc3d5698SJohn Baldwin	movdqu	(%rbx,%r12,1),%xmm11
3260bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm12
3261bc3d5698SJohn Baldwin	movdqu	(%rbx,%r13,1),%xmm13
3262bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm14
3263bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm10
3264bc3d5698SJohn Baldwin	movdqu	(%rbx,%r14,1),%xmm15
3265bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm11
3266bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3267bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm12
3268bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3269bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm13
3270bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3271bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm14
3272bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
3273bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm15
3274bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
3275bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
3276bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
3277bc3d5698SJohn Baldwin
/*
 * Table indices for the next call: bsf of counter+1, +3, +5. The shifts
 * that scale them by 16 are interleaved with the AES rounds further down.
 */
3278bc3d5698SJohn Baldwin	leaq	1(%r8),%r12
3279bc3d5698SJohn Baldwin	leaq	3(%r8),%r13
3280bc3d5698SJohn Baldwin	leaq	5(%r8),%r14
3281bc3d5698SJohn Baldwin	addq	$6,%r8
3282bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm10
3283bc3d5698SJohn Baldwin	bsfq	%r12,%r12
3284bc3d5698SJohn Baldwin	bsfq	%r13,%r13
3285bc3d5698SJohn Baldwin	bsfq	%r14,%r14
3286bc3d5698SJohn Baldwin
/*
 * aesdec %xmm1 -> %xmm2..%xmm7, interleaved with XORing %xmm9 into the
 * remaining masks %xmm11..%xmm15.
 */
3287bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3288bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3289bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3290bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3291bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm11
3292bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm12
3293bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3294bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm13
3295bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm14
3296bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3297bc3d5698SJohn Baldwin	movups	48(%r11),%xmm1
3298bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
3299bc3d5698SJohn Baldwin
/* aesdec %xmm0 -> %xmm2..%xmm7 (key loaded from 32(%r11)) */
3300bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3301bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3302bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3303bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3304bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3305bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3306bc3d5698SJohn Baldwin	movups	64(%r11),%xmm0
3307bc3d5698SJohn Baldwin	shlq	$4,%r12
3308bc3d5698SJohn Baldwin	shlq	$4,%r13
3309bc3d5698SJohn Baldwin	jmp	.Locb_dec_loop6
3310bc3d5698SJohn Baldwin
/*
 * Two rounds per iteration, alternating %xmm1 and %xmm0 as the key.
 * The jnz tests the zero flag left by the addq; the movups and AESDEC
 * instructions in between do not modify flags.
 */
3311bc3d5698SJohn Baldwin.align	32
3312bc3d5698SJohn Baldwin.Locb_dec_loop6:
3313bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3314bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3315bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3316bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3317bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3318bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3319bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
3320bc3d5698SJohn Baldwin	addq	$32,%rax
3321bc3d5698SJohn Baldwin
3322bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3323bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3324bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3325bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3326bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3327bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3328bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
3329bc3d5698SJohn Baldwin	jnz	.Locb_dec_loop6
3330bc3d5698SJohn Baldwin
/* last aesdec with %xmm1; re-prime %xmm1 and finish scaling %r14 */
3331bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3332bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3333bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3334bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3335bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3336bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3337bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
3338bc3d5698SJohn Baldwin	shlq	$4,%r14
3339bc3d5698SJohn Baldwin
/*
 * aesdeclast %xmm10..%xmm15 -> %xmm2..%xmm7. %xmm10 is reloaded from
 * (%rbx) right after its use as an operand, and %rax is reset from %r10.
 */
3340bc3d5698SJohn Baldwin.byte	102,65,15,56,223,210
3341bc3d5698SJohn Baldwin	movdqu	(%rbx),%xmm10
3342bc3d5698SJohn Baldwin	movq	%r10,%rax
3343bc3d5698SJohn Baldwin.byte	102,65,15,56,223,219
3344bc3d5698SJohn Baldwin.byte	102,65,15,56,223,228
3345bc3d5698SJohn Baldwin.byte	102,65,15,56,223,237
3346bc3d5698SJohn Baldwin.byte	102,65,15,56,223,246
3347bc3d5698SJohn Baldwin.byte	102,65,15,56,223,255
/* 0xf3,0xc3 encodes "rep ret" */
3348bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
3349bc3d5698SJohn Baldwin.cfi_endproc
3350bc3d5698SJohn Baldwin.size	__ocb_decrypt6,.-__ocb_decrypt6
3351bc3d5698SJohn Baldwin
/*
 * __ocb_decrypt4: file-local helper called from aesni_ocb_decrypt for
 * tails of two, three or four blocks. Runs four 16-byte blocks
 * (%xmm2..%xmm5) through the AES decryption rounds in parallel.
 *
 * Registers read on entry:
 *   %xmm2..%xmm5   input blocks (unused lanes are zeroed by the caller)
 *   %xmm9,%xmm15   mask values folded into the per-block masks
 *   %xmm10         first table entry
 *   %xmm1          key block from 16(%r11), already loaded
 *   %rbx,%r12,%r13 table base and two byte offsets into it
 *   %r11           key schedule base
 *   %rcx,%rax      base pointer and index for the remaining round keys;
 *                  %rax is stepped by 32 until it reaches zero
 *   %r10           value copied back into %rax before returning
 *
 * On return %xmm2..%xmm5 hold the results, %xmm10..%xmm13 hold the values
 * used as final-round operands (the caller copies one of %xmm11..%xmm13
 * into its running offset), %xmm1 is reloaded from 16(%r11) and %rax
 * equals %r10. Unlike __ocb_decrypt6 this helper does not touch %r8,
 * %r12 or %r13.
 *
 * .byte 102,15,56,222,ModRM is AESDEC (ModRM 209.. = %xmm1 source,
 * 208.. = %xmm0 source); .byte 102,65,15,56,223,ModRM is AESDECLAST with
 * %xmm10..%xmm13 as source.
 */
3352bc3d5698SJohn Baldwin.type	__ocb_decrypt4,@function
3353bc3d5698SJohn Baldwin.align	32
3354bc3d5698SJohn Baldwin__ocb_decrypt4:
3355bc3d5698SJohn Baldwin.cfi_startproc
/* build four chained masks in %xmm10..%xmm13 and XOR them into the inputs */
3356bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm15
3357bc3d5698SJohn Baldwin	movdqu	(%rbx,%r12,1),%xmm11
3358bc3d5698SJohn Baldwin	movdqa	%xmm10,%xmm12
3359bc3d5698SJohn Baldwin	movdqu	(%rbx,%r13,1),%xmm13
3360bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm10
3361bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm11
3362bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3363bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm12
3364bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3365bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm13
3366bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3367bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
3368bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
3369bc3d5698SJohn Baldwin
/* XOR %xmm9 into every mask; the masks feed AESDECLAST at the end */
3370bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm10
3371bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm11
3372bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm12
3373bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm13
3374bc3d5698SJohn Baldwin
/* aesdec %xmm1 -> %xmm2..%xmm5 */
3375bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3376bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3377bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3378bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3379bc3d5698SJohn Baldwin	movups	48(%r11),%xmm1
3380bc3d5698SJohn Baldwin
/* aesdec %xmm0 -> %xmm2..%xmm5 */
3381bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3382bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3383bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3384bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3385bc3d5698SJohn Baldwin	movups	64(%r11),%xmm0
3386bc3d5698SJohn Baldwin	jmp	.Locb_dec_loop4
3387bc3d5698SJohn Baldwin
/*
 * Two rounds per iteration. The jnz tests the zero flag left by the addq;
 * the movups and AESDEC instructions in between do not modify flags.
 */
3388bc3d5698SJohn Baldwin.align	32
3389bc3d5698SJohn Baldwin.Locb_dec_loop4:
3390bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3391bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3392bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3393bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3394bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
3395bc3d5698SJohn Baldwin	addq	$32,%rax
3396bc3d5698SJohn Baldwin
3397bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3398bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3399bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3400bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3401bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
3402bc3d5698SJohn Baldwin	jnz	.Locb_dec_loop4
3403bc3d5698SJohn Baldwin
/* last aesdec, then re-prime %xmm1 and reset the key index in %rax */
3404bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3405bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3406bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3407bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3408bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
3409bc3d5698SJohn Baldwin	movq	%r10,%rax
3410bc3d5698SJohn Baldwin
/* aesdeclast %xmm10..%xmm13 -> %xmm2..%xmm5 */
3411bc3d5698SJohn Baldwin.byte	102,65,15,56,223,210
3412bc3d5698SJohn Baldwin.byte	102,65,15,56,223,219
3413bc3d5698SJohn Baldwin.byte	102,65,15,56,223,228
3414bc3d5698SJohn Baldwin.byte	102,65,15,56,223,237
/* 0xf3,0xc3 encodes "rep ret" */
3415bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
3416bc3d5698SJohn Baldwin.cfi_endproc
3417bc3d5698SJohn Baldwin.size	__ocb_decrypt4,.-__ocb_decrypt4
3418bc3d5698SJohn Baldwin
/*
 * __ocb_decrypt1: file-local helper called from aesni_ocb_decrypt. Runs a
 * single 16-byte block (%xmm2) through the AES decryption rounds.
 *
 * Registers read on entry:
 *   %xmm2        input block
 *   %xmm7        table entry chosen by the caller; becomes the block mask
 *   %xmm9,%xmm15 mask values folded into %xmm7
 *   %xmm1        key block from 16(%r11), already loaded
 *   %r11         key schedule base
 *   %rcx,%rax    base pointer and index for the remaining round keys;
 *                %rax is stepped by 32 until it reaches zero
 *   %r10         value copied back into %rax before returning
 *
 * On return %xmm2 holds the result, %xmm7 holds its entry value XOR
 * %xmm15 (the caller copies it into %xmm15 as the new running offset),
 * %xmm1 is reloaded from 16(%r11) and %rax equals %r10.
 *
 * .byte 102,15,56,222,209 / ...,208 encode aesdec %xmm1,%xmm2 and
 * aesdec %xmm0,%xmm2; .byte 102,15,56,223,215 is aesdeclast %xmm7,%xmm2.
 */
3419bc3d5698SJohn Baldwin.type	__ocb_decrypt1,@function
3420bc3d5698SJohn Baldwin.align	32
3421bc3d5698SJohn Baldwin__ocb_decrypt1:
3422bc3d5698SJohn Baldwin.cfi_startproc
/* %xmm7 ^= %xmm15 ^ %xmm9, then mask the input block with it */
3423bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
3424bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm7
3425bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
3426bc3d5698SJohn Baldwin	movups	32(%r11),%xmm0
3427bc3d5698SJohn Baldwin
/* aesdec %xmm1,%xmm2 */
3428bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3429bc3d5698SJohn Baldwin	movups	48(%r11),%xmm1
/* second XOR with %xmm9 cancels the first one in %xmm7 */
3430bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm7
3431bc3d5698SJohn Baldwin
/* aesdec %xmm0,%xmm2 */
3432bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3433bc3d5698SJohn Baldwin	movups	64(%r11),%xmm0
3434bc3d5698SJohn Baldwin	jmp	.Locb_dec_loop1
3435bc3d5698SJohn Baldwin
/*
 * Two rounds per iteration. The jnz tests the zero flag left by the addq;
 * the movups and AESDEC in between do not modify flags.
 */
3436bc3d5698SJohn Baldwin.align	32
3437bc3d5698SJohn Baldwin.Locb_dec_loop1:
3438bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3439bc3d5698SJohn Baldwin	movups	(%rcx,%rax,1),%xmm1
3440bc3d5698SJohn Baldwin	addq	$32,%rax
3441bc3d5698SJohn Baldwin
3442bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3443bc3d5698SJohn Baldwin	movups	-16(%rcx,%rax,1),%xmm0
3444bc3d5698SJohn Baldwin	jnz	.Locb_dec_loop1
3445bc3d5698SJohn Baldwin
/* last aesdec, then re-prime %xmm1 and reset the key index in %rax */
3446bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3447bc3d5698SJohn Baldwin	movups	16(%r11),%xmm1
3448bc3d5698SJohn Baldwin	movq	%r10,%rax
3449bc3d5698SJohn Baldwin
/* aesdeclast %xmm7,%xmm2 */
3450bc3d5698SJohn Baldwin.byte	102,15,56,223,215
/* 0xf3,0xc3 encodes "rep ret" */
3451bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
3452bc3d5698SJohn Baldwin.cfi_endproc
3453bc3d5698SJohn Baldwin.size	__ocb_decrypt1,.-__ocb_decrypt1
3454bc3d5698SJohn Baldwin.globl	aesni_cbc_encrypt
3455bc3d5698SJohn Baldwin.type	aesni_cbc_encrypt,@function
3456bc3d5698SJohn Baldwin.align	16
3457bc3d5698SJohn Baldwinaesni_cbc_encrypt:
3458bc3d5698SJohn Baldwin.cfi_startproc
3459*c0855eaaSJohn Baldwin.byte	243,15,30,250
3460bc3d5698SJohn Baldwin	testq	%rdx,%rdx
3461bc3d5698SJohn Baldwin	jz	.Lcbc_ret
3462bc3d5698SJohn Baldwin
3463bc3d5698SJohn Baldwin	movl	240(%rcx),%r10d
3464bc3d5698SJohn Baldwin	movq	%rcx,%r11
3465bc3d5698SJohn Baldwin	testl	%r9d,%r9d
3466bc3d5698SJohn Baldwin	jz	.Lcbc_decrypt
3467bc3d5698SJohn Baldwin
3468bc3d5698SJohn Baldwin	movups	(%r8),%xmm2
3469bc3d5698SJohn Baldwin	movl	%r10d,%eax
3470bc3d5698SJohn Baldwin	cmpq	$16,%rdx
3471bc3d5698SJohn Baldwin	jb	.Lcbc_enc_tail
3472bc3d5698SJohn Baldwin	subq	$16,%rdx
3473bc3d5698SJohn Baldwin	jmp	.Lcbc_enc_loop
3474bc3d5698SJohn Baldwin.align	16
3475bc3d5698SJohn Baldwin.Lcbc_enc_loop:
3476bc3d5698SJohn Baldwin	movups	(%rdi),%xmm3
3477bc3d5698SJohn Baldwin	leaq	16(%rdi),%rdi
3478bc3d5698SJohn Baldwin
3479bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
3480bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
3481bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
3482bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
3483bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm2
3484bc3d5698SJohn Baldwin.Loop_enc1_15:
3485bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3486bc3d5698SJohn Baldwin	decl	%eax
3487bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
3488bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
3489bc3d5698SJohn Baldwin	jnz	.Loop_enc1_15
3490bc3d5698SJohn Baldwin.byte	102,15,56,221,209
3491bc3d5698SJohn Baldwin	movl	%r10d,%eax
3492bc3d5698SJohn Baldwin	movq	%r11,%rcx
3493bc3d5698SJohn Baldwin	movups	%xmm2,0(%rsi)
3494bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3495bc3d5698SJohn Baldwin	subq	$16,%rdx
3496bc3d5698SJohn Baldwin	jnc	.Lcbc_enc_loop
3497bc3d5698SJohn Baldwin	addq	$16,%rdx
3498bc3d5698SJohn Baldwin	jnz	.Lcbc_enc_tail
3499bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3500bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3501bc3d5698SJohn Baldwin	movups	%xmm2,(%r8)
3502bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
3503bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3504bc3d5698SJohn Baldwin	jmp	.Lcbc_ret
3505bc3d5698SJohn Baldwin
3506bc3d5698SJohn Baldwin.Lcbc_enc_tail:
3507bc3d5698SJohn Baldwin	movq	%rdx,%rcx
3508bc3d5698SJohn Baldwin	xchgq	%rdi,%rsi
3509bc3d5698SJohn Baldwin.long	0x9066A4F3
3510bc3d5698SJohn Baldwin	movl	$16,%ecx
3511bc3d5698SJohn Baldwin	subq	%rdx,%rcx
3512bc3d5698SJohn Baldwin	xorl	%eax,%eax
3513bc3d5698SJohn Baldwin.long	0x9066AAF3
3514bc3d5698SJohn Baldwin	leaq	-16(%rdi),%rdi
3515bc3d5698SJohn Baldwin	movl	%r10d,%eax
3516bc3d5698SJohn Baldwin	movq	%rdi,%rsi
3517bc3d5698SJohn Baldwin	movq	%r11,%rcx
3518bc3d5698SJohn Baldwin	xorq	%rdx,%rdx
3519bc3d5698SJohn Baldwin	jmp	.Lcbc_enc_loop
3520bc3d5698SJohn Baldwin
3521bc3d5698SJohn Baldwin.align	16
3522bc3d5698SJohn Baldwin.Lcbc_decrypt:
3523bc3d5698SJohn Baldwin	cmpq	$16,%rdx
3524bc3d5698SJohn Baldwin	jne	.Lcbc_decrypt_bulk
3525bc3d5698SJohn Baldwin
3526bc3d5698SJohn Baldwin
3527bc3d5698SJohn Baldwin
3528bc3d5698SJohn Baldwin	movdqu	(%rdi),%xmm2
3529bc3d5698SJohn Baldwin	movdqu	(%r8),%xmm3
3530bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
3531bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
3532bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
3533bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
3534bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3535bc3d5698SJohn Baldwin.Loop_dec1_16:
3536bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3537bc3d5698SJohn Baldwin	decl	%r10d
3538bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
3539bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
3540bc3d5698SJohn Baldwin	jnz	.Loop_dec1_16
3541bc3d5698SJohn Baldwin.byte	102,15,56,223,209
3542bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3543bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3544bc3d5698SJohn Baldwin	movdqu	%xmm4,(%r8)
3545bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm2
3546bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3547bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
3548bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
3549bc3d5698SJohn Baldwin	jmp	.Lcbc_ret
3550bc3d5698SJohn Baldwin.align	16
3551bc3d5698SJohn Baldwin.Lcbc_decrypt_bulk:
3552bc3d5698SJohn Baldwin	leaq	(%rsp),%r11
3553bc3d5698SJohn Baldwin.cfi_def_cfa_register	%r11
3554bc3d5698SJohn Baldwin	pushq	%rbp
3555bc3d5698SJohn Baldwin.cfi_offset	%rbp,-16
3556bc3d5698SJohn Baldwin	subq	$16,%rsp
3557bc3d5698SJohn Baldwin	andq	$-16,%rsp
3558bc3d5698SJohn Baldwin	movq	%rcx,%rbp
3559bc3d5698SJohn Baldwin	movups	(%r8),%xmm10
3560bc3d5698SJohn Baldwin	movl	%r10d,%eax
3561bc3d5698SJohn Baldwin	cmpq	$0x50,%rdx
3562bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_tail
3563bc3d5698SJohn Baldwin
3564bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
3565bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
3566bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
3567bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm11
3568bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
3569bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm12
3570bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
3571bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm13
3572bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
3573bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm14
3574bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
3575bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm15
3576bc3d5698SJohn Baldwin	movl	OPENSSL_ia32cap_P+4(%rip),%r9d
3577bc3d5698SJohn Baldwin	cmpq	$0x70,%rdx
3578bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_six_or_seven
3579bc3d5698SJohn Baldwin
3580bc3d5698SJohn Baldwin	andl	$71303168,%r9d
3581bc3d5698SJohn Baldwin	subq	$0x50,%rdx
3582bc3d5698SJohn Baldwin	cmpl	$4194304,%r9d
3583bc3d5698SJohn Baldwin	je	.Lcbc_dec_loop6_enter
3584bc3d5698SJohn Baldwin	subq	$0x20,%rdx
3585bc3d5698SJohn Baldwin	leaq	112(%rcx),%rcx
3586bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_loop8_enter
3587bc3d5698SJohn Baldwin.align	16
3588bc3d5698SJohn Baldwin.Lcbc_dec_loop8:
3589bc3d5698SJohn Baldwin	movups	%xmm9,(%rsi)
3590bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3591bc3d5698SJohn Baldwin.Lcbc_dec_loop8_enter:
3592bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm8
3593bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
3594bc3d5698SJohn Baldwin	movdqu	112(%rdi),%xmm9
3595bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3596bc3d5698SJohn Baldwin	movups	16-112(%rcx),%xmm1
3597bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3598bc3d5698SJohn Baldwin	movq	$-1,%rbp
3599bc3d5698SJohn Baldwin	cmpq	$0x70,%rdx
3600bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
3601bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
3602bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
3603bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm8
3604bc3d5698SJohn Baldwin
3605bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3606bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm9
3607bc3d5698SJohn Baldwin	movups	32-112(%rcx),%xmm0
3608bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3609bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3610bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3611bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3612bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3613bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3614bc3d5698SJohn Baldwin	adcq	$0,%rbp
3615bc3d5698SJohn Baldwin	andq	$128,%rbp
3616bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3617bc3d5698SJohn Baldwin	addq	%rdi,%rbp
3618bc3d5698SJohn Baldwin	movups	48-112(%rcx),%xmm1
3619bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3620bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3621bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3622bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3623bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3624bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3625bc3d5698SJohn Baldwin.byte	102,68,15,56,222,192
3626bc3d5698SJohn Baldwin.byte	102,68,15,56,222,200
3627bc3d5698SJohn Baldwin	movups	64-112(%rcx),%xmm0
3628bc3d5698SJohn Baldwin	nop
3629bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3630bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3631bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3632bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3633bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3634bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3635bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3636bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3637bc3d5698SJohn Baldwin	movups	80-112(%rcx),%xmm1
3638bc3d5698SJohn Baldwin	nop
3639bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3640bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3641bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3642bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3643bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3644bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3645bc3d5698SJohn Baldwin.byte	102,68,15,56,222,192
3646bc3d5698SJohn Baldwin.byte	102,68,15,56,222,200
3647bc3d5698SJohn Baldwin	movups	96-112(%rcx),%xmm0
3648bc3d5698SJohn Baldwin	nop
3649bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3650bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3651bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3652bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3653bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3654bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3655bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3656bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3657bc3d5698SJohn Baldwin	movups	112-112(%rcx),%xmm1
3658bc3d5698SJohn Baldwin	nop
3659bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3660bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3661bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3662bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3663bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3664bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3665bc3d5698SJohn Baldwin.byte	102,68,15,56,222,192
3666bc3d5698SJohn Baldwin.byte	102,68,15,56,222,200
3667bc3d5698SJohn Baldwin	movups	128-112(%rcx),%xmm0
3668bc3d5698SJohn Baldwin	nop
3669bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3670bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3671bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3672bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3673bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3674bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3675bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3676bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3677bc3d5698SJohn Baldwin	movups	144-112(%rcx),%xmm1
3678bc3d5698SJohn Baldwin	cmpl	$11,%eax
3679bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3680bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3681bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3682bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3683bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3684bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3685bc3d5698SJohn Baldwin.byte	102,68,15,56,222,192
3686bc3d5698SJohn Baldwin.byte	102,68,15,56,222,200
3687bc3d5698SJohn Baldwin	movups	160-112(%rcx),%xmm0
3688bc3d5698SJohn Baldwin	jb	.Lcbc_dec_done
3689bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3690bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3691bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3692bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3693bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3694bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3695bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3696bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3697bc3d5698SJohn Baldwin	movups	176-112(%rcx),%xmm1
3698bc3d5698SJohn Baldwin	nop
3699bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3700bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3701bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3702bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3703bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3704bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3705bc3d5698SJohn Baldwin.byte	102,68,15,56,222,192
3706bc3d5698SJohn Baldwin.byte	102,68,15,56,222,200
3707bc3d5698SJohn Baldwin	movups	192-112(%rcx),%xmm0
3708bc3d5698SJohn Baldwin	je	.Lcbc_dec_done
3709bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3710bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3711bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3712bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3713bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3714bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3715bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3716bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3717bc3d5698SJohn Baldwin	movups	208-112(%rcx),%xmm1
3718bc3d5698SJohn Baldwin	nop
3719bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3720bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3721bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3722bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3723bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3724bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3725bc3d5698SJohn Baldwin.byte	102,68,15,56,222,192
3726bc3d5698SJohn Baldwin.byte	102,68,15,56,222,200
3727bc3d5698SJohn Baldwin	movups	224-112(%rcx),%xmm0
3728bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_done
3729bc3d5698SJohn Baldwin.align	16
3730bc3d5698SJohn Baldwin.Lcbc_dec_done:
3731bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3732bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3733bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
3734bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm11
3735bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3736bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3737bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm12
3738bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm13
3739bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3740bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3741bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm14
3742bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm15
3743bc3d5698SJohn Baldwin.byte	102,68,15,56,222,193
3744bc3d5698SJohn Baldwin.byte	102,68,15,56,222,201
3745bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm1
3746bc3d5698SJohn Baldwin
3747bc3d5698SJohn Baldwin.byte	102,65,15,56,223,210
3748bc3d5698SJohn Baldwin	movdqu	96(%rdi),%xmm10
3749bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm1
3750bc3d5698SJohn Baldwin.byte	102,65,15,56,223,219
3751bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm10
3752bc3d5698SJohn Baldwin	movdqu	112(%rdi),%xmm0
3753bc3d5698SJohn Baldwin.byte	102,65,15,56,223,228
3754bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
3755bc3d5698SJohn Baldwin	movdqu	0(%rbp),%xmm11
3756bc3d5698SJohn Baldwin.byte	102,65,15,56,223,237
3757bc3d5698SJohn Baldwin.byte	102,65,15,56,223,246
3758bc3d5698SJohn Baldwin	movdqu	16(%rbp),%xmm12
3759bc3d5698SJohn Baldwin	movdqu	32(%rbp),%xmm13
3760bc3d5698SJohn Baldwin.byte	102,65,15,56,223,255
3761bc3d5698SJohn Baldwin.byte	102,68,15,56,223,193
3762bc3d5698SJohn Baldwin	movdqu	48(%rbp),%xmm14
3763bc3d5698SJohn Baldwin	movdqu	64(%rbp),%xmm15
3764bc3d5698SJohn Baldwin.byte	102,69,15,56,223,202
3765bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm10
3766bc3d5698SJohn Baldwin	movdqu	80(%rbp),%xmm1
3767bc3d5698SJohn Baldwin	movups	-112(%rcx),%xmm0
3768bc3d5698SJohn Baldwin
3769bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
3770bc3d5698SJohn Baldwin	movdqa	%xmm11,%xmm2
3771bc3d5698SJohn Baldwin	movups	%xmm3,16(%rsi)
3772bc3d5698SJohn Baldwin	movdqa	%xmm12,%xmm3
3773bc3d5698SJohn Baldwin	movups	%xmm4,32(%rsi)
3774bc3d5698SJohn Baldwin	movdqa	%xmm13,%xmm4
3775bc3d5698SJohn Baldwin	movups	%xmm5,48(%rsi)
3776bc3d5698SJohn Baldwin	movdqa	%xmm14,%xmm5
3777bc3d5698SJohn Baldwin	movups	%xmm6,64(%rsi)
3778bc3d5698SJohn Baldwin	movdqa	%xmm15,%xmm6
3779bc3d5698SJohn Baldwin	movups	%xmm7,80(%rsi)
3780bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
3781bc3d5698SJohn Baldwin	movups	%xmm8,96(%rsi)
3782bc3d5698SJohn Baldwin	leaq	112(%rsi),%rsi
3783bc3d5698SJohn Baldwin
3784bc3d5698SJohn Baldwin	subq	$0x80,%rdx
3785bc3d5698SJohn Baldwin	ja	.Lcbc_dec_loop8
3786bc3d5698SJohn Baldwin
3787bc3d5698SJohn Baldwin	movaps	%xmm9,%xmm2
3788bc3d5698SJohn Baldwin	leaq	-112(%rcx),%rcx
3789bc3d5698SJohn Baldwin	addq	$0x70,%rdx
3790bc3d5698SJohn Baldwin	jle	.Lcbc_dec_clear_tail_collected
3791bc3d5698SJohn Baldwin	movups	%xmm9,(%rsi)
3792bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3793bc3d5698SJohn Baldwin	cmpq	$0x50,%rdx
3794bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_tail
3795bc3d5698SJohn Baldwin
3796bc3d5698SJohn Baldwin	movaps	%xmm11,%xmm2
3797bc3d5698SJohn Baldwin.Lcbc_dec_six_or_seven:
3798bc3d5698SJohn Baldwin	cmpq	$0x60,%rdx
3799bc3d5698SJohn Baldwin	ja	.Lcbc_dec_seven
3800bc3d5698SJohn Baldwin
3801bc3d5698SJohn Baldwin	movaps	%xmm7,%xmm8
3802bc3d5698SJohn Baldwin	call	_aesni_decrypt6
3803bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3804bc3d5698SJohn Baldwin	movaps	%xmm8,%xmm10
3805bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3806bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3807bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3808bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
3809bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3810bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
3811bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
3812bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3813bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
3814bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
3815bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3816bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
3817bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%rsi)
3818bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
3819bc3d5698SJohn Baldwin	leaq	80(%rsi),%rsi
3820bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
3821bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
3822bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
3823bc3d5698SJohn Baldwin
3824bc3d5698SJohn Baldwin.align	16
3825bc3d5698SJohn Baldwin.Lcbc_dec_seven:
3826bc3d5698SJohn Baldwin	movups	96(%rdi),%xmm8
3827bc3d5698SJohn Baldwin	xorps	%xmm9,%xmm9
3828bc3d5698SJohn Baldwin	call	_aesni_decrypt8
3829bc3d5698SJohn Baldwin	movups	80(%rdi),%xmm9
3830bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3831bc3d5698SJohn Baldwin	movups	96(%rdi),%xmm10
3832bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3833bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3834bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3835bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
3836bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3837bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
3838bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
3839bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3840bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
3841bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
3842bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3843bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
3844bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%rsi)
3845bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
3846bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm8
3847bc3d5698SJohn Baldwin	movdqu	%xmm7,80(%rsi)
3848bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
3849bc3d5698SJohn Baldwin	leaq	96(%rsi),%rsi
3850bc3d5698SJohn Baldwin	movdqa	%xmm8,%xmm2
3851bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
3852bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
3853bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
3854bc3d5698SJohn Baldwin
3855bc3d5698SJohn Baldwin.align	16
3856bc3d5698SJohn Baldwin.Lcbc_dec_loop6:
3857bc3d5698SJohn Baldwin	movups	%xmm7,(%rsi)
3858bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3859bc3d5698SJohn Baldwin	movdqu	0(%rdi),%xmm2
3860bc3d5698SJohn Baldwin	movdqu	16(%rdi),%xmm3
3861bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm11
3862bc3d5698SJohn Baldwin	movdqu	32(%rdi),%xmm4
3863bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm12
3864bc3d5698SJohn Baldwin	movdqu	48(%rdi),%xmm5
3865bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm13
3866bc3d5698SJohn Baldwin	movdqu	64(%rdi),%xmm6
3867bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm14
3868bc3d5698SJohn Baldwin	movdqu	80(%rdi),%xmm7
3869bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm15
3870bc3d5698SJohn Baldwin.Lcbc_dec_loop6_enter:
3871bc3d5698SJohn Baldwin	leaq	96(%rdi),%rdi
3872bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm8
3873bc3d5698SJohn Baldwin
3874bc3d5698SJohn Baldwin	call	_aesni_decrypt6
3875bc3d5698SJohn Baldwin
3876bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3877bc3d5698SJohn Baldwin	movdqa	%xmm8,%xmm10
3878bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3879bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3880bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3881bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
3882bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
3883bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
3884bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
3885bc3d5698SJohn Baldwin	movq	%rbp,%rcx
3886bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
3887bc3d5698SJohn Baldwin	pxor	%xmm15,%xmm7
3888bc3d5698SJohn Baldwin	movl	%r10d,%eax
3889bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%rsi)
3890bc3d5698SJohn Baldwin	leaq	80(%rsi),%rsi
3891bc3d5698SJohn Baldwin	subq	$0x60,%rdx
3892bc3d5698SJohn Baldwin	ja	.Lcbc_dec_loop6
3893bc3d5698SJohn Baldwin
3894bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
3895bc3d5698SJohn Baldwin	addq	$0x50,%rdx
3896bc3d5698SJohn Baldwin	jle	.Lcbc_dec_clear_tail_collected
3897bc3d5698SJohn Baldwin	movups	%xmm7,(%rsi)
3898bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3899bc3d5698SJohn Baldwin
3900bc3d5698SJohn Baldwin.Lcbc_dec_tail:
3901bc3d5698SJohn Baldwin	movups	(%rdi),%xmm2
3902bc3d5698SJohn Baldwin	subq	$0x10,%rdx
3903bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_one
3904bc3d5698SJohn Baldwin
3905bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm3
3906bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm11
3907bc3d5698SJohn Baldwin	subq	$0x10,%rdx
3908bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_two
3909bc3d5698SJohn Baldwin
3910bc3d5698SJohn Baldwin	movups	32(%rdi),%xmm4
3911bc3d5698SJohn Baldwin	movaps	%xmm3,%xmm12
3912bc3d5698SJohn Baldwin	subq	$0x10,%rdx
3913bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_three
3914bc3d5698SJohn Baldwin
3915bc3d5698SJohn Baldwin	movups	48(%rdi),%xmm5
3916bc3d5698SJohn Baldwin	movaps	%xmm4,%xmm13
3917bc3d5698SJohn Baldwin	subq	$0x10,%rdx
3918bc3d5698SJohn Baldwin	jbe	.Lcbc_dec_four
3919bc3d5698SJohn Baldwin
3920bc3d5698SJohn Baldwin	movups	64(%rdi),%xmm6
3921bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm14
3922bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm15
3923bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
3924bc3d5698SJohn Baldwin	call	_aesni_decrypt6
3925bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3926bc3d5698SJohn Baldwin	movaps	%xmm15,%xmm10
3927bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3928bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3929bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3930bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
3931bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3932bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
3933bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
3934bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3935bc3d5698SJohn Baldwin	pxor	%xmm14,%xmm6
3936bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%rsi)
3937bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3938bc3d5698SJohn Baldwin	leaq	64(%rsi),%rsi
3939bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm2
3940bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
3941bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
3942bc3d5698SJohn Baldwin	subq	$0x10,%rdx
3943bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
3944bc3d5698SJohn Baldwin
3945bc3d5698SJohn Baldwin.align	16
3946bc3d5698SJohn Baldwin.Lcbc_dec_one:
3947bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm11
3948bc3d5698SJohn Baldwin	movups	(%rcx),%xmm0
3949bc3d5698SJohn Baldwin	movups	16(%rcx),%xmm1
3950bc3d5698SJohn Baldwin	leaq	32(%rcx),%rcx
3951bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3952bc3d5698SJohn Baldwin.Loop_dec1_17:
3953bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3954bc3d5698SJohn Baldwin	decl	%eax
3955bc3d5698SJohn Baldwin	movups	(%rcx),%xmm1
3956bc3d5698SJohn Baldwin	leaq	16(%rcx),%rcx
3957bc3d5698SJohn Baldwin	jnz	.Loop_dec1_17
3958bc3d5698SJohn Baldwin.byte	102,15,56,223,209
3959bc3d5698SJohn Baldwin	xorps	%xmm10,%xmm2
3960bc3d5698SJohn Baldwin	movaps	%xmm11,%xmm10
3961bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
3962bc3d5698SJohn Baldwin.align	16
3963bc3d5698SJohn Baldwin.Lcbc_dec_two:
3964bc3d5698SJohn Baldwin	movaps	%xmm3,%xmm12
3965bc3d5698SJohn Baldwin	call	_aesni_decrypt2
3966bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3967bc3d5698SJohn Baldwin	movaps	%xmm12,%xmm10
3968bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3969bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3970bc3d5698SJohn Baldwin	movdqa	%xmm3,%xmm2
3971bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3972bc3d5698SJohn Baldwin	leaq	16(%rsi),%rsi
3973bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
3974bc3d5698SJohn Baldwin.align	16
3975bc3d5698SJohn Baldwin.Lcbc_dec_three:
3976bc3d5698SJohn Baldwin	movaps	%xmm4,%xmm13
3977bc3d5698SJohn Baldwin	call	_aesni_decrypt3
3978bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3979bc3d5698SJohn Baldwin	movaps	%xmm13,%xmm10
3980bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3981bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3982bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3983bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
3984bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3985bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm2
3986bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3987bc3d5698SJohn Baldwin	leaq	32(%rsi),%rsi
3988bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
3989bc3d5698SJohn Baldwin.align	16
3990bc3d5698SJohn Baldwin.Lcbc_dec_four:
3991bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm14
3992bc3d5698SJohn Baldwin	call	_aesni_decrypt4
3993bc3d5698SJohn Baldwin	pxor	%xmm10,%xmm2
3994bc3d5698SJohn Baldwin	movaps	%xmm14,%xmm10
3995bc3d5698SJohn Baldwin	pxor	%xmm11,%xmm3
3996bc3d5698SJohn Baldwin	movdqu	%xmm2,(%rsi)
3997bc3d5698SJohn Baldwin	pxor	%xmm12,%xmm4
3998bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%rsi)
3999bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4000bc3d5698SJohn Baldwin	pxor	%xmm13,%xmm5
4001bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%rsi)
4002bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4003bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
4004bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4005bc3d5698SJohn Baldwin	leaq	48(%rsi),%rsi
4006bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_tail_collected
4007bc3d5698SJohn Baldwin
4008bc3d5698SJohn Baldwin.align	16
4009bc3d5698SJohn Baldwin.Lcbc_dec_clear_tail_collected:
4010bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4011bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4012bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4013bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
4014bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
4015bc3d5698SJohn Baldwin	pxor	%xmm8,%xmm8
4016bc3d5698SJohn Baldwin	pxor	%xmm9,%xmm9
4017bc3d5698SJohn Baldwin.Lcbc_dec_tail_collected:
4018bc3d5698SJohn Baldwin	movups	%xmm10,(%r8)
4019bc3d5698SJohn Baldwin	andq	$15,%rdx
4020bc3d5698SJohn Baldwin	jnz	.Lcbc_dec_tail_partial
4021bc3d5698SJohn Baldwin	movups	%xmm2,(%rsi)
4022bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4023bc3d5698SJohn Baldwin	jmp	.Lcbc_dec_ret
4024bc3d5698SJohn Baldwin.align	16
4025bc3d5698SJohn Baldwin.Lcbc_dec_tail_partial:
4026bc3d5698SJohn Baldwin	movaps	%xmm2,(%rsp)
4027bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4028bc3d5698SJohn Baldwin	movq	$16,%rcx
4029bc3d5698SJohn Baldwin	movq	%rsi,%rdi
4030bc3d5698SJohn Baldwin	subq	%rdx,%rcx
4031bc3d5698SJohn Baldwin	leaq	(%rsp),%rsi
4032bc3d5698SJohn Baldwin.long	0x9066A4F3
4033bc3d5698SJohn Baldwin	movdqa	%xmm2,(%rsp)
4034bc3d5698SJohn Baldwin
4035bc3d5698SJohn Baldwin.Lcbc_dec_ret:
4036bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm0
4037bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4038bc3d5698SJohn Baldwin	movq	-8(%r11),%rbp
4039bc3d5698SJohn Baldwin.cfi_restore	%rbp
4040bc3d5698SJohn Baldwin	leaq	(%r11),%rsp
4041bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
4042bc3d5698SJohn Baldwin.Lcbc_ret:
4043bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
4044bc3d5698SJohn Baldwin.cfi_endproc
4045bc3d5698SJohn Baldwin.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
/*
 * aesni_set_decrypt_key: build an AES decryption key schedule in place.
 *
 * Register use, as visible from this routine and from the
 * __aesni_set_encrypt_key body it calls:
 *   In:   %rdi = user key bytes, %esi = key size in bits (128/192/256),
 *         %rdx = key schedule buffer to fill.
 *   Out:  %eax = 0 on success, otherwise the non-zero code returned by
 *         __aesni_set_encrypt_key (passed through unchanged).
 *   Uses: %rdi, %rsi, %rdx, %xmm0, %xmm1, flags, plus whatever the callee
 *         touches; %xmm0 and %xmm1 are zeroed before a successful return.
 *
 * Method: first generate the encryption schedule, then reverse the order
 * of the 16-byte round keys and run AESIMC over every key except the
 * first and the last.
 *
 * NOTE(review): this entry point has no endbr64 marker
 * (.byte 243,15,30,250), unlike aesni_encrypt and aesni_decrypt at the top
 * of the file. The file is auto-generated, so confirm against the
 * generator before changing anything here.
 */
4046bc3d5698SJohn Baldwin.globl	aesni_set_decrypt_key
4047bc3d5698SJohn Baldwin.type	aesni_set_decrypt_key,@function
4048bc3d5698SJohn Baldwin.align	16
4049bc3d5698SJohn Baldwinaesni_set_decrypt_key:
4050bc3d5698SJohn Baldwin.cfi_startproc
/* Raw encoding of "subq $8,%rsp"; presumably keeps %rsp 16-byte aligned at the call below. */
4051bc3d5698SJohn Baldwin.byte	0x48,0x83,0xEC,0x08
4052bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
/* Expand the key as for encryption; arguments are forwarded untouched. */
4053bc3d5698SJohn Baldwin	call	__aesni_set_encrypt_key
/* The callee leaves a per-key-size count (9, 11 or 13) in %esi; scale it to a byte offset. */
4054bc3d5698SJohn Baldwin	shll	$4,%esi
/* Any non-zero status from the callee is returned as-is. */
4055bc3d5698SJohn Baldwin	testl	%eax,%eax
4056bc3d5698SJohn Baldwin	jnz	.Ldec_key_ret
/* %rdi = %rdx + 16*(count+1): address of the last 16-byte round key. */
4057bc3d5698SJohn Baldwin	leaq	16(%rdx,%rsi,1),%rdi
4058bc3d5698SJohn Baldwin
/* Swap the first and last round keys; these two are not passed through AESIMC. */
4059bc3d5698SJohn Baldwin	movups	(%rdx),%xmm0
4060bc3d5698SJohn Baldwin	movups	(%rdi),%xmm1
4061bc3d5698SJohn Baldwin	movups	%xmm0,(%rdi)
4062bc3d5698SJohn Baldwin	movups	%xmm1,(%rdx)
/* Move both cursors one key towards the middle. */
4063bc3d5698SJohn Baldwin	leaq	16(%rdx),%rdx
4064bc3d5698SJohn Baldwin	leaq	-16(%rdi),%rdi
4065bc3d5698SJohn Baldwin
/*
 * Main loop: %rdx walks upward and %rdi walks downward. Each pass loads
 * one key from each end, applies AESIMC to both, and stores them swapped.
 */
4066bc3d5698SJohn Baldwin.Ldec_key_inverse:
4067bc3d5698SJohn Baldwin	movups	(%rdx),%xmm0
4068bc3d5698SJohn Baldwin	movups	(%rdi),%xmm1
/* aesimc %xmm0,%xmm0 */
4069bc3d5698SJohn Baldwin.byte	102,15,56,219,192
/* aesimc %xmm1,%xmm1 */
4070bc3d5698SJohn Baldwin.byte	102,15,56,219,201
4071bc3d5698SJohn Baldwin	leaq	16(%rdx),%rdx
4072bc3d5698SJohn Baldwin	leaq	-16(%rdi),%rdi
/* Cursors were already advanced, so +16/-16 address the slots just read. */
4073bc3d5698SJohn Baldwin	movups	%xmm0,16(%rdi)
4074bc3d5698SJohn Baldwin	movups	%xmm1,-16(%rdx)
/* Unsigned compare: keep going while the high cursor is still above the low one. */
4075bc3d5698SJohn Baldwin	cmpq	%rdx,%rdi
4076bc3d5698SJohn Baldwin	ja	.Ldec_key_inverse
4077bc3d5698SJohn Baldwin
/*
 * Remaining key: with an odd number of round keys the cursors meet on the
 * middle key, which is transformed with AESIMC and written back.
 */
4078bc3d5698SJohn Baldwin	movups	(%rdx),%xmm0
/* aesimc %xmm0,%xmm0 */
4079bc3d5698SJohn Baldwin.byte	102,15,56,219,192
/* Zero the scratch registers that held round-key material. */
4080bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4081bc3d5698SJohn Baldwin	movups	%xmm0,(%rdi)
4082bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
/* Common exit: %eax already holds the status (0 on the fall-through path). */
4083bc3d5698SJohn Baldwin.Ldec_key_ret:
4084bc3d5698SJohn Baldwin	addq	$8,%rsp
4085bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	-8
/* Raw encoding of "rep ret". */
4086bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
4087bc3d5698SJohn Baldwin.cfi_endproc
4088bc3d5698SJohn Baldwin.LSEH_end_set_decrypt_key:
4089bc3d5698SJohn Baldwin.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
4090bc3d5698SJohn Baldwin.globl	aesni_set_encrypt_key
4091bc3d5698SJohn Baldwin.type	aesni_set_encrypt_key,@function
4092bc3d5698SJohn Baldwin.align	16
4093bc3d5698SJohn Baldwinaesni_set_encrypt_key:
4094bc3d5698SJohn Baldwin__aesni_set_encrypt_key:
4095bc3d5698SJohn Baldwin.cfi_startproc
4096bc3d5698SJohn Baldwin.byte	0x48,0x83,0xEC,0x08
4097bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
4098bc3d5698SJohn Baldwin	movq	$-1,%rax
4099bc3d5698SJohn Baldwin	testq	%rdi,%rdi
4100bc3d5698SJohn Baldwin	jz	.Lenc_key_ret
4101bc3d5698SJohn Baldwin	testq	%rdx,%rdx
4102bc3d5698SJohn Baldwin	jz	.Lenc_key_ret
4103bc3d5698SJohn Baldwin
4104bc3d5698SJohn Baldwin	movl	$268437504,%r10d
4105bc3d5698SJohn Baldwin	movups	(%rdi),%xmm0
4106bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm4
4107bc3d5698SJohn Baldwin	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
4108bc3d5698SJohn Baldwin	leaq	16(%rdx),%rax
4109bc3d5698SJohn Baldwin	cmpl	$256,%esi
4110bc3d5698SJohn Baldwin	je	.L14rounds
4111bc3d5698SJohn Baldwin	cmpl	$192,%esi
4112bc3d5698SJohn Baldwin	je	.L12rounds
4113bc3d5698SJohn Baldwin	cmpl	$128,%esi
4114bc3d5698SJohn Baldwin	jne	.Lbad_keybits
4115bc3d5698SJohn Baldwin
4116bc3d5698SJohn Baldwin.L10rounds:
4117bc3d5698SJohn Baldwin	movl	$9,%esi
4118bc3d5698SJohn Baldwin	cmpl	$268435456,%r10d
4119bc3d5698SJohn Baldwin	je	.L10rounds_alt
4120bc3d5698SJohn Baldwin
4121bc3d5698SJohn Baldwin	movups	%xmm0,(%rdx)
4122bc3d5698SJohn Baldwin.byte	102,15,58,223,200,1
4123bc3d5698SJohn Baldwin	call	.Lkey_expansion_128_cold
4124bc3d5698SJohn Baldwin.byte	102,15,58,223,200,2
4125bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4126bc3d5698SJohn Baldwin.byte	102,15,58,223,200,4
4127bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4128bc3d5698SJohn Baldwin.byte	102,15,58,223,200,8
4129bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4130bc3d5698SJohn Baldwin.byte	102,15,58,223,200,16
4131bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4132bc3d5698SJohn Baldwin.byte	102,15,58,223,200,32
4133bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4134bc3d5698SJohn Baldwin.byte	102,15,58,223,200,64
4135bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4136bc3d5698SJohn Baldwin.byte	102,15,58,223,200,128
4137bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4138bc3d5698SJohn Baldwin.byte	102,15,58,223,200,27
4139bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4140bc3d5698SJohn Baldwin.byte	102,15,58,223,200,54
4141bc3d5698SJohn Baldwin	call	.Lkey_expansion_128
4142bc3d5698SJohn Baldwin	movups	%xmm0,(%rax)
4143bc3d5698SJohn Baldwin	movl	%esi,80(%rax)
4144bc3d5698SJohn Baldwin	xorl	%eax,%eax
4145bc3d5698SJohn Baldwin	jmp	.Lenc_key_ret
4146bc3d5698SJohn Baldwin
4147bc3d5698SJohn Baldwin.align	16
4148bc3d5698SJohn Baldwin.L10rounds_alt:
4149bc3d5698SJohn Baldwin	movdqa	.Lkey_rotate(%rip),%xmm5
4150bc3d5698SJohn Baldwin	movl	$8,%r10d
4151bc3d5698SJohn Baldwin	movdqa	.Lkey_rcon1(%rip),%xmm4
4152bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
4153bc3d5698SJohn Baldwin	movdqu	%xmm0,(%rdx)
4154bc3d5698SJohn Baldwin	jmp	.Loop_key128
4155bc3d5698SJohn Baldwin
4156bc3d5698SJohn Baldwin.align	16
4157bc3d5698SJohn Baldwin.Loop_key128:
4158bc3d5698SJohn Baldwin.byte	102,15,56,0,197
4159bc3d5698SJohn Baldwin.byte	102,15,56,221,196
4160bc3d5698SJohn Baldwin	pslld	$1,%xmm4
4161bc3d5698SJohn Baldwin	leaq	16(%rax),%rax
4162bc3d5698SJohn Baldwin
4163bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
4164bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4165bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
4166bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4167bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
4168bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4169bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
4170bc3d5698SJohn Baldwin
4171bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
4172bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%rax)
4173bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
4174bc3d5698SJohn Baldwin
4175bc3d5698SJohn Baldwin	decl	%r10d
4176bc3d5698SJohn Baldwin	jnz	.Loop_key128
4177bc3d5698SJohn Baldwin
4178bc3d5698SJohn Baldwin	movdqa	.Lkey_rcon1b(%rip),%xmm4
4179bc3d5698SJohn Baldwin
4180bc3d5698SJohn Baldwin.byte	102,15,56,0,197
4181bc3d5698SJohn Baldwin.byte	102,15,56,221,196
4182bc3d5698SJohn Baldwin	pslld	$1,%xmm4
4183bc3d5698SJohn Baldwin
4184bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
4185bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4186bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
4187bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4188bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
4189bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4190bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
4191bc3d5698SJohn Baldwin
4192bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
4193bc3d5698SJohn Baldwin	movdqu	%xmm0,(%rax)
4194bc3d5698SJohn Baldwin
4195bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
4196bc3d5698SJohn Baldwin.byte	102,15,56,0,197
4197bc3d5698SJohn Baldwin.byte	102,15,56,221,196
4198bc3d5698SJohn Baldwin
4199bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
4200bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4201bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
4202bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4203bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
4204bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
4205bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
4206bc3d5698SJohn Baldwin
4207bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
4208bc3d5698SJohn Baldwin	movdqu	%xmm0,16(%rax)
4209bc3d5698SJohn Baldwin
4210bc3d5698SJohn Baldwin	movl	%esi,96(%rax)
4211bc3d5698SJohn Baldwin	xorl	%eax,%eax
4212bc3d5698SJohn Baldwin	jmp	.Lenc_key_ret
4213bc3d5698SJohn Baldwin
4214bc3d5698SJohn Baldwin.align	16
4215bc3d5698SJohn Baldwin.L12rounds:
4216bc3d5698SJohn Baldwin	movq	16(%rdi),%xmm2
4217bc3d5698SJohn Baldwin	movl	$11,%esi
4218bc3d5698SJohn Baldwin	cmpl	$268435456,%r10d
4219bc3d5698SJohn Baldwin	je	.L12rounds_alt
4220bc3d5698SJohn Baldwin
4221bc3d5698SJohn Baldwin	movups	%xmm0,(%rdx)
4222bc3d5698SJohn Baldwin.byte	102,15,58,223,202,1
4223bc3d5698SJohn Baldwin	call	.Lkey_expansion_192a_cold
4224bc3d5698SJohn Baldwin.byte	102,15,58,223,202,2
4225bc3d5698SJohn Baldwin	call	.Lkey_expansion_192b
4226bc3d5698SJohn Baldwin.byte	102,15,58,223,202,4
4227bc3d5698SJohn Baldwin	call	.Lkey_expansion_192a
4228bc3d5698SJohn Baldwin.byte	102,15,58,223,202,8
4229bc3d5698SJohn Baldwin	call	.Lkey_expansion_192b
4230bc3d5698SJohn Baldwin.byte	102,15,58,223,202,16
4231bc3d5698SJohn Baldwin	call	.Lkey_expansion_192a
4232bc3d5698SJohn Baldwin.byte	102,15,58,223,202,32
4233bc3d5698SJohn Baldwin	call	.Lkey_expansion_192b
4234bc3d5698SJohn Baldwin.byte	102,15,58,223,202,64
4235bc3d5698SJohn Baldwin	call	.Lkey_expansion_192a
4236bc3d5698SJohn Baldwin.byte	102,15,58,223,202,128
4237bc3d5698SJohn Baldwin	call	.Lkey_expansion_192b
4238bc3d5698SJohn Baldwin	movups	%xmm0,(%rax)
4239bc3d5698SJohn Baldwin	movl	%esi,48(%rax)
4240bc3d5698SJohn Baldwin	xorq	%rax,%rax
4241bc3d5698SJohn Baldwin	jmp	.Lenc_key_ret
4242bc3d5698SJohn Baldwin
4243bc3d5698SJohn Baldwin.align	16
4244bc3d5698SJohn Baldwin.L12rounds_alt:
4245bc3d5698SJohn Baldwin	movdqa	.Lkey_rotate192(%rip),%xmm5
4246bc3d5698SJohn Baldwin	movdqa	.Lkey_rcon1(%rip),%xmm4
4247bc3d5698SJohn Baldwin	movl	$8,%r10d
4248bc3d5698SJohn Baldwin	movdqu	%xmm0,(%rdx)
4249bc3d5698SJohn Baldwin	jmp	.Loop_key192
4250bc3d5698SJohn Baldwin
4251bc3d5698SJohn Baldwin.align	16
4252bc3d5698SJohn Baldwin.Loop_key192:
4253bc3d5698SJohn Baldwin	movq	%xmm2,0(%rax)
4254bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
4255bc3d5698SJohn Baldwin.byte	102,15,56,0,213
4256bc3d5698SJohn Baldwin.byte	102,15,56,221,212
4257bc3d5698SJohn Baldwin	pslld	$1,%xmm4
4258bc3d5698SJohn Baldwin	leaq	24(%rax),%rax
4259bc3d5698SJohn Baldwin
4260bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
4261bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
4262bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4263bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
4264bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4265bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
4266bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
4267bc3d5698SJohn Baldwin
4268bc3d5698SJohn Baldwin	pshufd	$0xff,%xmm0,%xmm3
4269bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
4270bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
4271bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
4272bc3d5698SJohn Baldwin
4273bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
4274bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
4275bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%rax)
4276bc3d5698SJohn Baldwin
4277bc3d5698SJohn Baldwin	decl	%r10d
4278bc3d5698SJohn Baldwin	jnz	.Loop_key192
4279bc3d5698SJohn Baldwin
4280bc3d5698SJohn Baldwin	movl	%esi,32(%rax)
4281bc3d5698SJohn Baldwin	xorl	%eax,%eax
4282bc3d5698SJohn Baldwin	jmp	.Lenc_key_ret
4283bc3d5698SJohn Baldwin
4284bc3d5698SJohn Baldwin.align	16
4285bc3d5698SJohn Baldwin.L14rounds:
4286bc3d5698SJohn Baldwin	movups	16(%rdi),%xmm2
4287bc3d5698SJohn Baldwin	movl	$13,%esi
4288bc3d5698SJohn Baldwin	leaq	16(%rax),%rax
4289bc3d5698SJohn Baldwin	cmpl	$268435456,%r10d
4290bc3d5698SJohn Baldwin	je	.L14rounds_alt
4291bc3d5698SJohn Baldwin
4292bc3d5698SJohn Baldwin	movups	%xmm0,(%rdx)
4293bc3d5698SJohn Baldwin	movups	%xmm2,16(%rdx)
4294bc3d5698SJohn Baldwin.byte	102,15,58,223,202,1
4295bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a_cold
4296bc3d5698SJohn Baldwin.byte	102,15,58,223,200,1
4297bc3d5698SJohn Baldwin	call	.Lkey_expansion_256b
4298bc3d5698SJohn Baldwin.byte	102,15,58,223,202,2
4299bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a
4300bc3d5698SJohn Baldwin.byte	102,15,58,223,200,2
4301bc3d5698SJohn Baldwin	call	.Lkey_expansion_256b
4302bc3d5698SJohn Baldwin.byte	102,15,58,223,202,4
4303bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a
4304bc3d5698SJohn Baldwin.byte	102,15,58,223,200,4
4305bc3d5698SJohn Baldwin	call	.Lkey_expansion_256b
4306bc3d5698SJohn Baldwin.byte	102,15,58,223,202,8
4307bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a
4308bc3d5698SJohn Baldwin.byte	102,15,58,223,200,8
4309bc3d5698SJohn Baldwin	call	.Lkey_expansion_256b
4310bc3d5698SJohn Baldwin.byte	102,15,58,223,202,16
4311bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a
4312bc3d5698SJohn Baldwin.byte	102,15,58,223,200,16
4313bc3d5698SJohn Baldwin	call	.Lkey_expansion_256b
4314bc3d5698SJohn Baldwin.byte	102,15,58,223,202,32
4315bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a
4316bc3d5698SJohn Baldwin.byte	102,15,58,223,200,32
4317bc3d5698SJohn Baldwin	call	.Lkey_expansion_256b
4318bc3d5698SJohn Baldwin.byte	102,15,58,223,202,64
4319bc3d5698SJohn Baldwin	call	.Lkey_expansion_256a
4320bc3d5698SJohn Baldwin	movups	%xmm0,(%rax)
4321bc3d5698SJohn Baldwin	movl	%esi,16(%rax)
4322bc3d5698SJohn Baldwin	xorq	%rax,%rax
4323bc3d5698SJohn Baldwin	jmp	.Lenc_key_ret
4324bc3d5698SJohn Baldwin
4325bc3d5698SJohn Baldwin.align	16
4326bc3d5698SJohn Baldwin.L14rounds_alt:
4327bc3d5698SJohn Baldwin	movdqa	.Lkey_rotate(%rip),%xmm5
4328bc3d5698SJohn Baldwin	movdqa	.Lkey_rcon1(%rip),%xmm4
4329bc3d5698SJohn Baldwin	movl	$7,%r10d
4330bc3d5698SJohn Baldwin	movdqu	%xmm0,0(%rdx)
4331bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
4332bc3d5698SJohn Baldwin	movdqu	%xmm2,16(%rdx)
4333bc3d5698SJohn Baldwin	jmp	.Loop_key256
4334bc3d5698SJohn Baldwin
/*
 * .Loop_key256 -- a full pass writes two 16-byte values, at (%rax) and
 * 16(%rax), then advances %rax by 32.  %r10d counts passes; the loop leaves
 * for .Ldone_key256 immediately after the first store of the last pass, so
 * that pass writes only one value.
 *
 * Register roles as used in this loop:
 *   %xmm0, %xmm1  the two 16-byte values the next outputs are derived from
 *   %xmm2         word being transformed / second output
 *   %xmm3         scratch
 *   %xmm4         round constant, shifted left one bit per pass
 *   %xmm5         pshufb control (loaded from .Lkey_rotate in .L14rounds_alt)
 */
4335bc3d5698SJohn Baldwin.align	16
4336bc3d5698SJohn Baldwin.Loop_key256:
/* With the .Lkey_rotate control every dword of %xmm2 becomes source bytes
 * 13,14,15,12, i.e. the top dword rotated by one byte and broadcast. */
4337bc3d5698SJohn Baldwin.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
/* All four columns are equal here, so ShiftRows changes nothing; the net
 * effect is SubBytes on each byte followed by XOR with %xmm4. */
4338bc3d5698SJohn Baldwin.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
4339bc3d5698SJohn Baldwin
/* Running XOR over dwords: %xmm0 = k ^ (k<<32) ^ (k<<64) ^ (k<<96),
 * where k is the incoming %xmm0 and shifts are whole-register byte shifts. */
4340bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
4341bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
4342bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4343bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
4344bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4345bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
4346bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
4347bc3d5698SJohn Baldwin	pslld	$1,%xmm4	/* double the round constant for the next pass */
4348bc3d5698SJohn Baldwin
4349bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0	/* fold in the transformed word */
4350bc3d5698SJohn Baldwin	movdqu	%xmm0,(%rax)	/* first output of this pass */
4351bc3d5698SJohn Baldwin
4352bc3d5698SJohn Baldwin	decl	%r10d
4353bc3d5698SJohn Baldwin	jz	.Ldone_key256	/* last pass: skip the second output */
4354bc3d5698SJohn Baldwin
4355bc3d5698SJohn Baldwin	pshufd	$0xff,%xmm0,%xmm2	/* broadcast dword 3 of the new %xmm0 */
4356bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3	/* zero "round key": no rotation, no constant */
4357bc3d5698SJohn Baldwin.byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 -- SubBytes only */
4358bc3d5698SJohn Baldwin
/* Same running XOR as above, applied to %xmm1. */
4359bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm3
4360bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
4361bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
4362bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
4363bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
4364bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
4365bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
4366bc3d5698SJohn Baldwin
4367bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm2
4368bc3d5698SJohn Baldwin	movdqu	%xmm2,16(%rax)	/* second output of this pass */
4369bc3d5698SJohn Baldwin	leaq	32(%rax),%rax	/* advance the output cursor past both stores */
4370bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1	/* carry the second output into the next pass */
4371bc3d5698SJohn Baldwin
4372bc3d5698SJohn Baldwin	jmp	.Loop_key256
4373bc3d5698SJohn Baldwin
/*
 * .Ldone_key256 -- exit of .Loop_key256.  %rax still points at the last
 * 16-byte value stored by the loop, so the 32-bit store below lands directly
 * after it.  %esi is loaded before this view; presumably it holds the round
 * count -- TODO confirm.  The function then returns 0 via the common exit.
 */
4374bc3d5698SJohn Baldwin.Ldone_key256:
4375bc3d5698SJohn Baldwin	movl	%esi,16(%rax)
4376bc3d5698SJohn Baldwin	xorl	%eax,%eax	/* return value 0 (also clears the upper half of %rax) */
4377bc3d5698SJohn Baldwin	jmp	.Lenc_key_ret
4378bc3d5698SJohn Baldwin
/*
 * .Lbad_keybits loads -2 into %rax as the return value and falls through.
 * .Lenc_key_ret is the shared exit reached by the `jmp .Lenc_key_ret`
 * sites in this function: it zeroes %xmm0-%xmm5 (the vector registers the
 * expansion code works in, presumably so no key material is left behind),
 * releases 8 bytes of stack and returns.  %rax is not modified here, so the
 * caller-visible result is whatever was placed in it before the jump.
 */
4379bc3d5698SJohn Baldwin.align	16
4380bc3d5698SJohn Baldwin.Lbad_keybits:
4381bc3d5698SJohn Baldwin	movq	$-2,%rax
4382bc3d5698SJohn Baldwin.Lenc_key_ret:
4383bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4384bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4385bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4386bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4387bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4388bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
/* Presumably undoes an 8-byte stack adjustment made at function entry
 * (outside this view); the CFI directive keeps unwind info in step. */
4389bc3d5698SJohn Baldwin	addq	$8,%rsp
4390bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	-8
4391bc3d5698SJohn Baldwin	.byte	0xf3,0xc3	/* rep ret */
4392bc3d5698SJohn Baldwin.LSEH_end_set_encrypt_key:
4393bc3d5698SJohn Baldwin
/*
 * .Lkey_expansion_128 -- helper that ends in a return, so it is meant to be
 * entered with `call` (the call sites are outside this view).
 *   .Lkey_expansion_128       store %xmm0 at (%rax), advance %rax by 16,
 *                             then update %xmm0
 *   .Lkey_expansion_128_cold  same update, without the store/advance
 * In/out:  %xmm0 (updated in place), %rax (output cursor)
 * In:      %xmm1 -- only dword 3 is used; presumably an AESKEYGENASSIST
 *                   result produced by the caller -- TODO confirm
 *          %xmm4 -- scratch; the result is a clean running XOR of the dwords
 *                   of %xmm0 only if dword 0 of %xmm4 is zero on entry --
 *                   TODO confirm against the caller
 * Clobbers %xmm1, %xmm4.
 */
4394bc3d5698SJohn Baldwin.align	16
4395bc3d5698SJohn Baldwin.Lkey_expansion_128:
4396bc3d5698SJohn Baldwin	movups	%xmm0,(%rax)
4397bc3d5698SJohn Baldwin	leaq	16(%rax),%rax
4398bc3d5698SJohn Baldwin.Lkey_expansion_128_cold:
4399bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4	/* xmm4 = { x4[0], x4[0], x0[1], x0[0] } */
4400bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
4401bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4	/* xmm4 = { x4[0], x4[3], x0[0], x0[2] } */
4402bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
4403bc3d5698SJohn Baldwin	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of %xmm1 */
4404bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm0
4405bc3d5698SJohn Baldwin	.byte	0xf3,0xc3	/* rep ret */
4406bc3d5698SJohn Baldwin
/*
 * .Lkey_expansion_192a -- helper that ends in a return (call sites are
 * outside this view).  Three entry points share one tail:
 *   .Lkey_expansion_192a       store %xmm0 at (%rax), advance %rax by 16
 *   .Lkey_expansion_192a_cold  skip the store; save %xmm2 in %xmm5
 *   .Lkey_expansion_192b_warm  skip the save as well (target of the jmp at
 *                              the end of .Lkey_expansion_192b)
 * The tail updates %xmm0 with the same shufps/xorps fold used by the other
 * helpers, XORs in dword 1 of %xmm1, then updates %xmm2 from its own
 * shifted copy and from dword 3 of the new %xmm0.
 * %xmm1 is presumably an AESKEYGENASSIST result and %xmm4 presumably has a
 * zero dword 0 on entry -- both are set by the caller, TODO confirm.
 * Clobbers %xmm1, %xmm3, %xmm4 (and %xmm5 on the _cold/192a entries).
 */
4407bc3d5698SJohn Baldwin.align	16
4408bc3d5698SJohn Baldwin.Lkey_expansion_192a:
4409bc3d5698SJohn Baldwin	movups	%xmm0,(%rax)
4410bc3d5698SJohn Baldwin	leaq	16(%rax),%rax
4411bc3d5698SJohn Baldwin.Lkey_expansion_192a_cold:
4412bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm5	/* saved copy; .Lkey_expansion_192b reads %xmm5 */
4413bc3d5698SJohn Baldwin.Lkey_expansion_192b_warm:
4414bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4	/* xmm4 = { x4[0], x4[0], x0[1], x0[0] } */
4415bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
4416bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
4417bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4	/* xmm4 = { x4[0], x4[3], x0[0], x0[2] } */
4418bc3d5698SJohn Baldwin	pslldq	$4,%xmm3	/* %xmm3 = old %xmm2 shifted up one dword */
4419bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
4420bc3d5698SJohn Baldwin	pshufd	$85,%xmm1,%xmm1	/* broadcast dword 1 of %xmm1 */
4421bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
4422bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
4423bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm3	/* broadcast dword 3 of the updated %xmm0 */
4424bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
4425bc3d5698SJohn Baldwin	.byte	0xf3,0xc3	/* rep ret */
4426bc3d5698SJohn Baldwin
/*
 * .Lkey_expansion_192b -- repacks %xmm5, %xmm0 and %xmm2 into two 16-byte
 * stores, advances %rax by 32, then continues in the shared tail at
 * .Lkey_expansion_192b_warm (which performs the return).
 *   (%rax)   = { x5[0], x5[1], x0[0], x0[1] }
 *   16(%rax) = { x0[2], x0[3], x2[0], x2[1] }
 * %xmm5 is the copy of %xmm2 saved at .Lkey_expansion_192a_cold.
 * Clobbers %xmm3, %xmm5, plus whatever the shared tail clobbers.
 */
4427bc3d5698SJohn Baldwin.align	16
4428bc3d5698SJohn Baldwin.Lkey_expansion_192b:
4429bc3d5698SJohn Baldwin	movaps	%xmm0,%xmm3
4430bc3d5698SJohn Baldwin	shufps	$68,%xmm0,%xmm5	/* low half of %xmm5, then low half of %xmm0 */
4431bc3d5698SJohn Baldwin	movups	%xmm5,(%rax)
4432bc3d5698SJohn Baldwin	shufps	$78,%xmm2,%xmm3	/* high half of %xmm0, then low half of %xmm2 */
4433bc3d5698SJohn Baldwin	movups	%xmm3,16(%rax)
4434bc3d5698SJohn Baldwin	leaq	32(%rax),%rax
4435bc3d5698SJohn Baldwin	jmp	.Lkey_expansion_192b_warm	/* enters after the %xmm5 save, so %xmm5 is not reloaded */
4436bc3d5698SJohn Baldwin
/*
 * .Lkey_expansion_256a -- helper that ends in a return (call sites are
 * outside this view).
 *   .Lkey_expansion_256a       store %xmm2 at (%rax), advance %rax by 16,
 *                              then update %xmm0
 *   .Lkey_expansion_256a_cold  same update, without the store/advance
 * Note the asymmetry: the value stored is %xmm2, the value updated is %xmm0.
 * The update is the same instruction sequence as .Lkey_expansion_128_cold.
 * %xmm1 (dword 3 used) is presumably an AESKEYGENASSIST result and %xmm4
 * presumably has a zero dword 0 on entry -- TODO confirm against the caller.
 * Clobbers %xmm1, %xmm4.
 */
4437bc3d5698SJohn Baldwin.align	16
4438bc3d5698SJohn Baldwin.Lkey_expansion_256a:
4439bc3d5698SJohn Baldwin	movups	%xmm2,(%rax)
4440bc3d5698SJohn Baldwin	leaq	16(%rax),%rax
4441bc3d5698SJohn Baldwin.Lkey_expansion_256a_cold:
4442bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4	/* xmm4 = { x4[0], x4[0], x0[1], x0[0] } */
4443bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
4444bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4	/* xmm4 = { x4[0], x4[3], x0[0], x0[2] } */
4445bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
4446bc3d5698SJohn Baldwin	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of %xmm1 */
4447bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm0
4448bc3d5698SJohn Baldwin	.byte	0xf3,0xc3	/* rep ret */
4449bc3d5698SJohn Baldwin
/*
 * .Lkey_expansion_256b -- counterpart of .Lkey_expansion_256a with the
 * roles of %xmm0 and %xmm2 swapped: stores %xmm0 at (%rax), advances %rax
 * by 16, then updates %xmm2 with the shufps/xorps fold.
 * Unlike the 256a helper it XORs in dword 2 of %xmm1 (immediate 170), not
 * dword 3, and it has no separate _cold entry point.
 * %xmm1 is presumably an AESKEYGENASSIST result and %xmm4 presumably has a
 * zero dword 0 on entry -- TODO confirm against the caller.
 * Clobbers %xmm1, %xmm4.
 */
4450bc3d5698SJohn Baldwin.align	16
4451bc3d5698SJohn Baldwin.Lkey_expansion_256b:
4452bc3d5698SJohn Baldwin	movups	%xmm0,(%rax)
4453bc3d5698SJohn Baldwin	leaq	16(%rax),%rax
4454bc3d5698SJohn Baldwin
4455bc3d5698SJohn Baldwin	shufps	$16,%xmm2,%xmm4	/* xmm4 = { x4[0], x4[0], x2[1], x2[0] } */
4456bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm2
4457bc3d5698SJohn Baldwin	shufps	$140,%xmm2,%xmm4	/* xmm4 = { x4[0], x4[3], x2[0], x2[2] } */
4458bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm2
4459bc3d5698SJohn Baldwin	shufps	$170,%xmm1,%xmm1	/* broadcast dword 2 of %xmm1 */
4460bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
4461bc3d5698SJohn Baldwin	.byte	0xf3,0xc3	/* rep ret */
4462bc3d5698SJohn Baldwin.cfi_endproc
4463bc3d5698SJohn Baldwin.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
4464bc3d5698SJohn Baldwin.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
/*
 * Constant tables.  Every table is exactly 16 bytes and the group starts on
 * a 64-byte boundary, so each label is 16-byte aligned; the movdqa loads of
 * .Lkey_rotate and .Lkey_rcon1 in .L14rounds_alt depend on that.
 * Only those two tables are referenced in this part of the file; the others
 * are presumably used by routines earlier in the file.
 */
4465bc3d5698SJohn Baldwin.align	64
4466bc3d5698SJohn Baldwin.Lbswap_mask:
/* Indices 15..0: reverses byte order when used as a pshufb control. */
4467bc3d5698SJohn Baldwin.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4468bc3d5698SJohn Baldwin.Lincrement32:
4469bc3d5698SJohn Baldwin.long	6,6,6,0
4470bc3d5698SJohn Baldwin.Lincrement64:
4471bc3d5698SJohn Baldwin.long	1,0,0,0
4472bc3d5698SJohn Baldwin.Lxts_magic:
4473bc3d5698SJohn Baldwin.long	0x87,0,1,0
4474bc3d5698SJohn Baldwin.Lincrement1:
4475bc3d5698SJohn Baldwin.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4476bc3d5698SJohn Baldwin.Lkey_rotate:
/* As a pshufb control: each dword selects source bytes 13,14,15,12. */
4477bc3d5698SJohn Baldwin.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4478bc3d5698SJohn Baldwin.Lkey_rotate192:
/* As a pshufb control: each dword selects source bytes 5,6,7,4. */
4479bc3d5698SJohn Baldwin.long	0x04070605,0x04070605,0x04070605,0x04070605
4480bc3d5698SJohn Baldwin.Lkey_rcon1:
4481bc3d5698SJohn Baldwin.long	1,1,1,1
4482bc3d5698SJohn Baldwin.Lkey_rcon1b:
4483bc3d5698SJohn Baldwin.long	0x1b,0x1b,0x1b,0x1b
4484bc3d5698SJohn Baldwin
/* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
4485bc3d5698SJohn Baldwin.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4486bc3d5698SJohn Baldwin.align	64
/*
 * ELF note in .note.gnu.property.  Layout: name size, descriptor size, note
 * type, name, then the descriptor holding one property record
 * (type, data size, data), each part padded to 8 bytes.
 * Per the x86-64 psABI / Linux gABI extensions: note type 5 is
 * NT_GNU_PROPERTY_TYPE_0, property 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND, and the value 3 sets the IBT and SHSTK
 * bits, i.e. it marks this object as CET-compatible.
 */
4487*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
4488*c0855eaaSJohn Baldwin	.p2align 3
4489*c0855eaaSJohn Baldwin	.long 1f - 0f	/* name size: the 4 bytes between 0: and 1: */
4490*c0855eaaSJohn Baldwin	.long 4f - 1f	/* descriptor size, including trailing padding */
4491*c0855eaaSJohn Baldwin	.long 5	/* note type */
4492*c0855eaaSJohn Baldwin0:
4493*c0855eaaSJohn Baldwin	# "GNU" encoded with .byte, since .asciz isn't supported
4494*c0855eaaSJohn Baldwin	# on Solaris.
4495*c0855eaaSJohn Baldwin	.byte 0x47
4496*c0855eaaSJohn Baldwin	.byte 0x4e
4497*c0855eaaSJohn Baldwin	.byte 0x55
4498*c0855eaaSJohn Baldwin	.byte 0
4499*c0855eaaSJohn Baldwin1:
4500*c0855eaaSJohn Baldwin	.p2align 3
4501*c0855eaaSJohn Baldwin	.long 0xc0000002	/* property type */
4502*c0855eaaSJohn Baldwin	.long 3f - 2f	/* property data size: the 4 bytes between 2: and 3: */
4503*c0855eaaSJohn Baldwin2:
4504*c0855eaaSJohn Baldwin	.long 3	/* property data: feature bit mask */
4505*c0855eaaSJohn Baldwin3:
4506*c0855eaaSJohn Baldwin	.p2align 3
4507*c0855eaaSJohn Baldwin4:
4508