/* xref: /freebsd/sys/crypto/openssl/amd64/aesni-x86_64.S (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5) */
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
/* Everything that follows is assembled into the code section. */
.text

/*
 * aesni_encrypt: encrypt a single 16-byte block with AES-NI.
 *
 * Arguments arrive in %rdi, %rsi, %rdx (System V AMD64 order).
 * In:   %rdi = address of the 16-byte input block
 *       %rsi = address of the 16-byte output block
 *       %rdx = address of the round-key array; the 32-bit word at
 *              offset 240 is the number of aesenc iterations to run
 * Out:  16 bytes stored at (%rsi)
 * Clobbers: %eax, %rdx, %xmm0-%xmm2 (all three are zeroed before the
 *           return), flags
 *
 * The AES-NI instructions and the return are emitted as raw .byte
 * sequences; the decoded mnemonic is shown next to each one.
 */
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.cfi_startproc
	movups	(%rdi),%xmm2		/* xmm2 = input block */
	movl	240(%rdx),%eax		/* eax = loop count */
	movups	(%rdx),%xmm0		/* round key 0 */
	movups	16(%rdx),%xmm1		/* round key 1 */
	leaq	32(%rdx),%rdx		/* rdx -> round key 2 */
	xorps	%xmm0,%xmm2		/* initial key addition */
.Loop_enc1_1:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax			/* sets ZF for the jnz below */
	movups	(%rdx),%xmm1		/* next round key (flags untouched) */
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0		/* scrub round keys from registers */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		/* store the result */
	pxor	%xmm2,%xmm2		/* scrub the register copy of the result */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_encrypt,.-aesni_encrypt

/*
 * aesni_decrypt: decrypt a single 16-byte block with AES-NI.
 *
 * Arguments arrive in %rdi, %rsi, %rdx (System V AMD64 order).
 * In:   %rdi = address of the 16-byte input block
 *       %rsi = address of the 16-byte output block
 *       %rdx = address of the round-key array consumed by
 *              aesdec/aesdeclast; the 32-bit word at offset 240 is the
 *              number of aesdec iterations to run
 * Out:  16 bytes stored at (%rsi)
 * Clobbers: %eax, %rdx, %xmm0-%xmm2 (all three are zeroed before the
 *           return), flags
 *
 * The AES-NI instructions and the return are emitted as raw .byte
 * sequences; the decoded mnemonic is shown next to each one.
 */
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.cfi_startproc
	movups	(%rdi),%xmm2		/* xmm2 = input block */
	movl	240(%rdx),%eax		/* eax = loop count */
	movups	(%rdx),%xmm0		/* round key 0 */
	movups	16(%rdx),%xmm1		/* round key 1 */
	leaq	32(%rdx),%rdx		/* rdx -> round key 2 */
	xorps	%xmm0,%xmm2		/* initial key addition */
.Loop_dec1_2:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%eax			/* sets ZF for the jnz below */
	movups	(%rdx),%xmm1		/* next round key (flags untouched) */
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0		/* scrub round keys from registers */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		/* store the result */
	pxor	%xmm2,%xmm2		/* scrub the register copy of the result */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2: encrypt the two blocks held in %xmm2 and %xmm3,
 * interleaving the rounds of both.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2, %xmm3 = blocks to process
 * Out:  %xmm2, %xmm3 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * %rcx is moved to 32+16*count bytes past the start of the key array and
 * %rax becomes a negative byte offset (16-16*count) that climbs by 32,
 * i.e. two round keys, per iteration.  The exit test expects the offset
 * to land exactly on zero, which needs an odd count of at least 3.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		/* round key 2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Lenc_loop2:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Lenc_loop2

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: decrypt the two blocks held in %xmm2 and %xmm3,
 * interleaving the rounds of both.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2, %xmm3 = blocks to process
 * Out:  %xmm2, %xmm3 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * %rcx is moved to 32+16*count bytes past the start of the key array and
 * %rax becomes a negative byte offset (16-16*count) that climbs by 32,
 * i.e. two round keys, per iteration.  The exit test expects the offset
 * to land exactly on zero, which needs an odd count of at least 3.
 */
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		/* round key 2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Ldec_loop2:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Ldec_loop2

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: encrypt the three blocks held in %xmm2-%xmm4,
 * interleaving the rounds of all three.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm4 = blocks to process
 * Out:  %xmm2-%xmm4 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * %rcx is moved to 32+16*count bytes past the start of the key array and
 * %rax becomes a negative byte offset (16-16*count) that climbs by 32,
 * i.e. two round keys, per iteration.  The exit test expects the offset
 * to land exactly on zero, which needs an odd count of at least 3.
 */
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0		/* round key 2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Lenc_loop3:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Lenc_loop3

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: decrypt the three blocks held in %xmm2-%xmm4,
 * interleaving the rounds of all three.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm4 = blocks to process
 * Out:  %xmm2-%xmm4 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * %rcx is moved to 32+16*count bytes past the start of the key array and
 * %rax becomes a negative byte offset (16-16*count) that climbs by 32,
 * i.e. two round keys, per iteration.  The exit test expects the offset
 * to land exactly on zero, which needs an odd count of at least 3.
 */
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0		/* round key 2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax		/* rax = 16 - 16*count */

.Ldec_loop3:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Ldec_loop3

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: encrypt the four blocks held in %xmm2-%xmm5,
 * interleaving the rounds of all four.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm5 = blocks to process
 * Out:  %xmm2-%xmm5 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * %rcx is moved to 32+16*count bytes past the start of the key array and
 * %rax becomes a negative byte offset (16-16*count) that climbs by 32,
 * i.e. two round keys, per iteration.  The exit test expects the offset
 * to land exactly on zero, which needs an odd count of at least 3.
 */
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0		/* round key 2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			/* nopl (%rax): 3-byte no-op padding */
	addq	$16,%rax		/* rax = 16 - 16*count */

.Lenc_loop4:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Lenc_loop4

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: decrypt the four blocks held in %xmm2-%xmm5,
 * interleaving the rounds of all four.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm5 = blocks to process
 * Out:  %xmm2-%xmm5 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * %rcx is moved to 32+16*count bytes past the start of the key array and
 * %rax becomes a negative byte offset (16-16*count) that climbs by 32,
 * i.e. two round keys, per iteration.  The exit test expects the offset
 * to land exactly on zero, which needs an odd count of at least 3.
 */
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0		/* round key 2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			/* nopl (%rax): 3-byte no-op padding */
	addq	$16,%rax		/* rax = 16 - 16*count */

.Ldec_loop4:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Ldec_loop4

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: encrypt the six blocks held in %xmm2-%xmm7,
 * interleaving the rounds of all six.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm7 = blocks to process
 * Out:  %xmm2-%xmm7 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue overlaps the initial key addition of the later blocks
 * with the first aesenc of %xmm2-%xmm4, then jumps into the middle of the
 * loop (.Lenc_loop6_enter) so %xmm5-%xmm7 catch up.  As in the smaller
 * helpers, %rax is a negative byte offset stepping by 32 up to zero,
 * which needs an odd count of at least 3.
 *
 * .Lenc_loop6 is also used as a call target by the CTR code later in
 * this file, so the label must stay in place.
 */
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition, blocks 0-2 */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	pxor	%xmm0,%xmm5		/* initial key addition, blocks 3-5 */
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.Lenc_loop6_enter:
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
.byte	102,15,56,220,240		/* aesenc %xmm0,%xmm6 */
.byte	102,15,56,220,248		/* aesenc %xmm0,%xmm7 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Lenc_loop6

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
.byte	102,15,56,221,240		/* aesenclast %xmm0,%xmm6 */
.byte	102,15,56,221,248		/* aesenclast %xmm0,%xmm7 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: decrypt the six blocks held in %xmm2-%xmm7,
 * interleaving the rounds of all six.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm7 = blocks to process
 * Out:  %xmm2-%xmm7 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue overlaps the initial key addition of the later blocks
 * with the first aesdec of %xmm2-%xmm4, then jumps into the middle of the
 * loop (.Ldec_loop6_enter) so %xmm5-%xmm7 catch up.  As in the smaller
 * helpers, %rax is a negative byte offset stepping by 32 up to zero,
 * which needs an odd count of at least 3.
 */
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition, blocks 0-2 */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm5		/* initial key addition, blocks 3-5 */
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.Ldec_loop6_enter:
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
.byte	102,15,56,222,240		/* aesdec %xmm0,%xmm6 */
.byte	102,15,56,222,248		/* aesdec %xmm0,%xmm7 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Ldec_loop6

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
.byte	102,15,56,223,240		/* aesdeclast %xmm0,%xmm6 */
.byte	102,15,56,223,248		/* aesdeclast %xmm0,%xmm7 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8: encrypt the eight blocks held in %xmm2-%xmm9,
 * interleaving the rounds of all eight.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm9 = blocks to process
 * Out:  %xmm2-%xmm9 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue overlaps the initial key addition of the later blocks
 * with the first aesenc of %xmm2 and %xmm3, then jumps into the loop at
 * .Lenc_loop8_inner so %xmm4-%xmm9 catch up.  %rax is a negative byte
 * offset stepping by 32 up to zero, which needs an odd count of at
 * least 3.  .Lenc_loop8_enter is not referenced in the visible part of
 * the file; it is kept in case later code uses it.
 */
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.Lenc_loop8_inner:
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
.byte	102,68,15,56,220,193		/* aesenc %xmm1,%xmm8 */
.byte	102,68,15,56,220,201		/* aesenc %xmm1,%xmm9 */
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224		/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232		/* aesenc %xmm0,%xmm5 */
.byte	102,15,56,220,240		/* aesenc %xmm0,%xmm6 */
.byte	102,15,56,220,248		/* aesenc %xmm0,%xmm7 */
.byte	102,68,15,56,220,192		/* aesenc %xmm0,%xmm8 */
.byte	102,68,15,56,220,200		/* aesenc %xmm0,%xmm9 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Lenc_loop8

.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233		/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241		/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249		/* aesenc %xmm1,%xmm7 */
.byte	102,68,15,56,220,193		/* aesenc %xmm1,%xmm8 */
.byte	102,68,15,56,220,201		/* aesenc %xmm1,%xmm9 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224		/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232		/* aesenclast %xmm0,%xmm5 */
.byte	102,15,56,221,240		/* aesenclast %xmm0,%xmm6 */
.byte	102,15,56,221,248		/* aesenclast %xmm0,%xmm7 */
.byte	102,68,15,56,221,192		/* aesenclast %xmm0,%xmm8 */
.byte	102,68,15,56,221,200		/* aesenclast %xmm0,%xmm9 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8: decrypt the eight blocks held in %xmm2-%xmm9,
 * interleaving the rounds of all eight.
 *
 * File-local helper (no .globl) with a register-based convention:
 * In:   %rcx = address of the round-key array
 *       %eax = round-loop count (the value stored at offset 240 of the
 *              key structure)
 *       %xmm2-%xmm9 = blocks to process
 * Out:  %xmm2-%xmm9 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags
 *
 * The prologue overlaps the initial key addition of the later blocks
 * with the first aesdec of %xmm2 and %xmm3, then jumps into the loop at
 * .Ldec_loop8_inner so %xmm4-%xmm9 catch up.  %rax is a negative byte
 * offset stepping by 32 up to zero, which needs an odd count of at
 * least 3.  .Ldec_loop8_enter is not referenced in the visible part of
 * the file; it is kept in case later code uses it.
 */
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0		/* round key 0 */
	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movups	16(%rcx),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm2		/* initial key addition */
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax			/* rax = -16*count */
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
	addq	$16,%rax		/* rax = 16 - 16*count */
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.Ldec_loop8_inner:
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
.byte	102,68,15,56,222,193		/* aesdec %xmm1,%xmm8 */
.byte	102,68,15,56,222,201		/* aesdec %xmm1,%xmm9 */
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1	/* next odd-numbered round key */
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,222,208		/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216		/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224		/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232		/* aesdec %xmm0,%xmm5 */
.byte	102,15,56,222,240		/* aesdec %xmm0,%xmm6 */
.byte	102,15,56,222,248		/* aesdec %xmm0,%xmm7 */
.byte	102,68,15,56,222,192		/* aesdec %xmm0,%xmm8 */
.byte	102,68,15,56,222,200		/* aesdec %xmm0,%xmm9 */
	movups	-16(%rcx,%rax,1),%xmm0	/* next even-numbered round key */
	jnz	.Ldec_loop8

.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225		/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233		/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241		/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249		/* aesdec %xmm1,%xmm7 */
.byte	102,68,15,56,222,193		/* aesdec %xmm1,%xmm8 */
.byte	102,68,15,56,222,201		/* aesdec %xmm1,%xmm9 */
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224		/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232		/* aesdeclast %xmm0,%xmm5 */
.byte	102,15,56,223,240		/* aesdeclast %xmm0,%xmm6 */
.byte	102,15,56,223,248		/* aesdeclast %xmm0,%xmm7 */
.byte	102,68,15,56,223,192		/* aesdeclast %xmm0,%xmm8 */
.byte	102,68,15,56,223,200		/* aesdeclast %xmm0,%xmm9 */
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	_aesni_decrypt8,.-_aesni_decrypt8
/*
 * aesni_ecb_encrypt: process a buffer block by block (each 16-byte block
 * is transformed independently of the others), encrypting or decrypting
 * according to a flag.
 *
 * Arguments arrive in System V AMD64 order.
 * In:   %rdi = input pointer
 *       %rsi = output pointer
 *       %rdx = length in bytes; it is rounded down to a multiple of 16
 *              and the routine returns at once if nothing is left
 *       %rcx = address of the round-key array (round-loop count at +240)
 *       %r8d = direction: non-zero takes the encrypt path, zero the
 *              decrypt path
 * Out:  processed blocks stored at (%rsi)
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9, flags
 *
 * Structure: while at least 128 bytes remain, eight blocks at a time go
 * through _aesni_encrypt8/_aesni_decrypt8, with the stores of one batch
 * interleaved with the loads of the next.  The remaining 1-7 blocks are
 * dispatched to the 1/2/3/4/6/8-block code; a 5-block tail uses the
 * 6-block helper and a 7-block tail the 8-block helper, with the unused
 * register zeroed first.
 *
 * %r11 and %r10d hold copies of the key pointer and round-loop count,
 * because the helpers modify %rcx and %rax.  On the decrypt path every
 * data register is zeroed after it has been stored.
 */
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
.cfi_startproc
	andq	$-16,%rdx		/* drop any partial trailing block */
	jz	.Lecb_ret

	movl	240(%rcx),%eax		/* eax = round-loop count */
	movups	(%rcx),%xmm0
	movq	%rcx,%r11		/* r11 = saved key pointer */
	movl	%eax,%r10d		/* r10d = saved round-loop count */
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail		/* fewer than 8 blocks */

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	/* Store the previous batch while loading the next one. */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx		/* restore key pointer for the helper */
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax		/* restore round-loop count */
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8		/* another full batch is available */

	/* Flush the final full batch. */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx		/* undo the last subtraction: bytes left */
	jz	.Lecb_ret

.Lecb_enc_tail:
	/*
	 * 1-7 blocks left.  Each cmpq feeds two branches: the loads in
	 * between do not modify the flags.
	 */
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9		/* 7 blocks: eighth lane is a dummy */
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0		/* round key 0 */
	movups	16(%rcx),%xmm1		/* round key 1 */
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		/* initial key addition */
.Loop_enc1_3:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax			/* sets ZF for the jnz below */
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7		/* 5 blocks: sixth lane is a dummy */
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail		/* fewer than 8 blocks */

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	/* Store the previous batch while loading the next one. */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx		/* restore key pointer for the helper */
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax		/* restore round-loop count */
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0		/* reload round key 0 into xmm0 */
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8		/* another full batch is available */

	/* Flush the final full batch, zeroing each register once stored. */
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx		/* undo the last subtraction: bytes left */
	jz	.Lecb_ret

.Lecb_dec_tail:
	/*
	 * 1-7 blocks left.  Each cmpq feeds two branches: the loads in
	 * between do not modify the flags.
	 */
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0		/* round key 0 (the helper loads it again) */
	xorps	%xmm9,%xmm9		/* 7 blocks: eighth lane is a dummy */
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9		/* dummy lane output is discarded */
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0		/* round key 0 */
	movups	16(%rcx),%xmm1		/* round key 1 */
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		/* initial key addition */
.Loop_dec1_4:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%eax			/* sets ZF for the jnz below */
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7		/* 5 blocks: sixth lane is a dummy */
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7		/* dummy lane output is discarded */
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0		/* scrub round keys from registers */
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks: for each 16-byte input block, encrypt a
 * counter block and a running MAC block side by side, XOR the encrypted
 * counter into the input to form the output, and advance the counter.
 *
 * Arguments arrive in System V AMD64 order.
 * In:   %rdi = input pointer
 *       %rsi = output pointer
 *       %rdx = number of 16-byte blocks; decremented once per block and
 *              the loop ends when it reaches zero (there is no check
 *              for zero on entry)
 *       %rcx = address of the round-key array (round-loop count at +240)
 *       %r8  = address of the 16-byte counter block (only read here)
 *       %r9  = address of the 16-byte MAC state (read on entry, written
 *              on exit)
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
 *           %xmm6-%xmm9, flags.  %xmm0-%xmm3, %xmm6 and %xmm8 are zeroed
 *           before the return.
 *
 * Register roles inside the loop:
 *   %xmm2 = counter block being encrypted   %xmm3 = MAC state
 *   %xmm6 = counter after pshufb with .Lbswap_mask; paddq of
 *           .Lincrement64 advances it
 *   %xmm7 = .Lbswap_mask, %xmm9 = .Lincrement64 (both are defined
 *           outside the visible part of this file; presumably a
 *           byte-reversal mask and a 64-bit increment - confirm there)
 *   %xmm8 = current input block
 *   %r11  = start of the key array, %r10 = 16-16*count (initial loop
 *           offset), %rcx = key array + 32 + 16*count
 */
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
.cfi_startproc
	movl	240(%rcx),%eax		/* eax = round-loop count */
	movdqu	(%r8),%xmm6		/* counter block */
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	shll	$4,%eax			/* count -> bytes (16 per round key) */
	movl	$16,%r10d
	leaq	0(%rcx),%r11		/* r11 = start of the key array */
	movdqu	(%r9),%xmm3		/* MAC state */
	movdqa	%xmm6,%xmm2		/* first counter block, as supplied */
	leaq	32(%rcx,%rax,1),%rcx
.byte	102,15,56,0,247			/* pshufb %xmm7,%xmm6 */
	subq	%rax,%r10		/* r10 = 16 - 16*count */
	jmp	.Lccm64_enc_outer	/* skip the alignment padding */
.align	16
.Lccm64_enc_outer:
	movups	(%r11),%xmm0		/* round key 0 */
	movq	%r10,%rax
	movups	(%rdi),%xmm8		/* input block */

	xorps	%xmm0,%xmm2		/* counter ^ key0 */
	movups	16(%r11),%xmm1		/* round key 1 */
	xorps	%xmm8,%xmm0		/* input ^ key0 */
	xorps	%xmm0,%xmm3		/* MAC ^ input ^ key0 */
	movups	32(%r11),%xmm0		/* round key 2 */

.Lccm64_enc2_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	paddq	%xmm9,%xmm6		/* advance the counter */
	decq	%rdx			/* ZF consumed by the jnz at loop end */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */

	/* Nothing from here to the jnz modifies the flags. */
	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8		/* output = input ^ encrypted counter */
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
.byte	102,15,56,0,215			/* pshufb %xmm7,%xmm2 */
	leaq	16(%rsi),%rsi
	jnz	.Lccm64_enc_outer

	pxor	%xmm0,%xmm0		/* scrub keys and data from registers */
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)		/* write back the MAC state */
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks: counterpart of the CCM encrypt routine.
 * Each output block is the input XORed with an encrypted counter block;
 * the recovered output block is then folded into the running MAC state,
 * which is encrypted alongside the next counter block.
 *
 * Only the forward AES rounds (aesenc/aesenclast) are used here.
 *
 * Arguments arrive in System V AMD64 order.
 * In:   %rdi = input pointer
 *       %rsi = output pointer
 *       %rdx = number of 16-byte blocks; one is subtracted per block and
 *              the loop ends when it reaches zero (there is no check
 *              for zero on entry)
 *       %rcx = address of the round-key array (round-loop count at +240)
 *       %r8  = address of the 16-byte counter block (only read here)
 *       %r9  = address of the 16-byte MAC state (read on entry, written
 *              on exit)
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
 *           %xmm6-%xmm9, flags.  %xmm0-%xmm3, %xmm6 and %xmm8 are zeroed
 *           before the return.
 *
 * Flow:
 *   1. Encrypt the first counter block on its own (.Loop_enc1_5).
 *   2. .Lccm64_dec_outer: output = input ^ encrypted counter; store it;
 *      prepare the next counter.  If that was the last block, leave.
 *   3. Otherwise encrypt the next counter (%xmm2) and the MAC state
 *      XORed with the block just produced (%xmm3) together, fetch the
 *      next input block, and go back to step 2.
 *   4. .Lccm64_dec_break: fold the last output block into the MAC state
 *      and encrypt it on its own (.Loop_enc1_6).
 *
 * .Lbswap_mask and .Lincrement64 are defined outside the visible part of
 * this file; presumably a byte-reversal mask and a 64-bit increment -
 * confirm there.
 */
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
.cfi_startproc
	movl	240(%rcx),%eax		/* eax = round-loop count */
	movups	(%r8),%xmm6		/* counter block */
	movdqu	(%r9),%xmm3		/* MAC state */
	movdqa	.Lincrement64(%rip),%xmm9
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm6,%xmm2		/* first counter block, as supplied */
	movl	%eax,%r10d		/* keep the count for later */
	movq	%rcx,%r11		/* r11 = start of the key array */
.byte	102,15,56,0,247			/* pshufb %xmm7,%xmm6 */
	movups	(%rcx),%xmm0		/* round key 0 */
	movups	16(%rcx),%xmm1		/* round key 1 */
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		/* initial key addition */
.Loop_enc1_5:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%eax			/* sets ZF for the jnz below */
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	shll	$4,%r10d		/* count -> bytes (16 per round key) */
	movl	$16,%eax
	movups	(%rdi),%xmm8		/* first input block */
	paddq	%xmm9,%xmm6		/* advance the counter */
	leaq	16(%rdi),%rdi
	subq	%r10,%rax		/* rax = 16 - 16*count */
	leaq	32(%r11,%r10,1),%rcx	/* rcx = key array + 32 + 16*count */
	movq	%rax,%r10		/* r10 = initial loop offset */
	jmp	.Lccm64_dec_outer	/* skip the alignment padding */
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8		/* output = input ^ encrypted counter */
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215			/* pshufb %xmm7,%xmm2 */

	subq	$1,%rdx
	jz	.Lccm64_dec_break	/* that was the last block */

	movups	(%r11),%xmm0		/* round key 0 */
	movq	%r10,%rax
	movups	16(%r11),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm8		/* output block ^ key0 */
	xorps	%xmm0,%xmm2		/* counter ^ key0 */
	xorps	%xmm8,%xmm3		/* MAC ^ output block ^ key0 */
	movups	32(%r11),%xmm0		/* round key 2 */
	jmp	.Lccm64_dec2_loop	/* skip the alignment padding */
.align	16
.Lccm64_dec2_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax		/* sets ZF for the jnz below */
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8		/* next input block */
	paddq	%xmm9,%xmm6		/* advance the counter */
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:

	movl	240(%r11),%eax		/* reload the round-loop count */
	movups	(%r11),%xmm0		/* round key 0 */
	movups	16(%r11),%xmm1		/* round key 1 */
	xorps	%xmm0,%xmm8		/* last output block ^ key0 */
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3		/* MAC ^ last output block ^ key0 */
.Loop_enc1_6:
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	decl	%eax			/* sets ZF for the jnz below */
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217		/* aesenclast %xmm1,%xmm3 */
	pxor	%xmm0,%xmm0		/* scrub keys and data from registers */
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)		/* write back the MAC state */
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1026.globl	aesni_ctr32_encrypt_blocks
1027.type	aesni_ctr32_encrypt_blocks,@function
1028.align	16
1029aesni_ctr32_encrypt_blocks:
1030.cfi_startproc
1031	cmpq	$1,%rdx
1032	jne	.Lctr32_bulk
1033
1034
1035
1036	movups	(%r8),%xmm2
1037	movups	(%rdi),%xmm3
1038	movl	240(%rcx),%edx
1039	movups	(%rcx),%xmm0
1040	movups	16(%rcx),%xmm1
1041	leaq	32(%rcx),%rcx
1042	xorps	%xmm0,%xmm2
1043.Loop_enc1_7:
1044.byte	102,15,56,220,209
1045	decl	%edx
1046	movups	(%rcx),%xmm1
1047	leaq	16(%rcx),%rcx
1048	jnz	.Loop_enc1_7
1049.byte	102,15,56,221,209
1050	pxor	%xmm0,%xmm0
1051	pxor	%xmm1,%xmm1
1052	xorps	%xmm3,%xmm2
1053	pxor	%xmm3,%xmm3
1054	movups	%xmm2,(%rsi)
1055	xorps	%xmm2,%xmm2
1056	jmp	.Lctr32_epilogue
1057
1058.align	16
1059.Lctr32_bulk:
1060	leaq	(%rsp),%r11
1061.cfi_def_cfa_register	%r11
1062	pushq	%rbp
1063.cfi_offset	%rbp,-16
1064	subq	$128,%rsp
1065	andq	$-16,%rsp
1066
1067
1068
1069
1070	movdqu	(%r8),%xmm2
1071	movdqu	(%rcx),%xmm0
1072	movl	12(%r8),%r8d
1073	pxor	%xmm0,%xmm2
1074	movl	12(%rcx),%ebp
1075	movdqa	%xmm2,0(%rsp)
1076	bswapl	%r8d
1077	movdqa	%xmm2,%xmm3
1078	movdqa	%xmm2,%xmm4
1079	movdqa	%xmm2,%xmm5
1080	movdqa	%xmm2,64(%rsp)
1081	movdqa	%xmm2,80(%rsp)
1082	movdqa	%xmm2,96(%rsp)
1083	movq	%rdx,%r10
1084	movdqa	%xmm2,112(%rsp)
1085
1086	leaq	1(%r8),%rax
1087	leaq	2(%r8),%rdx
1088	bswapl	%eax
1089	bswapl	%edx
1090	xorl	%ebp,%eax
1091	xorl	%ebp,%edx
1092.byte	102,15,58,34,216,3
1093	leaq	3(%r8),%rax
1094	movdqa	%xmm3,16(%rsp)
1095.byte	102,15,58,34,226,3
1096	bswapl	%eax
1097	movq	%r10,%rdx
1098	leaq	4(%r8),%r10
1099	movdqa	%xmm4,32(%rsp)
1100	xorl	%ebp,%eax
1101	bswapl	%r10d
1102.byte	102,15,58,34,232,3
1103	xorl	%ebp,%r10d
1104	movdqa	%xmm5,48(%rsp)
1105	leaq	5(%r8),%r9
1106	movl	%r10d,64+12(%rsp)
1107	bswapl	%r9d
1108	leaq	6(%r8),%r10
1109	movl	240(%rcx),%eax
1110	xorl	%ebp,%r9d
1111	bswapl	%r10d
1112	movl	%r9d,80+12(%rsp)
1113	xorl	%ebp,%r10d
1114	leaq	7(%r8),%r9
1115	movl	%r10d,96+12(%rsp)
1116	bswapl	%r9d
1117	movl	OPENSSL_ia32cap_P+4(%rip),%r10d
1118	xorl	%ebp,%r9d
1119	andl	$71303168,%r10d
1120	movl	%r9d,112+12(%rsp)
1121
1122	movups	16(%rcx),%xmm1
1123
1124	movdqa	64(%rsp),%xmm6
1125	movdqa	80(%rsp),%xmm7
1126
1127	cmpq	$8,%rdx
1128	jb	.Lctr32_tail
1129
1130	subq	$6,%rdx
1131	cmpl	$4194304,%r10d
1132	je	.Lctr32_6x
1133
1134	leaq	128(%rcx),%rcx
1135	subq	$2,%rdx
1136	jmp	.Lctr32_loop8
1137
1138.align	16
1139.Lctr32_6x:
1140	shll	$4,%eax
1141	movl	$48,%r10d
1142	bswapl	%ebp
1143	leaq	32(%rcx,%rax,1),%rcx
1144	subq	%rax,%r10
1145	jmp	.Lctr32_loop6
1146
1147.align	16
1148.Lctr32_loop6:
1149	addl	$6,%r8d
1150	movups	-48(%rcx,%r10,1),%xmm0
1151.byte	102,15,56,220,209
1152	movl	%r8d,%eax
1153	xorl	%ebp,%eax
1154.byte	102,15,56,220,217
1155.byte	0x0f,0x38,0xf1,0x44,0x24,12
1156	leal	1(%r8),%eax
1157.byte	102,15,56,220,225
1158	xorl	%ebp,%eax
1159.byte	0x0f,0x38,0xf1,0x44,0x24,28
1160.byte	102,15,56,220,233
1161	leal	2(%r8),%eax
1162	xorl	%ebp,%eax
1163.byte	102,15,56,220,241
1164.byte	0x0f,0x38,0xf1,0x44,0x24,44
1165	leal	3(%r8),%eax
1166.byte	102,15,56,220,249
1167	movups	-32(%rcx,%r10,1),%xmm1
1168	xorl	%ebp,%eax
1169
1170.byte	102,15,56,220,208
1171.byte	0x0f,0x38,0xf1,0x44,0x24,60
1172	leal	4(%r8),%eax
1173.byte	102,15,56,220,216
1174	xorl	%ebp,%eax
1175.byte	0x0f,0x38,0xf1,0x44,0x24,76
1176.byte	102,15,56,220,224
1177	leal	5(%r8),%eax
1178	xorl	%ebp,%eax
1179.byte	102,15,56,220,232
1180.byte	0x0f,0x38,0xf1,0x44,0x24,92
1181	movq	%r10,%rax
1182.byte	102,15,56,220,240
1183.byte	102,15,56,220,248
1184	movups	-16(%rcx,%r10,1),%xmm0
1185
1186	call	.Lenc_loop6
1187
1188	movdqu	(%rdi),%xmm8
1189	movdqu	16(%rdi),%xmm9
1190	movdqu	32(%rdi),%xmm10
1191	movdqu	48(%rdi),%xmm11
1192	movdqu	64(%rdi),%xmm12
1193	movdqu	80(%rdi),%xmm13
1194	leaq	96(%rdi),%rdi
1195	movups	-64(%rcx,%r10,1),%xmm1
1196	pxor	%xmm2,%xmm8
1197	movaps	0(%rsp),%xmm2
1198	pxor	%xmm3,%xmm9
1199	movaps	16(%rsp),%xmm3
1200	pxor	%xmm4,%xmm10
1201	movaps	32(%rsp),%xmm4
1202	pxor	%xmm5,%xmm11
1203	movaps	48(%rsp),%xmm5
1204	pxor	%xmm6,%xmm12
1205	movaps	64(%rsp),%xmm6
1206	pxor	%xmm7,%xmm13
1207	movaps	80(%rsp),%xmm7
1208	movdqu	%xmm8,(%rsi)
1209	movdqu	%xmm9,16(%rsi)
1210	movdqu	%xmm10,32(%rsi)
1211	movdqu	%xmm11,48(%rsi)
1212	movdqu	%xmm12,64(%rsi)
1213	movdqu	%xmm13,80(%rsi)
1214	leaq	96(%rsi),%rsi
1215
1216	subq	$6,%rdx
1217	jnc	.Lctr32_loop6
1218
1219	addq	$6,%rdx
1220	jz	.Lctr32_done
1221
1222	leal	-48(%r10),%eax
1223	leaq	-80(%rcx,%r10,1),%rcx
1224	negl	%eax
1225	shrl	$4,%eax
1226	jmp	.Lctr32_tail
1227
1228.align	32
1229.Lctr32_loop8:
1230	addl	$8,%r8d
1231	movdqa	96(%rsp),%xmm8
1232.byte	102,15,56,220,209
1233	movl	%r8d,%r9d
1234	movdqa	112(%rsp),%xmm9
1235.byte	102,15,56,220,217
1236	bswapl	%r9d
1237	movups	32-128(%rcx),%xmm0
1238.byte	102,15,56,220,225
1239	xorl	%ebp,%r9d
1240	nop
1241.byte	102,15,56,220,233
1242	movl	%r9d,0+12(%rsp)
1243	leaq	1(%r8),%r9
1244.byte	102,15,56,220,241
1245.byte	102,15,56,220,249
1246.byte	102,68,15,56,220,193
1247.byte	102,68,15,56,220,201
1248	movups	48-128(%rcx),%xmm1
1249	bswapl	%r9d
1250.byte	102,15,56,220,208
1251.byte	102,15,56,220,216
1252	xorl	%ebp,%r9d
1253.byte	0x66,0x90
1254.byte	102,15,56,220,224
1255.byte	102,15,56,220,232
1256	movl	%r9d,16+12(%rsp)
1257	leaq	2(%r8),%r9
1258.byte	102,15,56,220,240
1259.byte	102,15,56,220,248
1260.byte	102,68,15,56,220,192
1261.byte	102,68,15,56,220,200
1262	movups	64-128(%rcx),%xmm0
1263	bswapl	%r9d
1264.byte	102,15,56,220,209
1265.byte	102,15,56,220,217
1266	xorl	%ebp,%r9d
1267.byte	0x66,0x90
1268.byte	102,15,56,220,225
1269.byte	102,15,56,220,233
1270	movl	%r9d,32+12(%rsp)
1271	leaq	3(%r8),%r9
1272.byte	102,15,56,220,241
1273.byte	102,15,56,220,249
1274.byte	102,68,15,56,220,193
1275.byte	102,68,15,56,220,201
1276	movups	80-128(%rcx),%xmm1
1277	bswapl	%r9d
1278.byte	102,15,56,220,208
1279.byte	102,15,56,220,216
1280	xorl	%ebp,%r9d
1281.byte	0x66,0x90
1282.byte	102,15,56,220,224
1283.byte	102,15,56,220,232
1284	movl	%r9d,48+12(%rsp)
1285	leaq	4(%r8),%r9
1286.byte	102,15,56,220,240
1287.byte	102,15,56,220,248
1288.byte	102,68,15,56,220,192
1289.byte	102,68,15,56,220,200
1290	movups	96-128(%rcx),%xmm0
1291	bswapl	%r9d
1292.byte	102,15,56,220,209
1293.byte	102,15,56,220,217
1294	xorl	%ebp,%r9d
1295.byte	0x66,0x90
1296.byte	102,15,56,220,225
1297.byte	102,15,56,220,233
1298	movl	%r9d,64+12(%rsp)
1299	leaq	5(%r8),%r9
1300.byte	102,15,56,220,241
1301.byte	102,15,56,220,249
1302.byte	102,68,15,56,220,193
1303.byte	102,68,15,56,220,201
1304	movups	112-128(%rcx),%xmm1
1305	bswapl	%r9d
1306.byte	102,15,56,220,208
1307.byte	102,15,56,220,216
1308	xorl	%ebp,%r9d
1309.byte	0x66,0x90
1310.byte	102,15,56,220,224
1311.byte	102,15,56,220,232
1312	movl	%r9d,80+12(%rsp)
1313	leaq	6(%r8),%r9
1314.byte	102,15,56,220,240
1315.byte	102,15,56,220,248
1316.byte	102,68,15,56,220,192
1317.byte	102,68,15,56,220,200
1318	movups	128-128(%rcx),%xmm0
1319	bswapl	%r9d
1320.byte	102,15,56,220,209
1321.byte	102,15,56,220,217
1322	xorl	%ebp,%r9d
1323.byte	0x66,0x90
1324.byte	102,15,56,220,225
1325.byte	102,15,56,220,233
1326	movl	%r9d,96+12(%rsp)
1327	leaq	7(%r8),%r9
1328.byte	102,15,56,220,241
1329.byte	102,15,56,220,249
1330.byte	102,68,15,56,220,193
1331.byte	102,68,15,56,220,201
1332	movups	144-128(%rcx),%xmm1
1333	bswapl	%r9d
1334.byte	102,15,56,220,208
1335.byte	102,15,56,220,216
1336.byte	102,15,56,220,224
1337	xorl	%ebp,%r9d
1338	movdqu	0(%rdi),%xmm10
1339.byte	102,15,56,220,232
1340	movl	%r9d,112+12(%rsp)
1341	cmpl	$11,%eax
1342.byte	102,15,56,220,240
1343.byte	102,15,56,220,248
1344.byte	102,68,15,56,220,192
1345.byte	102,68,15,56,220,200
1346	movups	160-128(%rcx),%xmm0
1347
1348	jb	.Lctr32_enc_done
1349
1350.byte	102,15,56,220,209
1351.byte	102,15,56,220,217
1352.byte	102,15,56,220,225
1353.byte	102,15,56,220,233
1354.byte	102,15,56,220,241
1355.byte	102,15,56,220,249
1356.byte	102,68,15,56,220,193
1357.byte	102,68,15,56,220,201
1358	movups	176-128(%rcx),%xmm1
1359
1360.byte	102,15,56,220,208
1361.byte	102,15,56,220,216
1362.byte	102,15,56,220,224
1363.byte	102,15,56,220,232
1364.byte	102,15,56,220,240
1365.byte	102,15,56,220,248
1366.byte	102,68,15,56,220,192
1367.byte	102,68,15,56,220,200
1368	movups	192-128(%rcx),%xmm0
1369	je	.Lctr32_enc_done
1370
1371.byte	102,15,56,220,209
1372.byte	102,15,56,220,217
1373.byte	102,15,56,220,225
1374.byte	102,15,56,220,233
1375.byte	102,15,56,220,241
1376.byte	102,15,56,220,249
1377.byte	102,68,15,56,220,193
1378.byte	102,68,15,56,220,201
1379	movups	208-128(%rcx),%xmm1
1380
1381.byte	102,15,56,220,208
1382.byte	102,15,56,220,216
1383.byte	102,15,56,220,224
1384.byte	102,15,56,220,232
1385.byte	102,15,56,220,240
1386.byte	102,15,56,220,248
1387.byte	102,68,15,56,220,192
1388.byte	102,68,15,56,220,200
1389	movups	224-128(%rcx),%xmm0
1390	jmp	.Lctr32_enc_done
1391
1392.align	16
1393.Lctr32_enc_done:
1394	movdqu	16(%rdi),%xmm11
1395	pxor	%xmm0,%xmm10
1396	movdqu	32(%rdi),%xmm12
1397	pxor	%xmm0,%xmm11
1398	movdqu	48(%rdi),%xmm13
1399	pxor	%xmm0,%xmm12
1400	movdqu	64(%rdi),%xmm14
1401	pxor	%xmm0,%xmm13
1402	movdqu	80(%rdi),%xmm15
1403	pxor	%xmm0,%xmm14
1404	pxor	%xmm0,%xmm15
1405.byte	102,15,56,220,209
1406.byte	102,15,56,220,217
1407.byte	102,15,56,220,225
1408.byte	102,15,56,220,233
1409.byte	102,15,56,220,241
1410.byte	102,15,56,220,249
1411.byte	102,68,15,56,220,193
1412.byte	102,68,15,56,220,201
1413	movdqu	96(%rdi),%xmm1
1414	leaq	128(%rdi),%rdi
1415
1416.byte	102,65,15,56,221,210
1417	pxor	%xmm0,%xmm1
1418	movdqu	112-128(%rdi),%xmm10
1419.byte	102,65,15,56,221,219
1420	pxor	%xmm0,%xmm10
1421	movdqa	0(%rsp),%xmm11
1422.byte	102,65,15,56,221,228
1423.byte	102,65,15,56,221,237
1424	movdqa	16(%rsp),%xmm12
1425	movdqa	32(%rsp),%xmm13
1426.byte	102,65,15,56,221,246
1427.byte	102,65,15,56,221,255
1428	movdqa	48(%rsp),%xmm14
1429	movdqa	64(%rsp),%xmm15
1430.byte	102,68,15,56,221,193
1431	movdqa	80(%rsp),%xmm0
1432	movups	16-128(%rcx),%xmm1
1433.byte	102,69,15,56,221,202
1434
1435	movups	%xmm2,(%rsi)
1436	movdqa	%xmm11,%xmm2
1437	movups	%xmm3,16(%rsi)
1438	movdqa	%xmm12,%xmm3
1439	movups	%xmm4,32(%rsi)
1440	movdqa	%xmm13,%xmm4
1441	movups	%xmm5,48(%rsi)
1442	movdqa	%xmm14,%xmm5
1443	movups	%xmm6,64(%rsi)
1444	movdqa	%xmm15,%xmm6
1445	movups	%xmm7,80(%rsi)
1446	movdqa	%xmm0,%xmm7
1447	movups	%xmm8,96(%rsi)
1448	movups	%xmm9,112(%rsi)
1449	leaq	128(%rsi),%rsi
1450
1451	subq	$8,%rdx
1452	jnc	.Lctr32_loop8
1453
1454	addq	$8,%rdx
1455	jz	.Lctr32_done
1456	leaq	-128(%rcx),%rcx
1457
1458.Lctr32_tail:
1459
1460
1461	leaq	16(%rcx),%rcx
1462	cmpq	$4,%rdx
1463	jb	.Lctr32_loop3
1464	je	.Lctr32_loop4
1465
1466
1467	shll	$4,%eax
1468	movdqa	96(%rsp),%xmm8
1469	pxor	%xmm9,%xmm9
1470
1471	movups	16(%rcx),%xmm0
1472.byte	102,15,56,220,209
1473.byte	102,15,56,220,217
1474	leaq	32-16(%rcx,%rax,1),%rcx
1475	negq	%rax
1476.byte	102,15,56,220,225
1477	addq	$16,%rax
1478	movups	(%rdi),%xmm10
1479.byte	102,15,56,220,233
1480.byte	102,15,56,220,241
1481	movups	16(%rdi),%xmm11
1482	movups	32(%rdi),%xmm12
1483.byte	102,15,56,220,249
1484.byte	102,68,15,56,220,193
1485
1486	call	.Lenc_loop8_enter
1487
1488	movdqu	48(%rdi),%xmm13
1489	pxor	%xmm10,%xmm2
1490	movdqu	64(%rdi),%xmm10
1491	pxor	%xmm11,%xmm3
1492	movdqu	%xmm2,(%rsi)
1493	pxor	%xmm12,%xmm4
1494	movdqu	%xmm3,16(%rsi)
1495	pxor	%xmm13,%xmm5
1496	movdqu	%xmm4,32(%rsi)
1497	pxor	%xmm10,%xmm6
1498	movdqu	%xmm5,48(%rsi)
1499	movdqu	%xmm6,64(%rsi)
1500	cmpq	$6,%rdx
1501	jb	.Lctr32_done
1502
1503	movups	80(%rdi),%xmm11
1504	xorps	%xmm11,%xmm7
1505	movups	%xmm7,80(%rsi)
1506	je	.Lctr32_done
1507
1508	movups	96(%rdi),%xmm12
1509	xorps	%xmm12,%xmm8
1510	movups	%xmm8,96(%rsi)
1511	jmp	.Lctr32_done
1512
1513.align	32
1514.Lctr32_loop4:
1515.byte	102,15,56,220,209
1516	leaq	16(%rcx),%rcx
1517	decl	%eax
1518.byte	102,15,56,220,217
1519.byte	102,15,56,220,225
1520.byte	102,15,56,220,233
1521	movups	(%rcx),%xmm1
1522	jnz	.Lctr32_loop4
1523.byte	102,15,56,221,209
1524.byte	102,15,56,221,217
1525	movups	(%rdi),%xmm10
1526	movups	16(%rdi),%xmm11
1527.byte	102,15,56,221,225
1528.byte	102,15,56,221,233
1529	movups	32(%rdi),%xmm12
1530	movups	48(%rdi),%xmm13
1531
1532	xorps	%xmm10,%xmm2
1533	movups	%xmm2,(%rsi)
1534	xorps	%xmm11,%xmm3
1535	movups	%xmm3,16(%rsi)
1536	pxor	%xmm12,%xmm4
1537	movdqu	%xmm4,32(%rsi)
1538	pxor	%xmm13,%xmm5
1539	movdqu	%xmm5,48(%rsi)
1540	jmp	.Lctr32_done
1541
1542.align	32
1543.Lctr32_loop3:
1544.byte	102,15,56,220,209
1545	leaq	16(%rcx),%rcx
1546	decl	%eax
1547.byte	102,15,56,220,217
1548.byte	102,15,56,220,225
1549	movups	(%rcx),%xmm1
1550	jnz	.Lctr32_loop3
1551.byte	102,15,56,221,209
1552.byte	102,15,56,221,217
1553.byte	102,15,56,221,225
1554
1555	movups	(%rdi),%xmm10
1556	xorps	%xmm10,%xmm2
1557	movups	%xmm2,(%rsi)
1558	cmpq	$2,%rdx
1559	jb	.Lctr32_done
1560
1561	movups	16(%rdi),%xmm11
1562	xorps	%xmm11,%xmm3
1563	movups	%xmm3,16(%rsi)
1564	je	.Lctr32_done
1565
1566	movups	32(%rdi),%xmm12
1567	xorps	%xmm12,%xmm4
1568	movups	%xmm4,32(%rsi)
1569
1570.Lctr32_done:
1571	xorps	%xmm0,%xmm0
1572	xorl	%ebp,%ebp
1573	pxor	%xmm1,%xmm1
1574	pxor	%xmm2,%xmm2
1575	pxor	%xmm3,%xmm3
1576	pxor	%xmm4,%xmm4
1577	pxor	%xmm5,%xmm5
1578	pxor	%xmm6,%xmm6
1579	pxor	%xmm7,%xmm7
1580	movaps	%xmm0,0(%rsp)
1581	pxor	%xmm8,%xmm8
1582	movaps	%xmm0,16(%rsp)
1583	pxor	%xmm9,%xmm9
1584	movaps	%xmm0,32(%rsp)
1585	pxor	%xmm10,%xmm10
1586	movaps	%xmm0,48(%rsp)
1587	pxor	%xmm11,%xmm11
1588	movaps	%xmm0,64(%rsp)
1589	pxor	%xmm12,%xmm12
1590	movaps	%xmm0,80(%rsp)
1591	pxor	%xmm13,%xmm13
1592	movaps	%xmm0,96(%rsp)
1593	pxor	%xmm14,%xmm14
1594	movaps	%xmm0,112(%rsp)
1595	pxor	%xmm15,%xmm15
1596	movq	-8(%r11),%rbp
1597.cfi_restore	%rbp
1598	leaq	(%r11),%rsp
1599.cfi_def_cfa_register	%rsp
1600.Lctr32_epilogue:
1601	.byte	0xf3,0xc3
1602.cfi_endproc
1603.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt -- encrypt a byte string six 16-byte blocks at a time,
 * XORing a per-block tweak in before and after the block cipher.
 *
 * Registers read on entry, as used by the code below:
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  length in bytes
 *   %rcx  key schedule applied to the data blocks (loop count at offset 240)
 *   %r8   key schedule used once, to encrypt the initial tweak
 *   %r9   pointer to the 16 bytes the first tweak is derived from
 *
 * The ".byte 102,15,56,220,..." and ".byte 102,15,56,221,..." sequences are
 * hand-encoded aesenc and aesenclast instructions (66 0F 38 DC / DD).
 *
 * Scratch frame: 0..80(%rsp) hold six values of (tweak XOR last round key),
 * 96(%rsp) holds (first round key XOR last round key).  All seven slots and
 * %xmm0..%xmm15 are zeroed before returning.
 *
 * NOTE(review): .Lxts_magic and _aesni_encrypt2/3/4/6 are defined elsewhere
 * in this file; the tweak update is presumably the XTS multiply-by-x in
 * GF(2^128) -- confirm against the constant's definition.
 */
1604.globl	aesni_xts_encrypt
1605.type	aesni_xts_encrypt,@function
1606.align	16
1607aesni_xts_encrypt:
1608.cfi_startproc
/* Keep the entry %rsp in %r11, save %rbp, and carve out a 16-byte aligned
 * scratch area of at least 112 bytes. */
1609	leaq	(%rsp),%r11
1610.cfi_def_cfa_register	%r11
1611	pushq	%rbp
1612.cfi_offset	%rbp,-16
1613	subq	$112,%rsp
1614	andq	$-16,%rsp
/* Encrypt the 16 bytes at (%r9) with the schedule at %r8; the result in
 * %xmm2 is the first tweak.  %eax counts aesenc iterations; %r10d picks up
 * the corresponding count for the data schedule at %rcx. */
1615	movups	(%r9),%xmm2
1616	movl	240(%r8),%eax
1617	movl	240(%rcx),%r10d
1618	movups	(%r8),%xmm0
1619	movups	16(%r8),%xmm1
1620	leaq	32(%r8),%r8
1621	xorps	%xmm0,%xmm2
1622.Loop_enc1_8:
1623.byte	102,15,56,220,209
1624	decl	%eax
/* movups and leaq do not modify flags, so jnz still tests the decl result. */
1625	movups	(%r8),%xmm1
1626	leaq	16(%r8),%r8
1627	jnz	.Loop_enc1_8
1628.byte	102,15,56,221,209
/* %xmm0 = first round key of the data schedule, %rbp = schedule base,
 * %eax = loop count, %r10 = 16*count.  %r9 keeps the full byte length while
 * %rdx is rounded down to a whole number of 16-byte blocks. */
1629	movups	(%rcx),%xmm0
1630	movq	%rcx,%rbp
1631	movl	%r10d,%eax
1632	shll	$4,%r10d
1633	movq	%rdx,%r9
1634	andq	$-16,%rdx
1635
/* %xmm1 = round key at offset 16+16*count, i.e. the key a single-block loop
 * would hand to its final aesenclast. */
1636	movups	16(%rcx,%r10,1),%xmm1
1637
/* Derive the first six tweaks.  %xmm15 is the running tweak; %xmm9 holds
 * replicated high dwords whose sign bits (psrad $31) select which parts of
 * .Lxts_magic get XORed in after each 64-bit-lane doubling (paddq).
 * %xmm10..%xmm14 receive tweaks 0..4 with round key 0 already XORed in;
 * %xmm15 ends as tweak 5 without the key.  %xmm1 becomes
 * (first round key XOR last round key). */
1638	movdqa	.Lxts_magic(%rip),%xmm8
1639	movdqa	%xmm2,%xmm15
1640	pshufd	$0x5f,%xmm2,%xmm9
1641	pxor	%xmm0,%xmm1
1642	movdqa	%xmm9,%xmm14
1643	paddd	%xmm9,%xmm9
1644	movdqa	%xmm15,%xmm10
1645	psrad	$31,%xmm14
1646	paddq	%xmm15,%xmm15
1647	pand	%xmm8,%xmm14
1648	pxor	%xmm0,%xmm10
1649	pxor	%xmm14,%xmm15
1650	movdqa	%xmm9,%xmm14
1651	paddd	%xmm9,%xmm9
1652	movdqa	%xmm15,%xmm11
1653	psrad	$31,%xmm14
1654	paddq	%xmm15,%xmm15
1655	pand	%xmm8,%xmm14
1656	pxor	%xmm0,%xmm11
1657	pxor	%xmm14,%xmm15
1658	movdqa	%xmm9,%xmm14
1659	paddd	%xmm9,%xmm9
1660	movdqa	%xmm15,%xmm12
1661	psrad	$31,%xmm14
1662	paddq	%xmm15,%xmm15
1663	pand	%xmm8,%xmm14
1664	pxor	%xmm0,%xmm12
1665	pxor	%xmm14,%xmm15
1666	movdqa	%xmm9,%xmm14
1667	paddd	%xmm9,%xmm9
1668	movdqa	%xmm15,%xmm13
1669	psrad	$31,%xmm14
1670	paddq	%xmm15,%xmm15
1671	pand	%xmm8,%xmm14
1672	pxor	%xmm0,%xmm13
1673	pxor	%xmm14,%xmm15
1674	movdqa	%xmm15,%xmm14
1675	psrad	$31,%xmm9
1676	paddq	%xmm15,%xmm15
1677	pand	%xmm8,%xmm9
1678	pxor	%xmm0,%xmm14
1679	pxor	%xmm9,%xmm15
/* 96(%rsp) = first round key XOR last round key. */
1680	movaps	%xmm1,96(%rsp)
1681
/* Fewer than six whole blocks (96 bytes)?  Go straight to the tail code. */
1682	subq	$96,%rdx
1683	jc	.Lxts_enc_short
1684
/* Six-block loop setup: %rcx = base+32+16*count, and %rax = %r10 =
 * 112-16*count is the index that .Lxts_enc_loop6 steps by 32 up to zero.
 * %xmm1 = second round key, %r8 = address of .Lxts_magic. */
1685	movl	$16+96,%eax
1686	leaq	32(%rbp,%r10,1),%rcx
1687	subq	%r10,%rax
1688	movups	16(%rbp),%xmm1
1689	movq	%rax,%r10
1690	leaq	.Lxts_magic(%rip),%r8
1691	jmp	.Lxts_enc_grandloop
1692
1693.align	32
/* Per iteration: load six input blocks into %xmm2..%xmm7 and XOR each with
 * its tweak (round key 0 is already folded into the tweak), interleaved with
 * the first aesenc rounds.  %xmm9 = 96(%rsp) converts each tweak into
 * (tweak XOR last round key), which is parked at 0..80(%rsp) for the final
 * aesenclast. */
1694.Lxts_enc_grandloop:
1695	movdqu	0(%rdi),%xmm2
1696	movdqa	%xmm0,%xmm8
1697	movdqu	16(%rdi),%xmm3
1698	pxor	%xmm10,%xmm2
1699	movdqu	32(%rdi),%xmm4
1700	pxor	%xmm11,%xmm3
1701.byte	102,15,56,220,209
1702	movdqu	48(%rdi),%xmm5
1703	pxor	%xmm12,%xmm4
1704.byte	102,15,56,220,217
1705	movdqu	64(%rdi),%xmm6
1706	pxor	%xmm13,%xmm5
1707.byte	102,15,56,220,225
1708	movdqu	80(%rdi),%xmm7
1709	pxor	%xmm15,%xmm8
1710	movdqa	96(%rsp),%xmm9
1711	pxor	%xmm14,%xmm6
1712.byte	102,15,56,220,233
1713	movups	32(%rbp),%xmm0
1714	leaq	96(%rdi),%rdi
1715	pxor	%xmm8,%xmm7
1716
1717	pxor	%xmm9,%xmm10
1718.byte	102,15,56,220,241
1719	pxor	%xmm9,%xmm11
1720	movdqa	%xmm10,0(%rsp)
1721.byte	102,15,56,220,249
1722	movups	48(%rbp),%xmm1
1723	pxor	%xmm9,%xmm12
1724
1725.byte	102,15,56,220,208
1726	pxor	%xmm9,%xmm13
1727	movdqa	%xmm11,16(%rsp)
1728.byte	102,15,56,220,216
1729	pxor	%xmm9,%xmm14
1730	movdqa	%xmm12,32(%rsp)
1731.byte	102,15,56,220,224
1732.byte	102,15,56,220,232
1733	pxor	%xmm9,%xmm8
1734	movdqa	%xmm14,64(%rsp)
1735.byte	102,15,56,220,240
1736.byte	102,15,56,220,248
1737	movups	64(%rbp),%xmm0
1738	movdqa	%xmm8,80(%rsp)
1739	pshufd	$0x5f,%xmm15,%xmm9
1740	jmp	.Lxts_enc_loop6
1741.align	32
/* Middle rounds: two round keys per pass, addressed relative to %rcx by the
 * index in %rax.  The jnz consumes the flags from addq; the movups in
 * between does not touch them. */
1742.Lxts_enc_loop6:
1743.byte	102,15,56,220,209
1744.byte	102,15,56,220,217
1745.byte	102,15,56,220,225
1746.byte	102,15,56,220,233
1747.byte	102,15,56,220,241
1748.byte	102,15,56,220,249
1749	movups	-64(%rcx,%rax,1),%xmm1
1750	addq	$32,%rax
1751
1752.byte	102,15,56,220,208
1753.byte	102,15,56,220,216
1754.byte	102,15,56,220,224
1755.byte	102,15,56,220,232
1756.byte	102,15,56,220,240
1757.byte	102,15,56,220,248
1758	movups	-80(%rcx,%rax,1),%xmm0
1759	jnz	.Lxts_enc_loop6
1760
/* Remaining rounds, interleaved with generating the next six tweaks:
 * %xmm10..%xmm14 are rebuilt as (round key 0 from (%rbp)) XOR tweak, and
 * %xmm15 keeps advancing as the raw running tweak.  %xmm13 from the current
 * iteration is parked at 48(%rsp) before being overwritten. */
1761	movdqa	(%r8),%xmm8
1762	movdqa	%xmm9,%xmm14
1763	paddd	%xmm9,%xmm9
1764.byte	102,15,56,220,209
1765	paddq	%xmm15,%xmm15
1766	psrad	$31,%xmm14
1767.byte	102,15,56,220,217
1768	pand	%xmm8,%xmm14
1769	movups	(%rbp),%xmm10
1770.byte	102,15,56,220,225
1771.byte	102,15,56,220,233
1772.byte	102,15,56,220,241
1773	pxor	%xmm14,%xmm15
1774	movaps	%xmm10,%xmm11
1775.byte	102,15,56,220,249
1776	movups	-64(%rcx),%xmm1
1777
1778	movdqa	%xmm9,%xmm14
1779.byte	102,15,56,220,208
1780	paddd	%xmm9,%xmm9
1781	pxor	%xmm15,%xmm10
1782.byte	102,15,56,220,216
1783	psrad	$31,%xmm14
1784	paddq	%xmm15,%xmm15
1785.byte	102,15,56,220,224
1786.byte	102,15,56,220,232
1787	pand	%xmm8,%xmm14
1788	movaps	%xmm11,%xmm12
1789.byte	102,15,56,220,240
1790	pxor	%xmm14,%xmm15
1791	movdqa	%xmm9,%xmm14
1792.byte	102,15,56,220,248
1793	movups	-48(%rcx),%xmm0
1794
1795	paddd	%xmm9,%xmm9
1796.byte	102,15,56,220,209
1797	pxor	%xmm15,%xmm11
1798	psrad	$31,%xmm14
1799.byte	102,15,56,220,217
1800	paddq	%xmm15,%xmm15
1801	pand	%xmm8,%xmm14
1802.byte	102,15,56,220,225
1803.byte	102,15,56,220,233
1804	movdqa	%xmm13,48(%rsp)
1805	pxor	%xmm14,%xmm15
1806.byte	102,15,56,220,241
1807	movaps	%xmm12,%xmm13
1808	movdqa	%xmm9,%xmm14
1809.byte	102,15,56,220,249
1810	movups	-32(%rcx),%xmm1
1811
1812	paddd	%xmm9,%xmm9
1813.byte	102,15,56,220,208
1814	pxor	%xmm15,%xmm12
1815	psrad	$31,%xmm14
1816.byte	102,15,56,220,216
1817	paddq	%xmm15,%xmm15
1818	pand	%xmm8,%xmm14
1819.byte	102,15,56,220,224
1820.byte	102,15,56,220,232
1821.byte	102,15,56,220,240
1822	pxor	%xmm14,%xmm15
1823	movaps	%xmm13,%xmm14
1824.byte	102,15,56,220,248
1825
1826	movdqa	%xmm9,%xmm0
1827	paddd	%xmm9,%xmm9
1828.byte	102,15,56,220,209
1829	pxor	%xmm15,%xmm13
1830	psrad	$31,%xmm0
1831.byte	102,15,56,220,217
1832	paddq	%xmm15,%xmm15
1833	pand	%xmm8,%xmm0
1834.byte	102,15,56,220,225
1835.byte	102,15,56,220,233
1836	pxor	%xmm0,%xmm15
/* Reload round keys 0 and 1 for the next iteration (or for the tail). */
1837	movups	(%rbp),%xmm0
1838.byte	102,15,56,220,241
1839.byte	102,15,56,220,249
1840	movups	16(%rbp),%xmm1
1841
/* aesenclast with a stack operand (ModRM/SIB 84,36,disp8 etc. = N(%rsp)):
 * each slot holds tweak XOR last round key, so this one instruction finishes
 * the cipher and applies the output-side tweak XOR.  %rax is reset to the
 * loop index saved in %r10. */
1842	pxor	%xmm15,%xmm14
1843.byte	102,15,56,221,84,36,0
1844	psrad	$31,%xmm9
1845	paddq	%xmm15,%xmm15
1846.byte	102,15,56,221,92,36,16
1847.byte	102,15,56,221,100,36,32
1848	pand	%xmm8,%xmm9
1849	movq	%r10,%rax
1850.byte	102,15,56,221,108,36,48
1851.byte	102,15,56,221,116,36,64
1852.byte	102,15,56,221,124,36,80
1853	pxor	%xmm9,%xmm15
1854
/* Write the six output blocks and loop while at least 96 bytes remain. */
1855	leaq	96(%rsi),%rsi
1856	movups	%xmm2,-96(%rsi)
1857	movups	%xmm3,-80(%rsi)
1858	movups	%xmm4,-64(%rsi)
1859	movups	%xmm5,-48(%rsi)
1860	movups	%xmm6,-32(%rsi)
1861	movups	%xmm7,-16(%rsi)
1862	subq	$96,%rdx
1863	jnc	.Lxts_enc_grandloop
1864
/* Undo the loop bookkeeping: %eax = (112 - %r10d) >> 4 is the per-schedule
 * loop count again, %rcx = schedule base. */
1865	movl	$16+96,%eax
1866	subl	%r10d,%eax
1867	movq	%rbp,%rcx
1868	shrl	$4,%eax
1869
/* Tail: 0..5 whole blocks remain.  %r10d remembers the loop count.  Round
 * key 0 (%xmm0) is XORed back out of each tweak just before the branch that
 * may need it; pxor does not disturb the flags set by addq/cmpq beyond being
 * placed before the compare, and the pxor between jb and je leaves flags
 * intact. */
1870.Lxts_enc_short:
1871
1872	movl	%eax,%r10d
1873	pxor	%xmm0,%xmm10
1874	addq	$96,%rdx
1875	jz	.Lxts_enc_done
1876
1877	pxor	%xmm0,%xmm11
1878	cmpq	$0x20,%rdx
1879	jb	.Lxts_enc_one
1880	pxor	%xmm0,%xmm12
1881	je	.Lxts_enc_two
1882
1883	pxor	%xmm0,%xmm13
1884	cmpq	$0x40,%rdx
1885	jb	.Lxts_enc_three
1886	pxor	%xmm0,%xmm14
1887	je	.Lxts_enc_four
1888
/* Five blocks: XOR tweaks in, run _aesni_encrypt6 with %xmm7 zeroed
 * (presumably a dummy sixth lane -- the helper is outside this view),
 * XOR tweaks out.  %xmm10 = %xmm15, the next unused tweak. */
1889	movdqu	(%rdi),%xmm2
1890	movdqu	16(%rdi),%xmm3
1891	movdqu	32(%rdi),%xmm4
1892	pxor	%xmm10,%xmm2
1893	movdqu	48(%rdi),%xmm5
1894	pxor	%xmm11,%xmm3
1895	movdqu	64(%rdi),%xmm6
1896	leaq	80(%rdi),%rdi
1897	pxor	%xmm12,%xmm4
1898	pxor	%xmm13,%xmm5
1899	pxor	%xmm14,%xmm6
1900	pxor	%xmm7,%xmm7
1901
1902	call	_aesni_encrypt6
1903
1904	xorps	%xmm10,%xmm2
1905	movdqa	%xmm15,%xmm10
1906	xorps	%xmm11,%xmm3
1907	xorps	%xmm12,%xmm4
1908	movdqu	%xmm2,(%rsi)
1909	xorps	%xmm13,%xmm5
1910	movdqu	%xmm3,16(%rsi)
1911	xorps	%xmm14,%xmm6
1912	movdqu	%xmm4,32(%rsi)
1913	movdqu	%xmm5,48(%rsi)
1914	movdqu	%xmm6,64(%rsi)
1915	leaq	80(%rsi),%rsi
1916	jmp	.Lxts_enc_done
1917
1918.align	16
/* One block: inline single-block encryption with tweak %xmm10; afterwards
 * %xmm10 = %xmm11, the next unused tweak. */
1919.Lxts_enc_one:
1920	movups	(%rdi),%xmm2
1921	leaq	16(%rdi),%rdi
1922	xorps	%xmm10,%xmm2
1923	movups	(%rcx),%xmm0
1924	movups	16(%rcx),%xmm1
1925	leaq	32(%rcx),%rcx
1926	xorps	%xmm0,%xmm2
1927.Loop_enc1_9:
1928.byte	102,15,56,220,209
1929	decl	%eax
1930	movups	(%rcx),%xmm1
1931	leaq	16(%rcx),%rcx
1932	jnz	.Loop_enc1_9
1933.byte	102,15,56,221,209
1934	xorps	%xmm10,%xmm2
1935	movdqa	%xmm11,%xmm10
1936	movups	%xmm2,(%rsi)
1937	leaq	16(%rsi),%rsi
1938	jmp	.Lxts_enc_done
1939
1940.align	16
/* Two blocks via _aesni_encrypt2; next unused tweak (%xmm12) -> %xmm10. */
1941.Lxts_enc_two:
1942	movups	(%rdi),%xmm2
1943	movups	16(%rdi),%xmm3
1944	leaq	32(%rdi),%rdi
1945	xorps	%xmm10,%xmm2
1946	xorps	%xmm11,%xmm3
1947
1948	call	_aesni_encrypt2
1949
1950	xorps	%xmm10,%xmm2
1951	movdqa	%xmm12,%xmm10
1952	xorps	%xmm11,%xmm3
1953	movups	%xmm2,(%rsi)
1954	movups	%xmm3,16(%rsi)
1955	leaq	32(%rsi),%rsi
1956	jmp	.Lxts_enc_done
1957
1958.align	16
/* Three blocks via _aesni_encrypt3; next unused tweak (%xmm13) -> %xmm10. */
1959.Lxts_enc_three:
1960	movups	(%rdi),%xmm2
1961	movups	16(%rdi),%xmm3
1962	movups	32(%rdi),%xmm4
1963	leaq	48(%rdi),%rdi
1964	xorps	%xmm10,%xmm2
1965	xorps	%xmm11,%xmm3
1966	xorps	%xmm12,%xmm4
1967
1968	call	_aesni_encrypt3
1969
1970	xorps	%xmm10,%xmm2
1971	movdqa	%xmm13,%xmm10
1972	xorps	%xmm11,%xmm3
1973	xorps	%xmm12,%xmm4
1974	movups	%xmm2,(%rsi)
1975	movups	%xmm3,16(%rsi)
1976	movups	%xmm4,32(%rsi)
1977	leaq	48(%rsi),%rsi
1978	jmp	.Lxts_enc_done
1979
1980.align	16
/* Four blocks via _aesni_encrypt4; next unused tweak (%xmm14) -> %xmm10. */
1981.Lxts_enc_four:
1982	movups	(%rdi),%xmm2
1983	movups	16(%rdi),%xmm3
1984	movups	32(%rdi),%xmm4
1985	xorps	%xmm10,%xmm2
1986	movups	48(%rdi),%xmm5
1987	leaq	64(%rdi),%rdi
1988	xorps	%xmm11,%xmm3
1989	xorps	%xmm12,%xmm4
1990	xorps	%xmm13,%xmm5
1991
1992	call	_aesni_encrypt4
1993
1994	pxor	%xmm10,%xmm2
1995	movdqa	%xmm14,%xmm10
1996	pxor	%xmm11,%xmm3
1997	pxor	%xmm12,%xmm4
1998	movdqu	%xmm2,(%rsi)
1999	pxor	%xmm13,%xmm5
2000	movdqu	%xmm3,16(%rsi)
2001	movdqu	%xmm4,32(%rsi)
2002	movdqu	%xmm5,48(%rsi)
2003	leaq	64(%rsi),%rsi
2004	jmp	.Lxts_enc_done
2005
2006.align	16
/* All whole blocks are written.  %r9 & 15 is the number of leftover input
 * bytes; if it is zero there is nothing more to do. */
2007.Lxts_enc_done:
2008	andq	$15,%r9
2009	jz	.Lxts_enc_ret
2010	movq	%r9,%rdx
2011
/* Byte-wise swap for the partial block: each leftover input byte replaces
 * the corresponding byte of the last full output block, and the byte it
 * displaces is emitted as the (short) final output. */
2012.Lxts_enc_steal:
2013	movzbl	(%rdi),%eax
2014	movzbl	-16(%rsi),%ecx
2015	leaq	1(%rdi),%rdi
2016	movb	%al,-16(%rsi)
2017	movb	%cl,0(%rsi)
2018	leaq	1(%rsi),%rsi
2019	subq	$1,%rdx
2020	jnz	.Lxts_enc_steal
2021
/* Rewind %rsi, then re-encrypt the patched block at -16(%rsi) in place using
 * the tweak saved in %xmm10. */
2022	subq	%r9,%rsi
2023	movq	%rbp,%rcx
2024	movl	%r10d,%eax
2025
2026	movups	-16(%rsi),%xmm2
2027	xorps	%xmm10,%xmm2
2028	movups	(%rcx),%xmm0
2029	movups	16(%rcx),%xmm1
2030	leaq	32(%rcx),%rcx
2031	xorps	%xmm0,%xmm2
2032.Loop_enc1_10:
2033.byte	102,15,56,220,209
2034	decl	%eax
2035	movups	(%rcx),%xmm1
2036	leaq	16(%rcx),%rcx
2037	jnz	.Loop_enc1_10
2038.byte	102,15,56,221,209
2039	xorps	%xmm10,%xmm2
2040	movups	%xmm2,-16(%rsi)
2041
/* Zero every XMM register and all seven scratch slots, then restore %rbp
 * and %rsp from the entry stack pointer kept in %r11.  The trailing
 * .byte 0xf3,0xc3 encodes "rep ret". */
2042.Lxts_enc_ret:
2043	xorps	%xmm0,%xmm0
2044	pxor	%xmm1,%xmm1
2045	pxor	%xmm2,%xmm2
2046	pxor	%xmm3,%xmm3
2047	pxor	%xmm4,%xmm4
2048	pxor	%xmm5,%xmm5
2049	pxor	%xmm6,%xmm6
2050	pxor	%xmm7,%xmm7
2051	movaps	%xmm0,0(%rsp)
2052	pxor	%xmm8,%xmm8
2053	movaps	%xmm0,16(%rsp)
2054	pxor	%xmm9,%xmm9
2055	movaps	%xmm0,32(%rsp)
2056	pxor	%xmm10,%xmm10
2057	movaps	%xmm0,48(%rsp)
2058	pxor	%xmm11,%xmm11
2059	movaps	%xmm0,64(%rsp)
2060	pxor	%xmm12,%xmm12
2061	movaps	%xmm0,80(%rsp)
2062	pxor	%xmm13,%xmm13
2063	movaps	%xmm0,96(%rsp)
2064	pxor	%xmm14,%xmm14
2065	pxor	%xmm15,%xmm15
2066	movq	-8(%r11),%rbp
2067.cfi_restore	%rbp
2068	leaq	(%r11),%rsp
2069.cfi_def_cfa_register	%rsp
2070.Lxts_enc_epilogue:
2071	.byte	0xf3,0xc3
2072.cfi_endproc
2073.size	aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * aesni_xts_decrypt -- decrypt a byte string six 16-byte blocks at a time,
 * XORing a per-block tweak in before and after the block cipher.
 *
 * Registers read on entry, as used by the code below:
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  length in bytes
 *   %rcx  key schedule applied to the data blocks (loop count at offset 240)
 *   %r8   key schedule used once, to ENCRYPT the initial tweak
 *   %r9   pointer to the 16 bytes the first tweak is derived from
 *
 * ".byte 102,15,56,222,..." / "...,223,..." are hand-encoded aesdec and
 * aesdeclast (66 0F 38 DE / DF); 220/221 are aesenc/aesenclast.
 *
 * When the length is not a multiple of 16, one full block is held back from
 * the bulk path so the final full block and the partial block can be
 * processed together at .Lxts_dec_done2 with the two tweaks used in swapped
 * order.
 *
 * Scratch frame: 0..80(%rsp) hold six values of (tweak XOR last round key),
 * 96(%rsp) holds (first round key XOR last round key).  All seven slots and
 * %xmm0..%xmm15 are zeroed before returning.
 *
 * NOTE(review): .Lxts_magic and _aesni_decrypt2/3/4/6 are defined elsewhere
 * in this file; the tweak update is presumably the XTS multiply-by-x in
 * GF(2^128) -- confirm against the constant's definition.
 */
2074.globl	aesni_xts_decrypt
2075.type	aesni_xts_decrypt,@function
2076.align	16
2077aesni_xts_decrypt:
2078.cfi_startproc
/* Keep the entry %rsp in %r11, save %rbp, and carve out a 16-byte aligned
 * scratch area of at least 112 bytes. */
2079	leaq	(%rsp),%r11
2080.cfi_def_cfa_register	%r11
2081	pushq	%rbp
2082.cfi_offset	%rbp,-16
2083	subq	$112,%rsp
2084	andq	$-16,%rsp
/* Encrypt (aesenc, not aesdec) the 16 bytes at (%r9) with the schedule at
 * %r8; the result in %xmm2 is the first tweak. */
2085	movups	(%r9),%xmm2
2086	movl	240(%r8),%eax
2087	movl	240(%rcx),%r10d
2088	movups	(%r8),%xmm0
2089	movups	16(%r8),%xmm1
2090	leaq	32(%r8),%r8
2091	xorps	%xmm0,%xmm2
2092.Loop_enc1_11:
2093.byte	102,15,56,220,209
2094	decl	%eax
2095	movups	(%r8),%xmm1
2096	leaq	16(%r8),%r8
2097	jnz	.Loop_enc1_11
2098.byte	102,15,56,221,209
/* If the length has a partial trailing block (len & 15 != 0), subtract 16 so
 * that one full block is left over for the stealing code. */
2099	xorl	%eax,%eax
2100	testq	$15,%rdx
2101	setnz	%al
2102	shlq	$4,%rax
2103	subq	%rax,%rdx
2104
/* %xmm0 = first round key of the data schedule, %rbp = schedule base,
 * %eax = loop count, %r10 = 16*count.  %r9 keeps the adjusted byte length
 * (its low four bits are still the tail size); %rdx is rounded down to
 * whole blocks. */
2105	movups	(%rcx),%xmm0
2106	movq	%rcx,%rbp
2107	movl	%r10d,%eax
2108	shll	$4,%r10d
2109	movq	%rdx,%r9
2110	andq	$-16,%rdx
2111
/* %xmm1 = round key at offset 16+16*count (the key used by the final
 * aesdeclast of a single-block loop). */
2112	movups	16(%rcx,%r10,1),%xmm1
2113
/* Derive the first six tweaks.  %xmm15 is the running tweak; %xmm9 holds
 * replicated high dwords whose sign bits (psrad $31) select which parts of
 * .Lxts_magic get XORed in after each 64-bit-lane doubling (paddq).
 * %xmm10..%xmm14 receive tweaks 0..4 with round key 0 already XORed in;
 * %xmm15 ends as tweak 5 without the key.  %xmm1 becomes
 * (first round key XOR last round key). */
2114	movdqa	.Lxts_magic(%rip),%xmm8
2115	movdqa	%xmm2,%xmm15
2116	pshufd	$0x5f,%xmm2,%xmm9
2117	pxor	%xmm0,%xmm1
2118	movdqa	%xmm9,%xmm14
2119	paddd	%xmm9,%xmm9
2120	movdqa	%xmm15,%xmm10
2121	psrad	$31,%xmm14
2122	paddq	%xmm15,%xmm15
2123	pand	%xmm8,%xmm14
2124	pxor	%xmm0,%xmm10
2125	pxor	%xmm14,%xmm15
2126	movdqa	%xmm9,%xmm14
2127	paddd	%xmm9,%xmm9
2128	movdqa	%xmm15,%xmm11
2129	psrad	$31,%xmm14
2130	paddq	%xmm15,%xmm15
2131	pand	%xmm8,%xmm14
2132	pxor	%xmm0,%xmm11
2133	pxor	%xmm14,%xmm15
2134	movdqa	%xmm9,%xmm14
2135	paddd	%xmm9,%xmm9
2136	movdqa	%xmm15,%xmm12
2137	psrad	$31,%xmm14
2138	paddq	%xmm15,%xmm15
2139	pand	%xmm8,%xmm14
2140	pxor	%xmm0,%xmm12
2141	pxor	%xmm14,%xmm15
2142	movdqa	%xmm9,%xmm14
2143	paddd	%xmm9,%xmm9
2144	movdqa	%xmm15,%xmm13
2145	psrad	$31,%xmm14
2146	paddq	%xmm15,%xmm15
2147	pand	%xmm8,%xmm14
2148	pxor	%xmm0,%xmm13
2149	pxor	%xmm14,%xmm15
2150	movdqa	%xmm15,%xmm14
2151	psrad	$31,%xmm9
2152	paddq	%xmm15,%xmm15
2153	pand	%xmm8,%xmm9
2154	pxor	%xmm0,%xmm14
2155	pxor	%xmm9,%xmm15
/* 96(%rsp) = first round key XOR last round key. */
2156	movaps	%xmm1,96(%rsp)
2157
/* Fewer than six whole blocks (96 bytes)?  Go straight to the tail code. */
2158	subq	$96,%rdx
2159	jc	.Lxts_dec_short
2160
/* Six-block loop setup: %rcx = base+32+16*count, and %rax = %r10 =
 * 112-16*count is the index that .Lxts_dec_loop6 steps by 32 up to zero.
 * %xmm1 = second round key, %r8 = address of .Lxts_magic. */
2161	movl	$16+96,%eax
2162	leaq	32(%rbp,%r10,1),%rcx
2163	subq	%r10,%rax
2164	movups	16(%rbp),%xmm1
2165	movq	%rax,%r10
2166	leaq	.Lxts_magic(%rip),%r8
2167	jmp	.Lxts_dec_grandloop
2168
2169.align	32
/* Per iteration: load six input blocks into %xmm2..%xmm7 and XOR each with
 * its tweak (round key 0 is already folded into the tweak), interleaved with
 * the first aesdec rounds.  %xmm9 = 96(%rsp) converts each tweak into
 * (tweak XOR last round key), which is parked at 0..80(%rsp) for the final
 * aesdeclast. */
2170.Lxts_dec_grandloop:
2171	movdqu	0(%rdi),%xmm2
2172	movdqa	%xmm0,%xmm8
2173	movdqu	16(%rdi),%xmm3
2174	pxor	%xmm10,%xmm2
2175	movdqu	32(%rdi),%xmm4
2176	pxor	%xmm11,%xmm3
2177.byte	102,15,56,222,209
2178	movdqu	48(%rdi),%xmm5
2179	pxor	%xmm12,%xmm4
2180.byte	102,15,56,222,217
2181	movdqu	64(%rdi),%xmm6
2182	pxor	%xmm13,%xmm5
2183.byte	102,15,56,222,225
2184	movdqu	80(%rdi),%xmm7
2185	pxor	%xmm15,%xmm8
2186	movdqa	96(%rsp),%xmm9
2187	pxor	%xmm14,%xmm6
2188.byte	102,15,56,222,233
2189	movups	32(%rbp),%xmm0
2190	leaq	96(%rdi),%rdi
2191	pxor	%xmm8,%xmm7
2192
2193	pxor	%xmm9,%xmm10
2194.byte	102,15,56,222,241
2195	pxor	%xmm9,%xmm11
2196	movdqa	%xmm10,0(%rsp)
2197.byte	102,15,56,222,249
2198	movups	48(%rbp),%xmm1
2199	pxor	%xmm9,%xmm12
2200
2201.byte	102,15,56,222,208
2202	pxor	%xmm9,%xmm13
2203	movdqa	%xmm11,16(%rsp)
2204.byte	102,15,56,222,216
2205	pxor	%xmm9,%xmm14
2206	movdqa	%xmm12,32(%rsp)
2207.byte	102,15,56,222,224
2208.byte	102,15,56,222,232
2209	pxor	%xmm9,%xmm8
2210	movdqa	%xmm14,64(%rsp)
2211.byte	102,15,56,222,240
2212.byte	102,15,56,222,248
2213	movups	64(%rbp),%xmm0
2214	movdqa	%xmm8,80(%rsp)
2215	pshufd	$0x5f,%xmm15,%xmm9
2216	jmp	.Lxts_dec_loop6
2217.align	32
/* Middle rounds: two round keys per pass, addressed relative to %rcx by the
 * index in %rax.  The jnz consumes the flags from addq; the movups in
 * between does not touch them. */
2218.Lxts_dec_loop6:
2219.byte	102,15,56,222,209
2220.byte	102,15,56,222,217
2221.byte	102,15,56,222,225
2222.byte	102,15,56,222,233
2223.byte	102,15,56,222,241
2224.byte	102,15,56,222,249
2225	movups	-64(%rcx,%rax,1),%xmm1
2226	addq	$32,%rax
2227
2228.byte	102,15,56,222,208
2229.byte	102,15,56,222,216
2230.byte	102,15,56,222,224
2231.byte	102,15,56,222,232
2232.byte	102,15,56,222,240
2233.byte	102,15,56,222,248
2234	movups	-80(%rcx,%rax,1),%xmm0
2235	jnz	.Lxts_dec_loop6
2236
/* Remaining rounds, interleaved with generating the next six tweaks:
 * %xmm10..%xmm14 are rebuilt as (round key 0 from (%rbp)) XOR tweak, and
 * %xmm15 keeps advancing as the raw running tweak.  %xmm13 from the current
 * iteration is parked at 48(%rsp) before being overwritten. */
2237	movdqa	(%r8),%xmm8
2238	movdqa	%xmm9,%xmm14
2239	paddd	%xmm9,%xmm9
2240.byte	102,15,56,222,209
2241	paddq	%xmm15,%xmm15
2242	psrad	$31,%xmm14
2243.byte	102,15,56,222,217
2244	pand	%xmm8,%xmm14
2245	movups	(%rbp),%xmm10
2246.byte	102,15,56,222,225
2247.byte	102,15,56,222,233
2248.byte	102,15,56,222,241
2249	pxor	%xmm14,%xmm15
2250	movaps	%xmm10,%xmm11
2251.byte	102,15,56,222,249
2252	movups	-64(%rcx),%xmm1
2253
2254	movdqa	%xmm9,%xmm14
2255.byte	102,15,56,222,208
2256	paddd	%xmm9,%xmm9
2257	pxor	%xmm15,%xmm10
2258.byte	102,15,56,222,216
2259	psrad	$31,%xmm14
2260	paddq	%xmm15,%xmm15
2261.byte	102,15,56,222,224
2262.byte	102,15,56,222,232
2263	pand	%xmm8,%xmm14
2264	movaps	%xmm11,%xmm12
2265.byte	102,15,56,222,240
2266	pxor	%xmm14,%xmm15
2267	movdqa	%xmm9,%xmm14
2268.byte	102,15,56,222,248
2269	movups	-48(%rcx),%xmm0
2270
2271	paddd	%xmm9,%xmm9
2272.byte	102,15,56,222,209
2273	pxor	%xmm15,%xmm11
2274	psrad	$31,%xmm14
2275.byte	102,15,56,222,217
2276	paddq	%xmm15,%xmm15
2277	pand	%xmm8,%xmm14
2278.byte	102,15,56,222,225
2279.byte	102,15,56,222,233
2280	movdqa	%xmm13,48(%rsp)
2281	pxor	%xmm14,%xmm15
2282.byte	102,15,56,222,241
2283	movaps	%xmm12,%xmm13
2284	movdqa	%xmm9,%xmm14
2285.byte	102,15,56,222,249
2286	movups	-32(%rcx),%xmm1
2287
2288	paddd	%xmm9,%xmm9
2289.byte	102,15,56,222,208
2290	pxor	%xmm15,%xmm12
2291	psrad	$31,%xmm14
2292.byte	102,15,56,222,216
2293	paddq	%xmm15,%xmm15
2294	pand	%xmm8,%xmm14
2295.byte	102,15,56,222,224
2296.byte	102,15,56,222,232
2297.byte	102,15,56,222,240
2298	pxor	%xmm14,%xmm15
2299	movaps	%xmm13,%xmm14
2300.byte	102,15,56,222,248
2301
2302	movdqa	%xmm9,%xmm0
2303	paddd	%xmm9,%xmm9
2304.byte	102,15,56,222,209
2305	pxor	%xmm15,%xmm13
2306	psrad	$31,%xmm0
2307.byte	102,15,56,222,217
2308	paddq	%xmm15,%xmm15
2309	pand	%xmm8,%xmm0
2310.byte	102,15,56,222,225
2311.byte	102,15,56,222,233
2312	pxor	%xmm0,%xmm15
/* Reload round keys 0 and 1 for the next iteration (or for the tail). */
2313	movups	(%rbp),%xmm0
2314.byte	102,15,56,222,241
2315.byte	102,15,56,222,249
2316	movups	16(%rbp),%xmm1
2317
/* aesdeclast with a stack operand (N(%rsp)): each slot holds tweak XOR last
 * round key, so this one instruction finishes the cipher and applies the
 * output-side tweak XOR.  %rax is reset to the loop index saved in %r10. */
2318	pxor	%xmm15,%xmm14
2319.byte	102,15,56,223,84,36,0
2320	psrad	$31,%xmm9
2321	paddq	%xmm15,%xmm15
2322.byte	102,15,56,223,92,36,16
2323.byte	102,15,56,223,100,36,32
2324	pand	%xmm8,%xmm9
2325	movq	%r10,%rax
2326.byte	102,15,56,223,108,36,48
2327.byte	102,15,56,223,116,36,64
2328.byte	102,15,56,223,124,36,80
2329	pxor	%xmm9,%xmm15
2330
/* Write the six output blocks and loop while at least 96 bytes remain. */
2331	leaq	96(%rsi),%rsi
2332	movups	%xmm2,-96(%rsi)
2333	movups	%xmm3,-80(%rsi)
2334	movups	%xmm4,-64(%rsi)
2335	movups	%xmm5,-48(%rsi)
2336	movups	%xmm6,-32(%rsi)
2337	movups	%xmm7,-16(%rsi)
2338	subq	$96,%rdx
2339	jnc	.Lxts_dec_grandloop
2340
/* Undo the loop bookkeeping: %eax = (112 - %r10d) >> 4 is the per-schedule
 * loop count again, %rcx = schedule base. */
2341	movl	$16+96,%eax
2342	subl	%r10d,%eax
2343	movq	%rbp,%rcx
2344	shrl	$4,%eax
2345
/* Tail: 0..5 whole blocks remain.  %r10d remembers the loop count.  Round
 * key 0 (%xmm0) is XORed back out of the tweaks; here one tweak more than
 * the block count is unmasked at each step because the stealing code needs
 * both the next tweak (%xmm10) and the one after it (%xmm11). */
2346.Lxts_dec_short:
2347
2348	movl	%eax,%r10d
2349	pxor	%xmm0,%xmm10
2350	pxor	%xmm0,%xmm11
2351	addq	$96,%rdx
2352	jz	.Lxts_dec_done
2353
2354	pxor	%xmm0,%xmm12
2355	cmpq	$0x20,%rdx
2356	jb	.Lxts_dec_one
2357	pxor	%xmm0,%xmm13
2358	je	.Lxts_dec_two
2359
2360	pxor	%xmm0,%xmm14
2361	cmpq	$0x40,%rdx
2362	jb	.Lxts_dec_three
2363	je	.Lxts_dec_four
2364
/* Five blocks via _aesni_decrypt6 (%xmm7 is passed as-is; the helper is
 * outside this view and its sixth result is not stored). */
2365	movdqu	(%rdi),%xmm2
2366	movdqu	16(%rdi),%xmm3
2367	movdqu	32(%rdi),%xmm4
2368	pxor	%xmm10,%xmm2
2369	movdqu	48(%rdi),%xmm5
2370	pxor	%xmm11,%xmm3
2371	movdqu	64(%rdi),%xmm6
2372	leaq	80(%rdi),%rdi
2373	pxor	%xmm12,%xmm4
2374	pxor	%xmm13,%xmm5
2375	pxor	%xmm14,%xmm6
2376
2377	call	_aesni_decrypt6
2378
/* XOR tweaks out and store.  Interleaved: %xmm14 = per-dword sign mask of
 * %xmm15 (pcmpgtd against zero), shuffled into %xmm11 as the carry mask for
 * one more tweak step, which is only completed if a partial block follows. */
2379	xorps	%xmm10,%xmm2
2380	xorps	%xmm11,%xmm3
2381	xorps	%xmm12,%xmm4
2382	movdqu	%xmm2,(%rsi)
2383	xorps	%xmm13,%xmm5
2384	movdqu	%xmm3,16(%rsi)
2385	xorps	%xmm14,%xmm6
2386	movdqu	%xmm4,32(%rsi)
2387	pxor	%xmm14,%xmm14
2388	movdqu	%xmm5,48(%rsi)
2389	pcmpgtd	%xmm15,%xmm14
2390	movdqu	%xmm6,64(%rsi)
2391	leaq	80(%rsi),%rsi
2392	pshufd	$0x13,%xmm14,%xmm11
2393	andq	$15,%r9
2394	jz	.Lxts_dec_ret
2395
/* Partial block pending: %xmm10 = current tweak (%xmm15), %xmm11 = the tweak
 * after it (doubled %xmm15 XOR masked %xmm8 constant). */
2396	movdqa	%xmm15,%xmm10
2397	paddq	%xmm15,%xmm15
2398	pand	%xmm8,%xmm11
2399	pxor	%xmm15,%xmm11
2400	jmp	.Lxts_dec_done2
2401
2402.align	16
/* One block: inline single-block decryption with tweak %xmm10; afterwards
 * %xmm10/%xmm11 = the next two unused tweaks. */
2403.Lxts_dec_one:
2404	movups	(%rdi),%xmm2
2405	leaq	16(%rdi),%rdi
2406	xorps	%xmm10,%xmm2
2407	movups	(%rcx),%xmm0
2408	movups	16(%rcx),%xmm1
2409	leaq	32(%rcx),%rcx
2410	xorps	%xmm0,%xmm2
2411.Loop_dec1_12:
2412.byte	102,15,56,222,209
2413	decl	%eax
2414	movups	(%rcx),%xmm1
2415	leaq	16(%rcx),%rcx
2416	jnz	.Loop_dec1_12
2417.byte	102,15,56,223,209
2418	xorps	%xmm10,%xmm2
2419	movdqa	%xmm11,%xmm10
2420	movups	%xmm2,(%rsi)
2421	movdqa	%xmm12,%xmm11
2422	leaq	16(%rsi),%rsi
2423	jmp	.Lxts_dec_done
2424
2425.align	16
/* Two blocks via _aesni_decrypt2; next two tweaks -> %xmm10/%xmm11. */
2426.Lxts_dec_two:
2427	movups	(%rdi),%xmm2
2428	movups	16(%rdi),%xmm3
2429	leaq	32(%rdi),%rdi
2430	xorps	%xmm10,%xmm2
2431	xorps	%xmm11,%xmm3
2432
2433	call	_aesni_decrypt2
2434
2435	xorps	%xmm10,%xmm2
2436	movdqa	%xmm12,%xmm10
2437	xorps	%xmm11,%xmm3
2438	movdqa	%xmm13,%xmm11
2439	movups	%xmm2,(%rsi)
2440	movups	%xmm3,16(%rsi)
2441	leaq	32(%rsi),%rsi
2442	jmp	.Lxts_dec_done
2443
2444.align	16
/* Three blocks via _aesni_decrypt3; next two tweaks -> %xmm10/%xmm11. */
2445.Lxts_dec_three:
2446	movups	(%rdi),%xmm2
2447	movups	16(%rdi),%xmm3
2448	movups	32(%rdi),%xmm4
2449	leaq	48(%rdi),%rdi
2450	xorps	%xmm10,%xmm2
2451	xorps	%xmm11,%xmm3
2452	xorps	%xmm12,%xmm4
2453
2454	call	_aesni_decrypt3
2455
2456	xorps	%xmm10,%xmm2
2457	movdqa	%xmm13,%xmm10
2458	xorps	%xmm11,%xmm3
2459	movdqa	%xmm14,%xmm11
2460	xorps	%xmm12,%xmm4
2461	movups	%xmm2,(%rsi)
2462	movups	%xmm3,16(%rsi)
2463	movups	%xmm4,32(%rsi)
2464	leaq	48(%rsi),%rsi
2465	jmp	.Lxts_dec_done
2466
2467.align	16
/* Four blocks via _aesni_decrypt4; next two tweaks -> %xmm10/%xmm11
 * (%xmm15 is the raw running tweak, so it needs no unmasking). */
2468.Lxts_dec_four:
2469	movups	(%rdi),%xmm2
2470	movups	16(%rdi),%xmm3
2471	movups	32(%rdi),%xmm4
2472	xorps	%xmm10,%xmm2
2473	movups	48(%rdi),%xmm5
2474	leaq	64(%rdi),%rdi
2475	xorps	%xmm11,%xmm3
2476	xorps	%xmm12,%xmm4
2477	xorps	%xmm13,%xmm5
2478
2479	call	_aesni_decrypt4
2480
2481	pxor	%xmm10,%xmm2
2482	movdqa	%xmm14,%xmm10
2483	pxor	%xmm11,%xmm3
2484	movdqa	%xmm15,%xmm11
2485	pxor	%xmm12,%xmm4
2486	movdqu	%xmm2,(%rsi)
2487	pxor	%xmm13,%xmm5
2488	movdqu	%xmm3,16(%rsi)
2489	movdqu	%xmm4,32(%rsi)
2490	movdqu	%xmm5,48(%rsi)
2491	leaq	64(%rsi),%rsi
2492	jmp	.Lxts_dec_done
2493
2494.align	16
/* %r9 & 15 is the size of the trailing partial block; zero means done. */
2495.Lxts_dec_done:
2496	andq	$15,%r9
2497	jz	.Lxts_dec_ret
/* Decrypt the held-back full block at (%rdi) with the LATER tweak %xmm11
 * and write it to (%rsi). */
2498.Lxts_dec_done2:
2499	movq	%r9,%rdx
2500	movq	%rbp,%rcx
2501	movl	%r10d,%eax
2502
2503	movups	(%rdi),%xmm2
2504	xorps	%xmm11,%xmm2
2505	movups	(%rcx),%xmm0
2506	movups	16(%rcx),%xmm1
2507	leaq	32(%rcx),%rcx
2508	xorps	%xmm0,%xmm2
2509.Loop_dec1_13:
2510.byte	102,15,56,222,209
2511	decl	%eax
2512	movups	(%rcx),%xmm1
2513	leaq	16(%rcx),%rcx
2514	jnz	.Loop_dec1_13
2515.byte	102,15,56,223,209
2516	xorps	%xmm11,%xmm2
2517	movups	%xmm2,(%rsi)
2518
/* Byte-wise swap for the partial block: each trailing input byte at
 * 16(%rdi) replaces the corresponding byte of the block just written, and
 * the displaced byte becomes the (short) final output at 16(%rsi). */
2519.Lxts_dec_steal:
2520	movzbl	16(%rdi),%eax
2521	movzbl	(%rsi),%ecx
2522	leaq	1(%rdi),%rdi
2523	movb	%al,(%rsi)
2524	movb	%cl,16(%rsi)
2525	leaq	1(%rsi),%rsi
2526	subq	$1,%rdx
2527	jnz	.Lxts_dec_steal
2528
/* Rewind %rsi, then decrypt the patched block at (%rsi) in place using the
 * EARLIER tweak %xmm10. */
2529	subq	%r9,%rsi
2530	movq	%rbp,%rcx
2531	movl	%r10d,%eax
2532
2533	movups	(%rsi),%xmm2
2534	xorps	%xmm10,%xmm2
2535	movups	(%rcx),%xmm0
2536	movups	16(%rcx),%xmm1
2537	leaq	32(%rcx),%rcx
2538	xorps	%xmm0,%xmm2
2539.Loop_dec1_14:
2540.byte	102,15,56,222,209
2541	decl	%eax
2542	movups	(%rcx),%xmm1
2543	leaq	16(%rcx),%rcx
2544	jnz	.Loop_dec1_14
2545.byte	102,15,56,223,209
2546	xorps	%xmm10,%xmm2
2547	movups	%xmm2,(%rsi)
2548
/* Zero every XMM register and all seven scratch slots, then restore %rbp
 * and %rsp from the entry stack pointer kept in %r11.  The trailing
 * .byte 0xf3,0xc3 encodes "rep ret". */
2549.Lxts_dec_ret:
2550	xorps	%xmm0,%xmm0
2551	pxor	%xmm1,%xmm1
2552	pxor	%xmm2,%xmm2
2553	pxor	%xmm3,%xmm3
2554	pxor	%xmm4,%xmm4
2555	pxor	%xmm5,%xmm5
2556	pxor	%xmm6,%xmm6
2557	pxor	%xmm7,%xmm7
2558	movaps	%xmm0,0(%rsp)
2559	pxor	%xmm8,%xmm8
2560	movaps	%xmm0,16(%rsp)
2561	pxor	%xmm9,%xmm9
2562	movaps	%xmm0,32(%rsp)
2563	pxor	%xmm10,%xmm10
2564	movaps	%xmm0,48(%rsp)
2565	pxor	%xmm11,%xmm11
2566	movaps	%xmm0,64(%rsp)
2567	pxor	%xmm12,%xmm12
2568	movaps	%xmm0,80(%rsp)
2569	pxor	%xmm13,%xmm13
2570	movaps	%xmm0,96(%rsp)
2571	pxor	%xmm14,%xmm14
2572	pxor	%xmm15,%xmm15
2573	movq	-8(%r11),%rbp
2574.cfi_restore	%rbp
2575	leaq	(%r11),%rsp
2576.cfi_def_cfa_register	%rsp
2577.Lxts_dec_epilogue:
2578	.byte	0xf3,0xc3
2579.cfi_endproc
2580.size	aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_ocb_encrypt -- driver that feeds 16-byte blocks to the
 * __ocb_encrypt1/4/6 helpers, six at a time where possible.
 *
 * Values read on entry, as used by the code below:
 *   %rdi          input pointer
 *   %rsi          output pointer
 *   %rdx          number of 16-byte blocks
 *   %rcx          key schedule (loop count at offset 240)
 *   %r8           starting block number (drives the bsf table indexing)
 *   %r9           pointer to a 16-byte offset value, read and written back
 *   8(entry rsp)  pointer to a table of 16-byte entries      -> %rbx
 *   16(entry rsp) pointer to a 16-byte accumulator, updated  -> %rbp
 *
 * Register roles handed to the helpers:
 *   %r11 = schedule base, %rcx = base+32+16*count, %rax = %r10 = 48-16*count
 *   %xmm9  = first round key XOR last round key
 *   %xmm15 = offset XOR last round key
 *   %xmm10 = first table entry, %xmm8 = accumulator
 *   %r12/%r13/%r14 = 16*bsf(n+1), 16*bsf(n+3), 16*bsf(n+5) table offsets
 *
 * NOTE(review): __ocb_encrypt1 and __ocb_encrypt4 are outside this view;
 * %xmm0 at .Locb_enc_done is whatever the last helper left there --
 * presumably the last round key, which would strip the mask from %xmm15
 * before it is stored.  Confirm against the helpers.
 */
2581.globl	aesni_ocb_encrypt
2582.type	aesni_ocb_encrypt,@function
2583.align	32
2584aesni_ocb_encrypt:
2585.cfi_startproc
/* %rax = entry %rsp, so 8(%rax)/16(%rax) below are the two quadwords just
 * above the return address.  Five callee-saved registers are pushed. */
2586	leaq	(%rsp),%rax
2587	pushq	%rbx
2588.cfi_adjust_cfa_offset	8
2589.cfi_offset	%rbx,-16
2590	pushq	%rbp
2591.cfi_adjust_cfa_offset	8
2592.cfi_offset	%rbp,-24
2593	pushq	%r12
2594.cfi_adjust_cfa_offset	8
2595.cfi_offset	%r12,-32
2596	pushq	%r13
2597.cfi_adjust_cfa_offset	8
2598.cfi_offset	%r13,-40
2599	pushq	%r14
2600.cfi_adjust_cfa_offset	8
2601.cfi_offset	%r14,-48
2602	movq	8(%rax),%rbx
2603	movq	8+8(%rax),%rbp
2604
/* %r10 = 16*count; %xmm9 = first round key; %xmm1 = round key at offset
 * 16+16*count (the last one). */
2605	movl	240(%rcx),%r10d
2606	movq	%rcx,%r11
2607	shll	$4,%r10d
2608	movups	(%rcx),%xmm9
2609	movups	16(%rcx,%r10,1),%xmm1
2610
/* Fold the last round key into both the first round key and the offset. */
2611	movdqu	(%r9),%xmm15
2612	pxor	%xmm1,%xmm9
2613	pxor	%xmm1,%xmm15
2614
/* Round-key indexing for the helpers: %rcx points past the schedule and
 * %rax = %r10 = 48-16*count; %xmm1 = second round key. */
2615	movl	$16+32,%eax
2616	leaq	32(%r11,%r10,1),%rcx
2617	movups	16(%r11),%xmm1
2618	subq	%r10,%rax
2619	movq	%rax,%r10
2620
2621	movdqu	(%rbx),%xmm10
2622	movdqu	(%rbp),%xmm8
2623
/* If the starting block number is even, process one block on its own first
 * (table entry selected by bsf of the block number) so that the six-block
 * path always starts on an odd block number. */
2624	testq	$1,%r8
2625	jnz	.Locb_enc_odd
2626
2627	bsfq	%r8,%r12
2628	addq	$1,%r8
2629	shlq	$4,%r12
2630	movdqu	(%rbx,%r12,1),%xmm7
2631	movdqu	(%rdi),%xmm2
2632	leaq	16(%rdi),%rdi
2633
2634	call	__ocb_encrypt1
2635
2636	movdqa	%xmm7,%xmm15
2637	movups	%xmm2,(%rsi)
2638	leaq	16(%rsi),%rsi
2639	subq	$1,%rdx
2640	jz	.Locb_enc_done
2641
/* Precompute the three variable table offsets for the next group of six and
 * advance the block number by six. */
2642.Locb_enc_odd:
2643	leaq	1(%r8),%r12
2644	leaq	3(%r8),%r13
2645	leaq	5(%r8),%r14
2646	leaq	6(%r8),%r8
2647	bsfq	%r12,%r12
2648	bsfq	%r13,%r13
2649	bsfq	%r14,%r14
2650	shlq	$4,%r12
2651	shlq	$4,%r13
2652	shlq	$4,%r14
2653
2654	subq	$6,%rdx
2655	jc	.Locb_enc_short
2656	jmp	.Locb_enc_grandloop
2657
2658.align	32
/* Bulk path: load six blocks, let __ocb_encrypt6 transform %xmm2..%xmm7,
 * store six blocks, repeat while at least six remain. */
2659.Locb_enc_grandloop:
2660	movdqu	0(%rdi),%xmm2
2661	movdqu	16(%rdi),%xmm3
2662	movdqu	32(%rdi),%xmm4
2663	movdqu	48(%rdi),%xmm5
2664	movdqu	64(%rdi),%xmm6
2665	movdqu	80(%rdi),%xmm7
2666	leaq	96(%rdi),%rdi
2667
2668	call	__ocb_encrypt6
2669
2670	movups	%xmm2,0(%rsi)
2671	movups	%xmm3,16(%rsi)
2672	movups	%xmm4,32(%rsi)
2673	movups	%xmm5,48(%rsi)
2674	movups	%xmm6,64(%rsi)
2675	movups	%xmm7,80(%rsi)
2676	leaq	96(%rsi),%rsi
2677	subq	$6,%rdx
2678	jnc	.Locb_enc_grandloop
2679
/* Tail: 0..5 blocks remain.  The loads between cmpq and je do not modify
 * flags, so each compare serves both the jb and the je that follow it. */
2680.Locb_enc_short:
2681	addq	$6,%rdx
2682	jz	.Locb_enc_done
2683
2684	movdqu	0(%rdi),%xmm2
2685	cmpq	$2,%rdx
2686	jb	.Locb_enc_one
2687	movdqu	16(%rdi),%xmm3
2688	je	.Locb_enc_two
2689
2690	movdqu	32(%rdi),%xmm4
2691	cmpq	$4,%rdx
2692	jb	.Locb_enc_three
2693	movdqu	48(%rdi),%xmm5
2694	je	.Locb_enc_four
2695
/* Five blocks: zero %xmm7 as the unused sixth lane; the offset carried
 * forward is the one left in %xmm14. */
2696	movdqu	64(%rdi),%xmm6
2697	pxor	%xmm7,%xmm7
2698
2699	call	__ocb_encrypt6
2700
2701	movdqa	%xmm14,%xmm15
2702	movups	%xmm2,0(%rsi)
2703	movups	%xmm3,16(%rsi)
2704	movups	%xmm4,32(%rsi)
2705	movups	%xmm5,48(%rsi)
2706	movups	%xmm6,64(%rsi)
2707
2708	jmp	.Locb_enc_done
2709
2710.align	16
/* One block: the first table entry (%xmm10) is passed in %xmm7. */
2711.Locb_enc_one:
2712	movdqa	%xmm10,%xmm7
2713
2714	call	__ocb_encrypt1
2715
2716	movdqa	%xmm7,%xmm15
2717	movups	%xmm2,0(%rsi)
2718	jmp	.Locb_enc_done
2719
2720.align	16
/* Two blocks: unused lanes %xmm4/%xmm5 zeroed; carry %xmm11 forward. */
2721.Locb_enc_two:
2722	pxor	%xmm4,%xmm4
2723	pxor	%xmm5,%xmm5
2724
2725	call	__ocb_encrypt4
2726
2727	movdqa	%xmm11,%xmm15
2728	movups	%xmm2,0(%rsi)
2729	movups	%xmm3,16(%rsi)
2730
2731	jmp	.Locb_enc_done
2732
2733.align	16
/* Three blocks: unused lane %xmm5 zeroed; carry %xmm12 forward. */
2734.Locb_enc_three:
2735	pxor	%xmm5,%xmm5
2736
2737	call	__ocb_encrypt4
2738
2739	movdqa	%xmm12,%xmm15
2740	movups	%xmm2,0(%rsi)
2741	movups	%xmm3,16(%rsi)
2742	movups	%xmm4,32(%rsi)
2743
2744	jmp	.Locb_enc_done
2745
2746.align	16
/* Four blocks: carry %xmm13 forward; falls through to .Locb_enc_done. */
2747.Locb_enc_four:
2748	call	__ocb_encrypt4
2749
2750	movdqa	%xmm13,%xmm15
2751	movups	%xmm2,0(%rsi)
2752	movups	%xmm3,16(%rsi)
2753	movups	%xmm4,32(%rsi)
2754	movups	%xmm5,48(%rsi)
2755
/* Write back the accumulator to (%rbp) and the offset (XORed with %xmm0)
 * to (%r9). */
2756.Locb_enc_done:
2757	pxor	%xmm0,%xmm15
2758	movdqu	%xmm8,(%rbp)
2759	movdqu	%xmm15,(%r9)
2760
/* Zero every XMM register, then restore the five pushed registers relative
 * to %rax = %rsp+40 (five 8-byte pushes) and reset %rsp.  The trailing
 * .byte 0xf3,0xc3 encodes "rep ret". */
2761	xorps	%xmm0,%xmm0
2762	pxor	%xmm1,%xmm1
2763	pxor	%xmm2,%xmm2
2764	pxor	%xmm3,%xmm3
2765	pxor	%xmm4,%xmm4
2766	pxor	%xmm5,%xmm5
2767	pxor	%xmm6,%xmm6
2768	pxor	%xmm7,%xmm7
2769	pxor	%xmm8,%xmm8
2770	pxor	%xmm9,%xmm9
2771	pxor	%xmm10,%xmm10
2772	pxor	%xmm11,%xmm11
2773	pxor	%xmm12,%xmm12
2774	pxor	%xmm13,%xmm13
2775	pxor	%xmm14,%xmm14
2776	pxor	%xmm15,%xmm15
2777	leaq	40(%rsp),%rax
2778.cfi_def_cfa	%rax,8
2779	movq	-40(%rax),%r14
2780.cfi_restore	%r14
2781	movq	-32(%rax),%r13
2782.cfi_restore	%r13
2783	movq	-24(%rax),%r12
2784.cfi_restore	%r12
2785	movq	-16(%rax),%rbp
2786.cfi_restore	%rbp
2787	movq	-8(%rax),%rbx
2788.cfi_restore	%rbx
2789	leaq	(%rax),%rsp
2790.cfi_def_cfa_register	%rsp
2791.Locb_enc_epilogue:
2792	.byte	0xf3,0xc3
2793.cfi_endproc
2794.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
2795
/*
 * __ocb_encrypt6 -- internal helper: OCB-mask and AES-encrypt the six blocks
 * held in %xmm2..%xmm7 in parallel.
 *
 * Registers read (set up by the caller; the encrypt entry point's prologue is
 * outside this excerpt, so the roles below are taken from how they are used
 * here -- confirm against aesni_ocb_encrypt):
 *   %xmm2-%xmm7   six input blocks (replaced by the results)
 *   %xmm8         running XOR accumulator; every input block is XORed into it
 *                 before the block is masked
 *   %xmm9         constant XORed into the chain value on entry and into every
 *                 mask again before the last round (aesni_ocb_decrypt sets it
 *                 to round-key[0] ^ last-round-key for the decrypt twin;
 *                 presumably the same here)
 *   %xmm10        16-byte value used for the 1st/3rd/5th block step; reloaded
 *                 from (%rbx) before returning
 *   %xmm15        incoming chain value
 *   %xmm1         round key for the first aesenc; reloaded from 16(%r11) on exit
 *   %rbx,%r12-%r14 table base and byte offsets for the 2nd/4th/6th block step
 *   %r11          key schedule base;  %rcx,%rax round-key cursor (negative
 *                 index in %rax stepping by 32 up to zero);  %r10 saved %rax
 *   %r8           block counter
 *
 * On return: %xmm11..%xmm15 hold the masks of blocks 2..6 XORed with %xmm9
 * (callers copy one of them to %xmm15 to continue the chain); %r8 += 6 and
 * %r12..%r14 = bsf(%r8+1), bsf(%r8+3), bsf(%r8+5) scaled by 16 (old %r8),
 * i.e. already prepared for the next group of six.
 *
 * Opcode legend: ".byte 102,15,56,220,M" is aesenc and "...,221,M" is
 * aesenclast (66 0F 38 DC/DD /r); a 65 after the 102 is a REX.B prefix that
 * selects %xmm8-%xmm15 as the source operand.
 */
2796.type	__ocb_encrypt6,@function
2797.align	32
2798__ocb_encrypt6:
2799.cfi_startproc
/* Build the six chained masks in %xmm10..%xmm15, fold each input block into
 * the %xmm8 accumulator, then XOR the block with its mask. */
2800	pxor	%xmm9,%xmm15
2801	movdqu	(%rbx,%r12,1),%xmm11
2802	movdqa	%xmm10,%xmm12
2803	movdqu	(%rbx,%r13,1),%xmm13
2804	movdqa	%xmm10,%xmm14
2805	pxor	%xmm15,%xmm10
2806	movdqu	(%rbx,%r14,1),%xmm15
2807	pxor	%xmm10,%xmm11
2808	pxor	%xmm2,%xmm8
2809	pxor	%xmm10,%xmm2
2810	pxor	%xmm11,%xmm12
2811	pxor	%xmm3,%xmm8
2812	pxor	%xmm11,%xmm3
2813	pxor	%xmm12,%xmm13
2814	pxor	%xmm4,%xmm8
2815	pxor	%xmm12,%xmm4
2816	pxor	%xmm13,%xmm14
2817	pxor	%xmm5,%xmm8
2818	pxor	%xmm13,%xmm5
2819	pxor	%xmm14,%xmm15
2820	pxor	%xmm6,%xmm8
2821	pxor	%xmm14,%xmm6
2822	pxor	%xmm7,%xmm8
2823	pxor	%xmm15,%xmm7
2824	movups	32(%r11),%xmm0
2825
/* Table offsets for the NEXT group of six are computed here, interleaved with
 * the first rounds; the shifts by 4 follow further down. */
2826	leaq	1(%r8),%r12
2827	leaq	3(%r8),%r13
2828	leaq	5(%r8),%r14
2829	addq	$6,%r8
2830	pxor	%xmm9,%xmm10
2831	bsfq	%r12,%r12
2832	bsfq	%r13,%r13
2833	bsfq	%r14,%r14
2834
/* aesenc %xmm1 on %xmm2..%xmm7, while %xmm9 is XORed into the masks so they
 * can serve as the last-round operand. */
2835.byte	102,15,56,220,209
2836.byte	102,15,56,220,217
2837.byte	102,15,56,220,225
2838.byte	102,15,56,220,233
2839	pxor	%xmm9,%xmm11
2840	pxor	%xmm9,%xmm12
2841.byte	102,15,56,220,241
2842	pxor	%xmm9,%xmm13
2843	pxor	%xmm9,%xmm14
2844.byte	102,15,56,220,249
2845	movups	48(%r11),%xmm1
2846	pxor	%xmm9,%xmm15
2847
/* aesenc %xmm0 on %xmm2..%xmm7 */
2848.byte	102,15,56,220,208
2849.byte	102,15,56,220,216
2850.byte	102,15,56,220,224
2851.byte	102,15,56,220,232
2852.byte	102,15,56,220,240
2853.byte	102,15,56,220,248
2854	movups	64(%r11),%xmm0
2855	shlq	$4,%r12
2856	shlq	$4,%r13
2857	jmp	.Locb_enc_loop6
2858
2859.align	32
/* Two rounds per iteration, alternating %xmm1/%xmm0 as the round key.  The
 * jnz tests the flags of the addq; the movups in between leaves them intact. */
2860.Locb_enc_loop6:
2861.byte	102,15,56,220,209
2862.byte	102,15,56,220,217
2863.byte	102,15,56,220,225
2864.byte	102,15,56,220,233
2865.byte	102,15,56,220,241
2866.byte	102,15,56,220,249
2867	movups	(%rcx,%rax,1),%xmm1
2868	addq	$32,%rax
2869
2870.byte	102,15,56,220,208
2871.byte	102,15,56,220,216
2872.byte	102,15,56,220,224
2873.byte	102,15,56,220,232
2874.byte	102,15,56,220,240
2875.byte	102,15,56,220,248
2876	movups	-16(%rcx,%rax,1),%xmm0
2877	jnz	.Locb_enc_loop6
2878
/* Final aesenc with %xmm1, then restore %xmm1 and %rax for the next call. */
2879.byte	102,15,56,220,209
2880.byte	102,15,56,220,217
2881.byte	102,15,56,220,225
2882.byte	102,15,56,220,233
2883.byte	102,15,56,220,241
2884.byte	102,15,56,220,249
2885	movups	16(%r11),%xmm1
2886	shlq	$4,%r14
2887
/* aesenclast %xmm10..%xmm15 into %xmm2..%xmm7: the per-block mask is the
 * last-round operand.  %xmm10 is consumed first and then reloaded from (%rbx). */
2888.byte	102,65,15,56,221,210
2889	movdqu	(%rbx),%xmm10
2890	movq	%r10,%rax
2891.byte	102,65,15,56,221,219
2892.byte	102,65,15,56,221,228
2893.byte	102,65,15,56,221,237
2894.byte	102,65,15,56,221,246
2895.byte	102,65,15,56,221,255
/* 0xf3,0xc3 = "rep ret" */
2896	.byte	0xf3,0xc3
2897.cfi_endproc
2898.size	__ocb_encrypt6,.-__ocb_encrypt6
2899
/*
 * __ocb_encrypt4 -- internal helper: OCB-mask and AES-encrypt the four blocks
 * in %xmm2..%xmm5 in parallel (callers with only 2 or 3 blocks zero the
 * unused trailing registers first).
 *
 * Same register conventions as the six-block helper, as far as this code
 * shows: %xmm8 accumulates the XOR of the input blocks, %xmm9 is the constant
 * folded into the masks, %xmm10 is the value used for the 1st/3rd block step,
 * %xmm15 the incoming chain value, (%rbx,%r12) and (%rbx,%r13) supply the
 * 2nd/4th block step, %r11 is the key schedule base, %rcx/%rax the round-key
 * cursor and %r10 the saved %rax.
 *
 * Unlike the six-block helper this one does not advance %r8, does not
 * recompute %r12/%r13 and does not reload %xmm10.  On return %xmm10..%xmm13
 * hold the four masks XORed with %xmm9.
 *
 * ".byte 102,15,56,220,M" = aesenc, "102,65,15,56,221,M" = aesenclast with an
 * %xmm8+ source register.
 */
2900.type	__ocb_encrypt4,@function
2901.align	32
2902__ocb_encrypt4:
2903.cfi_startproc
/* Chain the four masks, fold the inputs into %xmm8, mask the inputs. */
2904	pxor	%xmm9,%xmm15
2905	movdqu	(%rbx,%r12,1),%xmm11
2906	movdqa	%xmm10,%xmm12
2907	movdqu	(%rbx,%r13,1),%xmm13
2908	pxor	%xmm15,%xmm10
2909	pxor	%xmm10,%xmm11
2910	pxor	%xmm2,%xmm8
2911	pxor	%xmm10,%xmm2
2912	pxor	%xmm11,%xmm12
2913	pxor	%xmm3,%xmm8
2914	pxor	%xmm11,%xmm3
2915	pxor	%xmm12,%xmm13
2916	pxor	%xmm4,%xmm8
2917	pxor	%xmm12,%xmm4
2918	pxor	%xmm5,%xmm8
2919	pxor	%xmm13,%xmm5
2920	movups	32(%r11),%xmm0
2921
/* Convert the masks into last-round operands. */
2922	pxor	%xmm9,%xmm10
2923	pxor	%xmm9,%xmm11
2924	pxor	%xmm9,%xmm12
2925	pxor	%xmm9,%xmm13
2926
2927.byte	102,15,56,220,209
2928.byte	102,15,56,220,217
2929.byte	102,15,56,220,225
2930.byte	102,15,56,220,233
2931	movups	48(%r11),%xmm1
2932
2933.byte	102,15,56,220,208
2934.byte	102,15,56,220,216
2935.byte	102,15,56,220,224
2936.byte	102,15,56,220,232
2937	movups	64(%r11),%xmm0
2938	jmp	.Locb_enc_loop4
2939
2940.align	32
/* Two rounds per iteration; loop ends when the negative index in %rax
 * reaches zero (flags come from the addq). */
2941.Locb_enc_loop4:
2942.byte	102,15,56,220,209
2943.byte	102,15,56,220,217
2944.byte	102,15,56,220,225
2945.byte	102,15,56,220,233
2946	movups	(%rcx,%rax,1),%xmm1
2947	addq	$32,%rax
2948
2949.byte	102,15,56,220,208
2950.byte	102,15,56,220,216
2951.byte	102,15,56,220,224
2952.byte	102,15,56,220,232
2953	movups	-16(%rcx,%rax,1),%xmm0
2954	jnz	.Locb_enc_loop4
2955
2956.byte	102,15,56,220,209
2957.byte	102,15,56,220,217
2958.byte	102,15,56,220,225
2959.byte	102,15,56,220,233
/* Restore %xmm1 and %rax to their on-entry values for the next helper call. */
2960	movups	16(%r11),%xmm1
2961	movq	%r10,%rax
2962
/* aesenclast %xmm10..%xmm13 into %xmm2..%xmm5 */
2963.byte	102,65,15,56,221,210
2964.byte	102,65,15,56,221,219
2965.byte	102,65,15,56,221,228
2966.byte	102,65,15,56,221,237
/* rep ret */
2967	.byte	0xf3,0xc3
2968.cfi_endproc
2969.size	__ocb_encrypt4,.-__ocb_encrypt4
2970
/*
 * __ocb_encrypt1 -- internal helper: OCB-mask and AES-encrypt the single
 * block in %xmm2.
 *
 * Reads: %xmm7 (per-block step value supplied by the caller), %xmm15 (chain
 * value), %xmm9 (constant folded into the mask), %xmm8 (XOR accumulator of
 * input blocks), %xmm1 (first round key), %r11 (key schedule base),
 * %rcx/%rax (round-key cursor) and %r10 (saved %rax).
 * On return: %xmm2 = result, %xmm7 = mask XORed with %xmm9 (callers copy it
 * to %xmm15), %xmm1 reloaded from 16(%r11), %rax restored from %r10.
 *
 * ".byte 102,15,56,220,M" = aesenc, "...,221,215" = aesenclast %xmm7,%xmm2.
 */
2971.type	__ocb_encrypt1,@function
2972.align	32
2973__ocb_encrypt1:
2974.cfi_startproc
2975	pxor	%xmm15,%xmm7
2976	pxor	%xmm9,%xmm7
/* Accumulate the unmasked input, then mask it. */
2977	pxor	%xmm2,%xmm8
2978	pxor	%xmm7,%xmm2
2979	movups	32(%r11),%xmm0
2980
2981.byte	102,15,56,220,209
2982	movups	48(%r11),%xmm1
/* Second XOR with %xmm9 turns the mask into the last-round operand. */
2983	pxor	%xmm9,%xmm7
2984
2985.byte	102,15,56,220,208
2986	movups	64(%r11),%xmm0
2987	jmp	.Locb_enc_loop1
2988
2989.align	32
/* Two rounds per iteration until %rax (negative index) reaches zero. */
2990.Locb_enc_loop1:
2991.byte	102,15,56,220,209
2992	movups	(%rcx,%rax,1),%xmm1
2993	addq	$32,%rax
2994
2995.byte	102,15,56,220,208
2996	movups	-16(%rcx,%rax,1),%xmm0
2997	jnz	.Locb_enc_loop1
2998
2999.byte	102,15,56,220,209
3000	movups	16(%r11),%xmm1
3001	movq	%r10,%rax
3002
3003.byte	102,15,56,221,215
/* rep ret */
3004	.byte	0xf3,0xc3
3005.cfi_endproc
3006.size	__ocb_encrypt1,.-__ocb_encrypt1
3007
/*
 * aesni_ocb_decrypt -- AES-NI OCB-mode decryption of whole 16-byte blocks.
 *
 * Arguments, as consumed by the code below (System V AMD64 order):
 *   %rdi      input pointer, 16 bytes read per block
 *   %rsi      output pointer, 16 bytes written per block
 *   %rdx      number of 16-byte blocks (decremented by 1 or by 6)
 *   %rcx      key schedule; the 32-bit word at offset 240, scaled by 16,
 *             locates the last round key
 *   %r8       running block number; bsf (trailing-zero count) of it selects
 *             a table entry
 *   %r9       16-byte chain/offset value: loaded on entry, stored on exit
 *   8(%rsp)   first stack argument -> %rbx: table of 16-byte entries indexed
 *             by (bsf << 4) (presumably the OCB L_ table -- confirm)
 *   16(%rsp)  second stack argument -> %rbp: 16-byte checksum; loaded into
 *             %xmm8, XORed with every block written to the output, stored back
 *
 * Long-lived register roles after the prologue:
 *   %r11 = key schedule base, %rcx = base + 32 + 16*rounds-word,
 *   %rax = %r10 = 48 - 16*rounds-word (negative round-key index),
 *   %xmm9 = round-key[0] ^ last-round-key, %xmm10 = table entry 0,
 *   %xmm15 = chain value ^ last-round-key, %xmm1 = round-key[1].
 *
 * All sixteen XMM registers are zeroed before returning; %rbx, %rbp and
 * %r12-%r14 are saved and restored.
 *
 * NOTE(review): there is no guard for %rdx == 0, and bsfq is applied to %r8
 * when %r8 is even (undefined result for 0).  Callers presumably guarantee
 * at least one block and a block number >= 1 -- confirm.
 */
3008.globl	aesni_ocb_decrypt
3009.type	aesni_ocb_decrypt,@function
3010.align	32
3011aesni_ocb_decrypt:
3012.cfi_startproc
/* %rax = %rsp at entry, so 8(%rax)/16(%rax) are the two stack arguments. */
3013	leaq	(%rsp),%rax
3014	pushq	%rbx
3015.cfi_adjust_cfa_offset	8
3016.cfi_offset	%rbx,-16
3017	pushq	%rbp
3018.cfi_adjust_cfa_offset	8
3019.cfi_offset	%rbp,-24
3020	pushq	%r12
3021.cfi_adjust_cfa_offset	8
3022.cfi_offset	%r12,-32
3023	pushq	%r13
3024.cfi_adjust_cfa_offset	8
3025.cfi_offset	%r13,-40
3026	pushq	%r14
3027.cfi_adjust_cfa_offset	8
3028.cfi_offset	%r14,-48
3029	movq	8(%rax),%rbx
3030	movq	8+8(%rax),%rbp
3031
/* %xmm9 = round-key[0]; %xmm1 = last round key (at 16 + 16*rounds-word). */
3032	movl	240(%rcx),%r10d
3033	movq	%rcx,%r11
3034	shll	$4,%r10d
3035	movups	(%rcx),%xmm9
3036	movups	16(%rcx,%r10,1),%xmm1
3037
/* Fold the last round key into both the constant and the chain value. */
3038	movdqu	(%r9),%xmm15
3039	pxor	%xmm1,%xmm9
3040	pxor	%xmm1,%xmm15
3041
/* Round-key cursor used by the helpers: %rcx points past the schedule and
 * %rax/%r10 hold the negative starting index. */
3042	movl	$16+32,%eax
3043	leaq	32(%r11,%r10,1),%rcx
3044	movups	16(%r11),%xmm1
3045	subq	%r10,%rax
3046	movq	%rax,%r10
3047
3048	movdqu	(%rbx),%xmm10
3049	movdqu	(%rbp),%xmm8
3050
/* If the block number is even, handle one block on its own so that the bulk
 * path always starts on an odd block number. */
3051	testq	$1,%r8
3052	jnz	.Locb_dec_odd
3053
3054	bsfq	%r8,%r12
3055	addq	$1,%r8
3056	shlq	$4,%r12
3057	movdqu	(%rbx,%r12,1),%xmm7
3058	movdqu	(%rdi),%xmm2
3059	leaq	16(%rdi),%rdi
3060
3061	call	__ocb_decrypt1
3062
3063	movdqa	%xmm7,%xmm15
3064	movups	%xmm2,(%rsi)
3065	xorps	%xmm2,%xmm8
3066	leaq	16(%rsi),%rsi
3067	subq	$1,%rdx
3068	jz	.Locb_dec_done
3069
/* Pre-compute the table offsets for blocks %r8+1, +3, +5 (blocks +0, +2, +4
 * use %xmm10) and advance the block number by six. */
3070.Locb_dec_odd:
3071	leaq	1(%r8),%r12
3072	leaq	3(%r8),%r13
3073	leaq	5(%r8),%r14
3074	leaq	6(%r8),%r8
3075	bsfq	%r12,%r12
3076	bsfq	%r13,%r13
3077	bsfq	%r14,%r14
3078	shlq	$4,%r12
3079	shlq	$4,%r13
3080	shlq	$4,%r14
3081
/* Borrow (carry) means fewer than six blocks remain. */
3082	subq	$6,%rdx
3083	jc	.Locb_dec_short
3084	jmp	.Locb_dec_grandloop
3085
3086.align	32
/* Main loop: six blocks per iteration. */
3087.Locb_dec_grandloop:
3088	movdqu	0(%rdi),%xmm2
3089	movdqu	16(%rdi),%xmm3
3090	movdqu	32(%rdi),%xmm4
3091	movdqu	48(%rdi),%xmm5
3092	movdqu	64(%rdi),%xmm6
3093	movdqu	80(%rdi),%xmm7
3094	leaq	96(%rdi),%rdi
3095
3096	call	__ocb_decrypt6
3097
/* Store each result and fold it into the checksum. */
3098	movups	%xmm2,0(%rsi)
3099	pxor	%xmm2,%xmm8
3100	movups	%xmm3,16(%rsi)
3101	pxor	%xmm3,%xmm8
3102	movups	%xmm4,32(%rsi)
3103	pxor	%xmm4,%xmm8
3104	movups	%xmm5,48(%rsi)
3105	pxor	%xmm5,%xmm8
3106	movups	%xmm6,64(%rsi)
3107	pxor	%xmm6,%xmm8
3108	movups	%xmm7,80(%rsi)
3109	pxor	%xmm7,%xmm8
3110	leaq	96(%rsi),%rsi
3111	subq	$6,%rdx
3112	jnc	.Locb_dec_grandloop
3113
/* Tail: undo the last subtraction; 0..5 blocks remain.  The conditional
 * jumps after each movdqu still test the preceding cmpq (loads do not touch
 * the flags). */
3114.Locb_dec_short:
3115	addq	$6,%rdx
3116	jz	.Locb_dec_done
3117
3118	movdqu	0(%rdi),%xmm2
3119	cmpq	$2,%rdx
3120	jb	.Locb_dec_one
3121	movdqu	16(%rdi),%xmm3
3122	je	.Locb_dec_two
3123
3124	movdqu	32(%rdi),%xmm4
3125	cmpq	$4,%rdx
3126	jb	.Locb_dec_three
3127	movdqu	48(%rdi),%xmm5
3128	je	.Locb_dec_four
3129
/* Five blocks: run the six-block helper with a zero sixth block and take
 * the fifth mask (%xmm14) as the new chain value. */
3130	movdqu	64(%rdi),%xmm6
3131	pxor	%xmm7,%xmm7
3132
3133	call	__ocb_decrypt6
3134
3135	movdqa	%xmm14,%xmm15
3136	movups	%xmm2,0(%rsi)
3137	pxor	%xmm2,%xmm8
3138	movups	%xmm3,16(%rsi)
3139	pxor	%xmm3,%xmm8
3140	movups	%xmm4,32(%rsi)
3141	pxor	%xmm4,%xmm8
3142	movups	%xmm5,48(%rsi)
3143	pxor	%xmm5,%xmm8
3144	movups	%xmm6,64(%rsi)
3145	pxor	%xmm6,%xmm8
3146
3147	jmp	.Locb_dec_done
3148
3149.align	16
/* One block: its step value is table entry 0 (%xmm10). */
3150.Locb_dec_one:
3151	movdqa	%xmm10,%xmm7
3152
3153	call	__ocb_decrypt1
3154
3155	movdqa	%xmm7,%xmm15
3156	movups	%xmm2,0(%rsi)
3157	xorps	%xmm2,%xmm8
3158	jmp	.Locb_dec_done
3159
3160.align	16
/* Two blocks: four-block helper with blocks 3 and 4 zeroed; chain = 2nd mask. */
3161.Locb_dec_two:
3162	pxor	%xmm4,%xmm4
3163	pxor	%xmm5,%xmm5
3164
3165	call	__ocb_decrypt4
3166
3167	movdqa	%xmm11,%xmm15
3168	movups	%xmm2,0(%rsi)
3169	xorps	%xmm2,%xmm8
3170	movups	%xmm3,16(%rsi)
3171	xorps	%xmm3,%xmm8
3172
3173	jmp	.Locb_dec_done
3174
3175.align	16
/* Three blocks: four-block helper with block 4 zeroed; chain = 3rd mask. */
3176.Locb_dec_three:
3177	pxor	%xmm5,%xmm5
3178
3179	call	__ocb_decrypt4
3180
3181	movdqa	%xmm12,%xmm15
3182	movups	%xmm2,0(%rsi)
3183	xorps	%xmm2,%xmm8
3184	movups	%xmm3,16(%rsi)
3185	xorps	%xmm3,%xmm8
3186	movups	%xmm4,32(%rsi)
3187	xorps	%xmm4,%xmm8
3188
3189	jmp	.Locb_dec_done
3190
3191.align	16
/* Four blocks: chain = 4th mask. */
3192.Locb_dec_four:
3193	call	__ocb_decrypt4
3194
3195	movdqa	%xmm13,%xmm15
3196	movups	%xmm2,0(%rsi)
3197	pxor	%xmm2,%xmm8
3198	movups	%xmm3,16(%rsi)
3199	pxor	%xmm3,%xmm8
3200	movups	%xmm4,32(%rsi)
3201	pxor	%xmm4,%xmm8
3202	movups	%xmm5,48(%rsi)
3203	pxor	%xmm5,%xmm8
3204
/* %xmm0 is whatever the last helper's round loop loaded last (the final round
 * key when a helper has run); XORing it out of %xmm15 undoes the fold done in
 * the prologue before the chain value and checksum are written back. */
3205.Locb_dec_done:
3206	pxor	%xmm0,%xmm15
3207	movdqu	%xmm8,(%rbp)
3208	movdqu	%xmm15,(%r9)
3209
/* Scrub every XMM register before returning. */
3210	xorps	%xmm0,%xmm0
3211	pxor	%xmm1,%xmm1
3212	pxor	%xmm2,%xmm2
3213	pxor	%xmm3,%xmm3
3214	pxor	%xmm4,%xmm4
3215	pxor	%xmm5,%xmm5
3216	pxor	%xmm6,%xmm6
3217	pxor	%xmm7,%xmm7
3218	pxor	%xmm8,%xmm8
3219	pxor	%xmm9,%xmm9
3220	pxor	%xmm10,%xmm10
3221	pxor	%xmm11,%xmm11
3222	pxor	%xmm12,%xmm12
3223	pxor	%xmm13,%xmm13
3224	pxor	%xmm14,%xmm14
3225	pxor	%xmm15,%xmm15
/* Restore the five callee-saved registers pushed in the prologue. */
3226	leaq	40(%rsp),%rax
3227.cfi_def_cfa	%rax,8
3228	movq	-40(%rax),%r14
3229.cfi_restore	%r14
3230	movq	-32(%rax),%r13
3231.cfi_restore	%r13
3232	movq	-24(%rax),%r12
3233.cfi_restore	%r12
3234	movq	-16(%rax),%rbp
3235.cfi_restore	%rbp
3236	movq	-8(%rax),%rbx
3237.cfi_restore	%rbx
3238	leaq	(%rax),%rsp
3239.cfi_def_cfa_register	%rsp
3240.Locb_dec_epilogue:
/* rep ret */
3241	.byte	0xf3,0xc3
3242.cfi_endproc
3243.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
3244
/*
 * __ocb_decrypt6 -- internal helper of aesni_ocb_decrypt: OCB-mask and
 * AES-decrypt the six blocks in %xmm2..%xmm7 in parallel.
 *
 * Inputs (set up by aesni_ocb_decrypt):
 *   %xmm2-%xmm7   six input blocks (replaced by the results)
 *   %xmm9         round-key[0] ^ last-round-key
 *   %xmm10        table entry 0, i.e. 16 bytes at (%rbx)
 *   %xmm15        chain value ^ last-round-key
 *   %xmm1         round-key[1]
 *   %rbx, %r12-%r14  table base and byte offsets for blocks 2, 4 and 6
 *   %r11 key schedule base, %rcx/%rax round-key cursor, %r10 saved %rax
 *   %r8           block counter
 * Unlike the encrypt helper, %xmm8 is not touched here; the caller folds the
 * outputs into the checksum afterwards.
 *
 * On return: %xmm11..%xmm15 = masks of blocks 2..6 XORed with %xmm9, %xmm10
 * reloaded from (%rbx), %xmm1 reloaded from 16(%r11), %rax = %r10, %r8 += 6
 * and %r12..%r14 recomputed for the next group of six.
 *
 * ".byte 102,15,56,222,M" = aesdec, "102,65,15,56,223,M" = aesdeclast with an
 * %xmm8+ source register (66 [REX] 0F 38 DE/DF /r).
 */
3245.type	__ocb_decrypt6,@function
3246.align	32
3247__ocb_decrypt6:
3248.cfi_startproc
/* After this XOR %xmm15 = chain ^ round-key[0], so masking a block also
 * applies the initial AddRoundKey. */
3249	pxor	%xmm9,%xmm15
3250	movdqu	(%rbx,%r12,1),%xmm11
3251	movdqa	%xmm10,%xmm12
3252	movdqu	(%rbx,%r13,1),%xmm13
3253	movdqa	%xmm10,%xmm14
3254	pxor	%xmm15,%xmm10
3255	movdqu	(%rbx,%r14,1),%xmm15
3256	pxor	%xmm10,%xmm11
3257	pxor	%xmm10,%xmm2
3258	pxor	%xmm11,%xmm12
3259	pxor	%xmm11,%xmm3
3260	pxor	%xmm12,%xmm13
3261	pxor	%xmm12,%xmm4
3262	pxor	%xmm13,%xmm14
3263	pxor	%xmm13,%xmm5
3264	pxor	%xmm14,%xmm15
3265	pxor	%xmm14,%xmm6
3266	pxor	%xmm15,%xmm7
3267	movups	32(%r11),%xmm0
3268
/* Table offsets for the next group, interleaved with the first rounds. */
3269	leaq	1(%r8),%r12
3270	leaq	3(%r8),%r13
3271	leaq	5(%r8),%r14
3272	addq	$6,%r8
3273	pxor	%xmm9,%xmm10
3274	bsfq	%r12,%r12
3275	bsfq	%r13,%r13
3276	bsfq	%r14,%r14
3277
/* aesdec %xmm1 on all six blocks; meanwhile the masks are XORed with %xmm9,
 * turning them into chain ^ last-round-key for use with aesdeclast. */
3278.byte	102,15,56,222,209
3279.byte	102,15,56,222,217
3280.byte	102,15,56,222,225
3281.byte	102,15,56,222,233
3282	pxor	%xmm9,%xmm11
3283	pxor	%xmm9,%xmm12
3284.byte	102,15,56,222,241
3285	pxor	%xmm9,%xmm13
3286	pxor	%xmm9,%xmm14
3287.byte	102,15,56,222,249
3288	movups	48(%r11),%xmm1
3289	pxor	%xmm9,%xmm15
3290
3291.byte	102,15,56,222,208
3292.byte	102,15,56,222,216
3293.byte	102,15,56,222,224
3294.byte	102,15,56,222,232
3295.byte	102,15,56,222,240
3296.byte	102,15,56,222,248
3297	movups	64(%r11),%xmm0
3298	shlq	$4,%r12
3299	shlq	$4,%r13
3300	jmp	.Locb_dec_loop6
3301
3302.align	32
/* Two rounds per iteration; jnz tests the addq result (movups keeps flags). */
3303.Locb_dec_loop6:
3304.byte	102,15,56,222,209
3305.byte	102,15,56,222,217
3306.byte	102,15,56,222,225
3307.byte	102,15,56,222,233
3308.byte	102,15,56,222,241
3309.byte	102,15,56,222,249
3310	movups	(%rcx,%rax,1),%xmm1
3311	addq	$32,%rax
3312
3313.byte	102,15,56,222,208
3314.byte	102,15,56,222,216
3315.byte	102,15,56,222,224
3316.byte	102,15,56,222,232
3317.byte	102,15,56,222,240
3318.byte	102,15,56,222,248
3319	movups	-16(%rcx,%rax,1),%xmm0
3320	jnz	.Locb_dec_loop6
3321
3322.byte	102,15,56,222,209
3323.byte	102,15,56,222,217
3324.byte	102,15,56,222,225
3325.byte	102,15,56,222,233
3326.byte	102,15,56,222,241
3327.byte	102,15,56,222,249
3328	movups	16(%r11),%xmm1
3329	shlq	$4,%r14
3330
/* aesdeclast %xmm10..%xmm15 into %xmm2..%xmm7; %xmm10 is consumed before it
 * is reloaded with table entry 0. */
3331.byte	102,65,15,56,223,210
3332	movdqu	(%rbx),%xmm10
3333	movq	%r10,%rax
3334.byte	102,65,15,56,223,219
3335.byte	102,65,15,56,223,228
3336.byte	102,65,15,56,223,237
3337.byte	102,65,15,56,223,246
3338.byte	102,65,15,56,223,255
/* rep ret */
3339	.byte	0xf3,0xc3
3340.cfi_endproc
3341.size	__ocb_decrypt6,.-__ocb_decrypt6
3342
/*
 * __ocb_decrypt4 -- internal helper of aesni_ocb_decrypt: OCB-mask and
 * AES-decrypt the four blocks in %xmm2..%xmm5 in parallel (the caller zeroes
 * unused trailing blocks when only 2 or 3 remain).
 *
 * Inputs (set up by aesni_ocb_decrypt): %xmm9 = round-key[0] ^
 * last-round-key, %xmm10 = table entry 0, %xmm15 = chain ^ last-round-key,
 * %xmm1 = round-key[1], (%rbx,%r12) and (%rbx,%r13) = step values for blocks
 * 2 and 4, %r11 = key schedule base, %rcx/%rax = round-key cursor,
 * %r10 = saved %rax.
 *
 * On return %xmm10..%xmm13 hold the four masks XORed with %xmm9; %r8,
 * %r12/%r13 and the checksum register are left untouched, and %xmm10 is not
 * reloaded.
 *
 * ".byte 102,15,56,222,M" = aesdec, "102,65,15,56,223,M" = aesdeclast.
 */
3343.type	__ocb_decrypt4,@function
3344.align	32
3345__ocb_decrypt4:
3346.cfi_startproc
/* Chain the four masks and apply them to the inputs. */
3347	pxor	%xmm9,%xmm15
3348	movdqu	(%rbx,%r12,1),%xmm11
3349	movdqa	%xmm10,%xmm12
3350	movdqu	(%rbx,%r13,1),%xmm13
3351	pxor	%xmm15,%xmm10
3352	pxor	%xmm10,%xmm11
3353	pxor	%xmm10,%xmm2
3354	pxor	%xmm11,%xmm12
3355	pxor	%xmm11,%xmm3
3356	pxor	%xmm12,%xmm13
3357	pxor	%xmm12,%xmm4
3358	pxor	%xmm13,%xmm5
3359	movups	32(%r11),%xmm0
3360
/* Convert the masks into aesdeclast operands. */
3361	pxor	%xmm9,%xmm10
3362	pxor	%xmm9,%xmm11
3363	pxor	%xmm9,%xmm12
3364	pxor	%xmm9,%xmm13
3365
3366.byte	102,15,56,222,209
3367.byte	102,15,56,222,217
3368.byte	102,15,56,222,225
3369.byte	102,15,56,222,233
3370	movups	48(%r11),%xmm1
3371
3372.byte	102,15,56,222,208
3373.byte	102,15,56,222,216
3374.byte	102,15,56,222,224
3375.byte	102,15,56,222,232
3376	movups	64(%r11),%xmm0
3377	jmp	.Locb_dec_loop4
3378
3379.align	32
/* Two rounds per iteration until the negative index in %rax reaches zero. */
3380.Locb_dec_loop4:
3381.byte	102,15,56,222,209
3382.byte	102,15,56,222,217
3383.byte	102,15,56,222,225
3384.byte	102,15,56,222,233
3385	movups	(%rcx,%rax,1),%xmm1
3386	addq	$32,%rax
3387
3388.byte	102,15,56,222,208
3389.byte	102,15,56,222,216
3390.byte	102,15,56,222,224
3391.byte	102,15,56,222,232
3392	movups	-16(%rcx,%rax,1),%xmm0
3393	jnz	.Locb_dec_loop4
3394
3395.byte	102,15,56,222,209
3396.byte	102,15,56,222,217
3397.byte	102,15,56,222,225
3398.byte	102,15,56,222,233
/* Restore %xmm1 (round-key[1]) and %rax for the next helper call. */
3399	movups	16(%r11),%xmm1
3400	movq	%r10,%rax
3401
/* aesdeclast %xmm10..%xmm13 into %xmm2..%xmm5 */
3402.byte	102,65,15,56,223,210
3403.byte	102,65,15,56,223,219
3404.byte	102,65,15,56,223,228
3405.byte	102,65,15,56,223,237
/* rep ret */
3406	.byte	0xf3,0xc3
3407.cfi_endproc
3408.size	__ocb_decrypt4,.-__ocb_decrypt4
3409
/*
 * __ocb_decrypt1 -- internal helper of aesni_ocb_decrypt: OCB-mask and
 * AES-decrypt the single block in %xmm2.
 *
 * Inputs (set up by aesni_ocb_decrypt): %xmm7 = step value for this block,
 * %xmm15 = chain ^ last-round-key, %xmm9 = round-key[0] ^ last-round-key,
 * %xmm1 = round-key[1], %r11 = key schedule base, %rcx/%rax = round-key
 * cursor, %r10 = saved %rax.
 * On return: %xmm2 = result, %xmm7 = new chain ^ last-round-key (the caller
 * copies it to %xmm15), %xmm1 and %rax restored.
 *
 * ".byte 102,15,56,222,M" = aesdec, "...,223,215" = aesdeclast %xmm7,%xmm2.
 */
3410.type	__ocb_decrypt1,@function
3411.align	32
3412__ocb_decrypt1:
3413.cfi_startproc
/* %xmm7 = step ^ chain ^ round-key[0]: mask plus initial AddRoundKey. */
3414	pxor	%xmm15,%xmm7
3415	pxor	%xmm9,%xmm7
3416	pxor	%xmm7,%xmm2
3417	movups	32(%r11),%xmm0
3418
3419.byte	102,15,56,222,209
3420	movups	48(%r11),%xmm1
/* Second XOR with %xmm9 swaps round-key[0] for the last round key. */
3421	pxor	%xmm9,%xmm7
3422
3423.byte	102,15,56,222,208
3424	movups	64(%r11),%xmm0
3425	jmp	.Locb_dec_loop1
3426
3427.align	32
/* Two rounds per iteration until %rax (negative index) reaches zero. */
3428.Locb_dec_loop1:
3429.byte	102,15,56,222,209
3430	movups	(%rcx,%rax,1),%xmm1
3431	addq	$32,%rax
3432
3433.byte	102,15,56,222,208
3434	movups	-16(%rcx,%rax,1),%xmm0
3435	jnz	.Locb_dec_loop1
3436
3437.byte	102,15,56,222,209
3438	movups	16(%r11),%xmm1
3439	movq	%r10,%rax
3440
3441.byte	102,15,56,223,215
/* rep ret */
3442	.byte	0xf3,0xc3
3443.cfi_endproc
3444.size	__ocb_decrypt1,.-__ocb_decrypt1
/*
 * aesni_cbc_encrypt -- AES-NI CBC-mode encryption and decryption.
 *
 * Arguments, as consumed by the code below (System V AMD64 order):
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  length in bytes; zero returns immediately
 *   %rcx  key schedule (32-bit rounds word at offset 240)
 *   %r8   16-byte IV/chaining value, read on entry and written back on exit
 *   %r9d  non-zero selects encryption, zero selects decryption
 *
 * Encryption handles one block at a time (each block depends on the previous
 * ciphertext); a trailing partial block is copied to the output, zero-padded
 * to 16 bytes and encrypted in place.
 * Decryption has a fast path for exactly 16 bytes, an 8-blocks-per-iteration
 * loop, a 6-blocks-per-iteration loop selected by OPENSSL_ia32cap_P bits, and
 * tails for 1..7 blocks built on _aesni_decrypt2/3/4/6/8.
 *
 * Opcode legend: ".byte 102,15,56,220/221,M" = aesenc/aesenclast,
 * ".byte 102,15,56,222/223,M" = aesdec/aesdeclast; an extra 65/68/69 after
 * the 102 is a REX prefix selecting %xmm8-%xmm15.  ".long 0x9066A4F3" is
 * "rep movsb" followed by a two-byte nop and ".long 0x9066AAF3" is
 * "rep stosb" followed by the same nop.
 *
 * Data-holding XMM registers are zeroed on the way out.
 */
3445.globl	aesni_cbc_encrypt
3446.type	aesni_cbc_encrypt,@function
3447.align	16
3448aesni_cbc_encrypt:
3449.cfi_startproc
3450	testq	%rdx,%rdx
3451	jz	.Lcbc_ret
3452
/* %r10d = rounds word, %r11 = key schedule (both kept for reloading). */
3453	movl	240(%rcx),%r10d
3454	movq	%rcx,%r11
3455	testl	%r9d,%r9d
3456	jz	.Lcbc_decrypt
3457
/* ---- encryption: %xmm2 carries the previous ciphertext (initially the IV) */
3458	movups	(%r8),%xmm2
3459	movl	%r10d,%eax
3460	cmpq	$16,%rdx
3461	jb	.Lcbc_enc_tail
3462	subq	$16,%rdx
3463	jmp	.Lcbc_enc_loop
3464.align	16
3465.Lcbc_enc_loop:
3466	movups	(%rdi),%xmm3
3467	leaq	16(%rdi),%rdi
3468
/* state = input ^ round-key[0] ^ previous ciphertext */
3469	movups	(%rcx),%xmm0
3470	movups	16(%rcx),%xmm1
3471	xorps	%xmm0,%xmm3
3472	leaq	32(%rcx),%rcx
3473	xorps	%xmm3,%xmm2
3474.Loop_enc1_15:
3475.byte	102,15,56,220,209
3476	decl	%eax
3477	movups	(%rcx),%xmm1
3478	leaq	16(%rcx),%rcx
3479	jnz	.Loop_enc1_15
3480.byte	102,15,56,221,209
/* Reload the round counter and key pointer for the next block. */
3481	movl	%r10d,%eax
3482	movq	%r11,%rcx
3483	movups	%xmm2,0(%rsi)
3484	leaq	16(%rsi),%rsi
3485	subq	$16,%rdx
3486	jnc	.Lcbc_enc_loop
3487	addq	$16,%rdx
3488	jnz	.Lcbc_enc_tail
/* Done: scrub, and write the last ciphertext block back as the new IV. */
3489	pxor	%xmm0,%xmm0
3490	pxor	%xmm1,%xmm1
3491	movups	%xmm2,(%r8)
3492	pxor	%xmm2,%xmm2
3493	pxor	%xmm3,%xmm3
3494	jmp	.Lcbc_ret
3495
/* Partial final block (%rdx = 1..15 bytes): copy it to the output buffer
 * (rep movsb), zero-fill up to 16 bytes (rep stosb), then point both input
 * and output at that padded block and run the loop once more with %rdx = 0. */
3496.Lcbc_enc_tail:
3497	movq	%rdx,%rcx
3498	xchgq	%rdi,%rsi
3499.long	0x9066A4F3
3500	movl	$16,%ecx
3501	subq	%rdx,%rcx
3502	xorl	%eax,%eax
3503.long	0x9066AAF3
3504	leaq	-16(%rdi),%rdi
3505	movl	%r10d,%eax
3506	movq	%rdi,%rsi
3507	movq	%r11,%rcx
3508	xorq	%rdx,%rdx
3509	jmp	.Lcbc_enc_loop
3510
3511.align	16
/* ---- decryption */
3512.Lcbc_decrypt:
3513	cmpq	$16,%rdx
3514	jne	.Lcbc_decrypt_bulk
3515
3516
3517
/* Exactly one block: no stack frame needed.  %xmm4 keeps the ciphertext so
 * it can become the new IV; %xmm3 is the old IV. */
3518	movdqu	(%rdi),%xmm2
3519	movdqu	(%r8),%xmm3
3520	movdqa	%xmm2,%xmm4
3521	movups	(%rcx),%xmm0
3522	movups	16(%rcx),%xmm1
3523	leaq	32(%rcx),%rcx
3524	xorps	%xmm0,%xmm2
3525.Loop_dec1_16:
3526.byte	102,15,56,222,209
3527	decl	%r10d
3528	movups	(%rcx),%xmm1
3529	leaq	16(%rcx),%rcx
3530	jnz	.Loop_dec1_16
3531.byte	102,15,56,223,209
3532	pxor	%xmm0,%xmm0
3533	pxor	%xmm1,%xmm1
3534	movdqu	%xmm4,(%r8)
3535	xorps	%xmm3,%xmm2
3536	pxor	%xmm3,%xmm3
3537	movups	%xmm2,(%rsi)
3538	pxor	%xmm2,%xmm2
3539	jmp	.Lcbc_ret
3540.align	16
/* Bulk decrypt: %r11 remembers the entry %rsp; %rbp is saved and a 16-byte
 * aligned scratch slot is reserved at (%rsp).  %rbp then holds the key
 * pointer and %xmm10 the IV / previous ciphertext block. */
3541.Lcbc_decrypt_bulk:
3542	leaq	(%rsp),%r11
3543.cfi_def_cfa_register	%r11
3544	pushq	%rbp
3545.cfi_offset	%rbp,-16
3546	subq	$16,%rsp
3547	andq	$-16,%rsp
3548	movq	%rcx,%rbp
3549	movups	(%r8),%xmm10
3550	movl	%r10d,%eax
3551	cmpq	$0x50,%rdx
3552	jbe	.Lcbc_dec_tail
3553
/* More than five blocks: load six, keeping copies of the first five
 * ciphertexts in %xmm11..%xmm15 for the CBC XOR. */
3554	movups	(%rcx),%xmm0
3555	movdqu	0(%rdi),%xmm2
3556	movdqu	16(%rdi),%xmm3
3557	movdqa	%xmm2,%xmm11
3558	movdqu	32(%rdi),%xmm4
3559	movdqa	%xmm3,%xmm12
3560	movdqu	48(%rdi),%xmm5
3561	movdqa	%xmm4,%xmm13
3562	movdqu	64(%rdi),%xmm6
3563	movdqa	%xmm5,%xmm14
3564	movdqu	80(%rdi),%xmm7
3565	movdqa	%xmm6,%xmm15
3566	movl	OPENSSL_ia32cap_P+4(%rip),%r9d
3567	cmpq	$0x70,%rdx
3568	jbe	.Lcbc_dec_six_or_seven
3569
/* Capability test: mask 0x04400000, take the 6-block loop when only bit 22
 * is set.  NOTE(review): presumably CPUID(1).ECX MOVBE-without-XSAVE, used
 * to pick the narrower loop on some cores -- confirm against the
 * OPENSSL_ia32cap documentation. */
3570	andl	$71303168,%r9d
3571	subq	$0x50,%rdx
3572	cmpl	$4194304,%r9d
3573	je	.Lcbc_dec_loop6_enter
3574	subq	$0x20,%rdx
/* Key pointer is biased by +112 inside the 8-block loop; all round-key
 * displacements below are written as N-112. */
3575	leaq	112(%rcx),%rcx
3576	jmp	.Lcbc_dec_loop8_enter
3577.align	16
/* 8-block loop.  On re-entry %xmm9 is the eighth plaintext block of the
 * previous iteration, which is stored first. */
3578.Lcbc_dec_loop8:
3579	movups	%xmm9,(%rsi)
3580	leaq	16(%rsi),%rsi
3581.Lcbc_dec_loop8_enter:
3582	movdqu	96(%rdi),%xmm8
3583	pxor	%xmm0,%xmm2
3584	movdqu	112(%rdi),%xmm9
3585	pxor	%xmm0,%xmm3
3586	movups	16-112(%rcx),%xmm1
3587	pxor	%xmm0,%xmm4
/* %rbp = %rdi + 128 when %rdx >= 0x70 (unsigned), otherwise %rdi: the carry
 * from the cmpq survives the SSE/AES instructions until the adcq.  The loads
 * for the next iteration go through %rbp (presumably so they never read past
 * the end of the input -- confirm). */
3588	movq	$-1,%rbp
3589	cmpq	$0x70,%rdx
3590	pxor	%xmm0,%xmm5
3591	pxor	%xmm0,%xmm6
3592	pxor	%xmm0,%xmm7
3593	pxor	%xmm0,%xmm8
3594
3595.byte	102,15,56,222,209
3596	pxor	%xmm0,%xmm9
3597	movups	32-112(%rcx),%xmm0
3598.byte	102,15,56,222,217
3599.byte	102,15,56,222,225
3600.byte	102,15,56,222,233
3601.byte	102,15,56,222,241
3602.byte	102,15,56,222,249
3603.byte	102,68,15,56,222,193
3604	adcq	$0,%rbp
3605	andq	$128,%rbp
3606.byte	102,68,15,56,222,201
3607	addq	%rdi,%rbp
3608	movups	48-112(%rcx),%xmm1
3609.byte	102,15,56,222,208
3610.byte	102,15,56,222,216
3611.byte	102,15,56,222,224
3612.byte	102,15,56,222,232
3613.byte	102,15,56,222,240
3614.byte	102,15,56,222,248
3615.byte	102,68,15,56,222,192
3616.byte	102,68,15,56,222,200
3617	movups	64-112(%rcx),%xmm0
3618	nop
3619.byte	102,15,56,222,209
3620.byte	102,15,56,222,217
3621.byte	102,15,56,222,225
3622.byte	102,15,56,222,233
3623.byte	102,15,56,222,241
3624.byte	102,15,56,222,249
3625.byte	102,68,15,56,222,193
3626.byte	102,68,15,56,222,201
3627	movups	80-112(%rcx),%xmm1
3628	nop
3629.byte	102,15,56,222,208
3630.byte	102,15,56,222,216
3631.byte	102,15,56,222,224
3632.byte	102,15,56,222,232
3633.byte	102,15,56,222,240
3634.byte	102,15,56,222,248
3635.byte	102,68,15,56,222,192
3636.byte	102,68,15,56,222,200
3637	movups	96-112(%rcx),%xmm0
3638	nop
3639.byte	102,15,56,222,209
3640.byte	102,15,56,222,217
3641.byte	102,15,56,222,225
3642.byte	102,15,56,222,233
3643.byte	102,15,56,222,241
3644.byte	102,15,56,222,249
3645.byte	102,68,15,56,222,193
3646.byte	102,68,15,56,222,201
3647	movups	112-112(%rcx),%xmm1
3648	nop
3649.byte	102,15,56,222,208
3650.byte	102,15,56,222,216
3651.byte	102,15,56,222,224
3652.byte	102,15,56,222,232
3653.byte	102,15,56,222,240
3654.byte	102,15,56,222,248
3655.byte	102,68,15,56,222,192
3656.byte	102,68,15,56,222,200
3657	movups	128-112(%rcx),%xmm0
3658	nop
3659.byte	102,15,56,222,209
3660.byte	102,15,56,222,217
3661.byte	102,15,56,222,225
3662.byte	102,15,56,222,233
3663.byte	102,15,56,222,241
3664.byte	102,15,56,222,249
3665.byte	102,68,15,56,222,193
3666.byte	102,68,15,56,222,201
3667	movups	144-112(%rcx),%xmm1
/* Key-length dispatch on the rounds word: the flags set here are consumed by
 * the jb a few lines down and by the je after the next two rounds; nothing in
 * between modifies them. */
3668	cmpl	$11,%eax
3669.byte	102,15,56,222,208
3670.byte	102,15,56,222,216
3671.byte	102,15,56,222,224
3672.byte	102,15,56,222,232
3673.byte	102,15,56,222,240
3674.byte	102,15,56,222,248
3675.byte	102,68,15,56,222,192
3676.byte	102,68,15,56,222,200
3677	movups	160-112(%rcx),%xmm0
3678	jb	.Lcbc_dec_done
3679.byte	102,15,56,222,209
3680.byte	102,15,56,222,217
3681.byte	102,15,56,222,225
3682.byte	102,15,56,222,233
3683.byte	102,15,56,222,241
3684.byte	102,15,56,222,249
3685.byte	102,68,15,56,222,193
3686.byte	102,68,15,56,222,201
3687	movups	176-112(%rcx),%xmm1
3688	nop
3689.byte	102,15,56,222,208
3690.byte	102,15,56,222,216
3691.byte	102,15,56,222,224
3692.byte	102,15,56,222,232
3693.byte	102,15,56,222,240
3694.byte	102,15,56,222,248
3695.byte	102,68,15,56,222,192
3696.byte	102,68,15,56,222,200
3697	movups	192-112(%rcx),%xmm0
3698	je	.Lcbc_dec_done
3699.byte	102,15,56,222,209
3700.byte	102,15,56,222,217
3701.byte	102,15,56,222,225
3702.byte	102,15,56,222,233
3703.byte	102,15,56,222,241
3704.byte	102,15,56,222,249
3705.byte	102,68,15,56,222,193
3706.byte	102,68,15,56,222,201
3707	movups	208-112(%rcx),%xmm1
3708	nop
3709.byte	102,15,56,222,208
3710.byte	102,15,56,222,216
3711.byte	102,15,56,222,224
3712.byte	102,15,56,222,232
3713.byte	102,15,56,222,240
3714.byte	102,15,56,222,248
3715.byte	102,68,15,56,222,192
3716.byte	102,68,15,56,222,200
3717	movups	224-112(%rcx),%xmm0
3718	jmp	.Lcbc_dec_done
3719.align	16
/* Last two rounds.  %xmm0 holds the final round key: it is XORed into the
 * saved previous-ciphertext blocks, which are then used as the aesdeclast
 * operand, so the last round and the CBC XOR happen in one instruction.
 * Interleaved with that, the next group's input is loaded through %rbp. */
3720.Lcbc_dec_done:
3721.byte	102,15,56,222,209
3722.byte	102,15,56,222,217
3723	pxor	%xmm0,%xmm10
3724	pxor	%xmm0,%xmm11
3725.byte	102,15,56,222,225
3726.byte	102,15,56,222,233
3727	pxor	%xmm0,%xmm12
3728	pxor	%xmm0,%xmm13
3729.byte	102,15,56,222,241
3730.byte	102,15,56,222,249
3731	pxor	%xmm0,%xmm14
3732	pxor	%xmm0,%xmm15
3733.byte	102,68,15,56,222,193
3734.byte	102,68,15,56,222,201
3735	movdqu	80(%rdi),%xmm1
3736
3737.byte	102,65,15,56,223,210
3738	movdqu	96(%rdi),%xmm10
3739	pxor	%xmm0,%xmm1
3740.byte	102,65,15,56,223,219
3741	pxor	%xmm0,%xmm10
3742	movdqu	112(%rdi),%xmm0
3743.byte	102,65,15,56,223,228
3744	leaq	128(%rdi),%rdi
3745	movdqu	0(%rbp),%xmm11
3746.byte	102,65,15,56,223,237
3747.byte	102,65,15,56,223,246
3748	movdqu	16(%rbp),%xmm12
3749	movdqu	32(%rbp),%xmm13
3750.byte	102,65,15,56,223,255
3751.byte	102,68,15,56,223,193
3752	movdqu	48(%rbp),%xmm14
3753	movdqu	64(%rbp),%xmm15
3754.byte	102,69,15,56,223,202
/* %xmm10 = last ciphertext block of this group = chaining value for the next. */
3755	movdqa	%xmm0,%xmm10
3756	movdqu	80(%rbp),%xmm1
3757	movups	-112(%rcx),%xmm0
3758
/* Store seven plaintext blocks (the eighth, %xmm9, is stored later) and move
 * the prefetched ciphertext into the working registers. */
3759	movups	%xmm2,(%rsi)
3760	movdqa	%xmm11,%xmm2
3761	movups	%xmm3,16(%rsi)
3762	movdqa	%xmm12,%xmm3
3763	movups	%xmm4,32(%rsi)
3764	movdqa	%xmm13,%xmm4
3765	movups	%xmm5,48(%rsi)
3766	movdqa	%xmm14,%xmm5
3767	movups	%xmm6,64(%rsi)
3768	movdqa	%xmm15,%xmm6
3769	movups	%xmm7,80(%rsi)
3770	movdqa	%xmm1,%xmm7
3771	movups	%xmm8,96(%rsi)
3772	leaq	112(%rsi),%rsi
3773
3774	subq	$0x80,%rdx
3775	ja	.Lcbc_dec_loop8
3776
/* Leaving the 8-block loop: undo the +112 key bias and decide how much is
 * left. */
3777	movaps	%xmm9,%xmm2
3778	leaq	-112(%rcx),%rcx
3779	addq	$0x70,%rdx
3780	jle	.Lcbc_dec_clear_tail_collected
3781	movups	%xmm9,(%rsi)
3782	leaq	16(%rsi),%rsi
3783	cmpq	$0x50,%rdx
3784	jbe	.Lcbc_dec_tail
3785
3786	movaps	%xmm11,%xmm2
/* Six or seven blocks remain, already loaded in %xmm2..%xmm7. */
3787.Lcbc_dec_six_or_seven:
3788	cmpq	$0x60,%rdx
3789	ja	.Lcbc_dec_seven
3790
/* Six: %xmm8 keeps the sixth ciphertext block as the next chaining value. */
3791	movaps	%xmm7,%xmm8
3792	call	_aesni_decrypt6
3793	pxor	%xmm10,%xmm2
3794	movaps	%xmm8,%xmm10
3795	pxor	%xmm11,%xmm3
3796	movdqu	%xmm2,(%rsi)
3797	pxor	%xmm12,%xmm4
3798	movdqu	%xmm3,16(%rsi)
3799	pxor	%xmm3,%xmm3
3800	pxor	%xmm13,%xmm5
3801	movdqu	%xmm4,32(%rsi)
3802	pxor	%xmm4,%xmm4
3803	pxor	%xmm14,%xmm6
3804	movdqu	%xmm5,48(%rsi)
3805	pxor	%xmm5,%xmm5
3806	pxor	%xmm15,%xmm7
3807	movdqu	%xmm6,64(%rsi)
3808	pxor	%xmm6,%xmm6
3809	leaq	80(%rsi),%rsi
3810	movdqa	%xmm7,%xmm2
3811	pxor	%xmm7,%xmm7
3812	jmp	.Lcbc_dec_tail_collected
3813
3814.align	16
/* Seven: use the 8-block primitive with a zero eighth block; the sixth and
 * seventh ciphertext blocks are re-read from the input afterwards. */
3815.Lcbc_dec_seven:
3816	movups	96(%rdi),%xmm8
3817	xorps	%xmm9,%xmm9
3818	call	_aesni_decrypt8
3819	movups	80(%rdi),%xmm9
3820	pxor	%xmm10,%xmm2
3821	movups	96(%rdi),%xmm10
3822	pxor	%xmm11,%xmm3
3823	movdqu	%xmm2,(%rsi)
3824	pxor	%xmm12,%xmm4
3825	movdqu	%xmm3,16(%rsi)
3826	pxor	%xmm3,%xmm3
3827	pxor	%xmm13,%xmm5
3828	movdqu	%xmm4,32(%rsi)
3829	pxor	%xmm4,%xmm4
3830	pxor	%xmm14,%xmm6
3831	movdqu	%xmm5,48(%rsi)
3832	pxor	%xmm5,%xmm5
3833	pxor	%xmm15,%xmm7
3834	movdqu	%xmm6,64(%rsi)
3835	pxor	%xmm6,%xmm6
3836	pxor	%xmm9,%xmm8
3837	movdqu	%xmm7,80(%rsi)
3838	pxor	%xmm7,%xmm7
3839	leaq	96(%rsi),%rsi
3840	movdqa	%xmm8,%xmm2
3841	pxor	%xmm8,%xmm8
3842	pxor	%xmm9,%xmm9
3843	jmp	.Lcbc_dec_tail_collected
3844
3845.align	16
/* 6-block loop.  On re-entry %xmm7 is the sixth plaintext block of the
 * previous iteration, stored before the next six are loaded. */
3846.Lcbc_dec_loop6:
3847	movups	%xmm7,(%rsi)
3848	leaq	16(%rsi),%rsi
3849	movdqu	0(%rdi),%xmm2
3850	movdqu	16(%rdi),%xmm3
3851	movdqa	%xmm2,%xmm11
3852	movdqu	32(%rdi),%xmm4
3853	movdqa	%xmm3,%xmm12
3854	movdqu	48(%rdi),%xmm5
3855	movdqa	%xmm4,%xmm13
3856	movdqu	64(%rdi),%xmm6
3857	movdqa	%xmm5,%xmm14
3858	movdqu	80(%rdi),%xmm7
3859	movdqa	%xmm6,%xmm15
3860.Lcbc_dec_loop6_enter:
3861	leaq	96(%rdi),%rdi
3862	movdqa	%xmm7,%xmm8
3863
3864	call	_aesni_decrypt6
3865
/* CBC XOR and store; %rcx and %eax are reloaded from %rbp/%r10d for the next
 * call to the primitive. */
3866	pxor	%xmm10,%xmm2
3867	movdqa	%xmm8,%xmm10
3868	pxor	%xmm11,%xmm3
3869	movdqu	%xmm2,(%rsi)
3870	pxor	%xmm12,%xmm4
3871	movdqu	%xmm3,16(%rsi)
3872	pxor	%xmm13,%xmm5
3873	movdqu	%xmm4,32(%rsi)
3874	pxor	%xmm14,%xmm6
3875	movq	%rbp,%rcx
3876	movdqu	%xmm5,48(%rsi)
3877	pxor	%xmm15,%xmm7
3878	movl	%r10d,%eax
3879	movdqu	%xmm6,64(%rsi)
3880	leaq	80(%rsi),%rsi
3881	subq	$0x60,%rdx
3882	ja	.Lcbc_dec_loop6
3883
3884	movdqa	%xmm7,%xmm2
3885	addq	$0x50,%rdx
3886	jle	.Lcbc_dec_clear_tail_collected
3887	movups	%xmm7,(%rsi)
3888	leaq	16(%rsi),%rsi
3889
/* Tail of at most five blocks: load one block at a time, subtracting 16 from
 * %rdx and branching to the matching handler as soon as the count is
 * exhausted.  Ciphertext copies go to %xmm11.. for the CBC XOR. */
3890.Lcbc_dec_tail:
3891	movups	(%rdi),%xmm2
3892	subq	$0x10,%rdx
3893	jbe	.Lcbc_dec_one
3894
3895	movups	16(%rdi),%xmm3
3896	movaps	%xmm2,%xmm11
3897	subq	$0x10,%rdx
3898	jbe	.Lcbc_dec_two
3899
3900	movups	32(%rdi),%xmm4
3901	movaps	%xmm3,%xmm12
3902	subq	$0x10,%rdx
3903	jbe	.Lcbc_dec_three
3904
3905	movups	48(%rdi),%xmm5
3906	movaps	%xmm4,%xmm13
3907	subq	$0x10,%rdx
3908	jbe	.Lcbc_dec_four
3909
/* Five blocks: six-block primitive with a zero sixth block. */
3910	movups	64(%rdi),%xmm6
3911	movaps	%xmm5,%xmm14
3912	movaps	%xmm6,%xmm15
3913	xorps	%xmm7,%xmm7
3914	call	_aesni_decrypt6
3915	pxor	%xmm10,%xmm2
3916	movaps	%xmm15,%xmm10
3917	pxor	%xmm11,%xmm3
3918	movdqu	%xmm2,(%rsi)
3919	pxor	%xmm12,%xmm4
3920	movdqu	%xmm3,16(%rsi)
3921	pxor	%xmm3,%xmm3
3922	pxor	%xmm13,%xmm5
3923	movdqu	%xmm4,32(%rsi)
3924	pxor	%xmm4,%xmm4
3925	pxor	%xmm14,%xmm6
3926	movdqu	%xmm5,48(%rsi)
3927	pxor	%xmm5,%xmm5
3928	leaq	64(%rsi),%rsi
3929	movdqa	%xmm6,%xmm2
3930	pxor	%xmm6,%xmm6
3931	pxor	%xmm7,%xmm7
3932	subq	$0x10,%rdx
3933	jmp	.Lcbc_dec_tail_collected
3934
3935.align	16
/* One block: inline single-block decrypt loop. */
3936.Lcbc_dec_one:
3937	movaps	%xmm2,%xmm11
3938	movups	(%rcx),%xmm0
3939	movups	16(%rcx),%xmm1
3940	leaq	32(%rcx),%rcx
3941	xorps	%xmm0,%xmm2
3942.Loop_dec1_17:
3943.byte	102,15,56,222,209
3944	decl	%eax
3945	movups	(%rcx),%xmm1
3946	leaq	16(%rcx),%rcx
3947	jnz	.Loop_dec1_17
3948.byte	102,15,56,223,209
3949	xorps	%xmm10,%xmm2
3950	movaps	%xmm11,%xmm10
3951	jmp	.Lcbc_dec_tail_collected
3952.align	16
3953.Lcbc_dec_two:
3954	movaps	%xmm3,%xmm12
3955	call	_aesni_decrypt2
3956	pxor	%xmm10,%xmm2
3957	movaps	%xmm12,%xmm10
3958	pxor	%xmm11,%xmm3
3959	movdqu	%xmm2,(%rsi)
3960	movdqa	%xmm3,%xmm2
3961	pxor	%xmm3,%xmm3
3962	leaq	16(%rsi),%rsi
3963	jmp	.Lcbc_dec_tail_collected
3964.align	16
3965.Lcbc_dec_three:
3966	movaps	%xmm4,%xmm13
3967	call	_aesni_decrypt3
3968	pxor	%xmm10,%xmm2
3969	movaps	%xmm13,%xmm10
3970	pxor	%xmm11,%xmm3
3971	movdqu	%xmm2,(%rsi)
3972	pxor	%xmm12,%xmm4
3973	movdqu	%xmm3,16(%rsi)
3974	pxor	%xmm3,%xmm3
3975	movdqa	%xmm4,%xmm2
3976	pxor	%xmm4,%xmm4
3977	leaq	32(%rsi),%rsi
3978	jmp	.Lcbc_dec_tail_collected
3979.align	16
3980.Lcbc_dec_four:
3981	movaps	%xmm5,%xmm14
3982	call	_aesni_decrypt4
3983	pxor	%xmm10,%xmm2
3984	movaps	%xmm14,%xmm10
3985	pxor	%xmm11,%xmm3
3986	movdqu	%xmm2,(%rsi)
3987	pxor	%xmm12,%xmm4
3988	movdqu	%xmm3,16(%rsi)
3989	pxor	%xmm3,%xmm3
3990	pxor	%xmm13,%xmm5
3991	movdqu	%xmm4,32(%rsi)
3992	pxor	%xmm4,%xmm4
3993	movdqa	%xmm5,%xmm2
3994	pxor	%xmm5,%xmm5
3995	leaq	48(%rsi),%rsi
3996	jmp	.Lcbc_dec_tail_collected
3997
3998.align	16
/* Entered from the bulk loops when nothing remains: scrub the plaintext
 * registers that the tail handlers would otherwise have cleared. */
3999.Lcbc_dec_clear_tail_collected:
4000	pxor	%xmm3,%xmm3
4001	pxor	%xmm4,%xmm4
4002	pxor	%xmm5,%xmm5
4003	pxor	%xmm6,%xmm6
4004	pxor	%xmm7,%xmm7
4005	pxor	%xmm8,%xmm8
4006	pxor	%xmm9,%xmm9
/* Common exit: %xmm10 = new IV, %xmm2 = last plaintext block not yet stored.
 * Every subtraction from %rdx was a multiple of 16, so %rdx & 15 equals the
 * original length modulo 16. */
4007.Lcbc_dec_tail_collected:
4008	movups	%xmm10,(%r8)
4009	andq	$15,%rdx
4010	jnz	.Lcbc_dec_tail_partial
4011	movups	%xmm2,(%rsi)
4012	pxor	%xmm2,%xmm2
4013	jmp	.Lcbc_dec_ret
4014.align	16
/* Length was not a multiple of 16: spill the last block to the aligned stack
 * slot, copy 16 - (%rdx & 15) bytes of it to the output with rep movsb, then
 * overwrite the slot with the (already zeroed) %xmm2.
 * NOTE(review): the copy count is 16 - (len mod 16), not len mod 16 --
 * confirm this is the intended behaviour for such lengths. */
4015.Lcbc_dec_tail_partial:
4016	movaps	%xmm2,(%rsp)
4017	pxor	%xmm2,%xmm2
4018	movq	$16,%rcx
4019	movq	%rsi,%rdi
4020	subq	%rdx,%rcx
4021	leaq	(%rsp),%rsi
4022.long	0x9066A4F3
4023	movdqa	%xmm2,(%rsp)
4024
/* Restore %rbp and the entry stack pointer saved in %r11. */
4025.Lcbc_dec_ret:
4026	xorps	%xmm0,%xmm0
4027	pxor	%xmm1,%xmm1
4028	movq	-8(%r11),%rbp
4029.cfi_restore	%rbp
4030	leaq	(%r11),%rsp
4031.cfi_def_cfa_register	%rsp
4032.Lcbc_ret:
/* rep ret */
4033	.byte	0xf3,0xc3
4034.cfi_endproc
4035.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
/*
 * aesni_set_decrypt_key -- build a decryption key schedule.
 *
 * The incoming argument registers are handed unchanged to
 * __aesni_set_encrypt_key (which, as visible in this file, checks %rdi and
 * %rdx for NULL, compares %esi against 128/192/256 and leaves a rounds value
 * in %esi).  If that call returns non-zero in %eax, the value is returned
 * as is.  Otherwise the schedule at (%rdx) is converted in place: the round
 * keys are reversed end-for-end and every key except the first and last is
 * passed through aesimc.
 *
 * ".byte 0x48,0x83,0xEC,0x08" is "sub $8,%rsp" (keeps the stack 16-byte
 * aligned across the call); ".byte 102,15,56,219,M" is aesimc.
 * %xmm0 and %xmm1 are zeroed before returning.
 */
4036.globl	aesni_set_decrypt_key
4037.type	aesni_set_decrypt_key,@function
4038.align	16
4039aesni_set_decrypt_key:
4040.cfi_startproc
4041.byte	0x48,0x83,0xEC,0x08
4042.cfi_adjust_cfa_offset	8
4043	call	__aesni_set_encrypt_key
/* %esi = rounds value * 16; %rdi = address of the last round key. */
4044	shll	$4,%esi
4045	testl	%eax,%eax
4046	jnz	.Ldec_key_ret
4047	leaq	16(%rdx,%rsi,1),%rdi
4048
/* Swap the first and last round keys (no aesimc on these two). */
4049	movups	(%rdx),%xmm0
4050	movups	(%rdi),%xmm1
4051	movups	%xmm0,(%rdi)
4052	movups	%xmm1,(%rdx)
4053	leaq	16(%rdx),%rdx
4054	leaq	-16(%rdi),%rdi
4055
/* Walk inwards from both ends: aesimc each pair and store it swapped, until
 * the two pointers meet. */
4056.Ldec_key_inverse:
4057	movups	(%rdx),%xmm0
4058	movups	(%rdi),%xmm1
4059.byte	102,15,56,219,192
4060.byte	102,15,56,219,201
4061	leaq	16(%rdx),%rdx
4062	leaq	-16(%rdi),%rdi
4063	movups	%xmm0,16(%rdi)
4064	movups	%xmm1,-16(%rdx)
4065	cmpq	%rdx,%rdi
4066	ja	.Ldec_key_inverse
4067
/* Middle round key: aesimc in place, then scrub the temporaries. */
4068	movups	(%rdx),%xmm0
4069.byte	102,15,56,219,192
4070	pxor	%xmm1,%xmm1
4071	movups	%xmm0,(%rdi)
4072	pxor	%xmm0,%xmm0
4073.Ldec_key_ret:
4074	addq	$8,%rsp
4075.cfi_adjust_cfa_offset	-8
/* rep ret */
4076	.byte	0xf3,0xc3
4077.cfi_endproc
4078.LSEH_end_set_decrypt_key:
4079.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
4080.globl	aesni_set_encrypt_key
4081.type	aesni_set_encrypt_key,@function
4082.align	16
4083aesni_set_encrypt_key:
4084__aesni_set_encrypt_key:
4085.cfi_startproc
4086.byte	0x48,0x83,0xEC,0x08
4087.cfi_adjust_cfa_offset	8
4088	movq	$-1,%rax
4089	testq	%rdi,%rdi
4090	jz	.Lenc_key_ret
4091	testq	%rdx,%rdx
4092	jz	.Lenc_key_ret
4093
4094	movl	$268437504,%r10d
4095	movups	(%rdi),%xmm0
4096	xorps	%xmm4,%xmm4
4097	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
4098	leaq	16(%rdx),%rax
4099	cmpl	$256,%esi
4100	je	.L14rounds
4101	cmpl	$192,%esi
4102	je	.L12rounds
4103	cmpl	$128,%esi
4104	jne	.Lbad_keybits
4105
4106.L10rounds:
4107	movl	$9,%esi
4108	cmpl	$268435456,%r10d
4109	je	.L10rounds_alt
4110
4111	movups	%xmm0,(%rdx)
4112.byte	102,15,58,223,200,1
4113	call	.Lkey_expansion_128_cold
4114.byte	102,15,58,223,200,2
4115	call	.Lkey_expansion_128
4116.byte	102,15,58,223,200,4
4117	call	.Lkey_expansion_128
4118.byte	102,15,58,223,200,8
4119	call	.Lkey_expansion_128
4120.byte	102,15,58,223,200,16
4121	call	.Lkey_expansion_128
4122.byte	102,15,58,223,200,32
4123	call	.Lkey_expansion_128
4124.byte	102,15,58,223,200,64
4125	call	.Lkey_expansion_128
4126.byte	102,15,58,223,200,128
4127	call	.Lkey_expansion_128
4128.byte	102,15,58,223,200,27
4129	call	.Lkey_expansion_128
4130.byte	102,15,58,223,200,54
4131	call	.Lkey_expansion_128
4132	movups	%xmm0,(%rax)
4133	movl	%esi,80(%rax)
4134	xorl	%eax,%eax
4135	jmp	.Lenc_key_ret
4136
4137.align	16
/*
 * .L10rounds_alt: alternative 128-bit key schedule that replaces
 * AESKEYGENASSIST with PSHUFB + AESENCLAST (both emitted as raw bytes).
 * Entered from .L10rounds with xmm0 = raw key, rax = rdx+16, esi = 9.
 * Register roles inside the loop:
 *   xmm5 = .Lkey_rotate shuffle mask      xmm4 = round constant (all 4 dwords)
 *   xmm0 = key being transformed          xmm2 = copy of the previous round key
 *   r10d = remaining loop iterations
 */
4138.L10rounds_alt:
4139	movdqa	.Lkey_rotate(%rip),%xmm5
4140	movl	$8,%r10d	/* eight round keys come from the loop, two more follow it */
4141	movdqa	.Lkey_rcon1(%rip),%xmm4	/* rcon starts at 1 in every dword */
4142	movdqa	%xmm0,%xmm2
4143	movdqu	%xmm0,(%rdx)	/* round key 0 is the raw key */
4144	jmp	.Loop_key128
4145
4146.align	16
4147.Loop_key128:
4148.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0: every dword = bytes 13,14,15,12 of xmm0 */
4149.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0: last-round transform, then XOR with rcon */
4150	pslld	$1,%xmm4	/* rcon <<= 1 for the next iteration */
4151	leaq	16(%rax),%rax
4152
	/* xmm2 = x ^ x<<32 ^ x<<64 ^ x<<96, i.e. running XOR of the previous key's dwords */
4153	movdqa	%xmm2,%xmm3
4154	pslldq	$4,%xmm2
4155	pxor	%xmm2,%xmm3
4156	pslldq	$4,%xmm2
4157	pxor	%xmm2,%xmm3
4158	pslldq	$4,%xmm2
4159	pxor	%xmm3,%xmm2
4160
4161	pxor	%xmm2,%xmm0	/* xmm0 = new round key */
4162	movdqu	%xmm0,-16(%rax)	/* store at the slot rax pointed to before the leaq */
4163	movdqa	%xmm0,%xmm2
4164
4165	decl	%r10d
4166	jnz	.Loop_key128
4167
	/* After eight left shifts the constant no longer fits the pattern: reload 0x1b */
4168	movdqa	.Lkey_rcon1b(%rip),%xmm4
4169
	/* Next round key, using constant 0x1b; rax is not advanced any more (rax = rdx+144) */
4170.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
4171.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
4172	pslld	$1,%xmm4	/* 0x1b -> 0x36 for the final key */
4173
4174	movdqa	%xmm2,%xmm3
4175	pslldq	$4,%xmm2
4176	pxor	%xmm2,%xmm3
4177	pslldq	$4,%xmm2
4178	pxor	%xmm2,%xmm3
4179	pslldq	$4,%xmm2
4180	pxor	%xmm3,%xmm2
4181
4182	pxor	%xmm2,%xmm0
4183	movdqu	%xmm0,(%rax)	/* stored at 144(%rdx) */
4184
	/* Final round key, using constant 0x36 */
4185	movdqa	%xmm0,%xmm2
4186.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
4187.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
4188
4189	movdqa	%xmm2,%xmm3
4190	pslldq	$4,%xmm2
4191	pxor	%xmm2,%xmm3
4192	pslldq	$4,%xmm2
4193	pxor	%xmm2,%xmm3
4194	pslldq	$4,%xmm2
4195	pxor	%xmm3,%xmm2
4196
4197	pxor	%xmm2,%xmm0
4198	movdqu	%xmm0,16(%rax)	/* stored at 160(%rdx) */
4199
4200	movl	%esi,96(%rax)	/* 144+96 = 240(%rdx): round count (9) */
4201	xorl	%eax,%eax	/* return 0 */
4202	jmp	.Lenc_key_ret
4203
4204.align	16
/*
 * .L12rounds: 192-bit key schedule built with AESKEYGENASSIST (raw bytes
 * 66 0F 3A DF /r ib). Entered with xmm0 = first 16 key bytes, xmm4 = 0,
 * rax = rdx+16, r10d = OPENSSL_ia32cap_P+4 & 0x10000800.
 * The 192a/192b helpers alternate because a 24-byte expansion step does not
 * line up with the 16-byte round-key slots: 192a advances rax by 16,
 * 192b by 32, and the _cold entry does not advance it.
 */
4205.L12rounds:
4206	movq	16(%rdi),%xmm2	/* remaining 8 key bytes into the low qword of xmm2 */
4207	movl	$11,%esi	/* value written to 240(%rdx) on exit */
4208	cmpl	$268435456,%r10d	/* 0x10000000: of the two masked bits only bit 28 is set */
4209	je	.L12rounds_alt
4210
4211	movups	%xmm0,(%rdx)	/* first 16 bytes of the schedule are the raw key */
4212.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
4213	call	.Lkey_expansion_192a_cold
4214.byte	102,15,58,223,202,2	/* aeskeygenassist $2,%xmm2,%xmm1 */
4215	call	.Lkey_expansion_192b
4216.byte	102,15,58,223,202,4	/* aeskeygenassist $4,%xmm2,%xmm1 */
4217	call	.Lkey_expansion_192a
4218.byte	102,15,58,223,202,8	/* aeskeygenassist $8,%xmm2,%xmm1 */
4219	call	.Lkey_expansion_192b
4220.byte	102,15,58,223,202,16	/* aeskeygenassist $16,%xmm2,%xmm1 */
4221	call	.Lkey_expansion_192a
4222.byte	102,15,58,223,202,32	/* aeskeygenassist $32,%xmm2,%xmm1 */
4223	call	.Lkey_expansion_192b
4224.byte	102,15,58,223,202,64	/* aeskeygenassist $64,%xmm2,%xmm1 */
4225	call	.Lkey_expansion_192a
4226.byte	102,15,58,223,202,128	/* aeskeygenassist $128,%xmm2,%xmm1 */
4227	call	.Lkey_expansion_192b
4228	movups	%xmm0,(%rax)	/* last round key; rax = rdx+16 + 4*32 + 3*16 = rdx+192 */
4229	movl	%esi,48(%rax)	/* 192+48 = 240(%rdx): round count (11) */
4230	xorq	%rax,%rax	/* return 0 */
4231	jmp	.Lenc_key_ret
4232
4233.align	16
/*
 * .L12rounds_alt: alternative 192-bit key schedule using PSHUFB + AESENCLAST
 * (raw bytes) instead of AESKEYGENASSIST.
 * Entered from .L12rounds with xmm0 = first 16 key bytes, low qword of
 * xmm2 = last 8 key bytes, rax = rdx+16, esi = 11.
 * Each loop iteration emits 24 bytes of schedule: 8 bytes via movq at the
 * top and 16 bytes via movdqu at the bottom.
 */
4234.L12rounds_alt:
4235	movdqa	.Lkey_rotate192(%rip),%xmm5	/* shuffle mask selecting bytes 5,6,7,4 */
4236	movdqa	.Lkey_rcon1(%rip),%xmm4	/* rcon starts at 1 in every dword */
4237	movl	$8,%r10d	/* 8 iterations * 24 bytes = 192 bytes after the first 16 */
4238	movdqu	%xmm0,(%rdx)	/* first 16 bytes of the schedule are the raw key */
4239	jmp	.Loop_key192
4240
4241.align	16
4242.Loop_key192:
4243	movq	%xmm2,0(%rax)	/* store the pending low 8 bytes of xmm2 */
4244	movdqa	%xmm2,%xmm1	/* keep an unmodified copy for the tail update below */
4245.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2: every dword = bytes 5,6,7,4 of xmm2 */
4246.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2: last-round transform, then XOR with rcon */
4247	pslld	$1,%xmm4	/* rcon <<= 1 for the next iteration */
4248	leaq	24(%rax),%rax
4249
	/* xmm0 = running XOR of its own four dwords */
4250	movdqa	%xmm0,%xmm3
4251	pslldq	$4,%xmm0
4252	pxor	%xmm0,%xmm3
4253	pslldq	$4,%xmm0
4254	pxor	%xmm0,%xmm3
4255	pslldq	$4,%xmm0
4256	pxor	%xmm3,%xmm0
4257
	/* xmm3 = broadcast(top dword of xmm0) ^ xmm1 ^ (xmm1 << 32) */
4258	pshufd	$0xff,%xmm0,%xmm3
4259	pxor	%xmm1,%xmm3
4260	pslldq	$4,%xmm1
4261	pxor	%xmm1,%xmm3
4262
4263	pxor	%xmm2,%xmm0	/* fold in the transformed word: next 16 schedule bytes */
4264	pxor	%xmm3,%xmm2	/* next value for xmm2; its low 8 bytes are stored next iteration */
4265	movdqu	%xmm0,-16(%rax)	/* store at old rax + 8 */
4266
4267	decl	%r10d
4268	jnz	.Loop_key192
4269
4270	movl	%esi,32(%rax)	/* rax = rdx+16+8*24 = rdx+208; 208+32 = 240(%rdx): round count (11) */
4271	xorl	%eax,%eax	/* return 0 */
4272	jmp	.Lenc_key_ret
4273
4274.align	16
/*
 * .L14rounds: 256-bit key schedule built with AESKEYGENASSIST (raw bytes
 * 66 0F 3A DF /r ib). Entered with xmm0 = first 16 key bytes, xmm4 = 0,
 * rax = rdx+16, r10d = OPENSSL_ia32cap_P+4 & 0x10000800.
 * The 256a helper updates xmm0 (assist taken from xmm2) and the 256b helper
 * updates xmm2 (assist taken from xmm0); each non-cold call stores one
 * 16-byte round key and advances rax by 16.
 */
4275.L14rounds:
4276	movups	16(%rdi),%xmm2	/* second 16 bytes of the key */
4277	movl	$13,%esi	/* value written to 240(%rdx) on exit */
4278	leaq	16(%rax),%rax	/* rax = rdx+32: two round keys come straight from the raw key */
4279	cmpl	$268435456,%r10d	/* 0x10000000: of the two masked bits only bit 28 is set */
4280	je	.L14rounds_alt
4281
4282	movups	%xmm0,(%rdx)	/* round key 0 */
4283	movups	%xmm2,16(%rdx)	/* round key 1 */
4284.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
4285	call	.Lkey_expansion_256a_cold
4286.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
4287	call	.Lkey_expansion_256b
4288.byte	102,15,58,223,202,2	/* aeskeygenassist $2,%xmm2,%xmm1 */
4289	call	.Lkey_expansion_256a
4290.byte	102,15,58,223,200,2	/* aeskeygenassist $2,%xmm0,%xmm1 */
4291	call	.Lkey_expansion_256b
4292.byte	102,15,58,223,202,4	/* aeskeygenassist $4,%xmm2,%xmm1 */
4293	call	.Lkey_expansion_256a
4294.byte	102,15,58,223,200,4	/* aeskeygenassist $4,%xmm0,%xmm1 */
4295	call	.Lkey_expansion_256b
4296.byte	102,15,58,223,202,8	/* aeskeygenassist $8,%xmm2,%xmm1 */
4297	call	.Lkey_expansion_256a
4298.byte	102,15,58,223,200,8	/* aeskeygenassist $8,%xmm0,%xmm1 */
4299	call	.Lkey_expansion_256b
4300.byte	102,15,58,223,202,16	/* aeskeygenassist $16,%xmm2,%xmm1 */
4301	call	.Lkey_expansion_256a
4302.byte	102,15,58,223,200,16	/* aeskeygenassist $16,%xmm0,%xmm1 */
4303	call	.Lkey_expansion_256b
4304.byte	102,15,58,223,202,32	/* aeskeygenassist $32,%xmm2,%xmm1 */
4305	call	.Lkey_expansion_256a
4306.byte	102,15,58,223,200,32	/* aeskeygenassist $32,%xmm0,%xmm1 */
4307	call	.Lkey_expansion_256b
4308.byte	102,15,58,223,202,64	/* aeskeygenassist $64,%xmm2,%xmm1 */
4309	call	.Lkey_expansion_256a
4310	movups	%xmm0,(%rax)	/* last round key; rax = rdx+32 + 12*16 = rdx+224 */
4311	movl	%esi,16(%rax)	/* 224+16 = 240(%rdx): round count (13) */
4312	xorq	%rax,%rax	/* return 0 */
4313	jmp	.Lenc_key_ret
4314
4315.align	16
/*
 * .L14rounds_alt: alternative 256-bit key schedule using PSHUFB + AESENCLAST
 * (raw bytes) instead of AESKEYGENASSIST.
 * Entered from .L14rounds with xmm0/xmm2 = first/second 16 key bytes,
 * rax = rdx+32, esi = 13.
 * Each full loop iteration produces two round keys: one from xmm0 (with the
 * round constant) stored at (rax), one from xmm1 (no constant) stored at
 * 16(rax). The 7th pass leaves through .Ldone_key256 after the first of them.
 */
4316.L14rounds_alt:
4317	movdqa	.Lkey_rotate(%rip),%xmm5	/* shuffle mask selecting bytes 13,14,15,12 */
4318	movdqa	.Lkey_rcon1(%rip),%xmm4	/* rcon starts at 1 in every dword */
4319	movl	$7,%r10d
4320	movdqu	%xmm0,0(%rdx)	/* round key 0 */
4321	movdqa	%xmm2,%xmm1	/* xmm1 tracks the odd-numbered round key */
4322	movdqu	%xmm2,16(%rdx)	/* round key 1 */
4323	jmp	.Loop_key256
4324
4325.align	16
4326.Loop_key256:
4327.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2: every dword = bytes 13,14,15,12 of xmm2 */
4328.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2: last-round transform, then XOR with rcon */
4329
	/* xmm0 = running XOR of its own four dwords */
4330	movdqa	%xmm0,%xmm3
4331	pslldq	$4,%xmm0
4332	pxor	%xmm0,%xmm3
4333	pslldq	$4,%xmm0
4334	pxor	%xmm0,%xmm3
4335	pslldq	$4,%xmm0
4336	pxor	%xmm3,%xmm0
4337	pslld	$1,%xmm4	/* rcon <<= 1 for the next iteration */
4338
4339	pxor	%xmm2,%xmm0	/* xmm0 = next even-numbered round key */
4340	movdqu	%xmm0,(%rax)
4341
4342	decl	%r10d
4343	jz	.Ldone_key256	/* the 7th key from this half is the last one needed */
4344
4345	pshufd	$0xff,%xmm0,%xmm2	/* broadcast the top dword of the key just produced */
4346	pxor	%xmm3,%xmm3	/* zero "round key" so aesenclast adds no constant */
4347.byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 */
4348
	/* xmm1 = running XOR of its own four dwords */
4349	movdqa	%xmm1,%xmm3
4350	pslldq	$4,%xmm1
4351	pxor	%xmm1,%xmm3
4352	pslldq	$4,%xmm1
4353	pxor	%xmm1,%xmm3
4354	pslldq	$4,%xmm1
4355	pxor	%xmm3,%xmm1
4356
4357	pxor	%xmm1,%xmm2	/* xmm2 = next odd-numbered round key */
4358	movdqu	%xmm2,16(%rax)
4359	leaq	32(%rax),%rax
4360	movdqa	%xmm2,%xmm1
4361
4362	jmp	.Loop_key256
4363
4364.Ldone_key256:
4365	movl	%esi,16(%rax)	/* rax = rdx+32+6*32 = rdx+224; 224+16 = 240(%rdx): round count (13) */
4366	xorl	%eax,%eax	/* return 0 */
4367	jmp	.Lenc_key_ret
4368
4369.align	16
/*
 * .Lbad_keybits: reached when %esi is none of 128, 192 or 256; returns -2.
 * .Lenc_key_ret: common exit for every path. rax already holds the result:
 *   0 on success, -1 if %rdi or %rdx was NULL, -2 for an unsupported size.
 */
4370.Lbad_keybits:
4371	movq	$-2,%rax
4372.Lenc_key_ret:
	/* Zero every xmm register the key schedule used; presumably so that no */
	/* key material is left behind in registers after return. */
4373	pxor	%xmm0,%xmm0
4374	pxor	%xmm1,%xmm1
4375	pxor	%xmm2,%xmm2
4376	pxor	%xmm3,%xmm3
4377	pxor	%xmm4,%xmm4
4378	pxor	%xmm5,%xmm5
4379	addq	$8,%rsp	/* undo the 8-byte stack adjustment made at function entry */
4380.cfi_adjust_cfa_offset	-8
4381	.byte	0xf3,0xc3	/* rep ret */
4382.LSEH_end_set_encrypt_key:
4383
4384.align	16
/*
 * .Lkey_expansion_128: one step of the 128-bit key schedule, entered by call.
 *   .Lkey_expansion_128       stores the current key at (rax), rax += 16,
 *                             then falls through
 *   .Lkey_expansion_128_cold  skips the store (used for the first step)
 * In:  xmm0 = current round key, xmm1 = aeskeygenassist output for it,
 *      xmm4 with dword 0 equal to zero (the caller zeroes xmm4 once and the
 *      shuffles below keep dword 0 unchanged).
 * Out: xmm0 = next round key. Overwrites xmm1 and xmm4.
 */
4385.Lkey_expansion_128:
4386	movups	%xmm0,(%rax)
4387	leaq	16(%rax),%rax
4388.Lkey_expansion_128_cold:
	/* With k0..k3 the dwords of xmm0, the two shufps/xorps pairs turn xmm0 */
	/* into (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3). */
4389	shufps	$16,%xmm0,%xmm4	/* xmm4 = (0, 0, k1, k0) */
4390	xorps	%xmm4,%xmm0
4391	shufps	$140,%xmm0,%xmm4	/* xmm4 = (0, k0, k0, k1^k2) */
4392	xorps	%xmm4,%xmm0
4393	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of the keygen-assist result */
4394	xorps	%xmm1,%xmm0
4395	.byte	0xf3,0xc3	/* rep ret */
4396
4397.align	16
/*
 * .Lkey_expansion_192a: one step of the 192-bit key schedule, entered by call.
 *   .Lkey_expansion_192a       stores xmm0 at (rax), rax += 16, falls through
 *   .Lkey_expansion_192a_cold  skips the store; saves xmm2 in xmm5, which
 *                              .Lkey_expansion_192b later combines with xmm0
 *   .Lkey_expansion_192b_warm  shared tail, also reached by jmp from
 *                              .Lkey_expansion_192b
 * In:  xmm0 and the low part of xmm2 = current 192 bits of key state,
 *      xmm1 = aeskeygenassist output, xmm4 with dword 0 equal to zero.
 * Out: xmm0 and xmm2 updated. Overwrites xmm1, xmm3, xmm4 (and xmm5 when
 *      entered through the 192a labels).
 */
4398.Lkey_expansion_192a:
4399	movups	%xmm0,(%rax)
4400	leaq	16(%rax),%rax
4401.Lkey_expansion_192a_cold:
4402	movaps	%xmm2,%xmm5
4403.Lkey_expansion_192b_warm:
	/* Two interleaved computations: a running XOR over the dwords of xmm0 */
	/* (shufps/xorps via xmm4) and xmm2 ^= xmm2 << 32 (via xmm3). */
4404	shufps	$16,%xmm0,%xmm4
4405	movdqa	%xmm2,%xmm3
4406	xorps	%xmm4,%xmm0
4407	shufps	$140,%xmm0,%xmm4
4408	pslldq	$4,%xmm3
4409	xorps	%xmm4,%xmm0
4410	pshufd	$85,%xmm1,%xmm1	/* broadcast dword 1 of the keygen-assist result */
4411	pxor	%xmm3,%xmm2
4412	pxor	%xmm1,%xmm0	/* xmm0 = next four key words */
4413	pshufd	$255,%xmm0,%xmm3	/* broadcast the new top dword of xmm0 */
4414	pxor	%xmm3,%xmm2	/* carry it into the trailing words held in xmm2 */
4415	.byte	0xf3,0xc3	/* rep ret */
4416
4417.align	16
/*
 * .Lkey_expansion_192b: companion step to .Lkey_expansion_192a, entered by
 * call. Repacks 32 bytes of schedule from xmm5/xmm0/xmm2 into two 16-byte
 * stores, advances rax by 32, then jumps into the shared tail at
 * .Lkey_expansion_192b_warm, which performs the ret.
 * In:  xmm5 = value saved by the preceding 192a step, xmm0/xmm2 = key state.
 * Overwrites xmm3 and xmm5 here, plus whatever the shared tail overwrites.
 */
4418.Lkey_expansion_192b:
4419	movaps	%xmm0,%xmm3
4420	shufps	$68,%xmm0,%xmm5	/* xmm5 = (low qword of xmm5, low qword of xmm0) */
4421	movups	%xmm5,(%rax)
4422	shufps	$78,%xmm2,%xmm3	/* xmm3 = (high qword of xmm0, low qword of xmm2) */
4423	movups	%xmm3,16(%rax)
4424	leaq	32(%rax),%rax
4425	jmp	.Lkey_expansion_192b_warm
4426
4427.align	16
/*
 * .Lkey_expansion_256a: 256-bit key schedule step that updates xmm0,
 * entered by call.
 *   .Lkey_expansion_256a       stores xmm2 at (rax), rax += 16, falls through
 *   .Lkey_expansion_256a_cold  skips the store (used for the first step)
 * In:  xmm0 = key half to update, xmm1 = aeskeygenassist output computed
 *      from xmm2 by the caller, xmm4 with dword 0 equal to zero.
 * Out: xmm0 updated. Overwrites xmm1 and xmm4.
 */
4428.Lkey_expansion_256a:
4429	movups	%xmm2,(%rax)
4430	leaq	16(%rax),%rax
4431.Lkey_expansion_256a_cold:
	/* The two shufps/xorps pairs replace xmm0 by the running XOR of its dwords */
4432	shufps	$16,%xmm0,%xmm4
4433	xorps	%xmm4,%xmm0
4434	shufps	$140,%xmm0,%xmm4
4435	xorps	%xmm4,%xmm0
4436	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of the keygen-assist result */
4437	xorps	%xmm1,%xmm0
4438	.byte	0xf3,0xc3	/* rep ret */
4439
4440.align	16
/*
 * .Lkey_expansion_256b: 256-bit key schedule step that updates xmm2,
 * entered by call. Stores xmm0 at (rax) and advances rax by 16 first.
 * In:  xmm2 = key half to update, xmm1 = aeskeygenassist output computed
 *      from xmm0 by the caller, xmm4 with dword 0 equal to zero.
 * Out: xmm2 updated. Overwrites xmm1 and xmm4.
 * Unlike the 256a step it takes dword 2 of the assist result, not dword 3.
 */
4441.Lkey_expansion_256b:
4442	movups	%xmm0,(%rax)
4443	leaq	16(%rax),%rax
4444
	/* The two shufps/xorps pairs replace xmm2 by the running XOR of its dwords */
4445	shufps	$16,%xmm2,%xmm4
4446	xorps	%xmm4,%xmm2
4447	shufps	$140,%xmm2,%xmm4
4448	xorps	%xmm4,%xmm2
4449	shufps	$170,%xmm1,%xmm1	/* broadcast dword 2 of the keygen-assist result */
4450	xorps	%xmm1,%xmm2
4451	.byte	0xf3,0xc3	/* rep ret */
4452.cfi_endproc
4453.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
4454.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
4455.align	64
/*
 * Constant pool. It follows a .align 64 and every entry is 16 bytes, so each
 * label is 16-byte aligned; the key-schedule code depends on that when it
 * loads .Lkey_rotate, .Lkey_rotate192, .Lkey_rcon1 and .Lkey_rcon1b with
 * movdqa. The remaining entries are not referenced by the key-schedule code;
 * presumably other routines in this file use them.
 */
4456.Lbswap_mask:
4457.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	/* byte indices 15..0 (descending) */
4458.Lincrement32:
4459.long	6,6,6,0
4460.Lincrement64:
4461.long	1,0,0,0
4462.Lxts_magic:
4463.long	0x87,0,1,0
4464.Lincrement1:
4465.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4466.Lkey_rotate:
4467.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	/* pshufb mask: bytes 13,14,15,12 into every dword */
4468.Lkey_rotate192:
4469.long	0x04070605,0x04070605,0x04070605,0x04070605	/* pshufb mask: bytes 5,6,7,4 into every dword */
4470.Lkey_rcon1:
4471.long	1,1,1,1	/* initial round constant, replicated per dword */
4472.Lkey_rcon1b:
4473.long	0x1b,0x1b,0x1b,0x1b	/* round constant loaded after the eight-step 128-bit loop */
4474
/* ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
4475.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4476.align	64
4477