/* xref: /freebsd/sys/crypto/openssl/amd64/aesni-x86_64.S (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf) */
1/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
2.text
3
4.globl	aesni_encrypt
5.type	aesni_encrypt,@function
6.align	16
7aesni_encrypt:
8.cfi_startproc
9	movups	(%rdi),%xmm2
10	movl	240(%rdx),%eax
11	movups	(%rdx),%xmm0
12	movups	16(%rdx),%xmm1
13	leaq	32(%rdx),%rdx
14	xorps	%xmm0,%xmm2
15.Loop_enc1_1:
16.byte	102,15,56,220,209
17	decl	%eax
18	movups	(%rdx),%xmm1
19	leaq	16(%rdx),%rdx
20	jnz	.Loop_enc1_1
21.byte	102,15,56,221,209
22	pxor	%xmm0,%xmm0
23	pxor	%xmm1,%xmm1
24	movups	%xmm2,(%rsi)
25	pxor	%xmm2,%xmm2
26	.byte	0xf3,0xc3
27.cfi_endproc
28.size	aesni_encrypt,.-aesni_encrypt
29
30.globl	aesni_decrypt
31.type	aesni_decrypt,@function
32.align	16
33aesni_decrypt:
34.cfi_startproc
35	movups	(%rdi),%xmm2
36	movl	240(%rdx),%eax
37	movups	(%rdx),%xmm0
38	movups	16(%rdx),%xmm1
39	leaq	32(%rdx),%rdx
40	xorps	%xmm0,%xmm2
41.Loop_dec1_2:
42.byte	102,15,56,222,209
43	decl	%eax
44	movups	(%rdx),%xmm1
45	leaq	16(%rdx),%rdx
46	jnz	.Loop_dec1_2
47.byte	102,15,56,223,209
48	pxor	%xmm0,%xmm0
49	pxor	%xmm1,%xmm1
50	movups	%xmm2,(%rsi)
51	pxor	%xmm2,%xmm2
52	.byte	0xf3,0xc3
53.cfi_endproc
54.size	aesni_decrypt, .-aesni_decrypt
55.type	_aesni_encrypt2,@function
56.align	16
57_aesni_encrypt2:
58.cfi_startproc
59	movups	(%rcx),%xmm0
60	shll	$4,%eax
61	movups	16(%rcx),%xmm1
62	xorps	%xmm0,%xmm2
63	xorps	%xmm0,%xmm3
64	movups	32(%rcx),%xmm0
65	leaq	32(%rcx,%rax,1),%rcx
66	negq	%rax
67	addq	$16,%rax
68
69.Lenc_loop2:
70.byte	102,15,56,220,209
71.byte	102,15,56,220,217
72	movups	(%rcx,%rax,1),%xmm1
73	addq	$32,%rax
74.byte	102,15,56,220,208
75.byte	102,15,56,220,216
76	movups	-16(%rcx,%rax,1),%xmm0
77	jnz	.Lenc_loop2
78
79.byte	102,15,56,220,209
80.byte	102,15,56,220,217
81.byte	102,15,56,221,208
82.byte	102,15,56,221,216
83	.byte	0xf3,0xc3
84.cfi_endproc
85.size	_aesni_encrypt2,.-_aesni_encrypt2
86.type	_aesni_decrypt2,@function
87.align	16
88_aesni_decrypt2:
89.cfi_startproc
90	movups	(%rcx),%xmm0
91	shll	$4,%eax
92	movups	16(%rcx),%xmm1
93	xorps	%xmm0,%xmm2
94	xorps	%xmm0,%xmm3
95	movups	32(%rcx),%xmm0
96	leaq	32(%rcx,%rax,1),%rcx
97	negq	%rax
98	addq	$16,%rax
99
100.Ldec_loop2:
101.byte	102,15,56,222,209
102.byte	102,15,56,222,217
103	movups	(%rcx,%rax,1),%xmm1
104	addq	$32,%rax
105.byte	102,15,56,222,208
106.byte	102,15,56,222,216
107	movups	-16(%rcx,%rax,1),%xmm0
108	jnz	.Ldec_loop2
109
110.byte	102,15,56,222,209
111.byte	102,15,56,222,217
112.byte	102,15,56,223,208
113.byte	102,15,56,223,216
114	.byte	0xf3,0xc3
115.cfi_endproc
116.size	_aesni_decrypt2,.-_aesni_decrypt2
117.type	_aesni_encrypt3,@function
118.align	16
119_aesni_encrypt3:
120.cfi_startproc
121	movups	(%rcx),%xmm0
122	shll	$4,%eax
123	movups	16(%rcx),%xmm1
124	xorps	%xmm0,%xmm2
125	xorps	%xmm0,%xmm3
126	xorps	%xmm0,%xmm4
127	movups	32(%rcx),%xmm0
128	leaq	32(%rcx,%rax,1),%rcx
129	negq	%rax
130	addq	$16,%rax
131
132.Lenc_loop3:
133.byte	102,15,56,220,209
134.byte	102,15,56,220,217
135.byte	102,15,56,220,225
136	movups	(%rcx,%rax,1),%xmm1
137	addq	$32,%rax
138.byte	102,15,56,220,208
139.byte	102,15,56,220,216
140.byte	102,15,56,220,224
141	movups	-16(%rcx,%rax,1),%xmm0
142	jnz	.Lenc_loop3
143
144.byte	102,15,56,220,209
145.byte	102,15,56,220,217
146.byte	102,15,56,220,225
147.byte	102,15,56,221,208
148.byte	102,15,56,221,216
149.byte	102,15,56,221,224
150	.byte	0xf3,0xc3
151.cfi_endproc
152.size	_aesni_encrypt3,.-_aesni_encrypt3
153.type	_aesni_decrypt3,@function
154.align	16
155_aesni_decrypt3:
156.cfi_startproc
157	movups	(%rcx),%xmm0
158	shll	$4,%eax
159	movups	16(%rcx),%xmm1
160	xorps	%xmm0,%xmm2
161	xorps	%xmm0,%xmm3
162	xorps	%xmm0,%xmm4
163	movups	32(%rcx),%xmm0
164	leaq	32(%rcx,%rax,1),%rcx
165	negq	%rax
166	addq	$16,%rax
167
168.Ldec_loop3:
169.byte	102,15,56,222,209
170.byte	102,15,56,222,217
171.byte	102,15,56,222,225
172	movups	(%rcx,%rax,1),%xmm1
173	addq	$32,%rax
174.byte	102,15,56,222,208
175.byte	102,15,56,222,216
176.byte	102,15,56,222,224
177	movups	-16(%rcx,%rax,1),%xmm0
178	jnz	.Ldec_loop3
179
180.byte	102,15,56,222,209
181.byte	102,15,56,222,217
182.byte	102,15,56,222,225
183.byte	102,15,56,223,208
184.byte	102,15,56,223,216
185.byte	102,15,56,223,224
186	.byte	0xf3,0xc3
187.cfi_endproc
188.size	_aesni_decrypt3,.-_aesni_decrypt3
189.type	_aesni_encrypt4,@function
190.align	16
191_aesni_encrypt4:
192.cfi_startproc
193	movups	(%rcx),%xmm0
194	shll	$4,%eax
195	movups	16(%rcx),%xmm1
196	xorps	%xmm0,%xmm2
197	xorps	%xmm0,%xmm3
198	xorps	%xmm0,%xmm4
199	xorps	%xmm0,%xmm5
200	movups	32(%rcx),%xmm0
201	leaq	32(%rcx,%rax,1),%rcx
202	negq	%rax
203.byte	0x0f,0x1f,0x00
204	addq	$16,%rax
205
206.Lenc_loop4:
207.byte	102,15,56,220,209
208.byte	102,15,56,220,217
209.byte	102,15,56,220,225
210.byte	102,15,56,220,233
211	movups	(%rcx,%rax,1),%xmm1
212	addq	$32,%rax
213.byte	102,15,56,220,208
214.byte	102,15,56,220,216
215.byte	102,15,56,220,224
216.byte	102,15,56,220,232
217	movups	-16(%rcx,%rax,1),%xmm0
218	jnz	.Lenc_loop4
219
220.byte	102,15,56,220,209
221.byte	102,15,56,220,217
222.byte	102,15,56,220,225
223.byte	102,15,56,220,233
224.byte	102,15,56,221,208
225.byte	102,15,56,221,216
226.byte	102,15,56,221,224
227.byte	102,15,56,221,232
228	.byte	0xf3,0xc3
229.cfi_endproc
230.size	_aesni_encrypt4,.-_aesni_encrypt4
231.type	_aesni_decrypt4,@function
232.align	16
233_aesni_decrypt4:
234.cfi_startproc
235	movups	(%rcx),%xmm0
236	shll	$4,%eax
237	movups	16(%rcx),%xmm1
238	xorps	%xmm0,%xmm2
239	xorps	%xmm0,%xmm3
240	xorps	%xmm0,%xmm4
241	xorps	%xmm0,%xmm5
242	movups	32(%rcx),%xmm0
243	leaq	32(%rcx,%rax,1),%rcx
244	negq	%rax
245.byte	0x0f,0x1f,0x00
246	addq	$16,%rax
247
248.Ldec_loop4:
249.byte	102,15,56,222,209
250.byte	102,15,56,222,217
251.byte	102,15,56,222,225
252.byte	102,15,56,222,233
253	movups	(%rcx,%rax,1),%xmm1
254	addq	$32,%rax
255.byte	102,15,56,222,208
256.byte	102,15,56,222,216
257.byte	102,15,56,222,224
258.byte	102,15,56,222,232
259	movups	-16(%rcx,%rax,1),%xmm0
260	jnz	.Ldec_loop4
261
262.byte	102,15,56,222,209
263.byte	102,15,56,222,217
264.byte	102,15,56,222,225
265.byte	102,15,56,222,233
266.byte	102,15,56,223,208
267.byte	102,15,56,223,216
268.byte	102,15,56,223,224
269.byte	102,15,56,223,232
270	.byte	0xf3,0xc3
271.cfi_endproc
272.size	_aesni_decrypt4,.-_aesni_decrypt4
273.type	_aesni_encrypt6,@function
274.align	16
275_aesni_encrypt6:
276.cfi_startproc
277	movups	(%rcx),%xmm0
278	shll	$4,%eax
279	movups	16(%rcx),%xmm1
280	xorps	%xmm0,%xmm2
281	pxor	%xmm0,%xmm3
282	pxor	%xmm0,%xmm4
283.byte	102,15,56,220,209
284	leaq	32(%rcx,%rax,1),%rcx
285	negq	%rax
286.byte	102,15,56,220,217
287	pxor	%xmm0,%xmm5
288	pxor	%xmm0,%xmm6
289.byte	102,15,56,220,225
290	pxor	%xmm0,%xmm7
291	movups	(%rcx,%rax,1),%xmm0
292	addq	$16,%rax
293	jmp	.Lenc_loop6_enter
294.align	16
295.Lenc_loop6:
296.byte	102,15,56,220,209
297.byte	102,15,56,220,217
298.byte	102,15,56,220,225
299.Lenc_loop6_enter:
300.byte	102,15,56,220,233
301.byte	102,15,56,220,241
302.byte	102,15,56,220,249
303	movups	(%rcx,%rax,1),%xmm1
304	addq	$32,%rax
305.byte	102,15,56,220,208
306.byte	102,15,56,220,216
307.byte	102,15,56,220,224
308.byte	102,15,56,220,232
309.byte	102,15,56,220,240
310.byte	102,15,56,220,248
311	movups	-16(%rcx,%rax,1),%xmm0
312	jnz	.Lenc_loop6
313
314.byte	102,15,56,220,209
315.byte	102,15,56,220,217
316.byte	102,15,56,220,225
317.byte	102,15,56,220,233
318.byte	102,15,56,220,241
319.byte	102,15,56,220,249
320.byte	102,15,56,221,208
321.byte	102,15,56,221,216
322.byte	102,15,56,221,224
323.byte	102,15,56,221,232
324.byte	102,15,56,221,240
325.byte	102,15,56,221,248
326	.byte	0xf3,0xc3
327.cfi_endproc
328.size	_aesni_encrypt6,.-_aesni_encrypt6
329.type	_aesni_decrypt6,@function
330.align	16
331_aesni_decrypt6:
332.cfi_startproc
333	movups	(%rcx),%xmm0
334	shll	$4,%eax
335	movups	16(%rcx),%xmm1
336	xorps	%xmm0,%xmm2
337	pxor	%xmm0,%xmm3
338	pxor	%xmm0,%xmm4
339.byte	102,15,56,222,209
340	leaq	32(%rcx,%rax,1),%rcx
341	negq	%rax
342.byte	102,15,56,222,217
343	pxor	%xmm0,%xmm5
344	pxor	%xmm0,%xmm6
345.byte	102,15,56,222,225
346	pxor	%xmm0,%xmm7
347	movups	(%rcx,%rax,1),%xmm0
348	addq	$16,%rax
349	jmp	.Ldec_loop6_enter
350.align	16
351.Ldec_loop6:
352.byte	102,15,56,222,209
353.byte	102,15,56,222,217
354.byte	102,15,56,222,225
355.Ldec_loop6_enter:
356.byte	102,15,56,222,233
357.byte	102,15,56,222,241
358.byte	102,15,56,222,249
359	movups	(%rcx,%rax,1),%xmm1
360	addq	$32,%rax
361.byte	102,15,56,222,208
362.byte	102,15,56,222,216
363.byte	102,15,56,222,224
364.byte	102,15,56,222,232
365.byte	102,15,56,222,240
366.byte	102,15,56,222,248
367	movups	-16(%rcx,%rax,1),%xmm0
368	jnz	.Ldec_loop6
369
370.byte	102,15,56,222,209
371.byte	102,15,56,222,217
372.byte	102,15,56,222,225
373.byte	102,15,56,222,233
374.byte	102,15,56,222,241
375.byte	102,15,56,222,249
376.byte	102,15,56,223,208
377.byte	102,15,56,223,216
378.byte	102,15,56,223,224
379.byte	102,15,56,223,232
380.byte	102,15,56,223,240
381.byte	102,15,56,223,248
382	.byte	0xf3,0xc3
383.cfi_endproc
384.size	_aesni_decrypt6,.-_aesni_decrypt6
385.type	_aesni_encrypt8,@function
386.align	16
387_aesni_encrypt8:
388.cfi_startproc
389	movups	(%rcx),%xmm0
390	shll	$4,%eax
391	movups	16(%rcx),%xmm1
392	xorps	%xmm0,%xmm2
393	xorps	%xmm0,%xmm3
394	pxor	%xmm0,%xmm4
395	pxor	%xmm0,%xmm5
396	pxor	%xmm0,%xmm6
397	leaq	32(%rcx,%rax,1),%rcx
398	negq	%rax
399.byte	102,15,56,220,209
400	pxor	%xmm0,%xmm7
401	pxor	%xmm0,%xmm8
402.byte	102,15,56,220,217
403	pxor	%xmm0,%xmm9
404	movups	(%rcx,%rax,1),%xmm0
405	addq	$16,%rax
406	jmp	.Lenc_loop8_inner
407.align	16
408.Lenc_loop8:
409.byte	102,15,56,220,209
410.byte	102,15,56,220,217
411.Lenc_loop8_inner:
412.byte	102,15,56,220,225
413.byte	102,15,56,220,233
414.byte	102,15,56,220,241
415.byte	102,15,56,220,249
416.byte	102,68,15,56,220,193
417.byte	102,68,15,56,220,201
418.Lenc_loop8_enter:
419	movups	(%rcx,%rax,1),%xmm1
420	addq	$32,%rax
421.byte	102,15,56,220,208
422.byte	102,15,56,220,216
423.byte	102,15,56,220,224
424.byte	102,15,56,220,232
425.byte	102,15,56,220,240
426.byte	102,15,56,220,248
427.byte	102,68,15,56,220,192
428.byte	102,68,15,56,220,200
429	movups	-16(%rcx,%rax,1),%xmm0
430	jnz	.Lenc_loop8
431
432.byte	102,15,56,220,209
433.byte	102,15,56,220,217
434.byte	102,15,56,220,225
435.byte	102,15,56,220,233
436.byte	102,15,56,220,241
437.byte	102,15,56,220,249
438.byte	102,68,15,56,220,193
439.byte	102,68,15,56,220,201
440.byte	102,15,56,221,208
441.byte	102,15,56,221,216
442.byte	102,15,56,221,224
443.byte	102,15,56,221,232
444.byte	102,15,56,221,240
445.byte	102,15,56,221,248
446.byte	102,68,15,56,221,192
447.byte	102,68,15,56,221,200
448	.byte	0xf3,0xc3
449.cfi_endproc
450.size	_aesni_encrypt8,.-_aesni_encrypt8
451.type	_aesni_decrypt8,@function
452.align	16
453_aesni_decrypt8:
454.cfi_startproc
455	movups	(%rcx),%xmm0
456	shll	$4,%eax
457	movups	16(%rcx),%xmm1
458	xorps	%xmm0,%xmm2
459	xorps	%xmm0,%xmm3
460	pxor	%xmm0,%xmm4
461	pxor	%xmm0,%xmm5
462	pxor	%xmm0,%xmm6
463	leaq	32(%rcx,%rax,1),%rcx
464	negq	%rax
465.byte	102,15,56,222,209
466	pxor	%xmm0,%xmm7
467	pxor	%xmm0,%xmm8
468.byte	102,15,56,222,217
469	pxor	%xmm0,%xmm9
470	movups	(%rcx,%rax,1),%xmm0
471	addq	$16,%rax
472	jmp	.Ldec_loop8_inner
473.align	16
474.Ldec_loop8:
475.byte	102,15,56,222,209
476.byte	102,15,56,222,217
477.Ldec_loop8_inner:
478.byte	102,15,56,222,225
479.byte	102,15,56,222,233
480.byte	102,15,56,222,241
481.byte	102,15,56,222,249
482.byte	102,68,15,56,222,193
483.byte	102,68,15,56,222,201
484.Ldec_loop8_enter:
485	movups	(%rcx,%rax,1),%xmm1
486	addq	$32,%rax
487.byte	102,15,56,222,208
488.byte	102,15,56,222,216
489.byte	102,15,56,222,224
490.byte	102,15,56,222,232
491.byte	102,15,56,222,240
492.byte	102,15,56,222,248
493.byte	102,68,15,56,222,192
494.byte	102,68,15,56,222,200
495	movups	-16(%rcx,%rax,1),%xmm0
496	jnz	.Ldec_loop8
497
498.byte	102,15,56,222,209
499.byte	102,15,56,222,217
500.byte	102,15,56,222,225
501.byte	102,15,56,222,233
502.byte	102,15,56,222,241
503.byte	102,15,56,222,249
504.byte	102,68,15,56,222,193
505.byte	102,68,15,56,222,201
506.byte	102,15,56,223,208
507.byte	102,15,56,223,216
508.byte	102,15,56,223,224
509.byte	102,15,56,223,232
510.byte	102,15,56,223,240
511.byte	102,15,56,223,248
512.byte	102,68,15,56,223,192
513.byte	102,68,15,56,223,200
514	.byte	0xf3,0xc3
515.cfi_endproc
516.size	_aesni_decrypt8,.-_aesni_decrypt8
517.globl	aesni_ecb_encrypt
518.type	aesni_ecb_encrypt,@function
519.align	16
520aesni_ecb_encrypt:
521.cfi_startproc
522	andq	$-16,%rdx
523	jz	.Lecb_ret
524
525	movl	240(%rcx),%eax
526	movups	(%rcx),%xmm0
527	movq	%rcx,%r11
528	movl	%eax,%r10d
529	testl	%r8d,%r8d
530	jz	.Lecb_decrypt
531
532	cmpq	$0x80,%rdx
533	jb	.Lecb_enc_tail
534
535	movdqu	(%rdi),%xmm2
536	movdqu	16(%rdi),%xmm3
537	movdqu	32(%rdi),%xmm4
538	movdqu	48(%rdi),%xmm5
539	movdqu	64(%rdi),%xmm6
540	movdqu	80(%rdi),%xmm7
541	movdqu	96(%rdi),%xmm8
542	movdqu	112(%rdi),%xmm9
543	leaq	128(%rdi),%rdi
544	subq	$0x80,%rdx
545	jmp	.Lecb_enc_loop8_enter
546.align	16
547.Lecb_enc_loop8:
548	movups	%xmm2,(%rsi)
549	movq	%r11,%rcx
550	movdqu	(%rdi),%xmm2
551	movl	%r10d,%eax
552	movups	%xmm3,16(%rsi)
553	movdqu	16(%rdi),%xmm3
554	movups	%xmm4,32(%rsi)
555	movdqu	32(%rdi),%xmm4
556	movups	%xmm5,48(%rsi)
557	movdqu	48(%rdi),%xmm5
558	movups	%xmm6,64(%rsi)
559	movdqu	64(%rdi),%xmm6
560	movups	%xmm7,80(%rsi)
561	movdqu	80(%rdi),%xmm7
562	movups	%xmm8,96(%rsi)
563	movdqu	96(%rdi),%xmm8
564	movups	%xmm9,112(%rsi)
565	leaq	128(%rsi),%rsi
566	movdqu	112(%rdi),%xmm9
567	leaq	128(%rdi),%rdi
568.Lecb_enc_loop8_enter:
569
570	call	_aesni_encrypt8
571
572	subq	$0x80,%rdx
573	jnc	.Lecb_enc_loop8
574
575	movups	%xmm2,(%rsi)
576	movq	%r11,%rcx
577	movups	%xmm3,16(%rsi)
578	movl	%r10d,%eax
579	movups	%xmm4,32(%rsi)
580	movups	%xmm5,48(%rsi)
581	movups	%xmm6,64(%rsi)
582	movups	%xmm7,80(%rsi)
583	movups	%xmm8,96(%rsi)
584	movups	%xmm9,112(%rsi)
585	leaq	128(%rsi),%rsi
586	addq	$0x80,%rdx
587	jz	.Lecb_ret
588
589.Lecb_enc_tail:
590	movups	(%rdi),%xmm2
591	cmpq	$0x20,%rdx
592	jb	.Lecb_enc_one
593	movups	16(%rdi),%xmm3
594	je	.Lecb_enc_two
595	movups	32(%rdi),%xmm4
596	cmpq	$0x40,%rdx
597	jb	.Lecb_enc_three
598	movups	48(%rdi),%xmm5
599	je	.Lecb_enc_four
600	movups	64(%rdi),%xmm6
601	cmpq	$0x60,%rdx
602	jb	.Lecb_enc_five
603	movups	80(%rdi),%xmm7
604	je	.Lecb_enc_six
605	movdqu	96(%rdi),%xmm8
606	xorps	%xmm9,%xmm9
607	call	_aesni_encrypt8
608	movups	%xmm2,(%rsi)
609	movups	%xmm3,16(%rsi)
610	movups	%xmm4,32(%rsi)
611	movups	%xmm5,48(%rsi)
612	movups	%xmm6,64(%rsi)
613	movups	%xmm7,80(%rsi)
614	movups	%xmm8,96(%rsi)
615	jmp	.Lecb_ret
616.align	16
617.Lecb_enc_one:
618	movups	(%rcx),%xmm0
619	movups	16(%rcx),%xmm1
620	leaq	32(%rcx),%rcx
621	xorps	%xmm0,%xmm2
622.Loop_enc1_3:
623.byte	102,15,56,220,209
624	decl	%eax
625	movups	(%rcx),%xmm1
626	leaq	16(%rcx),%rcx
627	jnz	.Loop_enc1_3
628.byte	102,15,56,221,209
629	movups	%xmm2,(%rsi)
630	jmp	.Lecb_ret
631.align	16
632.Lecb_enc_two:
633	call	_aesni_encrypt2
634	movups	%xmm2,(%rsi)
635	movups	%xmm3,16(%rsi)
636	jmp	.Lecb_ret
637.align	16
638.Lecb_enc_three:
639	call	_aesni_encrypt3
640	movups	%xmm2,(%rsi)
641	movups	%xmm3,16(%rsi)
642	movups	%xmm4,32(%rsi)
643	jmp	.Lecb_ret
644.align	16
645.Lecb_enc_four:
646	call	_aesni_encrypt4
647	movups	%xmm2,(%rsi)
648	movups	%xmm3,16(%rsi)
649	movups	%xmm4,32(%rsi)
650	movups	%xmm5,48(%rsi)
651	jmp	.Lecb_ret
652.align	16
653.Lecb_enc_five:
654	xorps	%xmm7,%xmm7
655	call	_aesni_encrypt6
656	movups	%xmm2,(%rsi)
657	movups	%xmm3,16(%rsi)
658	movups	%xmm4,32(%rsi)
659	movups	%xmm5,48(%rsi)
660	movups	%xmm6,64(%rsi)
661	jmp	.Lecb_ret
662.align	16
663.Lecb_enc_six:
664	call	_aesni_encrypt6
665	movups	%xmm2,(%rsi)
666	movups	%xmm3,16(%rsi)
667	movups	%xmm4,32(%rsi)
668	movups	%xmm5,48(%rsi)
669	movups	%xmm6,64(%rsi)
670	movups	%xmm7,80(%rsi)
671	jmp	.Lecb_ret
672
673.align	16
674.Lecb_decrypt:
675	cmpq	$0x80,%rdx
676	jb	.Lecb_dec_tail
677
678	movdqu	(%rdi),%xmm2
679	movdqu	16(%rdi),%xmm3
680	movdqu	32(%rdi),%xmm4
681	movdqu	48(%rdi),%xmm5
682	movdqu	64(%rdi),%xmm6
683	movdqu	80(%rdi),%xmm7
684	movdqu	96(%rdi),%xmm8
685	movdqu	112(%rdi),%xmm9
686	leaq	128(%rdi),%rdi
687	subq	$0x80,%rdx
688	jmp	.Lecb_dec_loop8_enter
689.align	16
690.Lecb_dec_loop8:
691	movups	%xmm2,(%rsi)
692	movq	%r11,%rcx
693	movdqu	(%rdi),%xmm2
694	movl	%r10d,%eax
695	movups	%xmm3,16(%rsi)
696	movdqu	16(%rdi),%xmm3
697	movups	%xmm4,32(%rsi)
698	movdqu	32(%rdi),%xmm4
699	movups	%xmm5,48(%rsi)
700	movdqu	48(%rdi),%xmm5
701	movups	%xmm6,64(%rsi)
702	movdqu	64(%rdi),%xmm6
703	movups	%xmm7,80(%rsi)
704	movdqu	80(%rdi),%xmm7
705	movups	%xmm8,96(%rsi)
706	movdqu	96(%rdi),%xmm8
707	movups	%xmm9,112(%rsi)
708	leaq	128(%rsi),%rsi
709	movdqu	112(%rdi),%xmm9
710	leaq	128(%rdi),%rdi
711.Lecb_dec_loop8_enter:
712
713	call	_aesni_decrypt8
714
715	movups	(%r11),%xmm0
716	subq	$0x80,%rdx
717	jnc	.Lecb_dec_loop8
718
719	movups	%xmm2,(%rsi)
720	pxor	%xmm2,%xmm2
721	movq	%r11,%rcx
722	movups	%xmm3,16(%rsi)
723	pxor	%xmm3,%xmm3
724	movl	%r10d,%eax
725	movups	%xmm4,32(%rsi)
726	pxor	%xmm4,%xmm4
727	movups	%xmm5,48(%rsi)
728	pxor	%xmm5,%xmm5
729	movups	%xmm6,64(%rsi)
730	pxor	%xmm6,%xmm6
731	movups	%xmm7,80(%rsi)
732	pxor	%xmm7,%xmm7
733	movups	%xmm8,96(%rsi)
734	pxor	%xmm8,%xmm8
735	movups	%xmm9,112(%rsi)
736	pxor	%xmm9,%xmm9
737	leaq	128(%rsi),%rsi
738	addq	$0x80,%rdx
739	jz	.Lecb_ret
740
741.Lecb_dec_tail:
742	movups	(%rdi),%xmm2
743	cmpq	$0x20,%rdx
744	jb	.Lecb_dec_one
745	movups	16(%rdi),%xmm3
746	je	.Lecb_dec_two
747	movups	32(%rdi),%xmm4
748	cmpq	$0x40,%rdx
749	jb	.Lecb_dec_three
750	movups	48(%rdi),%xmm5
751	je	.Lecb_dec_four
752	movups	64(%rdi),%xmm6
753	cmpq	$0x60,%rdx
754	jb	.Lecb_dec_five
755	movups	80(%rdi),%xmm7
756	je	.Lecb_dec_six
757	movups	96(%rdi),%xmm8
758	movups	(%rcx),%xmm0
759	xorps	%xmm9,%xmm9
760	call	_aesni_decrypt8
761	movups	%xmm2,(%rsi)
762	pxor	%xmm2,%xmm2
763	movups	%xmm3,16(%rsi)
764	pxor	%xmm3,%xmm3
765	movups	%xmm4,32(%rsi)
766	pxor	%xmm4,%xmm4
767	movups	%xmm5,48(%rsi)
768	pxor	%xmm5,%xmm5
769	movups	%xmm6,64(%rsi)
770	pxor	%xmm6,%xmm6
771	movups	%xmm7,80(%rsi)
772	pxor	%xmm7,%xmm7
773	movups	%xmm8,96(%rsi)
774	pxor	%xmm8,%xmm8
775	pxor	%xmm9,%xmm9
776	jmp	.Lecb_ret
777.align	16
778.Lecb_dec_one:
779	movups	(%rcx),%xmm0
780	movups	16(%rcx),%xmm1
781	leaq	32(%rcx),%rcx
782	xorps	%xmm0,%xmm2
783.Loop_dec1_4:
784.byte	102,15,56,222,209
785	decl	%eax
786	movups	(%rcx),%xmm1
787	leaq	16(%rcx),%rcx
788	jnz	.Loop_dec1_4
789.byte	102,15,56,223,209
790	movups	%xmm2,(%rsi)
791	pxor	%xmm2,%xmm2
792	jmp	.Lecb_ret
793.align	16
794.Lecb_dec_two:
795	call	_aesni_decrypt2
796	movups	%xmm2,(%rsi)
797	pxor	%xmm2,%xmm2
798	movups	%xmm3,16(%rsi)
799	pxor	%xmm3,%xmm3
800	jmp	.Lecb_ret
801.align	16
802.Lecb_dec_three:
803	call	_aesni_decrypt3
804	movups	%xmm2,(%rsi)
805	pxor	%xmm2,%xmm2
806	movups	%xmm3,16(%rsi)
807	pxor	%xmm3,%xmm3
808	movups	%xmm4,32(%rsi)
809	pxor	%xmm4,%xmm4
810	jmp	.Lecb_ret
811.align	16
812.Lecb_dec_four:
813	call	_aesni_decrypt4
814	movups	%xmm2,(%rsi)
815	pxor	%xmm2,%xmm2
816	movups	%xmm3,16(%rsi)
817	pxor	%xmm3,%xmm3
818	movups	%xmm4,32(%rsi)
819	pxor	%xmm4,%xmm4
820	movups	%xmm5,48(%rsi)
821	pxor	%xmm5,%xmm5
822	jmp	.Lecb_ret
823.align	16
824.Lecb_dec_five:
825	xorps	%xmm7,%xmm7
826	call	_aesni_decrypt6
827	movups	%xmm2,(%rsi)
828	pxor	%xmm2,%xmm2
829	movups	%xmm3,16(%rsi)
830	pxor	%xmm3,%xmm3
831	movups	%xmm4,32(%rsi)
832	pxor	%xmm4,%xmm4
833	movups	%xmm5,48(%rsi)
834	pxor	%xmm5,%xmm5
835	movups	%xmm6,64(%rsi)
836	pxor	%xmm6,%xmm6
837	pxor	%xmm7,%xmm7
838	jmp	.Lecb_ret
839.align	16
840.Lecb_dec_six:
841	call	_aesni_decrypt6
842	movups	%xmm2,(%rsi)
843	pxor	%xmm2,%xmm2
844	movups	%xmm3,16(%rsi)
845	pxor	%xmm3,%xmm3
846	movups	%xmm4,32(%rsi)
847	pxor	%xmm4,%xmm4
848	movups	%xmm5,48(%rsi)
849	pxor	%xmm5,%xmm5
850	movups	%xmm6,64(%rsi)
851	pxor	%xmm6,%xmm6
852	movups	%xmm7,80(%rsi)
853	pxor	%xmm7,%xmm7
854
855.Lecb_ret:
856	xorps	%xmm0,%xmm0
857	pxor	%xmm1,%xmm1
858	.byte	0xf3,0xc3
859.cfi_endproc
860.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
861.globl	aesni_ccm64_encrypt_blocks
862.type	aesni_ccm64_encrypt_blocks,@function
863.align	16
864aesni_ccm64_encrypt_blocks:
865.cfi_startproc
866	movl	240(%rcx),%eax
867	movdqu	(%r8),%xmm6
868	movdqa	.Lincrement64(%rip),%xmm9
869	movdqa	.Lbswap_mask(%rip),%xmm7
870
871	shll	$4,%eax
872	movl	$16,%r10d
873	leaq	0(%rcx),%r11
874	movdqu	(%r9),%xmm3
875	movdqa	%xmm6,%xmm2
876	leaq	32(%rcx,%rax,1),%rcx
877.byte	102,15,56,0,247
878	subq	%rax,%r10
879	jmp	.Lccm64_enc_outer
880.align	16
881.Lccm64_enc_outer:
882	movups	(%r11),%xmm0
883	movq	%r10,%rax
884	movups	(%rdi),%xmm8
885
886	xorps	%xmm0,%xmm2
887	movups	16(%r11),%xmm1
888	xorps	%xmm8,%xmm0
889	xorps	%xmm0,%xmm3
890	movups	32(%r11),%xmm0
891
892.Lccm64_enc2_loop:
893.byte	102,15,56,220,209
894.byte	102,15,56,220,217
895	movups	(%rcx,%rax,1),%xmm1
896	addq	$32,%rax
897.byte	102,15,56,220,208
898.byte	102,15,56,220,216
899	movups	-16(%rcx,%rax,1),%xmm0
900	jnz	.Lccm64_enc2_loop
901.byte	102,15,56,220,209
902.byte	102,15,56,220,217
903	paddq	%xmm9,%xmm6
904	decq	%rdx
905.byte	102,15,56,221,208
906.byte	102,15,56,221,216
907
908	leaq	16(%rdi),%rdi
909	xorps	%xmm2,%xmm8
910	movdqa	%xmm6,%xmm2
911	movups	%xmm8,(%rsi)
912.byte	102,15,56,0,215
913	leaq	16(%rsi),%rsi
914	jnz	.Lccm64_enc_outer
915
916	pxor	%xmm0,%xmm0
917	pxor	%xmm1,%xmm1
918	pxor	%xmm2,%xmm2
919	movups	%xmm3,(%r9)
920	pxor	%xmm3,%xmm3
921	pxor	%xmm8,%xmm8
922	pxor	%xmm6,%xmm6
923	.byte	0xf3,0xc3
924.cfi_endproc
925.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
926.globl	aesni_ccm64_decrypt_blocks
927.type	aesni_ccm64_decrypt_blocks,@function
928.align	16
929aesni_ccm64_decrypt_blocks:
930.cfi_startproc
931	movl	240(%rcx),%eax
932	movups	(%r8),%xmm6
933	movdqu	(%r9),%xmm3
934	movdqa	.Lincrement64(%rip),%xmm9
935	movdqa	.Lbswap_mask(%rip),%xmm7
936
937	movaps	%xmm6,%xmm2
938	movl	%eax,%r10d
939	movq	%rcx,%r11
940.byte	102,15,56,0,247
941	movups	(%rcx),%xmm0
942	movups	16(%rcx),%xmm1
943	leaq	32(%rcx),%rcx
944	xorps	%xmm0,%xmm2
945.Loop_enc1_5:
946.byte	102,15,56,220,209
947	decl	%eax
948	movups	(%rcx),%xmm1
949	leaq	16(%rcx),%rcx
950	jnz	.Loop_enc1_5
951.byte	102,15,56,221,209
952	shll	$4,%r10d
953	movl	$16,%eax
954	movups	(%rdi),%xmm8
955	paddq	%xmm9,%xmm6
956	leaq	16(%rdi),%rdi
957	subq	%r10,%rax
958	leaq	32(%r11,%r10,1),%rcx
959	movq	%rax,%r10
960	jmp	.Lccm64_dec_outer
961.align	16
962.Lccm64_dec_outer:
963	xorps	%xmm2,%xmm8
964	movdqa	%xmm6,%xmm2
965	movups	%xmm8,(%rsi)
966	leaq	16(%rsi),%rsi
967.byte	102,15,56,0,215
968
969	subq	$1,%rdx
970	jz	.Lccm64_dec_break
971
972	movups	(%r11),%xmm0
973	movq	%r10,%rax
974	movups	16(%r11),%xmm1
975	xorps	%xmm0,%xmm8
976	xorps	%xmm0,%xmm2
977	xorps	%xmm8,%xmm3
978	movups	32(%r11),%xmm0
979	jmp	.Lccm64_dec2_loop
980.align	16
981.Lccm64_dec2_loop:
982.byte	102,15,56,220,209
983.byte	102,15,56,220,217
984	movups	(%rcx,%rax,1),%xmm1
985	addq	$32,%rax
986.byte	102,15,56,220,208
987.byte	102,15,56,220,216
988	movups	-16(%rcx,%rax,1),%xmm0
989	jnz	.Lccm64_dec2_loop
990	movups	(%rdi),%xmm8
991	paddq	%xmm9,%xmm6
992.byte	102,15,56,220,209
993.byte	102,15,56,220,217
994.byte	102,15,56,221,208
995.byte	102,15,56,221,216
996	leaq	16(%rdi),%rdi
997	jmp	.Lccm64_dec_outer
998
999.align	16
1000.Lccm64_dec_break:
1001
1002	movl	240(%r11),%eax
1003	movups	(%r11),%xmm0
1004	movups	16(%r11),%xmm1
1005	xorps	%xmm0,%xmm8
1006	leaq	32(%r11),%r11
1007	xorps	%xmm8,%xmm3
1008.Loop_enc1_6:
1009.byte	102,15,56,220,217
1010	decl	%eax
1011	movups	(%r11),%xmm1
1012	leaq	16(%r11),%r11
1013	jnz	.Loop_enc1_6
1014.byte	102,15,56,221,217
1015	pxor	%xmm0,%xmm0
1016	pxor	%xmm1,%xmm1
1017	pxor	%xmm2,%xmm2
1018	movups	%xmm3,(%r9)
1019	pxor	%xmm3,%xmm3
1020	pxor	%xmm8,%xmm8
1021	pxor	%xmm6,%xmm6
1022	.byte	0xf3,0xc3
1023.cfi_endproc
1024.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1025.globl	aesni_ctr32_encrypt_blocks
1026.type	aesni_ctr32_encrypt_blocks,@function
1027.align	16
1028aesni_ctr32_encrypt_blocks:
1029.cfi_startproc
1030	cmpq	$1,%rdx
1031	jne	.Lctr32_bulk
1032
1033
1034
1035	movups	(%r8),%xmm2
1036	movups	(%rdi),%xmm3
1037	movl	240(%rcx),%edx
1038	movups	(%rcx),%xmm0
1039	movups	16(%rcx),%xmm1
1040	leaq	32(%rcx),%rcx
1041	xorps	%xmm0,%xmm2
1042.Loop_enc1_7:
1043.byte	102,15,56,220,209
1044	decl	%edx
1045	movups	(%rcx),%xmm1
1046	leaq	16(%rcx),%rcx
1047	jnz	.Loop_enc1_7
1048.byte	102,15,56,221,209
1049	pxor	%xmm0,%xmm0
1050	pxor	%xmm1,%xmm1
1051	xorps	%xmm3,%xmm2
1052	pxor	%xmm3,%xmm3
1053	movups	%xmm2,(%rsi)
1054	xorps	%xmm2,%xmm2
1055	jmp	.Lctr32_epilogue
1056
1057.align	16
1058.Lctr32_bulk:
1059	leaq	(%rsp),%r11
1060.cfi_def_cfa_register	%r11
1061	pushq	%rbp
1062.cfi_offset	%rbp,-16
1063	subq	$128,%rsp
1064	andq	$-16,%rsp
1065
1066
1067
1068
1069	movdqu	(%r8),%xmm2
1070	movdqu	(%rcx),%xmm0
1071	movl	12(%r8),%r8d
1072	pxor	%xmm0,%xmm2
1073	movl	12(%rcx),%ebp
1074	movdqa	%xmm2,0(%rsp)
1075	bswapl	%r8d
1076	movdqa	%xmm2,%xmm3
1077	movdqa	%xmm2,%xmm4
1078	movdqa	%xmm2,%xmm5
1079	movdqa	%xmm2,64(%rsp)
1080	movdqa	%xmm2,80(%rsp)
1081	movdqa	%xmm2,96(%rsp)
1082	movq	%rdx,%r10
1083	movdqa	%xmm2,112(%rsp)
1084
1085	leaq	1(%r8),%rax
1086	leaq	2(%r8),%rdx
1087	bswapl	%eax
1088	bswapl	%edx
1089	xorl	%ebp,%eax
1090	xorl	%ebp,%edx
1091.byte	102,15,58,34,216,3
1092	leaq	3(%r8),%rax
1093	movdqa	%xmm3,16(%rsp)
1094.byte	102,15,58,34,226,3
1095	bswapl	%eax
1096	movq	%r10,%rdx
1097	leaq	4(%r8),%r10
1098	movdqa	%xmm4,32(%rsp)
1099	xorl	%ebp,%eax
1100	bswapl	%r10d
1101.byte	102,15,58,34,232,3
1102	xorl	%ebp,%r10d
1103	movdqa	%xmm5,48(%rsp)
1104	leaq	5(%r8),%r9
1105	movl	%r10d,64+12(%rsp)
1106	bswapl	%r9d
1107	leaq	6(%r8),%r10
1108	movl	240(%rcx),%eax
1109	xorl	%ebp,%r9d
1110	bswapl	%r10d
1111	movl	%r9d,80+12(%rsp)
1112	xorl	%ebp,%r10d
1113	leaq	7(%r8),%r9
1114	movl	%r10d,96+12(%rsp)
1115	bswapl	%r9d
1116	movl	OPENSSL_ia32cap_P+4(%rip),%r10d
1117	xorl	%ebp,%r9d
1118	andl	$71303168,%r10d
1119	movl	%r9d,112+12(%rsp)
1120
1121	movups	16(%rcx),%xmm1
1122
1123	movdqa	64(%rsp),%xmm6
1124	movdqa	80(%rsp),%xmm7
1125
1126	cmpq	$8,%rdx
1127	jb	.Lctr32_tail
1128
1129	subq	$6,%rdx
1130	cmpl	$4194304,%r10d
1131	je	.Lctr32_6x
1132
1133	leaq	128(%rcx),%rcx
1134	subq	$2,%rdx
1135	jmp	.Lctr32_loop8
1136
1137.align	16
1138.Lctr32_6x:
1139	shll	$4,%eax
1140	movl	$48,%r10d
1141	bswapl	%ebp
1142	leaq	32(%rcx,%rax,1),%rcx
1143	subq	%rax,%r10
1144	jmp	.Lctr32_loop6
1145
1146.align	16
1147.Lctr32_loop6:
1148	addl	$6,%r8d
1149	movups	-48(%rcx,%r10,1),%xmm0
1150.byte	102,15,56,220,209
1151	movl	%r8d,%eax
1152	xorl	%ebp,%eax
1153.byte	102,15,56,220,217
1154.byte	0x0f,0x38,0xf1,0x44,0x24,12
1155	leal	1(%r8),%eax
1156.byte	102,15,56,220,225
1157	xorl	%ebp,%eax
1158.byte	0x0f,0x38,0xf1,0x44,0x24,28
1159.byte	102,15,56,220,233
1160	leal	2(%r8),%eax
1161	xorl	%ebp,%eax
1162.byte	102,15,56,220,241
1163.byte	0x0f,0x38,0xf1,0x44,0x24,44
1164	leal	3(%r8),%eax
1165.byte	102,15,56,220,249
1166	movups	-32(%rcx,%r10,1),%xmm1
1167	xorl	%ebp,%eax
1168
1169.byte	102,15,56,220,208
1170.byte	0x0f,0x38,0xf1,0x44,0x24,60
1171	leal	4(%r8),%eax
1172.byte	102,15,56,220,216
1173	xorl	%ebp,%eax
1174.byte	0x0f,0x38,0xf1,0x44,0x24,76
1175.byte	102,15,56,220,224
1176	leal	5(%r8),%eax
1177	xorl	%ebp,%eax
1178.byte	102,15,56,220,232
1179.byte	0x0f,0x38,0xf1,0x44,0x24,92
1180	movq	%r10,%rax
1181.byte	102,15,56,220,240
1182.byte	102,15,56,220,248
1183	movups	-16(%rcx,%r10,1),%xmm0
1184
1185	call	.Lenc_loop6
1186
1187	movdqu	(%rdi),%xmm8
1188	movdqu	16(%rdi),%xmm9
1189	movdqu	32(%rdi),%xmm10
1190	movdqu	48(%rdi),%xmm11
1191	movdqu	64(%rdi),%xmm12
1192	movdqu	80(%rdi),%xmm13
1193	leaq	96(%rdi),%rdi
1194	movups	-64(%rcx,%r10,1),%xmm1
1195	pxor	%xmm2,%xmm8
1196	movaps	0(%rsp),%xmm2
1197	pxor	%xmm3,%xmm9
1198	movaps	16(%rsp),%xmm3
1199	pxor	%xmm4,%xmm10
1200	movaps	32(%rsp),%xmm4
1201	pxor	%xmm5,%xmm11
1202	movaps	48(%rsp),%xmm5
1203	pxor	%xmm6,%xmm12
1204	movaps	64(%rsp),%xmm6
1205	pxor	%xmm7,%xmm13
1206	movaps	80(%rsp),%xmm7
1207	movdqu	%xmm8,(%rsi)
1208	movdqu	%xmm9,16(%rsi)
1209	movdqu	%xmm10,32(%rsi)
1210	movdqu	%xmm11,48(%rsi)
1211	movdqu	%xmm12,64(%rsi)
1212	movdqu	%xmm13,80(%rsi)
1213	leaq	96(%rsi),%rsi
1214
1215	subq	$6,%rdx
1216	jnc	.Lctr32_loop6
1217
1218	addq	$6,%rdx
1219	jz	.Lctr32_done
1220
1221	leal	-48(%r10),%eax
1222	leaq	-80(%rcx,%r10,1),%rcx
1223	negl	%eax
1224	shrl	$4,%eax
1225	jmp	.Lctr32_tail
1226
1227.align	32
1228.Lctr32_loop8:
1229	addl	$8,%r8d
1230	movdqa	96(%rsp),%xmm8
1231.byte	102,15,56,220,209
1232	movl	%r8d,%r9d
1233	movdqa	112(%rsp),%xmm9
1234.byte	102,15,56,220,217
1235	bswapl	%r9d
1236	movups	32-128(%rcx),%xmm0
1237.byte	102,15,56,220,225
1238	xorl	%ebp,%r9d
1239	nop
1240.byte	102,15,56,220,233
1241	movl	%r9d,0+12(%rsp)
1242	leaq	1(%r8),%r9
1243.byte	102,15,56,220,241
1244.byte	102,15,56,220,249
1245.byte	102,68,15,56,220,193
1246.byte	102,68,15,56,220,201
1247	movups	48-128(%rcx),%xmm1
1248	bswapl	%r9d
1249.byte	102,15,56,220,208
1250.byte	102,15,56,220,216
1251	xorl	%ebp,%r9d
1252.byte	0x66,0x90
1253.byte	102,15,56,220,224
1254.byte	102,15,56,220,232
1255	movl	%r9d,16+12(%rsp)
1256	leaq	2(%r8),%r9
1257.byte	102,15,56,220,240
1258.byte	102,15,56,220,248
1259.byte	102,68,15,56,220,192
1260.byte	102,68,15,56,220,200
1261	movups	64-128(%rcx),%xmm0
1262	bswapl	%r9d
1263.byte	102,15,56,220,209
1264.byte	102,15,56,220,217
1265	xorl	%ebp,%r9d
1266.byte	0x66,0x90
1267.byte	102,15,56,220,225
1268.byte	102,15,56,220,233
1269	movl	%r9d,32+12(%rsp)
1270	leaq	3(%r8),%r9
1271.byte	102,15,56,220,241
1272.byte	102,15,56,220,249
1273.byte	102,68,15,56,220,193
1274.byte	102,68,15,56,220,201
1275	movups	80-128(%rcx),%xmm1
1276	bswapl	%r9d
1277.byte	102,15,56,220,208
1278.byte	102,15,56,220,216
1279	xorl	%ebp,%r9d
1280.byte	0x66,0x90
1281.byte	102,15,56,220,224
1282.byte	102,15,56,220,232
1283	movl	%r9d,48+12(%rsp)
1284	leaq	4(%r8),%r9
1285.byte	102,15,56,220,240
1286.byte	102,15,56,220,248
1287.byte	102,68,15,56,220,192
1288.byte	102,68,15,56,220,200
1289	movups	96-128(%rcx),%xmm0
1290	bswapl	%r9d
1291.byte	102,15,56,220,209
1292.byte	102,15,56,220,217
1293	xorl	%ebp,%r9d
1294.byte	0x66,0x90
1295.byte	102,15,56,220,225
1296.byte	102,15,56,220,233
1297	movl	%r9d,64+12(%rsp)
1298	leaq	5(%r8),%r9
1299.byte	102,15,56,220,241
1300.byte	102,15,56,220,249
1301.byte	102,68,15,56,220,193
1302.byte	102,68,15,56,220,201
1303	movups	112-128(%rcx),%xmm1
1304	bswapl	%r9d
1305.byte	102,15,56,220,208
1306.byte	102,15,56,220,216
1307	xorl	%ebp,%r9d
1308.byte	0x66,0x90
1309.byte	102,15,56,220,224
1310.byte	102,15,56,220,232
1311	movl	%r9d,80+12(%rsp)
1312	leaq	6(%r8),%r9
1313.byte	102,15,56,220,240
1314.byte	102,15,56,220,248
1315.byte	102,68,15,56,220,192
1316.byte	102,68,15,56,220,200
1317	movups	128-128(%rcx),%xmm0
1318	bswapl	%r9d
1319.byte	102,15,56,220,209
1320.byte	102,15,56,220,217
1321	xorl	%ebp,%r9d
1322.byte	0x66,0x90
1323.byte	102,15,56,220,225
1324.byte	102,15,56,220,233
1325	movl	%r9d,96+12(%rsp)
1326	leaq	7(%r8),%r9
1327.byte	102,15,56,220,241
1328.byte	102,15,56,220,249
1329.byte	102,68,15,56,220,193
1330.byte	102,68,15,56,220,201
1331	movups	144-128(%rcx),%xmm1
1332	bswapl	%r9d
1333.byte	102,15,56,220,208
1334.byte	102,15,56,220,216
1335.byte	102,15,56,220,224
1336	xorl	%ebp,%r9d
1337	movdqu	0(%rdi),%xmm10
1338.byte	102,15,56,220,232
1339	movl	%r9d,112+12(%rsp)
1340	cmpl	$11,%eax
1341.byte	102,15,56,220,240
1342.byte	102,15,56,220,248
1343.byte	102,68,15,56,220,192
1344.byte	102,68,15,56,220,200
1345	movups	160-128(%rcx),%xmm0
1346
1347	jb	.Lctr32_enc_done
1348
1349.byte	102,15,56,220,209
1350.byte	102,15,56,220,217
1351.byte	102,15,56,220,225
1352.byte	102,15,56,220,233
1353.byte	102,15,56,220,241
1354.byte	102,15,56,220,249
1355.byte	102,68,15,56,220,193
1356.byte	102,68,15,56,220,201
1357	movups	176-128(%rcx),%xmm1
1358
1359.byte	102,15,56,220,208
1360.byte	102,15,56,220,216
1361.byte	102,15,56,220,224
1362.byte	102,15,56,220,232
1363.byte	102,15,56,220,240
1364.byte	102,15,56,220,248
1365.byte	102,68,15,56,220,192
1366.byte	102,68,15,56,220,200
1367	movups	192-128(%rcx),%xmm0
1368	je	.Lctr32_enc_done
1369
1370.byte	102,15,56,220,209
1371.byte	102,15,56,220,217
1372.byte	102,15,56,220,225
1373.byte	102,15,56,220,233
1374.byte	102,15,56,220,241
1375.byte	102,15,56,220,249
1376.byte	102,68,15,56,220,193
1377.byte	102,68,15,56,220,201
1378	movups	208-128(%rcx),%xmm1
1379
1380.byte	102,15,56,220,208
1381.byte	102,15,56,220,216
1382.byte	102,15,56,220,224
1383.byte	102,15,56,220,232
1384.byte	102,15,56,220,240
1385.byte	102,15,56,220,248
1386.byte	102,68,15,56,220,192
1387.byte	102,68,15,56,220,200
1388	movups	224-128(%rcx),%xmm0
1389	jmp	.Lctr32_enc_done
1390
1391.align	16
1392.Lctr32_enc_done:
1393	movdqu	16(%rdi),%xmm11
1394	pxor	%xmm0,%xmm10
1395	movdqu	32(%rdi),%xmm12
1396	pxor	%xmm0,%xmm11
1397	movdqu	48(%rdi),%xmm13
1398	pxor	%xmm0,%xmm12
1399	movdqu	64(%rdi),%xmm14
1400	pxor	%xmm0,%xmm13
1401	movdqu	80(%rdi),%xmm15
1402	pxor	%xmm0,%xmm14
1403	pxor	%xmm0,%xmm15
1404.byte	102,15,56,220,209
1405.byte	102,15,56,220,217
1406.byte	102,15,56,220,225
1407.byte	102,15,56,220,233
1408.byte	102,15,56,220,241
1409.byte	102,15,56,220,249
1410.byte	102,68,15,56,220,193
1411.byte	102,68,15,56,220,201
1412	movdqu	96(%rdi),%xmm1
1413	leaq	128(%rdi),%rdi
1414
1415.byte	102,65,15,56,221,210
1416	pxor	%xmm0,%xmm1
1417	movdqu	112-128(%rdi),%xmm10
1418.byte	102,65,15,56,221,219
1419	pxor	%xmm0,%xmm10
1420	movdqa	0(%rsp),%xmm11
1421.byte	102,65,15,56,221,228
1422.byte	102,65,15,56,221,237
1423	movdqa	16(%rsp),%xmm12
1424	movdqa	32(%rsp),%xmm13
1425.byte	102,65,15,56,221,246
1426.byte	102,65,15,56,221,255
1427	movdqa	48(%rsp),%xmm14
1428	movdqa	64(%rsp),%xmm15
1429.byte	102,68,15,56,221,193
1430	movdqa	80(%rsp),%xmm0
1431	movups	16-128(%rcx),%xmm1
1432.byte	102,69,15,56,221,202
1433
1434	movups	%xmm2,(%rsi)
1435	movdqa	%xmm11,%xmm2
1436	movups	%xmm3,16(%rsi)
1437	movdqa	%xmm12,%xmm3
1438	movups	%xmm4,32(%rsi)
1439	movdqa	%xmm13,%xmm4
1440	movups	%xmm5,48(%rsi)
1441	movdqa	%xmm14,%xmm5
1442	movups	%xmm6,64(%rsi)
1443	movdqa	%xmm15,%xmm6
1444	movups	%xmm7,80(%rsi)
1445	movdqa	%xmm0,%xmm7
1446	movups	%xmm8,96(%rsi)
1447	movups	%xmm9,112(%rsi)
1448	leaq	128(%rsi),%rsi
1449
1450	subq	$8,%rdx
1451	jnc	.Lctr32_loop8
1452
1453	addq	$8,%rdx
1454	jz	.Lctr32_done
1455	leaq	-128(%rcx),%rcx
1456
1457.Lctr32_tail:
1458
1459
1460	leaq	16(%rcx),%rcx
1461	cmpq	$4,%rdx
1462	jb	.Lctr32_loop3
1463	je	.Lctr32_loop4
1464
1465
1466	shll	$4,%eax
1467	movdqa	96(%rsp),%xmm8
1468	pxor	%xmm9,%xmm9
1469
1470	movups	16(%rcx),%xmm0
1471.byte	102,15,56,220,209
1472.byte	102,15,56,220,217
1473	leaq	32-16(%rcx,%rax,1),%rcx
1474	negq	%rax
1475.byte	102,15,56,220,225
1476	addq	$16,%rax
1477	movups	(%rdi),%xmm10
1478.byte	102,15,56,220,233
1479.byte	102,15,56,220,241
1480	movups	16(%rdi),%xmm11
1481	movups	32(%rdi),%xmm12
1482.byte	102,15,56,220,249
1483.byte	102,68,15,56,220,193
1484
1485	call	.Lenc_loop8_enter
1486
1487	movdqu	48(%rdi),%xmm13
1488	pxor	%xmm10,%xmm2
1489	movdqu	64(%rdi),%xmm10
1490	pxor	%xmm11,%xmm3
1491	movdqu	%xmm2,(%rsi)
1492	pxor	%xmm12,%xmm4
1493	movdqu	%xmm3,16(%rsi)
1494	pxor	%xmm13,%xmm5
1495	movdqu	%xmm4,32(%rsi)
1496	pxor	%xmm10,%xmm6
1497	movdqu	%xmm5,48(%rsi)
1498	movdqu	%xmm6,64(%rsi)
1499	cmpq	$6,%rdx
1500	jb	.Lctr32_done
1501
1502	movups	80(%rdi),%xmm11
1503	xorps	%xmm11,%xmm7
1504	movups	%xmm7,80(%rsi)
1505	je	.Lctr32_done
1506
1507	movups	96(%rdi),%xmm12
1508	xorps	%xmm12,%xmm8
1509	movups	%xmm8,96(%rsi)
1510	jmp	.Lctr32_done
1511
1512.align	32
1513.Lctr32_loop4:
1514.byte	102,15,56,220,209
1515	leaq	16(%rcx),%rcx
1516	decl	%eax
1517.byte	102,15,56,220,217
1518.byte	102,15,56,220,225
1519.byte	102,15,56,220,233
1520	movups	(%rcx),%xmm1
1521	jnz	.Lctr32_loop4
1522.byte	102,15,56,221,209
1523.byte	102,15,56,221,217
1524	movups	(%rdi),%xmm10
1525	movups	16(%rdi),%xmm11
1526.byte	102,15,56,221,225
1527.byte	102,15,56,221,233
1528	movups	32(%rdi),%xmm12
1529	movups	48(%rdi),%xmm13
1530
1531	xorps	%xmm10,%xmm2
1532	movups	%xmm2,(%rsi)
1533	xorps	%xmm11,%xmm3
1534	movups	%xmm3,16(%rsi)
1535	pxor	%xmm12,%xmm4
1536	movdqu	%xmm4,32(%rsi)
1537	pxor	%xmm13,%xmm5
1538	movdqu	%xmm5,48(%rsi)
1539	jmp	.Lctr32_done
1540
1541.align	32
1542.Lctr32_loop3:
1543.byte	102,15,56,220,209
1544	leaq	16(%rcx),%rcx
1545	decl	%eax
1546.byte	102,15,56,220,217
1547.byte	102,15,56,220,225
1548	movups	(%rcx),%xmm1
1549	jnz	.Lctr32_loop3
1550.byte	102,15,56,221,209
1551.byte	102,15,56,221,217
1552.byte	102,15,56,221,225
1553
1554	movups	(%rdi),%xmm10
1555	xorps	%xmm10,%xmm2
1556	movups	%xmm2,(%rsi)
1557	cmpq	$2,%rdx
1558	jb	.Lctr32_done
1559
1560	movups	16(%rdi),%xmm11
1561	xorps	%xmm11,%xmm3
1562	movups	%xmm3,16(%rsi)
1563	je	.Lctr32_done
1564
1565	movups	32(%rdi),%xmm12
1566	xorps	%xmm12,%xmm4
1567	movups	%xmm4,32(%rsi)
1568
1569.Lctr32_done:
1570	xorps	%xmm0,%xmm0
1571	xorl	%ebp,%ebp
1572	pxor	%xmm1,%xmm1
1573	pxor	%xmm2,%xmm2
1574	pxor	%xmm3,%xmm3
1575	pxor	%xmm4,%xmm4
1576	pxor	%xmm5,%xmm5
1577	pxor	%xmm6,%xmm6
1578	pxor	%xmm7,%xmm7
1579	movaps	%xmm0,0(%rsp)
1580	pxor	%xmm8,%xmm8
1581	movaps	%xmm0,16(%rsp)
1582	pxor	%xmm9,%xmm9
1583	movaps	%xmm0,32(%rsp)
1584	pxor	%xmm10,%xmm10
1585	movaps	%xmm0,48(%rsp)
1586	pxor	%xmm11,%xmm11
1587	movaps	%xmm0,64(%rsp)
1588	pxor	%xmm12,%xmm12
1589	movaps	%xmm0,80(%rsp)
1590	pxor	%xmm13,%xmm13
1591	movaps	%xmm0,96(%rsp)
1592	pxor	%xmm14,%xmm14
1593	movaps	%xmm0,112(%rsp)
1594	pxor	%xmm15,%xmm15
1595	movq	-8(%r11),%rbp
1596.cfi_restore	%rbp
1597	leaq	(%r11),%rsp
1598.cfi_def_cfa_register	%rsp
1599.Lctr32_epilogue:
1600	.byte	0xf3,0xc3
1601.cfi_endproc
1602.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1603.globl	aesni_xts_encrypt
1604.type	aesni_xts_encrypt,@function
1605.align	16
1606aesni_xts_encrypt:
1607.cfi_startproc
1608	leaq	(%rsp),%r11
1609.cfi_def_cfa_register	%r11
1610	pushq	%rbp
1611.cfi_offset	%rbp,-16
1612	subq	$112,%rsp
1613	andq	$-16,%rsp
1614	movups	(%r9),%xmm2
1615	movl	240(%r8),%eax
1616	movl	240(%rcx),%r10d
1617	movups	(%r8),%xmm0
1618	movups	16(%r8),%xmm1
1619	leaq	32(%r8),%r8
1620	xorps	%xmm0,%xmm2
1621.Loop_enc1_8:
1622.byte	102,15,56,220,209
1623	decl	%eax
1624	movups	(%r8),%xmm1
1625	leaq	16(%r8),%r8
1626	jnz	.Loop_enc1_8
1627.byte	102,15,56,221,209
1628	movups	(%rcx),%xmm0
1629	movq	%rcx,%rbp
1630	movl	%r10d,%eax
1631	shll	$4,%r10d
1632	movq	%rdx,%r9
1633	andq	$-16,%rdx
1634
1635	movups	16(%rcx,%r10,1),%xmm1
1636
1637	movdqa	.Lxts_magic(%rip),%xmm8
1638	movdqa	%xmm2,%xmm15
1639	pshufd	$0x5f,%xmm2,%xmm9
1640	pxor	%xmm0,%xmm1
1641	movdqa	%xmm9,%xmm14
1642	paddd	%xmm9,%xmm9
1643	movdqa	%xmm15,%xmm10
1644	psrad	$31,%xmm14
1645	paddq	%xmm15,%xmm15
1646	pand	%xmm8,%xmm14
1647	pxor	%xmm0,%xmm10
1648	pxor	%xmm14,%xmm15
1649	movdqa	%xmm9,%xmm14
1650	paddd	%xmm9,%xmm9
1651	movdqa	%xmm15,%xmm11
1652	psrad	$31,%xmm14
1653	paddq	%xmm15,%xmm15
1654	pand	%xmm8,%xmm14
1655	pxor	%xmm0,%xmm11
1656	pxor	%xmm14,%xmm15
1657	movdqa	%xmm9,%xmm14
1658	paddd	%xmm9,%xmm9
1659	movdqa	%xmm15,%xmm12
1660	psrad	$31,%xmm14
1661	paddq	%xmm15,%xmm15
1662	pand	%xmm8,%xmm14
1663	pxor	%xmm0,%xmm12
1664	pxor	%xmm14,%xmm15
1665	movdqa	%xmm9,%xmm14
1666	paddd	%xmm9,%xmm9
1667	movdqa	%xmm15,%xmm13
1668	psrad	$31,%xmm14
1669	paddq	%xmm15,%xmm15
1670	pand	%xmm8,%xmm14
1671	pxor	%xmm0,%xmm13
1672	pxor	%xmm14,%xmm15
1673	movdqa	%xmm15,%xmm14
1674	psrad	$31,%xmm9
1675	paddq	%xmm15,%xmm15
1676	pand	%xmm8,%xmm9
1677	pxor	%xmm0,%xmm14
1678	pxor	%xmm9,%xmm15
1679	movaps	%xmm1,96(%rsp)
1680
1681	subq	$96,%rdx
1682	jc	.Lxts_enc_short
1683
1684	movl	$16+96,%eax
1685	leaq	32(%rbp,%r10,1),%rcx
1686	subq	%r10,%rax
1687	movups	16(%rbp),%xmm1
1688	movq	%rax,%r10
1689	leaq	.Lxts_magic(%rip),%r8
1690	jmp	.Lxts_enc_grandloop
1691
1692.align	32
1693.Lxts_enc_grandloop:
1694	movdqu	0(%rdi),%xmm2
1695	movdqa	%xmm0,%xmm8
1696	movdqu	16(%rdi),%xmm3
1697	pxor	%xmm10,%xmm2
1698	movdqu	32(%rdi),%xmm4
1699	pxor	%xmm11,%xmm3
1700.byte	102,15,56,220,209
1701	movdqu	48(%rdi),%xmm5
1702	pxor	%xmm12,%xmm4
1703.byte	102,15,56,220,217
1704	movdqu	64(%rdi),%xmm6
1705	pxor	%xmm13,%xmm5
1706.byte	102,15,56,220,225
1707	movdqu	80(%rdi),%xmm7
1708	pxor	%xmm15,%xmm8
1709	movdqa	96(%rsp),%xmm9
1710	pxor	%xmm14,%xmm6
1711.byte	102,15,56,220,233
1712	movups	32(%rbp),%xmm0
1713	leaq	96(%rdi),%rdi
1714	pxor	%xmm8,%xmm7
1715
1716	pxor	%xmm9,%xmm10
1717.byte	102,15,56,220,241
1718	pxor	%xmm9,%xmm11
1719	movdqa	%xmm10,0(%rsp)
1720.byte	102,15,56,220,249
1721	movups	48(%rbp),%xmm1
1722	pxor	%xmm9,%xmm12
1723
1724.byte	102,15,56,220,208
1725	pxor	%xmm9,%xmm13
1726	movdqa	%xmm11,16(%rsp)
1727.byte	102,15,56,220,216
1728	pxor	%xmm9,%xmm14
1729	movdqa	%xmm12,32(%rsp)
1730.byte	102,15,56,220,224
1731.byte	102,15,56,220,232
1732	pxor	%xmm9,%xmm8
1733	movdqa	%xmm14,64(%rsp)
1734.byte	102,15,56,220,240
1735.byte	102,15,56,220,248
1736	movups	64(%rbp),%xmm0
1737	movdqa	%xmm8,80(%rsp)
1738	pshufd	$0x5f,%xmm15,%xmm9
1739	jmp	.Lxts_enc_loop6
1740.align	32
1741.Lxts_enc_loop6:
1742.byte	102,15,56,220,209
1743.byte	102,15,56,220,217
1744.byte	102,15,56,220,225
1745.byte	102,15,56,220,233
1746.byte	102,15,56,220,241
1747.byte	102,15,56,220,249
1748	movups	-64(%rcx,%rax,1),%xmm1
1749	addq	$32,%rax
1750
1751.byte	102,15,56,220,208
1752.byte	102,15,56,220,216
1753.byte	102,15,56,220,224
1754.byte	102,15,56,220,232
1755.byte	102,15,56,220,240
1756.byte	102,15,56,220,248
1757	movups	-80(%rcx,%rax,1),%xmm0
1758	jnz	.Lxts_enc_loop6
1759
1760	movdqa	(%r8),%xmm8
1761	movdqa	%xmm9,%xmm14
1762	paddd	%xmm9,%xmm9
1763.byte	102,15,56,220,209
1764	paddq	%xmm15,%xmm15
1765	psrad	$31,%xmm14
1766.byte	102,15,56,220,217
1767	pand	%xmm8,%xmm14
1768	movups	(%rbp),%xmm10
1769.byte	102,15,56,220,225
1770.byte	102,15,56,220,233
1771.byte	102,15,56,220,241
1772	pxor	%xmm14,%xmm15
1773	movaps	%xmm10,%xmm11
1774.byte	102,15,56,220,249
1775	movups	-64(%rcx),%xmm1
1776
1777	movdqa	%xmm9,%xmm14
1778.byte	102,15,56,220,208
1779	paddd	%xmm9,%xmm9
1780	pxor	%xmm15,%xmm10
1781.byte	102,15,56,220,216
1782	psrad	$31,%xmm14
1783	paddq	%xmm15,%xmm15
1784.byte	102,15,56,220,224
1785.byte	102,15,56,220,232
1786	pand	%xmm8,%xmm14
1787	movaps	%xmm11,%xmm12
1788.byte	102,15,56,220,240
1789	pxor	%xmm14,%xmm15
1790	movdqa	%xmm9,%xmm14
1791.byte	102,15,56,220,248
1792	movups	-48(%rcx),%xmm0
1793
1794	paddd	%xmm9,%xmm9
1795.byte	102,15,56,220,209
1796	pxor	%xmm15,%xmm11
1797	psrad	$31,%xmm14
1798.byte	102,15,56,220,217
1799	paddq	%xmm15,%xmm15
1800	pand	%xmm8,%xmm14
1801.byte	102,15,56,220,225
1802.byte	102,15,56,220,233
1803	movdqa	%xmm13,48(%rsp)
1804	pxor	%xmm14,%xmm15
1805.byte	102,15,56,220,241
1806	movaps	%xmm12,%xmm13
1807	movdqa	%xmm9,%xmm14
1808.byte	102,15,56,220,249
1809	movups	-32(%rcx),%xmm1
1810
1811	paddd	%xmm9,%xmm9
1812.byte	102,15,56,220,208
1813	pxor	%xmm15,%xmm12
1814	psrad	$31,%xmm14
1815.byte	102,15,56,220,216
1816	paddq	%xmm15,%xmm15
1817	pand	%xmm8,%xmm14
1818.byte	102,15,56,220,224
1819.byte	102,15,56,220,232
1820.byte	102,15,56,220,240
1821	pxor	%xmm14,%xmm15
1822	movaps	%xmm13,%xmm14
1823.byte	102,15,56,220,248
1824
1825	movdqa	%xmm9,%xmm0
1826	paddd	%xmm9,%xmm9
1827.byte	102,15,56,220,209
1828	pxor	%xmm15,%xmm13
1829	psrad	$31,%xmm0
1830.byte	102,15,56,220,217
1831	paddq	%xmm15,%xmm15
1832	pand	%xmm8,%xmm0
1833.byte	102,15,56,220,225
1834.byte	102,15,56,220,233
1835	pxor	%xmm0,%xmm15
1836	movups	(%rbp),%xmm0
1837.byte	102,15,56,220,241
1838.byte	102,15,56,220,249
1839	movups	16(%rbp),%xmm1
1840
1841	pxor	%xmm15,%xmm14
1842.byte	102,15,56,221,84,36,0
1843	psrad	$31,%xmm9
1844	paddq	%xmm15,%xmm15
1845.byte	102,15,56,221,92,36,16
1846.byte	102,15,56,221,100,36,32
1847	pand	%xmm8,%xmm9
1848	movq	%r10,%rax
1849.byte	102,15,56,221,108,36,48
1850.byte	102,15,56,221,116,36,64
1851.byte	102,15,56,221,124,36,80
1852	pxor	%xmm9,%xmm15
1853
1854	leaq	96(%rsi),%rsi
1855	movups	%xmm2,-96(%rsi)
1856	movups	%xmm3,-80(%rsi)
1857	movups	%xmm4,-64(%rsi)
1858	movups	%xmm5,-48(%rsi)
1859	movups	%xmm6,-32(%rsi)
1860	movups	%xmm7,-16(%rsi)
1861	subq	$96,%rdx
1862	jnc	.Lxts_enc_grandloop
1863
1864	movl	$16+96,%eax
1865	subl	%r10d,%eax
1866	movq	%rbp,%rcx
1867	shrl	$4,%eax
1868
1869.Lxts_enc_short:
1870
1871	movl	%eax,%r10d
1872	pxor	%xmm0,%xmm10
1873	addq	$96,%rdx
1874	jz	.Lxts_enc_done
1875
1876	pxor	%xmm0,%xmm11
1877	cmpq	$0x20,%rdx
1878	jb	.Lxts_enc_one
1879	pxor	%xmm0,%xmm12
1880	je	.Lxts_enc_two
1881
1882	pxor	%xmm0,%xmm13
1883	cmpq	$0x40,%rdx
1884	jb	.Lxts_enc_three
1885	pxor	%xmm0,%xmm14
1886	je	.Lxts_enc_four
1887
1888	movdqu	(%rdi),%xmm2
1889	movdqu	16(%rdi),%xmm3
1890	movdqu	32(%rdi),%xmm4
1891	pxor	%xmm10,%xmm2
1892	movdqu	48(%rdi),%xmm5
1893	pxor	%xmm11,%xmm3
1894	movdqu	64(%rdi),%xmm6
1895	leaq	80(%rdi),%rdi
1896	pxor	%xmm12,%xmm4
1897	pxor	%xmm13,%xmm5
1898	pxor	%xmm14,%xmm6
1899	pxor	%xmm7,%xmm7
1900
1901	call	_aesni_encrypt6
1902
1903	xorps	%xmm10,%xmm2
1904	movdqa	%xmm15,%xmm10
1905	xorps	%xmm11,%xmm3
1906	xorps	%xmm12,%xmm4
1907	movdqu	%xmm2,(%rsi)
1908	xorps	%xmm13,%xmm5
1909	movdqu	%xmm3,16(%rsi)
1910	xorps	%xmm14,%xmm6
1911	movdqu	%xmm4,32(%rsi)
1912	movdqu	%xmm5,48(%rsi)
1913	movdqu	%xmm6,64(%rsi)
1914	leaq	80(%rsi),%rsi
1915	jmp	.Lxts_enc_done
1916
1917.align	16
1918.Lxts_enc_one:
1919	movups	(%rdi),%xmm2
1920	leaq	16(%rdi),%rdi
1921	xorps	%xmm10,%xmm2
1922	movups	(%rcx),%xmm0
1923	movups	16(%rcx),%xmm1
1924	leaq	32(%rcx),%rcx
1925	xorps	%xmm0,%xmm2
1926.Loop_enc1_9:
1927.byte	102,15,56,220,209
1928	decl	%eax
1929	movups	(%rcx),%xmm1
1930	leaq	16(%rcx),%rcx
1931	jnz	.Loop_enc1_9
1932.byte	102,15,56,221,209
1933	xorps	%xmm10,%xmm2
1934	movdqa	%xmm11,%xmm10
1935	movups	%xmm2,(%rsi)
1936	leaq	16(%rsi),%rsi
1937	jmp	.Lxts_enc_done
1938
1939.align	16
1940.Lxts_enc_two:
1941	movups	(%rdi),%xmm2
1942	movups	16(%rdi),%xmm3
1943	leaq	32(%rdi),%rdi
1944	xorps	%xmm10,%xmm2
1945	xorps	%xmm11,%xmm3
1946
1947	call	_aesni_encrypt2
1948
1949	xorps	%xmm10,%xmm2
1950	movdqa	%xmm12,%xmm10
1951	xorps	%xmm11,%xmm3
1952	movups	%xmm2,(%rsi)
1953	movups	%xmm3,16(%rsi)
1954	leaq	32(%rsi),%rsi
1955	jmp	.Lxts_enc_done
1956
1957.align	16
1958.Lxts_enc_three:
1959	movups	(%rdi),%xmm2
1960	movups	16(%rdi),%xmm3
1961	movups	32(%rdi),%xmm4
1962	leaq	48(%rdi),%rdi
1963	xorps	%xmm10,%xmm2
1964	xorps	%xmm11,%xmm3
1965	xorps	%xmm12,%xmm4
1966
1967	call	_aesni_encrypt3
1968
1969	xorps	%xmm10,%xmm2
1970	movdqa	%xmm13,%xmm10
1971	xorps	%xmm11,%xmm3
1972	xorps	%xmm12,%xmm4
1973	movups	%xmm2,(%rsi)
1974	movups	%xmm3,16(%rsi)
1975	movups	%xmm4,32(%rsi)
1976	leaq	48(%rsi),%rsi
1977	jmp	.Lxts_enc_done
1978
1979.align	16
1980.Lxts_enc_four:
1981	movups	(%rdi),%xmm2
1982	movups	16(%rdi),%xmm3
1983	movups	32(%rdi),%xmm4
1984	xorps	%xmm10,%xmm2
1985	movups	48(%rdi),%xmm5
1986	leaq	64(%rdi),%rdi
1987	xorps	%xmm11,%xmm3
1988	xorps	%xmm12,%xmm4
1989	xorps	%xmm13,%xmm5
1990
1991	call	_aesni_encrypt4
1992
1993	pxor	%xmm10,%xmm2
1994	movdqa	%xmm14,%xmm10
1995	pxor	%xmm11,%xmm3
1996	pxor	%xmm12,%xmm4
1997	movdqu	%xmm2,(%rsi)
1998	pxor	%xmm13,%xmm5
1999	movdqu	%xmm3,16(%rsi)
2000	movdqu	%xmm4,32(%rsi)
2001	movdqu	%xmm5,48(%rsi)
2002	leaq	64(%rsi),%rsi
2003	jmp	.Lxts_enc_done
2004
2005.align	16
2006.Lxts_enc_done:
2007	andq	$15,%r9
2008	jz	.Lxts_enc_ret
2009	movq	%r9,%rdx
2010
2011.Lxts_enc_steal:
2012	movzbl	(%rdi),%eax
2013	movzbl	-16(%rsi),%ecx
2014	leaq	1(%rdi),%rdi
2015	movb	%al,-16(%rsi)
2016	movb	%cl,0(%rsi)
2017	leaq	1(%rsi),%rsi
2018	subq	$1,%rdx
2019	jnz	.Lxts_enc_steal
2020
2021	subq	%r9,%rsi
2022	movq	%rbp,%rcx
2023	movl	%r10d,%eax
2024
2025	movups	-16(%rsi),%xmm2
2026	xorps	%xmm10,%xmm2
2027	movups	(%rcx),%xmm0
2028	movups	16(%rcx),%xmm1
2029	leaq	32(%rcx),%rcx
2030	xorps	%xmm0,%xmm2
2031.Loop_enc1_10:
2032.byte	102,15,56,220,209
2033	decl	%eax
2034	movups	(%rcx),%xmm1
2035	leaq	16(%rcx),%rcx
2036	jnz	.Loop_enc1_10
2037.byte	102,15,56,221,209
2038	xorps	%xmm10,%xmm2
2039	movups	%xmm2,-16(%rsi)
2040
2041.Lxts_enc_ret:
2042	xorps	%xmm0,%xmm0
2043	pxor	%xmm1,%xmm1
2044	pxor	%xmm2,%xmm2
2045	pxor	%xmm3,%xmm3
2046	pxor	%xmm4,%xmm4
2047	pxor	%xmm5,%xmm5
2048	pxor	%xmm6,%xmm6
2049	pxor	%xmm7,%xmm7
2050	movaps	%xmm0,0(%rsp)
2051	pxor	%xmm8,%xmm8
2052	movaps	%xmm0,16(%rsp)
2053	pxor	%xmm9,%xmm9
2054	movaps	%xmm0,32(%rsp)
2055	pxor	%xmm10,%xmm10
2056	movaps	%xmm0,48(%rsp)
2057	pxor	%xmm11,%xmm11
2058	movaps	%xmm0,64(%rsp)
2059	pxor	%xmm12,%xmm12
2060	movaps	%xmm0,80(%rsp)
2061	pxor	%xmm13,%xmm13
2062	movaps	%xmm0,96(%rsp)
2063	pxor	%xmm14,%xmm14
2064	pxor	%xmm15,%xmm15
2065	movq	-8(%r11),%rbp
2066.cfi_restore	%rbp
2067	leaq	(%r11),%rsp
2068.cfi_def_cfa_register	%rsp
2069.Lxts_enc_epilogue:
2070	.byte	0xf3,0xc3
2071.cfi_endproc
2072.size	aesni_xts_encrypt,.-aesni_xts_encrypt
2073.globl	aesni_xts_decrypt
2074.type	aesni_xts_decrypt,@function
2075.align	16
2076aesni_xts_decrypt:
2077.cfi_startproc
2078	leaq	(%rsp),%r11
2079.cfi_def_cfa_register	%r11
2080	pushq	%rbp
2081.cfi_offset	%rbp,-16
2082	subq	$112,%rsp
2083	andq	$-16,%rsp
2084	movups	(%r9),%xmm2
2085	movl	240(%r8),%eax
2086	movl	240(%rcx),%r10d
2087	movups	(%r8),%xmm0
2088	movups	16(%r8),%xmm1
2089	leaq	32(%r8),%r8
2090	xorps	%xmm0,%xmm2
2091.Loop_enc1_11:
2092.byte	102,15,56,220,209
2093	decl	%eax
2094	movups	(%r8),%xmm1
2095	leaq	16(%r8),%r8
2096	jnz	.Loop_enc1_11
2097.byte	102,15,56,221,209
2098	xorl	%eax,%eax
2099	testq	$15,%rdx
2100	setnz	%al
2101	shlq	$4,%rax
2102	subq	%rax,%rdx
2103
2104	movups	(%rcx),%xmm0
2105	movq	%rcx,%rbp
2106	movl	%r10d,%eax
2107	shll	$4,%r10d
2108	movq	%rdx,%r9
2109	andq	$-16,%rdx
2110
2111	movups	16(%rcx,%r10,1),%xmm1
2112
2113	movdqa	.Lxts_magic(%rip),%xmm8
2114	movdqa	%xmm2,%xmm15
2115	pshufd	$0x5f,%xmm2,%xmm9
2116	pxor	%xmm0,%xmm1
2117	movdqa	%xmm9,%xmm14
2118	paddd	%xmm9,%xmm9
2119	movdqa	%xmm15,%xmm10
2120	psrad	$31,%xmm14
2121	paddq	%xmm15,%xmm15
2122	pand	%xmm8,%xmm14
2123	pxor	%xmm0,%xmm10
2124	pxor	%xmm14,%xmm15
2125	movdqa	%xmm9,%xmm14
2126	paddd	%xmm9,%xmm9
2127	movdqa	%xmm15,%xmm11
2128	psrad	$31,%xmm14
2129	paddq	%xmm15,%xmm15
2130	pand	%xmm8,%xmm14
2131	pxor	%xmm0,%xmm11
2132	pxor	%xmm14,%xmm15
2133	movdqa	%xmm9,%xmm14
2134	paddd	%xmm9,%xmm9
2135	movdqa	%xmm15,%xmm12
2136	psrad	$31,%xmm14
2137	paddq	%xmm15,%xmm15
2138	pand	%xmm8,%xmm14
2139	pxor	%xmm0,%xmm12
2140	pxor	%xmm14,%xmm15
2141	movdqa	%xmm9,%xmm14
2142	paddd	%xmm9,%xmm9
2143	movdqa	%xmm15,%xmm13
2144	psrad	$31,%xmm14
2145	paddq	%xmm15,%xmm15
2146	pand	%xmm8,%xmm14
2147	pxor	%xmm0,%xmm13
2148	pxor	%xmm14,%xmm15
2149	movdqa	%xmm15,%xmm14
2150	psrad	$31,%xmm9
2151	paddq	%xmm15,%xmm15
2152	pand	%xmm8,%xmm9
2153	pxor	%xmm0,%xmm14
2154	pxor	%xmm9,%xmm15
2155	movaps	%xmm1,96(%rsp)
2156
2157	subq	$96,%rdx
2158	jc	.Lxts_dec_short
2159
2160	movl	$16+96,%eax
2161	leaq	32(%rbp,%r10,1),%rcx
2162	subq	%r10,%rax
2163	movups	16(%rbp),%xmm1
2164	movq	%rax,%r10
2165	leaq	.Lxts_magic(%rip),%r8
2166	jmp	.Lxts_dec_grandloop
2167
2168.align	32
2169.Lxts_dec_grandloop:
2170	movdqu	0(%rdi),%xmm2
2171	movdqa	%xmm0,%xmm8
2172	movdqu	16(%rdi),%xmm3
2173	pxor	%xmm10,%xmm2
2174	movdqu	32(%rdi),%xmm4
2175	pxor	%xmm11,%xmm3
2176.byte	102,15,56,222,209
2177	movdqu	48(%rdi),%xmm5
2178	pxor	%xmm12,%xmm4
2179.byte	102,15,56,222,217
2180	movdqu	64(%rdi),%xmm6
2181	pxor	%xmm13,%xmm5
2182.byte	102,15,56,222,225
2183	movdqu	80(%rdi),%xmm7
2184	pxor	%xmm15,%xmm8
2185	movdqa	96(%rsp),%xmm9
2186	pxor	%xmm14,%xmm6
2187.byte	102,15,56,222,233
2188	movups	32(%rbp),%xmm0
2189	leaq	96(%rdi),%rdi
2190	pxor	%xmm8,%xmm7
2191
2192	pxor	%xmm9,%xmm10
2193.byte	102,15,56,222,241
2194	pxor	%xmm9,%xmm11
2195	movdqa	%xmm10,0(%rsp)
2196.byte	102,15,56,222,249
2197	movups	48(%rbp),%xmm1
2198	pxor	%xmm9,%xmm12
2199
2200.byte	102,15,56,222,208
2201	pxor	%xmm9,%xmm13
2202	movdqa	%xmm11,16(%rsp)
2203.byte	102,15,56,222,216
2204	pxor	%xmm9,%xmm14
2205	movdqa	%xmm12,32(%rsp)
2206.byte	102,15,56,222,224
2207.byte	102,15,56,222,232
2208	pxor	%xmm9,%xmm8
2209	movdqa	%xmm14,64(%rsp)
2210.byte	102,15,56,222,240
2211.byte	102,15,56,222,248
2212	movups	64(%rbp),%xmm0
2213	movdqa	%xmm8,80(%rsp)
2214	pshufd	$0x5f,%xmm15,%xmm9
2215	jmp	.Lxts_dec_loop6
2216.align	32
2217.Lxts_dec_loop6:
2218.byte	102,15,56,222,209
2219.byte	102,15,56,222,217
2220.byte	102,15,56,222,225
2221.byte	102,15,56,222,233
2222.byte	102,15,56,222,241
2223.byte	102,15,56,222,249
2224	movups	-64(%rcx,%rax,1),%xmm1
2225	addq	$32,%rax
2226
2227.byte	102,15,56,222,208
2228.byte	102,15,56,222,216
2229.byte	102,15,56,222,224
2230.byte	102,15,56,222,232
2231.byte	102,15,56,222,240
2232.byte	102,15,56,222,248
2233	movups	-80(%rcx,%rax,1),%xmm0
2234	jnz	.Lxts_dec_loop6
2235
2236	movdqa	(%r8),%xmm8
2237	movdqa	%xmm9,%xmm14
2238	paddd	%xmm9,%xmm9
2239.byte	102,15,56,222,209
2240	paddq	%xmm15,%xmm15
2241	psrad	$31,%xmm14
2242.byte	102,15,56,222,217
2243	pand	%xmm8,%xmm14
2244	movups	(%rbp),%xmm10
2245.byte	102,15,56,222,225
2246.byte	102,15,56,222,233
2247.byte	102,15,56,222,241
2248	pxor	%xmm14,%xmm15
2249	movaps	%xmm10,%xmm11
2250.byte	102,15,56,222,249
2251	movups	-64(%rcx),%xmm1
2252
2253	movdqa	%xmm9,%xmm14
2254.byte	102,15,56,222,208
2255	paddd	%xmm9,%xmm9
2256	pxor	%xmm15,%xmm10
2257.byte	102,15,56,222,216
2258	psrad	$31,%xmm14
2259	paddq	%xmm15,%xmm15
2260.byte	102,15,56,222,224
2261.byte	102,15,56,222,232
2262	pand	%xmm8,%xmm14
2263	movaps	%xmm11,%xmm12
2264.byte	102,15,56,222,240
2265	pxor	%xmm14,%xmm15
2266	movdqa	%xmm9,%xmm14
2267.byte	102,15,56,222,248
2268	movups	-48(%rcx),%xmm0
2269
2270	paddd	%xmm9,%xmm9
2271.byte	102,15,56,222,209
2272	pxor	%xmm15,%xmm11
2273	psrad	$31,%xmm14
2274.byte	102,15,56,222,217
2275	paddq	%xmm15,%xmm15
2276	pand	%xmm8,%xmm14
2277.byte	102,15,56,222,225
2278.byte	102,15,56,222,233
2279	movdqa	%xmm13,48(%rsp)
2280	pxor	%xmm14,%xmm15
2281.byte	102,15,56,222,241
2282	movaps	%xmm12,%xmm13
2283	movdqa	%xmm9,%xmm14
2284.byte	102,15,56,222,249
2285	movups	-32(%rcx),%xmm1
2286
2287	paddd	%xmm9,%xmm9
2288.byte	102,15,56,222,208
2289	pxor	%xmm15,%xmm12
2290	psrad	$31,%xmm14
2291.byte	102,15,56,222,216
2292	paddq	%xmm15,%xmm15
2293	pand	%xmm8,%xmm14
2294.byte	102,15,56,222,224
2295.byte	102,15,56,222,232
2296.byte	102,15,56,222,240
2297	pxor	%xmm14,%xmm15
2298	movaps	%xmm13,%xmm14
2299.byte	102,15,56,222,248
2300
2301	movdqa	%xmm9,%xmm0
2302	paddd	%xmm9,%xmm9
2303.byte	102,15,56,222,209
2304	pxor	%xmm15,%xmm13
2305	psrad	$31,%xmm0
2306.byte	102,15,56,222,217
2307	paddq	%xmm15,%xmm15
2308	pand	%xmm8,%xmm0
2309.byte	102,15,56,222,225
2310.byte	102,15,56,222,233
2311	pxor	%xmm0,%xmm15
2312	movups	(%rbp),%xmm0
2313.byte	102,15,56,222,241
2314.byte	102,15,56,222,249
2315	movups	16(%rbp),%xmm1
2316
2317	pxor	%xmm15,%xmm14
2318.byte	102,15,56,223,84,36,0
2319	psrad	$31,%xmm9
2320	paddq	%xmm15,%xmm15
2321.byte	102,15,56,223,92,36,16
2322.byte	102,15,56,223,100,36,32
2323	pand	%xmm8,%xmm9
2324	movq	%r10,%rax
2325.byte	102,15,56,223,108,36,48
2326.byte	102,15,56,223,116,36,64
2327.byte	102,15,56,223,124,36,80
2328	pxor	%xmm9,%xmm15
2329
2330	leaq	96(%rsi),%rsi
2331	movups	%xmm2,-96(%rsi)
2332	movups	%xmm3,-80(%rsi)
2333	movups	%xmm4,-64(%rsi)
2334	movups	%xmm5,-48(%rsi)
2335	movups	%xmm6,-32(%rsi)
2336	movups	%xmm7,-16(%rsi)
2337	subq	$96,%rdx
2338	jnc	.Lxts_dec_grandloop
2339
2340	movl	$16+96,%eax
2341	subl	%r10d,%eax
2342	movq	%rbp,%rcx
2343	shrl	$4,%eax
2344
2345.Lxts_dec_short:
2346
2347	movl	%eax,%r10d
2348	pxor	%xmm0,%xmm10
2349	pxor	%xmm0,%xmm11
2350	addq	$96,%rdx
2351	jz	.Lxts_dec_done
2352
2353	pxor	%xmm0,%xmm12
2354	cmpq	$0x20,%rdx
2355	jb	.Lxts_dec_one
2356	pxor	%xmm0,%xmm13
2357	je	.Lxts_dec_two
2358
2359	pxor	%xmm0,%xmm14
2360	cmpq	$0x40,%rdx
2361	jb	.Lxts_dec_three
2362	je	.Lxts_dec_four
2363
2364	movdqu	(%rdi),%xmm2
2365	movdqu	16(%rdi),%xmm3
2366	movdqu	32(%rdi),%xmm4
2367	pxor	%xmm10,%xmm2
2368	movdqu	48(%rdi),%xmm5
2369	pxor	%xmm11,%xmm3
2370	movdqu	64(%rdi),%xmm6
2371	leaq	80(%rdi),%rdi
2372	pxor	%xmm12,%xmm4
2373	pxor	%xmm13,%xmm5
2374	pxor	%xmm14,%xmm6
2375
2376	call	_aesni_decrypt6
2377
2378	xorps	%xmm10,%xmm2
2379	xorps	%xmm11,%xmm3
2380	xorps	%xmm12,%xmm4
2381	movdqu	%xmm2,(%rsi)
2382	xorps	%xmm13,%xmm5
2383	movdqu	%xmm3,16(%rsi)
2384	xorps	%xmm14,%xmm6
2385	movdqu	%xmm4,32(%rsi)
2386	pxor	%xmm14,%xmm14
2387	movdqu	%xmm5,48(%rsi)
2388	pcmpgtd	%xmm15,%xmm14
2389	movdqu	%xmm6,64(%rsi)
2390	leaq	80(%rsi),%rsi
2391	pshufd	$0x13,%xmm14,%xmm11
2392	andq	$15,%r9
2393	jz	.Lxts_dec_ret
2394
2395	movdqa	%xmm15,%xmm10
2396	paddq	%xmm15,%xmm15
2397	pand	%xmm8,%xmm11
2398	pxor	%xmm15,%xmm11
2399	jmp	.Lxts_dec_done2
2400
2401.align	16
2402.Lxts_dec_one:
2403	movups	(%rdi),%xmm2
2404	leaq	16(%rdi),%rdi
2405	xorps	%xmm10,%xmm2
2406	movups	(%rcx),%xmm0
2407	movups	16(%rcx),%xmm1
2408	leaq	32(%rcx),%rcx
2409	xorps	%xmm0,%xmm2
2410.Loop_dec1_12:
2411.byte	102,15,56,222,209
2412	decl	%eax
2413	movups	(%rcx),%xmm1
2414	leaq	16(%rcx),%rcx
2415	jnz	.Loop_dec1_12
2416.byte	102,15,56,223,209
2417	xorps	%xmm10,%xmm2
2418	movdqa	%xmm11,%xmm10
2419	movups	%xmm2,(%rsi)
2420	movdqa	%xmm12,%xmm11
2421	leaq	16(%rsi),%rsi
2422	jmp	.Lxts_dec_done
2423
2424.align	16
2425.Lxts_dec_two:
2426	movups	(%rdi),%xmm2
2427	movups	16(%rdi),%xmm3
2428	leaq	32(%rdi),%rdi
2429	xorps	%xmm10,%xmm2
2430	xorps	%xmm11,%xmm3
2431
2432	call	_aesni_decrypt2
2433
2434	xorps	%xmm10,%xmm2
2435	movdqa	%xmm12,%xmm10
2436	xorps	%xmm11,%xmm3
2437	movdqa	%xmm13,%xmm11
2438	movups	%xmm2,(%rsi)
2439	movups	%xmm3,16(%rsi)
2440	leaq	32(%rsi),%rsi
2441	jmp	.Lxts_dec_done
2442
2443.align	16
2444.Lxts_dec_three:
2445	movups	(%rdi),%xmm2
2446	movups	16(%rdi),%xmm3
2447	movups	32(%rdi),%xmm4
2448	leaq	48(%rdi),%rdi
2449	xorps	%xmm10,%xmm2
2450	xorps	%xmm11,%xmm3
2451	xorps	%xmm12,%xmm4
2452
2453	call	_aesni_decrypt3
2454
2455	xorps	%xmm10,%xmm2
2456	movdqa	%xmm13,%xmm10
2457	xorps	%xmm11,%xmm3
2458	movdqa	%xmm14,%xmm11
2459	xorps	%xmm12,%xmm4
2460	movups	%xmm2,(%rsi)
2461	movups	%xmm3,16(%rsi)
2462	movups	%xmm4,32(%rsi)
2463	leaq	48(%rsi),%rsi
2464	jmp	.Lxts_dec_done
2465
2466.align	16
2467.Lxts_dec_four:
2468	movups	(%rdi),%xmm2
2469	movups	16(%rdi),%xmm3
2470	movups	32(%rdi),%xmm4
2471	xorps	%xmm10,%xmm2
2472	movups	48(%rdi),%xmm5
2473	leaq	64(%rdi),%rdi
2474	xorps	%xmm11,%xmm3
2475	xorps	%xmm12,%xmm4
2476	xorps	%xmm13,%xmm5
2477
2478	call	_aesni_decrypt4
2479
2480	pxor	%xmm10,%xmm2
2481	movdqa	%xmm14,%xmm10
2482	pxor	%xmm11,%xmm3
2483	movdqa	%xmm15,%xmm11
2484	pxor	%xmm12,%xmm4
2485	movdqu	%xmm2,(%rsi)
2486	pxor	%xmm13,%xmm5
2487	movdqu	%xmm3,16(%rsi)
2488	movdqu	%xmm4,32(%rsi)
2489	movdqu	%xmm5,48(%rsi)
2490	leaq	64(%rsi),%rsi
2491	jmp	.Lxts_dec_done
2492
2493.align	16
2494.Lxts_dec_done:
2495	andq	$15,%r9
2496	jz	.Lxts_dec_ret
2497.Lxts_dec_done2:
2498	movq	%r9,%rdx
2499	movq	%rbp,%rcx
2500	movl	%r10d,%eax
2501
2502	movups	(%rdi),%xmm2
2503	xorps	%xmm11,%xmm2
2504	movups	(%rcx),%xmm0
2505	movups	16(%rcx),%xmm1
2506	leaq	32(%rcx),%rcx
2507	xorps	%xmm0,%xmm2
2508.Loop_dec1_13:
2509.byte	102,15,56,222,209
2510	decl	%eax
2511	movups	(%rcx),%xmm1
2512	leaq	16(%rcx),%rcx
2513	jnz	.Loop_dec1_13
2514.byte	102,15,56,223,209
2515	xorps	%xmm11,%xmm2
2516	movups	%xmm2,(%rsi)
2517
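/*
 * XTS ciphertext stealing: byte-swap the trailing partial block with the
 * block just written at (%rsi), then step back and decrypt the reassembled
 * block with the tweak saved in %xmm10.
 */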
2518.Lxts_dec_steal:
2519	movzbl	16(%rdi),%eax
2520	movzbl	(%rsi),%ecx
2521	leaq	1(%rdi),%rdi
2522	movb	%al,(%rsi)
2523	movb	%cl,16(%rsi)
2524	leaq	1(%rsi),%rsi
2525	subq	$1,%rdx
2526	jnz	.Lxts_dec_steal
2527
2528	subq	%r9,%rsi
2529	movq	%rbp,%rcx
2530	movl	%r10d,%eax
2531
2532	movups	(%rsi),%xmm2
2533	xorps	%xmm10,%xmm2
2534	movups	(%rcx),%xmm0
2535	movups	16(%rcx),%xmm1
2536	leaq	32(%rcx),%rcx
2537	xorps	%xmm0,%xmm2
2538.Loop_dec1_14:
2539.byte	102,15,56,222,209
2540	decl	%eax
2541	movups	(%rcx),%xmm1
2542	leaq	16(%rcx),%rcx
2543	jnz	.Loop_dec1_14
2544.byte	102,15,56,223,209
2545	xorps	%xmm10,%xmm2
2546	movups	%xmm2,(%rsi)
2547
2548.Lxts_dec_ret:
2549	xorps	%xmm0,%xmm0
2550	pxor	%xmm1,%xmm1
2551	pxor	%xmm2,%xmm2
2552	pxor	%xmm3,%xmm3
2553	pxor	%xmm4,%xmm4
2554	pxor	%xmm5,%xmm5
2555	pxor	%xmm6,%xmm6
2556	pxor	%xmm7,%xmm7
2557	movaps	%xmm0,0(%rsp)
2558	pxor	%xmm8,%xmm8
2559	movaps	%xmm0,16(%rsp)
2560	pxor	%xmm9,%xmm9
2561	movaps	%xmm0,32(%rsp)
2562	pxor	%xmm10,%xmm10
2563	movaps	%xmm0,48(%rsp)
2564	pxor	%xmm11,%xmm11
2565	movaps	%xmm0,64(%rsp)
2566	pxor	%xmm12,%xmm12
2567	movaps	%xmm0,80(%rsp)
2568	pxor	%xmm13,%xmm13
2569	movaps	%xmm0,96(%rsp)
2570	pxor	%xmm14,%xmm14
2571	pxor	%xmm15,%xmm15
2572	movq	-8(%r11),%rbp
2573.cfi_restore	%rbp
2574	leaq	(%r11),%rsp
2575.cfi_def_cfa_register	%rsp
2576.Lxts_dec_epilogue:
2577	.byte	0xf3,0xc3
2578.cfi_endproc
2579.size	aesni_xts_decrypt,.-aesni_xts_decrypt
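/*
 * Prototype (as in OpenSSL's aesni-x86_64.pl):
 * void aesni_ocb_encrypt(const unsigned char *inp, unsigned char *out,
 *                        size_t blocks, const AES_KEY *key,
 *                        unsigned int start_block_num,
 *                        unsigned char offset_i[16],
 *                        const unsigned char L_[][16],
 *                        unsigned char checksum[16]);
 *
 * OCB bulk encryption of whole blocks: the register arguments arrive in
 * %rdi, %rsi, %rdx, %rcx, %r8 and %r9 per the SysV AMD64 ABI, and the two
 * stack arguments (the L_ table and the running checksum) are loaded into
 * %rbx and %rbp below.  Six blocks go through .Locb_enc_grandloop per
 * iteration, shorter tails are dispatched after .Locb_enc_short, and the
 * updated offset and checksum are written back before return.
 */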
2580.globl	aesni_ocb_encrypt
2581.type	aesni_ocb_encrypt,@function
2582.align	32
2583aesni_ocb_encrypt:
2584.cfi_startproc
2585	leaq	(%rsp),%rax
2586	pushq	%rbx
2587.cfi_adjust_cfa_offset	8
2588.cfi_offset	%rbx,-16
2589	pushq	%rbp
2590.cfi_adjust_cfa_offset	8
2591.cfi_offset	%rbp,-24
2592	pushq	%r12
2593.cfi_adjust_cfa_offset	8
2594.cfi_offset	%r12,-32
2595	pushq	%r13
2596.cfi_adjust_cfa_offset	8
2597.cfi_offset	%r13,-40
2598	pushq	%r14
2599.cfi_adjust_cfa_offset	8
2600.cfi_offset	%r14,-48
2601	movq	8(%rax),%rbx
2602	movq	8+8(%rax),%rbp
2603
2604	movl	240(%rcx),%r10d
2605	movq	%rcx,%r11
2606	shll	$4,%r10d
2607	movups	(%rcx),%xmm9
2608	movups	16(%rcx,%r10,1),%xmm1
2609
2610	movdqu	(%r9),%xmm15
2611	pxor	%xmm1,%xmm9
2612	pxor	%xmm1,%xmm15
2613
2614	movl	$16+32,%eax
2615	leaq	32(%r11,%r10,1),%rcx
2616	movups	16(%r11),%xmm1
2617	subq	%r10,%rax
2618	movq	%rax,%r10
2619
2620	movdqu	(%rbx),%xmm10
2621	movdqu	(%rbp),%xmm8
2622
2623	testq	$1,%r8
2624	jnz	.Locb_enc_odd
2625
2626	bsfq	%r8,%r12
2627	addq	$1,%r8
2628	shlq	$4,%r12
2629	movdqu	(%rbx,%r12,1),%xmm7
2630	movdqu	(%rdi),%xmm2
2631	leaq	16(%rdi),%rdi
2632
2633	call	__ocb_encrypt1
2634
2635	movdqa	%xmm7,%xmm15
2636	movups	%xmm2,(%rsi)
2637	leaq	16(%rsi),%rsi
2638	subq	$1,%rdx
2639	jz	.Locb_enc_done
2640
2641.Locb_enc_odd:
2642	leaq	1(%r8),%r12
2643	leaq	3(%r8),%r13
2644	leaq	5(%r8),%r14
2645	leaq	6(%r8),%r8
2646	bsfq	%r12,%r12
2647	bsfq	%r13,%r13
2648	bsfq	%r14,%r14
2649	shlq	$4,%r12
2650	shlq	$4,%r13
2651	shlq	$4,%r14
2652
2653	subq	$6,%rdx
2654	jc	.Locb_enc_short
2655	jmp	.Locb_enc_grandloop
2656
2657.align	32
2658.Locb_enc_grandloop:
2659	movdqu	0(%rdi),%xmm2
2660	movdqu	16(%rdi),%xmm3
2661	movdqu	32(%rdi),%xmm4
2662	movdqu	48(%rdi),%xmm5
2663	movdqu	64(%rdi),%xmm6
2664	movdqu	80(%rdi),%xmm7
2665	leaq	96(%rdi),%rdi
2666
2667	call	__ocb_encrypt6
2668
2669	movups	%xmm2,0(%rsi)
2670	movups	%xmm3,16(%rsi)
2671	movups	%xmm4,32(%rsi)
2672	movups	%xmm5,48(%rsi)
2673	movups	%xmm6,64(%rsi)
2674	movups	%xmm7,80(%rsi)
2675	leaq	96(%rsi),%rsi
2676	subq	$6,%rdx
2677	jnc	.Locb_enc_grandloop
2678
2679.Locb_enc_short:
2680	addq	$6,%rdx
2681	jz	.Locb_enc_done
2682
2683	movdqu	0(%rdi),%xmm2
2684	cmpq	$2,%rdx
2685	jb	.Locb_enc_one
2686	movdqu	16(%rdi),%xmm3
2687	je	.Locb_enc_two
2688
2689	movdqu	32(%rdi),%xmm4
2690	cmpq	$4,%rdx
2691	jb	.Locb_enc_three
2692	movdqu	48(%rdi),%xmm5
2693	je	.Locb_enc_four
2694
2695	movdqu	64(%rdi),%xmm6
2696	pxor	%xmm7,%xmm7
2697
2698	call	__ocb_encrypt6
2699
2700	movdqa	%xmm14,%xmm15
2701	movups	%xmm2,0(%rsi)
2702	movups	%xmm3,16(%rsi)
2703	movups	%xmm4,32(%rsi)
2704	movups	%xmm5,48(%rsi)
2705	movups	%xmm6,64(%rsi)
2706
2707	jmp	.Locb_enc_done
2708
2709.align	16
2710.Locb_enc_one:
2711	movdqa	%xmm10,%xmm7
2712
2713	call	__ocb_encrypt1
2714
2715	movdqa	%xmm7,%xmm15
2716	movups	%xmm2,0(%rsi)
2717	jmp	.Locb_enc_done
2718
2719.align	16
2720.Locb_enc_two:
2721	pxor	%xmm4,%xmm4
2722	pxor	%xmm5,%xmm5
2723
2724	call	__ocb_encrypt4
2725
2726	movdqa	%xmm11,%xmm15
2727	movups	%xmm2,0(%rsi)
2728	movups	%xmm3,16(%rsi)
2729
2730	jmp	.Locb_enc_done
2731
2732.align	16
2733.Locb_enc_three:
2734	pxor	%xmm5,%xmm5
2735
2736	call	__ocb_encrypt4
2737
2738	movdqa	%xmm12,%xmm15
2739	movups	%xmm2,0(%rsi)
2740	movups	%xmm3,16(%rsi)
2741	movups	%xmm4,32(%rsi)
2742
2743	jmp	.Locb_enc_done
2744
2745.align	16
2746.Locb_enc_four:
2747	call	__ocb_encrypt4
2748
2749	movdqa	%xmm13,%xmm15
2750	movups	%xmm2,0(%rsi)
2751	movups	%xmm3,16(%rsi)
2752	movups	%xmm4,32(%rsi)
2753	movups	%xmm5,48(%rsi)
2754
2755.Locb_enc_done:
2756	pxor	%xmm0,%xmm15
2757	movdqu	%xmm8,(%rbp)
2758	movdqu	%xmm15,(%r9)
2759
2760	xorps	%xmm0,%xmm0
2761	pxor	%xmm1,%xmm1
2762	pxor	%xmm2,%xmm2
2763	pxor	%xmm3,%xmm3
2764	pxor	%xmm4,%xmm4
2765	pxor	%xmm5,%xmm5
2766	pxor	%xmm6,%xmm6
2767	pxor	%xmm7,%xmm7
2768	pxor	%xmm8,%xmm8
2769	pxor	%xmm9,%xmm9
2770	pxor	%xmm10,%xmm10
2771	pxor	%xmm11,%xmm11
2772	pxor	%xmm12,%xmm12
2773	pxor	%xmm13,%xmm13
2774	pxor	%xmm14,%xmm14
2775	pxor	%xmm15,%xmm15
2776	leaq	40(%rsp),%rax
2777.cfi_def_cfa	%rax,8
2778	movq	-40(%rax),%r14
2779.cfi_restore	%r14
2780	movq	-32(%rax),%r13
2781.cfi_restore	%r13
2782	movq	-24(%rax),%r12
2783.cfi_restore	%r12
2784	movq	-16(%rax),%rbp
2785.cfi_restore	%rbp
2786	movq	-8(%rax),%rbx
2787.cfi_restore	%rbx
2788	leaq	(%rax),%rsp
2789.cfi_def_cfa_register	%rsp
2790.Locb_enc_epilogue:
2791	.byte	0xf3,0xc3
2792.cfi_endproc
2793.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
2794
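/*
 * __ocb_encrypt6: encrypt the six blocks in %xmm2-%xmm7.  The next six OCB
 * offsets are chained from the L_ table entries selected by the bsf results
 * in %r12-%r14, the plaintext is accumulated into the checksum in %xmm8,
 * each block is XORed with its offset, and six interleaved AESENC pipelines
 * run with the offset registers %xmm10-%xmm15 as the AESENCLAST operands.
 */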
2795.type	__ocb_encrypt6,@function
2796.align	32
2797__ocb_encrypt6:
2798.cfi_startproc
2799	pxor	%xmm9,%xmm15
2800	movdqu	(%rbx,%r12,1),%xmm11
2801	movdqa	%xmm10,%xmm12
2802	movdqu	(%rbx,%r13,1),%xmm13
2803	movdqa	%xmm10,%xmm14
2804	pxor	%xmm15,%xmm10
2805	movdqu	(%rbx,%r14,1),%xmm15
2806	pxor	%xmm10,%xmm11
2807	pxor	%xmm2,%xmm8
2808	pxor	%xmm10,%xmm2
2809	pxor	%xmm11,%xmm12
2810	pxor	%xmm3,%xmm8
2811	pxor	%xmm11,%xmm3
2812	pxor	%xmm12,%xmm13
2813	pxor	%xmm4,%xmm8
2814	pxor	%xmm12,%xmm4
2815	pxor	%xmm13,%xmm14
2816	pxor	%xmm5,%xmm8
2817	pxor	%xmm13,%xmm5
2818	pxor	%xmm14,%xmm15
2819	pxor	%xmm6,%xmm8
2820	pxor	%xmm14,%xmm6
2821	pxor	%xmm7,%xmm8
2822	pxor	%xmm15,%xmm7
2823	movups	32(%r11),%xmm0
2824
2825	leaq	1(%r8),%r12
2826	leaq	3(%r8),%r13
2827	leaq	5(%r8),%r14
2828	addq	$6,%r8
2829	pxor	%xmm9,%xmm10
2830	bsfq	%r12,%r12
2831	bsfq	%r13,%r13
2832	bsfq	%r14,%r14
2833
2834.byte	102,15,56,220,209
2835.byte	102,15,56,220,217
2836.byte	102,15,56,220,225
2837.byte	102,15,56,220,233
2838	pxor	%xmm9,%xmm11
2839	pxor	%xmm9,%xmm12
2840.byte	102,15,56,220,241
2841	pxor	%xmm9,%xmm13
2842	pxor	%xmm9,%xmm14
2843.byte	102,15,56,220,249
2844	movups	48(%r11),%xmm1
2845	pxor	%xmm9,%xmm15
2846
2847.byte	102,15,56,220,208
2848.byte	102,15,56,220,216
2849.byte	102,15,56,220,224
2850.byte	102,15,56,220,232
2851.byte	102,15,56,220,240
2852.byte	102,15,56,220,248
2853	movups	64(%r11),%xmm0
2854	shlq	$4,%r12
2855	shlq	$4,%r13
2856	jmp	.Locb_enc_loop6
2857
2858.align	32
2859.Locb_enc_loop6:
2860.byte	102,15,56,220,209
2861.byte	102,15,56,220,217
2862.byte	102,15,56,220,225
2863.byte	102,15,56,220,233
2864.byte	102,15,56,220,241
2865.byte	102,15,56,220,249
2866	movups	(%rcx,%rax,1),%xmm1
2867	addq	$32,%rax
2868
2869.byte	102,15,56,220,208
2870.byte	102,15,56,220,216
2871.byte	102,15,56,220,224
2872.byte	102,15,56,220,232
2873.byte	102,15,56,220,240
2874.byte	102,15,56,220,248
2875	movups	-16(%rcx,%rax,1),%xmm0
2876	jnz	.Locb_enc_loop6
2877
2878.byte	102,15,56,220,209
2879.byte	102,15,56,220,217
2880.byte	102,15,56,220,225
2881.byte	102,15,56,220,233
2882.byte	102,15,56,220,241
2883.byte	102,15,56,220,249
2884	movups	16(%r11),%xmm1
2885	shlq	$4,%r14
2886
2887.byte	102,65,15,56,221,210
2888	movdqu	(%rbx),%xmm10
2889	movq	%r10,%rax
2890.byte	102,65,15,56,221,219
2891.byte	102,65,15,56,221,228
2892.byte	102,65,15,56,221,237
2893.byte	102,65,15,56,221,246
2894.byte	102,65,15,56,221,255
2895	.byte	0xf3,0xc3
2896.cfi_endproc
2897.size	__ocb_encrypt6,.-__ocb_encrypt6
2898
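/*
 * __ocb_encrypt4: four-block variant of __ocb_encrypt6; blocks in
 * %xmm2-%xmm5, offsets in %xmm10-%xmm13, checksum accumulated in %xmm8.
 */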
2899.type	__ocb_encrypt4,@function
2900.align	32
2901__ocb_encrypt4:
2902.cfi_startproc
2903	pxor	%xmm9,%xmm15
2904	movdqu	(%rbx,%r12,1),%xmm11
2905	movdqa	%xmm10,%xmm12
2906	movdqu	(%rbx,%r13,1),%xmm13
2907	pxor	%xmm15,%xmm10
2908	pxor	%xmm10,%xmm11
2909	pxor	%xmm2,%xmm8
2910	pxor	%xmm10,%xmm2
2911	pxor	%xmm11,%xmm12
2912	pxor	%xmm3,%xmm8
2913	pxor	%xmm11,%xmm3
2914	pxor	%xmm12,%xmm13
2915	pxor	%xmm4,%xmm8
2916	pxor	%xmm12,%xmm4
2917	pxor	%xmm5,%xmm8
2918	pxor	%xmm13,%xmm5
2919	movups	32(%r11),%xmm0
2920
2921	pxor	%xmm9,%xmm10
2922	pxor	%xmm9,%xmm11
2923	pxor	%xmm9,%xmm12
2924	pxor	%xmm9,%xmm13
2925
2926.byte	102,15,56,220,209
2927.byte	102,15,56,220,217
2928.byte	102,15,56,220,225
2929.byte	102,15,56,220,233
2930	movups	48(%r11),%xmm1
2931
2932.byte	102,15,56,220,208
2933.byte	102,15,56,220,216
2934.byte	102,15,56,220,224
2935.byte	102,15,56,220,232
2936	movups	64(%r11),%xmm0
2937	jmp	.Locb_enc_loop4
2938
2939.align	32
2940.Locb_enc_loop4:
2941.byte	102,15,56,220,209
2942.byte	102,15,56,220,217
2943.byte	102,15,56,220,225
2944.byte	102,15,56,220,233
2945	movups	(%rcx,%rax,1),%xmm1
2946	addq	$32,%rax
2947
2948.byte	102,15,56,220,208
2949.byte	102,15,56,220,216
2950.byte	102,15,56,220,224
2951.byte	102,15,56,220,232
2952	movups	-16(%rcx,%rax,1),%xmm0
2953	jnz	.Locb_enc_loop4
2954
2955.byte	102,15,56,220,209
2956.byte	102,15,56,220,217
2957.byte	102,15,56,220,225
2958.byte	102,15,56,220,233
2959	movups	16(%r11),%xmm1
2960	movq	%r10,%rax
2961
2962.byte	102,65,15,56,221,210
2963.byte	102,65,15,56,221,219
2964.byte	102,65,15,56,221,228
2965.byte	102,65,15,56,221,237
2966	.byte	0xf3,0xc3
2967.cfi_endproc
2968.size	__ocb_encrypt4,.-__ocb_encrypt4
2969
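/*
 * __ocb_encrypt1: single-block variant.  The caller pre-loads the L_ entry
 * into %xmm7; the current offset is folded in, the plaintext in %xmm2 is
 * added to the checksum %xmm8 and encrypted, and %xmm7 returns the updated
 * offset for the caller to copy back into %xmm15.
 */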
2970.type	__ocb_encrypt1,@function
2971.align	32
2972__ocb_encrypt1:
2973.cfi_startproc
2974	pxor	%xmm15,%xmm7
2975	pxor	%xmm9,%xmm7
2976	pxor	%xmm2,%xmm8
2977	pxor	%xmm7,%xmm2
2978	movups	32(%r11),%xmm0
2979
2980.byte	102,15,56,220,209
2981	movups	48(%r11),%xmm1
2982	pxor	%xmm9,%xmm7
2983
2984.byte	102,15,56,220,208
2985	movups	64(%r11),%xmm0
2986	jmp	.Locb_enc_loop1
2987
2988.align	32
2989.Locb_enc_loop1:
2990.byte	102,15,56,220,209
2991	movups	(%rcx,%rax,1),%xmm1
2992	addq	$32,%rax
2993
2994.byte	102,15,56,220,208
2995	movups	-16(%rcx,%rax,1),%xmm0
2996	jnz	.Locb_enc_loop1
2997
2998.byte	102,15,56,220,209
2999	movups	16(%r11),%xmm1
3000	movq	%r10,%rax
3001
3002.byte	102,15,56,221,215
3003	.byte	0xf3,0xc3
3004.cfi_endproc
3005.size	__ocb_encrypt1,.-__ocb_encrypt1
3006
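/*
 * aesni_ocb_decrypt: same argument layout as aesni_ocb_encrypt, but with
 * AESDEC in the helpers; because OCB checksums plaintext, the decrypted
 * blocks are folded into the checksum (%xmm8) here in the calling code
 * rather than inside the __ocb_decrypt* subroutines.
 */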
3007.globl	aesni_ocb_decrypt
3008.type	aesni_ocb_decrypt,@function
3009.align	32
3010aesni_ocb_decrypt:
3011.cfi_startproc
3012	leaq	(%rsp),%rax
3013	pushq	%rbx
3014.cfi_adjust_cfa_offset	8
3015.cfi_offset	%rbx,-16
3016	pushq	%rbp
3017.cfi_adjust_cfa_offset	8
3018.cfi_offset	%rbp,-24
3019	pushq	%r12
3020.cfi_adjust_cfa_offset	8
3021.cfi_offset	%r12,-32
3022	pushq	%r13
3023.cfi_adjust_cfa_offset	8
3024.cfi_offset	%r13,-40
3025	pushq	%r14
3026.cfi_adjust_cfa_offset	8
3027.cfi_offset	%r14,-48
3028	movq	8(%rax),%rbx
3029	movq	8+8(%rax),%rbp
3030
3031	movl	240(%rcx),%r10d
3032	movq	%rcx,%r11
3033	shll	$4,%r10d
3034	movups	(%rcx),%xmm9
3035	movups	16(%rcx,%r10,1),%xmm1
3036
3037	movdqu	(%r9),%xmm15
3038	pxor	%xmm1,%xmm9
3039	pxor	%xmm1,%xmm15
3040
3041	movl	$16+32,%eax
3042	leaq	32(%r11,%r10,1),%rcx
3043	movups	16(%r11),%xmm1
3044	subq	%r10,%rax
3045	movq	%rax,%r10
3046
3047	movdqu	(%rbx),%xmm10
3048	movdqu	(%rbp),%xmm8
3049
3050	testq	$1,%r8
3051	jnz	.Locb_dec_odd
3052
3053	bsfq	%r8,%r12
3054	addq	$1,%r8
3055	shlq	$4,%r12
3056	movdqu	(%rbx,%r12,1),%xmm7
3057	movdqu	(%rdi),%xmm2
3058	leaq	16(%rdi),%rdi
3059
3060	call	__ocb_decrypt1
3061
3062	movdqa	%xmm7,%xmm15
3063	movups	%xmm2,(%rsi)
3064	xorps	%xmm2,%xmm8
3065	leaq	16(%rsi),%rsi
3066	subq	$1,%rdx
3067	jz	.Locb_dec_done
3068
3069.Locb_dec_odd:
3070	leaq	1(%r8),%r12
3071	leaq	3(%r8),%r13
3072	leaq	5(%r8),%r14
3073	leaq	6(%r8),%r8
3074	bsfq	%r12,%r12
3075	bsfq	%r13,%r13
3076	bsfq	%r14,%r14
3077	shlq	$4,%r12
3078	shlq	$4,%r13
3079	shlq	$4,%r14
3080
3081	subq	$6,%rdx
3082	jc	.Locb_dec_short
3083	jmp	.Locb_dec_grandloop
3084
3085.align	32
3086.Locb_dec_grandloop:
3087	movdqu	0(%rdi),%xmm2
3088	movdqu	16(%rdi),%xmm3
3089	movdqu	32(%rdi),%xmm4
3090	movdqu	48(%rdi),%xmm5
3091	movdqu	64(%rdi),%xmm6
3092	movdqu	80(%rdi),%xmm7
3093	leaq	96(%rdi),%rdi
3094
3095	call	__ocb_decrypt6
3096
3097	movups	%xmm2,0(%rsi)
3098	pxor	%xmm2,%xmm8
3099	movups	%xmm3,16(%rsi)
3100	pxor	%xmm3,%xmm8
3101	movups	%xmm4,32(%rsi)
3102	pxor	%xmm4,%xmm8
3103	movups	%xmm5,48(%rsi)
3104	pxor	%xmm5,%xmm8
3105	movups	%xmm6,64(%rsi)
3106	pxor	%xmm6,%xmm8
3107	movups	%xmm7,80(%rsi)
3108	pxor	%xmm7,%xmm8
3109	leaq	96(%rsi),%rsi
3110	subq	$6,%rdx
3111	jnc	.Locb_dec_grandloop
3112
3113.Locb_dec_short:
3114	addq	$6,%rdx
3115	jz	.Locb_dec_done
3116
3117	movdqu	0(%rdi),%xmm2
3118	cmpq	$2,%rdx
3119	jb	.Locb_dec_one
3120	movdqu	16(%rdi),%xmm3
3121	je	.Locb_dec_two
3122
3123	movdqu	32(%rdi),%xmm4
3124	cmpq	$4,%rdx
3125	jb	.Locb_dec_three
3126	movdqu	48(%rdi),%xmm5
3127	je	.Locb_dec_four
3128
3129	movdqu	64(%rdi),%xmm6
3130	pxor	%xmm7,%xmm7
3131
3132	call	__ocb_decrypt6
3133
3134	movdqa	%xmm14,%xmm15
3135	movups	%xmm2,0(%rsi)
3136	pxor	%xmm2,%xmm8
3137	movups	%xmm3,16(%rsi)
3138	pxor	%xmm3,%xmm8
3139	movups	%xmm4,32(%rsi)
3140	pxor	%xmm4,%xmm8
3141	movups	%xmm5,48(%rsi)
3142	pxor	%xmm5,%xmm8
3143	movups	%xmm6,64(%rsi)
3144	pxor	%xmm6,%xmm8
3145
3146	jmp	.Locb_dec_done
3147
3148.align	16
3149.Locb_dec_one:
3150	movdqa	%xmm10,%xmm7
3151
3152	call	__ocb_decrypt1
3153
3154	movdqa	%xmm7,%xmm15
3155	movups	%xmm2,0(%rsi)
3156	xorps	%xmm2,%xmm8
3157	jmp	.Locb_dec_done
3158
3159.align	16
3160.Locb_dec_two:
3161	pxor	%xmm4,%xmm4
3162	pxor	%xmm5,%xmm5
3163
3164	call	__ocb_decrypt4
3165
3166	movdqa	%xmm11,%xmm15
3167	movups	%xmm2,0(%rsi)
3168	xorps	%xmm2,%xmm8
3169	movups	%xmm3,16(%rsi)
3170	xorps	%xmm3,%xmm8
3171
3172	jmp	.Locb_dec_done
3173
3174.align	16
3175.Locb_dec_three:
3176	pxor	%xmm5,%xmm5
3177
3178	call	__ocb_decrypt4
3179
3180	movdqa	%xmm12,%xmm15
3181	movups	%xmm2,0(%rsi)
3182	xorps	%xmm2,%xmm8
3183	movups	%xmm3,16(%rsi)
3184	xorps	%xmm3,%xmm8
3185	movups	%xmm4,32(%rsi)
3186	xorps	%xmm4,%xmm8
3187
3188	jmp	.Locb_dec_done
3189
3190.align	16
3191.Locb_dec_four:
3192	call	__ocb_decrypt4
3193
3194	movdqa	%xmm13,%xmm15
3195	movups	%xmm2,0(%rsi)
3196	pxor	%xmm2,%xmm8
3197	movups	%xmm3,16(%rsi)
3198	pxor	%xmm3,%xmm8
3199	movups	%xmm4,32(%rsi)
3200	pxor	%xmm4,%xmm8
3201	movups	%xmm5,48(%rsi)
3202	pxor	%xmm5,%xmm8
3203
3204.Locb_dec_done:
3205	pxor	%xmm0,%xmm15
3206	movdqu	%xmm8,(%rbp)
3207	movdqu	%xmm15,(%r9)
3208
3209	xorps	%xmm0,%xmm0
3210	pxor	%xmm1,%xmm1
3211	pxor	%xmm2,%xmm2
3212	pxor	%xmm3,%xmm3
3213	pxor	%xmm4,%xmm4
3214	pxor	%xmm5,%xmm5
3215	pxor	%xmm6,%xmm6
3216	pxor	%xmm7,%xmm7
3217	pxor	%xmm8,%xmm8
3218	pxor	%xmm9,%xmm9
3219	pxor	%xmm10,%xmm10
3220	pxor	%xmm11,%xmm11
3221	pxor	%xmm12,%xmm12
3222	pxor	%xmm13,%xmm13
3223	pxor	%xmm14,%xmm14
3224	pxor	%xmm15,%xmm15
3225	leaq	40(%rsp),%rax
3226.cfi_def_cfa	%rax,8
3227	movq	-40(%rax),%r14
3228.cfi_restore	%r14
3229	movq	-32(%rax),%r13
3230.cfi_restore	%r13
3231	movq	-24(%rax),%r12
3232.cfi_restore	%r12
3233	movq	-16(%rax),%rbp
3234.cfi_restore	%rbp
3235	movq	-8(%rax),%rbx
3236.cfi_restore	%rbx
3237	leaq	(%rax),%rsp
3238.cfi_def_cfa_register	%rsp
3239.Locb_dec_epilogue:
3240	.byte	0xf3,0xc3
3241.cfi_endproc
3242.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
3243
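/*
 * __ocb_decrypt6: decrypt the six blocks in %xmm2-%xmm7 with the same
 * offset chaining as __ocb_encrypt6, but using AESDEC/AESDECLAST and
 * leaving the checksum update to the caller (OCB checksums plaintext).
 */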
3244.type	__ocb_decrypt6,@function
3245.align	32
3246__ocb_decrypt6:
3247.cfi_startproc
3248	pxor	%xmm9,%xmm15
3249	movdqu	(%rbx,%r12,1),%xmm11
3250	movdqa	%xmm10,%xmm12
3251	movdqu	(%rbx,%r13,1),%xmm13
3252	movdqa	%xmm10,%xmm14
3253	pxor	%xmm15,%xmm10
3254	movdqu	(%rbx,%r14,1),%xmm15
3255	pxor	%xmm10,%xmm11
3256	pxor	%xmm10,%xmm2
3257	pxor	%xmm11,%xmm12
3258	pxor	%xmm11,%xmm3
3259	pxor	%xmm12,%xmm13
3260	pxor	%xmm12,%xmm4
3261	pxor	%xmm13,%xmm14
3262	pxor	%xmm13,%xmm5
3263	pxor	%xmm14,%xmm15
3264	pxor	%xmm14,%xmm6
3265	pxor	%xmm15,%xmm7
3266	movups	32(%r11),%xmm0
3267
3268	leaq	1(%r8),%r12
3269	leaq	3(%r8),%r13
3270	leaq	5(%r8),%r14
3271	addq	$6,%r8
3272	pxor	%xmm9,%xmm10
3273	bsfq	%r12,%r12
3274	bsfq	%r13,%r13
3275	bsfq	%r14,%r14
3276
3277.byte	102,15,56,222,209
3278.byte	102,15,56,222,217
3279.byte	102,15,56,222,225
3280.byte	102,15,56,222,233
3281	pxor	%xmm9,%xmm11
3282	pxor	%xmm9,%xmm12
3283.byte	102,15,56,222,241
3284	pxor	%xmm9,%xmm13
3285	pxor	%xmm9,%xmm14
3286.byte	102,15,56,222,249
3287	movups	48(%r11),%xmm1
3288	pxor	%xmm9,%xmm15
3289
3290.byte	102,15,56,222,208
3291.byte	102,15,56,222,216
3292.byte	102,15,56,222,224
3293.byte	102,15,56,222,232
3294.byte	102,15,56,222,240
3295.byte	102,15,56,222,248
3296	movups	64(%r11),%xmm0
3297	shlq	$4,%r12
3298	shlq	$4,%r13
3299	jmp	.Locb_dec_loop6
3300
3301.align	32
3302.Locb_dec_loop6:
3303.byte	102,15,56,222,209
3304.byte	102,15,56,222,217
3305.byte	102,15,56,222,225
3306.byte	102,15,56,222,233
3307.byte	102,15,56,222,241
3308.byte	102,15,56,222,249
3309	movups	(%rcx,%rax,1),%xmm1
3310	addq	$32,%rax
3311
3312.byte	102,15,56,222,208
3313.byte	102,15,56,222,216
3314.byte	102,15,56,222,224
3315.byte	102,15,56,222,232
3316.byte	102,15,56,222,240
3317.byte	102,15,56,222,248
3318	movups	-16(%rcx,%rax,1),%xmm0
3319	jnz	.Locb_dec_loop6
3320
3321.byte	102,15,56,222,209
3322.byte	102,15,56,222,217
3323.byte	102,15,56,222,225
3324.byte	102,15,56,222,233
3325.byte	102,15,56,222,241
3326.byte	102,15,56,222,249
3327	movups	16(%r11),%xmm1
3328	shlq	$4,%r14
3329
3330.byte	102,65,15,56,223,210
3331	movdqu	(%rbx),%xmm10
3332	movq	%r10,%rax
3333.byte	102,65,15,56,223,219
3334.byte	102,65,15,56,223,228
3335.byte	102,65,15,56,223,237
3336.byte	102,65,15,56,223,246
3337.byte	102,65,15,56,223,255
3338	.byte	0xf3,0xc3
3339.cfi_endproc
3340.size	__ocb_decrypt6,.-__ocb_decrypt6
3341
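/*
 * __ocb_decrypt4: four-block variant of __ocb_decrypt6; blocks in
 * %xmm2-%xmm5, offsets in %xmm10-%xmm13.
 */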
3342.type	__ocb_decrypt4,@function
3343.align	32
3344__ocb_decrypt4:
3345.cfi_startproc
3346	pxor	%xmm9,%xmm15
3347	movdqu	(%rbx,%r12,1),%xmm11
3348	movdqa	%xmm10,%xmm12
3349	movdqu	(%rbx,%r13,1),%xmm13
3350	pxor	%xmm15,%xmm10
3351	pxor	%xmm10,%xmm11
3352	pxor	%xmm10,%xmm2
3353	pxor	%xmm11,%xmm12
3354	pxor	%xmm11,%xmm3
3355	pxor	%xmm12,%xmm13
3356	pxor	%xmm12,%xmm4
3357	pxor	%xmm13,%xmm5
3358	movups	32(%r11),%xmm0
3359
3360	pxor	%xmm9,%xmm10
3361	pxor	%xmm9,%xmm11
3362	pxor	%xmm9,%xmm12
3363	pxor	%xmm9,%xmm13
3364
3365.byte	102,15,56,222,209
3366.byte	102,15,56,222,217
3367.byte	102,15,56,222,225
3368.byte	102,15,56,222,233
3369	movups	48(%r11),%xmm1
3370
3371.byte	102,15,56,222,208
3372.byte	102,15,56,222,216
3373.byte	102,15,56,222,224
3374.byte	102,15,56,222,232
3375	movups	64(%r11),%xmm0
3376	jmp	.Locb_dec_loop4
3377
3378.align	32
3379.Locb_dec_loop4:
3380.byte	102,15,56,222,209
3381.byte	102,15,56,222,217
3382.byte	102,15,56,222,225
3383.byte	102,15,56,222,233
3384	movups	(%rcx,%rax,1),%xmm1
3385	addq	$32,%rax
3386
3387.byte	102,15,56,222,208
3388.byte	102,15,56,222,216
3389.byte	102,15,56,222,224
3390.byte	102,15,56,222,232
3391	movups	-16(%rcx,%rax,1),%xmm0
3392	jnz	.Locb_dec_loop4
3393
3394.byte	102,15,56,222,209
3395.byte	102,15,56,222,217
3396.byte	102,15,56,222,225
3397.byte	102,15,56,222,233
3398	movups	16(%r11),%xmm1
3399	movq	%r10,%rax
3400
3401.byte	102,65,15,56,223,210
3402.byte	102,65,15,56,223,219
3403.byte	102,65,15,56,223,228
3404.byte	102,65,15,56,223,237
3405	.byte	0xf3,0xc3
3406.cfi_endproc
3407.size	__ocb_decrypt4,.-__ocb_decrypt4
3408
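/*
 * __ocb_decrypt1: single-block variant; the offset is formed in %xmm7 from
 * the L_ entry supplied by the caller, %xmm2 is decrypted with
 * AESDEC/AESDECLAST, and the caller folds the result into the checksum.
 */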
3409.type	__ocb_decrypt1,@function
3410.align	32
3411__ocb_decrypt1:
3412.cfi_startproc
3413	pxor	%xmm15,%xmm7
3414	pxor	%xmm9,%xmm7
3415	pxor	%xmm7,%xmm2
3416	movups	32(%r11),%xmm0
3417
3418.byte	102,15,56,222,209
3419	movups	48(%r11),%xmm1
3420	pxor	%xmm9,%xmm7
3421
3422.byte	102,15,56,222,208
3423	movups	64(%r11),%xmm0
3424	jmp	.Locb_dec_loop1
3425
3426.align	32
3427.Locb_dec_loop1:
3428.byte	102,15,56,222,209
3429	movups	(%rcx,%rax,1),%xmm1
3430	addq	$32,%rax
3431
3432.byte	102,15,56,222,208
3433	movups	-16(%rcx,%rax,1),%xmm0
3434	jnz	.Locb_dec_loop1
3435
3436.byte	102,15,56,222,209
3437	movups	16(%r11),%xmm1
3438	movq	%r10,%rax
3439
3440.byte	102,15,56,223,215
3441	.byte	0xf3,0xc3
3442.cfi_endproc
3443.size	__ocb_decrypt1,.-__ocb_decrypt1
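/*
 * void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
 *                        size_t length, const AES_KEY *key,
 *                        unsigned char ivec[16], int enc);
 *
 * %rdi = in, %rsi = out, %rdx = length in bytes, %rcx = key, %r8 = ivec,
 * %r9d = enc.  Encryption is inherently serial (one block at a time);
 * decryption branches to .Lcbc_decrypt and runs six or eight blocks in
 * parallel in the bulk path.
 */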
3444.globl	aesni_cbc_encrypt
3445.type	aesni_cbc_encrypt,@function
3446.align	16
3447aesni_cbc_encrypt:
3448.cfi_startproc
3449	testq	%rdx,%rdx
3450	jz	.Lcbc_ret
3451
3452	movl	240(%rcx),%r10d
3453	movq	%rcx,%r11
3454	testl	%r9d,%r9d
3455	jz	.Lcbc_decrypt
3456
3457	movups	(%r8),%xmm2
3458	movl	%r10d,%eax
3459	cmpq	$16,%rdx
3460	jb	.Lcbc_enc_tail
3461	subq	$16,%rdx
3462	jmp	.Lcbc_enc_loop
3463.align	16
3464.Lcbc_enc_loop:
3465	movups	(%rdi),%xmm3
3466	leaq	16(%rdi),%rdi
3467
3468	movups	(%rcx),%xmm0
3469	movups	16(%rcx),%xmm1
3470	xorps	%xmm0,%xmm3
3471	leaq	32(%rcx),%rcx
3472	xorps	%xmm3,%xmm2
3473.Loop_enc1_15:
3474.byte	102,15,56,220,209
3475	decl	%eax
3476	movups	(%rcx),%xmm1
3477	leaq	16(%rcx),%rcx
3478	jnz	.Loop_enc1_15
3479.byte	102,15,56,221,209
3480	movl	%r10d,%eax
3481	movq	%r11,%rcx
3482	movups	%xmm2,0(%rsi)
3483	leaq	16(%rsi),%rsi
3484	subq	$16,%rdx
3485	jnc	.Lcbc_enc_loop
3486	addq	$16,%rdx
3487	jnz	.Lcbc_enc_tail
3488	pxor	%xmm0,%xmm0
3489	pxor	%xmm1,%xmm1
3490	movups	%xmm2,(%r8)
3491	pxor	%xmm2,%xmm2
3492	pxor	%xmm3,%xmm3
3493	jmp	.Lcbc_ret
3494
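/*
 * Short final CBC-encrypt block: the .long 0x9066A4F3 / 0x9066AAF3 words
 * encode "rep movsb" and "rep stosb" (each padded with a 66 90 nop), which
 * copy the tail into the output buffer and zero-pad it to 16 bytes before
 * taking one more pass through .Lcbc_enc_loop in place.
 */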
3495.Lcbc_enc_tail:
3496	movq	%rdx,%rcx
3497	xchgq	%rdi,%rsi
3498.long	0x9066A4F3
3499	movl	$16,%ecx
3500	subq	%rdx,%rcx
3501	xorl	%eax,%eax
3502.long	0x9066AAF3
3503	leaq	-16(%rdi),%rdi
3504	movl	%r10d,%eax
3505	movq	%rdi,%rsi
3506	movq	%r11,%rcx
3507	xorq	%rdx,%rdx
3508	jmp	.Lcbc_enc_loop
3509
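/*
 * CBC decryption.  A single 16-byte input is handled inline just below;
 * anything longer goes through .Lcbc_decrypt_bulk, which reserves a
 * 16-byte aligned scratch slot on the stack for a trailing partial block.
 */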
3510.align	16
3511.Lcbc_decrypt:
3512	cmpq	$16,%rdx
3513	jne	.Lcbc_decrypt_bulk
3514
3515
3516
3517	movdqu	(%rdi),%xmm2
3518	movdqu	(%r8),%xmm3
3519	movdqa	%xmm2,%xmm4
3520	movups	(%rcx),%xmm0
3521	movups	16(%rcx),%xmm1
3522	leaq	32(%rcx),%rcx
3523	xorps	%xmm0,%xmm2
3524.Loop_dec1_16:
3525.byte	102,15,56,222,209
3526	decl	%r10d
3527	movups	(%rcx),%xmm1
3528	leaq	16(%rcx),%rcx
3529	jnz	.Loop_dec1_16
3530.byte	102,15,56,223,209
3531	pxor	%xmm0,%xmm0
3532	pxor	%xmm1,%xmm1
3533	movdqu	%xmm4,(%r8)
3534	xorps	%xmm3,%xmm2
3535	pxor	%xmm3,%xmm3
3536	movups	%xmm2,(%rsi)
3537	pxor	%xmm2,%xmm2
3538	jmp	.Lcbc_ret
3539.align	16
3540.Lcbc_decrypt_bulk:
3541	leaq	(%rsp),%r11
3542.cfi_def_cfa_register	%r11
3543	pushq	%rbp
3544.cfi_offset	%rbp,-16
3545	subq	$16,%rsp
3546	andq	$-16,%rsp
3547	movq	%rcx,%rbp
3548	movups	(%r8),%xmm10
3549	movl	%r10d,%eax
3550	cmpq	$0x50,%rdx
3551	jbe	.Lcbc_dec_tail
3552
3553	movups	(%rcx),%xmm0
3554	movdqu	0(%rdi),%xmm2
3555	movdqu	16(%rdi),%xmm3
3556	movdqa	%xmm2,%xmm11
3557	movdqu	32(%rdi),%xmm4
3558	movdqa	%xmm3,%xmm12
3559	movdqu	48(%rdi),%xmm5
3560	movdqa	%xmm4,%xmm13
3561	movdqu	64(%rdi),%xmm6
3562	movdqa	%xmm5,%xmm14
3563	movdqu	80(%rdi),%xmm7
3564	movdqa	%xmm6,%xmm15
3565	movl	OPENSSL_ia32cap_P+4(%rip),%r9d
3566	cmpq	$0x70,%rdx
3567	jbe	.Lcbc_dec_six_or_seven
3568
3569	andl	$71303168,%r9d
3570	subq	$0x50,%rdx
3571	cmpl	$4194304,%r9d
3572	je	.Lcbc_dec_loop6_enter
3573	subq	$0x20,%rdx
3574	leaq	112(%rcx),%rcx
3575	jmp	.Lcbc_dec_loop8_enter
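/*
 * Main eight-block CBC-decrypt loop.  The OPENSSL_ia32cap_P check above
 * routes some processors to the six-block loop at .Lcbc_dec_loop6 instead.
 * Eight AESDEC pipelines are interleaved; the saved ciphertext blocks (the
 * CBC chaining values) are pre-XORed with the last round key in
 * .Lcbc_dec_done so the chaining XOR is absorbed by AESDECLAST.
 */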
3576.align	16
3577.Lcbc_dec_loop8:
3578	movups	%xmm9,(%rsi)
3579	leaq	16(%rsi),%rsi
3580.Lcbc_dec_loop8_enter:
3581	movdqu	96(%rdi),%xmm8
3582	pxor	%xmm0,%xmm2
3583	movdqu	112(%rdi),%xmm9
3584	pxor	%xmm0,%xmm3
3585	movups	16-112(%rcx),%xmm1
3586	pxor	%xmm0,%xmm4
3587	movq	$-1,%rbp
3588	cmpq	$0x70,%rdx
3589	pxor	%xmm0,%xmm5
3590	pxor	%xmm0,%xmm6
3591	pxor	%xmm0,%xmm7
3592	pxor	%xmm0,%xmm8
3593
3594.byte	102,15,56,222,209
3595	pxor	%xmm0,%xmm9
3596	movups	32-112(%rcx),%xmm0
3597.byte	102,15,56,222,217
3598.byte	102,15,56,222,225
3599.byte	102,15,56,222,233
3600.byte	102,15,56,222,241
3601.byte	102,15,56,222,249
3602.byte	102,68,15,56,222,193
3603	adcq	$0,%rbp
3604	andq	$128,%rbp
3605.byte	102,68,15,56,222,201
3606	addq	%rdi,%rbp
3607	movups	48-112(%rcx),%xmm1
3608.byte	102,15,56,222,208
3609.byte	102,15,56,222,216
3610.byte	102,15,56,222,224
3611.byte	102,15,56,222,232
3612.byte	102,15,56,222,240
3613.byte	102,15,56,222,248
3614.byte	102,68,15,56,222,192
3615.byte	102,68,15,56,222,200
3616	movups	64-112(%rcx),%xmm0
3617	nop
3618.byte	102,15,56,222,209
3619.byte	102,15,56,222,217
3620.byte	102,15,56,222,225
3621.byte	102,15,56,222,233
3622.byte	102,15,56,222,241
3623.byte	102,15,56,222,249
3624.byte	102,68,15,56,222,193
3625.byte	102,68,15,56,222,201
3626	movups	80-112(%rcx),%xmm1
3627	nop
3628.byte	102,15,56,222,208
3629.byte	102,15,56,222,216
3630.byte	102,15,56,222,224
3631.byte	102,15,56,222,232
3632.byte	102,15,56,222,240
3633.byte	102,15,56,222,248
3634.byte	102,68,15,56,222,192
3635.byte	102,68,15,56,222,200
3636	movups	96-112(%rcx),%xmm0
3637	nop
3638.byte	102,15,56,222,209
3639.byte	102,15,56,222,217
3640.byte	102,15,56,222,225
3641.byte	102,15,56,222,233
3642.byte	102,15,56,222,241
3643.byte	102,15,56,222,249
3644.byte	102,68,15,56,222,193
3645.byte	102,68,15,56,222,201
3646	movups	112-112(%rcx),%xmm1
3647	nop
3648.byte	102,15,56,222,208
3649.byte	102,15,56,222,216
3650.byte	102,15,56,222,224
3651.byte	102,15,56,222,232
3652.byte	102,15,56,222,240
3653.byte	102,15,56,222,248
3654.byte	102,68,15,56,222,192
3655.byte	102,68,15,56,222,200
3656	movups	128-112(%rcx),%xmm0
3657	nop
3658.byte	102,15,56,222,209
3659.byte	102,15,56,222,217
3660.byte	102,15,56,222,225
3661.byte	102,15,56,222,233
3662.byte	102,15,56,222,241
3663.byte	102,15,56,222,249
3664.byte	102,68,15,56,222,193
3665.byte	102,68,15,56,222,201
3666	movups	144-112(%rcx),%xmm1
3667	cmpl	$11,%eax
3668.byte	102,15,56,222,208
3669.byte	102,15,56,222,216
3670.byte	102,15,56,222,224
3671.byte	102,15,56,222,232
3672.byte	102,15,56,222,240
3673.byte	102,15,56,222,248
3674.byte	102,68,15,56,222,192
3675.byte	102,68,15,56,222,200
3676	movups	160-112(%rcx),%xmm0
3677	jb	.Lcbc_dec_done
3678.byte	102,15,56,222,209
3679.byte	102,15,56,222,217
3680.byte	102,15,56,222,225
3681.byte	102,15,56,222,233
3682.byte	102,15,56,222,241
3683.byte	102,15,56,222,249
3684.byte	102,68,15,56,222,193
3685.byte	102,68,15,56,222,201
3686	movups	176-112(%rcx),%xmm1
3687	nop
3688.byte	102,15,56,222,208
3689.byte	102,15,56,222,216
3690.byte	102,15,56,222,224
3691.byte	102,15,56,222,232
3692.byte	102,15,56,222,240
3693.byte	102,15,56,222,248
3694.byte	102,68,15,56,222,192
3695.byte	102,68,15,56,222,200
3696	movups	192-112(%rcx),%xmm0
3697	je	.Lcbc_dec_done
3698.byte	102,15,56,222,209
3699.byte	102,15,56,222,217
3700.byte	102,15,56,222,225
3701.byte	102,15,56,222,233
3702.byte	102,15,56,222,241
3703.byte	102,15,56,222,249
3704.byte	102,68,15,56,222,193
3705.byte	102,68,15,56,222,201
3706	movups	208-112(%rcx),%xmm1
3707	nop
3708.byte	102,15,56,222,208
3709.byte	102,15,56,222,216
3710.byte	102,15,56,222,224
3711.byte	102,15,56,222,232
3712.byte	102,15,56,222,240
3713.byte	102,15,56,222,248
3714.byte	102,68,15,56,222,192
3715.byte	102,68,15,56,222,200
3716	movups	224-112(%rcx),%xmm0
3717	jmp	.Lcbc_dec_done
3718.align	16
3719.Lcbc_dec_done:
3720.byte	102,15,56,222,209
3721.byte	102,15,56,222,217
3722	pxor	%xmm0,%xmm10
3723	pxor	%xmm0,%xmm11
3724.byte	102,15,56,222,225
3725.byte	102,15,56,222,233
3726	pxor	%xmm0,%xmm12
3727	pxor	%xmm0,%xmm13
3728.byte	102,15,56,222,241
3729.byte	102,15,56,222,249
3730	pxor	%xmm0,%xmm14
3731	pxor	%xmm0,%xmm15
3732.byte	102,68,15,56,222,193
3733.byte	102,68,15,56,222,201
3734	movdqu	80(%rdi),%xmm1
3735
3736.byte	102,65,15,56,223,210
3737	movdqu	96(%rdi),%xmm10
3738	pxor	%xmm0,%xmm1
3739.byte	102,65,15,56,223,219
3740	pxor	%xmm0,%xmm10
3741	movdqu	112(%rdi),%xmm0
3742.byte	102,65,15,56,223,228
3743	leaq	128(%rdi),%rdi
3744	movdqu	0(%rbp),%xmm11
3745.byte	102,65,15,56,223,237
3746.byte	102,65,15,56,223,246
3747	movdqu	16(%rbp),%xmm12
3748	movdqu	32(%rbp),%xmm13
3749.byte	102,65,15,56,223,255
3750.byte	102,68,15,56,223,193
3751	movdqu	48(%rbp),%xmm14
3752	movdqu	64(%rbp),%xmm15
3753.byte	102,69,15,56,223,202
3754	movdqa	%xmm0,%xmm10
3755	movdqu	80(%rbp),%xmm1
3756	movups	-112(%rcx),%xmm0
3757
3758	movups	%xmm2,(%rsi)
3759	movdqa	%xmm11,%xmm2
3760	movups	%xmm3,16(%rsi)
3761	movdqa	%xmm12,%xmm3
3762	movups	%xmm4,32(%rsi)
3763	movdqa	%xmm13,%xmm4
3764	movups	%xmm5,48(%rsi)
3765	movdqa	%xmm14,%xmm5
3766	movups	%xmm6,64(%rsi)
3767	movdqa	%xmm15,%xmm6
3768	movups	%xmm7,80(%rsi)
3769	movdqa	%xmm1,%xmm7
3770	movups	%xmm8,96(%rsi)
3771	leaq	112(%rsi),%rsi
3772
3773	subq	$0x80,%rdx
3774	ja	.Lcbc_dec_loop8
3775
3776	movaps	%xmm9,%xmm2
3777	leaq	-112(%rcx),%rcx
3778	addq	$0x70,%rdx
3779	jle	.Lcbc_dec_clear_tail_collected
3780	movups	%xmm9,(%rsi)
3781	leaq	16(%rsi),%rsi
3782	cmpq	$0x50,%rdx
3783	jbe	.Lcbc_dec_tail
3784
3785	movaps	%xmm11,%xmm2
3786.Lcbc_dec_six_or_seven:
3787	cmpq	$0x60,%rdx
3788	ja	.Lcbc_dec_seven
3789
3790	movaps	%xmm7,%xmm8
3791	call	_aesni_decrypt6
3792	pxor	%xmm10,%xmm2
3793	movaps	%xmm8,%xmm10
3794	pxor	%xmm11,%xmm3
3795	movdqu	%xmm2,(%rsi)
3796	pxor	%xmm12,%xmm4
3797	movdqu	%xmm3,16(%rsi)
3798	pxor	%xmm3,%xmm3
3799	pxor	%xmm13,%xmm5
3800	movdqu	%xmm4,32(%rsi)
3801	pxor	%xmm4,%xmm4
3802	pxor	%xmm14,%xmm6
3803	movdqu	%xmm5,48(%rsi)
3804	pxor	%xmm5,%xmm5
3805	pxor	%xmm15,%xmm7
3806	movdqu	%xmm6,64(%rsi)
3807	pxor	%xmm6,%xmm6
3808	leaq	80(%rsi),%rsi
3809	movdqa	%xmm7,%xmm2
3810	pxor	%xmm7,%xmm7
3811	jmp	.Lcbc_dec_tail_collected
3812
3813.align	16
3814.Lcbc_dec_seven:
3815	movups	96(%rdi),%xmm8
3816	xorps	%xmm9,%xmm9
3817	call	_aesni_decrypt8
3818	movups	80(%rdi),%xmm9
3819	pxor	%xmm10,%xmm2
3820	movups	96(%rdi),%xmm10
3821	pxor	%xmm11,%xmm3
3822	movdqu	%xmm2,(%rsi)
3823	pxor	%xmm12,%xmm4
3824	movdqu	%xmm3,16(%rsi)
3825	pxor	%xmm3,%xmm3
3826	pxor	%xmm13,%xmm5
3827	movdqu	%xmm4,32(%rsi)
3828	pxor	%xmm4,%xmm4
3829	pxor	%xmm14,%xmm6
3830	movdqu	%xmm5,48(%rsi)
3831	pxor	%xmm5,%xmm5
3832	pxor	%xmm15,%xmm7
3833	movdqu	%xmm6,64(%rsi)
3834	pxor	%xmm6,%xmm6
3835	pxor	%xmm9,%xmm8
3836	movdqu	%xmm7,80(%rsi)
3837	pxor	%xmm7,%xmm7
3838	leaq	96(%rsi),%rsi
3839	movdqa	%xmm8,%xmm2
3840	pxor	%xmm8,%xmm8
3841	pxor	%xmm9,%xmm9
3842	jmp	.Lcbc_dec_tail_collected
3843
3844.align	16
3845.Lcbc_dec_loop6:
3846	movups	%xmm7,(%rsi)
3847	leaq	16(%rsi),%rsi
3848	movdqu	0(%rdi),%xmm2
3849	movdqu	16(%rdi),%xmm3
3850	movdqa	%xmm2,%xmm11
3851	movdqu	32(%rdi),%xmm4
3852	movdqa	%xmm3,%xmm12
3853	movdqu	48(%rdi),%xmm5
3854	movdqa	%xmm4,%xmm13
3855	movdqu	64(%rdi),%xmm6
3856	movdqa	%xmm5,%xmm14
3857	movdqu	80(%rdi),%xmm7
3858	movdqa	%xmm6,%xmm15
3859.Lcbc_dec_loop6_enter:
3860	leaq	96(%rdi),%rdi
3861	movdqa	%xmm7,%xmm8
3862
3863	call	_aesni_decrypt6
3864
3865	pxor	%xmm10,%xmm2
3866	movdqa	%xmm8,%xmm10
3867	pxor	%xmm11,%xmm3
3868	movdqu	%xmm2,(%rsi)
3869	pxor	%xmm12,%xmm4
3870	movdqu	%xmm3,16(%rsi)
3871	pxor	%xmm13,%xmm5
3872	movdqu	%xmm4,32(%rsi)
3873	pxor	%xmm14,%xmm6
3874	movq	%rbp,%rcx
3875	movdqu	%xmm5,48(%rsi)
3876	pxor	%xmm15,%xmm7
3877	movl	%r10d,%eax
3878	movdqu	%xmm6,64(%rsi)
3879	leaq	80(%rsi),%rsi
3880	subq	$0x60,%rdx
3881	ja	.Lcbc_dec_loop6
3882
3883	movdqa	%xmm7,%xmm2
3884	addq	$0x50,%rdx
3885	jle	.Lcbc_dec_clear_tail_collected
3886	movups	%xmm7,(%rsi)
3887	leaq	16(%rsi),%rsi
3888
3889.Lcbc_dec_tail:
3890	movups	(%rdi),%xmm2
3891	subq	$0x10,%rdx
3892	jbe	.Lcbc_dec_one
3893
3894	movups	16(%rdi),%xmm3
3895	movaps	%xmm2,%xmm11
3896	subq	$0x10,%rdx
3897	jbe	.Lcbc_dec_two
3898
3899	movups	32(%rdi),%xmm4
3900	movaps	%xmm3,%xmm12
3901	subq	$0x10,%rdx
3902	jbe	.Lcbc_dec_three
3903
3904	movups	48(%rdi),%xmm5
3905	movaps	%xmm4,%xmm13
3906	subq	$0x10,%rdx
3907	jbe	.Lcbc_dec_four
3908
3909	movups	64(%rdi),%xmm6
3910	movaps	%xmm5,%xmm14
3911	movaps	%xmm6,%xmm15
3912	xorps	%xmm7,%xmm7
3913	call	_aesni_decrypt6
3914	pxor	%xmm10,%xmm2
3915	movaps	%xmm15,%xmm10
3916	pxor	%xmm11,%xmm3
3917	movdqu	%xmm2,(%rsi)
3918	pxor	%xmm12,%xmm4
3919	movdqu	%xmm3,16(%rsi)
3920	pxor	%xmm3,%xmm3
3921	pxor	%xmm13,%xmm5
3922	movdqu	%xmm4,32(%rsi)
3923	pxor	%xmm4,%xmm4
3924	pxor	%xmm14,%xmm6
3925	movdqu	%xmm5,48(%rsi)
3926	pxor	%xmm5,%xmm5
3927	leaq	64(%rsi),%rsi
3928	movdqa	%xmm6,%xmm2
3929	pxor	%xmm6,%xmm6
3930	pxor	%xmm7,%xmm7
3931	subq	$0x10,%rdx
3932	jmp	.Lcbc_dec_tail_collected
3933
3934.align	16
3935.Lcbc_dec_one:
3936	movaps	%xmm2,%xmm11
3937	movups	(%rcx),%xmm0
3938	movups	16(%rcx),%xmm1
3939	leaq	32(%rcx),%rcx
3940	xorps	%xmm0,%xmm2
3941.Loop_dec1_17:
3942.byte	102,15,56,222,209
3943	decl	%eax
3944	movups	(%rcx),%xmm1
3945	leaq	16(%rcx),%rcx
3946	jnz	.Loop_dec1_17
3947.byte	102,15,56,223,209
3948	xorps	%xmm10,%xmm2
3949	movaps	%xmm11,%xmm10
3950	jmp	.Lcbc_dec_tail_collected
3951.align	16
3952.Lcbc_dec_two:
3953	movaps	%xmm3,%xmm12
3954	call	_aesni_decrypt2
3955	pxor	%xmm10,%xmm2
3956	movaps	%xmm12,%xmm10
3957	pxor	%xmm11,%xmm3
3958	movdqu	%xmm2,(%rsi)
3959	movdqa	%xmm3,%xmm2
3960	pxor	%xmm3,%xmm3
3961	leaq	16(%rsi),%rsi
3962	jmp	.Lcbc_dec_tail_collected
3963.align	16
3964.Lcbc_dec_three:
3965	movaps	%xmm4,%xmm13
3966	call	_aesni_decrypt3
3967	pxor	%xmm10,%xmm2
3968	movaps	%xmm13,%xmm10
3969	pxor	%xmm11,%xmm3
3970	movdqu	%xmm2,(%rsi)
3971	pxor	%xmm12,%xmm4
3972	movdqu	%xmm3,16(%rsi)
3973	pxor	%xmm3,%xmm3
3974	movdqa	%xmm4,%xmm2
3975	pxor	%xmm4,%xmm4
3976	leaq	32(%rsi),%rsi
3977	jmp	.Lcbc_dec_tail_collected
3978.align	16
3979.Lcbc_dec_four:
3980	movaps	%xmm5,%xmm14
3981	call	_aesni_decrypt4
3982	pxor	%xmm10,%xmm2
3983	movaps	%xmm14,%xmm10
3984	pxor	%xmm11,%xmm3
3985	movdqu	%xmm2,(%rsi)
3986	pxor	%xmm12,%xmm4
3987	movdqu	%xmm3,16(%rsi)
3988	pxor	%xmm3,%xmm3
3989	pxor	%xmm13,%xmm5
3990	movdqu	%xmm4,32(%rsi)
3991	pxor	%xmm4,%xmm4
3992	movdqa	%xmm5,%xmm2
3993	pxor	%xmm5,%xmm5
3994	leaq	48(%rsi),%rsi
3995	jmp	.Lcbc_dec_tail_collected
3996
3997.align	16
3998.Lcbc_dec_clear_tail_collected:
3999	pxor	%xmm3,%xmm3
4000	pxor	%xmm4,%xmm4
4001	pxor	%xmm5,%xmm5
4002	pxor	%xmm6,%xmm6
4003	pxor	%xmm7,%xmm7
4004	pxor	%xmm8,%xmm8
4005	pxor	%xmm9,%xmm9
4006.Lcbc_dec_tail_collected:
4007	movups	%xmm10,(%r8)
4008	andq	$15,%rdx
4009	jnz	.Lcbc_dec_tail_partial
4010	movups	%xmm2,(%rsi)
4011	pxor	%xmm2,%xmm2
4012	jmp	.Lcbc_dec_ret
4013.align	16
4014.Lcbc_dec_tail_partial:
4015	movaps	%xmm2,(%rsp)
4016	pxor	%xmm2,%xmm2
4017	movq	$16,%rcx
4018	movq	%rsi,%rdi
4019	subq	%rdx,%rcx
4020	leaq	(%rsp),%rsi
4021.long	0x9066A4F3
4022	movdqa	%xmm2,(%rsp)
4023
4024.Lcbc_dec_ret:
4025	xorps	%xmm0,%xmm0
4026	pxor	%xmm1,%xmm1
4027	movq	-8(%r11),%rbp
4028.cfi_restore	%rbp
4029	leaq	(%r11),%rsp
4030.cfi_def_cfa_register	%rsp
4031.Lcbc_ret:
4032	.byte	0xf3,0xc3
4033.cfi_endproc
4034.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
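/*
 * int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key);
 *
 * Runs __aesni_set_encrypt_key and, on success, converts the schedule in
 * place for the equivalent inverse cipher: the round keys are swapped
 * end-for-end and the inner ones are passed through AESIMC (the
 * 102,15,56,219 byte sequences).  The encrypt-key return code is
 * propagated in %eax.
 */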
4035.globl	aesni_set_decrypt_key
4036.type	aesni_set_decrypt_key,@function
4037.align	16
4038aesni_set_decrypt_key:
4039.cfi_startproc
4040.byte	0x48,0x83,0xEC,0x08
4041.cfi_adjust_cfa_offset	8
4042	call	__aesni_set_encrypt_key
4043	shll	$4,%esi
4044	testl	%eax,%eax
4045	jnz	.Ldec_key_ret
4046	leaq	16(%rdx,%rsi,1),%rdi
4047
4048	movups	(%rdx),%xmm0
4049	movups	(%rdi),%xmm1
4050	movups	%xmm0,(%rdi)
4051	movups	%xmm1,(%rdx)
4052	leaq	16(%rdx),%rdx
4053	leaq	-16(%rdi),%rdi
4054
4055.Ldec_key_inverse:
4056	movups	(%rdx),%xmm0
4057	movups	(%rdi),%xmm1
4058.byte	102,15,56,219,192
4059.byte	102,15,56,219,201
4060	leaq	16(%rdx),%rdx
4061	leaq	-16(%rdi),%rdi
4062	movups	%xmm0,16(%rdi)
4063	movups	%xmm1,-16(%rdx)
4064	cmpq	%rdx,%rdi
4065	ja	.Ldec_key_inverse
4066
4067	movups	(%rdx),%xmm0
4068.byte	102,15,56,219,192
4069	pxor	%xmm1,%xmm1
4070	movups	%xmm0,(%rdi)
4071	pxor	%xmm0,%xmm0
4072.Ldec_key_ret:
4073	addq	$8,%rsp
4074.cfi_adjust_cfa_offset	-8
4075	.byte	0xf3,0xc3
4076.cfi_endproc
4077.LSEH_end_set_decrypt_key:
4078.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
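/*
 * int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key);
 *
 * %rdi = userKey, %esi = bits (128/192/256), %rdx = key schedule.  Returns
 * 0 on success, -1 for a NULL pointer and -2 for unsupported key sizes.
 * A test of OPENSSL_ia32cap_P picks either the AESKEYGENASSIST-based
 * expansion or the "_alt" paths that build the schedule with PSHUFB and
 * AESENCLAST using the .Lkey_rotate/.Lkey_rcon constants.
 */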
4079.globl	aesni_set_encrypt_key
4080.type	aesni_set_encrypt_key,@function
4081.align	16
4082aesni_set_encrypt_key:
4083__aesni_set_encrypt_key:
4084.cfi_startproc
4085.byte	0x48,0x83,0xEC,0x08
4086.cfi_adjust_cfa_offset	8
4087	movq	$-1,%rax
4088	testq	%rdi,%rdi
4089	jz	.Lenc_key_ret
4090	testq	%rdx,%rdx
4091	jz	.Lenc_key_ret
4092
4093	movl	$268437504,%r10d
4094	movups	(%rdi),%xmm0
4095	xorps	%xmm4,%xmm4
4096	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
4097	leaq	16(%rdx),%rax
4098	cmpl	$256,%esi
4099	je	.L14rounds
4100	cmpl	$192,%esi
4101	je	.L12rounds
4102	cmpl	$128,%esi
4103	jne	.Lbad_keybits
4104
4105.L10rounds:
4106	movl	$9,%esi
4107	cmpl	$268435456,%r10d
4108	je	.L10rounds_alt
4109
4110	movups	%xmm0,(%rdx)
4111.byte	102,15,58,223,200,1
4112	call	.Lkey_expansion_128_cold
4113.byte	102,15,58,223,200,2
4114	call	.Lkey_expansion_128
4115.byte	102,15,58,223,200,4
4116	call	.Lkey_expansion_128
4117.byte	102,15,58,223,200,8
4118	call	.Lkey_expansion_128
4119.byte	102,15,58,223,200,16
4120	call	.Lkey_expansion_128
4121.byte	102,15,58,223,200,32
4122	call	.Lkey_expansion_128
4123.byte	102,15,58,223,200,64
4124	call	.Lkey_expansion_128
4125.byte	102,15,58,223,200,128
4126	call	.Lkey_expansion_128
4127.byte	102,15,58,223,200,27
4128	call	.Lkey_expansion_128
4129.byte	102,15,58,223,200,54
4130	call	.Lkey_expansion_128
4131	movups	%xmm0,(%rax)
4132	movl	%esi,80(%rax)
4133	xorl	%eax,%eax
4134	jmp	.Lenc_key_ret
4135
4136.align	16
4137.L10rounds_alt:
4138	movdqa	.Lkey_rotate(%rip),%xmm5
4139	movl	$8,%r10d
4140	movdqa	.Lkey_rcon1(%rip),%xmm4
4141	movdqa	%xmm0,%xmm2
4142	movdqu	%xmm0,(%rdx)
4143	jmp	.Loop_key128
4144
4145.align	16
4146.Loop_key128:
4147.byte	102,15,56,0,197
4148.byte	102,15,56,221,196
4149	pslld	$1,%xmm4
4150	leaq	16(%rax),%rax
4151
4152	movdqa	%xmm2,%xmm3
4153	pslldq	$4,%xmm2
4154	pxor	%xmm2,%xmm3
4155	pslldq	$4,%xmm2
4156	pxor	%xmm2,%xmm3
4157	pslldq	$4,%xmm2
4158	pxor	%xmm3,%xmm2
4159
4160	pxor	%xmm2,%xmm0
4161	movdqu	%xmm0,-16(%rax)
4162	movdqa	%xmm0,%xmm2
4163
4164	decl	%r10d
4165	jnz	.Loop_key128
4166
4167	movdqa	.Lkey_rcon1b(%rip),%xmm4
4168
4169.byte	102,15,56,0,197
4170.byte	102,15,56,221,196
4171	pslld	$1,%xmm4
4172
4173	movdqa	%xmm2,%xmm3
4174	pslldq	$4,%xmm2
4175	pxor	%xmm2,%xmm3
4176	pslldq	$4,%xmm2
4177	pxor	%xmm2,%xmm3
4178	pslldq	$4,%xmm2
4179	pxor	%xmm3,%xmm2
4180
4181	pxor	%xmm2,%xmm0
4182	movdqu	%xmm0,(%rax)
4183
4184	movdqa	%xmm0,%xmm2
4185.byte	102,15,56,0,197
4186.byte	102,15,56,221,196
4187
4188	movdqa	%xmm2,%xmm3
4189	pslldq	$4,%xmm2
4190	pxor	%xmm2,%xmm3
4191	pslldq	$4,%xmm2
4192	pxor	%xmm2,%xmm3
4193	pslldq	$4,%xmm2
4194	pxor	%xmm3,%xmm2
4195
4196	pxor	%xmm2,%xmm0
4197	movdqu	%xmm0,16(%rax)
4198
4199	movl	%esi,96(%rax)
4200	xorl	%eax,%eax
4201	jmp	.Lenc_key_ret
4202
4203.align	16
4204.L12rounds:
4205	movq	16(%rdi),%xmm2
4206	movl	$11,%esi
4207	cmpl	$268435456,%r10d
4208	je	.L12rounds_alt
4209
4210	movups	%xmm0,(%rdx)
4211.byte	102,15,58,223,202,1
4212	call	.Lkey_expansion_192a_cold
4213.byte	102,15,58,223,202,2
4214	call	.Lkey_expansion_192b
4215.byte	102,15,58,223,202,4
4216	call	.Lkey_expansion_192a
4217.byte	102,15,58,223,202,8
4218	call	.Lkey_expansion_192b
4219.byte	102,15,58,223,202,16
4220	call	.Lkey_expansion_192a
4221.byte	102,15,58,223,202,32
4222	call	.Lkey_expansion_192b
4223.byte	102,15,58,223,202,64
4224	call	.Lkey_expansion_192a
4225.byte	102,15,58,223,202,128
4226	call	.Lkey_expansion_192b
4227	movups	%xmm0,(%rax)
4228	movl	%esi,48(%rax)
4229	xorq	%rax,%rax
4230	jmp	.Lenc_key_ret
4231
4232.align	16
4233.L12rounds_alt:
4234	movdqa	.Lkey_rotate192(%rip),%xmm5
4235	movdqa	.Lkey_rcon1(%rip),%xmm4
4236	movl	$8,%r10d
4237	movdqu	%xmm0,(%rdx)
4238	jmp	.Loop_key192
4239
4240.align	16
4241.Loop_key192:
4242	movq	%xmm2,0(%rax)
4243	movdqa	%xmm2,%xmm1
4244.byte	102,15,56,0,213
4245.byte	102,15,56,221,212
4246	pslld	$1,%xmm4
4247	leaq	24(%rax),%rax
4248
4249	movdqa	%xmm0,%xmm3
4250	pslldq	$4,%xmm0
4251	pxor	%xmm0,%xmm3
4252	pslldq	$4,%xmm0
4253	pxor	%xmm0,%xmm3
4254	pslldq	$4,%xmm0
4255	pxor	%xmm3,%xmm0
4256
4257	pshufd	$0xff,%xmm0,%xmm3
4258	pxor	%xmm1,%xmm3
4259	pslldq	$4,%xmm1
4260	pxor	%xmm1,%xmm3
4261
4262	pxor	%xmm2,%xmm0
4263	pxor	%xmm3,%xmm2
4264	movdqu	%xmm0,-16(%rax)
4265
4266	decl	%r10d
4267	jnz	.Loop_key192
4268
4269	movl	%esi,32(%rax)
4270	xorl	%eax,%eax
4271	jmp	.Lenc_key_ret
4272
4273.align	16
4274.L14rounds:
4275	movups	16(%rdi),%xmm2
4276	movl	$13,%esi
4277	leaq	16(%rax),%rax
4278	cmpl	$268435456,%r10d
4279	je	.L14rounds_alt
4280
4281	movups	%xmm0,(%rdx)
4282	movups	%xmm2,16(%rdx)
4283.byte	102,15,58,223,202,1
4284	call	.Lkey_expansion_256a_cold
4285.byte	102,15,58,223,200,1
4286	call	.Lkey_expansion_256b
4287.byte	102,15,58,223,202,2
4288	call	.Lkey_expansion_256a
4289.byte	102,15,58,223,200,2
4290	call	.Lkey_expansion_256b
4291.byte	102,15,58,223,202,4
4292	call	.Lkey_expansion_256a
4293.byte	102,15,58,223,200,4
4294	call	.Lkey_expansion_256b
4295.byte	102,15,58,223,202,8
4296	call	.Lkey_expansion_256a
4297.byte	102,15,58,223,200,8
4298	call	.Lkey_expansion_256b
4299.byte	102,15,58,223,202,16
4300	call	.Lkey_expansion_256a
4301.byte	102,15,58,223,200,16
4302	call	.Lkey_expansion_256b
4303.byte	102,15,58,223,202,32
4304	call	.Lkey_expansion_256a
4305.byte	102,15,58,223,200,32
4306	call	.Lkey_expansion_256b
4307.byte	102,15,58,223,202,64
4308	call	.Lkey_expansion_256a
4309	movups	%xmm0,(%rax)
4310	movl	%esi,16(%rax)
4311	xorq	%rax,%rax
4312	jmp	.Lenc_key_ret
4313
4314.align	16
4315.L14rounds_alt:
4316	movdqa	.Lkey_rotate(%rip),%xmm5
4317	movdqa	.Lkey_rcon1(%rip),%xmm4
4318	movl	$7,%r10d
4319	movdqu	%xmm0,0(%rdx)
4320	movdqa	%xmm2,%xmm1
4321	movdqu	%xmm2,16(%rdx)
4322	jmp	.Loop_key256
4323
4324.align	16
4325.Loop_key256:
4326.byte	102,15,56,0,213
4327.byte	102,15,56,221,212
4328
4329	movdqa	%xmm0,%xmm3
4330	pslldq	$4,%xmm0
4331	pxor	%xmm0,%xmm3
4332	pslldq	$4,%xmm0
4333	pxor	%xmm0,%xmm3
4334	pslldq	$4,%xmm0
4335	pxor	%xmm3,%xmm0
4336	pslld	$1,%xmm4
4337
4338	pxor	%xmm2,%xmm0
4339	movdqu	%xmm0,(%rax)
4340
4341	decl	%r10d
4342	jz	.Ldone_key256
4343
4344	pshufd	$0xff,%xmm0,%xmm2
4345	pxor	%xmm3,%xmm3
4346.byte	102,15,56,221,211
4347
4348	movdqa	%xmm1,%xmm3
4349	pslldq	$4,%xmm1
4350	pxor	%xmm1,%xmm3
4351	pslldq	$4,%xmm1
4352	pxor	%xmm1,%xmm3
4353	pslldq	$4,%xmm1
4354	pxor	%xmm3,%xmm1
4355
4356	pxor	%xmm1,%xmm2
4357	movdqu	%xmm2,16(%rax)
4358	leaq	32(%rax),%rax
4359	movdqa	%xmm2,%xmm1
4360
4361	jmp	.Loop_key256
4362
4363.Ldone_key256:
4364	movl	%esi,16(%rax)
4365	xorl	%eax,%eax
4366	jmp	.Lenc_key_ret
4367
4368.align	16
4369.Lbad_keybits:
4370	movq	$-2,%rax
4371.Lenc_key_ret:
4372	pxor	%xmm0,%xmm0
4373	pxor	%xmm1,%xmm1
4374	pxor	%xmm2,%xmm2
4375	pxor	%xmm3,%xmm3
4376	pxor	%xmm4,%xmm4
4377	pxor	%xmm5,%xmm5
4378	addq	$8,%rsp
4379.cfi_adjust_cfa_offset	-8
4380	.byte	0xf3,0xc3
4381.LSEH_end_set_encrypt_key:
4382
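/*
 * Key-expansion subroutines for the AESKEYGENASSIST path.  Each is entered
 * with the AESKEYGENASSIST result in %xmm1, the previous round key(s) in
 * %xmm0 and %xmm2, and %xmm4 zeroed; the shufps/xorps sequences accumulate
 * the running XOR of the schedule words and store each round key through
 * %rax.
 */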
4383.align	16
4384.Lkey_expansion_128:
4385	movups	%xmm0,(%rax)
4386	leaq	16(%rax),%rax
4387.Lkey_expansion_128_cold:
4388	shufps	$16,%xmm0,%xmm4
4389	xorps	%xmm4,%xmm0
4390	shufps	$140,%xmm0,%xmm4
4391	xorps	%xmm4,%xmm0
4392	shufps	$255,%xmm1,%xmm1
4393	xorps	%xmm1,%xmm0
4394	.byte	0xf3,0xc3
4395
4396.align	16
4397.Lkey_expansion_192a:
4398	movups	%xmm0,(%rax)
4399	leaq	16(%rax),%rax
4400.Lkey_expansion_192a_cold:
4401	movaps	%xmm2,%xmm5
4402.Lkey_expansion_192b_warm:
4403	shufps	$16,%xmm0,%xmm4
4404	movdqa	%xmm2,%xmm3
4405	xorps	%xmm4,%xmm0
4406	shufps	$140,%xmm0,%xmm4
4407	pslldq	$4,%xmm3
4408	xorps	%xmm4,%xmm0
4409	pshufd	$85,%xmm1,%xmm1
4410	pxor	%xmm3,%xmm2
4411	pxor	%xmm1,%xmm0
4412	pshufd	$255,%xmm0,%xmm3
4413	pxor	%xmm3,%xmm2
4414	.byte	0xf3,0xc3
4415
4416.align	16
4417.Lkey_expansion_192b:
4418	movaps	%xmm0,%xmm3
4419	shufps	$68,%xmm0,%xmm5
4420	movups	%xmm5,(%rax)
4421	shufps	$78,%xmm2,%xmm3
4422	movups	%xmm3,16(%rax)
4423	leaq	32(%rax),%rax
4424	jmp	.Lkey_expansion_192b_warm
4425
4426.align	16
4427.Lkey_expansion_256a:
4428	movups	%xmm2,(%rax)
4429	leaq	16(%rax),%rax
4430.Lkey_expansion_256a_cold:
4431	shufps	$16,%xmm0,%xmm4
4432	xorps	%xmm4,%xmm0
4433	shufps	$140,%xmm0,%xmm4
4434	xorps	%xmm4,%xmm0
4435	shufps	$255,%xmm1,%xmm1
4436	xorps	%xmm1,%xmm0
4437	.byte	0xf3,0xc3
4438
4439.align	16
4440.Lkey_expansion_256b:
4441	movups	%xmm0,(%rax)
4442	leaq	16(%rax),%rax
4443
4444	shufps	$16,%xmm2,%xmm4
4445	xorps	%xmm4,%xmm2
4446	shufps	$140,%xmm2,%xmm4
4447	xorps	%xmm4,%xmm2
4448	shufps	$170,%xmm1,%xmm1
4449	xorps	%xmm1,%xmm2
4450	.byte	0xf3,0xc3
4451.cfi_endproc
4452.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
4453.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
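/*
 * Constant pool for the routines in this file: .Lbswap_mask byte-swaps a
 * block for the big-endian CTR counter, .Lincrement32/.Lincrement64 and
 * .Lincrement1 step those counters, .Lxts_magic is the 0x87 feedback
 * constant for the GF(2^128) tweak update in XTS mode, and the .Lkey_*
 * vectors drive the PSHUFB-based "_alt" key schedule above.
 */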
4454.align	64
4455.Lbswap_mask:
4456.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4457.Lincrement32:
4458.long	6,6,6,0
4459.Lincrement64:
4460.long	1,0,0,0
4461.Lxts_magic:
4462.long	0x87,0,1,0
4463.Lincrement1:
4464.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4465.Lkey_rotate:
4466.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4467.Lkey_rotate192:
4468.long	0x04070605,0x04070605,0x04070605,0x04070605
4469.Lkey_rcon1:
4470.long	1,1,1,1
4471.Lkey_rcon1b:
4472.long	0x1b,0x1b,0x1b,0x1b
4473
4474.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4475.align	64
4476