xref: /freebsd/sys/crypto/openssl/amd64/aesni-x86_64.S (revision f5463265955b829775bbb32e1fd0bc11dafc36ce)
1/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
2.text
3
/*
 * aesni_encrypt: encrypt a single 16-byte block.
 *
 * In:   %rdi = address of the 16-byte input block
 *       %rsi = address the 16-byte result is stored to
 *       %rdx = address of the key schedule; the 32-bit value at offset 240
 *              is used as the loop count (presumably the round count of an
 *              AES_KEY -- confirm against the C declaration)
 * Clobbers: %eax, %rdx, %xmm0-%xmm2 (all three xmm registers are zeroed
 *           before returning), flags.
 */
4.globl	aesni_encrypt
5.type	aesni_encrypt,@function
6.align	16
7aesni_encrypt:
8.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64. */
9.byte	243,15,30,250
10	movups	(%rdi),%xmm2
11	movl	240(%rdx),%eax
12	movups	(%rdx),%xmm0
13	movups	16(%rdx),%xmm1
14	leaq	32(%rdx),%rdx
/* Whitening: block ^= first 16 bytes of the key schedule. */
15	xorps	%xmm0,%xmm2
16.Loop_enc1_1:
/* 66 0F 38 DC D1 = aesenc %xmm1,%xmm2 */
17.byte	102,15,56,220,209
18	decl	%eax
/* movups and leaq leave the flags alone, so jnz still tests the decl. */
19	movups	(%rdx),%xmm1
20	leaq	16(%rdx),%rdx
21	jnz	.Loop_enc1_1
/* 66 0F 38 DD D1 = aesenclast %xmm1,%xmm2 */
22.byte	102,15,56,221,209
/* Wipe the round keys and, after the store, the result from registers. */
23	pxor	%xmm0,%xmm0
24	pxor	%xmm1,%xmm1
25	movups	%xmm2,(%rsi)
26	pxor	%xmm2,%xmm2
/* F3 C3 = rep ret */
27	.byte	0xf3,0xc3
28.cfi_endproc
29.size	aesni_encrypt,.-aesni_encrypt
30
/*
 * aesni_decrypt: decrypt a single 16-byte block.
 *
 * In:   %rdi = address of the 16-byte input block
 *       %rsi = address the 16-byte result is stored to
 *       %rdx = address of the key schedule; the 32-bit value at offset 240
 *              is used as the loop count (presumably the round count --
 *              confirm against the C declaration)
 * Clobbers: %eax, %rdx, %xmm0-%xmm2 (all three xmm registers are zeroed
 *           before returning), flags.
 */
31.globl	aesni_decrypt
32.type	aesni_decrypt,@function
33.align	16
34aesni_decrypt:
35.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64. */
36.byte	243,15,30,250
37	movups	(%rdi),%xmm2
38	movl	240(%rdx),%eax
39	movups	(%rdx),%xmm0
40	movups	16(%rdx),%xmm1
41	leaq	32(%rdx),%rdx
/* Whitening: block ^= first 16 bytes of the key schedule. */
42	xorps	%xmm0,%xmm2
43.Loop_dec1_2:
/* 66 0F 38 DE D1 = aesdec %xmm1,%xmm2 */
44.byte	102,15,56,222,209
45	decl	%eax
/* movups and leaq leave the flags alone, so jnz still tests the decl. */
46	movups	(%rdx),%xmm1
47	leaq	16(%rdx),%rdx
48	jnz	.Loop_dec1_2
/* 66 0F 38 DF D1 = aesdeclast %xmm1,%xmm2 */
49.byte	102,15,56,223,209
/* Wipe the round keys and, after the store, the result from registers. */
50	pxor	%xmm0,%xmm0
51	pxor	%xmm1,%xmm1
52	movups	%xmm2,(%rsi)
53	pxor	%xmm2,%xmm2
/* F3 C3 = rep ret */
54	.byte	0xf3,0xc3
55.cfi_endproc
56.size	aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2: file-local helper (no .globl) that encrypts two blocks
 * in parallel, in place.
 *
 * In:   %xmm2, %xmm3 = the two blocks
 *       %rcx         = address of the key schedule
 *       %eax         = loop count (callers in this file load it from
 *                      offset 240 of the key schedule)
 * Out:  %xmm2, %xmm3 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 */
57.type	_aesni_encrypt2,@function
58.align	16
59_aesni_encrypt2:
60.cfi_startproc
61	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
62	shll	$4,%eax
63	movups	16(%rcx),%xmm1
64	xorps	%xmm0,%xmm2
65	xorps	%xmm0,%xmm3
66	movups	32(%rcx),%xmm0
/*
 * rcx is moved to key + 32 + 16*count and rax becomes the negative offset
 * 16 - 16*count, so (%rcx,%rax,1) starts at key + 48 and the loop ends
 * when rax reaches zero.
 */
67	leaq	32(%rcx,%rax,1),%rcx
68	negq	%rax
69	addq	$16,%rax
70
71.Lenc_loop2:
/* aesenc %xmm1 into %xmm2, %xmm3 */
72.byte	102,15,56,220,209
73.byte	102,15,56,220,217
74	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
75	addq	$32,%rax
/* aesenc %xmm0 into %xmm2, %xmm3 */
76.byte	102,15,56,220,208
77.byte	102,15,56,220,216
78	movups	-16(%rcx,%rax,1),%xmm0
79	jnz	.Lenc_loop2
80
/* One more aesenc with %xmm1, then aesenclast with %xmm0. */
81.byte	102,15,56,220,209
82.byte	102,15,56,220,217
83.byte	102,15,56,221,208
84.byte	102,15,56,221,216
/* F3 C3 = rep ret */
85	.byte	0xf3,0xc3
86.cfi_endproc
87.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: file-local helper (no .globl) that decrypts two blocks
 * in parallel, in place.
 *
 * In:   %xmm2, %xmm3 = the two blocks
 *       %rcx         = address of the key schedule
 *       %eax         = loop count (callers in this file load it from
 *                      offset 240 of the key schedule)
 * Out:  %xmm2, %xmm3 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 */
88.type	_aesni_decrypt2,@function
89.align	16
90_aesni_decrypt2:
91.cfi_startproc
92	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
93	shll	$4,%eax
94	movups	16(%rcx),%xmm1
95	xorps	%xmm0,%xmm2
96	xorps	%xmm0,%xmm3
97	movups	32(%rcx),%xmm0
/*
 * rcx is moved to key + 32 + 16*count and rax becomes the negative offset
 * 16 - 16*count; the loop ends when rax reaches zero.
 */
98	leaq	32(%rcx,%rax,1),%rcx
99	negq	%rax
100	addq	$16,%rax
101
102.Ldec_loop2:
/* aesdec %xmm1 into %xmm2, %xmm3 */
103.byte	102,15,56,222,209
104.byte	102,15,56,222,217
105	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
106	addq	$32,%rax
/* aesdec %xmm0 into %xmm2, %xmm3 */
107.byte	102,15,56,222,208
108.byte	102,15,56,222,216
109	movups	-16(%rcx,%rax,1),%xmm0
110	jnz	.Ldec_loop2
111
/* One more aesdec with %xmm1, then aesdeclast with %xmm0. */
112.byte	102,15,56,222,209
113.byte	102,15,56,222,217
114.byte	102,15,56,223,208
115.byte	102,15,56,223,216
/* F3 C3 = rep ret */
116	.byte	0xf3,0xc3
117.cfi_endproc
118.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: file-local helper that encrypts three blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm4 = the three blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm4 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 */
119.type	_aesni_encrypt3,@function
120.align	16
121_aesni_encrypt3:
122.cfi_startproc
123	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
124	shll	$4,%eax
125	movups	16(%rcx),%xmm1
126	xorps	%xmm0,%xmm2
127	xorps	%xmm0,%xmm3
128	xorps	%xmm0,%xmm4
129	movups	32(%rcx),%xmm0
/* rcx = key + 32 + 16*count; rax = 16 - 16*count, counted up to zero. */
130	leaq	32(%rcx,%rax,1),%rcx
131	negq	%rax
132	addq	$16,%rax
133
134.Lenc_loop3:
/* aesenc %xmm1 into %xmm2..%xmm4 */
135.byte	102,15,56,220,209
136.byte	102,15,56,220,217
137.byte	102,15,56,220,225
138	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
139	addq	$32,%rax
/* aesenc %xmm0 into %xmm2..%xmm4 */
140.byte	102,15,56,220,208
141.byte	102,15,56,220,216
142.byte	102,15,56,220,224
143	movups	-16(%rcx,%rax,1),%xmm0
144	jnz	.Lenc_loop3
145
/* One more aesenc with %xmm1, then aesenclast with %xmm0. */
146.byte	102,15,56,220,209
147.byte	102,15,56,220,217
148.byte	102,15,56,220,225
149.byte	102,15,56,221,208
150.byte	102,15,56,221,216
151.byte	102,15,56,221,224
/* F3 C3 = rep ret */
152	.byte	0xf3,0xc3
153.cfi_endproc
154.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: file-local helper that decrypts three blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm4 = the three blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm4 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 */
155.type	_aesni_decrypt3,@function
156.align	16
157_aesni_decrypt3:
158.cfi_startproc
159	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
160	shll	$4,%eax
161	movups	16(%rcx),%xmm1
162	xorps	%xmm0,%xmm2
163	xorps	%xmm0,%xmm3
164	xorps	%xmm0,%xmm4
165	movups	32(%rcx),%xmm0
/* rcx = key + 32 + 16*count; rax = 16 - 16*count, counted up to zero. */
166	leaq	32(%rcx,%rax,1),%rcx
167	negq	%rax
168	addq	$16,%rax
169
170.Ldec_loop3:
/* aesdec %xmm1 into %xmm2..%xmm4 */
171.byte	102,15,56,222,209
172.byte	102,15,56,222,217
173.byte	102,15,56,222,225
174	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
175	addq	$32,%rax
/* aesdec %xmm0 into %xmm2..%xmm4 */
176.byte	102,15,56,222,208
177.byte	102,15,56,222,216
178.byte	102,15,56,222,224
179	movups	-16(%rcx,%rax,1),%xmm0
180	jnz	.Ldec_loop3
181
/* One more aesdec with %xmm1, then aesdeclast with %xmm0. */
182.byte	102,15,56,222,209
183.byte	102,15,56,222,217
184.byte	102,15,56,222,225
185.byte	102,15,56,223,208
186.byte	102,15,56,223,216
187.byte	102,15,56,223,224
/* F3 C3 = rep ret */
188	.byte	0xf3,0xc3
189.cfi_endproc
190.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: file-local helper that encrypts four blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm5 = the four blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm5 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 */
191.type	_aesni_encrypt4,@function
192.align	16
193_aesni_encrypt4:
194.cfi_startproc
195	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
196	shll	$4,%eax
197	movups	16(%rcx),%xmm1
198	xorps	%xmm0,%xmm2
199	xorps	%xmm0,%xmm3
200	xorps	%xmm0,%xmm4
201	xorps	%xmm0,%xmm5
202	movups	32(%rcx),%xmm0
/* rcx = key + 32 + 16*count; rax = 16 - 16*count, counted up to zero. */
203	leaq	32(%rcx,%rax,1),%rcx
204	negq	%rax
/* 0F 1F 00 = 3-byte nopl (%rax); padding only, no architectural effect. */
205.byte	0x0f,0x1f,0x00
206	addq	$16,%rax
207
208.Lenc_loop4:
/* aesenc %xmm1 into %xmm2..%xmm5 */
209.byte	102,15,56,220,209
210.byte	102,15,56,220,217
211.byte	102,15,56,220,225
212.byte	102,15,56,220,233
213	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
214	addq	$32,%rax
/* aesenc %xmm0 into %xmm2..%xmm5 */
215.byte	102,15,56,220,208
216.byte	102,15,56,220,216
217.byte	102,15,56,220,224
218.byte	102,15,56,220,232
219	movups	-16(%rcx,%rax,1),%xmm0
220	jnz	.Lenc_loop4
221
/* One more aesenc with %xmm1, then aesenclast with %xmm0. */
222.byte	102,15,56,220,209
223.byte	102,15,56,220,217
224.byte	102,15,56,220,225
225.byte	102,15,56,220,233
226.byte	102,15,56,221,208
227.byte	102,15,56,221,216
228.byte	102,15,56,221,224
229.byte	102,15,56,221,232
/* F3 C3 = rep ret */
230	.byte	0xf3,0xc3
231.cfi_endproc
232.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: file-local helper that decrypts four blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm5 = the four blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm5 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 */
233.type	_aesni_decrypt4,@function
234.align	16
235_aesni_decrypt4:
236.cfi_startproc
237	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
238	shll	$4,%eax
239	movups	16(%rcx),%xmm1
240	xorps	%xmm0,%xmm2
241	xorps	%xmm0,%xmm3
242	xorps	%xmm0,%xmm4
243	xorps	%xmm0,%xmm5
244	movups	32(%rcx),%xmm0
/* rcx = key + 32 + 16*count; rax = 16 - 16*count, counted up to zero. */
245	leaq	32(%rcx,%rax,1),%rcx
246	negq	%rax
/* 0F 1F 00 = 3-byte nopl (%rax); padding only, no architectural effect. */
247.byte	0x0f,0x1f,0x00
248	addq	$16,%rax
249
250.Ldec_loop4:
/* aesdec %xmm1 into %xmm2..%xmm5 */
251.byte	102,15,56,222,209
252.byte	102,15,56,222,217
253.byte	102,15,56,222,225
254.byte	102,15,56,222,233
255	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
256	addq	$32,%rax
/* aesdec %xmm0 into %xmm2..%xmm5 */
257.byte	102,15,56,222,208
258.byte	102,15,56,222,216
259.byte	102,15,56,222,224
260.byte	102,15,56,222,232
261	movups	-16(%rcx,%rax,1),%xmm0
262	jnz	.Ldec_loop4
263
/* One more aesdec with %xmm1, then aesdeclast with %xmm0. */
264.byte	102,15,56,222,209
265.byte	102,15,56,222,217
266.byte	102,15,56,222,225
267.byte	102,15,56,222,233
268.byte	102,15,56,223,208
269.byte	102,15,56,223,216
270.byte	102,15,56,223,224
271.byte	102,15,56,223,232
/* F3 C3 = rep ret */
272	.byte	0xf3,0xc3
273.cfi_endproc
274.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: file-local helper that encrypts six blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm7 = the six blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm7 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 *
 * The first aesenc of lanes %xmm2..%xmm4 is interleaved with the whitening
 * XORs of the remaining lanes, which is why execution enters the loop at
 * .Lenc_loop6_enter rather than at its top. .Lenc_loop6 is also used as a
 * direct call target by the .Lctr32_loop6 code later in this file.
 */
275.type	_aesni_encrypt6,@function
276.align	16
277_aesni_encrypt6:
278.cfi_startproc
279	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
280	shll	$4,%eax
281	movups	16(%rcx),%xmm1
282	xorps	%xmm0,%xmm2
283	pxor	%xmm0,%xmm3
284	pxor	%xmm0,%xmm4
/* aesenc %xmm1,%xmm2 */
285.byte	102,15,56,220,209
/* rcx = key + 32 + 16*count; rax = -16*count. */
286	leaq	32(%rcx,%rax,1),%rcx
287	negq	%rax
/* aesenc %xmm1,%xmm3 */
288.byte	102,15,56,220,217
289	pxor	%xmm0,%xmm5
290	pxor	%xmm0,%xmm6
/* aesenc %xmm1,%xmm4 */
291.byte	102,15,56,220,225
292	pxor	%xmm0,%xmm7
/* (%rcx,%rax,1) is key + 32 here. */
293	movups	(%rcx,%rax,1),%xmm0
294	addq	$16,%rax
295	jmp	.Lenc_loop6_enter
296.align	16
297.Lenc_loop6:
/* aesenc %xmm1 into %xmm2..%xmm4 */
298.byte	102,15,56,220,209
299.byte	102,15,56,220,217
300.byte	102,15,56,220,225
301.Lenc_loop6_enter:
/* aesenc %xmm1 into %xmm5..%xmm7 */
302.byte	102,15,56,220,233
303.byte	102,15,56,220,241
304.byte	102,15,56,220,249
305	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
306	addq	$32,%rax
/* aesenc %xmm0 into %xmm2..%xmm7 */
307.byte	102,15,56,220,208
308.byte	102,15,56,220,216
309.byte	102,15,56,220,224
310.byte	102,15,56,220,232
311.byte	102,15,56,220,240
312.byte	102,15,56,220,248
313	movups	-16(%rcx,%rax,1),%xmm0
314	jnz	.Lenc_loop6
315
/* One more aesenc with %xmm1, then aesenclast with %xmm0. */
316.byte	102,15,56,220,209
317.byte	102,15,56,220,217
318.byte	102,15,56,220,225
319.byte	102,15,56,220,233
320.byte	102,15,56,220,241
321.byte	102,15,56,220,249
322.byte	102,15,56,221,208
323.byte	102,15,56,221,216
324.byte	102,15,56,221,224
325.byte	102,15,56,221,232
326.byte	102,15,56,221,240
327.byte	102,15,56,221,248
/* F3 C3 = rep ret */
328	.byte	0xf3,0xc3
329.cfi_endproc
330.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: file-local helper that decrypts six blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm7 = the six blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm7 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 *
 * The first aesdec of lanes %xmm2..%xmm4 is interleaved with the whitening
 * XORs of the remaining lanes, which is why execution enters the loop at
 * .Ldec_loop6_enter rather than at its top.
 */
331.type	_aesni_decrypt6,@function
332.align	16
333_aesni_decrypt6:
334.cfi_startproc
335	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
336	shll	$4,%eax
337	movups	16(%rcx),%xmm1
338	xorps	%xmm0,%xmm2
339	pxor	%xmm0,%xmm3
340	pxor	%xmm0,%xmm4
/* aesdec %xmm1,%xmm2 */
341.byte	102,15,56,222,209
/* rcx = key + 32 + 16*count; rax = -16*count. */
342	leaq	32(%rcx,%rax,1),%rcx
343	negq	%rax
/* aesdec %xmm1,%xmm3 */
344.byte	102,15,56,222,217
345	pxor	%xmm0,%xmm5
346	pxor	%xmm0,%xmm6
/* aesdec %xmm1,%xmm4 */
347.byte	102,15,56,222,225
348	pxor	%xmm0,%xmm7
/* (%rcx,%rax,1) is key + 32 here. */
349	movups	(%rcx,%rax,1),%xmm0
350	addq	$16,%rax
351	jmp	.Ldec_loop6_enter
352.align	16
353.Ldec_loop6:
/* aesdec %xmm1 into %xmm2..%xmm4 */
354.byte	102,15,56,222,209
355.byte	102,15,56,222,217
356.byte	102,15,56,222,225
357.Ldec_loop6_enter:
/* aesdec %xmm1 into %xmm5..%xmm7 */
358.byte	102,15,56,222,233
359.byte	102,15,56,222,241
360.byte	102,15,56,222,249
361	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
362	addq	$32,%rax
/* aesdec %xmm0 into %xmm2..%xmm7 */
363.byte	102,15,56,222,208
364.byte	102,15,56,222,216
365.byte	102,15,56,222,224
366.byte	102,15,56,222,232
367.byte	102,15,56,222,240
368.byte	102,15,56,222,248
369	movups	-16(%rcx,%rax,1),%xmm0
370	jnz	.Ldec_loop6
371
/* One more aesdec with %xmm1, then aesdeclast with %xmm0. */
372.byte	102,15,56,222,209
373.byte	102,15,56,222,217
374.byte	102,15,56,222,225
375.byte	102,15,56,222,233
376.byte	102,15,56,222,241
377.byte	102,15,56,222,249
378.byte	102,15,56,223,208
379.byte	102,15,56,223,216
380.byte	102,15,56,223,224
381.byte	102,15,56,223,232
382.byte	102,15,56,223,240
383.byte	102,15,56,223,248
/* F3 C3 = rep ret */
384	.byte	0xf3,0xc3
385.cfi_endproc
386.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8: file-local helper that encrypts eight blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm9 = the eight blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm9 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 *
 * The first aesenc of lanes %xmm2 and %xmm3 is interleaved with the
 * whitening XORs, so execution enters the loop at .Lenc_loop8_inner.
 * The six-byte sequences starting 102,68,... carry a REX.R prefix (0x44)
 * to address %xmm8 and %xmm9.
 */
387.type	_aesni_encrypt8,@function
388.align	16
389_aesni_encrypt8:
390.cfi_startproc
391	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
392	shll	$4,%eax
393	movups	16(%rcx),%xmm1
394	xorps	%xmm0,%xmm2
395	xorps	%xmm0,%xmm3
396	pxor	%xmm0,%xmm4
397	pxor	%xmm0,%xmm5
398	pxor	%xmm0,%xmm6
/* rcx = key + 32 + 16*count; rax = -16*count. */
399	leaq	32(%rcx,%rax,1),%rcx
400	negq	%rax
/* aesenc %xmm1,%xmm2 */
401.byte	102,15,56,220,209
402	pxor	%xmm0,%xmm7
403	pxor	%xmm0,%xmm8
/* aesenc %xmm1,%xmm3 */
404.byte	102,15,56,220,217
405	pxor	%xmm0,%xmm9
/* (%rcx,%rax,1) is key + 32 here. */
406	movups	(%rcx,%rax,1),%xmm0
407	addq	$16,%rax
408	jmp	.Lenc_loop8_inner
409.align	16
410.Lenc_loop8:
/* aesenc %xmm1 into %xmm2, %xmm3 */
411.byte	102,15,56,220,209
412.byte	102,15,56,220,217
413.Lenc_loop8_inner:
/* aesenc %xmm1 into %xmm4..%xmm9 */
414.byte	102,15,56,220,225
415.byte	102,15,56,220,233
416.byte	102,15,56,220,241
417.byte	102,15,56,220,249
418.byte	102,68,15,56,220,193
419.byte	102,68,15,56,220,201
420.Lenc_loop8_enter:
421	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
422	addq	$32,%rax
/* aesenc %xmm0 into %xmm2..%xmm9 */
423.byte	102,15,56,220,208
424.byte	102,15,56,220,216
425.byte	102,15,56,220,224
426.byte	102,15,56,220,232
427.byte	102,15,56,220,240
428.byte	102,15,56,220,248
429.byte	102,68,15,56,220,192
430.byte	102,68,15,56,220,200
431	movups	-16(%rcx,%rax,1),%xmm0
432	jnz	.Lenc_loop8
433
/* One more aesenc with %xmm1, then aesenclast with %xmm0. */
434.byte	102,15,56,220,209
435.byte	102,15,56,220,217
436.byte	102,15,56,220,225
437.byte	102,15,56,220,233
438.byte	102,15,56,220,241
439.byte	102,15,56,220,249
440.byte	102,68,15,56,220,193
441.byte	102,68,15,56,220,201
442.byte	102,15,56,221,208
443.byte	102,15,56,221,216
444.byte	102,15,56,221,224
445.byte	102,15,56,221,232
446.byte	102,15,56,221,240
447.byte	102,15,56,221,248
448.byte	102,68,15,56,221,192
449.byte	102,68,15,56,221,200
/* F3 C3 = rep ret */
450	.byte	0xf3,0xc3
451.cfi_endproc
452.size	_aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8: file-local helper that decrypts eight blocks in
 * parallel, in place.
 *
 * In:   %xmm2-%xmm9 = the eight blocks
 *       %rcx        = address of the key schedule
 *       %eax        = loop count (callers in this file load it from
 *                     offset 240 of the key schedule)
 * Out:  %xmm2-%xmm9 = processed blocks
 * Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
 *
 * The first aesdec of lanes %xmm2 and %xmm3 is interleaved with the
 * whitening XORs, so execution enters the loop at .Ldec_loop8_inner.
 * The six-byte sequences starting 102,68,... carry a REX.R prefix (0x44)
 * to address %xmm8 and %xmm9.
 */
453.type	_aesni_decrypt8,@function
454.align	16
455_aesni_decrypt8:
456.cfi_startproc
457	movups	(%rcx),%xmm0
/* eax = count * 16, i.e. a byte offset into the 16-byte round keys. */
458	shll	$4,%eax
459	movups	16(%rcx),%xmm1
460	xorps	%xmm0,%xmm2
461	xorps	%xmm0,%xmm3
462	pxor	%xmm0,%xmm4
463	pxor	%xmm0,%xmm5
464	pxor	%xmm0,%xmm6
/* rcx = key + 32 + 16*count; rax = -16*count. */
465	leaq	32(%rcx,%rax,1),%rcx
466	negq	%rax
/* aesdec %xmm1,%xmm2 */
467.byte	102,15,56,222,209
468	pxor	%xmm0,%xmm7
469	pxor	%xmm0,%xmm8
/* aesdec %xmm1,%xmm3 */
470.byte	102,15,56,222,217
471	pxor	%xmm0,%xmm9
/* (%rcx,%rax,1) is key + 32 here. */
472	movups	(%rcx,%rax,1),%xmm0
473	addq	$16,%rax
474	jmp	.Ldec_loop8_inner
475.align	16
476.Ldec_loop8:
/* aesdec %xmm1 into %xmm2, %xmm3 */
477.byte	102,15,56,222,209
478.byte	102,15,56,222,217
479.Ldec_loop8_inner:
/* aesdec %xmm1 into %xmm4..%xmm9 */
480.byte	102,15,56,222,225
481.byte	102,15,56,222,233
482.byte	102,15,56,222,241
483.byte	102,15,56,222,249
484.byte	102,68,15,56,222,193
485.byte	102,68,15,56,222,201
486.Ldec_loop8_enter:
487	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
488	addq	$32,%rax
/* aesdec %xmm0 into %xmm2..%xmm9 */
489.byte	102,15,56,222,208
490.byte	102,15,56,222,216
491.byte	102,15,56,222,224
492.byte	102,15,56,222,232
493.byte	102,15,56,222,240
494.byte	102,15,56,222,248
495.byte	102,68,15,56,222,192
496.byte	102,68,15,56,222,200
497	movups	-16(%rcx,%rax,1),%xmm0
498	jnz	.Ldec_loop8
499
/* One more aesdec with %xmm1, then aesdeclast with %xmm0. */
500.byte	102,15,56,222,209
501.byte	102,15,56,222,217
502.byte	102,15,56,222,225
503.byte	102,15,56,222,233
504.byte	102,15,56,222,241
505.byte	102,15,56,222,249
506.byte	102,68,15,56,222,193
507.byte	102,68,15,56,222,201
508.byte	102,15,56,223,208
509.byte	102,15,56,223,216
510.byte	102,15,56,223,224
511.byte	102,15,56,223,232
512.byte	102,15,56,223,240
513.byte	102,15,56,223,248
514.byte	102,68,15,56,223,192
515.byte	102,68,15,56,223,200
/* F3 C3 = rep ret */
516	.byte	0xf3,0xc3
517.cfi_endproc
518.size	_aesni_decrypt8,.-_aesni_decrypt8
/*
 * aesni_ecb_encrypt: process a buffer block by block, each 16-byte block
 * independently, in either direction.
 *
 * In:   %rdi = input address
 *       %rsi = output address
 *       %rdx = length in bytes; rounded down to a multiple of 16, and the
 *              function returns immediately if the result is zero
 *       %rcx = address of the key schedule (loop count read from offset 240)
 *       %r8d = direction: non-zero takes the encrypt path, zero the
 *              decrypt path
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9, flags.
 *
 * %r11 and %r10d keep copies of the key address and loop count because the
 * _aesni_{en,de}crypt* helpers destroy %rcx and %eax.
 * Bulk data is handled 8 blocks (0x80 bytes) at a time; a remainder of
 * 1..7 blocks is dispatched to the matching helper. The decrypt path zeroes
 * the data registers after storing them; the encrypt path does not.
 */
519.globl	aesni_ecb_encrypt
520.type	aesni_ecb_encrypt,@function
521.align	16
522aesni_ecb_encrypt:
523.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64. */
524.byte	243,15,30,250
/* Drop any partial trailing block; nothing to do if no whole block remains. */
525	andq	$-16,%rdx
526	jz	.Lecb_ret
527
528	movl	240(%rcx),%eax
529	movups	(%rcx),%xmm0
530	movq	%rcx,%r11
531	movl	%eax,%r10d
532	testl	%r8d,%r8d
533	jz	.Lecb_decrypt
534
/* ---- encrypt path ---- */
535	cmpq	$0x80,%rdx
536	jb	.Lecb_enc_tail
537
/* Preload the first 8 blocks and enter the loop at the helper call. */
538	movdqu	(%rdi),%xmm2
539	movdqu	16(%rdi),%xmm3
540	movdqu	32(%rdi),%xmm4
541	movdqu	48(%rdi),%xmm5
542	movdqu	64(%rdi),%xmm6
543	movdqu	80(%rdi),%xmm7
544	movdqu	96(%rdi),%xmm8
545	movdqu	112(%rdi),%xmm9
546	leaq	128(%rdi),%rdi
547	subq	$0x80,%rdx
548	jmp	.Lecb_enc_loop8_enter
549.align	16
550.Lecb_enc_loop8:
/*
 * Store the previous 8 results while loading the next 8 inputs, and
 * restore the helper's %rcx/%eax arguments from the saved copies.
 */
551	movups	%xmm2,(%rsi)
552	movq	%r11,%rcx
553	movdqu	(%rdi),%xmm2
554	movl	%r10d,%eax
555	movups	%xmm3,16(%rsi)
556	movdqu	16(%rdi),%xmm3
557	movups	%xmm4,32(%rsi)
558	movdqu	32(%rdi),%xmm4
559	movups	%xmm5,48(%rsi)
560	movdqu	48(%rdi),%xmm5
561	movups	%xmm6,64(%rsi)
562	movdqu	64(%rdi),%xmm6
563	movups	%xmm7,80(%rsi)
564	movdqu	80(%rdi),%xmm7
565	movups	%xmm8,96(%rsi)
566	movdqu	96(%rdi),%xmm8
567	movups	%xmm9,112(%rsi)
568	leaq	128(%rsi),%rsi
569	movdqu	112(%rdi),%xmm9
570	leaq	128(%rdi),%rdi
571.Lecb_enc_loop8_enter:
572
573	call	_aesni_encrypt8
574
/* Keep looping while at least another 0x80 bytes remain (no borrow). */
575	subq	$0x80,%rdx
576	jnc	.Lecb_enc_loop8
577
/* Flush the last 8 results and restore %rcx/%eax for the tail code. */
578	movups	%xmm2,(%rsi)
579	movq	%r11,%rcx
580	movups	%xmm3,16(%rsi)
581	movl	%r10d,%eax
582	movups	%xmm4,32(%rsi)
583	movups	%xmm5,48(%rsi)
584	movups	%xmm6,64(%rsi)
585	movups	%xmm7,80(%rsi)
586	movups	%xmm8,96(%rsi)
587	movups	%xmm9,112(%rsi)
588	leaq	128(%rsi),%rsi
/* Undo the last subtraction: %rdx = remaining bytes (0..0x70). */
589	addq	$0x80,%rdx
590	jz	.Lecb_ret
591
592.Lecb_enc_tail:
/*
 * 1..7 blocks remain. Each cmpq serves two branches: the movups between
 * the jb and the je does not modify the flags.
 */
593	movups	(%rdi),%xmm2
594	cmpq	$0x20,%rdx
595	jb	.Lecb_enc_one
596	movups	16(%rdi),%xmm3
597	je	.Lecb_enc_two
598	movups	32(%rdi),%xmm4
599	cmpq	$0x40,%rdx
600	jb	.Lecb_enc_three
601	movups	48(%rdi),%xmm5
602	je	.Lecb_enc_four
603	movups	64(%rdi),%xmm6
604	cmpq	$0x60,%rdx
605	jb	.Lecb_enc_five
606	movups	80(%rdi),%xmm7
607	je	.Lecb_enc_six
/* Seven blocks: run the 8-lane helper with a zeroed eighth lane. */
608	movdqu	96(%rdi),%xmm8
609	xorps	%xmm9,%xmm9
610	call	_aesni_encrypt8
611	movups	%xmm2,(%rsi)
612	movups	%xmm3,16(%rsi)
613	movups	%xmm4,32(%rsi)
614	movups	%xmm5,48(%rsi)
615	movups	%xmm6,64(%rsi)
616	movups	%xmm7,80(%rsi)
617	movups	%xmm8,96(%rsi)
618	jmp	.Lecb_ret
619.align	16
620.Lecb_enc_one:
/* Single block: inline loop, %eax iterations of aesenc then aesenclast. */
621	movups	(%rcx),%xmm0
622	movups	16(%rcx),%xmm1
623	leaq	32(%rcx),%rcx
624	xorps	%xmm0,%xmm2
625.Loop_enc1_3:
/* aesenc %xmm1,%xmm2 */
626.byte	102,15,56,220,209
627	decl	%eax
628	movups	(%rcx),%xmm1
629	leaq	16(%rcx),%rcx
630	jnz	.Loop_enc1_3
/* aesenclast %xmm1,%xmm2 */
631.byte	102,15,56,221,209
632	movups	%xmm2,(%rsi)
633	jmp	.Lecb_ret
634.align	16
635.Lecb_enc_two:
636	call	_aesni_encrypt2
637	movups	%xmm2,(%rsi)
638	movups	%xmm3,16(%rsi)
639	jmp	.Lecb_ret
640.align	16
641.Lecb_enc_three:
642	call	_aesni_encrypt3
643	movups	%xmm2,(%rsi)
644	movups	%xmm3,16(%rsi)
645	movups	%xmm4,32(%rsi)
646	jmp	.Lecb_ret
647.align	16
648.Lecb_enc_four:
649	call	_aesni_encrypt4
650	movups	%xmm2,(%rsi)
651	movups	%xmm3,16(%rsi)
652	movups	%xmm4,32(%rsi)
653	movups	%xmm5,48(%rsi)
654	jmp	.Lecb_ret
655.align	16
656.Lecb_enc_five:
/* Five blocks: run the 6-lane helper with a zeroed sixth lane. */
657	xorps	%xmm7,%xmm7
658	call	_aesni_encrypt6
659	movups	%xmm2,(%rsi)
660	movups	%xmm3,16(%rsi)
661	movups	%xmm4,32(%rsi)
662	movups	%xmm5,48(%rsi)
663	movups	%xmm6,64(%rsi)
664	jmp	.Lecb_ret
665.align	16
666.Lecb_enc_six:
667	call	_aesni_encrypt6
668	movups	%xmm2,(%rsi)
669	movups	%xmm3,16(%rsi)
670	movups	%xmm4,32(%rsi)
671	movups	%xmm5,48(%rsi)
672	movups	%xmm6,64(%rsi)
673	movups	%xmm7,80(%rsi)
674	jmp	.Lecb_ret
675
676.align	16
677.Lecb_decrypt:
/* ---- decrypt path: same structure as the encrypt path above ---- */
678	cmpq	$0x80,%rdx
679	jb	.Lecb_dec_tail
680
/* Preload the first 8 blocks and enter the loop at the helper call. */
681	movdqu	(%rdi),%xmm2
682	movdqu	16(%rdi),%xmm3
683	movdqu	32(%rdi),%xmm4
684	movdqu	48(%rdi),%xmm5
685	movdqu	64(%rdi),%xmm6
686	movdqu	80(%rdi),%xmm7
687	movdqu	96(%rdi),%xmm8
688	movdqu	112(%rdi),%xmm9
689	leaq	128(%rdi),%rdi
690	subq	$0x80,%rdx
691	jmp	.Lecb_dec_loop8_enter
692.align	16
693.Lecb_dec_loop8:
/*
 * Store the previous 8 results while loading the next 8 inputs, and
 * restore the helper's %rcx/%eax arguments from the saved copies.
 */
694	movups	%xmm2,(%rsi)
695	movq	%r11,%rcx
696	movdqu	(%rdi),%xmm2
697	movl	%r10d,%eax
698	movups	%xmm3,16(%rsi)
699	movdqu	16(%rdi),%xmm3
700	movups	%xmm4,32(%rsi)
701	movdqu	32(%rdi),%xmm4
702	movups	%xmm5,48(%rsi)
703	movdqu	48(%rdi),%xmm5
704	movups	%xmm6,64(%rsi)
705	movdqu	64(%rdi),%xmm6
706	movups	%xmm7,80(%rsi)
707	movdqu	80(%rdi),%xmm7
708	movups	%xmm8,96(%rsi)
709	movdqu	96(%rdi),%xmm8
710	movups	%xmm9,112(%rsi)
711	leaq	128(%rsi),%rsi
712	movdqu	112(%rdi),%xmm9
713	leaq	128(%rdi),%rdi
714.Lecb_dec_loop8_enter:
715
716	call	_aesni_decrypt8
717
/* Reload the first 16 bytes of the key schedule via the saved pointer. */
718	movups	(%r11),%xmm0
/* Keep looping while at least another 0x80 bytes remain (no borrow). */
719	subq	$0x80,%rdx
720	jnc	.Lecb_dec_loop8
721
/*
 * Flush the last 8 results, zeroing each data register right after it is
 * stored, and restore %rcx/%eax for the tail code.
 */
722	movups	%xmm2,(%rsi)
723	pxor	%xmm2,%xmm2
724	movq	%r11,%rcx
725	movups	%xmm3,16(%rsi)
726	pxor	%xmm3,%xmm3
727	movl	%r10d,%eax
728	movups	%xmm4,32(%rsi)
729	pxor	%xmm4,%xmm4
730	movups	%xmm5,48(%rsi)
731	pxor	%xmm5,%xmm5
732	movups	%xmm6,64(%rsi)
733	pxor	%xmm6,%xmm6
734	movups	%xmm7,80(%rsi)
735	pxor	%xmm7,%xmm7
736	movups	%xmm8,96(%rsi)
737	pxor	%xmm8,%xmm8
738	movups	%xmm9,112(%rsi)
739	pxor	%xmm9,%xmm9
740	leaq	128(%rsi),%rsi
/* Undo the last subtraction: %rdx = remaining bytes (0..0x70). */
741	addq	$0x80,%rdx
742	jz	.Lecb_ret
743
744.Lecb_dec_tail:
/*
 * 1..7 blocks remain. Each cmpq serves two branches: the movups between
 * the jb and the je does not modify the flags.
 */
745	movups	(%rdi),%xmm2
746	cmpq	$0x20,%rdx
747	jb	.Lecb_dec_one
748	movups	16(%rdi),%xmm3
749	je	.Lecb_dec_two
750	movups	32(%rdi),%xmm4
751	cmpq	$0x40,%rdx
752	jb	.Lecb_dec_three
753	movups	48(%rdi),%xmm5
754	je	.Lecb_dec_four
755	movups	64(%rdi),%xmm6
756	cmpq	$0x60,%rdx
757	jb	.Lecb_dec_five
758	movups	80(%rdi),%xmm7
759	je	.Lecb_dec_six
/* Seven blocks: run the 8-lane helper with a zeroed eighth lane. */
760	movups	96(%rdi),%xmm8
761	movups	(%rcx),%xmm0
762	xorps	%xmm9,%xmm9
763	call	_aesni_decrypt8
764	movups	%xmm2,(%rsi)
765	pxor	%xmm2,%xmm2
766	movups	%xmm3,16(%rsi)
767	pxor	%xmm3,%xmm3
768	movups	%xmm4,32(%rsi)
769	pxor	%xmm4,%xmm4
770	movups	%xmm5,48(%rsi)
771	pxor	%xmm5,%xmm5
772	movups	%xmm6,64(%rsi)
773	pxor	%xmm6,%xmm6
774	movups	%xmm7,80(%rsi)
775	pxor	%xmm7,%xmm7
776	movups	%xmm8,96(%rsi)
777	pxor	%xmm8,%xmm8
778	pxor	%xmm9,%xmm9
779	jmp	.Lecb_ret
780.align	16
781.Lecb_dec_one:
/* Single block: inline loop, %eax iterations of aesdec then aesdeclast. */
782	movups	(%rcx),%xmm0
783	movups	16(%rcx),%xmm1
784	leaq	32(%rcx),%rcx
785	xorps	%xmm0,%xmm2
786.Loop_dec1_4:
/* aesdec %xmm1,%xmm2 */
787.byte	102,15,56,222,209
788	decl	%eax
789	movups	(%rcx),%xmm1
790	leaq	16(%rcx),%rcx
791	jnz	.Loop_dec1_4
/* aesdeclast %xmm1,%xmm2 */
792.byte	102,15,56,223,209
793	movups	%xmm2,(%rsi)
794	pxor	%xmm2,%xmm2
795	jmp	.Lecb_ret
796.align	16
797.Lecb_dec_two:
798	call	_aesni_decrypt2
799	movups	%xmm2,(%rsi)
800	pxor	%xmm2,%xmm2
801	movups	%xmm3,16(%rsi)
802	pxor	%xmm3,%xmm3
803	jmp	.Lecb_ret
804.align	16
805.Lecb_dec_three:
806	call	_aesni_decrypt3
807	movups	%xmm2,(%rsi)
808	pxor	%xmm2,%xmm2
809	movups	%xmm3,16(%rsi)
810	pxor	%xmm3,%xmm3
811	movups	%xmm4,32(%rsi)
812	pxor	%xmm4,%xmm4
813	jmp	.Lecb_ret
814.align	16
815.Lecb_dec_four:
816	call	_aesni_decrypt4
817	movups	%xmm2,(%rsi)
818	pxor	%xmm2,%xmm2
819	movups	%xmm3,16(%rsi)
820	pxor	%xmm3,%xmm3
821	movups	%xmm4,32(%rsi)
822	pxor	%xmm4,%xmm4
823	movups	%xmm5,48(%rsi)
824	pxor	%xmm5,%xmm5
825	jmp	.Lecb_ret
826.align	16
827.Lecb_dec_five:
/* Five blocks: run the 6-lane helper with a zeroed sixth lane. */
828	xorps	%xmm7,%xmm7
829	call	_aesni_decrypt6
830	movups	%xmm2,(%rsi)
831	pxor	%xmm2,%xmm2
832	movups	%xmm3,16(%rsi)
833	pxor	%xmm3,%xmm3
834	movups	%xmm4,32(%rsi)
835	pxor	%xmm4,%xmm4
836	movups	%xmm5,48(%rsi)
837	pxor	%xmm5,%xmm5
838	movups	%xmm6,64(%rsi)
839	pxor	%xmm6,%xmm6
840	pxor	%xmm7,%xmm7
841	jmp	.Lecb_ret
842.align	16
843.Lecb_dec_six:
/* Six blocks: this case falls through into .Lecb_ret. */
844	call	_aesni_decrypt6
845	movups	%xmm2,(%rsi)
846	pxor	%xmm2,%xmm2
847	movups	%xmm3,16(%rsi)
848	pxor	%xmm3,%xmm3
849	movups	%xmm4,32(%rsi)
850	pxor	%xmm4,%xmm4
851	movups	%xmm5,48(%rsi)
852	pxor	%xmm5,%xmm5
853	movups	%xmm6,64(%rsi)
854	pxor	%xmm6,%xmm6
855	movups	%xmm7,80(%rsi)
856	pxor	%xmm7,%xmm7
857
858.Lecb_ret:
/* Common exit: zero the two registers that held round keys. */
859	xorps	%xmm0,%xmm0
860	pxor	%xmm1,%xmm1
/* F3 C3 = rep ret */
861	.byte	0xf3,0xc3
862.cfi_endproc
863.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks: per 16-byte input block, run two AES
 * encryptions side by side -- one of the counter block (whose result is
 * XORed with the input to form the output) and one of a running 16-byte
 * value that has the input XORed into it first (presumably the CCM
 * CBC-MAC state -- confirm against the caller).
 *
 * In:   %rdi = input address
 *       %rsi = output address
 *       %rdx = number of 16-byte blocks; decremented after each block and
 *              tested for zero, so the loop body always runs at least once
 *       %rcx = address of the key schedule (loop count read from offset 240)
 *       %r8  = address of the 16-byte counter block (read only)
 *       %r9  = address of the 16-byte running value (read, then updated)
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
 *           %xmm6-%xmm9, flags.
 *
 * .Lincrement64 and .Lbswap_mask are constants defined elsewhere in this
 * file. The counter is kept in %xmm6 in shuffled form so that paddq can
 * step it, and is shuffled again (pshufb with %xmm7) before encryption.
 */
864.globl	aesni_ccm64_encrypt_blocks
865.type	aesni_ccm64_encrypt_blocks,@function
866.align	16
867aesni_ccm64_encrypt_blocks:
868.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64. */
869.byte	243,15,30,250
870	movl	240(%rcx),%eax
871	movdqu	(%r8),%xmm6
872	movdqa	.Lincrement64(%rip),%xmm9
873	movdqa	.Lbswap_mask(%rip),%xmm7
874
/*
 * %r11 = key base; %rcx = key + 32 + 16*count; %r10 = 16 - 16*count is the
 * starting (negative) offset reloaded into %rax for every block.
 */
875	shll	$4,%eax
876	movl	$16,%r10d
877	leaq	0(%rcx),%r11
878	movdqu	(%r9),%xmm3
879	movdqa	%xmm6,%xmm2
880	leaq	32(%rcx,%rax,1),%rcx
/* 66 0F 38 00 F7 = pshufb %xmm7,%xmm6 */
881.byte	102,15,56,0,247
882	subq	%rax,%r10
883	jmp	.Lccm64_enc_outer
884.align	16
885.Lccm64_enc_outer:
886	movups	(%r11),%xmm0
887	movq	%r10,%rax
888	movups	(%rdi),%xmm8
889
/*
 * %xmm2 = counter ^ key[0]; %xmm3 ^= input ^ key[0]. The input block
 * itself is kept in %xmm8 for the final XOR.
 */
890	xorps	%xmm0,%xmm2
891	movups	16(%r11),%xmm1
892	xorps	%xmm8,%xmm0
893	xorps	%xmm0,%xmm3
894	movups	32(%r11),%xmm0
895
896.Lccm64_enc2_loop:
/* aesenc %xmm1 into %xmm2, %xmm3 */
897.byte	102,15,56,220,209
898.byte	102,15,56,220,217
899	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
900	addq	$32,%rax
/* aesenc %xmm0 into %xmm2, %xmm3 */
901.byte	102,15,56,220,208
902.byte	102,15,56,220,216
903	movups	-16(%rcx,%rax,1),%xmm0
904	jnz	.Lccm64_enc2_loop
/* Last aesenc with %xmm1; counter stepping is interleaved. */
905.byte	102,15,56,220,209
906.byte	102,15,56,220,217
907	paddq	%xmm9,%xmm6
/*
 * The flags set by this decq are consumed by the jnz at the end of the
 * loop body; none of the instructions in between modify them.
 */
908	decq	%rdx
/* aesenclast %xmm0 into %xmm2, %xmm3 */
909.byte	102,15,56,221,208
910.byte	102,15,56,221,216
911
912	leaq	16(%rdi),%rdi
/* output = input ^ encrypted counter */
913	xorps	%xmm2,%xmm8
/* Prepare the next counter block in %xmm2. */
914	movdqa	%xmm6,%xmm2
915	movups	%xmm8,(%rsi)
/* 66 0F 38 00 D7 = pshufb %xmm7,%xmm2 */
916.byte	102,15,56,0,215
917	leaq	16(%rsi),%rsi
918	jnz	.Lccm64_enc_outer
919
/* Store the updated running value and scrub the working registers. */
920	pxor	%xmm0,%xmm0
921	pxor	%xmm1,%xmm1
922	pxor	%xmm2,%xmm2
923	movups	%xmm3,(%r9)
924	pxor	%xmm3,%xmm3
925	pxor	%xmm8,%xmm8
926	pxor	%xmm6,%xmm6
/* F3 C3 = rep ret */
927	.byte	0xf3,0xc3
928.cfi_endproc
929.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks: counterpart of aesni_ccm64_encrypt_blocks.
 * Each output block is input ^ encrypted counter, and the *output* block
 * is what gets XORed into the running 16-byte value at (%r9) before that
 * value is encrypted (presumably the CCM CBC-MAC state -- confirm against
 * the caller).
 *
 * In:   %rdi = input address
 *       %rsi = output address
 *       %rdx = number of 16-byte blocks; the first block is processed
 *              before the count is tested
 *       %rcx = address of the key schedule (loop count read from offset 240)
 *       %r8  = address of the 16-byte counter block (read only)
 *       %r9  = address of the 16-byte running value (read, then updated)
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
 *           %xmm6-%xmm9, flags.
 *
 * Only AES encryption instructions (aesenc/aesenclast) are used here.
 * Because the running value needs the output of the current block, the
 * work is software-pipelined: the first counter block is encrypted alone,
 * then each loop pass encrypts the next counter together with the running
 * value, and the final running-value encryption is done alone after the
 * loop. .Lincrement64 and .Lbswap_mask are defined elsewhere in this file.
 */
930.globl	aesni_ccm64_decrypt_blocks
931.type	aesni_ccm64_decrypt_blocks,@function
932.align	16
933aesni_ccm64_decrypt_blocks:
934.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64. */
935.byte	243,15,30,250
936	movl	240(%rcx),%eax
937	movups	(%r8),%xmm6
938	movdqu	(%r9),%xmm3
939	movdqa	.Lincrement64(%rip),%xmm9
940	movdqa	.Lbswap_mask(%rip),%xmm7
941
/* Save the loop count and key base; the single-block loop consumes both. */
942	movaps	%xmm6,%xmm2
943	movl	%eax,%r10d
944	movq	%rcx,%r11
/* 66 0F 38 00 F7 = pshufb %xmm7,%xmm6 */
945.byte	102,15,56,0,247
/* Encrypt the first counter block on its own (result in %xmm2). */
946	movups	(%rcx),%xmm0
947	movups	16(%rcx),%xmm1
948	leaq	32(%rcx),%rcx
949	xorps	%xmm0,%xmm2
950.Loop_enc1_5:
/* aesenc %xmm1,%xmm2 */
951.byte	102,15,56,220,209
952	decl	%eax
953	movups	(%rcx),%xmm1
954	leaq	16(%rcx),%rcx
955	jnz	.Loop_enc1_5
/* aesenclast %xmm1,%xmm2 */
956.byte	102,15,56,221,209
/*
 * Set up the two-lane loop: %rcx = key + 32 + 16*count and
 * %r10 = 16 - 16*count, the starting offset reloaded into %rax each pass.
 */
957	shll	$4,%r10d
958	movl	$16,%eax
959	movups	(%rdi),%xmm8
960	paddq	%xmm9,%xmm6
961	leaq	16(%rdi),%rdi
962	subq	%r10,%rax
963	leaq	32(%r11,%r10,1),%rcx
964	movq	%rax,%r10
965	jmp	.Lccm64_dec_outer
966.align	16
967.Lccm64_dec_outer:
/* output = input ^ encrypted counter; %xmm8 now holds the output block. */
968	xorps	%xmm2,%xmm8
/* Prepare the next counter block in %xmm2. */
969	movdqa	%xmm6,%xmm2
970	movups	%xmm8,(%rsi)
971	leaq	16(%rsi),%rsi
/* 66 0F 38 00 D7 = pshufb %xmm7,%xmm2 */
972.byte	102,15,56,0,215
973
974	subq	$1,%rdx
975	jz	.Lccm64_dec_break
976
/*
 * More blocks follow: %xmm2 = next counter ^ key[0] and
 * %xmm3 ^= output ^ key[0], then encrypt both lanes together.
 */
977	movups	(%r11),%xmm0
978	movq	%r10,%rax
979	movups	16(%r11),%xmm1
980	xorps	%xmm0,%xmm8
981	xorps	%xmm0,%xmm2
982	xorps	%xmm8,%xmm3
983	movups	32(%r11),%xmm0
984	jmp	.Lccm64_dec2_loop
985.align	16
986.Lccm64_dec2_loop:
/* aesenc %xmm1 into %xmm2, %xmm3 */
987.byte	102,15,56,220,209
988.byte	102,15,56,220,217
989	movups	(%rcx,%rax,1),%xmm1
/* The flags from this addq are what the jnz below tests. */
990	addq	$32,%rax
/* aesenc %xmm0 into %xmm2, %xmm3 */
991.byte	102,15,56,220,208
992.byte	102,15,56,220,216
993	movups	-16(%rcx,%rax,1),%xmm0
994	jnz	.Lccm64_dec2_loop
/* Fetch the next input block and step the counter while finishing up. */
995	movups	(%rdi),%xmm8
996	paddq	%xmm9,%xmm6
/* Last aesenc with %xmm1, then aesenclast with %xmm0, on both lanes. */
997.byte	102,15,56,220,209
998.byte	102,15,56,220,217
999.byte	102,15,56,221,208
1000.byte	102,15,56,221,216
1001	leaq	16(%rdi),%rdi
1002	jmp	.Lccm64_dec_outer
1003
1004.align	16
1005.Lccm64_dec_break:
1006
/*
 * Last block done: fold the final output block into the running value
 * and encrypt that value alone, walking the key schedule through %r11.
 */
1007	movl	240(%r11),%eax
1008	movups	(%r11),%xmm0
1009	movups	16(%r11),%xmm1
1010	xorps	%xmm0,%xmm8
1011	leaq	32(%r11),%r11
1012	xorps	%xmm8,%xmm3
1013.Loop_enc1_6:
/* aesenc %xmm1,%xmm3 */
1014.byte	102,15,56,220,217
1015	decl	%eax
1016	movups	(%r11),%xmm1
1017	leaq	16(%r11),%r11
1018	jnz	.Loop_enc1_6
/* aesenclast %xmm1,%xmm3 */
1019.byte	102,15,56,221,217
/* Store the updated running value and scrub the working registers. */
1020	pxor	%xmm0,%xmm0
1021	pxor	%xmm1,%xmm1
1022	pxor	%xmm2,%xmm2
1023	movups	%xmm3,(%r9)
1024	pxor	%xmm3,%xmm3
1025	pxor	%xmm8,%xmm8
1026	pxor	%xmm6,%xmm6
/* F3 C3 = rep ret */
1027	.byte	0xf3,0xc3
1028.cfi_endproc
1029.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1030.globl	aesni_ctr32_encrypt_blocks
1031.type	aesni_ctr32_encrypt_blocks,@function
1032.align	16
1033aesni_ctr32_encrypt_blocks:
1034.cfi_startproc
1035.byte	243,15,30,250
1036	cmpq	$1,%rdx
1037	jne	.Lctr32_bulk
1038
1039
1040
1041	movups	(%r8),%xmm2
1042	movups	(%rdi),%xmm3
1043	movl	240(%rcx),%edx
1044	movups	(%rcx),%xmm0
1045	movups	16(%rcx),%xmm1
1046	leaq	32(%rcx),%rcx
1047	xorps	%xmm0,%xmm2
1048.Loop_enc1_7:
1049.byte	102,15,56,220,209
1050	decl	%edx
1051	movups	(%rcx),%xmm1
1052	leaq	16(%rcx),%rcx
1053	jnz	.Loop_enc1_7
1054.byte	102,15,56,221,209
1055	pxor	%xmm0,%xmm0
1056	pxor	%xmm1,%xmm1
1057	xorps	%xmm3,%xmm2
1058	pxor	%xmm3,%xmm3
1059	movups	%xmm2,(%rsi)
1060	xorps	%xmm2,%xmm2
1061	jmp	.Lctr32_epilogue
1062
1063.align	16
1064.Lctr32_bulk:
1065	leaq	(%rsp),%r11
1066.cfi_def_cfa_register	%r11
1067	pushq	%rbp
1068.cfi_offset	%rbp,-16
1069	subq	$128,%rsp
1070	andq	$-16,%rsp
1071
1072
1073
1074
1075	movdqu	(%r8),%xmm2
1076	movdqu	(%rcx),%xmm0
1077	movl	12(%r8),%r8d
1078	pxor	%xmm0,%xmm2
1079	movl	12(%rcx),%ebp
1080	movdqa	%xmm2,0(%rsp)
1081	bswapl	%r8d
1082	movdqa	%xmm2,%xmm3
1083	movdqa	%xmm2,%xmm4
1084	movdqa	%xmm2,%xmm5
1085	movdqa	%xmm2,64(%rsp)
1086	movdqa	%xmm2,80(%rsp)
1087	movdqa	%xmm2,96(%rsp)
1088	movq	%rdx,%r10
1089	movdqa	%xmm2,112(%rsp)
1090
1091	leaq	1(%r8),%rax
1092	leaq	2(%r8),%rdx
1093	bswapl	%eax
1094	bswapl	%edx
1095	xorl	%ebp,%eax
1096	xorl	%ebp,%edx
1097.byte	102,15,58,34,216,3
1098	leaq	3(%r8),%rax
1099	movdqa	%xmm3,16(%rsp)
1100.byte	102,15,58,34,226,3
1101	bswapl	%eax
1102	movq	%r10,%rdx
1103	leaq	4(%r8),%r10
1104	movdqa	%xmm4,32(%rsp)
1105	xorl	%ebp,%eax
1106	bswapl	%r10d
1107.byte	102,15,58,34,232,3
1108	xorl	%ebp,%r10d
1109	movdqa	%xmm5,48(%rsp)
1110	leaq	5(%r8),%r9
1111	movl	%r10d,64+12(%rsp)
1112	bswapl	%r9d
1113	leaq	6(%r8),%r10
1114	movl	240(%rcx),%eax
1115	xorl	%ebp,%r9d
1116	bswapl	%r10d
1117	movl	%r9d,80+12(%rsp)
1118	xorl	%ebp,%r10d
1119	leaq	7(%r8),%r9
1120	movl	%r10d,96+12(%rsp)
1121	bswapl	%r9d
1122	movl	OPENSSL_ia32cap_P+4(%rip),%r10d
1123	xorl	%ebp,%r9d
1124	andl	$71303168,%r10d
1125	movl	%r9d,112+12(%rsp)
1126
1127	movups	16(%rcx),%xmm1
1128
1129	movdqa	64(%rsp),%xmm6
1130	movdqa	80(%rsp),%xmm7
1131
1132	cmpq	$8,%rdx
1133	jb	.Lctr32_tail
1134
1135	subq	$6,%rdx
1136	cmpl	$4194304,%r10d
1137	je	.Lctr32_6x
1138
1139	leaq	128(%rcx),%rcx
1140	subq	$2,%rdx
1141	jmp	.Lctr32_loop8
1142
1143.align	16
1144.Lctr32_6x:
1145	shll	$4,%eax
1146	movl	$48,%r10d
1147	bswapl	%ebp
1148	leaq	32(%rcx,%rax,1),%rcx
1149	subq	%rax,%r10
1150	jmp	.Lctr32_loop6
1151
1152.align	16
1153.Lctr32_loop6:
1154	addl	$6,%r8d
1155	movups	-48(%rcx,%r10,1),%xmm0
1156.byte	102,15,56,220,209
1157	movl	%r8d,%eax
1158	xorl	%ebp,%eax
1159.byte	102,15,56,220,217
1160.byte	0x0f,0x38,0xf1,0x44,0x24,12
1161	leal	1(%r8),%eax
1162.byte	102,15,56,220,225
1163	xorl	%ebp,%eax
1164.byte	0x0f,0x38,0xf1,0x44,0x24,28
1165.byte	102,15,56,220,233
1166	leal	2(%r8),%eax
1167	xorl	%ebp,%eax
1168.byte	102,15,56,220,241
1169.byte	0x0f,0x38,0xf1,0x44,0x24,44
1170	leal	3(%r8),%eax
1171.byte	102,15,56,220,249
1172	movups	-32(%rcx,%r10,1),%xmm1
1173	xorl	%ebp,%eax
1174
1175.byte	102,15,56,220,208
1176.byte	0x0f,0x38,0xf1,0x44,0x24,60
1177	leal	4(%r8),%eax
1178.byte	102,15,56,220,216
1179	xorl	%ebp,%eax
1180.byte	0x0f,0x38,0xf1,0x44,0x24,76
1181.byte	102,15,56,220,224
1182	leal	5(%r8),%eax
1183	xorl	%ebp,%eax
1184.byte	102,15,56,220,232
1185.byte	0x0f,0x38,0xf1,0x44,0x24,92
1186	movq	%r10,%rax
1187.byte	102,15,56,220,240
1188.byte	102,15,56,220,248
1189	movups	-16(%rcx,%r10,1),%xmm0
1190
1191	call	.Lenc_loop6
1192
1193	movdqu	(%rdi),%xmm8
1194	movdqu	16(%rdi),%xmm9
1195	movdqu	32(%rdi),%xmm10
1196	movdqu	48(%rdi),%xmm11
1197	movdqu	64(%rdi),%xmm12
1198	movdqu	80(%rdi),%xmm13
1199	leaq	96(%rdi),%rdi
1200	movups	-64(%rcx,%r10,1),%xmm1
1201	pxor	%xmm2,%xmm8
1202	movaps	0(%rsp),%xmm2
1203	pxor	%xmm3,%xmm9
1204	movaps	16(%rsp),%xmm3
1205	pxor	%xmm4,%xmm10
1206	movaps	32(%rsp),%xmm4
1207	pxor	%xmm5,%xmm11
1208	movaps	48(%rsp),%xmm5
1209	pxor	%xmm6,%xmm12
1210	movaps	64(%rsp),%xmm6
1211	pxor	%xmm7,%xmm13
1212	movaps	80(%rsp),%xmm7
1213	movdqu	%xmm8,(%rsi)
1214	movdqu	%xmm9,16(%rsi)
1215	movdqu	%xmm10,32(%rsi)
1216	movdqu	%xmm11,48(%rsi)
1217	movdqu	%xmm12,64(%rsi)
1218	movdqu	%xmm13,80(%rsi)
1219	leaq	96(%rsi),%rsi
1220
1221	subq	$6,%rdx
1222	jnc	.Lctr32_loop6
1223
1224	addq	$6,%rdx
1225	jz	.Lctr32_done
1226
1227	leal	-48(%r10),%eax
1228	leaq	-80(%rcx,%r10,1),%rcx
1229	negl	%eax
1230	shrl	$4,%eax
1231	jmp	.Lctr32_tail
1232
1233.align	32
1234.Lctr32_loop8:
1235	addl	$8,%r8d
1236	movdqa	96(%rsp),%xmm8
1237.byte	102,15,56,220,209
1238	movl	%r8d,%r9d
1239	movdqa	112(%rsp),%xmm9
1240.byte	102,15,56,220,217
1241	bswapl	%r9d
1242	movups	32-128(%rcx),%xmm0
1243.byte	102,15,56,220,225
1244	xorl	%ebp,%r9d
1245	nop
1246.byte	102,15,56,220,233
1247	movl	%r9d,0+12(%rsp)
1248	leaq	1(%r8),%r9
1249.byte	102,15,56,220,241
1250.byte	102,15,56,220,249
1251.byte	102,68,15,56,220,193
1252.byte	102,68,15,56,220,201
1253	movups	48-128(%rcx),%xmm1
1254	bswapl	%r9d
1255.byte	102,15,56,220,208
1256.byte	102,15,56,220,216
1257	xorl	%ebp,%r9d
1258.byte	0x66,0x90
1259.byte	102,15,56,220,224
1260.byte	102,15,56,220,232
1261	movl	%r9d,16+12(%rsp)
1262	leaq	2(%r8),%r9
1263.byte	102,15,56,220,240
1264.byte	102,15,56,220,248
1265.byte	102,68,15,56,220,192
1266.byte	102,68,15,56,220,200
1267	movups	64-128(%rcx),%xmm0
1268	bswapl	%r9d
1269.byte	102,15,56,220,209
1270.byte	102,15,56,220,217
1271	xorl	%ebp,%r9d
1272.byte	0x66,0x90
1273.byte	102,15,56,220,225
1274.byte	102,15,56,220,233
1275	movl	%r9d,32+12(%rsp)
1276	leaq	3(%r8),%r9
1277.byte	102,15,56,220,241
1278.byte	102,15,56,220,249
1279.byte	102,68,15,56,220,193
1280.byte	102,68,15,56,220,201
1281	movups	80-128(%rcx),%xmm1
1282	bswapl	%r9d
1283.byte	102,15,56,220,208
1284.byte	102,15,56,220,216
1285	xorl	%ebp,%r9d
1286.byte	0x66,0x90
1287.byte	102,15,56,220,224
1288.byte	102,15,56,220,232
1289	movl	%r9d,48+12(%rsp)
1290	leaq	4(%r8),%r9
1291.byte	102,15,56,220,240
1292.byte	102,15,56,220,248
1293.byte	102,68,15,56,220,192
1294.byte	102,68,15,56,220,200
1295	movups	96-128(%rcx),%xmm0
1296	bswapl	%r9d
1297.byte	102,15,56,220,209
1298.byte	102,15,56,220,217
1299	xorl	%ebp,%r9d
1300.byte	0x66,0x90
1301.byte	102,15,56,220,225
1302.byte	102,15,56,220,233
1303	movl	%r9d,64+12(%rsp)
1304	leaq	5(%r8),%r9
1305.byte	102,15,56,220,241
1306.byte	102,15,56,220,249
1307.byte	102,68,15,56,220,193
1308.byte	102,68,15,56,220,201
1309	movups	112-128(%rcx),%xmm1
1310	bswapl	%r9d
1311.byte	102,15,56,220,208
1312.byte	102,15,56,220,216
1313	xorl	%ebp,%r9d
1314.byte	0x66,0x90
1315.byte	102,15,56,220,224
1316.byte	102,15,56,220,232
1317	movl	%r9d,80+12(%rsp)
1318	leaq	6(%r8),%r9
1319.byte	102,15,56,220,240
1320.byte	102,15,56,220,248
1321.byte	102,68,15,56,220,192
1322.byte	102,68,15,56,220,200
1323	movups	128-128(%rcx),%xmm0
1324	bswapl	%r9d
1325.byte	102,15,56,220,209
1326.byte	102,15,56,220,217
1327	xorl	%ebp,%r9d
1328.byte	0x66,0x90
1329.byte	102,15,56,220,225
1330.byte	102,15,56,220,233
1331	movl	%r9d,96+12(%rsp)
1332	leaq	7(%r8),%r9
1333.byte	102,15,56,220,241
1334.byte	102,15,56,220,249
1335.byte	102,68,15,56,220,193
1336.byte	102,68,15,56,220,201
1337	movups	144-128(%rcx),%xmm1
1338	bswapl	%r9d
1339.byte	102,15,56,220,208
1340.byte	102,15,56,220,216
1341.byte	102,15,56,220,224
1342	xorl	%ebp,%r9d
1343	movdqu	0(%rdi),%xmm10
1344.byte	102,15,56,220,232
1345	movl	%r9d,112+12(%rsp)
1346	cmpl	$11,%eax
1347.byte	102,15,56,220,240
1348.byte	102,15,56,220,248
1349.byte	102,68,15,56,220,192
1350.byte	102,68,15,56,220,200
1351	movups	160-128(%rcx),%xmm0
1352
1353	jb	.Lctr32_enc_done
1354
1355.byte	102,15,56,220,209
1356.byte	102,15,56,220,217
1357.byte	102,15,56,220,225
1358.byte	102,15,56,220,233
1359.byte	102,15,56,220,241
1360.byte	102,15,56,220,249
1361.byte	102,68,15,56,220,193
1362.byte	102,68,15,56,220,201
1363	movups	176-128(%rcx),%xmm1
1364
1365.byte	102,15,56,220,208
1366.byte	102,15,56,220,216
1367.byte	102,15,56,220,224
1368.byte	102,15,56,220,232
1369.byte	102,15,56,220,240
1370.byte	102,15,56,220,248
1371.byte	102,68,15,56,220,192
1372.byte	102,68,15,56,220,200
1373	movups	192-128(%rcx),%xmm0
1374	je	.Lctr32_enc_done
1375
1376.byte	102,15,56,220,209
1377.byte	102,15,56,220,217
1378.byte	102,15,56,220,225
1379.byte	102,15,56,220,233
1380.byte	102,15,56,220,241
1381.byte	102,15,56,220,249
1382.byte	102,68,15,56,220,193
1383.byte	102,68,15,56,220,201
1384	movups	208-128(%rcx),%xmm1
1385
1386.byte	102,15,56,220,208
1387.byte	102,15,56,220,216
1388.byte	102,15,56,220,224
1389.byte	102,15,56,220,232
1390.byte	102,15,56,220,240
1391.byte	102,15,56,220,248
1392.byte	102,68,15,56,220,192
1393.byte	102,68,15,56,220,200
1394	movups	224-128(%rcx),%xmm0
1395	jmp	.Lctr32_enc_done
1396
1397.align	16
1398.Lctr32_enc_done:
1399	movdqu	16(%rdi),%xmm11
1400	pxor	%xmm0,%xmm10
1401	movdqu	32(%rdi),%xmm12
1402	pxor	%xmm0,%xmm11
1403	movdqu	48(%rdi),%xmm13
1404	pxor	%xmm0,%xmm12
1405	movdqu	64(%rdi),%xmm14
1406	pxor	%xmm0,%xmm13
1407	movdqu	80(%rdi),%xmm15
1408	pxor	%xmm0,%xmm14
1409	pxor	%xmm0,%xmm15
1410.byte	102,15,56,220,209
1411.byte	102,15,56,220,217
1412.byte	102,15,56,220,225
1413.byte	102,15,56,220,233
1414.byte	102,15,56,220,241
1415.byte	102,15,56,220,249
1416.byte	102,68,15,56,220,193
1417.byte	102,68,15,56,220,201
1418	movdqu	96(%rdi),%xmm1
1419	leaq	128(%rdi),%rdi
1420
1421.byte	102,65,15,56,221,210
1422	pxor	%xmm0,%xmm1
1423	movdqu	112-128(%rdi),%xmm10
1424.byte	102,65,15,56,221,219
1425	pxor	%xmm0,%xmm10
1426	movdqa	0(%rsp),%xmm11
1427.byte	102,65,15,56,221,228
1428.byte	102,65,15,56,221,237
1429	movdqa	16(%rsp),%xmm12
1430	movdqa	32(%rsp),%xmm13
1431.byte	102,65,15,56,221,246
1432.byte	102,65,15,56,221,255
1433	movdqa	48(%rsp),%xmm14
1434	movdqa	64(%rsp),%xmm15
1435.byte	102,68,15,56,221,193
1436	movdqa	80(%rsp),%xmm0
1437	movups	16-128(%rcx),%xmm1
1438.byte	102,69,15,56,221,202
1439
1440	movups	%xmm2,(%rsi)
1441	movdqa	%xmm11,%xmm2
1442	movups	%xmm3,16(%rsi)
1443	movdqa	%xmm12,%xmm3
1444	movups	%xmm4,32(%rsi)
1445	movdqa	%xmm13,%xmm4
1446	movups	%xmm5,48(%rsi)
1447	movdqa	%xmm14,%xmm5
1448	movups	%xmm6,64(%rsi)
1449	movdqa	%xmm15,%xmm6
1450	movups	%xmm7,80(%rsi)
1451	movdqa	%xmm0,%xmm7
1452	movups	%xmm8,96(%rsi)
1453	movups	%xmm9,112(%rsi)
1454	leaq	128(%rsi),%rsi
1455
1456	subq	$8,%rdx
1457	jnc	.Lctr32_loop8
1458
1459	addq	$8,%rdx
1460	jz	.Lctr32_done
1461	leaq	-128(%rcx),%rcx
1462
1463.Lctr32_tail:
1464
1465
1466	leaq	16(%rcx),%rcx
1467	cmpq	$4,%rdx
1468	jb	.Lctr32_loop3
1469	je	.Lctr32_loop4
1470
1471
1472	shll	$4,%eax
1473	movdqa	96(%rsp),%xmm8
1474	pxor	%xmm9,%xmm9
1475
1476	movups	16(%rcx),%xmm0
1477.byte	102,15,56,220,209
1478.byte	102,15,56,220,217
1479	leaq	32-16(%rcx,%rax,1),%rcx
1480	negq	%rax
1481.byte	102,15,56,220,225
1482	addq	$16,%rax
1483	movups	(%rdi),%xmm10
1484.byte	102,15,56,220,233
1485.byte	102,15,56,220,241
1486	movups	16(%rdi),%xmm11
1487	movups	32(%rdi),%xmm12
1488.byte	102,15,56,220,249
1489.byte	102,68,15,56,220,193
1490
1491	call	.Lenc_loop8_enter
1492
1493	movdqu	48(%rdi),%xmm13
1494	pxor	%xmm10,%xmm2
1495	movdqu	64(%rdi),%xmm10
1496	pxor	%xmm11,%xmm3
1497	movdqu	%xmm2,(%rsi)
1498	pxor	%xmm12,%xmm4
1499	movdqu	%xmm3,16(%rsi)
1500	pxor	%xmm13,%xmm5
1501	movdqu	%xmm4,32(%rsi)
1502	pxor	%xmm10,%xmm6
1503	movdqu	%xmm5,48(%rsi)
1504	movdqu	%xmm6,64(%rsi)
1505	cmpq	$6,%rdx
1506	jb	.Lctr32_done
1507
1508	movups	80(%rdi),%xmm11
1509	xorps	%xmm11,%xmm7
1510	movups	%xmm7,80(%rsi)
1511	je	.Lctr32_done
1512
1513	movups	96(%rdi),%xmm12
1514	xorps	%xmm12,%xmm8
1515	movups	%xmm8,96(%rsi)
1516	jmp	.Lctr32_done
1517
1518.align	32
1519.Lctr32_loop4:
1520.byte	102,15,56,220,209
1521	leaq	16(%rcx),%rcx
1522	decl	%eax
1523.byte	102,15,56,220,217
1524.byte	102,15,56,220,225
1525.byte	102,15,56,220,233
1526	movups	(%rcx),%xmm1
1527	jnz	.Lctr32_loop4
1528.byte	102,15,56,221,209
1529.byte	102,15,56,221,217
1530	movups	(%rdi),%xmm10
1531	movups	16(%rdi),%xmm11
1532.byte	102,15,56,221,225
1533.byte	102,15,56,221,233
1534	movups	32(%rdi),%xmm12
1535	movups	48(%rdi),%xmm13
1536
1537	xorps	%xmm10,%xmm2
1538	movups	%xmm2,(%rsi)
1539	xorps	%xmm11,%xmm3
1540	movups	%xmm3,16(%rsi)
1541	pxor	%xmm12,%xmm4
1542	movdqu	%xmm4,32(%rsi)
1543	pxor	%xmm13,%xmm5
1544	movdqu	%xmm5,48(%rsi)
1545	jmp	.Lctr32_done
1546
1547.align	32
1548.Lctr32_loop3:
1549.byte	102,15,56,220,209
1550	leaq	16(%rcx),%rcx
1551	decl	%eax
1552.byte	102,15,56,220,217
1553.byte	102,15,56,220,225
1554	movups	(%rcx),%xmm1
1555	jnz	.Lctr32_loop3
1556.byte	102,15,56,221,209
1557.byte	102,15,56,221,217
1558.byte	102,15,56,221,225
1559
1560	movups	(%rdi),%xmm10
1561	xorps	%xmm10,%xmm2
1562	movups	%xmm2,(%rsi)
1563	cmpq	$2,%rdx
1564	jb	.Lctr32_done
1565
1566	movups	16(%rdi),%xmm11
1567	xorps	%xmm11,%xmm3
1568	movups	%xmm3,16(%rsi)
1569	je	.Lctr32_done
1570
1571	movups	32(%rdi),%xmm12
1572	xorps	%xmm12,%xmm4
1573	movups	%xmm4,32(%rsi)
1574
1575.Lctr32_done:
1576	xorps	%xmm0,%xmm0
1577	xorl	%ebp,%ebp
1578	pxor	%xmm1,%xmm1
1579	pxor	%xmm2,%xmm2
1580	pxor	%xmm3,%xmm3
1581	pxor	%xmm4,%xmm4
1582	pxor	%xmm5,%xmm5
1583	pxor	%xmm6,%xmm6
1584	pxor	%xmm7,%xmm7
1585	movaps	%xmm0,0(%rsp)
1586	pxor	%xmm8,%xmm8
1587	movaps	%xmm0,16(%rsp)
1588	pxor	%xmm9,%xmm9
1589	movaps	%xmm0,32(%rsp)
1590	pxor	%xmm10,%xmm10
1591	movaps	%xmm0,48(%rsp)
1592	pxor	%xmm11,%xmm11
1593	movaps	%xmm0,64(%rsp)
1594	pxor	%xmm12,%xmm12
1595	movaps	%xmm0,80(%rsp)
1596	pxor	%xmm13,%xmm13
1597	movaps	%xmm0,96(%rsp)
1598	pxor	%xmm14,%xmm14
1599	movaps	%xmm0,112(%rsp)
1600	pxor	%xmm15,%xmm15
1601	movq	-8(%r11),%rbp
1602.cfi_restore	%rbp
1603	leaq	(%r11),%rsp
1604.cfi_def_cfa_register	%rsp
1605.Lctr32_epilogue:
1606	.byte	0xf3,0xc3
1607.cfi_endproc
1608.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt -- tweaked block encryption with a byte-stealing tail.
 *
 * Register usage on entry, as read by the code below:
 *   %rdi  input pointer            %rsi  output pointer
 *   %rdx  length in bytes          %rcx  data key schedule (round count at +240)
 *   %r8   tweak key schedule (round count at +240)
 *   %r9   pointer to a 16-byte value that is encrypted with the %r8 key to
 *         form the initial tweak
 *
 * Raw ".byte 102,15,56,N,..." sequences are hand-encoded AES-NI opcodes:
 *   N=220 aesenc, N=221 aesenclast (ModRM selects the operands).
 * Uses 112 bytes of 16-byte-aligned stack scratch; %r11 keeps the entry %rsp.
 * All xmm registers and the scratch area are zeroed before returning.
 */
1609.globl	aesni_xts_encrypt
1610.type	aesni_xts_encrypt,@function
1611.align	16
1612aesni_xts_encrypt:
1613.cfi_startproc
/* f3 0f 1e fa = endbr64 */
1614.byte	243,15,30,250
/* Prologue: remember entry %rsp in %r11, save %rbp, carve aligned scratch. */
1615	leaq	(%rsp),%r11
1616.cfi_def_cfa_register	%r11
1617	pushq	%rbp
1618.cfi_offset	%rbp,-16
1619	subq	$112,%rsp
1620	andq	$-16,%rsp
/* Encrypt the 16 bytes at (%r9) with the %r8 key; result (initial tweak) in %xmm2. */
/* %eax = round count of the %r8 key, %r10d = round count of the %rcx key. */
1621	movups	(%r9),%xmm2
1622	movl	240(%r8),%eax
1623	movl	240(%rcx),%r10d
1624	movups	(%r8),%xmm0
1625	movups	16(%r8),%xmm1
1626	leaq	32(%r8),%r8
1627	xorps	%xmm0,%xmm2
1628.Loop_enc1_8:
1629.byte	102,15,56,220,209
1630	decl	%eax
1631	movups	(%r8),%xmm1
1632	leaq	16(%r8),%r8
1633	jnz	.Loop_enc1_8
1634.byte	102,15,56,221,209
/* %xmm0 = first round key of the data key, %rbp = data key base, */
/* %eax = rounds, %r10 = 16*rounds, %r9 = original length, %rdx = length & ~15. */
1635	movups	(%rcx),%xmm0
1636	movq	%rcx,%rbp
1637	movl	%r10d,%eax
1638	shll	$4,%r10d
1639	movq	%rdx,%r9
1640	andq	$-16,%rdx
1641
/* %xmm1 = last round key; after the pxor below it becomes first^last round key. */
1642	movups	16(%rcx,%r10,1),%xmm1
1643
/*
 * Precompute tweaks for six consecutive blocks.  %xmm15 is the running tweak;
 * each step doubles it with paddq and XORs in .Lxts_magic masked by sign bits
 * taken (psrad $31) from %xmm9, a dword shuffle of the tweak.
 * .Lxts_magic is defined outside this view -- presumably the reduction
 * constant for the tweak field; confirm against its definition.
 * %xmm10..%xmm14 receive tweak^first-round-key, so XORing them into the input
 * also applies the initial AddRoundKey.
 */
1644	movdqa	.Lxts_magic(%rip),%xmm8
1645	movdqa	%xmm2,%xmm15
1646	pshufd	$0x5f,%xmm2,%xmm9
1647	pxor	%xmm0,%xmm1
1648	movdqa	%xmm9,%xmm14
1649	paddd	%xmm9,%xmm9
1650	movdqa	%xmm15,%xmm10
1651	psrad	$31,%xmm14
1652	paddq	%xmm15,%xmm15
1653	pand	%xmm8,%xmm14
1654	pxor	%xmm0,%xmm10
1655	pxor	%xmm14,%xmm15
1656	movdqa	%xmm9,%xmm14
1657	paddd	%xmm9,%xmm9
1658	movdqa	%xmm15,%xmm11
1659	psrad	$31,%xmm14
1660	paddq	%xmm15,%xmm15
1661	pand	%xmm8,%xmm14
1662	pxor	%xmm0,%xmm11
1663	pxor	%xmm14,%xmm15
1664	movdqa	%xmm9,%xmm14
1665	paddd	%xmm9,%xmm9
1666	movdqa	%xmm15,%xmm12
1667	psrad	$31,%xmm14
1668	paddq	%xmm15,%xmm15
1669	pand	%xmm8,%xmm14
1670	pxor	%xmm0,%xmm12
1671	pxor	%xmm14,%xmm15
1672	movdqa	%xmm9,%xmm14
1673	paddd	%xmm9,%xmm9
1674	movdqa	%xmm15,%xmm13
1675	psrad	$31,%xmm14
1676	paddq	%xmm15,%xmm15
1677	pand	%xmm8,%xmm14
1678	pxor	%xmm0,%xmm13
1679	pxor	%xmm14,%xmm15
1680	movdqa	%xmm15,%xmm14
1681	psrad	$31,%xmm9
1682	paddq	%xmm15,%xmm15
1683	pand	%xmm8,%xmm9
1684	pxor	%xmm0,%xmm14
1685	pxor	%xmm9,%xmm15
/* 96(%rsp) = first^last round key, reloaded at the top of every grand-loop pass. */
1686	movaps	%xmm1,96(%rsp)
1687
/* Fewer than 96 bytes (six blocks) of whole blocks: go to the short path. */
1688	subq	$96,%rdx
1689	jc	.Lxts_enc_short
1690
/* %rcx = key + 32 + 16*rounds; %rax = %r10 = 112 - 16*rounds, a negative */
/* index that .Lxts_enc_loop6 steps by 32 until it reaches zero. */
1691	movl	$16+96,%eax
1692	leaq	32(%rbp,%r10,1),%rcx
1693	subq	%r10,%rax
1694	movups	16(%rbp),%xmm1
1695	movq	%rax,%r10
1696	leaq	.Lxts_magic(%rip),%r8
1697	jmp	.Lxts_enc_grandloop
1698
/*
 * Main loop: six blocks (96 bytes) per pass.  Inputs are XORed with
 * tweak^first-round-key; the early rounds are interleaved with storing
 * tweak^last-round-key at 0..80(%rsp) for use by the final aesenclast.
 */
1699.align	32
1700.Lxts_enc_grandloop:
1701	movdqu	0(%rdi),%xmm2
1702	movdqa	%xmm0,%xmm8
1703	movdqu	16(%rdi),%xmm3
1704	pxor	%xmm10,%xmm2
1705	movdqu	32(%rdi),%xmm4
1706	pxor	%xmm11,%xmm3
1707.byte	102,15,56,220,209
1708	movdqu	48(%rdi),%xmm5
1709	pxor	%xmm12,%xmm4
1710.byte	102,15,56,220,217
1711	movdqu	64(%rdi),%xmm6
1712	pxor	%xmm13,%xmm5
1713.byte	102,15,56,220,225
1714	movdqu	80(%rdi),%xmm7
1715	pxor	%xmm15,%xmm8
1716	movdqa	96(%rsp),%xmm9
1717	pxor	%xmm14,%xmm6
1718.byte	102,15,56,220,233
1719	movups	32(%rbp),%xmm0
1720	leaq	96(%rdi),%rdi
1721	pxor	%xmm8,%xmm7
1722
1723	pxor	%xmm9,%xmm10
1724.byte	102,15,56,220,241
1725	pxor	%xmm9,%xmm11
1726	movdqa	%xmm10,0(%rsp)
1727.byte	102,15,56,220,249
1728	movups	48(%rbp),%xmm1
1729	pxor	%xmm9,%xmm12
1730
1731.byte	102,15,56,220,208
1732	pxor	%xmm9,%xmm13
1733	movdqa	%xmm11,16(%rsp)
1734.byte	102,15,56,220,216
1735	pxor	%xmm9,%xmm14
1736	movdqa	%xmm12,32(%rsp)
1737.byte	102,15,56,220,224
1738.byte	102,15,56,220,232
1739	pxor	%xmm9,%xmm8
1740	movdqa	%xmm14,64(%rsp)
1741.byte	102,15,56,220,240
1742.byte	102,15,56,220,248
1743	movups	64(%rbp),%xmm0
1744	movdqa	%xmm8,80(%rsp)
1745	pshufd	$0x5f,%xmm15,%xmm9
1746	jmp	.Lxts_enc_loop6
/* Middle rounds: two aesenc rounds on all six blocks per iteration. */
1747.align	32
1748.Lxts_enc_loop6:
1749.byte	102,15,56,220,209
1750.byte	102,15,56,220,217
1751.byte	102,15,56,220,225
1752.byte	102,15,56,220,233
1753.byte	102,15,56,220,241
1754.byte	102,15,56,220,249
1755	movups	-64(%rcx,%rax,1),%xmm1
1756	addq	$32,%rax
1757
1758.byte	102,15,56,220,208
1759.byte	102,15,56,220,216
1760.byte	102,15,56,220,224
1761.byte	102,15,56,220,232
1762.byte	102,15,56,220,240
1763.byte	102,15,56,220,248
1764	movups	-80(%rcx,%rax,1),%xmm0
1765	jnz	.Lxts_enc_loop6
1766
/*
 * Remaining rounds, interleaved with computing the next six tweaks
 * (each tweak^first-round-key) into %xmm10..%xmm14 and the running %xmm15.
 * The fourth tweak of the current pass is parked at 48(%rsp) on the way.
 */
1767	movdqa	(%r8),%xmm8
1768	movdqa	%xmm9,%xmm14
1769	paddd	%xmm9,%xmm9
1770.byte	102,15,56,220,209
1771	paddq	%xmm15,%xmm15
1772	psrad	$31,%xmm14
1773.byte	102,15,56,220,217
1774	pand	%xmm8,%xmm14
1775	movups	(%rbp),%xmm10
1776.byte	102,15,56,220,225
1777.byte	102,15,56,220,233
1778.byte	102,15,56,220,241
1779	pxor	%xmm14,%xmm15
1780	movaps	%xmm10,%xmm11
1781.byte	102,15,56,220,249
1782	movups	-64(%rcx),%xmm1
1783
1784	movdqa	%xmm9,%xmm14
1785.byte	102,15,56,220,208
1786	paddd	%xmm9,%xmm9
1787	pxor	%xmm15,%xmm10
1788.byte	102,15,56,220,216
1789	psrad	$31,%xmm14
1790	paddq	%xmm15,%xmm15
1791.byte	102,15,56,220,224
1792.byte	102,15,56,220,232
1793	pand	%xmm8,%xmm14
1794	movaps	%xmm11,%xmm12
1795.byte	102,15,56,220,240
1796	pxor	%xmm14,%xmm15
1797	movdqa	%xmm9,%xmm14
1798.byte	102,15,56,220,248
1799	movups	-48(%rcx),%xmm0
1800
1801	paddd	%xmm9,%xmm9
1802.byte	102,15,56,220,209
1803	pxor	%xmm15,%xmm11
1804	psrad	$31,%xmm14
1805.byte	102,15,56,220,217
1806	paddq	%xmm15,%xmm15
1807	pand	%xmm8,%xmm14
1808.byte	102,15,56,220,225
1809.byte	102,15,56,220,233
1810	movdqa	%xmm13,48(%rsp)
1811	pxor	%xmm14,%xmm15
1812.byte	102,15,56,220,241
1813	movaps	%xmm12,%xmm13
1814	movdqa	%xmm9,%xmm14
1815.byte	102,15,56,220,249
1816	movups	-32(%rcx),%xmm1
1817
1818	paddd	%xmm9,%xmm9
1819.byte	102,15,56,220,208
1820	pxor	%xmm15,%xmm12
1821	psrad	$31,%xmm14
1822.byte	102,15,56,220,216
1823	paddq	%xmm15,%xmm15
1824	pand	%xmm8,%xmm14
1825.byte	102,15,56,220,224
1826.byte	102,15,56,220,232
1827.byte	102,15,56,220,240
1828	pxor	%xmm14,%xmm15
1829	movaps	%xmm13,%xmm14
1830.byte	102,15,56,220,248
1831
1832	movdqa	%xmm9,%xmm0
1833	paddd	%xmm9,%xmm9
1834.byte	102,15,56,220,209
1835	pxor	%xmm15,%xmm13
1836	psrad	$31,%xmm0
1837.byte	102,15,56,220,217
1838	paddq	%xmm15,%xmm15
1839	pand	%xmm8,%xmm0
1840.byte	102,15,56,220,225
1841.byte	102,15,56,220,233
1842	pxor	%xmm0,%xmm15
1843	movups	(%rbp),%xmm0
1844.byte	102,15,56,220,241
1845.byte	102,15,56,220,249
1846	movups	16(%rbp),%xmm1
1847
/* Final round: aesenclast with memory operands 0..80(%rsp), i.e. the stored */
/* tweak^last-round-key values, so the output tweak XOR is folded in. */
/* %rax is reset from %r10 for the next pass. */
1848	pxor	%xmm15,%xmm14
1849.byte	102,15,56,221,84,36,0
1850	psrad	$31,%xmm9
1851	paddq	%xmm15,%xmm15
1852.byte	102,15,56,221,92,36,16
1853.byte	102,15,56,221,100,36,32
1854	pand	%xmm8,%xmm9
1855	movq	%r10,%rax
1856.byte	102,15,56,221,108,36,48
1857.byte	102,15,56,221,116,36,64
1858.byte	102,15,56,221,124,36,80
1859	pxor	%xmm9,%xmm15
1860
1861	leaq	96(%rsi),%rsi
1862	movups	%xmm2,-96(%rsi)
1863	movups	%xmm3,-80(%rsi)
1864	movups	%xmm4,-64(%rsi)
1865	movups	%xmm5,-48(%rsi)
1866	movups	%xmm6,-32(%rsi)
1867	movups	%xmm7,-16(%rsi)
1868	subq	$96,%rdx
1869	jnc	.Lxts_enc_grandloop
1870
/* Leaving the grand loop: recover the round count in %eax */
/* ((112 - %r10) >> 4) and point %rcx back at the key base. */
1871	movl	$16+96,%eax
1872	subl	%r10d,%eax
1873	movq	%rbp,%rcx
1874	shrl	$4,%eax
1875
/*
 * Short path: 0..5 whole blocks remain (%rdx + 96 bytes).  %r10d keeps the
 * round count.  The first-round key (%xmm0) is XORed back out of each tweak
 * that will be used, then the code dispatches on the remaining byte count.
 */
1876.Lxts_enc_short:
1877
1878	movl	%eax,%r10d
1879	pxor	%xmm0,%xmm10
1880	addq	$96,%rdx
1881	jz	.Lxts_enc_done
1882
1883	pxor	%xmm0,%xmm11
1884	cmpq	$0x20,%rdx
1885	jb	.Lxts_enc_one
1886	pxor	%xmm0,%xmm12
1887	je	.Lxts_enc_two
1888
1889	pxor	%xmm0,%xmm13
1890	cmpq	$0x40,%rdx
1891	jb	.Lxts_enc_three
1892	pxor	%xmm0,%xmm14
1893	je	.Lxts_enc_four
1894
/* Five blocks: run the six-block helper with %xmm7 zeroed as the unused lane. */
1895	movdqu	(%rdi),%xmm2
1896	movdqu	16(%rdi),%xmm3
1897	movdqu	32(%rdi),%xmm4
1898	pxor	%xmm10,%xmm2
1899	movdqu	48(%rdi),%xmm5
1900	pxor	%xmm11,%xmm3
1901	movdqu	64(%rdi),%xmm6
1902	leaq	80(%rdi),%rdi
1903	pxor	%xmm12,%xmm4
1904	pxor	%xmm13,%xmm5
1905	pxor	%xmm14,%xmm6
1906	pxor	%xmm7,%xmm7
1907
1908	call	_aesni_encrypt6
1909
/* Post-whiten with the same tweaks; %xmm10 = next unused tweak for the tail. */
1910	xorps	%xmm10,%xmm2
1911	movdqa	%xmm15,%xmm10
1912	xorps	%xmm11,%xmm3
1913	xorps	%xmm12,%xmm4
1914	movdqu	%xmm2,(%rsi)
1915	xorps	%xmm13,%xmm5
1916	movdqu	%xmm3,16(%rsi)
1917	xorps	%xmm14,%xmm6
1918	movdqu	%xmm4,32(%rsi)
1919	movdqu	%xmm5,48(%rsi)
1920	movdqu	%xmm6,64(%rsi)
1921	leaq	80(%rsi),%rsi
1922	jmp	.Lxts_enc_done
1923
/* One block: inline single-block encryption, tweak XORed before and after. */
1924.align	16
1925.Lxts_enc_one:
1926	movups	(%rdi),%xmm2
1927	leaq	16(%rdi),%rdi
1928	xorps	%xmm10,%xmm2
1929	movups	(%rcx),%xmm0
1930	movups	16(%rcx),%xmm1
1931	leaq	32(%rcx),%rcx
1932	xorps	%xmm0,%xmm2
1933.Loop_enc1_9:
1934.byte	102,15,56,220,209
1935	decl	%eax
1936	movups	(%rcx),%xmm1
1937	leaq	16(%rcx),%rcx
1938	jnz	.Loop_enc1_9
1939.byte	102,15,56,221,209
1940	xorps	%xmm10,%xmm2
1941	movdqa	%xmm11,%xmm10
1942	movups	%xmm2,(%rsi)
1943	leaq	16(%rsi),%rsi
1944	jmp	.Lxts_enc_done
1945
/* Two blocks via _aesni_encrypt2; %xmm10 = next unused tweak afterwards. */
1946.align	16
1947.Lxts_enc_two:
1948	movups	(%rdi),%xmm2
1949	movups	16(%rdi),%xmm3
1950	leaq	32(%rdi),%rdi
1951	xorps	%xmm10,%xmm2
1952	xorps	%xmm11,%xmm3
1953
1954	call	_aesni_encrypt2
1955
1956	xorps	%xmm10,%xmm2
1957	movdqa	%xmm12,%xmm10
1958	xorps	%xmm11,%xmm3
1959	movups	%xmm2,(%rsi)
1960	movups	%xmm3,16(%rsi)
1961	leaq	32(%rsi),%rsi
1962	jmp	.Lxts_enc_done
1963
/* Three blocks via _aesni_encrypt3; %xmm10 = next unused tweak afterwards. */
1964.align	16
1965.Lxts_enc_three:
1966	movups	(%rdi),%xmm2
1967	movups	16(%rdi),%xmm3
1968	movups	32(%rdi),%xmm4
1969	leaq	48(%rdi),%rdi
1970	xorps	%xmm10,%xmm2
1971	xorps	%xmm11,%xmm3
1972	xorps	%xmm12,%xmm4
1973
1974	call	_aesni_encrypt3
1975
1976	xorps	%xmm10,%xmm2
1977	movdqa	%xmm13,%xmm10
1978	xorps	%xmm11,%xmm3
1979	xorps	%xmm12,%xmm4
1980	movups	%xmm2,(%rsi)
1981	movups	%xmm3,16(%rsi)
1982	movups	%xmm4,32(%rsi)
1983	leaq	48(%rsi),%rsi
1984	jmp	.Lxts_enc_done
1985
/* Four blocks via _aesni_encrypt4; %xmm10 = next unused tweak afterwards. */
1986.align	16
1987.Lxts_enc_four:
1988	movups	(%rdi),%xmm2
1989	movups	16(%rdi),%xmm3
1990	movups	32(%rdi),%xmm4
1991	xorps	%xmm10,%xmm2
1992	movups	48(%rdi),%xmm5
1993	leaq	64(%rdi),%rdi
1994	xorps	%xmm11,%xmm3
1995	xorps	%xmm12,%xmm4
1996	xorps	%xmm13,%xmm5
1997
1998	call	_aesni_encrypt4
1999
2000	pxor	%xmm10,%xmm2
2001	movdqa	%xmm14,%xmm10
2002	pxor	%xmm11,%xmm3
2003	pxor	%xmm12,%xmm4
2004	movdqu	%xmm2,(%rsi)
2005	pxor	%xmm13,%xmm5
2006	movdqu	%xmm3,16(%rsi)
2007	movdqu	%xmm4,32(%rsi)
2008	movdqu	%xmm5,48(%rsi)
2009	leaq	64(%rsi),%rsi
2010	jmp	.Lxts_enc_done
2011
/* Whole blocks done.  %r9 & 15 = trailing partial-block byte count; */
/* if it is zero there is nothing to steal. */
2012.align	16
2013.Lxts_enc_done:
2014	andq	$15,%r9
2015	jz	.Lxts_enc_ret
2016	movq	%r9,%rdx
2017
/* Byte-stealing: for each trailing byte, move the matching byte of the last */
/* output block forward to the partial output and put the input byte in its place. */
2018.Lxts_enc_steal:
2019	movzbl	(%rdi),%eax
2020	movzbl	-16(%rsi),%ecx
2021	leaq	1(%rdi),%rdi
2022	movb	%al,-16(%rsi)
2023	movb	%cl,0(%rsi)
2024	leaq	1(%rsi),%rsi
2025	subq	$1,%rdx
2026	jnz	.Lxts_enc_steal
2027
/* Rewind %rsi and encrypt the patched block at -16(%rsi) in place */
/* with the tweak held in %xmm10. */
2028	subq	%r9,%rsi
2029	movq	%rbp,%rcx
2030	movl	%r10d,%eax
2031
2032	movups	-16(%rsi),%xmm2
2033	xorps	%xmm10,%xmm2
2034	movups	(%rcx),%xmm0
2035	movups	16(%rcx),%xmm1
2036	leaq	32(%rcx),%rcx
2037	xorps	%xmm0,%xmm2
2038.Loop_enc1_10:
2039.byte	102,15,56,220,209
2040	decl	%eax
2041	movups	(%rcx),%xmm1
2042	leaq	16(%rcx),%rcx
2043	jnz	.Loop_enc1_10
2044.byte	102,15,56,221,209
2045	xorps	%xmm10,%xmm2
2046	movups	%xmm2,-16(%rsi)
2047
/* Epilogue: zero every xmm register and the 112-byte stack scratch, */
/* then restore %rbp and the entry %rsp from %r11. */
2048.Lxts_enc_ret:
2049	xorps	%xmm0,%xmm0
2050	pxor	%xmm1,%xmm1
2051	pxor	%xmm2,%xmm2
2052	pxor	%xmm3,%xmm3
2053	pxor	%xmm4,%xmm4
2054	pxor	%xmm5,%xmm5
2055	pxor	%xmm6,%xmm6
2056	pxor	%xmm7,%xmm7
2057	movaps	%xmm0,0(%rsp)
2058	pxor	%xmm8,%xmm8
2059	movaps	%xmm0,16(%rsp)
2060	pxor	%xmm9,%xmm9
2061	movaps	%xmm0,32(%rsp)
2062	pxor	%xmm10,%xmm10
2063	movaps	%xmm0,48(%rsp)
2064	pxor	%xmm11,%xmm11
2065	movaps	%xmm0,64(%rsp)
2066	pxor	%xmm12,%xmm12
2067	movaps	%xmm0,80(%rsp)
2068	pxor	%xmm13,%xmm13
2069	movaps	%xmm0,96(%rsp)
2070	pxor	%xmm14,%xmm14
2071	pxor	%xmm15,%xmm15
2072	movq	-8(%r11),%rbp
2073.cfi_restore	%rbp
2074	leaq	(%r11),%rsp
2075.cfi_def_cfa_register	%rsp
2076.Lxts_enc_epilogue:
/* f3 c3 = rep ret */
2077	.byte	0xf3,0xc3
2078.cfi_endproc
2079.size	aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * aesni_xts_decrypt -- tweaked block decryption with a byte-stealing tail.
 *
 * Register usage on entry, as read by the code below:
 *   %rdi  input pointer            %rsi  output pointer
 *   %rdx  length in bytes          %rcx  data key schedule (round count at +240)
 *   %r8   tweak key schedule (round count at +240)
 *   %r9   pointer to a 16-byte value that is ENcrypted with the %r8 key to
 *         form the initial tweak
 *
 * Raw ".byte 102,15,56,N,..." sequences are hand-encoded AES-NI opcodes:
 *   N=220 aesenc, N=221 aesenclast, N=222 aesdec, N=223 aesdeclast.
 * Uses 112 bytes of 16-byte-aligned stack scratch; %r11 keeps the entry %rsp.
 * All xmm registers and the scratch area are zeroed before returning.
 */
2080.globl	aesni_xts_decrypt
2081.type	aesni_xts_decrypt,@function
2082.align	16
2083aesni_xts_decrypt:
2084.cfi_startproc
/* f3 0f 1e fa = endbr64 */
2085.byte	243,15,30,250
/* Prologue: remember entry %rsp in %r11, save %rbp, carve aligned scratch. */
2086	leaq	(%rsp),%r11
2087.cfi_def_cfa_register	%r11
2088	pushq	%rbp
2089.cfi_offset	%rbp,-16
2090	subq	$112,%rsp
2091	andq	$-16,%rsp
/* Initial tweak: encrypt (aesenc, not aesdec) the 16 bytes at (%r9) */
/* with the %r8 key; result in %xmm2. */
2092	movups	(%r9),%xmm2
2093	movl	240(%r8),%eax
2094	movl	240(%rcx),%r10d
2095	movups	(%r8),%xmm0
2096	movups	16(%r8),%xmm1
2097	leaq	32(%r8),%r8
2098	xorps	%xmm0,%xmm2
2099.Loop_enc1_11:
2100.byte	102,15,56,220,209
2101	decl	%eax
2102	movups	(%r8),%xmm1
2103	leaq	16(%r8),%r8
2104	jnz	.Loop_enc1_11
2105.byte	102,15,56,221,209
/* If the length is not a multiple of 16, take 16 off it so that the last */
/* whole block is left for the stealing code at .Lxts_dec_done2. */
2106	xorl	%eax,%eax
2107	testq	$15,%rdx
2108	setnz	%al
2109	shlq	$4,%rax
2110	subq	%rax,%rdx
2111
/* %xmm0 = first round key of the data key, %rbp = data key base, */
/* %eax = rounds, %r10 = 16*rounds, %r9 = adjusted length, %rdx = that & ~15. */
2112	movups	(%rcx),%xmm0
2113	movq	%rcx,%rbp
2114	movl	%r10d,%eax
2115	shll	$4,%r10d
2116	movq	%rdx,%r9
2117	andq	$-16,%rdx
2118
/* %xmm1 = last round key; after the pxor below it becomes first^last round key. */
2119	movups	16(%rcx,%r10,1),%xmm1
2120
/*
 * Precompute tweaks for six consecutive blocks.  %xmm15 is the running tweak;
 * each step doubles it with paddq and XORs in .Lxts_magic masked by sign bits
 * taken (psrad $31) from %xmm9, a dword shuffle of the tweak.
 * .Lxts_magic is defined outside this view -- presumably the reduction
 * constant for the tweak field; confirm against its definition.
 * %xmm10..%xmm14 receive tweak^first-round-key.
 */
2121	movdqa	.Lxts_magic(%rip),%xmm8
2122	movdqa	%xmm2,%xmm15
2123	pshufd	$0x5f,%xmm2,%xmm9
2124	pxor	%xmm0,%xmm1
2125	movdqa	%xmm9,%xmm14
2126	paddd	%xmm9,%xmm9
2127	movdqa	%xmm15,%xmm10
2128	psrad	$31,%xmm14
2129	paddq	%xmm15,%xmm15
2130	pand	%xmm8,%xmm14
2131	pxor	%xmm0,%xmm10
2132	pxor	%xmm14,%xmm15
2133	movdqa	%xmm9,%xmm14
2134	paddd	%xmm9,%xmm9
2135	movdqa	%xmm15,%xmm11
2136	psrad	$31,%xmm14
2137	paddq	%xmm15,%xmm15
2138	pand	%xmm8,%xmm14
2139	pxor	%xmm0,%xmm11
2140	pxor	%xmm14,%xmm15
2141	movdqa	%xmm9,%xmm14
2142	paddd	%xmm9,%xmm9
2143	movdqa	%xmm15,%xmm12
2144	psrad	$31,%xmm14
2145	paddq	%xmm15,%xmm15
2146	pand	%xmm8,%xmm14
2147	pxor	%xmm0,%xmm12
2148	pxor	%xmm14,%xmm15
2149	movdqa	%xmm9,%xmm14
2150	paddd	%xmm9,%xmm9
2151	movdqa	%xmm15,%xmm13
2152	psrad	$31,%xmm14
2153	paddq	%xmm15,%xmm15
2154	pand	%xmm8,%xmm14
2155	pxor	%xmm0,%xmm13
2156	pxor	%xmm14,%xmm15
2157	movdqa	%xmm15,%xmm14
2158	psrad	$31,%xmm9
2159	paddq	%xmm15,%xmm15
2160	pand	%xmm8,%xmm9
2161	pxor	%xmm0,%xmm14
2162	pxor	%xmm9,%xmm15
/* 96(%rsp) = first^last round key, reloaded at the top of every grand-loop pass. */
2163	movaps	%xmm1,96(%rsp)
2164
/* Fewer than 96 bytes (six blocks) of whole blocks: go to the short path. */
2165	subq	$96,%rdx
2166	jc	.Lxts_dec_short
2167
/* %rcx = key + 32 + 16*rounds; %rax = %r10 = 112 - 16*rounds, a negative */
/* index that .Lxts_dec_loop6 steps by 32 until it reaches zero. */
2168	movl	$16+96,%eax
2169	leaq	32(%rbp,%r10,1),%rcx
2170	subq	%r10,%rax
2171	movups	16(%rbp),%xmm1
2172	movq	%rax,%r10
2173	leaq	.Lxts_magic(%rip),%r8
2174	jmp	.Lxts_dec_grandloop
2175
/*
 * Main loop: six blocks (96 bytes) per pass.  Inputs are XORed with
 * tweak^first-round-key; the early rounds are interleaved with storing
 * tweak^last-round-key at 0..80(%rsp) for use by the final aesdeclast.
 */
2176.align	32
2177.Lxts_dec_grandloop:
2178	movdqu	0(%rdi),%xmm2
2179	movdqa	%xmm0,%xmm8
2180	movdqu	16(%rdi),%xmm3
2181	pxor	%xmm10,%xmm2
2182	movdqu	32(%rdi),%xmm4
2183	pxor	%xmm11,%xmm3
2184.byte	102,15,56,222,209
2185	movdqu	48(%rdi),%xmm5
2186	pxor	%xmm12,%xmm4
2187.byte	102,15,56,222,217
2188	movdqu	64(%rdi),%xmm6
2189	pxor	%xmm13,%xmm5
2190.byte	102,15,56,222,225
2191	movdqu	80(%rdi),%xmm7
2192	pxor	%xmm15,%xmm8
2193	movdqa	96(%rsp),%xmm9
2194	pxor	%xmm14,%xmm6
2195.byte	102,15,56,222,233
2196	movups	32(%rbp),%xmm0
2197	leaq	96(%rdi),%rdi
2198	pxor	%xmm8,%xmm7
2199
2200	pxor	%xmm9,%xmm10
2201.byte	102,15,56,222,241
2202	pxor	%xmm9,%xmm11
2203	movdqa	%xmm10,0(%rsp)
2204.byte	102,15,56,222,249
2205	movups	48(%rbp),%xmm1
2206	pxor	%xmm9,%xmm12
2207
2208.byte	102,15,56,222,208
2209	pxor	%xmm9,%xmm13
2210	movdqa	%xmm11,16(%rsp)
2211.byte	102,15,56,222,216
2212	pxor	%xmm9,%xmm14
2213	movdqa	%xmm12,32(%rsp)
2214.byte	102,15,56,222,224
2215.byte	102,15,56,222,232
2216	pxor	%xmm9,%xmm8
2217	movdqa	%xmm14,64(%rsp)
2218.byte	102,15,56,222,240
2219.byte	102,15,56,222,248
2220	movups	64(%rbp),%xmm0
2221	movdqa	%xmm8,80(%rsp)
2222	pshufd	$0x5f,%xmm15,%xmm9
2223	jmp	.Lxts_dec_loop6
/* Middle rounds: two aesdec rounds on all six blocks per iteration. */
2224.align	32
2225.Lxts_dec_loop6:
2226.byte	102,15,56,222,209
2227.byte	102,15,56,222,217
2228.byte	102,15,56,222,225
2229.byte	102,15,56,222,233
2230.byte	102,15,56,222,241
2231.byte	102,15,56,222,249
2232	movups	-64(%rcx,%rax,1),%xmm1
2233	addq	$32,%rax
2234
2235.byte	102,15,56,222,208
2236.byte	102,15,56,222,216
2237.byte	102,15,56,222,224
2238.byte	102,15,56,222,232
2239.byte	102,15,56,222,240
2240.byte	102,15,56,222,248
2241	movups	-80(%rcx,%rax,1),%xmm0
2242	jnz	.Lxts_dec_loop6
2243
/*
 * Remaining rounds, interleaved with computing the next six tweaks
 * (each tweak^first-round-key) into %xmm10..%xmm14 and the running %xmm15.
 * The fourth tweak of the current pass is parked at 48(%rsp) on the way.
 */
2244	movdqa	(%r8),%xmm8
2245	movdqa	%xmm9,%xmm14
2246	paddd	%xmm9,%xmm9
2247.byte	102,15,56,222,209
2248	paddq	%xmm15,%xmm15
2249	psrad	$31,%xmm14
2250.byte	102,15,56,222,217
2251	pand	%xmm8,%xmm14
2252	movups	(%rbp),%xmm10
2253.byte	102,15,56,222,225
2254.byte	102,15,56,222,233
2255.byte	102,15,56,222,241
2256	pxor	%xmm14,%xmm15
2257	movaps	%xmm10,%xmm11
2258.byte	102,15,56,222,249
2259	movups	-64(%rcx),%xmm1
2260
2261	movdqa	%xmm9,%xmm14
2262.byte	102,15,56,222,208
2263	paddd	%xmm9,%xmm9
2264	pxor	%xmm15,%xmm10
2265.byte	102,15,56,222,216
2266	psrad	$31,%xmm14
2267	paddq	%xmm15,%xmm15
2268.byte	102,15,56,222,224
2269.byte	102,15,56,222,232
2270	pand	%xmm8,%xmm14
2271	movaps	%xmm11,%xmm12
2272.byte	102,15,56,222,240
2273	pxor	%xmm14,%xmm15
2274	movdqa	%xmm9,%xmm14
2275.byte	102,15,56,222,248
2276	movups	-48(%rcx),%xmm0
2277
2278	paddd	%xmm9,%xmm9
2279.byte	102,15,56,222,209
2280	pxor	%xmm15,%xmm11
2281	psrad	$31,%xmm14
2282.byte	102,15,56,222,217
2283	paddq	%xmm15,%xmm15
2284	pand	%xmm8,%xmm14
2285.byte	102,15,56,222,225
2286.byte	102,15,56,222,233
2287	movdqa	%xmm13,48(%rsp)
2288	pxor	%xmm14,%xmm15
2289.byte	102,15,56,222,241
2290	movaps	%xmm12,%xmm13
2291	movdqa	%xmm9,%xmm14
2292.byte	102,15,56,222,249
2293	movups	-32(%rcx),%xmm1
2294
2295	paddd	%xmm9,%xmm9
2296.byte	102,15,56,222,208
2297	pxor	%xmm15,%xmm12
2298	psrad	$31,%xmm14
2299.byte	102,15,56,222,216
2300	paddq	%xmm15,%xmm15
2301	pand	%xmm8,%xmm14
2302.byte	102,15,56,222,224
2303.byte	102,15,56,222,232
2304.byte	102,15,56,222,240
2305	pxor	%xmm14,%xmm15
2306	movaps	%xmm13,%xmm14
2307.byte	102,15,56,222,248
2308
2309	movdqa	%xmm9,%xmm0
2310	paddd	%xmm9,%xmm9
2311.byte	102,15,56,222,209
2312	pxor	%xmm15,%xmm13
2313	psrad	$31,%xmm0
2314.byte	102,15,56,222,217
2315	paddq	%xmm15,%xmm15
2316	pand	%xmm8,%xmm0
2317.byte	102,15,56,222,225
2318.byte	102,15,56,222,233
2319	pxor	%xmm0,%xmm15
2320	movups	(%rbp),%xmm0
2321.byte	102,15,56,222,241
2322.byte	102,15,56,222,249
2323	movups	16(%rbp),%xmm1
2324
/* Final round: aesdeclast with memory operands 0..80(%rsp), i.e. the stored */
/* tweak^last-round-key values, so the output tweak XOR is folded in. */
/* %rax is reset from %r10 for the next pass. */
2325	pxor	%xmm15,%xmm14
2326.byte	102,15,56,223,84,36,0
2327	psrad	$31,%xmm9
2328	paddq	%xmm15,%xmm15
2329.byte	102,15,56,223,92,36,16
2330.byte	102,15,56,223,100,36,32
2331	pand	%xmm8,%xmm9
2332	movq	%r10,%rax
2333.byte	102,15,56,223,108,36,48
2334.byte	102,15,56,223,116,36,64
2335.byte	102,15,56,223,124,36,80
2336	pxor	%xmm9,%xmm15
2337
2338	leaq	96(%rsi),%rsi
2339	movups	%xmm2,-96(%rsi)
2340	movups	%xmm3,-80(%rsi)
2341	movups	%xmm4,-64(%rsi)
2342	movups	%xmm5,-48(%rsi)
2343	movups	%xmm6,-32(%rsi)
2344	movups	%xmm7,-16(%rsi)
2345	subq	$96,%rdx
2346	jnc	.Lxts_dec_grandloop
2347
/* Leaving the grand loop: recover the round count in %eax */
/* ((112 - %r10) >> 4) and point %rcx back at the key base. */
2348	movl	$16+96,%eax
2349	subl	%r10d,%eax
2350	movq	%rbp,%rcx
2351	shrl	$4,%eax
2352
/*
 * Short path: 0..5 whole blocks remain (%rdx + 96 bytes).  %r10d keeps the
 * round count.  The first-round key (%xmm0) is XORed back out of the tweaks.
 * Both %xmm10 and %xmm11 are cleaned before the zero check because the
 * stealing code at .Lxts_dec_done2 consumes two tweaks.
 */
2353.Lxts_dec_short:
2354
2355	movl	%eax,%r10d
2356	pxor	%xmm0,%xmm10
2357	pxor	%xmm0,%xmm11
2358	addq	$96,%rdx
2359	jz	.Lxts_dec_done
2360
2361	pxor	%xmm0,%xmm12
2362	cmpq	$0x20,%rdx
2363	jb	.Lxts_dec_one
2364	pxor	%xmm0,%xmm13
2365	je	.Lxts_dec_two
2366
2367	pxor	%xmm0,%xmm14
2368	cmpq	$0x40,%rdx
2369	jb	.Lxts_dec_three
2370	je	.Lxts_dec_four
2371
/* Five blocks through the six-block helper. */
2372	movdqu	(%rdi),%xmm2
2373	movdqu	16(%rdi),%xmm3
2374	movdqu	32(%rdi),%xmm4
2375	pxor	%xmm10,%xmm2
2376	movdqu	48(%rdi),%xmm5
2377	pxor	%xmm11,%xmm3
2378	movdqu	64(%rdi),%xmm6
2379	leaq	80(%rdi),%rdi
2380	pxor	%xmm12,%xmm4
2381	pxor	%xmm13,%xmm5
2382	pxor	%xmm14,%xmm6
2383
2384	call	_aesni_decrypt6
2385
/* Post-whiten and store.  Interleaved: %xmm14 = per-dword sign mask of %xmm15 */
/* (0 > x), shuffled into %xmm11 for one more tweak-doubling step below. */
2386	xorps	%xmm10,%xmm2
2387	xorps	%xmm11,%xmm3
2388	xorps	%xmm12,%xmm4
2389	movdqu	%xmm2,(%rsi)
2390	xorps	%xmm13,%xmm5
2391	movdqu	%xmm3,16(%rsi)
2392	xorps	%xmm14,%xmm6
2393	movdqu	%xmm4,32(%rsi)
2394	pxor	%xmm14,%xmm14
2395	movdqu	%xmm5,48(%rsi)
2396	pcmpgtd	%xmm15,%xmm14
2397	movdqu	%xmm6,64(%rsi)
2398	leaq	80(%rsi),%rsi
2399	pshufd	$0x13,%xmm14,%xmm11
2400	andq	$15,%r9
2401	jz	.Lxts_dec_ret
2402
/* Partial tail present: %xmm10 = current tweak, %xmm11 = the one after it. */
/* NOTE(review): relies on %xmm8 still holding the .Lxts_magic value after */
/* the _aesni_decrypt6 call -- confirm the helper does not touch %xmm8. */
2403	movdqa	%xmm15,%xmm10
2404	paddq	%xmm15,%xmm15
2405	pand	%xmm8,%xmm11
2406	pxor	%xmm15,%xmm11
2407	jmp	.Lxts_dec_done2
2408
/* One block: inline single-block decryption, tweak XORed before and after. */
/* Afterwards %xmm10/%xmm11 hold the next two tweaks. */
2409.align	16
2410.Lxts_dec_one:
2411	movups	(%rdi),%xmm2
2412	leaq	16(%rdi),%rdi
2413	xorps	%xmm10,%xmm2
2414	movups	(%rcx),%xmm0
2415	movups	16(%rcx),%xmm1
2416	leaq	32(%rcx),%rcx
2417	xorps	%xmm0,%xmm2
2418.Loop_dec1_12:
2419.byte	102,15,56,222,209
2420	decl	%eax
2421	movups	(%rcx),%xmm1
2422	leaq	16(%rcx),%rcx
2423	jnz	.Loop_dec1_12
2424.byte	102,15,56,223,209
2425	xorps	%xmm10,%xmm2
2426	movdqa	%xmm11,%xmm10
2427	movups	%xmm2,(%rsi)
2428	movdqa	%xmm12,%xmm11
2429	leaq	16(%rsi),%rsi
2430	jmp	.Lxts_dec_done
2431
/* Two blocks via _aesni_decrypt2; %xmm10/%xmm11 = next two tweaks afterwards. */
2432.align	16
2433.Lxts_dec_two:
2434	movups	(%rdi),%xmm2
2435	movups	16(%rdi),%xmm3
2436	leaq	32(%rdi),%rdi
2437	xorps	%xmm10,%xmm2
2438	xorps	%xmm11,%xmm3
2439
2440	call	_aesni_decrypt2
2441
2442	xorps	%xmm10,%xmm2
2443	movdqa	%xmm12,%xmm10
2444	xorps	%xmm11,%xmm3
2445	movdqa	%xmm13,%xmm11
2446	movups	%xmm2,(%rsi)
2447	movups	%xmm3,16(%rsi)
2448	leaq	32(%rsi),%rsi
2449	jmp	.Lxts_dec_done
2450
/* Three blocks via _aesni_decrypt3; %xmm10/%xmm11 = next two tweaks afterwards. */
2451.align	16
2452.Lxts_dec_three:
2453	movups	(%rdi),%xmm2
2454	movups	16(%rdi),%xmm3
2455	movups	32(%rdi),%xmm4
2456	leaq	48(%rdi),%rdi
2457	xorps	%xmm10,%xmm2
2458	xorps	%xmm11,%xmm3
2459	xorps	%xmm12,%xmm4
2460
2461	call	_aesni_decrypt3
2462
2463	xorps	%xmm10,%xmm2
2464	movdqa	%xmm13,%xmm10
2465	xorps	%xmm11,%xmm3
2466	movdqa	%xmm14,%xmm11
2467	xorps	%xmm12,%xmm4
2468	movups	%xmm2,(%rsi)
2469	movups	%xmm3,16(%rsi)
2470	movups	%xmm4,32(%rsi)
2471	leaq	48(%rsi),%rsi
2472	jmp	.Lxts_dec_done
2473
/* Four blocks via _aesni_decrypt4; %xmm10/%xmm11 = next two tweaks afterwards. */
2474.align	16
2475.Lxts_dec_four:
2476	movups	(%rdi),%xmm2
2477	movups	16(%rdi),%xmm3
2478	movups	32(%rdi),%xmm4
2479	xorps	%xmm10,%xmm2
2480	movups	48(%rdi),%xmm5
2481	leaq	64(%rdi),%rdi
2482	xorps	%xmm11,%xmm3
2483	xorps	%xmm12,%xmm4
2484	xorps	%xmm13,%xmm5
2485
2486	call	_aesni_decrypt4
2487
2488	pxor	%xmm10,%xmm2
2489	movdqa	%xmm14,%xmm10
2490	pxor	%xmm11,%xmm3
2491	movdqa	%xmm15,%xmm11
2492	pxor	%xmm12,%xmm4
2493	movdqu	%xmm2,(%rsi)
2494	pxor	%xmm13,%xmm5
2495	movdqu	%xmm3,16(%rsi)
2496	movdqu	%xmm4,32(%rsi)
2497	movdqu	%xmm5,48(%rsi)
2498	leaq	64(%rsi),%rsi
2499	jmp	.Lxts_dec_done
2500
/* Whole blocks done.  %r9 & 15 = trailing partial-block byte count; */
/* if it is zero there is nothing to steal. */
2501.align	16
2502.Lxts_dec_done:
2503	andq	$15,%r9
2504	jz	.Lxts_dec_ret
/* Stealing, step 1: decrypt the held-back full block at (%rdi) with the */
/* LATER tweak (%xmm11) and store it at (%rsi). */
2505.Lxts_dec_done2:
2506	movq	%r9,%rdx
2507	movq	%rbp,%rcx
2508	movl	%r10d,%eax
2509
2510	movups	(%rdi),%xmm2
2511	xorps	%xmm11,%xmm2
2512	movups	(%rcx),%xmm0
2513	movups	16(%rcx),%xmm1
2514	leaq	32(%rcx),%rcx
2515	xorps	%xmm0,%xmm2
2516.Loop_dec1_13:
2517.byte	102,15,56,222,209
2518	decl	%eax
2519	movups	(%rcx),%xmm1
2520	leaq	16(%rcx),%rcx
2521	jnz	.Loop_dec1_13
2522.byte	102,15,56,223,209
2523	xorps	%xmm11,%xmm2
2524	movups	%xmm2,(%rsi)
2525
/* Step 2: for each trailing byte, move the byte just written at (%rsi) out */
/* to 16(%rsi) and replace it with the partial-block input byte at 16(%rdi). */
2526.Lxts_dec_steal:
2527	movzbl	16(%rdi),%eax
2528	movzbl	(%rsi),%ecx
2529	leaq	1(%rdi),%rdi
2530	movb	%al,(%rsi)
2531	movb	%cl,16(%rsi)
2532	leaq	1(%rsi),%rsi
2533	subq	$1,%rdx
2534	jnz	.Lxts_dec_steal
2535
/* Step 3: rewind %rsi and decrypt the patched block at (%rsi) in place */
/* with the EARLIER tweak (%xmm10). */
2536	subq	%r9,%rsi
2537	movq	%rbp,%rcx
2538	movl	%r10d,%eax
2539
2540	movups	(%rsi),%xmm2
2541	xorps	%xmm10,%xmm2
2542	movups	(%rcx),%xmm0
2543	movups	16(%rcx),%xmm1
2544	leaq	32(%rcx),%rcx
2545	xorps	%xmm0,%xmm2
2546.Loop_dec1_14:
2547.byte	102,15,56,222,209
2548	decl	%eax
2549	movups	(%rcx),%xmm1
2550	leaq	16(%rcx),%rcx
2551	jnz	.Loop_dec1_14
2552.byte	102,15,56,223,209
2553	xorps	%xmm10,%xmm2
2554	movups	%xmm2,(%rsi)
2555
/* Epilogue: zero every xmm register and the 112-byte stack scratch, */
/* then restore %rbp and the entry %rsp from %r11. */
2556.Lxts_dec_ret:
2557	xorps	%xmm0,%xmm0
2558	pxor	%xmm1,%xmm1
2559	pxor	%xmm2,%xmm2
2560	pxor	%xmm3,%xmm3
2561	pxor	%xmm4,%xmm4
2562	pxor	%xmm5,%xmm5
2563	pxor	%xmm6,%xmm6
2564	pxor	%xmm7,%xmm7
2565	movaps	%xmm0,0(%rsp)
2566	pxor	%xmm8,%xmm8
2567	movaps	%xmm0,16(%rsp)
2568	pxor	%xmm9,%xmm9
2569	movaps	%xmm0,32(%rsp)
2570	pxor	%xmm10,%xmm10
2571	movaps	%xmm0,48(%rsp)
2572	pxor	%xmm11,%xmm11
2573	movaps	%xmm0,64(%rsp)
2574	pxor	%xmm12,%xmm12
2575	movaps	%xmm0,80(%rsp)
2576	pxor	%xmm13,%xmm13
2577	movaps	%xmm0,96(%rsp)
2578	pxor	%xmm14,%xmm14
2579	pxor	%xmm15,%xmm15
2580	movq	-8(%r11),%rbp
2581.cfi_restore	%rbp
2582	leaq	(%r11),%rsp
2583.cfi_def_cfa_register	%rsp
2584.Lxts_dec_epilogue:
/* f3 c3 = rep ret */
2585	.byte	0xf3,0xc3
2586.cfi_endproc
2587.size	aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_ocb_encrypt -- driver that encrypts whole 16-byte blocks through the
 * __ocb_encrypt1 / __ocb_encrypt4 / __ocb_encrypt6 helpers.
 *
 * Register and stack usage on entry, as read by the code below:
 *   %rdi  input pointer            %rsi  output pointer
 *   %rdx  number of 16-byte blocks %rcx  key schedule (round count at +240)
 *   %r8   block number; bsf of it (times 16) indexes the table at %rbx
 *   %r9   pointer to a 16-byte running value, read at entry and written back
 *   8(entry %rsp)   -> %rbx: table of 16-byte entries
 *   16(entry %rsp)  -> %rbp: pointer to a 16-byte accumulator, read into
 *                      %xmm8 at entry and written back at exit
 * Presumably these are the OCB offset, L_ table and checksum -- confirm
 * against the C prototype, which is outside this view.
 *
 * Saves and restores %rbx, %rbp, %r12-%r14; zeroes all xmm registers on exit.
 */
2588.globl	aesni_ocb_encrypt
2589.type	aesni_ocb_encrypt,@function
2590.align	32
2591aesni_ocb_encrypt:
2592.cfi_startproc
/* f3 0f 1e fa = endbr64 */
2593.byte	243,15,30,250
/* %rax = entry %rsp, used below to reach the two stack-passed arguments. */
2594	leaq	(%rsp),%rax
2595	pushq	%rbx
2596.cfi_adjust_cfa_offset	8
2597.cfi_offset	%rbx,-16
2598	pushq	%rbp
2599.cfi_adjust_cfa_offset	8
2600.cfi_offset	%rbp,-24
2601	pushq	%r12
2602.cfi_adjust_cfa_offset	8
2603.cfi_offset	%r12,-32
2604	pushq	%r13
2605.cfi_adjust_cfa_offset	8
2606.cfi_offset	%r13,-40
2607	pushq	%r14
2608.cfi_adjust_cfa_offset	8
2609.cfi_offset	%r14,-48
2610	movq	8(%rax),%rbx
2611	movq	8+8(%rax),%rbp
2612
/* %r11 = key base, %r10 = 16*rounds, %xmm9 = first round key, */
/* %xmm1 = last round key. */
2613	movl	240(%rcx),%r10d
2614	movq	%rcx,%r11
2615	shll	$4,%r10d
2616	movups	(%rcx),%xmm9
2617	movups	16(%rcx,%r10,1),%xmm1
2618
/* %xmm9 = first^last round key; %xmm15 = value at (%r9) ^ last round key. */
2619	movdqu	(%r9),%xmm15
2620	pxor	%xmm1,%xmm9
2621	pxor	%xmm1,%xmm15
2622
/* %rcx = key + 32 + 16*rounds; %rax = %r10 = 48 - 16*rounds; */
/* %xmm1 = second round key. */
2623	movl	$16+32,%eax
2624	leaq	32(%r11,%r10,1),%rcx
2625	movups	16(%r11),%xmm1
2626	subq	%r10,%rax
2627	movq	%rax,%r10
2628
/* %xmm10 = first table entry, %xmm8 = accumulator loaded from (%rbp). */
2629	movdqu	(%rbx),%xmm10
2630	movdqu	(%rbp),%xmm8
2631
/* If the block number is odd, skip straight to the multi-block setup. */
2632	testq	$1,%r8
2633	jnz	.Locb_enc_odd
2634
/* Even block number: handle one block on its own, using table entry */
/* bsf(%r8), and advance %r8 so the rest starts at an odd number. */
/* NOTE(review): bsf leaves its destination undefined for a zero source; */
/* presumably callers never pass block number 0 -- confirm. */
2635	bsfq	%r8,%r12
2636	addq	$1,%r8
2637	shlq	$4,%r12
2638	movdqu	(%rbx,%r12,1),%xmm7
2639	movdqu	(%rdi),%xmm2
2640	leaq	16(%rdi),%rdi
2641
2642	call	__ocb_encrypt1
2643
2644	movdqa	%xmm7,%xmm15
2645	movups	%xmm2,(%rsi)
2646	leaq	16(%rsi),%rsi
2647	subq	$1,%rdx
2648	jz	.Locb_enc_done
2649
/* %r12/%r13/%r14 = 16 * bsf(%r8+1), bsf(%r8+3), bsf(%r8+5): table byte */
/* offsets for the helpers; %r8 advances by 6. */
2650.Locb_enc_odd:
2651	leaq	1(%r8),%r12
2652	leaq	3(%r8),%r13
2653	leaq	5(%r8),%r14
2654	leaq	6(%r8),%r8
2655	bsfq	%r12,%r12
2656	bsfq	%r13,%r13
2657	bsfq	%r14,%r14
2658	shlq	$4,%r12
2659	shlq	$4,%r13
2660	shlq	$4,%r14
2661
/* Fewer than six blocks left: go to the short path. */
2662	subq	$6,%rdx
2663	jc	.Locb_enc_short
2664	jmp	.Locb_enc_grandloop
2665
/* Main loop: load six blocks, encrypt via __ocb_encrypt6, store six blocks. */
2666.align	32
2667.Locb_enc_grandloop:
2668	movdqu	0(%rdi),%xmm2
2669	movdqu	16(%rdi),%xmm3
2670	movdqu	32(%rdi),%xmm4
2671	movdqu	48(%rdi),%xmm5
2672	movdqu	64(%rdi),%xmm6
2673	movdqu	80(%rdi),%xmm7
2674	leaq	96(%rdi),%rdi
2675
2676	call	__ocb_encrypt6
2677
2678	movups	%xmm2,0(%rsi)
2679	movups	%xmm3,16(%rsi)
2680	movups	%xmm4,32(%rsi)
2681	movups	%xmm5,48(%rsi)
2682	movups	%xmm6,64(%rsi)
2683	movups	%xmm7,80(%rsi)
2684	leaq	96(%rsi),%rsi
2685	subq	$6,%rdx
2686	jnc	.Locb_enc_grandloop
2687
/* Short path: 0..5 blocks remain.  Blocks are loaded progressively while */
/* the count is compared, then dispatched to the matching helper. */
2688.Locb_enc_short:
2689	addq	$6,%rdx
2690	jz	.Locb_enc_done
2691
2692	movdqu	0(%rdi),%xmm2
2693	cmpq	$2,%rdx
2694	jb	.Locb_enc_one
2695	movdqu	16(%rdi),%xmm3
2696	je	.Locb_enc_two
2697
2698	movdqu	32(%rdi),%xmm4
2699	cmpq	$4,%rdx
2700	jb	.Locb_enc_three
2701	movdqu	48(%rdi),%xmm5
2702	je	.Locb_enc_four
2703
/* Five blocks: six-block helper with %xmm7 zeroed as the unused lane; */
/* the running value for exit is taken from %xmm14. */
2704	movdqu	64(%rdi),%xmm6
2705	pxor	%xmm7,%xmm7
2706
2707	call	__ocb_encrypt6
2708
2709	movdqa	%xmm14,%xmm15
2710	movups	%xmm2,0(%rsi)
2711	movups	%xmm3,16(%rsi)
2712	movups	%xmm4,32(%rsi)
2713	movups	%xmm5,48(%rsi)
2714	movups	%xmm6,64(%rsi)
2715
2716	jmp	.Locb_enc_done
2717
/* One block: pass the first table entry (%xmm10) in %xmm7. */
2718.align	16
2719.Locb_enc_one:
2720	movdqa	%xmm10,%xmm7
2721
2722	call	__ocb_encrypt1
2723
2724	movdqa	%xmm7,%xmm15
2725	movups	%xmm2,0(%rsi)
2726	jmp	.Locb_enc_done
2727
/* Two blocks: four-block helper with %xmm4/%xmm5 zeroed; running value */
/* for exit comes from %xmm11. */
2728.align	16
2729.Locb_enc_two:
2730	pxor	%xmm4,%xmm4
2731	pxor	%xmm5,%xmm5
2732
2733	call	__ocb_encrypt4
2734
2735	movdqa	%xmm11,%xmm15
2736	movups	%xmm2,0(%rsi)
2737	movups	%xmm3,16(%rsi)
2738
2739	jmp	.Locb_enc_done
2740
/* Three blocks: four-block helper with %xmm5 zeroed; running value for */
/* exit comes from %xmm12. */
2741.align	16
2742.Locb_enc_three:
2743	pxor	%xmm5,%xmm5
2744
2745	call	__ocb_encrypt4
2746
2747	movdqa	%xmm12,%xmm15
2748	movups	%xmm2,0(%rsi)
2749	movups	%xmm3,16(%rsi)
2750	movups	%xmm4,32(%rsi)
2751
2752	jmp	.Locb_enc_done
2753
/* Four blocks: running value for exit comes from %xmm13. */
2754.align	16
2755.Locb_enc_four:
2756	call	__ocb_encrypt4
2757
2758	movdqa	%xmm13,%xmm15
2759	movups	%xmm2,0(%rsi)
2760	movups	%xmm3,16(%rsi)
2761	movups	%xmm4,32(%rsi)
2762	movups	%xmm5,48(%rsi)
2763
/* Write back the accumulator (%xmm8 -> (%rbp)) and the running value */
/* (%xmm15 ^ %xmm0 -> (%r9)).  NOTE(review): %xmm0 here is whatever the last */
/* helper left in it -- presumably the last round key, undoing the masking */
/* applied at entry; confirm against the helpers. */
2764.Locb_enc_done:
2765	pxor	%xmm0,%xmm15
2766	movdqu	%xmm8,(%rbp)
2767	movdqu	%xmm15,(%r9)
2768
/* Zero every xmm register before returning. */
2769	xorps	%xmm0,%xmm0
2770	pxor	%xmm1,%xmm1
2771	pxor	%xmm2,%xmm2
2772	pxor	%xmm3,%xmm3
2773	pxor	%xmm4,%xmm4
2774	pxor	%xmm5,%xmm5
2775	pxor	%xmm6,%xmm6
2776	pxor	%xmm7,%xmm7
2777	pxor	%xmm8,%xmm8
2778	pxor	%xmm9,%xmm9
2779	pxor	%xmm10,%xmm10
2780	pxor	%xmm11,%xmm11
2781	pxor	%xmm12,%xmm12
2782	pxor	%xmm13,%xmm13
2783	pxor	%xmm14,%xmm14
2784	pxor	%xmm15,%xmm15
/* Epilogue: %rax = %rsp + 40 (just above the five pushes); reload the */
/* saved registers relative to it, then restore %rsp. */
2785	leaq	40(%rsp),%rax
2786.cfi_def_cfa	%rax,8
2787	movq	-40(%rax),%r14
2788.cfi_restore	%r14
2789	movq	-32(%rax),%r13
2790.cfi_restore	%r13
2791	movq	-24(%rax),%r12
2792.cfi_restore	%r12
2793	movq	-16(%rax),%rbp
2794.cfi_restore	%rbp
2795	movq	-8(%rax),%rbx
2796.cfi_restore	%rbx
2797	leaq	(%rax),%rsp
2798.cfi_def_cfa_register	%rsp
2799.Locb_enc_epilogue:
/* f3 c3 = rep ret */
2800	.byte	0xf3,0xc3
2801.cfi_endproc
2802.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
2803
/*
 * __ocb_encrypt6: local helper that pushes six 16-byte blocks, held in
 * %xmm2..%xmm7, through the AES encryption rounds in parallel.
 * The .byte sequences 102,15,56,220,M and 102,65,15,56,221,M are
 * hand-encoded aesenc / aesenclast instructions.
 *
 * Register use, as read from this routine:
 *   %xmm2-%xmm7   in/out: the six blocks, transformed in place
 *   %xmm8         in/out: running XOR of the six input blocks, taken
 *                 before each block is masked
 *   %xmm9         in: constant XORed into every mask before the first
 *                 round and again before the last round
 *   %xmm10        in: 16-byte value used to derive the masks of blocks
 *                 1, 3 and 5; reloaded from (%rbx) before returning
 *   %xmm15        in/out: chaining mask; on return it holds the
 *                 last-round mask of the sixth block
 *   %rbx          in: base of a table of 16-byte entries
 *                 (presumably the OCB L_i table - confirm with caller)
 *   %r12,%r13,%r14 in/out: byte offsets into that table for blocks
 *                 2, 4 and 6; recomputed here for the next call
 *   %r8           in/out: block counter, advanced by 6
 *   %r11          in: key schedule base
 *   %rcx,%rax     in: round-key end pointer and negative byte index that
 *                 drive the round loop; %rax is restored from %r10
 *   %xmm0,%xmm1   round-key temporaries; %xmm1 leaves holding 16(%r11)
 */
2804.type	__ocb_encrypt6,@function
2805.align	32
2806__ocb_encrypt6:
2807.cfi_startproc
/* Build the six per-block masks (%xmm10..%xmm15) by chained XOR, fold each
   input block into %xmm8, then XOR each block with its mask. */
2808	pxor	%xmm9,%xmm15
2809	movdqu	(%rbx,%r12,1),%xmm11
2810	movdqa	%xmm10,%xmm12
2811	movdqu	(%rbx,%r13,1),%xmm13
2812	movdqa	%xmm10,%xmm14
2813	pxor	%xmm15,%xmm10
2814	movdqu	(%rbx,%r14,1),%xmm15
2815	pxor	%xmm10,%xmm11
2816	pxor	%xmm2,%xmm8
2817	pxor	%xmm10,%xmm2
2818	pxor	%xmm11,%xmm12
2819	pxor	%xmm3,%xmm8
2820	pxor	%xmm11,%xmm3
2821	pxor	%xmm12,%xmm13
2822	pxor	%xmm4,%xmm8
2823	pxor	%xmm12,%xmm4
2824	pxor	%xmm13,%xmm14
2825	pxor	%xmm5,%xmm8
2826	pxor	%xmm13,%xmm5
2827	pxor	%xmm14,%xmm15
2828	pxor	%xmm6,%xmm8
2829	pxor	%xmm14,%xmm6
2830	pxor	%xmm7,%xmm8
2831	pxor	%xmm15,%xmm7
2832	movups	32(%r11),%xmm0
2833
/* Table indices for the next call: lowest-set-bit position of (old %r8)+1,
   +3 and +5; they are scaled by 16 further down. */
2834	leaq	1(%r8),%r12
2835	leaq	3(%r8),%r13
2836	leaq	5(%r8),%r14
2837	addq	$6,%r8
2838	pxor	%xmm9,%xmm10
2839	bsfq	%r12,%r12
2840	bsfq	%r13,%r13
2841	bsfq	%r14,%r14
2842
/* aesenc %xmm1 into %xmm2..%xmm7, interleaved with the second XOR of %xmm9
   into the masks so they are ready for aesenclast. */
2843.byte	102,15,56,220,209
2844.byte	102,15,56,220,217
2845.byte	102,15,56,220,225
2846.byte	102,15,56,220,233
2847	pxor	%xmm9,%xmm11
2848	pxor	%xmm9,%xmm12
2849.byte	102,15,56,220,241
2850	pxor	%xmm9,%xmm13
2851	pxor	%xmm9,%xmm14
2852.byte	102,15,56,220,249
2853	movups	48(%r11),%xmm1
2854	pxor	%xmm9,%xmm15
2855
/* aesenc %xmm0 into %xmm2..%xmm7 */
2856.byte	102,15,56,220,208
2857.byte	102,15,56,220,216
2858.byte	102,15,56,220,224
2859.byte	102,15,56,220,232
2860.byte	102,15,56,220,240
2861.byte	102,15,56,220,248
2862	movups	64(%r11),%xmm0
2863	shlq	$4,%r12
2864	shlq	$4,%r13
2865	jmp	.Locb_enc_loop6
2866
/* Round loop: two rounds per iteration, alternating %xmm1 and %xmm0.
   %rax counts up by 32 towards zero; movups leaves the flags set by
   addq intact for the jnz. */
2867.align	32
2868.Locb_enc_loop6:
2869.byte	102,15,56,220,209
2870.byte	102,15,56,220,217
2871.byte	102,15,56,220,225
2872.byte	102,15,56,220,233
2873.byte	102,15,56,220,241
2874.byte	102,15,56,220,249
2875	movups	(%rcx,%rax,1),%xmm1
2876	addq	$32,%rax
2877
2878.byte	102,15,56,220,208
2879.byte	102,15,56,220,216
2880.byte	102,15,56,220,224
2881.byte	102,15,56,220,232
2882.byte	102,15,56,220,240
2883.byte	102,15,56,220,248
2884	movups	-16(%rcx,%rax,1),%xmm0
2885	jnz	.Locb_enc_loop6
2886
/* One more aesenc with %xmm1, then reload %xmm1 from 16(%r11) for the next call. */
2887.byte	102,15,56,220,209
2888.byte	102,15,56,220,217
2889.byte	102,15,56,220,225
2890.byte	102,15,56,220,233
2891.byte	102,15,56,220,241
2892.byte	102,15,56,220,249
2893	movups	16(%r11),%xmm1
2894	shlq	$4,%r14
2895
/* aesenclast with the per-block masks %xmm10..%xmm15; %xmm10 is reloaded
   from (%rbx) and %rax is reset from %r10 in between. */
2896.byte	102,65,15,56,221,210
2897	movdqu	(%rbx),%xmm10
2898	movq	%r10,%rax
2899.byte	102,65,15,56,221,219
2900.byte	102,65,15,56,221,228
2901.byte	102,65,15,56,221,237
2902.byte	102,65,15,56,221,246
2903.byte	102,65,15,56,221,255
/* rep ret */
2904	.byte	0xf3,0xc3
2905.cfi_endproc
2906.size	__ocb_encrypt6,.-__ocb_encrypt6
2907
/*
 * __ocb_encrypt4: four-block variant of __ocb_encrypt6.  Blocks are in
 * %xmm2..%xmm5 and are transformed in place with hand-encoded
 * aesenc (102,15,56,220,M) / aesenclast (102,65,15,56,221,M).
 *
 * Register use, as read from this routine:
 *   %xmm8          in/out: running XOR of the four input blocks
 *   %xmm9          in: constant XORed into the masks before the first
 *                  and before the last round
 *   %xmm10         in: value used to derive the masks of blocks 1 and 3
 *   %xmm15         in: chaining mask; only %xmm9 is XORed into it here
 *   %xmm10-%xmm13  out: last-round masks of blocks 1..4, from which the
 *                  visible callers pick the new chaining mask
 *   %rbx,%r12,%r13 in: table base and byte offsets for blocks 2 and 4
 *   %r11,%rcx,%rax in: key schedule base, round-key end pointer and
 *                  negative index; %rax is restored from %r10
 * Unlike the six-block helper, %r8, %r12, %r13 and %r14 are not updated.
 */
2908.type	__ocb_encrypt4,@function
2909.align	32
2910__ocb_encrypt4:
2911.cfi_startproc
/* Derive the four masks, fold inputs into %xmm8, mask the inputs. */
2912	pxor	%xmm9,%xmm15
2913	movdqu	(%rbx,%r12,1),%xmm11
2914	movdqa	%xmm10,%xmm12
2915	movdqu	(%rbx,%r13,1),%xmm13
2916	pxor	%xmm15,%xmm10
2917	pxor	%xmm10,%xmm11
2918	pxor	%xmm2,%xmm8
2919	pxor	%xmm10,%xmm2
2920	pxor	%xmm11,%xmm12
2921	pxor	%xmm3,%xmm8
2922	pxor	%xmm11,%xmm3
2923	pxor	%xmm12,%xmm13
2924	pxor	%xmm4,%xmm8
2925	pxor	%xmm12,%xmm4
2926	pxor	%xmm5,%xmm8
2927	pxor	%xmm13,%xmm5
2928	movups	32(%r11),%xmm0
2929
/* Second XOR of %xmm9 prepares the masks for aesenclast. */
2930	pxor	%xmm9,%xmm10
2931	pxor	%xmm9,%xmm11
2932	pxor	%xmm9,%xmm12
2933	pxor	%xmm9,%xmm13
2934
/* aesenc %xmm1 into %xmm2..%xmm5 */
2935.byte	102,15,56,220,209
2936.byte	102,15,56,220,217
2937.byte	102,15,56,220,225
2938.byte	102,15,56,220,233
2939	movups	48(%r11),%xmm1
2940
/* aesenc %xmm0 into %xmm2..%xmm5 */
2941.byte	102,15,56,220,208
2942.byte	102,15,56,220,216
2943.byte	102,15,56,220,224
2944.byte	102,15,56,220,232
2945	movups	64(%r11),%xmm0
2946	jmp	.Locb_enc_loop4
2947
/* Round loop: two rounds per pass; exits when %rax reaches zero
   (flags come from addq, movups does not alter them). */
2948.align	32
2949.Locb_enc_loop4:
2950.byte	102,15,56,220,209
2951.byte	102,15,56,220,217
2952.byte	102,15,56,220,225
2953.byte	102,15,56,220,233
2954	movups	(%rcx,%rax,1),%xmm1
2955	addq	$32,%rax
2956
2957.byte	102,15,56,220,208
2958.byte	102,15,56,220,216
2959.byte	102,15,56,220,224
2960.byte	102,15,56,220,232
2961	movups	-16(%rcx,%rax,1),%xmm0
2962	jnz	.Locb_enc_loop4
2963
/* One more aesenc with %xmm1; restore %xmm1 and %rax for the next call. */
2964.byte	102,15,56,220,209
2965.byte	102,15,56,220,217
2966.byte	102,15,56,220,225
2967.byte	102,15,56,220,233
2968	movups	16(%r11),%xmm1
2969	movq	%r10,%rax
2970
/* aesenclast with masks %xmm10..%xmm13 */
2971.byte	102,65,15,56,221,210
2972.byte	102,65,15,56,221,219
2973.byte	102,65,15,56,221,228
2974.byte	102,65,15,56,221,237
/* rep ret */
2975	.byte	0xf3,0xc3
2976.cfi_endproc
2977.size	__ocb_encrypt4,.-__ocb_encrypt4
2978
/*
 * __ocb_encrypt1: single-block variant of the OCB encrypt helpers.
 * The block is in %xmm2 and is transformed in place with hand-encoded
 * aesenc (102,15,56,220,M) / aesenclast (102,15,56,221,215).
 *
 * Register use, as read from this routine:
 *   %xmm2   in/out: the block
 *   %xmm7   in: 16-byte value combined with %xmm15 to form this block's
 *           mask; out: the last-round mask, which the visible callers
 *           copy to %xmm15 afterwards
 *   %xmm8   in/out: running XOR, receives the unmasked input block
 *   %xmm9   in: constant XORed into the mask before the first and before
 *           the last round
 *   %xmm15  in: chaining mask (not modified here)
 *   %r11,%rcx,%rax in: key schedule base, round-key end pointer and
 *           negative index; %rax is restored from %r10
 */
2979.type	__ocb_encrypt1,@function
2980.align	32
2981__ocb_encrypt1:
2982.cfi_startproc
2983	pxor	%xmm15,%xmm7
2984	pxor	%xmm9,%xmm7
2985	pxor	%xmm2,%xmm8
2986	pxor	%xmm7,%xmm2
2987	movups	32(%r11),%xmm0
2988
/* aesenc %xmm1,%xmm2 */
2989.byte	102,15,56,220,209
2990	movups	48(%r11),%xmm1
2991	pxor	%xmm9,%xmm7
2992
/* aesenc %xmm0,%xmm2 */
2993.byte	102,15,56,220,208
2994	movups	64(%r11),%xmm0
2995	jmp	.Locb_enc_loop1
2996
/* Round loop: two rounds per pass until %rax reaches zero. */
2997.align	32
2998.Locb_enc_loop1:
2999.byte	102,15,56,220,209
3000	movups	(%rcx,%rax,1),%xmm1
3001	addq	$32,%rax
3002
3003.byte	102,15,56,220,208
3004	movups	-16(%rcx,%rax,1),%xmm0
3005	jnz	.Locb_enc_loop1
3006
/* One more aesenc with %xmm1; restore %xmm1 and %rax for the next call. */
3007.byte	102,15,56,220,209
3008	movups	16(%r11),%xmm1
3009	movq	%r10,%rax
3010
/* aesenclast %xmm7,%xmm2 */
3011.byte	102,15,56,221,215
/* rep ret */
3012	.byte	0xf3,0xc3
3013.cfi_endproc
3014.size	__ocb_encrypt1,.-__ocb_encrypt1
3015
/*
 * aesni_ocb_decrypt: OCB-mode bulk decryption of whole 16-byte blocks
 * using AES-NI.
 *
 * Arguments as used below (SysV AMD64 order):
 *   %rdi      input pointer, read 16 bytes per block
 *   %rsi      output pointer, written 16 bytes per block
 *   %rdx      number of 16-byte blocks
 *   %rcx      AES key schedule; a 32-bit round count is read at offset 240
 *   %r8       number of the first block; its parity and lowest set bit
 *             select the table entries used
 *   %r9       pointer to the 16-byte running offset: loaded at entry,
 *             stored back at exit
 *   8(%rsp)   pointer to a table of 16-byte entries, kept in %rbx
 *             (presumably the OCB L_i table - confirm with caller)
 *   16(%rsp)  pointer to the 16-byte checksum, kept in %rbp: loaded at
 *             entry, XORed with every output block, stored back at exit
 *
 * %rbx, %rbp, %r12, %r13 and %r14 are saved and restored.  All sixteen
 * XMM registers are zeroed before returning.
 *
 * NOTE(review): the entry path always processes at least one block (or
 * reaches .Locb_dec_done with %xmm0 never loaded) when %rdx is 0, so the
 * routine appears to assume %rdx >= 1 - confirm against callers.
 */
3016.globl	aesni_ocb_decrypt
3017.type	aesni_ocb_decrypt,@function
3018.align	32
3019aesni_ocb_decrypt:
3020.cfi_startproc
/* endbr64 */
3021.byte	243,15,30,250
3022	leaq	(%rsp),%rax
3023	pushq	%rbx
3024.cfi_adjust_cfa_offset	8
3025.cfi_offset	%rbx,-16
3026	pushq	%rbp
3027.cfi_adjust_cfa_offset	8
3028.cfi_offset	%rbp,-24
3029	pushq	%r12
3030.cfi_adjust_cfa_offset	8
3031.cfi_offset	%r12,-32
3032	pushq	%r13
3033.cfi_adjust_cfa_offset	8
3034.cfi_offset	%r13,-40
3035	pushq	%r14
3036.cfi_adjust_cfa_offset	8
3037.cfi_offset	%r14,-48
/* Fetch the two stack-passed arguments (%rax still holds the entry %rsp). */
3038	movq	8(%rax),%rbx
3039	movq	8+8(%rax),%rbp
3040
/* %r10 = 16 * round count, %r11 = key schedule base.
   %xmm9 = first round key XOR the round key at offset 16 + 16*rounds;
   that same key is also XORed into the running offset in %xmm15. */
3041	movl	240(%rcx),%r10d
3042	movq	%rcx,%r11
3043	shll	$4,%r10d
3044	movups	(%rcx),%xmm9
3045	movups	16(%rcx,%r10,1),%xmm1
3046
3047	movdqu	(%r9),%xmm15
3048	pxor	%xmm1,%xmm9
3049	pxor	%xmm1,%xmm15
3050
/* Set up the helpers' round loop: %rcx = key + 32 + 16*rounds and
   %rax = %r10 = 48 - 16*rounds (a negative index that counts up to 0). */
3051	movl	$16+32,%eax
3052	leaq	32(%r11,%r10,1),%rcx
3053	movups	16(%r11),%xmm1
3054	subq	%r10,%rax
3055	movq	%rax,%r10
3056
/* %xmm10 = first table entry, %xmm8 = checksum. */
3057	movdqu	(%rbx),%xmm10
3058	movdqu	(%rbp),%xmm8
3059
/* If the starting block number is even, handle one block on its own so
   that the multi-block code below starts on an odd block number. */
3060	testq	$1,%r8
3061	jnz	.Locb_dec_odd
3062
3063	bsfq	%r8,%r12
3064	addq	$1,%r8
3065	shlq	$4,%r12
3066	movdqu	(%rbx,%r12,1),%xmm7
3067	movdqu	(%rdi),%xmm2
3068	leaq	16(%rdi),%rdi
3069
3070	call	__ocb_decrypt1
3071
3072	movdqa	%xmm7,%xmm15
3073	movups	%xmm2,(%rsi)
3074	xorps	%xmm2,%xmm8
3075	leaq	16(%rsi),%rsi
3076	subq	$1,%rdx
3077	jz	.Locb_dec_done
3078
/* Precompute table byte offsets for blocks 2, 4 and 6 of the next group:
   16 * (index of lowest set bit of %r8+1, %r8+3, %r8+5). */
3079.Locb_dec_odd:
3080	leaq	1(%r8),%r12
3081	leaq	3(%r8),%r13
3082	leaq	5(%r8),%r14
3083	leaq	6(%r8),%r8
3084	bsfq	%r12,%r12
3085	bsfq	%r13,%r13
3086	bsfq	%r14,%r14
3087	shlq	$4,%r12
3088	shlq	$4,%r13
3089	shlq	$4,%r14
3090
/* Fewer than six blocks left -> short path (borrow from the subtraction). */
3091	subq	$6,%rdx
3092	jc	.Locb_dec_short
3093	jmp	.Locb_dec_grandloop
3094
/* Main loop: six blocks per iteration; each output block is stored and
   XORed into the checksum in %xmm8. */
3095.align	32
3096.Locb_dec_grandloop:
3097	movdqu	0(%rdi),%xmm2
3098	movdqu	16(%rdi),%xmm3
3099	movdqu	32(%rdi),%xmm4
3100	movdqu	48(%rdi),%xmm5
3101	movdqu	64(%rdi),%xmm6
3102	movdqu	80(%rdi),%xmm7
3103	leaq	96(%rdi),%rdi
3104
3105	call	__ocb_decrypt6
3106
3107	movups	%xmm2,0(%rsi)
3108	pxor	%xmm2,%xmm8
3109	movups	%xmm3,16(%rsi)
3110	pxor	%xmm3,%xmm8
3111	movups	%xmm4,32(%rsi)
3112	pxor	%xmm4,%xmm8
3113	movups	%xmm5,48(%rsi)
3114	pxor	%xmm5,%xmm8
3115	movups	%xmm6,64(%rsi)
3116	pxor	%xmm6,%xmm8
3117	movups	%xmm7,80(%rsi)
3118	pxor	%xmm7,%xmm8
3119	leaq	96(%rsi),%rsi
3120	subq	$6,%rdx
3121	jnc	.Locb_dec_grandloop
3122
/* Remainder of 0..5 blocks.  Loads are interleaved with the compares;
   movdqu does not change flags, so each cmpq feeds both the jb and the
   following je. */
3123.Locb_dec_short:
3124	addq	$6,%rdx
3125	jz	.Locb_dec_done
3126
3127	movdqu	0(%rdi),%xmm2
3128	cmpq	$2,%rdx
3129	jb	.Locb_dec_one
3130	movdqu	16(%rdi),%xmm3
3131	je	.Locb_dec_two
3132
3133	movdqu	32(%rdi),%xmm4
3134	cmpq	$4,%rdx
3135	jb	.Locb_dec_three
3136	movdqu	48(%rdi),%xmm5
3137	je	.Locb_dec_four
3138
/* Five blocks: run the six-block helper with a zero sixth block and take
   the fifth block's mask (%xmm14) as the new chaining mask. */
3139	movdqu	64(%rdi),%xmm6
3140	pxor	%xmm7,%xmm7
3141
3142	call	__ocb_decrypt6
3143
3144	movdqa	%xmm14,%xmm15
3145	movups	%xmm2,0(%rsi)
3146	pxor	%xmm2,%xmm8
3147	movups	%xmm3,16(%rsi)
3148	pxor	%xmm3,%xmm8
3149	movups	%xmm4,32(%rsi)
3150	pxor	%xmm4,%xmm8
3151	movups	%xmm5,48(%rsi)
3152	pxor	%xmm5,%xmm8
3153	movups	%xmm6,64(%rsi)
3154	pxor	%xmm6,%xmm8
3155
3156	jmp	.Locb_dec_done
3157
/* One block: the first table entry (%xmm10) seeds the mask. */
3158.align	16
3159.Locb_dec_one:
3160	movdqa	%xmm10,%xmm7
3161
3162	call	__ocb_decrypt1
3163
3164	movdqa	%xmm7,%xmm15
3165	movups	%xmm2,0(%rsi)
3166	xorps	%xmm2,%xmm8
3167	jmp	.Locb_dec_done
3168
/* Two blocks: four-block helper with blocks 3 and 4 zeroed; the second
   block's mask (%xmm11) becomes the chaining mask. */
3169.align	16
3170.Locb_dec_two:
3171	pxor	%xmm4,%xmm4
3172	pxor	%xmm5,%xmm5
3173
3174	call	__ocb_decrypt4
3175
3176	movdqa	%xmm11,%xmm15
3177	movups	%xmm2,0(%rsi)
3178	xorps	%xmm2,%xmm8
3179	movups	%xmm3,16(%rsi)
3180	xorps	%xmm3,%xmm8
3181
3182	jmp	.Locb_dec_done
3183
/* Three blocks: four-block helper with block 4 zeroed; chaining mask
   comes from %xmm12. */
3184.align	16
3185.Locb_dec_three:
3186	pxor	%xmm5,%xmm5
3187
3188	call	__ocb_decrypt4
3189
3190	movdqa	%xmm12,%xmm15
3191	movups	%xmm2,0(%rsi)
3192	xorps	%xmm2,%xmm8
3193	movups	%xmm3,16(%rsi)
3194	xorps	%xmm3,%xmm8
3195	movups	%xmm4,32(%rsi)
3196	xorps	%xmm4,%xmm8
3197
3198	jmp	.Locb_dec_done
3199
/* Four blocks: chaining mask comes from %xmm13. */
3200.align	16
3201.Locb_dec_four:
3202	call	__ocb_decrypt4
3203
3204	movdqa	%xmm13,%xmm15
3205	movups	%xmm2,0(%rsi)
3206	pxor	%xmm2,%xmm8
3207	movups	%xmm3,16(%rsi)
3208	pxor	%xmm3,%xmm8
3209	movups	%xmm4,32(%rsi)
3210	pxor	%xmm4,%xmm8
3211	movups	%xmm5,48(%rsi)
3212	pxor	%xmm5,%xmm8
3213
/* Finish: XOR %xmm0 (as left by the last helper call) into the chaining
   mask, then write the checksum and the offset back to the caller. */
3214.Locb_dec_done:
3215	pxor	%xmm0,%xmm15
3216	movdqu	%xmm8,(%rbp)
3217	movdqu	%xmm15,(%r9)
3218
/* Zero every XMM register before returning. */
3219	xorps	%xmm0,%xmm0
3220	pxor	%xmm1,%xmm1
3221	pxor	%xmm2,%xmm2
3222	pxor	%xmm3,%xmm3
3223	pxor	%xmm4,%xmm4
3224	pxor	%xmm5,%xmm5
3225	pxor	%xmm6,%xmm6
3226	pxor	%xmm7,%xmm7
3227	pxor	%xmm8,%xmm8
3228	pxor	%xmm9,%xmm9
3229	pxor	%xmm10,%xmm10
3230	pxor	%xmm11,%xmm11
3231	pxor	%xmm12,%xmm12
3232	pxor	%xmm13,%xmm13
3233	pxor	%xmm14,%xmm14
3234	pxor	%xmm15,%xmm15
/* Restore the five saved registers; %rax = %rsp + 40 is the entry %rsp. */
3235	leaq	40(%rsp),%rax
3236.cfi_def_cfa	%rax,8
3237	movq	-40(%rax),%r14
3238.cfi_restore	%r14
3239	movq	-32(%rax),%r13
3240.cfi_restore	%r13
3241	movq	-24(%rax),%r12
3242.cfi_restore	%r12
3243	movq	-16(%rax),%rbp
3244.cfi_restore	%rbp
3245	movq	-8(%rax),%rbx
3246.cfi_restore	%rbx
3247	leaq	(%rax),%rsp
3248.cfi_def_cfa_register	%rsp
3249.Locb_dec_epilogue:
/* rep ret */
3250	.byte	0xf3,0xc3
3251.cfi_endproc
3252.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
3253
/*
 * __ocb_decrypt6: helper for aesni_ocb_decrypt that pushes six 16-byte
 * blocks, held in %xmm2..%xmm7, through the AES decryption rounds in
 * parallel.  The .byte sequences 102,15,56,222,M and 102,65,15,56,223,M
 * are hand-encoded aesdec / aesdeclast instructions.
 *
 * Register use, as read from this routine:
 *   %xmm2-%xmm7   in/out: the six blocks, transformed in place
 *   %xmm9         in: constant XORed into every mask before the first
 *                 round and again before the last round
 *   %xmm10        in: 16-byte value used to derive the masks of blocks
 *                 1, 3 and 5; reloaded from (%rbx) before returning
 *   %xmm15        in/out: chaining mask; on return it holds the
 *                 last-round mask of the sixth block
 *   %rbx          in: base of the table of 16-byte entries
 *   %r12,%r13,%r14 in/out: byte offsets into that table for blocks
 *                 2, 4 and 6; recomputed here for the next call
 *   %r8           in/out: block counter, advanced by 6
 *   %r11          in: key schedule base
 *   %rcx,%rax     in: round-key end pointer and negative byte index;
 *                 %rax is restored from %r10 before returning
 *   %xmm0,%xmm1   round-key temporaries; %xmm1 leaves holding 16(%r11)
 * Unlike __ocb_encrypt6, %xmm8 is not touched here; the caller XORs the
 * outputs into it.
 */
3254.type	__ocb_decrypt6,@function
3255.align	32
3256__ocb_decrypt6:
3257.cfi_startproc
/* Build the six per-block masks by chained XOR and mask the inputs. */
3258	pxor	%xmm9,%xmm15
3259	movdqu	(%rbx,%r12,1),%xmm11
3260	movdqa	%xmm10,%xmm12
3261	movdqu	(%rbx,%r13,1),%xmm13
3262	movdqa	%xmm10,%xmm14
3263	pxor	%xmm15,%xmm10
3264	movdqu	(%rbx,%r14,1),%xmm15
3265	pxor	%xmm10,%xmm11
3266	pxor	%xmm10,%xmm2
3267	pxor	%xmm11,%xmm12
3268	pxor	%xmm11,%xmm3
3269	pxor	%xmm12,%xmm13
3270	pxor	%xmm12,%xmm4
3271	pxor	%xmm13,%xmm14
3272	pxor	%xmm13,%xmm5
3273	pxor	%xmm14,%xmm15
3274	pxor	%xmm14,%xmm6
3275	pxor	%xmm15,%xmm7
3276	movups	32(%r11),%xmm0
3277
/* Table indices for the next call: lowest-set-bit position of (old %r8)+1,
   +3 and +5; they are scaled by 16 further down. */
3278	leaq	1(%r8),%r12
3279	leaq	3(%r8),%r13
3280	leaq	5(%r8),%r14
3281	addq	$6,%r8
3282	pxor	%xmm9,%xmm10
3283	bsfq	%r12,%r12
3284	bsfq	%r13,%r13
3285	bsfq	%r14,%r14
3286
/* aesdec %xmm1 into %xmm2..%xmm7, interleaved with the second XOR of %xmm9
   into the masks so they are ready for aesdeclast. */
3287.byte	102,15,56,222,209
3288.byte	102,15,56,222,217
3289.byte	102,15,56,222,225
3290.byte	102,15,56,222,233
3291	pxor	%xmm9,%xmm11
3292	pxor	%xmm9,%xmm12
3293.byte	102,15,56,222,241
3294	pxor	%xmm9,%xmm13
3295	pxor	%xmm9,%xmm14
3296.byte	102,15,56,222,249
3297	movups	48(%r11),%xmm1
3298	pxor	%xmm9,%xmm15
3299
/* aesdec %xmm0 into %xmm2..%xmm7 */
3300.byte	102,15,56,222,208
3301.byte	102,15,56,222,216
3302.byte	102,15,56,222,224
3303.byte	102,15,56,222,232
3304.byte	102,15,56,222,240
3305.byte	102,15,56,222,248
3306	movups	64(%r11),%xmm0
3307	shlq	$4,%r12
3308	shlq	$4,%r13
3309	jmp	.Locb_dec_loop6
3310
/* Round loop: two rounds per iteration, alternating %xmm1 and %xmm0.
   %rax counts up by 32 towards zero; movups leaves the flags set by
   addq intact for the jnz. */
3311.align	32
3312.Locb_dec_loop6:
3313.byte	102,15,56,222,209
3314.byte	102,15,56,222,217
3315.byte	102,15,56,222,225
3316.byte	102,15,56,222,233
3317.byte	102,15,56,222,241
3318.byte	102,15,56,222,249
3319	movups	(%rcx,%rax,1),%xmm1
3320	addq	$32,%rax
3321
3322.byte	102,15,56,222,208
3323.byte	102,15,56,222,216
3324.byte	102,15,56,222,224
3325.byte	102,15,56,222,232
3326.byte	102,15,56,222,240
3327.byte	102,15,56,222,248
3328	movups	-16(%rcx,%rax,1),%xmm0
3329	jnz	.Locb_dec_loop6
3330
/* One more aesdec with %xmm1, then reload %xmm1 from 16(%r11) for the next call. */
3331.byte	102,15,56,222,209
3332.byte	102,15,56,222,217
3333.byte	102,15,56,222,225
3334.byte	102,15,56,222,233
3335.byte	102,15,56,222,241
3336.byte	102,15,56,222,249
3337	movups	16(%r11),%xmm1
3338	shlq	$4,%r14
3339
/* aesdeclast with the per-block masks %xmm10..%xmm15; %xmm10 is reloaded
   from (%rbx) and %rax is reset from %r10 in between. */
3340.byte	102,65,15,56,223,210
3341	movdqu	(%rbx),%xmm10
3342	movq	%r10,%rax
3343.byte	102,65,15,56,223,219
3344.byte	102,65,15,56,223,228
3345.byte	102,65,15,56,223,237
3346.byte	102,65,15,56,223,246
3347.byte	102,65,15,56,223,255
/* rep ret */
3348	.byte	0xf3,0xc3
3349.cfi_endproc
3350.size	__ocb_decrypt6,.-__ocb_decrypt6
3351
/*
 * __ocb_decrypt4: four-block variant of __ocb_decrypt6.  Blocks are in
 * %xmm2..%xmm5 and are transformed in place with hand-encoded
 * aesdec (102,15,56,222,M) / aesdeclast (102,65,15,56,223,M).
 *
 * Register use, as read from this routine:
 *   %xmm9          in: constant XORed into the masks before the first
 *                  and before the last round
 *   %xmm10         in: value used to derive the masks of blocks 1 and 3
 *   %xmm15         in: chaining mask; only %xmm9 is XORed into it here
 *   %xmm10-%xmm13  out: last-round masks of blocks 1..4, from which
 *                  aesni_ocb_decrypt picks the new chaining mask
 *   %rbx,%r12,%r13 in: table base and byte offsets for blocks 2 and 4
 *   %r11,%rcx,%rax in: key schedule base, round-key end pointer and
 *                  negative index; %rax is restored from %r10
 * %r8, %r12, %r13 and %r14 are not updated by this helper.
 */
3352.type	__ocb_decrypt4,@function
3353.align	32
3354__ocb_decrypt4:
3355.cfi_startproc
/* Derive the four masks and mask the inputs. */
3356	pxor	%xmm9,%xmm15
3357	movdqu	(%rbx,%r12,1),%xmm11
3358	movdqa	%xmm10,%xmm12
3359	movdqu	(%rbx,%r13,1),%xmm13
3360	pxor	%xmm15,%xmm10
3361	pxor	%xmm10,%xmm11
3362	pxor	%xmm10,%xmm2
3363	pxor	%xmm11,%xmm12
3364	pxor	%xmm11,%xmm3
3365	pxor	%xmm12,%xmm13
3366	pxor	%xmm12,%xmm4
3367	pxor	%xmm13,%xmm5
3368	movups	32(%r11),%xmm0
3369
/* Second XOR of %xmm9 prepares the masks for aesdeclast. */
3370	pxor	%xmm9,%xmm10
3371	pxor	%xmm9,%xmm11
3372	pxor	%xmm9,%xmm12
3373	pxor	%xmm9,%xmm13
3374
/* aesdec %xmm1 into %xmm2..%xmm5 */
3375.byte	102,15,56,222,209
3376.byte	102,15,56,222,217
3377.byte	102,15,56,222,225
3378.byte	102,15,56,222,233
3379	movups	48(%r11),%xmm1
3380
/* aesdec %xmm0 into %xmm2..%xmm5 */
3381.byte	102,15,56,222,208
3382.byte	102,15,56,222,216
3383.byte	102,15,56,222,224
3384.byte	102,15,56,222,232
3385	movups	64(%r11),%xmm0
3386	jmp	.Locb_dec_loop4
3387
/* Round loop: two rounds per pass; exits when %rax reaches zero
   (flags come from addq, movups does not alter them). */
3388.align	32
3389.Locb_dec_loop4:
3390.byte	102,15,56,222,209
3391.byte	102,15,56,222,217
3392.byte	102,15,56,222,225
3393.byte	102,15,56,222,233
3394	movups	(%rcx,%rax,1),%xmm1
3395	addq	$32,%rax
3396
3397.byte	102,15,56,222,208
3398.byte	102,15,56,222,216
3399.byte	102,15,56,222,224
3400.byte	102,15,56,222,232
3401	movups	-16(%rcx,%rax,1),%xmm0
3402	jnz	.Locb_dec_loop4
3403
/* One more aesdec with %xmm1; restore %xmm1 and %rax for the next call. */
3404.byte	102,15,56,222,209
3405.byte	102,15,56,222,217
3406.byte	102,15,56,222,225
3407.byte	102,15,56,222,233
3408	movups	16(%r11),%xmm1
3409	movq	%r10,%rax
3410
/* aesdeclast with masks %xmm10..%xmm13 */
3411.byte	102,65,15,56,223,210
3412.byte	102,65,15,56,223,219
3413.byte	102,65,15,56,223,228
3414.byte	102,65,15,56,223,237
/* rep ret */
3415	.byte	0xf3,0xc3
3416.cfi_endproc
3417.size	__ocb_decrypt4,.-__ocb_decrypt4
3418
/*
 * __ocb_decrypt1: single-block variant of the OCB decrypt helpers.
 * The block is in %xmm2 and is transformed in place with hand-encoded
 * aesdec (102,15,56,222,M) / aesdeclast (102,15,56,223,215).
 *
 * Register use, as read from this routine:
 *   %xmm2   in/out: the block
 *   %xmm7   in: 16-byte value combined with %xmm15 to form this block's
 *           mask; out: the last-round mask, which aesni_ocb_decrypt
 *           copies to %xmm15 afterwards
 *   %xmm9   in: constant XORed into the mask before the first and before
 *           the last round
 *   %xmm15  in: chaining mask (not modified here)
 *   %r11,%rcx,%rax in: key schedule base, round-key end pointer and
 *           negative index; %rax is restored from %r10
 */
3419.type	__ocb_decrypt1,@function
3420.align	32
3421__ocb_decrypt1:
3422.cfi_startproc
3423	pxor	%xmm15,%xmm7
3424	pxor	%xmm9,%xmm7
3425	pxor	%xmm7,%xmm2
3426	movups	32(%r11),%xmm0
3427
/* aesdec %xmm1,%xmm2 */
3428.byte	102,15,56,222,209
3429	movups	48(%r11),%xmm1
3430	pxor	%xmm9,%xmm7
3431
/* aesdec %xmm0,%xmm2 */
3432.byte	102,15,56,222,208
3433	movups	64(%r11),%xmm0
3434	jmp	.Locb_dec_loop1
3435
/* Round loop: two rounds per pass until %rax reaches zero. */
3436.align	32
3437.Locb_dec_loop1:
3438.byte	102,15,56,222,209
3439	movups	(%rcx,%rax,1),%xmm1
3440	addq	$32,%rax
3441
3442.byte	102,15,56,222,208
3443	movups	-16(%rcx,%rax,1),%xmm0
3444	jnz	.Locb_dec_loop1
3445
/* One more aesdec with %xmm1; restore %xmm1 and %rax for the next call. */
3446.byte	102,15,56,222,209
3447	movups	16(%r11),%xmm1
3448	movq	%r10,%rax
3449
/* aesdeclast %xmm7,%xmm2 */
3450.byte	102,15,56,223,215
/* rep ret */
3451	.byte	0xf3,0xc3
3452.cfi_endproc
3453.size	__ocb_decrypt1,.-__ocb_decrypt1
/*
 * aesni_cbc_encrypt: AES-CBC encryption or decryption using AES-NI.
 *
 * Arguments as used below (SysV AMD64 order):
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  length in bytes; zero returns immediately
 *   %rcx  AES key schedule; a 32-bit round count is read at offset 240
 *   %r8   pointer to the 16-byte IV: read at entry, updated before return
 *   %r9d  direction: non-zero takes the encrypt path, zero the decrypt path
 *
 * Hand-encoded instructions used in this routine:
 *   102,15,56,220,M / 221,M          aesenc / aesenclast
 *   102,[68|69,]15,56,222,M / 223,M  aesdec / aesdeclast
 *   .long 0x9066A4F3                 rep movsb followed by a 2-byte nop
 *   .long 0x9066AAF3                 rep stosb followed by a 2-byte nop
 *
 * The bulk decrypt path saves %rbp, keeps the entry %rsp in %r11 and
 * carves a 16-byte aligned scratch slot at (%rsp).
 */
3454.globl	aesni_cbc_encrypt
3455.type	aesni_cbc_encrypt,@function
3456.align	16
3457aesni_cbc_encrypt:
3458.cfi_startproc
/* endbr64 */
3459.byte	243,15,30,250
3460	testq	%rdx,%rdx
3461	jz	.Lcbc_ret
3462
/* %r10d = round count, %r11 = key schedule (both kept for reloads). */
3463	movl	240(%rcx),%r10d
3464	movq	%rcx,%r11
3465	testl	%r9d,%r9d
3466	jz	.Lcbc_decrypt
3467
/* ---- Encrypt: one block at a time, %xmm2 carries the chaining value. ---- */
3468	movups	(%r8),%xmm2
3469	movl	%r10d,%eax
3470	cmpq	$16,%rdx
3471	jb	.Lcbc_enc_tail
3472	subq	$16,%rdx
3473	jmp	.Lcbc_enc_loop
3474.align	16
3475.Lcbc_enc_loop:
3476	movups	(%rdi),%xmm3
3477	leaq	16(%rdi),%rdi
3478
/* %xmm2 = previous ciphertext XOR input XOR first round key */
3479	movups	(%rcx),%xmm0
3480	movups	16(%rcx),%xmm1
3481	xorps	%xmm0,%xmm3
3482	leaq	32(%rcx),%rcx
3483	xorps	%xmm3,%xmm2
/* aesenc %xmm1,%xmm2 once per round, walking %rcx through the key schedule */
3484.Loop_enc1_15:
3485.byte	102,15,56,220,209
3486	decl	%eax
3487	movups	(%rcx),%xmm1
3488	leaq	16(%rcx),%rcx
3489	jnz	.Loop_enc1_15
/* aesenclast %xmm1,%xmm2 */
3490.byte	102,15,56,221,209
/* Reset round counter and key pointer, store the block, and continue
   while at least 16 bytes remain (no borrow). */
3491	movl	%r10d,%eax
3492	movq	%r11,%rcx
3493	movups	%xmm2,0(%rsi)
3494	leaq	16(%rsi),%rsi
3495	subq	$16,%rdx
3496	jnc	.Lcbc_enc_loop
3497	addq	$16,%rdx
3498	jnz	.Lcbc_enc_tail
/* Done: scrub registers and write the last ciphertext block back as IV. */
3499	pxor	%xmm0,%xmm0
3500	pxor	%xmm1,%xmm1
3501	movups	%xmm2,(%r8)
3502	pxor	%xmm2,%xmm2
3503	pxor	%xmm3,%xmm3
3504	jmp	.Lcbc_ret
3505
/* Partial final block (%rdx < 16 bytes): copy the remaining input bytes to
   the output buffer, zero-fill up to 16 bytes, then encrypt that block in
   place by re-entering the loop with input = output and %rdx = 0. */
3506.Lcbc_enc_tail:
3507	movq	%rdx,%rcx
3508	xchgq	%rdi,%rsi
/* rep movsb; nop */
3509.long	0x9066A4F3
3510	movl	$16,%ecx
3511	subq	%rdx,%rcx
3512	xorl	%eax,%eax
/* rep stosb; nop */
3513.long	0x9066AAF3
3514	leaq	-16(%rdi),%rdi
3515	movl	%r10d,%eax
3516	movq	%rdi,%rsi
3517	movq	%r11,%rcx
3518	xorq	%rdx,%rdx
3519	jmp	.Lcbc_enc_loop
3520
/* ---- Decrypt.  Exactly 16 bytes is handled inline; anything else goes
   to the bulk path. ---- */
3521.align	16
3522.Lcbc_decrypt:
3523	cmpq	$16,%rdx
3524	jne	.Lcbc_decrypt_bulk
3525
3526
3527
/* Single block: %xmm4 keeps the ciphertext (next IV), %xmm3 the current IV. */
3528	movdqu	(%rdi),%xmm2
3529	movdqu	(%r8),%xmm3
3530	movdqa	%xmm2,%xmm4
3531	movups	(%rcx),%xmm0
3532	movups	16(%rcx),%xmm1
3533	leaq	32(%rcx),%rcx
3534	xorps	%xmm0,%xmm2
/* aesdec %xmm1,%xmm2 once per round */
3535.Loop_dec1_16:
3536.byte	102,15,56,222,209
3537	decl	%r10d
3538	movups	(%rcx),%xmm1
3539	leaq	16(%rcx),%rcx
3540	jnz	.Loop_dec1_16
/* aesdeclast %xmm1,%xmm2 */
3541.byte	102,15,56,223,209
3542	pxor	%xmm0,%xmm0
3543	pxor	%xmm1,%xmm1
3544	movdqu	%xmm4,(%r8)
3545	xorps	%xmm3,%xmm2
3546	pxor	%xmm3,%xmm3
3547	movups	%xmm2,(%rsi)
3548	pxor	%xmm2,%xmm2
3549	jmp	.Lcbc_ret
/* Bulk decrypt: set up the frame (%r11 = entry %rsp, %rbp saved, aligned
   16-byte scratch at (%rsp)); %rbp = key schedule, %xmm10 = IV. */
3550.align	16
3551.Lcbc_decrypt_bulk:
3552	leaq	(%rsp),%r11
3553.cfi_def_cfa_register	%r11
3554	pushq	%rbp
3555.cfi_offset	%rbp,-16
3556	subq	$16,%rsp
3557	andq	$-16,%rsp
3558	movq	%rcx,%rbp
3559	movups	(%r8),%xmm10
3560	movl	%r10d,%eax
3561	cmpq	$0x50,%rdx
3562	jbe	.Lcbc_dec_tail
3563
/* More than 0x50 bytes: preload six ciphertext blocks into %xmm2..%xmm7
   and keep copies of the first five in %xmm11..%xmm15 for chaining. */
3564	movups	(%rcx),%xmm0
3565	movdqu	0(%rdi),%xmm2
3566	movdqu	16(%rdi),%xmm3
3567	movdqa	%xmm2,%xmm11
3568	movdqu	32(%rdi),%xmm4
3569	movdqa	%xmm3,%xmm12
3570	movdqu	48(%rdi),%xmm5
3571	movdqa	%xmm4,%xmm13
3572	movdqu	64(%rdi),%xmm6
3573	movdqa	%xmm5,%xmm14
3574	movdqu	80(%rdi),%xmm7
3575	movdqa	%xmm6,%xmm15
3576	movl	OPENSSL_ia32cap_P+4(%rip),%r9d
3577	cmpq	$0x70,%rdx
3578	jbe	.Lcbc_dec_six_or_seven
3579
/* Choose between the 6-wide and 8-wide loops from capability bits 22 and
   26 (mask 71303168 = 0x4400000): the 6-wide loop is used when only bit 22
   (4194304) is set.  NOTE(review): presumably MOVBE-without-XSAVE, i.e. a
   CPU-model heuristic - confirm against the generator script. */
3580	andl	$71303168,%r9d
3581	subq	$0x50,%rdx
3582	cmpl	$4194304,%r9d
3583	je	.Lcbc_dec_loop6_enter
3584	subq	$0x20,%rdx
3585	leaq	112(%rcx),%rcx
3586	jmp	.Lcbc_dec_loop8_enter
/* ---- Eight-block loop.  %rcx is biased by +112 so that round keys are
   addressed as N-112(%rcx).  The eighth result (%xmm9) of the previous
   iteration is stored at the top of the loop. ---- */
3587.align	16
3588.Lcbc_dec_loop8:
3589	movups	%xmm9,(%rsi)
3590	leaq	16(%rsi),%rsi
3591.Lcbc_dec_loop8_enter:
3592	movdqu	96(%rdi),%xmm8
3593	pxor	%xmm0,%xmm2
3594	movdqu	112(%rdi),%xmm9
3595	pxor	%xmm0,%xmm3
3596	movups	16-112(%rcx),%xmm1
3597	pxor	%xmm0,%xmm4
/* The carry from this cmpq is consumed by the adcq below (the SSE/AES
   instructions in between leave flags alone): %rbp becomes %rdi+128 when
   at least 0x70 bytes remain, otherwise %rdi, so the look-ahead loads in
   .Lcbc_dec_done stay inside the input buffer. */
3598	movq	$-1,%rbp
3599	cmpq	$0x70,%rdx
3600	pxor	%xmm0,%xmm5
3601	pxor	%xmm0,%xmm6
3602	pxor	%xmm0,%xmm7
3603	pxor	%xmm0,%xmm8
3604
/* Rounds follow: each group applies aesdec with %xmm1 or %xmm0 to
   %xmm2..%xmm9 and loads the next round key. */
3605.byte	102,15,56,222,209
3606	pxor	%xmm0,%xmm9
3607	movups	32-112(%rcx),%xmm0
3608.byte	102,15,56,222,217
3609.byte	102,15,56,222,225
3610.byte	102,15,56,222,233
3611.byte	102,15,56,222,241
3612.byte	102,15,56,222,249
3613.byte	102,68,15,56,222,193
3614	adcq	$0,%rbp
3615	andq	$128,%rbp
3616.byte	102,68,15,56,222,201
3617	addq	%rdi,%rbp
3618	movups	48-112(%rcx),%xmm1
3619.byte	102,15,56,222,208
3620.byte	102,15,56,222,216
3621.byte	102,15,56,222,224
3622.byte	102,15,56,222,232
3623.byte	102,15,56,222,240
3624.byte	102,15,56,222,248
3625.byte	102,68,15,56,222,192
3626.byte	102,68,15,56,222,200
3627	movups	64-112(%rcx),%xmm0
3628	nop
3629.byte	102,15,56,222,209
3630.byte	102,15,56,222,217
3631.byte	102,15,56,222,225
3632.byte	102,15,56,222,233
3633.byte	102,15,56,222,241
3634.byte	102,15,56,222,249
3635.byte	102,68,15,56,222,193
3636.byte	102,68,15,56,222,201
3637	movups	80-112(%rcx),%xmm1
3638	nop
3639.byte	102,15,56,222,208
3640.byte	102,15,56,222,216
3641.byte	102,15,56,222,224
3642.byte	102,15,56,222,232
3643.byte	102,15,56,222,240
3644.byte	102,15,56,222,248
3645.byte	102,68,15,56,222,192
3646.byte	102,68,15,56,222,200
3647	movups	96-112(%rcx),%xmm0
3648	nop
3649.byte	102,15,56,222,209
3650.byte	102,15,56,222,217
3651.byte	102,15,56,222,225
3652.byte	102,15,56,222,233
3653.byte	102,15,56,222,241
3654.byte	102,15,56,222,249
3655.byte	102,68,15,56,222,193
3656.byte	102,68,15,56,222,201
3657	movups	112-112(%rcx),%xmm1
3658	nop
3659.byte	102,15,56,222,208
3660.byte	102,15,56,222,216
3661.byte	102,15,56,222,224
3662.byte	102,15,56,222,232
3663.byte	102,15,56,222,240
3664.byte	102,15,56,222,248
3665.byte	102,68,15,56,222,192
3666.byte	102,68,15,56,222,200
3667	movups	128-112(%rcx),%xmm0
3668	nop
3669.byte	102,15,56,222,209
3670.byte	102,15,56,222,217
3671.byte	102,15,56,222,225
3672.byte	102,15,56,222,233
3673.byte	102,15,56,222,241
3674.byte	102,15,56,222,249
3675.byte	102,68,15,56,222,193
3676.byte	102,68,15,56,222,201
3677	movups	144-112(%rcx),%xmm1
/* Compare the round count with 11 once; the flags feed both the jb below
   (fewer rounds: finish now) and the later je (exactly 11: finish after
   two more rounds). */
3678	cmpl	$11,%eax
3679.byte	102,15,56,222,208
3680.byte	102,15,56,222,216
3681.byte	102,15,56,222,224
3682.byte	102,15,56,222,232
3683.byte	102,15,56,222,240
3684.byte	102,15,56,222,248
3685.byte	102,68,15,56,222,192
3686.byte	102,68,15,56,222,200
3687	movups	160-112(%rcx),%xmm0
3688	jb	.Lcbc_dec_done
3689.byte	102,15,56,222,209
3690.byte	102,15,56,222,217
3691.byte	102,15,56,222,225
3692.byte	102,15,56,222,233
3693.byte	102,15,56,222,241
3694.byte	102,15,56,222,249
3695.byte	102,68,15,56,222,193
3696.byte	102,68,15,56,222,201
3697	movups	176-112(%rcx),%xmm1
3698	nop
3699.byte	102,15,56,222,208
3700.byte	102,15,56,222,216
3701.byte	102,15,56,222,224
3702.byte	102,15,56,222,232
3703.byte	102,15,56,222,240
3704.byte	102,15,56,222,248
3705.byte	102,68,15,56,222,192
3706.byte	102,68,15,56,222,200
3707	movups	192-112(%rcx),%xmm0
3708	je	.Lcbc_dec_done
3709.byte	102,15,56,222,209
3710.byte	102,15,56,222,217
3711.byte	102,15,56,222,225
3712.byte	102,15,56,222,233
3713.byte	102,15,56,222,241
3714.byte	102,15,56,222,249
3715.byte	102,68,15,56,222,193
3716.byte	102,68,15,56,222,201
3717	movups	208-112(%rcx),%xmm1
3718	nop
3719.byte	102,15,56,222,208
3720.byte	102,15,56,222,216
3721.byte	102,15,56,222,224
3722.byte	102,15,56,222,232
3723.byte	102,15,56,222,240
3724.byte	102,15,56,222,248
3725.byte	102,68,15,56,222,192
3726.byte	102,68,15,56,222,200
3727	movups	224-112(%rcx),%xmm0
3728	jmp	.Lcbc_dec_done
/* Final rounds of the eight-block batch.  %xmm0 (the last round key
   loaded) is XORed into the IV and the saved ciphertext copies, so each
   aesdeclast performs the final key addition and the CBC XOR together. */
3729.align	16
3730.Lcbc_dec_done:
3731.byte	102,15,56,222,209
3732.byte	102,15,56,222,217
3733	pxor	%xmm0,%xmm10
3734	pxor	%xmm0,%xmm11
3735.byte	102,15,56,222,225
3736.byte	102,15,56,222,233
3737	pxor	%xmm0,%xmm12
3738	pxor	%xmm0,%xmm13
3739.byte	102,15,56,222,241
3740.byte	102,15,56,222,249
3741	pxor	%xmm0,%xmm14
3742	pxor	%xmm0,%xmm15
3743.byte	102,68,15,56,222,193
3744.byte	102,68,15,56,222,201
3745	movdqu	80(%rdi),%xmm1
3746
/* aesdeclast per block, interleaved with loading ciphertext blocks 6..8 of
   this batch (chaining values) and, through %rbp, the first six blocks
   of the next batch. */
3747.byte	102,65,15,56,223,210
3748	movdqu	96(%rdi),%xmm10
3749	pxor	%xmm0,%xmm1
3750.byte	102,65,15,56,223,219
3751	pxor	%xmm0,%xmm10
3752	movdqu	112(%rdi),%xmm0
3753.byte	102,65,15,56,223,228
3754	leaq	128(%rdi),%rdi
3755	movdqu	0(%rbp),%xmm11
3756.byte	102,65,15,56,223,237
3757.byte	102,65,15,56,223,246
3758	movdqu	16(%rbp),%xmm12
3759	movdqu	32(%rbp),%xmm13
3760.byte	102,65,15,56,223,255
3761.byte	102,68,15,56,223,193
3762	movdqu	48(%rbp),%xmm14
3763	movdqu	64(%rbp),%xmm15
3764.byte	102,69,15,56,223,202
/* %xmm10 = eighth ciphertext block of this batch, i.e. the next IV;
   %xmm0 = first round key again. */
3765	movdqa	%xmm0,%xmm10
3766	movdqu	80(%rbp),%xmm1
3767	movups	-112(%rcx),%xmm0
3768
/* Store seven results and move the next batch's inputs into place; the
   eighth result stays in %xmm9 until the next iteration or the exit code. */
3769	movups	%xmm2,(%rsi)
3770	movdqa	%xmm11,%xmm2
3771	movups	%xmm3,16(%rsi)
3772	movdqa	%xmm12,%xmm3
3773	movups	%xmm4,32(%rsi)
3774	movdqa	%xmm13,%xmm4
3775	movups	%xmm5,48(%rsi)
3776	movdqa	%xmm14,%xmm5
3777	movups	%xmm6,64(%rsi)
3778	movdqa	%xmm15,%xmm6
3779	movups	%xmm7,80(%rsi)
3780	movdqa	%xmm1,%xmm7
3781	movups	%xmm8,96(%rsi)
3782	leaq	112(%rsi),%rsi
3783
3784	subq	$0x80,%rdx
3785	ja	.Lcbc_dec_loop8
3786
/* Loop exit: undo the %rcx bias and the length bias.  If nothing is left,
   the pending block in %xmm2 is stored by the tail code. */
3787	movaps	%xmm9,%xmm2
3788	leaq	-112(%rcx),%rcx
3789	addq	$0x70,%rdx
3790	jle	.Lcbc_dec_clear_tail_collected
3791	movups	%xmm9,(%rsi)
3792	leaq	16(%rsi),%rsi
3793	cmpq	$0x50,%rdx
3794	jbe	.Lcbc_dec_tail
3795
/* More than 0x50 bytes left: %xmm3..%xmm7 already hold the next blocks;
   restore the first one from its copy in %xmm11. */
3796	movaps	%xmm11,%xmm2
/* ---- Between 0x51 and 0x70 bytes with six blocks preloaded. ---- */
3797.Lcbc_dec_six_or_seven:
3798	cmpq	$0x60,%rdx
3799	ja	.Lcbc_dec_seven
3800
/* Six blocks: %xmm8 saves the sixth ciphertext block as the next IV.
   Stored plaintext registers are cleared as they go; the last result is
   left in %xmm2 for the tail code. */
3801	movaps	%xmm7,%xmm8
3802	call	_aesni_decrypt6
3803	pxor	%xmm10,%xmm2
3804	movaps	%xmm8,%xmm10
3805	pxor	%xmm11,%xmm3
3806	movdqu	%xmm2,(%rsi)
3807	pxor	%xmm12,%xmm4
3808	movdqu	%xmm3,16(%rsi)
3809	pxor	%xmm3,%xmm3
3810	pxor	%xmm13,%xmm5
3811	movdqu	%xmm4,32(%rsi)
3812	pxor	%xmm4,%xmm4
3813	pxor	%xmm14,%xmm6
3814	movdqu	%xmm5,48(%rsi)
3815	pxor	%xmm5,%xmm5
3816	pxor	%xmm15,%xmm7
3817	movdqu	%xmm6,64(%rsi)
3818	pxor	%xmm6,%xmm6
3819	leaq	80(%rsi),%rsi
3820	movdqa	%xmm7,%xmm2
3821	pxor	%xmm7,%xmm7
3822	jmp	.Lcbc_dec_tail_collected
3823
/* Seven blocks: use the eight-block primitive with a zero eighth block;
   ciphertext blocks 6 and 7 are re-read from the input for chaining. */
3824.align	16
3825.Lcbc_dec_seven:
3826	movups	96(%rdi),%xmm8
3827	xorps	%xmm9,%xmm9
3828	call	_aesni_decrypt8
3829	movups	80(%rdi),%xmm9
3830	pxor	%xmm10,%xmm2
3831	movups	96(%rdi),%xmm10
3832	pxor	%xmm11,%xmm3
3833	movdqu	%xmm2,(%rsi)
3834	pxor	%xmm12,%xmm4
3835	movdqu	%xmm3,16(%rsi)
3836	pxor	%xmm3,%xmm3
3837	pxor	%xmm13,%xmm5
3838	movdqu	%xmm4,32(%rsi)
3839	pxor	%xmm4,%xmm4
3840	pxor	%xmm14,%xmm6
3841	movdqu	%xmm5,48(%rsi)
3842	pxor	%xmm5,%xmm5
3843	pxor	%xmm15,%xmm7
3844	movdqu	%xmm6,64(%rsi)
3845	pxor	%xmm6,%xmm6
3846	pxor	%xmm9,%xmm8
3847	movdqu	%xmm7,80(%rsi)
3848	pxor	%xmm7,%xmm7
3849	leaq	96(%rsi),%rsi
3850	movdqa	%xmm8,%xmm2
3851	pxor	%xmm8,%xmm8
3852	pxor	%xmm9,%xmm9
3853	jmp	.Lcbc_dec_tail_collected
3854
/* ---- Six-block loop.  The sixth result of the previous iteration
   (%xmm7) is stored at the top of the loop. ---- */
3855.align	16
3856.Lcbc_dec_loop6:
3857	movups	%xmm7,(%rsi)
3858	leaq	16(%rsi),%rsi
3859	movdqu	0(%rdi),%xmm2
3860	movdqu	16(%rdi),%xmm3
3861	movdqa	%xmm2,%xmm11
3862	movdqu	32(%rdi),%xmm4
3863	movdqa	%xmm3,%xmm12
3864	movdqu	48(%rdi),%xmm5
3865	movdqa	%xmm4,%xmm13
3866	movdqu	64(%rdi),%xmm6
3867	movdqa	%xmm5,%xmm14
3868	movdqu	80(%rdi),%xmm7
3869	movdqa	%xmm6,%xmm15
3870.Lcbc_dec_loop6_enter:
3871	leaq	96(%rdi),%rdi
3872	movdqa	%xmm7,%xmm8
3873
3874	call	_aesni_decrypt6
3875
/* XOR with IV / previous ciphertext, store five results, and reload the
   key pointer and round count for the next call. */
3876	pxor	%xmm10,%xmm2
3877	movdqa	%xmm8,%xmm10
3878	pxor	%xmm11,%xmm3
3879	movdqu	%xmm2,(%rsi)
3880	pxor	%xmm12,%xmm4
3881	movdqu	%xmm3,16(%rsi)
3882	pxor	%xmm13,%xmm5
3883	movdqu	%xmm4,32(%rsi)
3884	pxor	%xmm14,%xmm6
3885	movq	%rbp,%rcx
3886	movdqu	%xmm5,48(%rsi)
3887	pxor	%xmm15,%xmm7
3888	movl	%r10d,%eax
3889	movdqu	%xmm6,64(%rsi)
3890	leaq	80(%rsi),%rsi
3891	subq	$0x60,%rdx
3892	ja	.Lcbc_dec_loop6
3893
3894	movdqa	%xmm7,%xmm2
3895	addq	$0x50,%rdx
3896	jle	.Lcbc_dec_clear_tail_collected
3897	movups	%xmm7,(%rsi)
3898	leaq	16(%rsi),%rsi
3899
/* ---- Tail: up to 0x50 bytes.  Load blocks one by one, keeping copies
   for chaining, and branch out as soon as the length is exhausted. ---- */
3900.Lcbc_dec_tail:
3901	movups	(%rdi),%xmm2
3902	subq	$0x10,%rdx
3903	jbe	.Lcbc_dec_one
3904
3905	movups	16(%rdi),%xmm3
3906	movaps	%xmm2,%xmm11
3907	subq	$0x10,%rdx
3908	jbe	.Lcbc_dec_two
3909
3910	movups	32(%rdi),%xmm4
3911	movaps	%xmm3,%xmm12
3912	subq	$0x10,%rdx
3913	jbe	.Lcbc_dec_three
3914
3915	movups	48(%rdi),%xmm5
3916	movaps	%xmm4,%xmm13
3917	subq	$0x10,%rdx
3918	jbe	.Lcbc_dec_four
3919
/* Five blocks: six-block primitive with a zero sixth block; the fifth
   ciphertext block (%xmm15) becomes the next IV. */
3920	movups	64(%rdi),%xmm6
3921	movaps	%xmm5,%xmm14
3922	movaps	%xmm6,%xmm15
3923	xorps	%xmm7,%xmm7
3924	call	_aesni_decrypt6
3925	pxor	%xmm10,%xmm2
3926	movaps	%xmm15,%xmm10
3927	pxor	%xmm11,%xmm3
3928	movdqu	%xmm2,(%rsi)
3929	pxor	%xmm12,%xmm4
3930	movdqu	%xmm3,16(%rsi)
3931	pxor	%xmm3,%xmm3
3932	pxor	%xmm13,%xmm5
3933	movdqu	%xmm4,32(%rsi)
3934	pxor	%xmm4,%xmm4
3935	pxor	%xmm14,%xmm6
3936	movdqu	%xmm5,48(%rsi)
3937	pxor	%xmm5,%xmm5
3938	leaq	64(%rsi),%rsi
3939	movdqa	%xmm6,%xmm2
3940	pxor	%xmm6,%xmm6
3941	pxor	%xmm7,%xmm7
3942	subq	$0x10,%rdx
3943	jmp	.Lcbc_dec_tail_collected
3944
/* One block, decrypted inline; %xmm11 keeps the ciphertext as next IV. */
3945.align	16
3946.Lcbc_dec_one:
3947	movaps	%xmm2,%xmm11
3948	movups	(%rcx),%xmm0
3949	movups	16(%rcx),%xmm1
3950	leaq	32(%rcx),%rcx
3951	xorps	%xmm0,%xmm2
/* aesdec %xmm1,%xmm2 once per round */
3952.Loop_dec1_17:
3953.byte	102,15,56,222,209
3954	decl	%eax
3955	movups	(%rcx),%xmm1
3956	leaq	16(%rcx),%rcx
3957	jnz	.Loop_dec1_17
/* aesdeclast %xmm1,%xmm2 */
3958.byte	102,15,56,223,209
3959	xorps	%xmm10,%xmm2
3960	movaps	%xmm11,%xmm10
3961	jmp	.Lcbc_dec_tail_collected
/* Two blocks. */
3962.align	16
3963.Lcbc_dec_two:
3964	movaps	%xmm3,%xmm12
3965	call	_aesni_decrypt2
3966	pxor	%xmm10,%xmm2
3967	movaps	%xmm12,%xmm10
3968	pxor	%xmm11,%xmm3
3969	movdqu	%xmm2,(%rsi)
3970	movdqa	%xmm3,%xmm2
3971	pxor	%xmm3,%xmm3
3972	leaq	16(%rsi),%rsi
3973	jmp	.Lcbc_dec_tail_collected
/* Three blocks. */
3974.align	16
3975.Lcbc_dec_three:
3976	movaps	%xmm4,%xmm13
3977	call	_aesni_decrypt3
3978	pxor	%xmm10,%xmm2
3979	movaps	%xmm13,%xmm10
3980	pxor	%xmm11,%xmm3
3981	movdqu	%xmm2,(%rsi)
3982	pxor	%xmm12,%xmm4
3983	movdqu	%xmm3,16(%rsi)
3984	pxor	%xmm3,%xmm3
3985	movdqa	%xmm4,%xmm2
3986	pxor	%xmm4,%xmm4
3987	leaq	32(%rsi),%rsi
3988	jmp	.Lcbc_dec_tail_collected
/* Four blocks. */
3989.align	16
3990.Lcbc_dec_four:
3991	movaps	%xmm5,%xmm14
3992	call	_aesni_decrypt4
3993	pxor	%xmm10,%xmm2
3994	movaps	%xmm14,%xmm10
3995	pxor	%xmm11,%xmm3
3996	movdqu	%xmm2,(%rsi)
3997	pxor	%xmm12,%xmm4
3998	movdqu	%xmm3,16(%rsi)
3999	pxor	%xmm3,%xmm3
4000	pxor	%xmm13,%xmm5
4001	movdqu	%xmm4,32(%rsi)
4002	pxor	%xmm4,%xmm4
4003	movdqa	%xmm5,%xmm2
4004	pxor	%xmm5,%xmm5
4005	leaq	48(%rsi),%rsi
4006	jmp	.Lcbc_dec_tail_collected
4007
/* Entry used by the loop exits: scrub the data registers first. */
4008.align	16
4009.Lcbc_dec_clear_tail_collected:
4010	pxor	%xmm3,%xmm3
4011	pxor	%xmm4,%xmm4
4012	pxor	%xmm5,%xmm5
4013	pxor	%xmm6,%xmm6
4014	pxor	%xmm7,%xmm7
4015	pxor	%xmm8,%xmm8
4016	pxor	%xmm9,%xmm9
/* Common exit: write the new IV back; the last plaintext block is in
   %xmm2.  If the low four bits of %rdx are zero it is stored whole. */
4017.Lcbc_dec_tail_collected:
4018	movups	%xmm10,(%r8)
4019	andq	$15,%rdx
4020	jnz	.Lcbc_dec_tail_partial
4021	movups	%xmm2,(%rsi)
4022	pxor	%xmm2,%xmm2
4023	jmp	.Lcbc_dec_ret
/* Partial last block: spill it to the aligned stack scratch, copy
   16 - (%rdx & 15) bytes to the output with rep movsb, then overwrite
   the scratch with zeros.
   NOTE(review): the byte count is 16 minus the remainder, not the
   remainder itself - confirm the intended contract for lengths that are
   not a multiple of 16. */
4024.align	16
4025.Lcbc_dec_tail_partial:
4026	movaps	%xmm2,(%rsp)
4027	pxor	%xmm2,%xmm2
4028	movq	$16,%rcx
4029	movq	%rsi,%rdi
4030	subq	%rdx,%rcx
4031	leaq	(%rsp),%rsi
/* rep movsb; nop */
4032.long	0x9066A4F3
4033	movdqa	%xmm2,(%rsp)
4034
/* Bulk-decrypt epilogue: scrub key registers, restore %rbp and %rsp. */
4035.Lcbc_dec_ret:
4036	xorps	%xmm0,%xmm0
4037	pxor	%xmm1,%xmm1
4038	movq	-8(%r11),%rbp
4039.cfi_restore	%rbp
4040	leaq	(%r11),%rsp
4041.cfi_def_cfa_register	%rsp
4042.Lcbc_ret:
/* rep ret */
4043	.byte	0xf3,0xc3
4044.cfi_endproc
4045.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
/*
 * aesni_set_decrypt_key: builds a decryption key schedule.
 *
 * The incoming arguments are handed unchanged to __aesni_set_encrypt_key.
 * If that call leaves %eax non-zero the routine returns immediately with
 * that value.  Otherwise the encryption schedule at (%rdx) is converted
 * in place: the order of the round keys is reversed and aesimc is applied
 * to every key except the first and the last.
 *
 * After the call %esi is expected to hold the round count as set by
 * __aesni_set_encrypt_key and %rdx the start of the schedule
 * (NOTE(review): relies on the callee leaving %rdx untouched - confirm).
 * %xmm0 and %xmm1 are zeroed on the success path.
 * The .byte sequences 102,15,56,219,192 / 201 are hand-encoded
 * aesimc %xmm0,%xmm0 / aesimc %xmm1,%xmm1.
 */
4046.globl	aesni_set_decrypt_key
4047.type	aesni_set_decrypt_key,@function
4048.align	16
4049aesni_set_decrypt_key:
4050.cfi_startproc
/* subq $8,%rsp */
4051.byte	0x48,0x83,0xEC,0x08
4052.cfi_adjust_cfa_offset	8
4053	call	__aesni_set_encrypt_key
4054	shll	$4,%esi
4055	testl	%eax,%eax
4056	jnz	.Ldec_key_ret
/* %rdi = address of the round key at offset 16 + 16*rounds */
4057	leaq	16(%rdx,%rsi,1),%rdi
4058
/* Swap the two outermost round keys as they are. */
4059	movups	(%rdx),%xmm0
4060	movups	(%rdi),%xmm1
4061	movups	%xmm0,(%rdi)
4062	movups	%xmm1,(%rdx)
4063	leaq	16(%rdx),%rdx
4064	leaq	-16(%rdi),%rdi
4065
/* Walk inward from both ends: apply aesimc to each pair and swap them,
   until the two pointers meet. */
4066.Ldec_key_inverse:
4067	movups	(%rdx),%xmm0
4068	movups	(%rdi),%xmm1
4069.byte	102,15,56,219,192
4070.byte	102,15,56,219,201
4071	leaq	16(%rdx),%rdx
4072	leaq	-16(%rdi),%rdi
4073	movups	%xmm0,16(%rdi)
4074	movups	%xmm1,-16(%rdx)
4075	cmpq	%rdx,%rdi
4076	ja	.Ldec_key_inverse
4077
/* Remaining middle key: aesimc it, then scrub the temporaries. */
4078	movups	(%rdx),%xmm0
4079.byte	102,15,56,219,192
4080	pxor	%xmm1,%xmm1
4081	movups	%xmm0,(%rdi)
4082	pxor	%xmm0,%xmm0
4083.Ldec_key_ret:
4084	addq	$8,%rsp
4085.cfi_adjust_cfa_offset	-8
/* rep ret */
4086	.byte	0xf3,0xc3
4087.cfi_endproc
4088.LSEH_end_set_decrypt_key:
4089.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
# aesni_set_encrypt_key (second label: __aesni_set_encrypt_key)
#
# Expands a user-supplied AES key into an encryption key schedule.
#
# Inputs, as read by the code below:
#   %rdi   address of the user key; NULL is rejected
#   %esi   key size in bits; 128, 192 and 256 are accepted
#   %rdx   address of the output key schedule; NULL is rejected
# Result in %rax:
#   rax = 0    schedule written
#   rax = -1   %rdi or %rdx was NULL
#   rax = -2   %esi was none of 128/192/256
# Output layout: the round keys are written back to back starting at
# (%rdx), and a 32-bit count of 9, 11 or 13 is stored at byte offset 240
# from %rdx. aesni_encrypt at the top of this file reads the same offset
# with movl 240(%rdx),%eax.
# Registers written: %rax, %esi, %r10d, flags and %xmm0-%xmm5; the xmm
# registers are zeroed at .Lenc_key_ret before returning.
4090.globl	aesni_set_encrypt_key
4091.type	aesni_set_encrypt_key,@function
4092.align	16
4093aesni_set_encrypt_key:
# Both labels mark the same address.
# NOTE(review): the double-underscore name is presumably the target used by
# aesni_set_decrypt_key; its callers are outside this section - confirm.
4094__aesni_set_encrypt_key:
4095.cfi_startproc
# The four bytes 48 83 EC 08 encode subq $8,%rsp. The CFI directive on the
# next line records that adjustment and .Lenc_key_ret undoes it with
# addq $8,%rsp.
4096.byte	0x48,0x83,0xEC,0x08
4097.cfi_adjust_cfa_offset	8
# Preload the -1 result so either NULL check can branch straight to the
# common exit.
4098	movq	$-1,%rax
4099	testq	%rdi,%rdi
4100	jz	.Lenc_key_ret
4101	testq	%rdx,%rdx
4102	jz	.Lenc_key_ret
4103
# The mask 268437504 is 0x10000800 (bits 28 and 11). It is ANDed with the
# second 32-bit word of OPENSSL_ia32cap_P, and each key-size path compares
# the result with 268435456 (0x10000000): the _alt variants therefore run
# only when bit 28 is set and bit 11 is clear.
# NOTE(review): OPENSSL_ia32cap(3) describes bit 28 of that word as AVX and
# bit 11 as AMD XOP - confirm for the OpenSSL version in use.
4104	movl	$268437504,%r10d
# Load the first 16 key bytes (round key 0) into xmm0.
4105	movups	(%rdi),%xmm0
# The .Lkey_expansion_* helpers use xmm4 as shufps scratch and expect it to
# start out as zero.
4106	xorps	%xmm4,%xmm4
4107	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
# From here on %rax is the write cursor into the schedule, starting at the
# second 16-byte slot.
4108	leaq	16(%rdx),%rax
4109	cmpl	$256,%esi
4110	je	.L14rounds
4111	cmpl	$192,%esi
4112	je	.L12rounds
4113	cmpl	$128,%esi
4114	jne	.Lbad_keybits
4115
# ---- 128-bit key -------------------------------------------------------
# The stored count is 9: aesni_encrypt runs that many aesenc steps and then
# one aesenclast, giving the 10 rounds the label refers to.
4116.L10rounds:
4117	movl	$9,%esi
4118	cmpl	$268435456,%r10d
4119	je	.L10rounds_alt
4120
# Default path. Each .byte 102,15,58,223,200,N encodes
# aeskeygenassist $N,%xmm0,%xmm1, with N running through the round
# constants 1,2,4,8,16,32,64,128,27,54. The first call enters at the _cold
# label, which skips the store because round key 0 is written right here.
4121	movups	%xmm0,(%rdx)
4122.byte	102,15,58,223,200,1
4123	call	.Lkey_expansion_128_cold
4124.byte	102,15,58,223,200,2
4125	call	.Lkey_expansion_128
4126.byte	102,15,58,223,200,4
4127	call	.Lkey_expansion_128
4128.byte	102,15,58,223,200,8
4129	call	.Lkey_expansion_128
4130.byte	102,15,58,223,200,16
4131	call	.Lkey_expansion_128
4132.byte	102,15,58,223,200,32
4133	call	.Lkey_expansion_128
4134.byte	102,15,58,223,200,64
4135	call	.Lkey_expansion_128
4136.byte	102,15,58,223,200,128
4137	call	.Lkey_expansion_128
4138.byte	102,15,58,223,200,27
4139	call	.Lkey_expansion_128
4140.byte	102,15,58,223,200,54
4141	call	.Lkey_expansion_128
# The nine storing calls advanced %rax from %rdx+16 to %rdx+160. Write the
# last round key there; 80(%rax) is then %rdx+240, the count field.
4142	movups	%xmm0,(%rax)
4143	movl	%esi,80(%rax)
4144	xorl	%eax,%eax
4145	jmp	.Lenc_key_ret
4146
# Alternative 128-bit path: no aeskeygenassist, uses pshufb + aesenclast.
# xmm5 = byte-shuffle mask, xmm4 = round constant replicated in each dword
# (starts at 1), xmm2 = copy of the previous round key, r10d = loop count.
4147.align	16
4148.L10rounds_alt:
4149	movdqa	.Lkey_rotate(%rip),%xmm5
4150	movl	$8,%r10d
4151	movdqa	.Lkey_rcon1(%rip),%xmm4
4152	movdqa	%xmm0,%xmm2
4153	movdqu	%xmm0,(%rdx)
# The jump only skips the alignment padding in front of the loop head.
4154	jmp	.Loop_key128
4155
4156.align	16
4157.Loop_key128:
# Bytes 102,15,56,0,197 encode pshufb %xmm5,%xmm0. With the .Lkey_rotate
# mask this copies bytes 13,14,15,12 of xmm0 into every dword, i.e. the last
# key word rotated by one byte.
# Bytes 102,15,56,221,196 encode aesenclast %xmm4,%xmm0, which substitutes
# the bytes through the AES S-box and XORs in the round constant.
# NOTE(review): this depends on the ShiftRows part of aesenclast changing
# nothing when all four columns are equal - see the Intel SDM entry.
4158.byte	102,15,56,0,197
4159.byte	102,15,56,221,196
# Double the round constant for the next iteration and advance the cursor.
4160	pslld	$1,%xmm4
4161	leaq	16(%rax),%rax
4162
# Build k ^ (k<<32) ^ (k<<64) ^ (k<<96) from the previous round key k in
# xmm2 (pslldq shifts by whole bytes, 4 bytes = one key word).
4163	movdqa	%xmm2,%xmm3
4164	pslldq	$4,%xmm2
4165	pxor	%xmm2,%xmm3
4166	pslldq	$4,%xmm2
4167	pxor	%xmm2,%xmm3
4168	pslldq	$4,%xmm2
4169	pxor	%xmm3,%xmm2
4170
# Combine with the substituted word to get the new round key, store it in
# the slot just stepped over, and keep a copy for the next pass.
4171	pxor	%xmm2,%xmm0
4172	movdqu	%xmm0,-16(%rax)
4173	movdqa	%xmm0,%xmm2
4174
4175	decl	%r10d
4176	jnz	.Loop_key128
4177
# Eight doublings of the constant 1 have been consumed. The last two round
# keys use 0x1b (loaded here) and then 0x36 (produced by the pslld below).
4178	movdqa	.Lkey_rcon1b(%rip),%xmm4
4179
4180.byte	102,15,56,0,197
4181.byte	102,15,56,221,196
4182	pslld	$1,%xmm4
4183
4184	movdqa	%xmm2,%xmm3
4185	pslldq	$4,%xmm2
4186	pxor	%xmm2,%xmm3
4187	pslldq	$4,%xmm2
4188	pxor	%xmm2,%xmm3
4189	pslldq	$4,%xmm2
4190	pxor	%xmm3,%xmm2
4191
# Ninth round key goes to (%rax), which is %rdx+144 at this point.
4192	pxor	%xmm2,%xmm0
4193	movdqu	%xmm0,(%rax)
4194
4195	movdqa	%xmm0,%xmm2
4196.byte	102,15,56,0,197
4197.byte	102,15,56,221,196
4198
4199	movdqa	%xmm2,%xmm3
4200	pslldq	$4,%xmm2
4201	pxor	%xmm2,%xmm3
4202	pslldq	$4,%xmm2
4203	pxor	%xmm2,%xmm3
4204	pslldq	$4,%xmm2
4205	pxor	%xmm3,%xmm2
4206
# Tenth round key at %rdx+160; the count lands at 96(%rax) = %rdx+240.
4207	pxor	%xmm2,%xmm0
4208	movdqu	%xmm0,16(%rax)
4209
4210	movl	%esi,96(%rax)
4211	xorl	%eax,%eax
4212	jmp	.Lenc_key_ret
4213
# ---- 192-bit key -------------------------------------------------------
# The movq loads key bytes 16..23 into the low half of xmm2 and clears the
# upper half. The stored count is 11.
4214.align	16
4215.L12rounds:
4216	movq	16(%rdi),%xmm2
4217	movl	$11,%esi
4218	cmpl	$268435456,%r10d
4219	je	.L12rounds_alt
4220
# Default path. Each .byte 102,15,58,223,202,N encodes
# aeskeygenassist $N,%xmm2,%xmm1. The helpers alternate: the 192a variants
# store 16 bytes (none for _cold), the 192b variant stores 32 bytes.
4221	movups	%xmm0,(%rdx)
4222.byte	102,15,58,223,202,1
4223	call	.Lkey_expansion_192a_cold
4224.byte	102,15,58,223,202,2
4225	call	.Lkey_expansion_192b
4226.byte	102,15,58,223,202,4
4227	call	.Lkey_expansion_192a
4228.byte	102,15,58,223,202,8
4229	call	.Lkey_expansion_192b
4230.byte	102,15,58,223,202,16
4231	call	.Lkey_expansion_192a
4232.byte	102,15,58,223,202,32
4233	call	.Lkey_expansion_192b
4234.byte	102,15,58,223,202,64
4235	call	.Lkey_expansion_192a
4236.byte	102,15,58,223,202,128
4237	call	.Lkey_expansion_192b
# The helpers wrote 4*32 + 3*16 = 176 bytes, so %rax is %rdx+192. Store the
# last round key; 48(%rax) is %rdx+240.
4238	movups	%xmm0,(%rax)
4239	movl	%esi,48(%rax)
4240	xorq	%rax,%rax
4241	jmp	.Lenc_key_ret
4242
# Alternative 192-bit path: same pshufb + aesenclast idea as the 128-bit
# one, but with the .Lkey_rotate192 mask, which selects bytes 5,6,7,4 of
# xmm2 (the second of the two key words held in its low half).
4243.align	16
4244.L12rounds_alt:
4245	movdqa	.Lkey_rotate192(%rip),%xmm5
4246	movdqa	.Lkey_rcon1(%rip),%xmm4
4247	movl	$8,%r10d
4248	movdqu	%xmm0,(%rdx)
4249	jmp	.Loop_key192
4250
# Each pass writes 24 bytes: the low 8 bytes of xmm2 at 0(%rax), then,
# after %rax has advanced by 24, the new 16 bytes of xmm0 at -16(%rax).
4251.align	16
4252.Loop_key192:
4253	movq	%xmm2,0(%rax)
4254	movdqa	%xmm2,%xmm1
# Bytes 102,15,56,0,213 encode pshufb %xmm5,%xmm2 and bytes
# 102,15,56,221,212 encode aesenclast %xmm4,%xmm2.
4255.byte	102,15,56,0,213
4256.byte	102,15,56,221,212
4257	pslld	$1,%xmm4
4258	leaq	24(%rax),%rax
4259
# Running XOR of the four words of xmm0, as in the 128-bit loop.
4260	movdqa	%xmm0,%xmm3
4261	pslldq	$4,%xmm0
4262	pxor	%xmm0,%xmm3
4263	pslldq	$4,%xmm0
4264	pxor	%xmm0,%xmm3
4265	pslldq	$4,%xmm0
4266	pxor	%xmm3,%xmm0
4267
# xmm3 = top word of that running XOR in every dword, combined with the
# saved xmm2 words (xmm1) and their one-word shift; it feeds the next two
# key words.
4268	pshufd	$0xff,%xmm0,%xmm3
4269	pxor	%xmm1,%xmm3
4270	pslldq	$4,%xmm1
4271	pxor	%xmm1,%xmm3
4272
# Fold the substituted word into both halves of the state and store the
# 16-byte part.
4273	pxor	%xmm2,%xmm0
4274	pxor	%xmm3,%xmm2
4275	movdqu	%xmm0,-16(%rax)
4276
4277	decl	%r10d
4278	jnz	.Loop_key192
4279
# Eight passes of 24 bytes leave %rax at %rdx+208; 32(%rax) is %rdx+240.
4280	movl	%esi,32(%rax)
4281	xorl	%eax,%eax
4282	jmp	.Lenc_key_ret
4283
# ---- 256-bit key -------------------------------------------------------
# xmm2 receives key bytes 16..31. The stored count is 13. The cursor moves
# one more slot forward because the first two round keys are the raw key.
4284.align	16
4285.L14rounds:
4286	movups	16(%rdi),%xmm2
4287	movl	$13,%esi
4288	leaq	16(%rax),%rax
4289	cmpl	$268435456,%r10d
4290	je	.L14rounds_alt
4291
# Default path. The ...,202,N form is aeskeygenassist $N,%xmm2,%xmm1 and
# precedes a 256a step (updates xmm0); the ...,200,N form is
# aeskeygenassist $N,%xmm0,%xmm1 and precedes a 256b step (updates xmm2).
4292	movups	%xmm0,(%rdx)
4293	movups	%xmm2,16(%rdx)
4294.byte	102,15,58,223,202,1
4295	call	.Lkey_expansion_256a_cold
4296.byte	102,15,58,223,200,1
4297	call	.Lkey_expansion_256b
4298.byte	102,15,58,223,202,2
4299	call	.Lkey_expansion_256a
4300.byte	102,15,58,223,200,2
4301	call	.Lkey_expansion_256b
4302.byte	102,15,58,223,202,4
4303	call	.Lkey_expansion_256a
4304.byte	102,15,58,223,200,4
4305	call	.Lkey_expansion_256b
4306.byte	102,15,58,223,202,8
4307	call	.Lkey_expansion_256a
4308.byte	102,15,58,223,200,8
4309	call	.Lkey_expansion_256b
4310.byte	102,15,58,223,202,16
4311	call	.Lkey_expansion_256a
4312.byte	102,15,58,223,200,16
4313	call	.Lkey_expansion_256b
4314.byte	102,15,58,223,202,32
4315	call	.Lkey_expansion_256a
4316.byte	102,15,58,223,200,32
4317	call	.Lkey_expansion_256b
4318.byte	102,15,58,223,202,64
4319	call	.Lkey_expansion_256a
# Twelve storing calls moved %rax from %rdx+32 to %rdx+224. Store the last
# round key; 16(%rax) is %rdx+240.
4320	movups	%xmm0,(%rax)
4321	movl	%esi,16(%rax)
4322	xorq	%rax,%rax
4323	jmp	.Lenc_key_ret
4324
# Alternative 256-bit path. xmm0 and xmm1/xmm2 carry the two most recent
# round keys; r10d counts the seven even-numbered keys still to produce.
4325.align	16
4326.L14rounds_alt:
4327	movdqa	.Lkey_rotate(%rip),%xmm5
4328	movdqa	.Lkey_rcon1(%rip),%xmm4
4329	movl	$7,%r10d
4330	movdqu	%xmm0,0(%rdx)
4331	movdqa	%xmm2,%xmm1
4332	movdqu	%xmm2,16(%rdx)
4333	jmp	.Loop_key256
4334
4335.align	16
4336.Loop_key256:
# First half of a pass: pshufb %xmm5,%xmm2 then aesenclast %xmm4,%xmm2
# (rotate, substitute, add round constant), followed by the running XOR of
# xmm0 and a doubling of the round constant.
4337.byte	102,15,56,0,213
4338.byte	102,15,56,221,212
4339
4340	movdqa	%xmm0,%xmm3
4341	pslldq	$4,%xmm0
4342	pxor	%xmm0,%xmm3
4343	pslldq	$4,%xmm0
4344	pxor	%xmm0,%xmm3
4345	pslldq	$4,%xmm0
4346	pxor	%xmm3,%xmm0
4347	pslld	$1,%xmm4
4348
4349	pxor	%xmm2,%xmm0
4350	movdqu	%xmm0,(%rax)
4351
# The seventh key written above is the final one, so the loop exits between
# the two halves.
4352	decl	%r10d
4353	jz	.Ldone_key256
4354
# Second half: broadcast the top word of the key just written, then
# aesenclast %xmm3,%xmm2 (bytes 102,15,56,221,211) with xmm3 zeroed, so the
# word is only substituted - no rotation mask and no round constant here.
4355	pshufd	$0xff,%xmm0,%xmm2
4356	pxor	%xmm3,%xmm3
4357.byte	102,15,56,221,211
4358
4359	movdqa	%xmm1,%xmm3
4360	pslldq	$4,%xmm1
4361	pxor	%xmm1,%xmm3
4362	pslldq	$4,%xmm1
4363	pxor	%xmm1,%xmm3
4364	pslldq	$4,%xmm1
4365	pxor	%xmm3,%xmm1
4366
4367	pxor	%xmm1,%xmm2
4368	movdqu	%xmm2,16(%rax)
4369	leaq	32(%rax),%rax
4370	movdqa	%xmm2,%xmm1
4371
4372	jmp	.Loop_key256
4373
# Six full passes of 32 bytes leave %rax at %rdx+224, where the final key
# was just stored; 16(%rax) is %rdx+240.
4374.Ldone_key256:
4375	movl	%esi,16(%rax)
4376	xorl	%eax,%eax
4377	jmp	.Lenc_key_ret
4378
# Unsupported key size: return -2 through the common exit.
4379.align	16
4380.Lbad_keybits:
4381	movq	$-2,%rax
# Common exit for every path. All six xmm registers used above are zeroed
# (presumably so that no key material stays in them), the 8-byte stack
# adjustment from the entry is undone, and the bytes F3 C3 (ret with a rep
# prefix) return to the caller with the result already in %rax.
4382.Lenc_key_ret:
4383	pxor	%xmm0,%xmm0
4384	pxor	%xmm1,%xmm1
4385	pxor	%xmm2,%xmm2
4386	pxor	%xmm3,%xmm3
4387	pxor	%xmm4,%xmm4
4388	pxor	%xmm5,%xmm5
4389	addq	$8,%rsp
4390.cfi_adjust_cfa_offset	-8
4391	.byte	0xf3,0xc3
# End marker for the main body; nothing in this section references it.
# NOTE(review): the name suggests Win64 SEH bookkeeping in the generator.
4392.LSEH_end_set_encrypt_key:
4393
# .Lkey_expansion_128 / .Lkey_expansion_128_cold
#
# One step of the 128-bit key schedule, called from the .L10rounds path.
#   in:  xmm0 = current round key
#        xmm1 = aeskeygenassist result for xmm0
#        xmm4 = scratch (zeroed once by the caller, reused across calls)
#        rax  = write cursor (not used by the _cold entry)
#   out: xmm0 = next round key; rax advanced by 16 (regular entry only)
# The regular entry first stores the current key; the _cold entry skips the
# store and is used for the very first step.
4394.align	16
4395.Lkey_expansion_128:
4396	movups	%xmm0,(%rax)
4397	leaq	16(%rax),%rax
4398.Lkey_expansion_128_cold:
# The two shufps/xorps pairs fold the lower words of xmm0 into the higher
# ones, so each word ends up XORed with all words below it.
4399	shufps	$16,%xmm0,%xmm4
4400	xorps	%xmm4,%xmm0
4401	shufps	$140,%xmm0,%xmm4
4402	xorps	%xmm4,%xmm0
# Selector 255 replicates dword 3 of the aeskeygenassist result, which the
# Intel SDM defines as RotWord(SubWord(X3)) xor RCON.
4403	shufps	$255,%xmm1,%xmm1
4404	xorps	%xmm1,%xmm0
# Bytes F3 C3: ret with a rep prefix.
4405	.byte	0xf3,0xc3
4406
# .Lkey_expansion_192a / _192a_cold / .Lkey_expansion_192b_warm
#
# One step of the 192-bit key schedule, called from the .L12rounds path.
#   in:  xmm0 = first four words of the current key state
#        xmm2 = remaining two words of the state in its low half
#        xmm1 = aeskeygenassist result for xmm2
#        xmm4 = scratch shared with the other helpers
#        rax  = write cursor
#   out: xmm0, xmm2 = next key state; xmm5 = copy of the incoming xmm2
# Entry points: the regular entry stores xmm0 and advances rax by 16, the
# _cold entry skips that store, and _192b_warm is the shared tail that
# .Lkey_expansion_192b jumps to after doing its own stores.
4407.align	16
4408.Lkey_expansion_192a:
4409	movups	%xmm0,(%rax)
4410	leaq	16(%rax),%rax
4411.Lkey_expansion_192a_cold:
# Keep the incoming xmm2 for the next 192b call, which stores its low half.
4412	movaps	%xmm2,%xmm5
4413.Lkey_expansion_192b_warm:
# Interleaved: word-folding of xmm0 (shufps/xorps on xmm4) and the one-word
# shift of xmm2 (via xmm3).
4414	shufps	$16,%xmm0,%xmm4
4415	movdqa	%xmm2,%xmm3
4416	xorps	%xmm4,%xmm0
4417	shufps	$140,%xmm0,%xmm4
4418	pslldq	$4,%xmm3
4419	xorps	%xmm4,%xmm0
# Selector 85 replicates dword 1 of the aeskeygenassist result, which the
# Intel SDM defines as RotWord(SubWord(X1)) xor RCON.
4420	pshufd	$85,%xmm1,%xmm1
4421	pxor	%xmm3,%xmm2
4422	pxor	%xmm1,%xmm0
# Carry the top word of the new xmm0 into the two words held in xmm2.
4423	pshufd	$255,%xmm0,%xmm3
4424	pxor	%xmm3,%xmm2
# Bytes F3 C3: ret with a rep prefix.
4425	.byte	0xf3,0xc3
4426
# .Lkey_expansion_192b
#
# Companion step of the 192-bit key schedule. It writes 32 bytes (two
# 16-byte round keys) assembled from the 24-byte key state and then
# continues in the shared tail .Lkey_expansion_192b_warm, whose return
# instruction goes back to the caller.
#   in:  xmm5 = xmm2 as saved by the previous 192a step
#        xmm0, xmm2 = current key state; rax = write cursor
#   out: rax advanced by 32; xmm3 and xmm5 overwritten
4427.align	16
4428.Lkey_expansion_192b:
4429	movaps	%xmm0,%xmm3
# Selector 68: xmm5 = { xmm5 low half, xmm0 low half }.
4430	shufps	$68,%xmm0,%xmm5
4431	movups	%xmm5,(%rax)
# Selector 78: xmm3 = { xmm0 high half, xmm2 low half }.
4432	shufps	$78,%xmm2,%xmm3
4433	movups	%xmm3,16(%rax)
4434	leaq	32(%rax),%rax
4435	jmp	.Lkey_expansion_192b_warm
4436
# .Lkey_expansion_256a / .Lkey_expansion_256a_cold
#
# Even step of the 256-bit key schedule, called from the .L14rounds path.
#   in:  xmm0 = round key to be updated
#        xmm2 = most recent round key (stored by the regular entry)
#        xmm1 = aeskeygenassist result for xmm2
#        xmm4 = scratch shared with the other helpers; rax = write cursor
#   out: xmm0 = next round key; rax advanced by 16 (regular entry only)
# The _cold entry skips the store and is used for the first step, after
# the caller has written both halves of the raw key itself.
4437.align	16
4438.Lkey_expansion_256a:
4439	movups	%xmm2,(%rax)
4440	leaq	16(%rax),%rax
4441.Lkey_expansion_256a_cold:
# Same word-folding and dword-3 broadcast as in .Lkey_expansion_128.
4442	shufps	$16,%xmm0,%xmm4
4443	xorps	%xmm4,%xmm0
4444	shufps	$140,%xmm0,%xmm4
4445	xorps	%xmm4,%xmm0
4446	shufps	$255,%xmm1,%xmm1
4447	xorps	%xmm1,%xmm0
# Bytes F3 C3: ret with a rep prefix.
4448	.byte	0xf3,0xc3
4449
# .Lkey_expansion_256b
#
# Odd step of the 256-bit key schedule, called from the .L14rounds path.
#   in:  xmm0 = most recent round key (stored here)
#        xmm2 = round key to be updated
#        xmm1 = aeskeygenassist result for xmm0
#        xmm4 = scratch shared with the other helpers; rax = write cursor
#   out: xmm2 = next round key; rax advanced by 16
4450.align	16
4451.Lkey_expansion_256b:
4452	movups	%xmm0,(%rax)
4453	leaq	16(%rax),%rax
4454
4455	shufps	$16,%xmm2,%xmm4
4456	xorps	%xmm4,%xmm2
4457	shufps	$140,%xmm2,%xmm4
4458	xorps	%xmm4,%xmm2
# Selector 170 replicates dword 2 of the aeskeygenassist result, which the
# Intel SDM defines as SubWord(X3): substitution only, without rotation or
# round constant.
4459	shufps	$170,%xmm1,%xmm1
4460	xorps	%xmm1,%xmm2
# Bytes F3 C3: ret with a rep prefix.
4461	.byte	0xf3,0xc3
# The directives below close the CFI region opened at aesni_set_encrypt_key
# and set the symbol sizes of both entry labels, so the helper routines
# above are counted as part of that function.
4462.cfi_endproc
4463.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
4464.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant tables. The block starts on a 64-byte boundary and every entry
# is 16 bytes long, so each label below sits at a multiple of 16 bytes;
# aesni_set_encrypt_key loads four of them with movdqa.
# Users of .Lbswap_mask, .Lincrement32, .Lincrement64, .Lxts_magic and
# .Lincrement1 are outside this section, so only their values are
# described here.
4465.align	64
# Byte indices 15 down to 0.
# NOTE(review): presumably a pshufb mask that reverses byte order - confirm
# at the use sites.
4466.Lbswap_mask:
4467.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# Three dwords of 6 followed by a zero dword.
4468.Lincrement32:
4469.long	6,6,6,0
# The value 1 in the lowest dword, zero elsewhere.
4470.Lincrement64:
4471.long	1,0,0,0
# Dwords 0x87, 0, 1, 0.
4472.Lxts_magic:
4473.long	0x87,0,1,0
# Fifteen zero bytes followed by a single 1 in the highest byte.
4474.Lincrement1:
4475.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# Shuffle mask used by .L10rounds_alt and .L14rounds_alt: each dword
# selects source bytes 13,14,15,12.
4476.Lkey_rotate:
4477.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
# Shuffle mask used by .L12rounds_alt: each dword selects source bytes
# 5,6,7,4.
4478.Lkey_rotate192:
4479.long	0x04070605,0x04070605,0x04070605,0x04070605
# Initial round constant 1 in every dword; the _alt loops double it with
# pslld after each use.
4480.Lkey_rcon1:
4481.long	1,1,1,1
# Round constant 0x1b in every dword, loaded by .L10rounds_alt once the
# doubling sequence that started at 1 has been used up.
4482.Lkey_rcon1b:
4483.long	0x1b,0x1b,0x1b,0x1b
4484
# NUL-terminated ASCII banner:
# AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
4485.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4486.align	64
# ELF note in the allocated section .note.gnu.property.
# Note header: name size (1f - 0f, the four bytes G N U NUL), descriptor
# size (4f - 1f) and note type 5.
# NOTE(review): per the Linux gABI extensions, type 5 is
# NT_GNU_PROPERTY_TYPE_0 - confirm against the specification.
4487	.section ".note.gnu.property", "a"
4488	.p2align 3
4489	.long 1f - 0f
4490	.long 4f - 1f
4491	.long 5
44920:
4493	# "GNU" encoded with .byte, since .asciz isn't supported
4494	# on Solaris.
4495	.byte 0x47
4496	.byte 0x4e
4497	.byte 0x55
4498	.byte 0
44991:
# Descriptor: one property record made of a type word, a data size word
# (3f - 2f, i.e. four bytes) and the data word 3, padded to 8 bytes.
# NOTE(review): per the x86-64 psABI, 0xc0000002 is
# GNU_PROPERTY_X86_FEATURE_1_AND and the value 3 sets its IBT and SHSTK
# bits - confirm against the specification.
4500	.p2align 3
4501	.long 0xc0000002
4502	.long 3f - 2f
45032:
4504	.long 3
45053:
4506	.p2align 3
45074:
4508