/* xref: /freebsd/sys/crypto/openssl/amd64/e_padlock-x86_64.S (revision ec0ea6efa1ad229d75c394c1a9b9cac33af2b1d3) */
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from e_padlock-x86_64.pl. */
.text

/*
 * padlock_capability -- probe CPUID for the Centaur/Zhaoxin extended
 * feature leaf.
 *
 * ABI:   System V AMD64, GNU as AT&T syntax.  Takes no arguments.
 * Out:   %eax = 0 when the vendor string is neither "CentaurHauls" nor
 *               "  Shanghai  ", or when the highest 0xC000xxxx leaf is
 *               below 0xC0000001;
 *               otherwise %edx of CPUID leaf 0xC0000001 with bit 4
 *               forced to 1.
 * Clobb: %rcx, %rdx, %r8, flags.  %rbx is preserved (parked in %r8,
 *        because cpuid overwrites it).
 */
.globl	padlock_capability
.type	padlock_capability,@function
.align	16
padlock_capability:
	movq	%rbx,%r8		/* save callee-saved %rbx around cpuid */
	xorl	%eax,%eax		/* leaf 0: vendor string in ebx:edx:ecx */
	cpuid
	xorl	%eax,%eax		/* default return value: no capability */
	cmpl	$0x746e6543,%ebx	/* "Cent" */
	jne	.Lzhaoxin
	cmpl	$0x48727561,%edx	/* "aurH" */
	jne	.Lnoluck
	cmpl	$0x736c7561,%ecx	/* "auls" */
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmpl	$0x68532020,%ebx	/* "  Sh" */
	jne	.Lnoluck
	cmpl	$0x68676e61,%edx	/* "angh" */
	jne	.Lnoluck
	cmpl	$0x20206961,%ecx	/* "ai  " */
	jne	.Lnoluck
.LzhaoxinEnd:
	movl	$0xC0000000,%eax	/* query highest extended leaf */
	cpuid
	movl	%eax,%edx
	xorl	%eax,%eax		/* return 0 if the leaf is missing */
	cmpl	$0xC0000001,%edx
	jb	.Lnoluck
	movl	$0xC0000001,%eax	/* extended feature flags */
	cpuid
	movl	%edx,%eax		/* feature word is the return value */
	andl	$0xffffffef,%eax	/* clear bit 4 ... */
	orl	$0x10,%eax		/* ... then set it: bit 4 is always 1 on return */
.Lnoluck:
	movq	%r8,%rbx		/* restore %rbx */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_capability,.-padlock_capability
/*
 * padlock_key_bswap -- byte-swap, in place, every 32-bit word of an
 * expanded AES key schedule.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = key structure.  The 32-bit value at offset 240 is read
 *        as the round count; this matches OpenSSL's AES_KEY layout
 *        (rd_key[60] followed by rounds) -- assumed, confirm against the
 *        caller.
 * Out:   nothing; the schedule is modified in memory.
 * Clobb: %eax, %edx, %rdi, flags.
 *
 * The schedule holds 4*(rounds+1) words, so that is the number of words
 * swapped.  Using the raw round count as the word count would leave most
 * of the schedule untouched and would loop 2^32 times for a count of 0.
 */
.globl	padlock_key_bswap
.type	padlock_key_bswap,@function
.align	16
padlock_key_bswap:
	movl	240(%rdi),%edx		/* edx = rounds */
	incl	%edx			/* rounds + 1 round keys ... */
	shll	$2,%edx			/* ... of four 32-bit words each */
.Lbswap_loop:
	movl	(%rdi),%eax
	bswapl	%eax
	movl	%eax,(%rdi)
	leaq	4(%rdi),%rdi		/* next word; lea leaves flags alone */
	subl	$1,%edx
	jnz	.Lbswap_loop
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_key_bswap,.-padlock_key_bswap
/*
 * padlock_verify_context -- public wrapper around _padlock_verify_ctx.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = context pointer.
 * Out:   none defined; .Lpadlock_saved_context is updated to %rdi by the
 *        helper.
 * Clobb: %rax, %rdx, %r8, flags.
 *
 * The helper expects the caller's RFLAGS image on the stack directly
 * above its return address, hence the pushf before the call.
 */
.globl	padlock_verify_context
.type	padlock_verify_context,@function
.align	16
padlock_verify_context:
	movq	%rdi,%rdx		/* helper takes the context in %rdx */
	pushf				/* RFLAGS image for the helper to inspect */
	leaq	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	leaq	8(%rsp),%rsp		/* drop the pushed RFLAGS image */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_verify_context,.-padlock_verify_context
/*
 * _padlock_verify_ctx -- file-local helper with a private calling
 * convention (not exported).
 *
 * In:    %rax     = address of the saved-context slot,
 *        %rdx     = context pointer about to be used,
 *        8(%rsp)  = RFLAGS image pushed by the caller just before the
 *                   call (it sits above the return address).
 * Out:   (%rax) = %rdx.
 * Clobb: %r8, flags.
 *
 * If bit 30 of the pushed RFLAGS image is set and the saved context
 * differs from %rdx, a pushf/popf pair is executed.  NOTE(review):
 * presumably bit 30 is the PadLock "key already loaded" indicator and
 * popf clears it so the next xcrypt reloads the key -- confirm against
 * the VIA PadLock programming guide.
 */
.type	_padlock_verify_ctx,@function
.align	16
_padlock_verify_ctx:
	movq	8(%rsp),%r8		/* caller's RFLAGS image */
	btq	$30,%r8
	jnc	.Lverified		/* bit 30 clear: nothing to invalidate */
	cmpq	(%rax),%rdx
	je	.Lverified		/* same context as last time */
	pushf
	popf				/* rewrite RFLAGS */
.Lverified:
	movq	%rdx,(%rax)		/* remember the context now in use */
	.byte	0xf3,0xc3		/* rep ret */
.size	_padlock_verify_ctx,.-_padlock_verify_ctx
/*
 * padlock_reload_key -- execute a pushf/popf pair and return.
 *
 * Takes no arguments and defines no return value; no register is
 * modified.  NOTE(review): presumably the RFLAGS rewrite makes the
 * PadLock unit reload its key on the next xcrypt -- confirm against the
 * VIA PadLock programming guide.
 */
.globl	padlock_reload_key
.type	padlock_reload_key,@function
.align	16
padlock_reload_key:
	pushf
	popf
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_reload_key,.-padlock_reload_key
/*
 * padlock_aes_block -- run "rep xcryptecb" with a count of one block.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = out, %rsi = inp, %rdx = ctx.
 *        The code hands ctx+16 to the instruction in %rdx and ctx+32 in
 *        %rbx; per the PadLock register convention these are the control
 *        word and the key.
 * Out:   none defined.
 * Clobb: %rcx, %rdx, %r8, and whatever the instruction updates
 *        (%rsi/%rdi).  %rbx is preserved via %r8.
 */
.globl	padlock_aes_block
.type	padlock_aes_block,@function
.align	16
padlock_aes_block:
	movq	%rbx,%r8		/* save callee-saved %rbx */
	movq	$1,%rcx			/* block count = 1 */
	leaq	32(%rdx),%rbx		/* key */
	leaq	16(%rdx),%rdx		/* control word */
.byte	0xf3,0x0f,0xa7,0xc8		/* rep xcryptecb */
	movq	%r8,%rbx		/* restore %rbx */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_aes_block,.-padlock_aes_block
/*
 * padlock_xstore -- issue a single xstore (PadLock RNG) instruction.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = destination buffer (left in place for the instruction),
 *        %esi = 32-bit value copied to %edx before the instruction.
 * Out:   %eax as left by xstore.  NOTE(review): presumably %edx is the
 *        quality factor and %eax the RNG status word -- confirm against
 *        the VIA PadLock programming guide.
 */
.globl	padlock_xstore
.type	padlock_xstore,@function
.align	16
padlock_xstore:
	movl	%esi,%edx
.byte	0x0f,0xa7,0xc0			/* xstore */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_xstore,.-padlock_xstore
/*
 * padlock_sha1_oneshot -- run "rep xsha1" with %rax = 0 over an input
 * buffer, using a 16-byte-aligned stack copy of the 20-byte hash state.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = hash state (20 bytes, any alignment),
 *        %rsi = input, %rdx = length (moved to %rcx for the instruction).
 * Out:   the 20 bytes at the original %rdi are overwritten with the
 *        updated state.
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, flags.
 *
 * NOTE(review): %rax = 0 presumably selects the mode in which the
 * hardware finalises the message -- confirm against the PadLock guide.
 */
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,@function
.align	16
padlock_sha1_oneshot:
	movq	%rdx,%rcx		/* count for the rep prefix */
	movq	%rdi,%rdx		/* put aside the caller's state pointer */
	movups	(%rdi),%xmm0		/* copy-in state, bytes 0..15 */
	subq	$128+8,%rsp		/* 136 bytes: makes %rsp 16-byte aligned */
	movl	16(%rdi),%eax		/* copy-in state, bytes 16..19 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movl	%eax,16(%rsp)
	xorq	%rax,%rax
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy-out state */
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
/*
 * padlock_sha1_blocks -- run "rep xsha1" with %rax = -1 over an input
 * buffer, using a 16-byte-aligned stack copy of the 20-byte hash state.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = hash state (20 bytes, any alignment),
 *        %rsi = input, %rdx = count (moved to %rcx for the instruction).
 * Out:   the 20 bytes at the original %rdi are overwritten with the
 *        updated state.
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, flags.
 *
 * NOTE(review): %rax = -1 presumably selects the mode that processes
 * whole blocks without finalising -- confirm against the PadLock guide.
 */
.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,@function
.align	16
padlock_sha1_blocks:
	movq	%rdx,%rcx		/* count for the rep prefix */
	movq	%rdi,%rdx		/* put aside the caller's state pointer */
	movups	(%rdi),%xmm0		/* copy-in state, bytes 0..15 */
	subq	$128+8,%rsp		/* 136 bytes: makes %rsp 16-byte aligned */
	movl	16(%rdi),%eax		/* copy-in state, bytes 16..19 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movl	%eax,16(%rsp)
	movq	$-1,%rax
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy-out state */
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_blocks,.-padlock_sha1_blocks
/*
 * padlock_sha256_oneshot -- run "rep xsha256" with %rax = 0 over an
 * input buffer, using a 16-byte-aligned stack copy of the 32-byte state.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = hash state (32 bytes, any alignment),
 *        %rsi = input, %rdx = length (moved to %rcx for the instruction).
 * Out:   the 32 bytes at the original %rdi are overwritten with the
 *        updated state.
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, %xmm1, flags.
 *
 * NOTE(review): %rax = 0 presumably selects the mode in which the
 * hardware finalises the message -- confirm against the PadLock guide.
 */
.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,@function
.align	16
padlock_sha256_oneshot:
	movq	%rdx,%rcx		/* count for the rep prefix */
	movq	%rdi,%rdx		/* put aside the caller's state pointer */
	movups	(%rdi),%xmm0		/* copy-in state, bytes 0..15 */
	subq	$128+8,%rsp		/* 136 bytes: makes %rsp 16-byte aligned */
	movups	16(%rdi),%xmm1		/* copy-in state, bytes 16..31 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movaps	%xmm1,16(%rsp)
	xorq	%rax,%rax
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy-out state */
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot
/*
 * padlock_sha256_blocks -- run "rep xsha256" with %rax = -1 over an
 * input buffer, using a 16-byte-aligned stack copy of the 32-byte state.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = hash state (32 bytes, any alignment),
 *        %rsi = input, %rdx = count (moved to %rcx for the instruction).
 * Out:   the 32 bytes at the original %rdi are overwritten with the
 *        updated state.
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, %xmm1, flags.
 *
 * NOTE(review): %rax = -1 presumably selects the mode that processes
 * whole blocks without finalising -- confirm against the PadLock guide.
 */
.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,@function
.align	16
padlock_sha256_blocks:
	movq	%rdx,%rcx		/* count for the rep prefix */
	movq	%rdi,%rdx		/* put aside the caller's state pointer */
	movups	(%rdi),%xmm0		/* copy-in state, bytes 0..15 */
	subq	$128+8,%rsp		/* 136 bytes: makes %rsp 16-byte aligned */
	movups	16(%rdi),%xmm1		/* copy-in state, bytes 16..31 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movaps	%xmm1,16(%rsp)
	movq	$-1,%rax
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy-out state */
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_blocks,.-padlock_sha256_blocks
/*
 * padlock_sha512_blocks -- run the PadLock hash opcode f3 0f a6 e0 over
 * an input buffer, using a 16-byte-aligned stack copy of the 64-byte
 * state.
 *
 * ABI:   System V AMD64.
 * In:    %rdi = hash state (64 bytes, any alignment),
 *        %rsi = input, %rdx = count (moved to %rcx for the instruction).
 * Out:   the 64 bytes at the original %rdi are overwritten with the
 *        updated state.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.
 *
 * NOTE(review): unlike the sha1/sha256 routines in this file, %rax is
 * not initialised before the instruction; confirm the opcode does not
 * consume it.
 */
.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,@function
.align	16
padlock_sha512_blocks:
	movq	%rdx,%rcx		/* count for the rep prefix */
	movq	%rdi,%rdx		/* put aside the caller's state pointer */
	movups	(%rdi),%xmm0		/* copy-in state, 4 x 16 bytes */
	subq	$128+8,%rsp		/* 136 bytes: makes %rsp 16-byte aligned */
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* instruction works on the stack copy */
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
.byte	0xf3,0x0f,0xa6,0xe0		/* rep xsha512 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy-out state */
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
/*
 * padlock_ecb_encrypt -- process a buffer with "rep xcryptecb".
 *
 * ABI:   System V AMD64.
 * In:    %rdi = out, %rsi = inp, %rdx = ctx, %rcx = len (bytes).
 *        As used here: ctx+0 is passed in %rax (IV slot), ctx+16 in %rdx
 *        (control word), ctx+32 in %rbx (key).
 * Out:   %eax = 0 if ctx or len is not a multiple of 16 (nothing done),
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags, and
 *        .Lpadlock_saved_context.  %rbx and %rbp are saved and restored.
 *        The direction flag is cleared (cld) and not restored.
 *
 * Strategy, as implemented below:
 *  - Control-word bit 5 set, or both pointers 16-byte aligned: encrypt
 *    directly (.Lecb_aligned).
 *  - Otherwise work in chunks of at most 512 bytes.  A misaligned input
 *    is first copied to an aligned location, and a misaligned output is
 *    produced in a stack bounce buffer and then copied out.
 *  - If the end of the data to be read lies within 128 bytes of a 4 KiB
 *    page boundary, the trailing bytes are routed through a stack copy
 *    (NOTE(review): presumably to keep hardware prefetch from touching
 *    the next page -- confirm).
 *  - Any stack buffer is zeroed before it is released.
 * Instruction order is significant: several cmov/jcc consume flags set
 * many instructions earlier, with only flag-neutral lea/mov in between.
 */
.globl	padlock_ecb_encrypt
.type	padlock_ecb_encrypt,@function
.align	16
padlock_ecb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value on abort */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lecb_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lecb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for the helper; popped at .Lecb_exit */
	cld				/* rep movsq below must count upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the control word */
	jnz	.Lecb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* !out_misaligned */
	testq	$0x0f,%rsi
	setz	%bl			/* !inp_misaligned */
	testl	%ebx,%eax
	jnz	.Lecb_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* out_misaligned ? -1 : 0 */
	leaq	(%rsp),%rbp		/* remember stack top */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* chunk = min(len, 512) */
	andq	%rbx,%rax		/* out_misaligned ? chunk : 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* chunk = len % 512 (sets ZF for cmovz) */
	leaq	(%rax,%rbp,1),%rsp	/* allocate bounce buffer, if any */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* chunk = chunk ? chunk : 512 */
	cmpq	%rbx,%rcx
	ja	.Lecb_loop		/* more than one chunk to do */
	movq	%rsi,%rax		/* single chunk: check distance to page end */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: data is read from out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end of data to page boundary */
	cmpq	$128,%rax
	movq	$-128,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 128 ? -128 : chunk */
	andq	%rax,%rbx
	jz	.Lecb_unaligned_tail
	jmp	.Lecb_loop
.align	16
.Lecb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len, chunk */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi		/* out misaligned: write to the stack buffer */
	cmovnzq	%rsp,%rdi
	testq	$0x0f,%rsi		/* inp misaligned: copy it first */
	jz	.Lecb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi		/* rewind destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* encrypt in place */
.Lecb_inp_aligned:
	leaq	-16(%rdx),%rax		/* ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	movq	%r8,%rdi		/* restore out and chunk */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lecb_out_aligned
	movq	%rbx,%rcx		/* copy result from stack buffer to out */
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi
.Lecb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp, reduce len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov keeps ZF from the sub above */
	jz	.Lecb_break
	cmpq	%rbx,%rcx
	jae	.Lecb_loop
.Lecb_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no buffer yet: need len bytes */
	movq	%rdi,%r8		/* save out */
	movq	%rcx,%rbx
	subq	%rax,%rsp		/* alloca */
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: inp -> stack */
	movq	%rsp,%rsi		/* continue with the stack copy as input */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lecb_loop
.align	16
.Lecb_break:
	cmpq	%rbp,%rsp
	je	.Lecb_done		/* no stack buffer was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lecb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lecb_bzero

.Lecb_done:
	leaq	(%rbp),%rsp		/* release the stack buffer */
	jmp	.Lecb_exit

.align	16
.Lecb_aligned:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* distance from end of input to page boundary */
	xorl	%eax,%eax
	cmpq	$128,%rbp
	movq	$128-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 128 ? 127 : 0 */
	andq	%rcx,%rbp		/* remainder to route through the stack */
	subq	%rbp,%rcx
	jz	.Lecb_aligned_tail
	leaq	-16(%rdx),%rax		/* ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	testq	%rbp,%rbp		/* any remainder? */
	jz	.Lecb_exit

.Lecb_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* remember stack top */
	subq	%rcx,%rsp		/* alloca(remainder) */
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: inp tail -> stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lecb_loop
.Lecb_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* drop the pushed RFLAGS image */
.Lecb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ecb_encrypt,.-padlock_ecb_encrypt
/*
 * padlock_cbc_encrypt -- process a buffer with "rep xcryptcbc".
 *
 * ABI:   System V AMD64.
 * In:    %rdi = out, %rsi = inp, %rdx = ctx, %rcx = len (bytes).
 *        As used here: ctx+0 is passed in %rax (IV), ctx+16 in %rdx
 *        (control word), ctx+32 in %rbx (key).
 * Out:   %eax = 0 if ctx or len is not a multiple of 16 (nothing done),
 *        %eax = 1 otherwise.  After each xcrypt the 16 bytes that %rax
 *        then points at are stored back to ctx+0.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags, and
 *        .Lpadlock_saved_context.  %rbx and %rbp are saved and restored.
 *        The direction flag is cleared (cld) and not restored.
 *
 * Strategy, as implemented below:
 *  - Control-word bit 5 set, or both pointers 16-byte aligned: encrypt
 *    directly (.Lcbc_aligned).
 *  - Otherwise work in chunks of at most 512 bytes, copying a misaligned
 *    input to an aligned location and bouncing a misaligned output
 *    through the stack.
 *  - If the end of the data to be read lies within 64 bytes of a 4 KiB
 *    page boundary, the trailing bytes are routed through a stack copy
 *    (NOTE(review): presumably a prefetch workaround -- confirm).
 *  - Any stack buffer is zeroed before it is released.
 * Instruction order is significant: several cmov/jcc consume flags set
 * many instructions earlier, with only flag-neutral lea/mov in between.
 */
.globl	padlock_cbc_encrypt
.type	padlock_cbc_encrypt,@function
.align	16
padlock_cbc_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value on abort */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lcbc_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lcbc_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for the helper; popped at .Lcbc_exit */
	cld				/* rep movsq below must count upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the control word */
	jnz	.Lcbc_aligned
	testq	$0x0f,%rdi
	setz	%al			/* !out_misaligned */
	testq	$0x0f,%rsi
	setz	%bl			/* !inp_misaligned */
	testl	%ebx,%eax
	jnz	.Lcbc_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* out_misaligned ? -1 : 0 */
	leaq	(%rsp),%rbp		/* remember stack top */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* chunk = min(len, 512) */
	andq	%rbx,%rax		/* out_misaligned ? chunk : 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* chunk = len % 512 (sets ZF for cmovz) */
	leaq	(%rax,%rbp,1),%rsp	/* allocate bounce buffer, if any */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* chunk = chunk ? chunk : 512 */
	cmpq	%rbx,%rcx
	ja	.Lcbc_loop		/* more than one chunk to do */
	movq	%rsi,%rax		/* single chunk: check distance to page end */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: data is read from out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end of data to page boundary */
	cmpq	$64,%rax
	movq	$-64,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 64 ? -64 : chunk */
	andq	%rax,%rbx
	jz	.Lcbc_unaligned_tail
	jmp	.Lcbc_loop
.align	16
.Lcbc_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len, chunk */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi		/* out misaligned: write to the stack buffer */
	cmovnzq	%rsp,%rdi
	testq	$0x0f,%rsi		/* inp misaligned: copy it first */
	jz	.Lcbc_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi		/* rewind destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* encrypt in place */
.Lcbc_inp_aligned:
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* copy [or refresh] iv at ctx+0 */
	movq	%r8,%rdi		/* restore out and chunk */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcbc_out_aligned
	movq	%rbx,%rcx		/* copy result from stack buffer to out */
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi
.Lcbc_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp, reduce len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov keeps ZF from the sub above */
	jz	.Lcbc_break
	cmpq	%rbx,%rcx
	jae	.Lcbc_loop
.Lcbc_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no buffer yet: need len bytes */
	movq	%rdi,%r8		/* save out */
	movq	%rcx,%rbx
	subq	%rax,%rsp		/* alloca */
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: inp -> stack */
	movq	%rsp,%rsi		/* continue with the stack copy as input */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop
.align	16
.Lcbc_break:
	cmpq	%rbp,%rsp
	je	.Lcbc_done		/* no stack buffer was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcbc_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcbc_bzero

.Lcbc_done:
	leaq	(%rbp),%rsp		/* release the stack buffer */
	jmp	.Lcbc_exit

.align	16
.Lcbc_aligned:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* distance from end of input to page boundary */
	xorl	%eax,%eax
	cmpq	$64,%rbp
	movq	$64-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 64 ? 63 : 0 */
	andq	%rcx,%rbp		/* remainder to route through the stack */
	subq	%rbp,%rcx
	jz	.Lcbc_aligned_tail
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* copy [or refresh] iv at ctx+0 */
	testq	%rbp,%rbp		/* any remainder? */
	jz	.Lcbc_exit

.Lcbc_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* remember stack top */
	subq	%rcx,%rsp		/* alloca(remainder) */
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: inp tail -> stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop
.Lcbc_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* drop the pushed RFLAGS image */
.Lcbc_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cbc_encrypt,.-padlock_cbc_encrypt
/*
 * padlock_cfb_encrypt -- process a buffer with "rep xcryptcfb".
 *
 * ABI:   System V AMD64.
 * In:    %rdi = out, %rsi = inp, %rdx = ctx, %rcx = len (bytes).
 *        As used here: ctx+0 is passed in %rax (IV), ctx+16 in %rdx
 *        (control word), ctx+32 in %rbx (key).
 * Out:   %eax = 0 if ctx or len is not a multiple of 16 (nothing done),
 *        %eax = 1 otherwise.  After each xcrypt the 16 bytes that %rax
 *        then points at are stored back to ctx+0.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags, and
 *        .Lpadlock_saved_context.  %rbx and %rbp are saved and restored.
 *        The direction flag is cleared (cld) and not restored.
 *
 * Control-word bit 5 set, or both pointers 16-byte aligned: the whole
 * buffer is handled by one xcrypt (.Lcfb_aligned).  Otherwise the data
 * is processed in chunks of at most 512 bytes, copying a misaligned
 * input to an aligned location and bouncing a misaligned output through
 * a stack buffer that is zeroed before release.  This routine has no
 * page-boundary tail handling.
 */
.globl	padlock_cfb_encrypt
.type	padlock_cfb_encrypt,@function
.align	16
padlock_cfb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value on abort */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lcfb_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lcfb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for the helper; popped at .Lcfb_exit */
	cld				/* rep movsq below must count upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the control word */
	jnz	.Lcfb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* !out_misaligned */
	testq	$0x0f,%rsi
	setz	%bl			/* !inp_misaligned */
	testl	%ebx,%eax
	jnz	.Lcfb_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* out_misaligned ? -1 : 0 */
	leaq	(%rsp),%rbp		/* remember stack top */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* chunk = min(len, 512) */
	andq	%rbx,%rax		/* out_misaligned ? chunk : 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* chunk = len % 512 (sets ZF for cmovz) */
	leaq	(%rax,%rbp,1),%rsp	/* allocate bounce buffer, if any */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* chunk = chunk ? chunk : 512 */
	jmp	.Lcfb_loop
.align	16
.Lcfb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len, chunk */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi		/* out misaligned: write to the stack buffer */
	cmovnzq	%rsp,%rdi
	testq	$0x0f,%rsi		/* inp misaligned: copy it first */
	jz	.Lcfb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi		/* rewind destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* encrypt in place */
.Lcfb_inp_aligned:
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* copy [or refresh] iv at ctx+0 */
	movq	%r8,%rdi		/* restore out and chunk */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcfb_out_aligned
	movq	%rbx,%rcx		/* copy result from stack buffer to out */
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi
.Lcfb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp, reduce len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov keeps ZF from the sub above */
	jnz	.Lcfb_loop
	cmpq	%rbp,%rsp
	je	.Lcfb_done		/* no stack buffer was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcfb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcfb_bzero

.Lcfb_done:
	leaq	(%rbp),%rsp		/* release the stack buffer */
	jmp	.Lcfb_exit

.align	16
.Lcfb_aligned:
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* copy [or refresh] iv at ctx+0 */
.Lcfb_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* drop the pushed RFLAGS image */
.Lcfb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cfb_encrypt,.-padlock_cfb_encrypt
/*
 * padlock_ofb_encrypt -- process a buffer with "rep xcryptofb".
 *
 * ABI:   System V AMD64.
 * In:    %rdi = out, %rsi = inp, %rdx = ctx, %rcx = len (bytes).
 *        As used here: ctx+0 is passed in %rax (IV), ctx+16 in %rdx
 *        (control word), ctx+32 in %rbx (key).
 * Out:   %eax = 0 if ctx or len is not a multiple of 16 (nothing done),
 *        %eax = 1 otherwise.  After each xcrypt the 16 bytes that %rax
 *        then points at are stored back to ctx+0.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags, and
 *        .Lpadlock_saved_context.  %rbx and %rbp are saved and restored.
 *        The direction flag is cleared (cld) and not restored.
 *
 * Control-word bit 5 set, or both pointers 16-byte aligned: the whole
 * buffer is handled by one xcrypt (.Lofb_aligned).  Otherwise the data
 * is processed in chunks of at most 512 bytes, copying a misaligned
 * input to an aligned location and bouncing a misaligned output through
 * a stack buffer that is zeroed before release.  This routine has no
 * page-boundary tail handling.
 */
.globl	padlock_ofb_encrypt
.type	padlock_ofb_encrypt,@function
.align	16
padlock_ofb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value on abort */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lofb_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lofb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for the helper; popped at .Lofb_exit */
	cld				/* rep movsq below must count upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the control word */
	jnz	.Lofb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* !out_misaligned */
	testq	$0x0f,%rsi
	setz	%bl			/* !inp_misaligned */
	testl	%ebx,%eax
	jnz	.Lofb_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* out_misaligned ? -1 : 0 */
	leaq	(%rsp),%rbp		/* remember stack top */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* chunk = min(len, 512) */
	andq	%rbx,%rax		/* out_misaligned ? chunk : 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* chunk = len % 512 (sets ZF for cmovz) */
	leaq	(%rax,%rbp,1),%rsp	/* allocate bounce buffer, if any */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* chunk = chunk ? chunk : 512 */
	jmp	.Lofb_loop
.align	16
.Lofb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len, chunk */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi		/* out misaligned: write to the stack buffer */
	cmovnzq	%rsp,%rdi
	testq	$0x0f,%rsi		/* inp misaligned: copy it first */
	jz	.Lofb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi		/* rewind destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* encrypt in place */
.Lofb_inp_aligned:
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* copy [or refresh] iv at ctx+0 */
	movq	%r8,%rdi		/* restore out and chunk */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lofb_out_aligned
	movq	%rbx,%rcx		/* copy result from stack buffer to out */
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi
.Lofb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp, reduce len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov keeps ZF from the sub above */
	jnz	.Lofb_loop
	cmpq	%rbp,%rsp
	je	.Lofb_done		/* no stack buffer was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lofb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lofb_bzero

.Lofb_done:
	leaq	(%rbp),%rsp		/* release the stack buffer */
	jmp	.Lofb_exit

.align	16
.Lofb_aligned:
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* copy [or refresh] iv at ctx+0 */
.Lofb_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* drop the pushed RFLAGS image */
.Lofb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ofb_encrypt,.-padlock_ofb_encrypt
/*
 * padlock_ctr32_encrypt -- process a buffer with "rep xcryptctr".
 *
 * ABI:   System V AMD64.
 * In:    %rdi = out, %rsi = inp, %rdx = ctx, %rcx = len (bytes).
 *        As used here: ctx+0 is passed in %rax (counter block), ctx+16
 *        in %rdx (control word), ctx+32 in %rbx (key).  The 32-bit word
 *        at ctx+12 is read and written as a big-endian counter.
 * Out:   %eax = 0 if ctx or len is not a multiple of 16 (nothing done),
 *        %eax = 1 otherwise.
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags, and
 *        .Lpadlock_saved_context.  %rbx and %rbp are saved and restored.
 *        The direction flag is cleared (cld) and not restored.
 *
 * On top of the chunking scheme used by the other modes (512-byte
 * chunks, stack bounce buffer for misaligned data, page-boundary tail
 * with a 32-byte threshold), chunk sizes are limited so that the low
 * counter bits do not wrap inside one xcrypt: a multiple of 32 blocks
 * on the unaligned path, a multiple of 65536 blocks on the aligned path.
 * When the low 16 counter bits have wrapped to zero, software adds the
 * carry into the upper 16 bits (NOTE(review): presumably because the
 * hardware only increments the low 16 bits -- confirm against the VIA
 * PadLock programming guide).
 * Instruction order is significant: several cmov/jcc consume flags set
 * many instructions earlier, with only flag-neutral lea/mov in between.
 */
.globl	padlock_ctr32_encrypt
.type	padlock_ctr32_encrypt,@function
.align	16
padlock_ctr32_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value on abort */
	testq	$15,%rdx		/* ctx must be 16-byte aligned */
	jnz	.Lctr32_abort
	testq	$15,%rcx		/* len must be a multiple of 16 */
	jnz	.Lctr32_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for the helper; popped at .Lctr32_exit */
	cld				/* rep movsq below must count upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the control word */
	jnz	.Lctr32_aligned
	testq	$0x0f,%rdi
	setz	%al			/* !out_misaligned */
	testq	$0x0f,%rsi
	setz	%bl			/* !inp_misaligned */
	testl	%ebx,%eax
	jnz	.Lctr32_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* out_misaligned ? -1 : 0 */
	leaq	(%rsp),%rbp		/* remember stack top */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* chunk = min(len, 512) */
	andq	%rbx,%rax		/* out_misaligned ? chunk : 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* chunk = len % 512 (sets ZF for cmovz) */
	leaq	(%rax,%rbp,1),%rsp	/* allocate bounce buffer, if any */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* overwritten just below; kept as generated */
.Lctr32_reenter:
	movl	-4(%rdx),%eax		/* pull 32-bit counter (ctx+12) */
	bswapl	%eax
	negl	%eax
	andl	$31,%eax		/* blocks until the low 5 counter bits wrap */
	movq	$512,%rbx
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* already on a boundary: full 512 bytes */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* do not let the counter cross the chunk boundary */
	cmovbeq	%rcx,%rbx		/* chunk = min(len, that) */
	cmpq	%rbx,%rcx
	ja	.Lctr32_loop		/* more than one chunk to do */
	movq	%rsi,%rax		/* single chunk: check distance to page end */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: data is read from out */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end of data to page boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 32 ? -32 : chunk */
	andq	%rax,%rbx
	jz	.Lctr32_unaligned_tail
	jmp	.Lctr32_loop
.align	16
.Lctr32_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len, chunk */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi		/* out misaligned: write to the stack buffer */
	cmovnzq	%rsp,%rdi
	testq	$0x0f,%rsi		/* inp misaligned: copy it first */
	jz	.Lctr32_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi		/* rewind destination */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* encrypt in place */
.Lctr32_inp_aligned:
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	movl	-4(%rdx),%eax		/* pull 32-bit counter */
	testl	$0xffff0000,%eax	/* stored big-endian: these are its low 16 bits */
	jnz	.Lctr32_no_carry
	bswapl	%eax
	addl	$0x10000,%eax		/* low half wrapped: carry into the high half */
	bswapl	%eax
	movl	%eax,-4(%rdx)
.Lctr32_no_carry:
	movq	%r8,%rdi		/* restore out and chunk */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lctr32_out_aligned
	movq	%rbx,%rcx		/* copy result from stack buffer to out */
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq */
	subq	%rbx,%rdi
.Lctr32_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp, reduce len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* mov keeps ZF from the sub above */
	jz	.Lctr32_break
	cmpq	%rbx,%rcx
	jae	.Lctr32_loop
	movq	%rcx,%rbx		/* last partial chunk: repeat the page check */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end of data to page boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 32 ? -32 : chunk */
	andq	%rax,%rbx
	jnz	.Lctr32_loop
.Lctr32_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no buffer yet: need len bytes */
	movq	%rdi,%r8		/* save out */
	movq	%rcx,%rbx
	subq	%rax,%rsp		/* alloca */
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: inp -> stack */
	movq	%rsp,%rsi		/* continue with the stack copy as input */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop
.align	16
.Lctr32_break:
	cmpq	%rbp,%rsp
	je	.Lctr32_done		/* no stack buffer was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lctr32_bzero:
	movaps	%xmm0,(%rax)		/* wipe the stack buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lctr32_bzero

.Lctr32_done:
	leaq	(%rbp),%rsp		/* release the stack buffer */
	jmp	.Lctr32_exit

.align	16
.Lctr32_aligned:
	movl	-4(%rdx),%eax		/* pull 32-bit counter (ctx+12) */
	bswapl	%eax
	negl	%eax
	andl	$0xffff,%eax		/* blocks until the low 16 counter bits wrap */
	movq	$1048576,%rbx		/* 65536 blocks * 16 bytes */
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* do not let the counter cross 2^16 */
	cmovbeq	%rcx,%rbx
	jbe	.Lctr32_aligned_skip	/* flags still from the cmp above */

.Lctr32_aligned_loop:
	movq	%rcx,%r10		/* save len and chunk */
	movq	%rbx,%rcx
	movq	%rbx,%r11

	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */

	movl	-4(%rdx),%eax		/* pull 32-bit counter */
	bswapl	%eax
	addl	$0x10000,%eax		/* carry into the high 16 bits */
	bswapl	%eax
	movl	%eax,-4(%rdx)

	movq	%r10,%rcx		/* restore len */
	subq	%r11,%rcx
	movq	$1048576,%rbx		/* mov keeps ZF from the sub above */
	jz	.Lctr32_exit
	cmpq	%rbx,%rcx
	jae	.Lctr32_aligned_loop

.Lctr32_aligned_skip:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* distance from end of input to page boundary */
	xorl	%eax,%eax
	cmpq	$32,%rbp
	movq	$32-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 32 ? 31 : 0 */
	andq	%rcx,%rbp		/* remainder to route through the stack */
	subq	%rbp,%rcx
	jz	.Lctr32_aligned_tail
	leaq	-16(%rdx),%rax		/* ivp = ctx+0 */
	leaq	16(%rdx),%rbx		/* key */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	testq	%rbp,%rbp		/* any remainder? */
	jz	.Lctr32_exit

.Lctr32_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* remember stack top */
	subq	%rcx,%rsp		/* alloca(remainder) */
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: inp tail -> stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop
.Lctr32_exit:
	movl	$1,%eax			/* success */
	leaq	8(%rsp),%rsp		/* drop the pushed RFLAGS image */
.Lctr32_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
/*
 * Identification string, emitted into .text, NUL-terminated:
 * "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16

/*
 * File-local 8-byte slot, initially 0.  _padlock_verify_ctx stores the
 * most recently verified context pointer here; the routines above pass
 * its address to the helper in %rax.
 */
.data
.align	8
.Lpadlock_saved_context:
.quad	0