/* Origin: freebsd/sys/crypto/openssl/amd64/e_padlock-x86_64.S (revision 2e620256bd76c449c835c604e404483437743011) */
/* Do not modify. This file is auto-generated from e_padlock-x86_64.pl. */
.text

/*
 * padlock_capability
 *
 * Syntax: GNU as, AT&T operand order.  Takes no arguments.
 *
 * Out:   %eax = 0 when the CPUID vendor string is neither "CentaurHauls"
 *               nor "  Shanghai  ", or when the highest leaf reported by
 *               CPUID 0xC0000000 is below 0xC0000001;
 *               otherwise %edx of CPUID leaf 0xC0000001 with bit 4
 *               forced to 1.
 * Clobb: %rcx, %rdx, %r8, flags.  %rbx is overwritten by cpuid but is
 *        saved in %r8 on entry and restored before returning.
 */
.globl	padlock_capability
.type	padlock_capability,@function
.align	16
padlock_capability:
	movq	%rbx,%r8		/* cpuid overwrites callee-saved %rbx */
	xorl	%eax,%eax
	cpuid				/* leaf 0: vendor id in %ebx,%edx,%ecx */
	xorl	%eax,%eax		/* result stays 0 on every bail-out path */
	cmpl	$0x746e6543,%ebx	/* "Cent" */
	jne	.Lzhaoxin
	cmpl	$0x48727561,%edx	/* "aurH" */
	jne	.Lnoluck
	cmpl	$0x736c7561,%ecx	/* "auls" */
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmpl	$0x68532020,%ebx	/* "  Sh" */
	jne	.Lnoluck
	cmpl	$0x68676e61,%edx	/* "angh" */
	jne	.Lnoluck
	cmpl	$0x20206961,%ecx	/* "ai  " */
	jne	.Lnoluck
.LzhaoxinEnd:
	movl	$0xC0000000,%eax
	cpuid				/* %eax = highest 0xC000xxxx leaf */
	movl	%eax,%edx
	xorl	%eax,%eax
	cmpl	$0xC0000001,%edx
	jb	.Lnoluck		/* feature leaf not available */
	movl	$0xC0000001,%eax
	cpuid
	movl	%edx,%eax		/* feature flags become the result */
	andl	$0xffffffef,%eax
	orl	$0x10,%eax		/* the two ops together force bit 4 on */
.Lnoluck:
	movq	%r8,%rbx
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_capability,.-padlock_capability
/*
 * padlock_key_bswap
 *
 * Byte-swaps, in place, an array of 32-bit words starting at (%rdi).
 *
 * In:    %rdi = base of the word array.  The 32-bit value at byte
 *               offset 240 from that base is read once up front; the
 *               number of words swapped is (value + 1) * 4.
 *               NOTE(review): presumably this is an AES key schedule
 *               with the round count stored at offset 240 - confirm
 *               against the C caller.
 * Out:   none (%eax holds the last swapped word).
 * Clobb: %rax, %rdx, %rdi, flags.
 */
.globl	padlock_key_bswap
.type	padlock_key_bswap,@function
.align	16
padlock_key_bswap:
	movl	240(%rdi),%edx
	incl	%edx
	shll	$2,%edx			/* %edx = (value at +240, plus 1) * 4 words */
.Lbswap_loop:
	movl	(%rdi),%eax
	bswapl	%eax
	movl	%eax,(%rdi)
	leaq	4(%rdi),%rdi		/* next word; lea leaves flags alone */
	subl	$1,%edx
	jnz	.Lbswap_loop
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_key_bswap,.-padlock_key_bswap
/*
 * padlock_verify_context
 *
 * Public wrapper around _padlock_verify_ctx.  It sets up the register
 * and stack contract the helper expects: context pointer in %rdx,
 * address of .Lpadlock_saved_context in %rax, and the current RFLAGS
 * image on the stack directly above the return address.
 *
 * In:    %rdi = context pointer.
 * Out:   none.
 * Clobb: %rax, %rdx, %r8, flags.
 */
.globl	padlock_verify_context
.type	padlock_verify_context,@function
.align	16
padlock_verify_context:
	movq	%rdi,%rdx
	pushf				/* helper inspects this saved RFLAGS image */
	leaq	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	leaq	8(%rsp),%rsp		/* drop the pushed flags without a popf */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_verify_context,.-padlock_verify_context
/*
 * _padlock_verify_ctx (file-local helper, non-standard calling contract)
 *
 * In:    %rdx    = context pointer about to be used.
 *        %rax    = address of the saved-context slot.
 *        8(%rsp) = RFLAGS image pushed by the caller just before the
 *                  call (sits right above the return address).
 * Effect: when bit 30 of the saved RFLAGS image is set and the slot
 *        holds a different pointer than %rdx, executes pushf/popf.
 *        The slot is then unconditionally updated to %rdx.
 *        NOTE(review): bit 30 and the pushf/popf pair are presumably
 *        the PadLock "key loaded" indication and the documented way to
 *        force a key reload - confirm against the VIA programming guide.
 * Clobb: %r8, flags.
 */
.type	_padlock_verify_ctx,@function
.align	16
_padlock_verify_ctx:
	movq	8(%rsp),%r8		/* caller-saved RFLAGS image */
	btq	$30,%r8
	jnc	.Lverified		/* bit 30 clear: nothing to do */
	cmpq	(%rax),%rdx
	je	.Lverified		/* same context as last time */
	pushf
	popf
.Lverified:
	movq	%rdx,(%rax)		/* remember this context */
	.byte	0xf3,0xc3		/* rep ret */
.size	_padlock_verify_ctx,.-_padlock_verify_ctx
/*
 * padlock_reload_key
 *
 * Takes no arguments and returns nothing; it only executes a
 * pushf/popf pair, which rewrites RFLAGS with its own value.
 * NOTE(review): presumably this is the PadLock mechanism for making the
 * hardware reload the key on the next xcrypt instruction - confirm
 * against the VIA programming guide.
 */
.globl	padlock_reload_key
.type	padlock_reload_key,@function
.align	16
padlock_reload_key:
	pushf
	popf
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_reload_key,.-padlock_reload_key
/*
 * padlock_aes_block
 *
 * Runs one "rep xcryptecb" with a block count of 1.
 *
 * In:    %rdi = output pointer, %rsi = input pointer (both consumed
 *               implicitly by the xcrypt instruction),
 *        %rdx = context base; the instruction is handed context+16 in
 *               %rdx and context+32 in %rbx.
 *               NOTE(review): per the PadLock register convention these
 *               are the control word and the key schedule - confirm the
 *               context layout against the C structure.
 * Out:   none.
 * Clobb: %rcx, %rdx, %r8, plus whatever the instruction updates
 *        (%rsi/%rdi).  %rbx is saved in %r8 and restored.
 */
.globl	padlock_aes_block
.type	padlock_aes_block,@function
.align	16
padlock_aes_block:
	movq	%rbx,%r8		/* preserve callee-saved %rbx */
	movq	$1,%rcx			/* one 16-byte block */
	leaq	32(%rdx),%rbx
	leaq	16(%rdx),%rdx
.byte	0xf3,0x0f,0xa7,0xc8		/* rep xcryptecb */
	movq	%r8,%rbx
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_aes_block,.-padlock_aes_block
/*
 * padlock_xstore
 *
 * Executes a single PadLock "xstore" instruction.
 *
 * In:    %rdi = destination pointer (used implicitly by xstore),
 *        %esi = 32-bit value copied to %edx before the instruction.
 * Out:   %eax as left by the instruction; this routine does not
 *        initialise %eax itself.
 * Clobb: %rdx, plus whatever the instruction updates.
 */
.globl	padlock_xstore
.type	padlock_xstore,@function
.align	16
padlock_xstore:
	movl	%esi,%edx
.byte	0x0f,0xa7,0xc0			/* xstore */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_xstore,.-padlock_xstore
/*
 * padlock_sha1_oneshot
 *
 * Copies the 20-byte state at (%rdi) into a 16-byte-aligned scratch
 * area on the stack, runs "rep xsha1" with %rax = 0, and copies the
 * 20 bytes back.
 *
 * In:    %rdi = state pointer (20 bytes read and written, any alignment),
 *        %rsi = input pointer (consumed implicitly by the instruction),
 *        %rdx = length, moved to %rcx for the instruction.
 * Out:   none (state updated in place).
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, flags.
 * Stack: 128+8 bytes.  With the SysV entry condition rsp % 16 == 8 the
 *        scratch area is 16-byte aligned, as the movaps accesses need.
 * NOTE(review): %rax = 0 versus -1 presumably selects hardware-side
 * finalisation versus raw block processing - confirm in the VIA guide.
 */
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,@function
.align	16
padlock_sha1_oneshot:
	movq	%rdx,%rcx		/* length for the instruction */
	movq	%rdi,%rdx		/* keep the caller state pointer */
	movups	(%rdi),%xmm0
	subq	$128+8,%rsp
	movl	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* hardware works on the aligned copy */
	movl	%eax,16(%rsp)
	xorq	%rax,%rax
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
/*
 * padlock_sha1_blocks
 *
 * Same data movement as padlock_sha1_oneshot (20-byte state bounced
 * through an aligned stack copy around "rep xsha1"), but the
 * instruction is started with %rax = -1 instead of 0.
 *
 * In:    %rdi = state pointer (20 bytes read and written, any alignment),
 *        %rsi = input pointer (consumed implicitly by the instruction),
 *        %rdx = count, moved to %rcx for the instruction.
 *               NOTE(review): unit (bytes or 64-byte blocks) is not
 *               visible here - confirm against the caller.
 * Out:   none (state updated in place).
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, flags.
 * Stack: 128+8 bytes; 16-byte aligned given rsp % 16 == 8 on entry.
 */
.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,@function
.align	16
padlock_sha1_blocks:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer */
	movups	(%rdi),%xmm0
	subq	$128+8,%rsp
	movl	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* hardware works on the aligned copy */
	movl	%eax,16(%rsp)
	movq	$-1,%rax
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_blocks,.-padlock_sha1_blocks
/*
 * padlock_sha256_oneshot
 *
 * Copies the 32-byte state at (%rdi) into a 16-byte-aligned scratch
 * area on the stack, runs "rep xsha256" with %rax = 0, and copies the
 * 32 bytes back.
 *
 * In:    %rdi = state pointer (32 bytes read and written, any alignment),
 *        %rsi = input pointer (consumed implicitly by the instruction),
 *        %rdx = length, moved to %rcx for the instruction.
 * Out:   none (state updated in place).
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, %xmm1, flags.
 * Stack: 128+8 bytes; 16-byte aligned given rsp % 16 == 8 on entry.
 */
.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,@function
.align	16
padlock_sha256_oneshot:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer */
	movups	(%rdi),%xmm0
	subq	$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* hardware works on the aligned copy */
	movaps	%xmm1,16(%rsp)
	xorq	%rax,%rax
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot
/*
 * padlock_sha256_blocks
 *
 * Same data movement as padlock_sha256_oneshot (32-byte state bounced
 * through an aligned stack copy around "rep xsha256"), but the
 * instruction is started with %rax = -1 instead of 0.
 *
 * In:    %rdi = state pointer (32 bytes read and written, any alignment),
 *        %rsi = input pointer (consumed implicitly by the instruction),
 *        %rdx = count, moved to %rcx for the instruction.
 *               NOTE(review): unit of the count is not visible here -
 *               confirm against the caller.
 * Out:   none (state updated in place).
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %xmm0, %xmm1, flags.
 * Stack: 128+8 bytes; 16-byte aligned given rsp % 16 == 8 on entry.
 */
.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,@function
.align	16
padlock_sha256_blocks:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer */
	movups	(%rdi),%xmm0
	subq	$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* hardware works on the aligned copy */
	movaps	%xmm1,16(%rsp)
	movq	$-1,%rax
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_blocks,.-padlock_sha256_blocks
/*
 * padlock_sha512_blocks
 *
 * Copies the 64-byte state at (%rdi) into a 16-byte-aligned scratch
 * area on the stack, runs the instruction encoded as f3 0f a6 e0
 * ("rep xsha512"), and copies the 64 bytes back.  Unlike the SHA-1 and
 * SHA-256 routines, %rax is not initialised before the instruction.
 *
 * In:    %rdi = state pointer (64 bytes read and written, any alignment),
 *        %rsi = input pointer (consumed implicitly by the instruction),
 *        %rdx = count, moved to %rcx for the instruction.
 *               NOTE(review): unit of the count is not visible here -
 *               confirm against the caller.
 * Out:   none (state updated in place).
 * Clobb: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.
 * Stack: 128+8 bytes; 16-byte aligned given rsp % 16 == 8 on entry.
 */
.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,@function
.align	16
padlock_sha512_blocks:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer */
	movups	(%rdi),%xmm0
	subq	$128+8,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* hardware works on the aligned copy */
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
.byte	0xf3,0x0f,0xa6,0xe0		/* rep xsha512 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
/*
 * padlock_ecb_encrypt
 *
 * Processes %rcx bytes from (%rsi) to (%rdi) with "rep xcryptecb".
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context base (must be 16-byte aligned); context+16 is
 *               passed to the hardware in %rdx, context+32 in %rbx and
 *               context+0 in %rax,
 *        %rcx = length in bytes (must be a multiple of 16).
 * Out:   %eax = 1 on completion, 0 when the context pointer or the
 *               length fails the 16-byte check (nothing is processed).
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (DF is left
 *        cleared).  %rbx and %rbp are saved and restored.
 *
 * Strategy, as visible in the code:
 *  - If bit 5 of the dword at context+16 is set, or both pointers are
 *    16-byte aligned, data is handed to the hardware directly
 *    (.Lecb_aligned).
 *  - Otherwise data is processed in chunks of at most 512 bytes.  A
 *    misaligned input chunk is first copied to an aligned place (the
 *    output buffer if that is aligned, else a stack bounce buffer); a
 *    misaligned output is written from the bounce buffer afterwards.
 *  - When the end of the input lies less than 128 bytes before a 4 KiB
 *    boundary, the final piece is copied to the stack and processed
 *    from there.  NOTE(review): presumably this keeps hardware
 *    prefetch from touching the following page - confirm.
 *  - Any stack scratch area is zeroed before returning.
 */
.globl	padlock_ecb_encrypt
.type	padlock_ecb_encrypt,@function
.align	16
padlock_ecb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx
	jnz	.Lecb_abort		/* context not 16-byte aligned */
	testq	$15,%rcx
	jnz	.Lecb_abort		/* length not a multiple of 16 */
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = context+16 from here on */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the dword at context+16 */
	jnz	.Lecb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* 1 if output is aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* 1 if input is aligned */
	testl	%ebx,%eax
	jnz	.Lecb_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* 0 if output aligned, else -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* min(512, len) */
	andq	%rbx,%rax		/* bounce size, 0 if output aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the bounce buffer */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 if that is zero */
	cmpq	%rbx,%rcx
	ja	.Lecb_loop		/* more than one chunk */
	movq	%rsi,%rax		/* single chunk: page-end check */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: check output end */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end to 4 KiB boundary */
	cmpq	$128,%rax
	movq	$-128,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 128 ? -128 : chunk */
	andq	%rax,%rbx
	jz	.Lecb_unaligned_tail	/* everything is tail */
	jmp	.Lecb_loop
.align	16
.Lecb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save output pointer */
	movq	%rsi,%r9		/* save input pointer */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: use bounce buffer */
	testq	$0x0f,%rsi
	jz	.Lecb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: input -> aligned area */
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lecb_inp_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lecb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: bounce buffer -> output */
	subq	%rbx,%rdi
.Lecb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx		/* remaining length; sets ZF for jz */
	movq	$512,%rbx		/* mov does not disturb the flags */
	jz	.Lecb_break
	cmpq	%rbx,%rcx
	jae	.Lecb_loop
.Lecb_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no scratch yet: allocate len bytes */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: tail input -> stack */
	movq	%rsp,%rsi		/* tail is now read from the stack */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lecb_loop
.align	16
.Lecb_break:
	cmpq	%rbp,%rsp
	je	.Lecb_done		/* no scratch area was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lecb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the scratch area */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lecb_bzero

.Lecb_done:
	leaq	(%rbp),%rsp
	jmp	.Lecb_exit

.align	16
.Lecb_aligned:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* distance from input end to 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$128,%rbp
	movq	$128-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 128 ? 127 : 0 */
	andq	%rcx,%rbp		/* %rbp = tail bytes (0 if none) */
	subq	%rbp,%rcx
	jz	.Lecb_aligned_tail	/* nothing but tail */
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	testq	%rbp,%rbp
	jz	.Lecb_exit

.Lecb_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: tail input -> stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lecb_loop
.Lecb_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* discard the pushf image */
.Lecb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ecb_encrypt,.-padlock_ecb_encrypt
/*
 * padlock_cbc_encrypt
 *
 * Processes %rcx bytes from (%rsi) to (%rdi) with "rep xcryptcbc".
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context base (must be 16-byte aligned); context+16 is
 *               passed to the hardware in %rdx, context+32 in %rbx and
 *               context+0 in %rax,
 *        %rcx = length in bytes (must be a multiple of 16).
 * Out:   %eax = 1 on completion, 0 when the context pointer or the
 *               length fails the 16-byte check (nothing is processed).
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (DF is left
 *        cleared).  %rbx and %rbp are saved and restored.
 *
 * After every xcrypt invocation the 16 bytes at (%rax) are copied to
 * context+0.  NOTE(review): presumably the hardware leaves %rax
 * pointing at the chaining value for the next call - confirm in the
 * VIA programming guide.
 *
 * Strategy, as visible in the code:
 *  - If bit 5 of the dword at context+16 is set, or both pointers are
 *    16-byte aligned, data is handed to the hardware directly
 *    (.Lcbc_aligned).
 *  - Otherwise data is processed in chunks of at most 512 bytes, using
 *    the output buffer or a stack bounce buffer to obtain alignment.
 *  - When the end of the input lies less than 64 bytes before a 4 KiB
 *    boundary, the final piece is copied to the stack and processed
 *    from there.  NOTE(review): presumably a prefetch workaround.
 *  - Any stack scratch area is zeroed before returning.
 */
.globl	padlock_cbc_encrypt
.type	padlock_cbc_encrypt,@function
.align	16
padlock_cbc_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx
	jnz	.Lcbc_abort		/* context not 16-byte aligned */
	testq	$15,%rcx
	jnz	.Lcbc_abort		/* length not a multiple of 16 */
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = context+16 from here on */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the dword at context+16 */
	jnz	.Lcbc_aligned
	testq	$0x0f,%rdi
	setz	%al			/* 1 if output is aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* 1 if input is aligned */
	testl	%ebx,%eax
	jnz	.Lcbc_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* 0 if output aligned, else -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* min(512, len) */
	andq	%rbx,%rax		/* bounce size, 0 if output aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the bounce buffer */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 if that is zero */
	cmpq	%rbx,%rcx
	ja	.Lcbc_loop		/* more than one chunk */
	movq	%rsi,%rax		/* single chunk: page-end check */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: check output end */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end to 4 KiB boundary */
	cmpq	$64,%rax
	movq	$-64,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 64 ? -64 : chunk */
	andq	%rax,%rbx
	jz	.Lcbc_unaligned_tail	/* everything is tail */
	jmp	.Lcbc_loop
.align	16
.Lcbc_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save output pointer */
	movq	%rsi,%r9		/* save input pointer */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: use bounce buffer */
	testq	$0x0f,%rsi
	jz	.Lcbc_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: input -> aligned area */
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lcbc_inp_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* 16 bytes at (%rax) -> context+0 */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcbc_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: bounce buffer -> output */
	subq	%rbx,%rdi
.Lcbc_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx		/* remaining length; sets ZF for jz */
	movq	$512,%rbx		/* mov does not disturb the flags */
	jz	.Lcbc_break
	cmpq	%rbx,%rcx
	jae	.Lcbc_loop
.Lcbc_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no scratch yet: allocate len bytes */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: tail input -> stack */
	movq	%rsp,%rsi		/* tail is now read from the stack */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop
.align	16
.Lcbc_break:
	cmpq	%rbp,%rsp
	je	.Lcbc_done		/* no scratch area was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcbc_bzero:
	movaps	%xmm0,(%rax)		/* wipe the scratch area */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcbc_bzero

.Lcbc_done:
	leaq	(%rbp),%rsp
	jmp	.Lcbc_exit

.align	16
.Lcbc_aligned:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* distance from input end to 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$64,%rbp
	movq	$64-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 64 ? 63 : 0 */
	andq	%rcx,%rbp		/* %rbp = tail bytes (0 if none) */
	subq	%rbp,%rcx
	jz	.Lcbc_aligned_tail	/* nothing but tail */
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* 16 bytes at (%rax) -> context+0 */
	testq	%rbp,%rbp
	jz	.Lcbc_exit

.Lcbc_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: tail input -> stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop
.Lcbc_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* discard the pushf image */
.Lcbc_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cbc_encrypt,.-padlock_cbc_encrypt
/*
 * padlock_cfb_encrypt
 *
 * Processes %rcx bytes from (%rsi) to (%rdi) with "rep xcryptcfb".
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context base (must be 16-byte aligned); context+16 is
 *               passed to the hardware in %rdx, context+32 in %rbx and
 *               context+0 in %rax,
 *        %rcx = length in bytes (must be a multiple of 16).
 * Out:   %eax = 1 on completion, 0 when the context pointer or the
 *               length fails the 16-byte check (nothing is processed).
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (DF is left
 *        cleared).  %rbx and %rbp are saved and restored.
 *
 * After every xcrypt invocation the 16 bytes at (%rax) are copied to
 * context+0.  NOTE(review): presumably the hardware leaves %rax
 * pointing at the chaining value for the next call - confirm.
 *
 * If bit 5 of the dword at context+16 is set, or both pointers are
 * 16-byte aligned, the whole request is issued as one xcrypt
 * (.Lcfb_aligned).  Otherwise data is processed in chunks of at most
 * 512 bytes, using the output buffer or a stack bounce buffer to
 * obtain alignment; the bounce buffer is zeroed before returning.
 * Unlike the ECB/CBC routines there is no page-boundary tail handling.
 */
.globl	padlock_cfb_encrypt
.type	padlock_cfb_encrypt,@function
.align	16
padlock_cfb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx
	jnz	.Lcfb_abort		/* context not 16-byte aligned */
	testq	$15,%rcx
	jnz	.Lcfb_abort		/* length not a multiple of 16 */
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = context+16 from here on */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the dword at context+16 */
	jnz	.Lcfb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* 1 if output is aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* 1 if input is aligned */
	testl	%ebx,%eax
	jnz	.Lcfb_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* 0 if output aligned, else -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* min(512, len) */
	andq	%rbx,%rax		/* bounce size, 0 if output aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the bounce buffer */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 if that is zero */
	jmp	.Lcfb_loop
.align	16
.Lcfb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save output pointer */
	movq	%rsi,%r9		/* save input pointer */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: use bounce buffer */
	testq	$0x0f,%rsi
	jz	.Lcfb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: input -> aligned area */
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lcfb_inp_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* 16 bytes at (%rax) -> context+0 */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcfb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: bounce buffer -> output */
	subq	%rbx,%rdi
.Lcfb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx		/* remaining length; sets ZF for jnz */
	movq	$512,%rbx		/* mov does not disturb the flags */
	jnz	.Lcfb_loop
	cmpq	%rbp,%rsp
	je	.Lcfb_done		/* no bounce buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcfb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the bounce buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcfb_bzero

.Lcfb_done:
	leaq	(%rbp),%rsp
	jmp	.Lcfb_exit

.align	16
.Lcfb_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* 16 bytes at (%rax) -> context+0 */
.Lcfb_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* discard the pushf image */
.Lcfb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cfb_encrypt,.-padlock_cfb_encrypt
/*
 * padlock_ofb_encrypt
 *
 * Processes %rcx bytes from (%rsi) to (%rdi) with "rep xcryptofb".
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context base (must be 16-byte aligned); context+16 is
 *               passed to the hardware in %rdx, context+32 in %rbx and
 *               context+0 in %rax,
 *        %rcx = length in bytes (must be a multiple of 16).
 * Out:   %eax = 1 on completion, 0 when the context pointer or the
 *               length fails the 16-byte check (nothing is processed).
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (DF is left
 *        cleared).  %rbx and %rbp are saved and restored.
 *
 * After every xcrypt invocation the 16 bytes at (%rax) are copied to
 * context+0.  NOTE(review): presumably the hardware leaves %rax
 * pointing at the chaining value for the next call - confirm.
 *
 * If bit 5 of the dword at context+16 is set, or both pointers are
 * 16-byte aligned, the whole request is issued as one xcrypt
 * (.Lofb_aligned).  Otherwise data is processed in chunks of at most
 * 512 bytes, using the output buffer or a stack bounce buffer to
 * obtain alignment; the bounce buffer is zeroed before returning.
 * Unlike the ECB/CBC routines there is no page-boundary tail handling.
 */
.globl	padlock_ofb_encrypt
.type	padlock_ofb_encrypt,@function
.align	16
padlock_ofb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx
	jnz	.Lofb_abort		/* context not 16-byte aligned */
	testq	$15,%rcx
	jnz	.Lofb_abort		/* length not a multiple of 16 */
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = context+16 from here on */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the dword at context+16 */
	jnz	.Lofb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* 1 if output is aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* 1 if input is aligned */
	testl	%ebx,%eax
	jnz	.Lofb_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* 0 if output aligned, else -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* min(512, len) */
	andq	%rbx,%rax		/* bounce size, 0 if output aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len mod 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* allocate the bounce buffer */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 if that is zero */
	jmp	.Lofb_loop
.align	16
.Lofb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save output pointer */
	movq	%rsi,%r9		/* save input pointer */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: use bounce buffer */
	testq	$0x0f,%rsi
	jz	.Lofb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: input -> aligned area */
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lofb_inp_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* 16 bytes at (%rax) -> context+0 */
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lofb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: bounce buffer -> output */
	subq	%rbx,%rdi
.Lofb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx		/* remaining length; sets ZF for jnz */
	movq	$512,%rbx		/* mov does not disturb the flags */
	jnz	.Lofb_loop
	cmpq	%rbp,%rsp
	je	.Lofb_done		/* no bounce buffer was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lofb_bzero:
	movaps	%xmm0,(%rax)		/* wipe the bounce buffer */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lofb_bzero

.Lofb_done:
	leaq	(%rbp),%rsp
	jmp	.Lofb_exit

.align	16
.Lofb_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb */
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16(%rdx)		/* 16 bytes at (%rax) -> context+0 */
.Lofb_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* discard the pushf image */
.Lofb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ofb_encrypt,.-padlock_ofb_encrypt
/*
 * padlock_ctr32_encrypt
 *
 * Processes %rcx bytes from (%rsi) to (%rdi) with "rep xcryptctr".
 *
 * In:    %rdi = output pointer, %rsi = input pointer,
 *        %rdx = context base (must be 16-byte aligned); context+16 is
 *               passed to the hardware in %rdx, context+32 in %rbx and
 *               context+0 in %rax.  The dword at context+12 is treated
 *               as a big-endian 32-bit counter,
 *        %rcx = length in bytes (must be a multiple of 16).
 * Out:   %eax = 1 on completion, 0 when the context pointer or the
 *               length fails the 16-byte check (nothing is processed).
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags (DF is left
 *        cleared).  %rbx and %rbp are saved and restored.
 *
 * Counter handling, as visible in the code: chunk sizes are chosen so
 * that a chunk ends where the low bits of the counter wrap (low 5 bits
 * on the bounce-buffer path, low 16 bits on the aligned path).  When
 * the low 16 bits of the counter are zero after a chunk, 0x10000 is
 * added to the counter in software.
 * NOTE(review): this implies the hardware only increments the low
 * 16 bits of the counter - confirm in the VIA programming guide.
 *
 * When the end of the data lies less than 32 bytes before a 4 KiB
 * boundary, the final piece is copied to the stack and processed from
 * there (NOTE(review): presumably a prefetch workaround).  Any stack
 * scratch area is zeroed before returning.
 */
.globl	padlock_ctr32_encrypt
.type	padlock_ctr32_encrypt,@function
.align	16
padlock_ctr32_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx
	jnz	.Lctr32_abort		/* context not 16-byte aligned */
	testq	$15,%rcx
	jnz	.Lctr32_abort		/* length not a multiple of 16 */
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* RFLAGS image for _padlock_verify_ctx */
	cld				/* rep movsq below must copy upwards */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = context+16 from here on */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* bit 5 of the dword at context+16 */
	jnz	.Lctr32_aligned
	testq	$0x0f,%rdi
	setz	%al			/* 1 if output is aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* 1 if input is aligned */
	testl	%ebx,%eax
	jnz	.Lctr32_aligned		/* both aligned */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* 0 if output aligned, else -1 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* min(512, len) */
	andq	%rbx,%rax		/* bounce size, 0 if output aligned */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx
	leaq	(%rax,%rbp,1),%rsp	/* allocate the bounce buffer */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* this %rbx value is replaced just below */
.Lctr32_reenter:
	movl	-4(%rdx),%eax		/* counter dword at context+12 */
	bswapl	%eax			/* big-endian -> host order */
	negl	%eax
	andl	$31,%eax		/* blocks until the low 5 bits wrap */
	movq	$512,%rbx
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* already at a wrap point: 512 bytes */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* chunk = min(len, bytes to wrap) */
	cmovbeq	%rcx,%rbx
	cmpq	%rbx,%rcx
	ja	.Lctr32_loop		/* more than one chunk */
	movq	%rsi,%rax		/* single chunk: page-end check */
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: check output end */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end to 4 KiB boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 32 ? -32 : chunk */
	andq	%rax,%rbx
	jz	.Lctr32_unaligned_tail	/* everything is tail */
	jmp	.Lctr32_loop
.align	16
.Lctr32_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save output pointer */
	movq	%rsi,%r9		/* save input pointer */
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: use bounce buffer */
	testq	$0x0f,%rsi
	jz	.Lctr32_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: input -> aligned area */
	subq	%rbx,%rdi
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* process the copy in place */
.Lctr32_inp_aligned:
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	movl	-4(%rdx),%eax		/* counter as stored (big-endian) */
	testl	$0xffff0000,%eax	/* = low 16 bits of the counter value */
	jnz	.Lctr32_no_carry
	bswapl	%eax
	addl	$0x10000,%eax		/* low half wrapped: carry into bit 16 */
	bswapl	%eax
	movl	%eax,-4(%rdx)
.Lctr32_no_carry:
	movq	%r8,%rdi
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lctr32_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: bounce buffer -> output */
	subq	%rbx,%rdi
.Lctr32_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi
	addq	%rbx,%rsi
	subq	%rbx,%rcx		/* remaining length; sets ZF for jz */
	movq	$512,%rbx		/* mov does not disturb the flags */
	jz	.Lctr32_break
	cmpq	%rbx,%rcx
	jae	.Lctr32_loop
	movq	%rcx,%rbx		/* last, short chunk: page-end check */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no bounce buffer: check output end */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* distance from end to 4 KiB boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = distance < 32 ? -32 : chunk */
	andq	%rax,%rbx
	jnz	.Lctr32_loop
.Lctr32_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no scratch yet: allocate len bytes */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: tail input -> stack */
	movq	%rsp,%rsi		/* tail is now read from the stack */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop
.align	16
.Lctr32_break:
	cmpq	%rbp,%rsp
	je	.Lctr32_done		/* no scratch area was allocated */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lctr32_bzero:
	movaps	%xmm0,(%rax)		/* wipe the scratch area */
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lctr32_bzero

.Lctr32_done:
	leaq	(%rbp),%rsp
	jmp	.Lctr32_exit

.align	16
.Lctr32_aligned:
	movl	-4(%rdx),%eax		/* counter dword at context+12 */
	bswapl	%eax			/* big-endian -> host order */
	negl	%eax
	andl	$0xffff,%eax		/* blocks until the low 16 bits wrap */
	movq	$1048576,%rbx		/* 65536 blocks * 16 bytes */
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* already at a wrap point: full span */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* chunk = min(len, bytes to wrap) */
	cmovbeq	%rcx,%rbx
	jbe	.Lctr32_aligned_skip	/* len fits before the wrap */

.Lctr32_aligned_loop:
	movq	%rcx,%r10		/* save remaining length */
	movq	%rbx,%rcx
	movq	%rbx,%r11		/* save chunk size */

	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx			/* bytes -> 16-byte blocks */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */

	movl	-4(%rdx),%eax
	bswapl	%eax
	addl	$0x10000,%eax		/* chunk ended on a wrap: carry into bit 16 */
	bswapl	%eax
	movl	%eax,-4(%rdx)

	movq	%r10,%rcx
	subq	%r11,%rcx		/* remaining length; sets ZF for jz */
	movq	$1048576,%rbx		/* mov does not disturb the flags */
	jz	.Lctr32_exit
	cmpq	%rbx,%rcx
	jae	.Lctr32_aligned_loop

.Lctr32_aligned_skip:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* distance from input end to 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$32,%rbp
	movq	$32-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = distance < 32 ? 31 : 0 */
	andq	%rcx,%rbp		/* %rbp = tail bytes (0 if none) */
	subq	%rbp,%rcx
	jz	.Lctr32_aligned_tail	/* nothing but tail */
	leaq	-16(%rdx),%rax		/* context+0 */
	leaq	16(%rdx),%rbx		/* context+32 */
	shrq	$4,%rcx
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	testq	%rbp,%rbp
	jz	.Lctr32_exit

.Lctr32_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* %rbp = stack pointer to restore */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: tail input -> stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop
.Lctr32_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* discard the pushf image */
.Lctr32_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
/*
 * Identification string embedded in .text, NUL-terminated:
 * "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
.data
.align	8
/*
 * One 8-byte slot, initially 0.  _padlock_verify_ctx compares the
 * incoming context pointer against it and then stores that pointer
 * here.  It is a single process-wide variable in .data.
 */
.Lpadlock_saved_context:
.quad	0
1039	.section ".note.gnu.property", "a"
1040	.p2align 3
1041	.long 1f - 0f
1042	.long 4f - 1f
1043	.long 5
10440:
1045	# "GNU" encoded with .byte, since .asciz isn't supported
1046	# on Solaris.
1047	.byte 0x47
1048	.byte 0x4e
1049	.byte 0x55
1050	.byte 0
10511:
1052	.p2align 3
1053	.long 0xc0000002
1054	.long 3f - 2f
10552:
1056	.long 3
10573:
1058	.p2align 3
10594:
1060