xref: /linux/arch/x86/lib/clear_page_64.S (revision b4db9f840283caca0d904436f187ef56a9126eaa)
1/* SPDX-License-Identifier: GPL-2.0-only */
2#include <linux/export.h>
3#include <linux/linkage.h>
4#include <asm/asm.h>
5
/*
 * Most CPUs support the enhanced REP MOVSB/STOSB (ERMS) instructions.
 * Using them is recommended whenever possible, and we do so by default
 * (clear_page_erms). If ERMS is not available, try the fast-string
 * variant (clear_page_rep). Otherwise, fall back to the original
 * unrolled store loop (clear_page_orig).
 */
12
/*
 * clear_page_rep - zero one 4096-byte page with REP STOSQ.
 *
 * Input:
 *   %rdi - address of the page to clear
 *
 * Clobbers: %rax (left as zero), %rcx (the REP count), %rdi, flags.
 */
SYM_FUNC_START(clear_page_rep)
	xorl	%eax, %eax		/* %rax = 0: the quadword that gets stored */
	movl	$4096/8, %ecx		/* REP count: 512 quadwords = one page */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
24
/*
 * clear_page_orig - zero one 4096-byte page with plain 64-bit stores.
 *
 * Input:
 *   %rdi - address of the page to clear
 *
 * The page is handled as 4096/64 = 64 chunks. Each pass through the
 * loop issues eight quadword stores of %rax (zero) and then advances
 * %rdi by 64 bytes.
 *
 * Clobbers: %rax (left as zero), %rcx, %rdi, flags.
 */

/* PUT(x): store %rax into quadword slot x of the current 64-byte chunk. */
#define PUT(x) movq %rax,x*8(%rdi)

SYM_FUNC_START(clear_page_orig)
	movl	$4096/64, %ecx		/* chunks still to be cleared */
	xorl	%eax, %eax		/* %rax = 0: the value stored */
	.p2align 4
.Lloop:
	/*
	 * Count down first. The ZF produced here is the one the jnz at
	 * the bottom of the loop consumes: neither movq nor leaq writes
	 * flags, so it survives the stores and the pointer update.
	 */
	decl	%ecx
	PUT(0)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi), %rdi		/* next chunk; lea keeps ZF intact */
	jnz	.Lloop
	/* NOTE(review): purpose of this nop is not evident here; kept as-is. */
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
46
/*
 * clear_page_erms - zero one 4096-byte page with REP STOSB.
 *
 * Input:
 *   %rdi - address of the page to clear
 *
 * Clobbers: %rax (left as zero), %rcx (the REP count), %rdi, flags.
 */
SYM_FUNC_START(clear_page_erms)
	xorl	%eax, %eax		/* %al = 0: the byte that gets stored */
	movl	$4096, %ecx		/* REP count: every byte of the page */
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
54
/*
 * rep_stos_alternative - default routine for clearing user-space memory.
 *
 * Input:
 *   %rdi - destination
 *   %rcx - byte count
 *   %rax - zero (this is the value that gets stored)
 *
 * Output:
 *   %rcx - number of bytes left uncleared, or 0 if successful
 *
 * %rdi and flags are modified as well.
 *
 * The count is worked off in three tiers: unrolled 64-byte blocks while
 * at least 64 bytes remain, single quadwords while at least 8 remain,
 * and finally single bytes.
 *
 * Fault handling: every store has an exception-table entry.
 *
 *  - A fault on any quadword store (single or unrolled) resumes in the
 *    byte loop. %rcx and %rdi have not yet been updated for the store
 *    that faulted, so %rcx is known to be non-zero and the byte loop
 *    can simply run until it hits the fault itself, which yields the
 *    exact count.
 *  - A fault on the byte store resumes at the return, leaving the
 *    number of uncleared bytes in %rcx.
 *
 * The unrolled case might end up clearing some bytes twice. Don't care.
 *
 * We could use the value in %rdi to avoid a second fault in the
 * exact-count case, but do we really care? No.
 *
 * Finally, we could try to align %rdi at the top of the unrolling. But
 * unaligned stores just aren't that common or expensive.
 */
SYM_FUNC_START(rep_stos_alternative)
	cmpq	$64, %rcx
	jae	.Lblock64

	/* Fewer than 64 bytes: the count now fits in %ecx. */
	cmpl	$8, %ecx
	jae	.Lqword

	testl	%ecx, %ecx
	je	.Ldone

	/* Byte tier: %rcx is non-zero on entry and counts down to 0. */
.Lbyte_tail:
0:	movb	%al, (%rdi)
	incq	%rdi
	decq	%rcx
	jnz	.Lbyte_tail
.Ldone:
	RET

	/* Byte store faulted: return with the remaining count in %rcx. */
	_ASM_EXTABLE_UA( 0b, .Ldone)

	/* Quadword tier: entered with 8 <= count < 64. */
.Lqword:
1:	movq	%rax, (%rdi)
	addq	$8, %rdi
	subl	$8, %ecx
	je	.Ldone
	cmpl	$8, %ecx
	jae	.Lqword
	jmp	.Lbyte_tail

	/* Quadword store faulted: redo it bytewise for the exact count. */
	_ASM_EXTABLE_UA( 1b, .Lbyte_tail)

	/* Block tier: entered with count >= 64; eight stores per pass. */
	.p2align 4
.Lblock64:
10:	movq	%rax, (%rdi)
11:	movq	%rax, 8(%rdi)
12:	movq	%rax, 16(%rdi)
13:	movq	%rax, 24(%rdi)
14:	movq	%rax, 32(%rdi)
15:	movq	%rax, 40(%rdi)
16:	movq	%rax, 48(%rdi)
17:	movq	%rax, 56(%rdi)
	addq	$64, %rdi
	subq	$64, %rcx
	cmpq	$64, %rcx
	jae	.Lblock64
	/* Under 64 bytes left: hand over to the smaller tiers. */
	cmpl	$8, %ecx
	jae	.Lqword
	testl	%ecx, %ecx
	jne	.Lbyte_tail
	RET

	/*
	 * Any unrolled store faulted: %rdi and %rcx still describe the
	 * start of this block, so the byte loop redoes it from there.
	 */
	_ASM_EXTABLE_UA(10b, .Lbyte_tail)
	_ASM_EXTABLE_UA(11b, .Lbyte_tail)
	_ASM_EXTABLE_UA(12b, .Lbyte_tail)
	_ASM_EXTABLE_UA(13b, .Lbyte_tail)
	_ASM_EXTABLE_UA(14b, .Lbyte_tail)
	_ASM_EXTABLE_UA(15b, .Lbyte_tail)
	_ASM_EXTABLE_UA(16b, .Lbyte_tail)
	_ASM_EXTABLE_UA(17b, .Lbyte_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)
142