/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>

/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions (ERMS). They
 * are recommended where available, so we use them by default. If ERMS
 * is not available, try the fast-string variant (REP STOSQ); otherwise,
 * fall back to the open-coded original loop.
 */

/*
 * Zero a page.
 * %rdi - page
 */
SYM_FUNC_START(clear_page_rep)
	movl	$4096/8,%ecx		/* 512 quadword stores */
	xorl	%eax,%eax
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)

/* Open-coded fallback: 64 iterations of eight quadword stores each. */
SYM_FUNC_START(clear_page_orig)
	xorl	%eax,%eax
	movl	$4096/64,%ecx
	.p2align 4
.Lloop:
	decl	%ecx
#define PUT(x) movq %rax,x*8(%rdi)
	movq	%rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi),%rdi
	jnz	.Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)

/* ERMS: a single byte-granular REP STOSB over the whole page. */
SYM_FUNC_START(clear_page_erms)
	movl	$4096,%ecx
	xorl	%eax,%eax
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)

/*
 * Default clear user-space: follows the REP STOSB register convention.
 *
 * Input:
 * %rdi - destination
 * %rcx - byte count
 * %rax - must be zero
 *
 * Output:
 * %rcx - number of uncleared bytes, or 0 on success
 */
SYM_FUNC_START(rep_stos_alternative)
	cmpq	$64,%rcx
	jae	.Lunrolled

	cmp	$8,%ecx
	jae	.Lword

	testl	%ecx,%ecx
	je	.Lexit

	/*
	 * Byte-at-a-time clearing. Also the fault-recovery target, so it
	 * must handle any remaining count, not just the 1..7 byte tail.
	 */
.Lclear_user_tail:
0:	movb	%al,(%rdi)
	inc	%rdi
	dec	%rcx
	jnz	.Lclear_user_tail
.Lexit:
	RET

	_ASM_EXTABLE_UA( 0b, .Lexit)

	/* Clear 8..63 bytes one quadword at a time. */
.Lword:
1:	movq	%rax,(%rdi)
	addq	$8,%rdi
	sub	$8,%ecx
	je	.Lexit
	cmp	$8,%ecx
	jae	.Lword
	jmp	.Lclear_user_tail

	/* Clear 64 bytes per iteration with an unrolled store sequence. */
	.p2align 4
.Lunrolled:
10:	movq	%rax,(%rdi)
11:	movq	%rax,8(%rdi)
12:	movq	%rax,16(%rdi)
13:	movq	%rax,24(%rdi)
14:	movq	%rax,32(%rdi)
15:	movq	%rax,40(%rdi)
16:	movq	%rax,48(%rdi)
17:	movq	%rax,56(%rdi)
	addq	$64,%rdi
	subq	$64,%rcx
	cmpq	$64,%rcx
	jae	.Lunrolled
	cmpl	$8,%ecx
	jae	.Lword
	testl	%ecx,%ecx
	jne	.Lclear_user_tail
	RET

	/*
	 * If we take an exception on any of the
	 * word stores, we know that %rcx isn't zero,
	 * so we can just go to the tail clearing to
	 * get the exact count.
	 *
	 * The unrolled case might end up clearing
	 * some bytes twice. Don't care.
	 *
	 * We could use the value in %rdi to avoid
	 * a second fault on the exact count case,
	 * but do we really care? No.
	 *
	 * Finally, we could try to align %rdi at the
	 * top of the unrolling. But unaligned stores
	 * just aren't that common or expensive.
	 */
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)
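
/*
 * Illustrative only, not part of this file: callers select one of the
 * three clear_page variants above at runtime through the alternatives
 * mechanism. A minimal C sketch, modelled on clear_page() in
 * arch/x86/include/asm/page_64.h; the exact constraints and clobbers
 * vary between kernel versions, so treat them as assumptions:
 *
 *	static inline void clear_page(void *page)
 *	{
 *		alternative_call_2(clear_page_orig,
 *				   clear_page_rep, X86_FEATURE_REP_GOOD,
 *				   clear_page_erms, X86_FEATURE_ERMS,
 *				   "=D" (page),
 *				   "D" (page),
 *				   "cc", "memory", "rax", "rcx");
 *	}
 */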
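
/*
 * Likewise illustrative: rep_stos_alternative deliberately matches the
 * REP STOSB register convention (%rdi/%rcx/%rax in, remainder in %rcx),
 * so callers can patch it in as an alternative to the bare instruction.
 * A hedged sketch along the lines of __clear_user() in
 * arch/x86/include/asm/uaccess_64.h; the feature flag and the precise
 * alternative macro are assumptions that differ across trees:
 *
 *	static __always_inline __must_check unsigned long
 *	__clear_user(void __user *addr, unsigned long size)
 *	{
 *		might_fault();
 *		stac();
 *		asm volatile("1:\n\t"
 *			     ALTERNATIVE("rep stosb",
 *					 "call rep_stos_alternative",
 *					 ALT_NOT(X86_FEATURE_FSRS))
 *			     "2:\n"
 *			     _ASM_EXTABLE_UA(1b, 2b)
 *			     : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
 *			     : "a" (0));
 *		clac();
 *		return size;	// uncleared byte count, 0 on success
 *	}
 */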