/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * We build a jump to memcpy_orig by default which gets NOPped out on
 * the majority of x86 CPUs which set REP_GOOD. In addition, on CPUs
 * which have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs
 * are changed to a jmp to memcpy_erms, which does the REP; MOVSB mem
 * copy.
 */

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 */
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_LOCAL(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
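
/*
 * Worked example for the REP_GOOD path above: for a count of 27 bytes,
 * %rcx = 27 >> 3 = 3, so REP MOVSQ copies three quadwords (24 bytes),
 * and the remaining 27 & 7 = 3 bytes are then copied with REP MOVSB.
 */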

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
SYM_FUNC_START(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
SYM_FUNC_END(memcpy_erms)

SYM_FUNC_START(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle,
	 * so pad with NOPs within the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Move data from 1 byte to 3 bytes.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)

.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
SYM_FUNC_END(memcpy_orig)

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to the target are posted and don't generate machine checks.
 */
SYM_FUNC_START(__memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes

.L_8byte_aligned:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
.L_done:
	ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

	.section .fixup, "ax"
	/*
	 * Return the number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 */
.E_read_words:
	shll $3, %ecx
.E_leading_bytes:
	addl %edx, %ecx
.E_trailing_bytes:
	mov %ecx, %eax
	jmp .L_done

	/*
	 * For write fault handling, given that the destination is unaligned,
	 * we handle faults on multi-byte writes with a byte-by-byte
	 * copy up to the write-protected page.
	 */
.E_write_words:
	shll $3, %ecx
	addl %edx, %ecx
	movl %ecx, %edx
	jmp mcsafe_handle_tail

	.previous

	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif
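
/*
 * Worked example for the __memcpy_mcsafe() fixups above: a machine check
 * taken at .L_read_words with %ecx = 4 whole words still to copy and
 * %edx = 5 trailing bytes outstanding lands in .E_read_words, which
 * returns 4 * 8 + 5 = 37 bytes not copied.
 */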