/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
 * We build a jump to memcpy_orig by default, which gets NOPped out on
 * the majority of x86 CPUs (those which set REP_GOOD). In addition, on
 * CPUs which have the enhanced REP MOVSB/STOSB feature (ERMS), those
 * NOPs are changed to a jmp to memcpy_erms, which does the REP; MOVSB
 * memory copy.
 */
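/*
 * As a rough, illustrative C-level sketch only (the real selection is
 * done once at boot by alternatives patching, not per call), the
 * ALTERNATIVE_2 dispatch in memcpy below behaves like:
 *
 *	if (boot_cpu_has(X86_FEATURE_ERMS))
 *		memcpy_erms(dst, src, len);	(REP MOVSB copy)
 *	else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *		(fall through to the rep movsq body of memcpy)
 *	else
 *		memcpy_orig(dst, src, len);	(unrolled copy)
 */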
.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax		/* return the original destination */
	movq %rdx, %rcx
	shrq $3, %rcx		/* number of whole qwords */
	andl $7, %edx		/* remaining byte count */
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
ENDPROC(memcpy)
ENDPROC(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
ENDPROC(memcpy_erms)

ENTRY(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations are issued in one cycle,
	 * so append NOPs within the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend		/* length was 0 */
	/*
	 * Move data from 1 byte to 3 bytes. The movzbl below does not
	 * clobber the flags, so the jz still tests the subl above
	 * (ZF set means the length was exactly 1).
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML
/*
 * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to the target are posted and don't generate machine checks.
 */
ENTRY(memcpy_mcsafe_unrolled)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx		/* ecx = bytes until source is aligned */
	subl %ecx, %edx
.L_copy_leading_bytes:
	movb (%rsi), %al
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_copy_leading_bytes

.L_8byte_aligned:
	/* Figure out how many whole cache lines (64 bytes) to copy */
	movl %edx, %ecx
	andl $63, %edx
	shrl $6, %ecx
	jz .L_no_whole_cache_lines

	/* Loop copying whole cache lines */
.L_cache_w0: movq (%rsi), %r8
.L_cache_w1: movq 1*8(%rsi), %r9
.L_cache_w2: movq 2*8(%rsi), %r10
.L_cache_w3: movq 3*8(%rsi), %r11
	movq %r8, (%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
.L_cache_w4: movq 4*8(%rsi), %r8
.L_cache_w5: movq 5*8(%rsi), %r9
.L_cache_w6: movq 6*8(%rsi), %r10
.L_cache_w7: movq 7*8(%rsi), %r11
	movq %r8, 4*8(%rdi)
	movq %r9, 5*8(%rdi)
	movq %r10, 6*8(%rdi)
	movq %r11, 7*8(%rdi)
	leaq 64(%rsi), %rsi
	leaq 64(%rdi), %rdi
	decl %ecx
	jnz .L_cache_w0

	/* Are there any trailing 8-byte words? */
.L_no_whole_cache_lines:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

	/* Copy trailing words */
.L_copy_trailing_words:
	movq (%rsi), %r8
	mov %r8, (%rdi)
	leaq 8(%rsi), %rsi
	leaq 8(%rdi), %rdi
	decl %ecx
	jnz .L_copy_trailing_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_copy_trailing_bytes:
	movb (%rsi), %al
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_copy_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorq %rax, %rax
	ret
ENDPROC(memcpy_mcsafe_unrolled)

	.section .fixup, "ax"
	/* Return -EFAULT for any failure */
.L_memcpy_mcsafe_fail:
	mov $-EFAULT, %rax
	ret

	.previous

	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
#endif
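/*
 * Caller-side sketch for memcpy_mcsafe_unrolled() (illustrative C-level
 * pseudocode with a hypothetical caller): the return value must be
 * checked, because a machine check on a source read bails out through
 * the .fixup handler above and leaves the destination only partially
 * written.
 *
 *	if (memcpy_mcsafe_unrolled(dst, src, len))
 *		handle the -EFAULT: treat the source data as lost
 */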