/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * C equivalent: void *memmove(void *dest, const void *src, size_t count)
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
ENTRY(memmove)
	CFI_STARTPROC

	/*
	 * Return value is always dest.  Copies of >= 32 bytes go through
	 * the unrolled 32-bytes-per-iteration loops below; anything
	 * smaller falls straight through to the tail code at 1:.
	 */
	mov %rdi, %rax
	cmp $0x20, %rdx
	jb	1f

	/*
	 * Decide forward/backward copy mode.  A forward copy is safe
	 * unless the regions overlap with src < dest < src + count
	 * (then the head of dest would clobber the tail of src before
	 * it was read), in which case jump to the backward path at 2:.
	 * NOTE(review): jge/jg are signed compares on pointers; fine for
	 * kernel addresses, which do not straddle the sign boundary.
	 */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

.Lmemmove_begin_forward:
	/*
	 * movsq has significant startup latency, so sizes below 680
	 * bytes are handled with plain register moves instead.
	 * (On CPUs with ERMS this whole forward section is patched at
	 * boot into a single "rep movsb" sequence -- see the
	 * .altinstructions descriptor at the bottom of the file.)
	 */
	cmp $680, %rdx
	jb	3f
	/*
	 * movsq is only good for the mutually-aligned case; matching
	 * low address bytes of src and dest is used as the test.
	 */
	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx
	/*
	 * Forward copy loop: we gobble 32 bytes per iteration through
	 * four quadword registers.  rdx is biased by -0x20 on entry so
	 * that the "jae" below (flags come from the sub at 5:; the
	 * movq/leaq in between do not touch flags) exits once fewer
	 * than 32 bytes remain.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	/* Undo the bias: rdx = remaining bytes (< 0x20), finished at 1:. */
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle large aligned data forward by movsq.  The last
	 * (possibly partial) quadword is loaded up front and stored
	 * after the rep movsq, so the string op only runs on whole
	 * quadwords.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle large aligned data backward by movsq (direction flag
	 * set, then restored with cld).  Mirrors the forward movsq
	 * case: the first quadword is saved up front and stored last.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Start to prepare for backward copy: same small/aligned
	 * triage as the forward path above.
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Calculate copy position to tail: point rsi/rdi one past the
	 * end of each buffer and copy backwards from there.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * Backward copy loop: we gobble 32 bytes per iteration, using
	 * the same biased-rdx / "jae on sub flags" scheme as the
	 * forward loop at 5:.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate copy position back to the head of the remaining
	 * (< 0x20 byte) region.  The shared tail below copies forward,
	 * which is safe even for the backward-overlap case because
	 * every tail variant loads all its source bytes into registers
	 * before performing any store.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes: copy the first and the
	 * last 16 bytes; the two store pairs may overlap in the middle.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes: first and last quadword,
	 * possibly overlapping.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/* Move data from 4 bytes to 7 bytes (overlapping dwords). */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/* Move data from 2 bytes to 3 bytes (overlapping words). */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/* Move data for 1 byte. */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq
	CFI_ENDPROC

	/*
	 * ERMS (Enhanced REP MOVSB) replacement for the entire forward
	 * path: on CPUs with this feature a bare rep movsb is at least
	 * as fast as the unrolled/movsq code above.
	 */
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward moving data. */
	movq %rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	/*
	 * Alternative-instruction descriptor: when X86_FEATURE_ERMS is
	 * present, patch the [.Lmemmove_begin_forward,
	 * .Lmemmove_end_forward) range with the rep-movsb stub above.
	 * The two .byte lengths are computed from the labels, so code
	 * size inside that range is load-bearing.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad .Lmemmove_begin_forward
	.quad .Lmemmove_begin_forward_efs
	.word X86_FEATURE_ERMS
	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
	.previous
ENDPROC(memmove)