/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * MEMMOVE — body shared by memmove(3) and memcpy(3).
 *
 * Macro arguments:
 *	erms	- 1: use "rep movsb" for the large-copy path (Enhanced
 *		  REP MOVSB capable CPUs); 0: use "rep movsq" + byte tail.
 *	overlap	- 1: detect a src < dst overlap and copy backwards so the
 *		  source is read before it is overwritten (memmove
 *		  semantics); 0: always copy forwards (memcpy semantics —
 *		  ISO C leaves overlapping memcpy undefined, so the check
 *		  would be pure overhead).
 *	begin	- macro emitted on entry (sets up rax return value and
 *		  copies the count into rcx, see MEMMOVE_BEGIN).
 *	end	- macro emitted just before every "ret".
 *
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8.
 * It does not clobber rax, r10 nor r11.
 *
 * Label numbering scheme: 1xxx labels belong to the forward copy,
 * 2xxx labels to the backward copy; the trailing digits name the
 * chunk size handled at that label (1016 = "< 32 bytes left",
 * 1008/1004/1002/1001 = 8/4/2/1-byte tails, 1000/2000 = done,
 * 1256/2256 = "> 256 bytes" bulk path).
 */
.macro MEMMOVE erms overlap begin end
	\begin

.if \overlap == 1
	/*
	 * Only a copy with src < dst < src + cnt must run backwards.
	 * Computing (dst - src) as an unsigned value folds both
	 * conditions into a single compare against the count.
	 */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$32,%rcx
	jb	1016f		/* < 32 bytes: straight-line tails */

	cmpq	$256,%rcx
	ja	1256f		/* > 256 bytes: rep-prefixed bulk copy */

	/*
	 * 32..256 bytes: unrolled 32-byte chunks through rdx (the
	 * original count is no longer needed on this path).
	 */
1032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	1032b
	cmpb	$0,%cl		/* remainder (< 32) left to copy? */
	jne	1016f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Forward tail: peel off 16/8/4/2/1 bytes, testing only cl
	 * since fewer than 32 bytes remain.  Each step exits early
	 * (jz 1000f) when the count hits zero to skip dead pointer
	 * adjustments.
	 */
1016:
	cmpb	$16,%cl
	jl	1008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	subb	$16,%cl
	jz	1000f
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
1008:
	cmpb	$8,%cl
	jl	1004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	1000f
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
1004:
	cmpb	$4,%cl
	jl	1002f
	movl	(%rsi),%edx
	movl	%edx,(%rdi)
	subb	$4,%cl
	jz	1000f
	leaq	4(%rsi),%rsi
	leaq	4(%rdi),%rdi
1002:
	cmpb	$2,%cl
	jl	1001f
	movw	(%rsi),%dx
	movw	%dx,(%rdi)
	subb	$2,%cl
	jz	1000f
	leaq	2(%rsi),%rsi
	leaq	2(%rdi),%rdi
1001:
	cmpb	$1,%cl
	jl	1000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
1000:
	\end
	ret

	ALIGN_TEXT
	/*
	 * > 256 bytes, forward.
	 */
1256:
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andb	$7,%cl			/* any bytes left? */
	jne	1004b			/* reuse the <8-byte forward tail */
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/*
	 * Bias both pointers to the last qword of each buffer; all
	 * tail offsets below are relative to this -8 bias.
	 */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	/* 32..256 bytes: unrolled 32-byte chunks, descending. */
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl		/* remainder (< 32) left to copy? */
	jne	2016f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Backward tail.  The 4/2/1-byte steps read at offsets
	 * 4/6/7 because the pointers still carry the -8 bias from
	 * above after the larger steps have been peeled off.
	 */
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
	/*
	 * > 256 bytes, backward: set the direction flag for the rep
	 * string op and clear it again before returning — the SysV
	 * ABI requires DF = 0 on function entry/exit.
	 */
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi	/* last byte of each buffer */
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi	/* last qword of each buffer */
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl			/* any bytes left? */
	jne	2004b			/* reuse the <8-byte backward tail */
.endif
	\end
	ret
.endif
.endm

/*
 * Entry glue: memmove/memcpy return dst, and the copy loops work on
 * the count in rcx while rdx keeps the original count for the tail
 * fixups after rep movsq.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
/*
 * memcpy(3) is undefined for overlapping buffers (ISO C), so the
 * overlap detection and the entire backward-copy path are omitted.
 */
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif