/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * MEMMOVE erms overlap begin end
 *
 * Macro arguments:
 * erms    - 1: copies larger than 256 bytes use "rep movsb";
 *           otherwise they use "rep movsq" followed by a byte tail.
 * overlap - 1: emit the overlap test and the backward-copy code;
 *           otherwise only the forward paths are generated.
 * begin   - macro expanded once at entry.  The body compares %rcx
 *           immediately afterwards and later reloads %rcx from %rdx, so
 *           it has to leave the count in both registers.
 * end     - macro expanded in front of every "ret".
 *
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 *
 * The backward bulk path sets the direction flag with "std" and clears
 * it again with "cld" before returning.
 *
 * All labels are numeric local labels, referenced with the b/f suffixes.
 * The numbering looks like <direction><low size><high size> (101632 =
 * forward 16..32, 2016 = backward 16+) -- NOTE(review): inferred from the
 * size checks at each label, not documented elsewhere in this file.
 *
 * The byte-sized "cmpb ...,%cl" tests are followed by signed "jl".  This
 * is fine because %cl never exceeds 32 wherever they are reached.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	/* r8 = dst - src; unsigned (dst - src) < cnt means copy backwards. */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/*
	 * Forward copy of 33..256 bytes: move 32 bytes per iteration using
	 * %rdx as scratch, then finish the remaining 0..31 bytes below.
	 */
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl		/* rcx < 32 here, so %cl is the whole count */
	jne	101632f
	\end
	ret
	ALIGN_TEXT
	/*
	 * 16..32 bytes: load the first 16 and the last 16 bytes (the two
	 * ranges may overlap each other), then store all four words.
	 */
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
	/* 8..15 bytes: first and last quadword, loaded before either store. */
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx,)
	\end
	ret
	ALIGN_TEXT
	/*
	 * 4..7 bytes: first and last doubleword.  Also the entry point for
	 * the 1..7 byte tail left over by the forward "rep movsq" paths.
	 */
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
	/* 2..3 bytes: first and last word. */
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
	/* 1 byte, or nothing at all when the count is 0. */
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
	/*
	 * Forward copy of more than 256 bytes.  %rdx still holds the count
	 * here because the 32-byte loop above was never entered.
	 */
1256:
	testb	$15,%dil	/* is the destination 16-byte aligned? */
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx				/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx				/* any bytes left? */
	jne	100408b
.endif
	\end
	ret
	/*
	 * Destination is not 16-byte aligned.  Load the first 16 source
	 * bytes into r8/r9 and remember the original destination in r10,
	 * then advance both pointers by 16 - (dst & 15) and shrink the
	 * count in %rdx by the same amount, so the string instruction
	 * starts at an aligned destination.  The saved 16 bytes are stored
	 * to the original destination after the bulk copy.
	 */
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx			/* rcx = dst & 15 */
	leaq	-16(%rdx,%rcx),%rdx		/* cnt -= 16 - (dst & 15) */
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi		/* dst += 16 - (dst & 15) */
	leaq	16(%rsi,%rcx),%rsi		/* src advances by the same */
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx				/* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx				/* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/* Point both registers at the last 8 bytes of their buffers. */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	/* Move 32 bytes per iteration, highest quadword first. */
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Backward tail.  %rsi/%rdi point 8 bytes below the end of what is
	 * still to be copied and %cl holds the remaining count.  Each step
	 * copies the largest power-of-two piece that fits, subtracts it
	 * from %cl and moves the pointers down by that amount.
	 */
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
	/* Also the entry point for the tail left by "rep movsq" at 2256. */
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx		/* top 4 bytes of the 8-byte window */
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx		/* top 2 bytes of the 8-byte window */
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl		/* top byte of the 8-byte window */
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
	/*
	 * Backward copy of more than 256 bytes: set the direction flag so
	 * the string instruction walks downwards and clear it again
	 * afterwards.
	 */
2256:
	std
.if \erms == 1
	/* Start at the last byte of each buffer. */
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	/* Start at the last quadword of each buffer. */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	/*
	 * %rsi/%rdi now sit 8 bytes below the last quadword copied, which
	 * is the window layout the tail code at 2004 expects.  Only %cl is
	 * reduced to the 0..7 leftover bytes; the tail looks at %cl only.
	 */
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm


/*
 * Entry hook for MEMMOVE: copy the destination pointer into %rax (MEMMOVE
 * leaves %rax alone, so it is still there at "ret") and copy the count
 * from %rdx into %rcx, where the macro body expects it.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

/* Exit hook for MEMMOVE: intentionally empty, nothing to undo. */
.macro MEMMOVE_END
.endm

/*
 * Build exactly one entry point from this file: memcpy when MEMCPY is
 * defined, memmove otherwise.  Both use the same instantiation, so memcpy
 * is generated with the overlap handling as well (overlap=1) and neither
 * uses the "rep movsb" variant (erms=0).
 */
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif