1/*- 2 * Copyright (c) 2018 The FreeBSD Foundation 3 * 4 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org> 5 * under sponsorship from the FreeBSD Foundation. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <machine/asm.h> 30__FBSDID("$FreeBSD$"); 31 32#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ 33 34/* 35 * memmove(dst, src, cnt) 36 * rdi, rsi, rdx 37 * Contains parts of bcopy written by: 38 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 39 */ 40 41/* 42 * Register state at entry is supposed to be as follows: 43 * rdi - destination 44 * rsi - source 45 * rdx - count 46 * 47 * The macro possibly clobbers the above and: rcx, r8. 
 * It does not clobber rax, r10 nor r11.
 *
 * Label scheme: 1NNN labels belong to the forward copy, 2NNN labels to
 * the backward (overlapping) copy; NNN is the chunk size handled there.
 */
.macro MEMMOVE erms overlap begin end
	\begin
.if \overlap == 1
	/*
	 * A forward copy would corrupt not-yet-read source bytes when the
	 * regions overlap with src < dst.  The unsigned test
	 * (dst - src) < cnt detects exactly that case; jump to the
	 * backward copy at 2:.
	 */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$32,%rcx	/* < 32 bytes: straight to the tail copy */
	jb	1016f

	cmpq	$256,%rcx	/* > 256 bytes: use rep movs */
	ja	1256f

1032:
	/* Forward copy, 32 bytes per iteration; %rdx is scratch here. */
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	1032b
	cmpb	$0,%cl		/* count < 32 now, so %cl == full count */
	jne	1016f
	\end
	ret
	ALIGN_TEXT
1016:
	/*
	 * Copy the 1..31 remaining bytes in descending power-of-two
	 * chunks; %cl holds the remaining count throughout.
	 */
	cmpb	$16,%cl
	jl	1008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	subb	$16,%cl
	jz	1000f
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
1008:
	cmpb	$8,%cl
	jl	1004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	1000f
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
1004:
	cmpb	$4,%cl
	jl	1002f
	movl	(%rsi),%edx
	movl	%edx,(%rdi)
	subb	$4,%cl
	jz	1000f
	leaq	4(%rsi),%rsi
	leaq	4(%rdi),%rdi
1002:
	cmpb	$2,%cl
	jl	1001f
	movw	(%rsi),%dx
	movw	%dx,(%rdi)
	subb	$2,%cl
	jz	1000f
	leaq	2(%rsi),%rsi
	leaq	2(%rdi),%rdi
1001:
	cmpb	$1,%cl
	jl	1000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
1000:
	\end
	ret

	ALIGN_TEXT
1256:
	/* Large (> 256 bytes) forward copy. */
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx	/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx	/* original count preserved in %rdx */
	andb	$7,%cl	/* any bytes left? */
	jne	1004b	/* finish the < 8-byte tail above */
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.  Same structure as the forward copy, but the
	 * pointers are first advanced one past the end of each region and
	 * all accesses use negative offsets.
	 */
	ALIGN_TEXT
2:
	addq	%rcx,%rdi
	addq	%rcx,%rsi

	cmpq	$32,%rcx
	jb	2016f

	cmpq	$256,%rcx
	ja	2256f

2032:
	/* Backward copy, 32 bytes per iteration; %rdx is scratch. */
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	movq	-32(%rsi),%rdx
	movq	%rdx,-32(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl		/* count < 32 now, so %cl == full count */
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	/* Backward tail: 1..31 bytes, descending power-of-two chunks. */
	cmpb	$16,%cl
	jl	2008f
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	-4(%rsi),%edx
	movl	%edx,-4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	-2(%rsi),%dx
	movw	%dx,-2(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	-1(%rsi),%dl
	movb	%dl,-1(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	/*
	 * Large backward copy with the string instructions: point at the
	 * last byte of each region and set the direction flag.  The flag
	 * is restored with cld before returning (callers expect DF=0).
	 */
	decq	%rdi
	decq	%rsi
	std
.if \erms == 1
	rep
	movsb
.else
	andq	$7,%rcx	/* any fractional bytes? */
	je	3f
	rep
	movsb
3:
	movq	%rdx,%rcx	/* copy remainder by 64-bit words */
	shrq	$3,%rcx
	subq	$7,%rsi		/* movsq reads/writes 8 bytes ending here */
	subq	$7,%rdi
	rep
	movsq
.endif
	cld
	\end
	ret
.endif
.endm

/*
 * Common entry: save dst as the return value (memmove/memcpy return the
 * destination pointer) and move the count into %rcx for the macro.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
/*
 * memcpy is also built with overlap=1, keeping it safe for overlapping
 * buffers even though the C standard does not require that of memcpy.
 */
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif