/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10.
 * It does not clobber rax or r11.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with the direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
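	/*
	 * The unsigned comparison above doubles as the overlap test:
	 * r8 = dst - src wraps around when src > dst, so the branch
	 * below is taken only when src < dst and dst - src < cnt, i.e.
	 * when the start of the destination lies inside the source
	 * buffer and the copy must therefore run backwards.
	 */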
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/* Copy forwards, 32 bytes per iteration. */
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	/* 16..32 bytes: copy the first and last 16, possibly overlapping. */
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
	/* Sizes above 256 bytes: let the string instructions do the work. */
1256:
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx		/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx		/* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
	/* Backward tail: each label below peels the top chunk of what remains. */
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std			/* DF=1: string instructions run backwards */
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm

/*
 * memmove() and memcpy() return the destination, so stash it in rax
 * and move the count into rcx for the macro body.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif
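
/*
 * Note that this file only ever instantiates the macro with erms=0.
 * As a sketch (not part of this file), an instantiation for CPUs that
 * advertise Enhanced REP MOVSB would pass erms=1 under a hypothetical
 * symbol name:
 *
 *	ENTRY(memmove_erms)
 *		MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
 *	END(memmove_erms)
 */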