/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
36 */ 37 38#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ 39 40/* 41 * memmove(dst, src, cnt) 42 * rdi, rsi, rdx 43 */ 44 45/* 46 * Register state at entry is supposed to be as follows: 47 * rdi - destination 48 * rsi - source 49 * rdx - count 50 * 51 * The macro possibly clobbers the above and: rcx, r8, r9, 10 52 * It does not clobber rax nor r11. 53 */ 54.macro MEMMOVE erms overlap begin end 55 \begin 56 57 /* 58 * For sizes 0..32 all data is read before it is written, so there 59 * is no correctness issue with direction of copying. 60 */ 61 cmpq $32,%rcx 62 jbe 101632f 63 64.if \overlap == 1 65 movq %rdi,%r8 66 subq %rsi,%r8 67 cmpq %rcx,%r8 /* overlapping && src < dst? */ 68 jb 2f 69.endif 70 71 cmpq $256,%rcx 72 ja 1256f 73 74 ALIGN_TEXT 75103200: 76 movq (%rsi),%rdx 77 movq %rdx,(%rdi) 78 movq 8(%rsi),%rdx 79 movq %rdx,8(%rdi) 80 movq 16(%rsi),%rdx 81 movq %rdx,16(%rdi) 82 movq 24(%rsi),%rdx 83 movq %rdx,24(%rdi) 84 leaq 32(%rsi),%rsi 85 leaq 32(%rdi),%rdi 86 subq $32,%rcx 87 cmpq $32,%rcx 88 jae 103200b 89 cmpb $0,%cl 90 jne 101632f 91 \end 92 ret 93 ALIGN_TEXT 94101632: 95 cmpb $16,%cl 96 jl 100816f 97 movq (%rsi),%rdx 98 movq 8(%rsi),%r8 99 movq -16(%rsi,%rcx),%r9 100 movq -8(%rsi,%rcx),%r10 101 movq %rdx,(%rdi) 102 movq %r8,8(%rdi) 103 movq %r9,-16(%rdi,%rcx) 104 movq %r10,-8(%rdi,%rcx) 105 \end 106 ret 107 ALIGN_TEXT 108100816: 109 cmpb $8,%cl 110 jl 100408f 111 movq (%rsi),%rdx 112 movq -8(%rsi,%rcx),%r8 113 movq %rdx,(%rdi) 114 movq %r8,-8(%rdi,%rcx,) 115 \end 116 ret 117 ALIGN_TEXT 118100408: 119 cmpb $4,%cl 120 jl 100204f 121 movl (%rsi),%edx 122 movl -4(%rsi,%rcx),%r8d 123 movl %edx,(%rdi) 124 movl %r8d,-4(%rdi,%rcx) 125 \end 126 ret 127 ALIGN_TEXT 128100204: 129 cmpb $2,%cl 130 jl 100001f 131 movzwl (%rsi),%edx 132 movzwl -2(%rsi,%rcx),%r8d 133 movw %dx,(%rdi) 134 movw %r8w,-2(%rdi,%rcx) 135 \end 136 ret 137 ALIGN_TEXT 138100001: 139 cmpb $1,%cl 140 jl 100000f 141 movb (%rsi),%dl 142 movb %dl,(%rdi) 143100000: 144 \end 145 
ret 146 147 ALIGN_TEXT 1481256: 149 testb $15,%dil 150 jnz 100f 151.if \erms == 1 152 rep 153 movsb 154.else 155 shrq $3,%rcx /* copy by 64-bit words */ 156 rep 157 movsq 158 movq %rdx,%rcx 159 andl $7,%ecx /* any bytes left? */ 160 jne 100408b 161.endif 162 \end 163 ret 164100: 165 movq (%rsi),%r8 166 movq 8(%rsi),%r9 167 movq %rdi,%r10 168 movq %rdi,%rcx 169 andq $15,%rcx 170 leaq -16(%rdx,%rcx),%rdx 171 neg %rcx 172 leaq 16(%rdi,%rcx),%rdi 173 leaq 16(%rsi,%rcx),%rsi 174 movq %rdx,%rcx 175.if \erms == 1 176 rep 177 movsb 178 movq %r8,(%r10) 179 movq %r9,8(%r10) 180.else 181 shrq $3,%rcx /* copy by 64-bit words */ 182 rep 183 movsq 184 movq %r8,(%r10) 185 movq %r9,8(%r10) 186 movq %rdx,%rcx 187 andl $7,%ecx /* any bytes left? */ 188 jne 100408b 189.endif 190 \end 191 ret 192 193.if \overlap == 1 194 /* 195 * Copy backwards. 196 */ 197 ALIGN_TEXT 1982: 199 cmpq $256,%rcx 200 ja 2256f 201 202 leaq -8(%rdi,%rcx),%rdi 203 leaq -8(%rsi,%rcx),%rsi 204 205 cmpq $32,%rcx 206 jb 2016f 207 208 ALIGN_TEXT 2092032: 210 movq (%rsi),%rdx 211 movq %rdx,(%rdi) 212 movq -8(%rsi),%rdx 213 movq %rdx,-8(%rdi) 214 movq -16(%rsi),%rdx 215 movq %rdx,-16(%rdi) 216 movq -24(%rsi),%rdx 217 movq %rdx,-24(%rdi) 218 leaq -32(%rsi),%rsi 219 leaq -32(%rdi),%rdi 220 subq $32,%rcx 221 cmpq $32,%rcx 222 jae 2032b 223 cmpb $0,%cl 224 jne 2016f 225 \end 226 ret 227 ALIGN_TEXT 2282016: 229 cmpb $16,%cl 230 jl 2008f 231 movq (%rsi),%rdx 232 movq %rdx,(%rdi) 233 movq -8(%rsi),%rdx 234 movq %rdx,-8(%rdi) 235 subb $16,%cl 236 jz 2000f 237 leaq -16(%rsi),%rsi 238 leaq -16(%rdi),%rdi 2392008: 240 cmpb $8,%cl 241 jl 2004f 242 movq (%rsi),%rdx 243 movq %rdx,(%rdi) 244 subb $8,%cl 245 jz 2000f 246 leaq -8(%rsi),%rsi 247 leaq -8(%rdi),%rdi 2482004: 249 cmpb $4,%cl 250 jl 2002f 251 movl 4(%rsi),%edx 252 movl %edx,4(%rdi) 253 subb $4,%cl 254 jz 2000f 255 leaq -4(%rsi),%rsi 256 leaq -4(%rdi),%rdi 2572002: 258 cmpb $2,%cl 259 jl 2001f 260 movw 6(%rsi),%dx 261 movw %dx,6(%rdi) 262 subb $2,%cl 263 jz 2000f 264 leaq 
-2(%rsi),%rsi 265 leaq -2(%rdi),%rdi 2662001: 267 cmpb $1,%cl 268 jl 2000f 269 movb 7(%rsi),%dl 270 movb %dl,7(%rdi) 2712000: 272 \end 273 ret 274 ALIGN_TEXT 2752256: 276 std 277.if \erms == 1 278 leaq -1(%rdi,%rcx),%rdi 279 leaq -1(%rsi,%rcx),%rsi 280 rep 281 movsb 282 cld 283.else 284 leaq -8(%rdi,%rcx),%rdi 285 leaq -8(%rsi,%rcx),%rsi 286 shrq $3,%rcx 287 rep 288 movsq 289 cld 290 movq %rdx,%rcx 291 andb $7,%cl 292 jne 2004b 293.endif 294 \end 295 ret 296.endif 297.endm 298 299 300.macro MEMMOVE_BEGIN 301 movq %rdi,%rax 302 movq %rdx,%rcx 303.endm 304 305.macro MEMMOVE_END 306.endm 307 308#ifndef MEMCPY 309ENTRY(memmove) 310 MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END 311END(memmove) 312#else 313ENTRY(memcpy) 314 MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END 315END(memcpy) 316#endif 317