/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * Note: this routine was written with kernel use in mind (read: no simd);
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10.
 * It does not clobber rax nor r11.
 */
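/*
 * Macro parameters:
 * erms    - if 1, do the bulk of large copies with "rep movsb" (intended
 *           for CPUs with Enhanced REP MOVSB); if 0, use "rep movsq"
 *           followed by a small tail copy
 * overlap - if 1, emit the backwards-copy path needed for memmove
 *           semantics when the buffers overlap and src < dst
 * begin   - macro expanded once on entry
 * end     - macro expanded before every return
 */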
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

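	/*
	 * 33..256 bytes: copy forward in 32-byte chunks until fewer than
	 * 32 bytes remain, then finish the remainder with the small-size
	 * cases below.
	 */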
	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
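	/*
	 * 0..32 remaining bytes: copy the leading and the trailing chunk
	 * of the region, with all loads done before any store, so the two
	 * chunks may overlap each other (and the source) without harm.
	 */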
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

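	/*
	 * More than 256 bytes: use a string copy.  If the destination is
	 * not 16-byte aligned, first branch to the alignment fixup at
	 * label 100 below.
	 */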
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
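	/*
	 * Align the destination: save the first 16 source bytes, advance
	 * both pointers so that the destination becomes 16-byte aligned
	 * (shrinking the count accordingly), do the bulk copy, and finally
	 * store the saved bytes over the skipped head.
	 */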
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.  The destination starts within the source
	 * buffer, so copying from the end towards the beginning avoids
	 * overwriting source bytes before they have been read.
	 */
        ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

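	/*
	 * Both pointers now reference the last quadword of their buffer;
	 * copy backwards in 32-byte chunks until fewer than 32 bytes
	 * remain, then finish with the small-size cases below.
	 */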
	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
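	/*
	 * More than 256 bytes, overlapping: copy whole quadwords backwards
	 * with the direction flag set, clear the flag again, and let the
	 * small-size cases above handle the remaining cnt % 8 bytes.
	 */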
	ALIGN_TEXT
2256:
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm


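/*
 * Userspace prologue/epilogue: memmove(3) and memcpy(3) return the
 * destination pointer, so stash it in %rax on entry and move the count
 * into %rcx where the macro expects it.  Nothing needs to be done on
 * exit.
 */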
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

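/*
 * The same routine is assembled as either memmove or memcpy depending on
 * whether MEMCPY is defined; both variants keep the overlap handling
 * enabled and do not use ERMS.
 */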
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif

	.section .note.GNU-stack,"",%progbits