/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax or r11.
 */
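/*
 * The MEMMOVE macro dispatches on the size in rcx:
 *  - 0..32 bytes: a handful of possibly-overlapping loads and stores
 *    (labels 101632/100816/100408/100204/100001/100000); all data is read
 *    before anything is written, so copy direction does not matter.
 *  - 33..256 bytes (forward): an unrolled loop moving 32 bytes per
 *    iteration (label 103200), with the remainder finished by the
 *    small-size blocks.
 *  - more than 256 bytes (forward): rep movsb (erms variants) or rep movsq,
 *    after aligning the destination to 16 bytes if needed (labels 1256/100).
 *  - overlapping regions with src < dst are copied backwards (label 2).
 */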
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
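	/*
	 * The unsigned comparison below is true when dst lies within the
	 * source buffer (src <= dst < src + cnt), in which case a forward
	 * copy would clobber not-yet-read source bytes; take the
	 * backwards-copy path instead.
	 */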
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

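	/*
	 * 33..256 bytes: copy forward 32 bytes per iteration with four
	 * 8-byte moves, then let the small-size blocks below finish the
	 * remaining 0..31 bytes.
	 */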
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
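	/*
	 * 16..32 bytes: load the first 16 and the last 16 bytes before
	 * storing anything; the two halves may overlap in the middle,
	 * which is harmless since all loads happen first.
	 */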
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
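	/*
	 * More than 256 bytes, copying forward.  If the destination is
	 * already 16-byte aligned, use rep movsb (erms variants) or
	 * rep movsq directly; otherwise jump to 100f, which aligns the
	 * destination first.
	 */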
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
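	/*
	 * Unaligned destination: stash the first 16 source bytes in r8/r9,
	 * advance rdi to the next 16-byte boundary (adjusting rsi and the
	 * count to match) so the bulk copy runs with an aligned destination,
	 * and store the stashed bytes at the original destination afterwards.
	 * Those final two stores may overlap the start of the bulk copy,
	 * which is harmless since they were loaded before any store.
	 */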
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
        ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

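	/*
	 * Point rsi/rdi at the last 8 bytes of each buffer; the loop and
	 * tail blocks below walk backwards from there, so the sub-8-byte
	 * blocks reach the final bytes of the remaining region with
	 * offsets 7, 6 and 4 into this last 8-byte window.
	 */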
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
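	/*
	 * More than 256 bytes, overlapping, copied backwards: set the
	 * direction flag so rep movsb/movsq decrement, point rsi/rdi at the
	 * last byte (erms) or last qword of each buffer, and clear the flag
	 * again when done.  The non-erms variant finishes the 1..7 leftover
	 * bytes through the tail blocks above.
	 */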
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm


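/*
 * MEMMOVE_BEGIN sets up the return value (memmove and memcpy return the
 * destination pointer in rax) and moves the byte count into rcx, where
 * the MEMMOVE macro expects it.  MEMMOVE_END has nothing to undo.
 */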
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

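/*
 * The same macro expansion provides both functions: when MEMCPY is defined
 * the memcpy symbol is emitted instead of memmove.  Both variants keep
 * overlap handling enabled (overlap=1) and do not rely on ERMS (erms=0).
 */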
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif