/* xref: /freebsd/lib/libc/amd64/string/memmove.S (revision dd219e5ea5027785225b914cc18a9c94ffece1a4) */
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/* Used in front of branch targets in the copy code below. */
#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 * Contains parts of bcopy written by:
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */

/*
 * MEMMOVE erms overlap begin end
 *
 * Expands to a complete copy routine body; every exit path expands "end"
 * and then executes ret.
 *
 * Macro arguments:
 *   erms    - 1: copies of more than 256 bytes use "rep movsb".
 *             Otherwise they use "rep movsq" followed by the byte tail.
 *   overlap - 1: emit the test for a destination that starts inside the
 *             source range, plus the backward copy code that handles it.
 *   begin   - macro expanded once, before anything else.
 *   end     - macro expanded in front of every ret.
 *
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * All code after "begin" takes the count from rcx, so "begin" has to leave
 * a copy of the count there (MEMMOVE_BEGIN does).
 *
 * The macro possibly clobbers the above and: rcx, r8.
 * It does not clobber rax, r10 nor r11.
 *
 * The direction flag is only set on the backward path for more than 256
 * bytes and is cleared again right after the rep instruction.
 *
 * Size classes (same scheme in both directions):
 *   count < 32        - tail ladder: 16, 8, 4, 2, 1 byte steps
 *   32 <= count <= 256 - 32 bytes per loop iteration, then the tail ladder
 *   count > 256       - rep string instruction
 *
 * Local labels: 1NNN belongs to the forward copy, 2NNN to the backward
 * copy; NNN is the chunk size handled there (x000 is the common exit,
 * x256 the rep path).
 */
.macro MEMMOVE erms overlap begin end
	\begin
.if \overlap == 1
	/*
	 * r8 = dst - src, compared as unsigned.  It is below the count only
	 * when dst lies inside [src, src + count); a forward copy would then
	 * overwrite source bytes before they are read.
	 */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	/*
	 * Copy forwards.
	 */
	cmpq	$32,%rcx
	jb	1016f

	cmpq	$256,%rcx
	ja	1256f

	/* 32..256 bytes: 32 bytes per iteration, rdx is the scratch register. */
1032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	1032b
	/* rcx is below 32 now, so cl holds the whole remaining count. */
	cmpb	$0,%cl
	jne	1016f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Forward tail.  The remaining count is in cl and is at most 31
	 * (at most 7 when entered at 1004 from the rep movsq path), so the
	 * signed byte compares below cannot misfire.  Each step copies one
	 * chunk, leaves early when nothing remains and otherwise advances
	 * both pointers past the chunk.
	 */
1016:
	cmpb	$16,%cl
	jl	1008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	subb	$16,%cl
	jz	1000f
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
1008:
	cmpb	$8,%cl
	jl	1004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	1000f
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
1004:
	cmpb	$4,%cl
	jl	1002f
	movl	(%rsi),%edx
	movl	%edx,(%rdi)
	subb	$4,%cl
	jz	1000f
	leaq	4(%rsi),%rsi
	leaq	4(%rdi),%rdi
1002:
	cmpb	$2,%cl
	jl	1001f
	movw	(%rsi),%dx
	movw	%dx,(%rdi)
	subb	$2,%cl
	jz	1000f
	leaq	2(%rsi),%rsi
	leaq	2(%rdi),%rdi
1001:
	cmpb	$1,%cl
	jl	1000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
1000:
	\end
	ret

	ALIGN_TEXT
	/* Forward copy of more than 256 bytes. */
1256:
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	/*
	 * This path is entered before rdx is used as scratch, so rdx still
	 * holds the count.  Only cl is meaningful after the mask; the tail
	 * code reads nothing but cl.
	 */
	movq	%rdx,%rcx
	andb	$7,%cl                         /* any bytes left? */
	jne	1004b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/*
	 * Point rdi/rsi at the last 8 bytes of each buffer.  Offset 0 then
	 * addresses the final quadword of the not yet copied region, while
	 * offsets 4, 6 and 7 address its final dword, word and byte.
	 */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	/* 32..256 bytes: 32 bytes per iteration, highest addresses first. */
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	/* rcx is below 32 now, so cl holds the whole remaining count. */
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Backward tail.  Mirrors the forward tail: the count is in cl and
	 * is at most 31 (at most 7 when entered at 2004 from the rep movsq
	 * path).  rdi + 8 and rsi + 8 mark the end of the region that is
	 * still to be copied.
	 */
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
	/*
	 * Backward copy of more than 256 bytes.  std makes the rep
	 * instruction walk down in memory; cld follows the rep on both
	 * variants so the flag is clear again before ret.
	 */
2256:
	std
.if \erms == 1
	/* Start at the last byte of each buffer. */
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	/* Start at the last quadword of each buffer. */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	/*
	 * rdx still holds the count on this path.  After the rep, rdi + 8
	 * and rsi + 8 mark the end of the leftover bytes, which is the
	 * layout the tail code at 2004 expects.
	 */
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm

/*
 * Entry hook passed to MEMMOVE as "begin": copies the destination pointer
 * into rax and the byte count from rdx into rcx.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

/*
 * Exit hook passed to MEMMOVE as "end": expands to nothing.
 */
.macro MEMMOVE_END
.endm

/*
 * The same code is emitted under one of two names: memcpy when MEMCPY is
 * defined at build time, memmove otherwise.  Both variants select the
 * rep movsq flavour (erms=0) and include the overlap handling (overlap=1).
 */
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif