/* xref: /freebsd/lib/libc/amd64/string/memmove.S (revision 1d386b48a555f61cb7325543adbbb5c3f3407a66) */
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

/* Alignment directive placed in front of branch targets in this file. */
#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 *
 * Macro arguments:
 * erms    - 1: forward copies larger than 256 bytes use "rep movsb";
 *           otherwise they use "rep movsq" followed by a byte tail.
 * overlap - 1: also emit the backward-copy path, taken when the unsigned
 *           difference dst - src is below the count.
 * begin   - macro expanded once at the very top.  The body reads the count
 *           from rcx and, on the paths for more than 256 bytes, reloads it
 *           from rdx, so begin must leave the count in both registers.
 * end     - macro expanded immediately before every ret.
 *
 * Label naming: 10xxyy handles a forward copy of xx..yy bytes, 1256 the
 * forward copy of more than 256 bytes, 2xxx the backward counterparts.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/*
	 * Forward copy of 33..256 bytes, 32 bytes per iteration.
	 * rcx holds the number of bytes left; rdx is used as scratch.
	 */
	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl		/* rcx < 32 here: is there a tail? */
	jne	101632f
	\end
	ret

	/*
	 * Tail handlers.  rcx is at most 32 on arrival, so testing only
	 * %cl with signed branches is sufficient.  Each handler loads a
	 * chunk from the start and a chunk ending at the last byte (the two
	 * may overlap) and performs all loads before any store.
	 */
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	/* 16..32 bytes: first 16 and last 16. */
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	/* 8..15 bytes: first 8 and last 8. */
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	/* Also the target for the 1..7 byte remainder after "rep movsq". */
	cmpb	$4,%cl
	jl	100204f
	/* 4..7 bytes: first 4 and last 4. */
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	/* 2..3 bytes: first 2 and last 2. */
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f		/* zero bytes: nothing to copy */
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	/*
	 * Forward copy of more than 256 bytes.  When the destination is
	 * 16-byte aligned the string instruction is used right away;
	 * otherwise label 100 aligns the destination first.
	 */
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx			/* rdx still holds the full count */
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

	/*
	 * Destination is not 16-byte aligned.  The first 16 source bytes
	 * are loaded into r8/r9 and the original destination is kept in
	 * r10.  dst is then advanced to the next 16-byte boundary, src by
	 * the same amount, and the count in rdx/rcx is reduced to match.
	 * The saved 16 bytes are stored to the original destination after
	 * the bulk copy.
	 */
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx			/* rcx = dst & 15 */
	leaq	-16(%rdx,%rcx),%rdx		/* rdx = cnt - (16 - (dst & 15)) */
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi		/* dst rounded up to 16 bytes */
	leaq	16(%rsi,%rcx),%rsi		/* src advanced by the same amount */
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 *
	 * Throughout this path rsi/rdi point 8 bytes below the end of the
	 * region that is still to be copied, which is why the 4-, 2- and
	 * 1-byte moves below use the offsets 4, 6 and 7.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi		/* last qword of dst */
	leaq	-8(%rsi,%rcx),%rsi		/* last qword of src */

	cmpq	$32,%rcx
	jb	2016f

	/* 32 bytes per iteration, highest addresses first. */
	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl		/* rcx < 32 here: is there a tail? */
	jne	2016f
	\end
	ret

	/*
	 * Backward tail: peel off 16, 8, 4, 2 and 1 bytes in turn,
	 * subtracting each from %cl and stopping once it reaches zero.
	 */
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	/* Also the target for the 1..7 byte remainder after "rep movsq". */
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret

	/*
	 * Backward copy of more than 256 bytes: "rep movsq" with the
	 * direction flag set, then the remaining 0..7 bytes via 2004.
	 */
	ALIGN_TEXT
2256:
	std				/* string ops now run downwards */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld				/* clear the direction flag again */
	movq	%rdx,%rcx		/* rdx still holds the full count */
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm


/*
 * Entry hook passed to MEMMOVE as "begin".
 * Saves the original destination pointer in rax (the return-value register
 * under the SysV ABI; MEMMOVE documents that it does not clobber rax) and
 * copies the count into rcx, which is where the MEMMOVE body reads it.
 * rdx keeps the count as well.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

/*
 * Exit hook passed to MEMMOVE as "end"; it is expanded before every ret.
 * Intentionally empty: MEMMOVE_BEGIN sets up nothing that needs undoing.
 */
.macro MEMMOVE_END
.endm

/*
 * Exported entry point.  By default the MEMMOVE body is emitted under the
 * name memmove; building with MEMCPY defined emits it as memcpy instead.
 * Both variants use the non-ERMS copy (erms=0).
 *
 * NOTE(review): the memcpy variant is instantiated with overlap=1 too, so
 * it keeps the backward-copy path -- presumably deliberate; confirm before
 * changing.
 */
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif

308*ae507c25SKonstantin Belousov	.section .note.GNU-stack,"",%progbits
309