xref: /freebsd/lib/libc/amd64/string/memmove.S (revision e4c7371cefb3d80286e05d1fdf24f35fc607b4bb)
1/*-
2 * Copyright (c) 2018 The FreeBSD Foundation
3 *
4 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
5 * under sponsorship from the FreeBSD Foundation.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <machine/asm.h>
30__FBSDID("$FreeBSD$");
31
32#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
33
34/*
35 * memmove(dst, src, cnt)
36 *         rdi, rsi, rdx
37 */
38
/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	/* (dst - src) < cnt, unsigned, means overlapping with src < dst:
	 * only that case requires a backward copy. */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/* Forward copy, 33..256 bytes: unrolled 32-byte loop. */
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f	/* 1..31 bytes remain; finish with tail code below */
	\end
	ret
	ALIGN_TEXT
	/* 16..32 bytes: two possibly-overlapping 16-byte moves, reading
	 * everything before writing, so direction does not matter. */
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
	/* 8..15 bytes: head + tail qword, possibly overlapping. */
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)	/* fixed: stray trailing comma in the addressing mode */
	\end
	ret
	ALIGN_TEXT
	/* 4..7 bytes: head + tail dword. */
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
	/* 2..3 bytes: head + tail word. */
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
	/* 0..1 bytes. */
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
	/* Forward copy, > 256 bytes. */
1256:
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b	/* 1..7 straggler bytes; reuse the small tail code */
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/* Point rsi/rdi at the last (possibly partial) qword of each
	 * buffer; the tail code below indexes within that window. */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	/* Backward copy, 32..256 bytes: unrolled 32-byte loop. */
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f	/* 1..31 bytes remain */
	\end
	ret
	ALIGN_TEXT
	/* Backward tails: each step copies the highest remaining chunk,
	 * then slides the pointers down.  Offsets 0/-8 then 4, 6, 7 walk
	 * down within the current 8-byte window. */
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx	/* high dword of the current window */
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx	/* high word of the current window */
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl	/* last remaining byte */
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
	/* Backward copy, > 256 bytes: string ops with DF set; the flag is
	 * cleared again before any exit, as the ABI requires DF=0. */
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b	/* 1..7 straggler bytes via the backward tails */
.endif
	\end
	ret
.endif
.endm
260
261
/*
 * Default prologue for MEMMOVE: stash the destination in rax (the value
 * memmove/memcpy must return; the copy paths do not touch rax) and load
 * the byte count into rcx, where the macro expects it (rdx keeps the
 * original count for the string-op remainder handling).
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm
266
/* Default epilogue for MEMMOVE: nothing to do before each ret. */
.macro MEMMOVE_END
.endm
269
#ifndef MEMCPY
ENTRY(memmove)
	/* Non-ERMS variant (rep movsq for large sizes), overlap-safe. */
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
/*
 * memcpy is expanded from the same template, also with overlap=1, so it
 * stays safe even for callers that (incorrectly) pass overlapping
 * buffers.
 */
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif
279