xref: /freebsd/lib/libc/amd64/string/memmove.S (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * MEMMOVE erms overlap begin end
 *
 * Emits the body of a memmove-style copy routine.
 *
 * Macro arguments:
 * erms    - 1: copies larger than 256 bytes use "rep movsb";
 *           otherwise they use "rep movsq" and the remaining 1..7 bytes
 *           are finished by the small-size code
 * overlap - 1: assemble the overlap check and the backward copy path;
 *           otherwise only the forward path is emitted
 * begin   - macro expanded once, first thing in the body
 * end     - macro expanded in front of every "ret"
 *
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * In addition \begin has to leave the count in rcx (MEMMOVE_BEGIN does):
 * every size test below reads rcx.  rdx is used as scratch by the paths
 * for up to 256 bytes and is read back as the untouched count only by the
 * paths for more than 256 bytes, which are entered before it is modified.
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 *
 * Numeric local labels: 10xxyy is the forward code for roughly xx..yy
 * bytes, 103200 the forward 32-byte loop and 1256 the forward code for
 * more than 256 bytes; the 2xxx labels are the backward counterparts.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	/*
	 * r8 = dst - src, compared as unsigned.  It is below the count only
	 * when dst lies within [src, src + cnt), the case in which copying
	 * forward would overwrite source bytes before they are read.
	 */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/* 33..256 bytes: copy forward 32 bytes per iteration. */
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl		/* fewer than 32 left, so cl is the whole count */
	jne	101632f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Forward tails.  rcx is at most 32 on every path into these labels,
	 * so comparing only cl with a signed branch is sufficient.  Each
	 * size class loads the first and the last chunk of the range before
	 * storing either one; the two chunks may overlap each other.
	 */
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
	/* More than 256 bytes, forward: string instructions. */
1256:
	testb	$15,%dil
	jnz	100f		/* destination is not 16-byte aligned */
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
	/*
	 * Unaligned destination.  The first 16 source bytes are kept in
	 * r8/r9 and the original destination in r10.  rdi is then advanced
	 * to the next 16-byte boundary, rsi by the same distance, and rdx is
	 * reduced to the bytes that remain from there.  The saved 16 bytes
	 * are stored at the original destination after the string copy.
	 */
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx		/* rcx = dst & 15 */
	leaq	-16(%rdx,%rcx),%rdx	/* rdx = cnt - (16 - (dst & 15)) */
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/*
	 * From here on rsi and rdi point 8 bytes below the end of the part
	 * that still has to be copied; every step below keeps that relation.
	 */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
	/*
	 * Backward tails for fewer than 32 bytes: peel off 16, 8, 4, 2 and 1
	 * bytes from the end, leaving as soon as cl reaches zero.
	 */
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx	/* last 4 bytes sit at offset 4 */
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx	/* last 2 bytes sit at offset 6 */
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl	/* last byte sits at offset 7 */
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
	/*
	 * More than 256 bytes, backward: string copy with the direction flag
	 * set; the flag is cleared again before the macro continues.
	 */
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl		/* 1..7 bytes left go through the tail code */
	jne	2004b
.endif
	\end
	ret
.endif
.endm


/*
 * Entry hook for MEMMOVE: puts the destination pointer in rax (the value
 * rax holds at each "ret") and copies the count from rdx into rcx, the
 * register the MEMMOVE body tests.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

/*
 * Exit hook for MEMMOVE, expanded before every "ret".  Intentionally
 * empty here.
 */
.macro MEMMOVE_END
.endm

/*
 * The file assembles exactly one entry point: memcpy when MEMCPY is
 * defined, memmove otherwise.  Both expand the same body (erms=0,
 * overlap=1), so memcpy also gets the overlap-safe copy.
 */
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif
