xref: /freebsd/lib/libc/amd64/string/memmove.S (revision 69c5fa5cd1ec9b09ed88a086607a8a0993818db9)
1/*-
2 * Copyright (c) 2018 The FreeBSD Foundation
3 *
4 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
5 * under sponsorship from the FreeBSD Foundation.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <machine/asm.h>
30__FBSDID("$FreeBSD$");
31
/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */
37
#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
39
/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */
44
/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
.macro MEMMOVE erms overlap begin end
	/*
	 * Copy template shared by the entry points below.
	 *
	 * Macro arguments:
	 *   erms    - when 1, bulk copies are done with "rep movsb"; otherwise
	 *             with "rep movsq" followed by a copy of the 0..7 byte tail.
	 *   overlap - when 1, emit the overlap check and the backwards copy
	 *             path used when dst lands inside [src, src + cnt).
	 *   begin   - macro expanded once on entry.  The code below reads the
	 *             count from rcx straight away, so the begin macro has to
	 *             load it there (rdx keeps a second copy of the count).
	 *   end     - macro expanded immediately before every ret.
	 *
	 * Local labels are numeric; the forward small-size labels encode the
	 * size class they handle (101632 = 16..32 bytes, 100816 = 8..15, ...).
	 */
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	/*
	 * r8 = dst - src.  The unsigned compare is below cnt only when dst
	 * lies inside the source buffer; with src > dst the subtraction wraps
	 * to a huge value and the forward path is taken.
	 */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/*
	 * Forward copy, 33..256 bytes: move 32 bytes per iteration.
	 * rdx is used as scratch here; the count lives in rcx.
	 */
	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	testb	%cl,%cl		/* rcx < 32 here; any bytes left? */
	jnz	101632f
	\end
	ret

	/*
	 * Forward tails.  rcx <= 32 on every way in, so byte-sized signed
	 * compares on cl are sufficient.  Each size class loads the first
	 * and the last chunk of the buffer before storing either one; the
	 * two chunks overlap when the length is not the maximum of the class.
	 */
	ALIGN_TEXT
101632:				/* 16..32 bytes */
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:				/* 8..15 bytes */
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:				/* 4..7 bytes */
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:				/* 2..3 bytes */
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:				/* 0..1 bytes */
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	/*
	 * Forward copy, more than 256 bytes.  rdx still holds the byte count
	 * on this path (the 32-byte loop above was not executed).
	 */
	ALIGN_TEXT
1256:
	testb	$15,%dil	/* is dst 16-byte aligned? */
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
	/*
	 * dst is not 16-byte aligned.  Load the first 16 source bytes into
	 * r8/r9 and remember the original dst in r10, then advance both
	 * pointers by 16 - (dst & 15) so that dst becomes aligned and reduce
	 * the count by the same amount.  The saved head is stored once the
	 * bulk copy is done; it covers the bytes that were skipped.
	 */
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx			/* rcx = dst & 15 */
	leaq	-16(%rdx,%rcx),%rdx		/* cnt -= 16 - (dst & 15) */
	negq	%rcx
	leaq	16(%rdi,%rcx),%rdi		/* dst += 16 - (dst & 15) */
	leaq	16(%rsi,%rcx),%rsi		/* src advances by the same amount */
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 *
	 * For up to 256 bytes rdi/rsi are pointed at the last quadword of the
	 * buffers and walk downwards.  Throughout the tail code the pointers
	 * stay 8 bytes below the end of the part that is still to be copied,
	 * which is why the 4-, 2- and 1-byte moves use offsets 4, 6 and 7.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	ALIGN_TEXT
2032:				/* 32 bytes per iteration, descending */
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	testb	%cl,%cl		/* rcx < 32 here; any bytes left? */
	jnz	2016f
	\end
	ret
	ALIGN_TEXT
2016:				/* at least 16 bytes left: copy 16 */
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:				/* at least 8 bytes left: copy 8 */
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:				/* at least 4 bytes left: copy 4 */
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:				/* at least 2 bytes left: copy 2 */
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:				/* last byte, if any */
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	/*
	 * Backwards copy, more than 256 bytes: run the string instruction
	 * with the direction flag set, then clear the flag again.  rdx still
	 * holds the byte count on this path.
	 */
	ALIGN_TEXT
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi		/* last byte of dst */
	leaq	-1(%rsi,%rcx),%rsi		/* last byte of src */
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi		/* last quadword of dst */
	leaq	-8(%rsi,%rcx),%rsi		/* last quadword of src */
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl				/* any bytes left? */
	jne	2004b
.endif
	\end
	ret
.endif
.endm
298
299
/*
 * Entry hook handed to MEMMOVE as its begin argument: keep the destination
 * pointer in rax, which the copy code never modifies, and place the byte
 * count in rcx where the copy code expects it.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax	/* rax = dst */
	movq	%rdx,%rcx	/* rcx = count; rdx keeps a copy */
.endm
304
/*
 * Exit hook handed to MEMMOVE as its end argument; it is expanded before
 * every ret.  Intentionally empty here.
 */
.macro MEMMOVE_END
.endm
307
/*
 * Exactly one entry point is emitted per build of this file: memcpy when
 * MEMCPY is defined, memmove otherwise.  Both instantiate the template
 * without ERMS and with overlap=1, so the memcpy built from this file also
 * contains the overlap check and the backwards copy path.
 */
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif
317