xref: /freebsd/lib/libc/amd64/string/memmove.S (revision 22d7dd834bc5cd189810e414701e3ad1e98102e4)
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
#include <machine/asm.h>

/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
37
/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
/*
 * MEMMOVE erms overlap begin end
 *
 * Macro arguments:
 * erms    - 1: forward copies of more than 256 bytes use rep movsb;
 *           otherwise they use rep movsq followed by a byte tail.
 * overlap - 1: emit the overlap test and the backward-copy code.
 * begin   - macro expanded once on entry.  The code that follows uses
 *           rcx as the working count and re-reads the full count from
 *           rdx on the bulk paths, so both registers must hold the
 *           count once this hook has run.
 * end     - macro expanded right before every ret.
 *
 * All labels are numeric local labels.  The 10xxxx ones belong to the
 * forward copy, 1256 and 100 to the forward bulk copy, and the 2xxx
 * ones to the backward copy.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	/*
	 * r8 = dst - src, compared as an unsigned value.  It is below the
	 * count only when dst lies inside [src, src + count), which is the
	 * case where a forward copy would overwrite source bytes before
	 * they have been read.
	 */
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/*
	 * Forward copy of 33..256 bytes, 32 bytes per iteration.  rdx is
	 * the scratch register, so the count it held on entry is lost on
	 * this path.
	 */
	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl			/* under 32 left: anything at all? */
	jne	101632f
	\end
	ret

	/*
	 * At most 32 bytes remain from here on (short copies and loop
	 * tails), so the signed byte compares on cl are safe.  Every size
	 * class below loads the first and the last chunk of the region,
	 * which may overlap each other, and only then stores both.
	 */
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	/* 16..32 bytes: first 16 and last 16. */
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	/* 8..15 bytes: first 8 and last 8. */
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	/*
	 * Besides the fall-through from above, this label is the tail
	 * handler of the rep movsq paths, which arrive with ecx = 1..7.
	 */
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	/* 4..7 bytes: first 4 and last 4. */
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	/* 2..3 bytes: first 2 and last 2. */
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f			/* count is 0: nothing to copy */
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	/*
	 * Forward copy of more than 256 bytes.  A destination that is not
	 * 16-byte aligned is handled at 100 below.
	 */
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx			/* rdx still holds the full count */
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

	/*
	 * Unaligned destination.  Keep the first 16 source bytes in r8/r9
	 * and the original destination in r10, advance rdi to the next
	 * 16-byte boundary (rsi by the same distance) and shrink the count
	 * in rdx to match.  The saved head is stored only after the bulk
	 * copy: with dst slightly below src an earlier store could
	 * overwrite source bytes the bulk copy has not read yet.
	 */
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx		/* rcx = dst & 15, nonzero here */
	leaq	-16(%rdx,%rcx),%rdx	/* rdx = count - (16 - (dst & 15)) */
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi	/* rdi = dst + 16 - (dst & 15) */
	leaq	16(%rsi,%rcx),%rsi	/* rsi moves by the same distance */
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/* Point both registers at the last 8 bytes of their buffers. */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	/*
	 * NOTE(review): the only branch to 2 visible in this macro is taken
	 * after counts of 32 or less have been diverted, so this test looks
	 * like it can never succeed here.
	 */
	cmpq	$32,%rcx
	jb	2016f

	/* 32 bytes per iteration, walking down from the end. */
	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret

	/*
	 * Backward tail.  rsi/rdi point at the last 8 bytes of what is
	 * left and cl holds the remaining count (below 32).  Chunks of 16,
	 * 8, 4, 2 and 1 bytes are peeled off from the end.  The chunks
	 * smaller than 8 bytes use positive offsets because the registers
	 * sit 8 bytes below the end of the remaining region.
	 */
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret

	/*
	 * Backward copy of more than 256 bytes: rep movsq with the
	 * direction flag set, starting at the last qword.  The flag is
	 * cleared again right after the copy.  Only cl is masked for the
	 * tail, which is enough because the code from 2004 on looks at cl
	 * alone.
	 */
	ALIGN_TEXT
2256:
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm
288
289
/*
 * Entry hook for MEMMOVE: rax = destination, rcx = count.
 * rdx is left holding the count as well.
 */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm
294
/*
 * Exit hook for MEMMOVE; deliberately empty, it expands to nothing.
 */
.macro MEMMOVE_END
.endm
297
/*
 * Exported entry point.  With MEMCPY defined the same expansion is
 * emitted under the name memcpy instead of memmove.
 *
 * NOTE(review): memcpy is expanded with overlap=1 as well, so it takes
 * the same overlap-safe paths as memmove.  Presumably deliberate;
 * confirm before changing it to overlap=0.
 */
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif
307
/* Empty .note.GNU-stack section: this object needs no executable stack. */
	.section .note.GNU-stack,"",%progbits
309