xref: /freebsd/lib/libc/amd64/string/memmove.S (revision 77ebcc05eac2658a68b447e654cfdf7ff3e703b8)
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is expected to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8.
 * It does not clobber rax, r10, or r11.
 */
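/*
 * For orientation, the macro's control flow is roughly the following C
 * (an illustrative sketch only, not part of the build; dst, src and cnt
 * stand in for %rdi, %rsi and %rcx):
 *
 *	if (overlap && (uintptr_t)dst - (uintptr_t)src < cnt)
 *		goto backwards;		// regions overlap with src below dst
 *	if (cnt < 32)
 *		goto tail;		// branch ladder for 0..31 bytes
 *	if (cnt > 256)
 *		goto rep;		// rep movsb / rep movsq
 *	do { copy 32 bytes; cnt -= 32; } while (cnt >= 32);
 *	if (cnt != 0)
 *		goto tail;
 */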
.macro MEMMOVE erms overlap begin end
	\begin
.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif
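	/*
	 * The unsigned compare above is the usual single-branch overlap
	 * test: dst - src (mod 2^64) is below cnt exactly when dst lands
	 * inside [src, src + cnt), i.e. when a forward copy would clobber
	 * source bytes that have not been read yet.  If src is above dst
	 * the subtraction wraps to a huge value and the forward path runs.
	 */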

	cmpq	$32,%rcx
	jb	1016f

	cmpq	$256,%rcx
	ja	1256f

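	/*
	 * Medium range: 32..256 bytes, copied in 32-byte chunks with
	 * plain quadword moves; any leftover falls through to the tail
	 * ladder at 1016 below.
	 */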
1032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	1032b
	cmpb	$0,%cl
	jne	1016f
	\end
	ret
	ALIGN_TEXT
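	/*
	 * Tail ladder: at most 31 bytes remain.  Each rung tests one bit
	 * of the remaining count (16, 8, 4, 2, 1) and copies that many
	 * bytes, so any residue finishes in at most five branches.
	 */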
1016:
	cmpb	$16,%cl
	jl	1008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	subb	$16,%cl
	jz	1000f
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
1008:
	cmpb	$8,%cl
	jl	1004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	1000f
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
1004:
	cmpb	$4,%cl
	jl	1002f
	movl	(%rsi),%edx
	movl	%edx,(%rdi)
	subb	$4,%cl
	jz	1000f
	leaq	4(%rsi),%rsi
	leaq	4(%rdi),%rdi
1002:
	cmpb	$2,%cl
	jl	1001f
	movw	(%rsi),%dx
	movw	%dx,(%rdi)
	subb	$2,%cl
	jz	1000f
	leaq	2(%rsi),%rsi
	leaq	2(%rdi),%rdi
1001:
	cmpb	$1,%cl
	jl	1000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
1000:
	\end
	ret

	ALIGN_TEXT
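	/*
	 * Large copies (> 256 bytes).  With erms=1 this leans on the
	 * Enhanced REP MOVSB fast-string microcode; otherwise it copies
	 * quadwords with rep movsq and re-enters the tail ladder for the
	 * trailing 0..7 bytes (the original count is still in %rdx, since
	 * this path is reached before %rdx is used as scratch).
	 */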
1256:
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx		/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andb	$7,%cl		/* any bytes left? */
	jne	1004b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
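	/*
	 * Bias both pointers to the last quadword of each buffer; the
	 * loop and the tail ladder below then walk downwards.  This is
	 * also why the sub-quadword moves further down use positive
	 * displacements (4, 6, 7): they pick the top bytes out of the
	 * quadword %rsi/%rdi currently point at.
	 */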

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
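	/*
	 * Backward tail ladder, mirroring the one at 1016: peel 16, 8,
	 * 4, 2 and finally 1 byte off the high end of what remains.
	 */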
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
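	/*
	 * Large overlapping copy: set the direction flag so that rep
	 * movs runs downwards, then restore it with cld afterwards, as
	 * the ABI requires DF to be clear on return.
	 */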
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm

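/*
 * Shared prologue: memmove() and memcpy() return the destination
 * pointer, so save it in %rax, and move the count into %rcx where the
 * macro expects it (%rdx keeps the original count for the tail
 * computation after rep movsq).
 */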
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

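/*
 * The same source builds both functions: with MEMCPY defined, only
 * memcpy() is emitted.  Note that both are generated with overlap=1,
 * so this memcpy() also tolerates overlapping buffers.
 */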
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif