xref: /freebsd/lib/libc/amd64/string/memmove.S (revision bdafb02fcb88389fd1ab684cfe734cb429d35618)
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define	ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
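 * i.e. void *memmove(void *dst, const void *src, size_t cnt),
 * returning dst.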
 * Contains parts of bcopy written by:
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8.
 * It does not clobber rax, r10 nor r11.
 */
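/*
 * The copy is dispatched on size: counts below 32 bytes are handled by
 * a ladder of progressively smaller loads and stores, 32..256 bytes go
 * through a 32-bytes-per-iteration loop, and anything above 256 bytes
 * is handed to rep movs.  If the buffers overlap with the source below
 * the destination, the same scheme is used to copy backwards (label 2).
 */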
.macro MEMMOVE erms overlap begin end
	\begin
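	/*
	 * If the regions overlap and the source lies below the
	 * destination, a forward copy would clobber source bytes before
	 * they are read.  The unsigned comparison below catches exactly
	 * that case: dst - src < cnt (as unsigned values) iff
	 * src <= dst < src + cnt.
	 */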
.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$32,%rcx
	jb	1016f

	cmpq	$256,%rcx
	ja	1256f

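	/*
	 * Forward copy, 32 bytes per iteration, with rdx as scratch.
	 * Once fewer than 32 bytes remain, fall through to the tail
	 * ladder below if the remainder is non-zero.
	 */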
1032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	1032b
	cmpb	$0,%cl
	jne	1016f
	\end
	ret
	ALIGN_TEXT
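	/*
	 * Copy the remaining 0..31 bytes by binary decomposition of the
	 * count: a 16-byte chunk, then 8, 4, 2 and finally 1, each
	 * guarded by a size check.  Larger copies also jump back into
	 * this ladder to finish off their tail.
	 */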
1016:
	cmpb	$16,%cl
	jl	1008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	subb	$16,%cl
	jz	1000f
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
1008:
	cmpb	$8,%cl
	jl	1004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	1000f
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
1004:
	cmpb	$4,%cl
	jl	1002f
	movl	(%rsi),%edx
	movl	%edx,(%rdi)
	subb	$4,%cl
	jz	1000f
	leaq	4(%rsi),%rsi
	leaq	4(%rdi),%rdi
1002:
	cmpb	$2,%cl
	jl	1001f
	movw	(%rsi),%dx
	movw	%dx,(%rdi)
	subb	$2,%cl
	jz	1000f
	leaq	2(%rsi),%rsi
	leaq	2(%rdi),%rdi
1001:
	cmpb	$1,%cl
	jl	1000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
1000:
	\end
	ret

	ALIGN_TEXT
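	/*
	 * Above 256 bytes: with erms, a single rep movsb does the whole
	 * copy (Enhanced REP MOVSB microcode, which the erms parameter
	 * selects for, handles big blocks well).  Without it, copy 8
	 * bytes at a time with rep movsq and let the ladder above
	 * handle the last 0..7 bytes.
	 */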
1256:
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andb	$7,%cl                          /* any bytes left? */
	jne	1004b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	addq	%rcx,%rdi
	addq	%rcx,%rsi
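	/*
	 * Both pointers now point one past the end of their regions;
	 * every chunk below is addressed with a negative offset and the
	 * pointers walk downwards.
	 */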

	cmpq	$32,%rcx
	jb	2016f

	cmpq	$256,%rcx
	ja	2256f

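	/* Backwards mirror of the 1032 loop. */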
2032:
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	movq	-32(%rsi),%rdx
	movq	%rdx,-32(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
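	/* Backwards mirror of the 1016 tail ladder. */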
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	-4(%rsi),%edx
	movl	%edx,-4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	-2(%rsi),%dx
	movw	%dx,-2(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	-1(%rsi),%dl
	movb	%dl,-1(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
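	/*
	 * Large backwards copy: point rsi/rdi at the last byte of each
	 * region and set the direction flag so that rep movs walks
	 * downwards.
	 */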
	decq	%rdi
	decq	%rsi
	std
.if \erms == 1
	rep
	movsb
.else
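	/*
	 * Copy the 0..7 trailing bytes one at a time first, then step
	 * both pointers down by 7 so they point at the base of the last
	 * full qword and move the rest 8 bytes at a time.
	 */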
	andq	$7,%rcx                         /* any fractional bytes? */
	je	3f
	rep
	movsb
3:
	movq	%rdx,%rcx                       /* copy remainder by 64-bit words */
	shrq	$3,%rcx
	subq	$7,%rsi
	subq	$7,%rdi
	rep
	movsq
.endif
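	/* The ABI requires the direction flag to be clear on return. */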
	cld
	\end
	ret
.endif
.endm
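/*
 * MEMMOVE_BEGIN stashes the destination in rax (memmove and memcpy
 * both return it) and moves the count into rcx, where the MEMMOVE
 * macro expects it.  MEMMOVE_END is an empty hook.
 */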
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

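/*
 * Note that both entry points are generated with overlap=1: the libc
 * memcpy built from this file tolerates overlapping buffers just like
 * memmove does.
 */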
#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif