xref: /linux/arch/x86/lib/memmove_32.S (revision cf26e043c2a9213805d7ea9e8cf3e1d7166a62a4)
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#include <linux/linkage.h>
4#include <asm/export.h>
5
/*
 * Eligibility limits for the `rep movsl` paths.  Both copy directions apply
 * the same two tests, so the values live here once:
 *
 * MEMMOVE_MOVS_MIN_BYTES:  copies shorter than this many bytes always use the
 *                          16-byte register loops, because the string
 *                          instruction has a high startup latency.
 * MEMMOVE_MOVS_ALIGN_MASK: `rep movsl` is only used when (src ^ dest) has none
 *                          of these bits set, i.e. when both pointers share
 *                          the same low address byte.
 */
#define MEMMOVE_MOVS_MIN_BYTES	680
#define MEMMOVE_MOVS_ALIGN_MASK	0xff

SYM_FUNC_START(memmove)
/*
 * void *memmove(void *dest_in, const void *src_in, size_t n)
 *
 * Copies n bytes from src_in to dest_in; the two regions may overlap.
 * Returns dest_in in %eax.
 *
 * -mregparm=3 passes the arguments in registers:
 * dest_in: %eax
 * src_in: %edx
 * n: %ecx
 * See also: arch/x86/entry/calling.h for description of the calling convention.
 *
 * n can remain in %ecx, but for `rep movsl`, we'll need dest in %edi and src
 * in %esi.
 *
 * Several aliases below name the same physical register:
 *  - tmp0/tmp0w share %edx with src_in; src_in is copied to src before tmp0
 *    is first written.
 *  - tmp2 shares %eax with dest_in; dest_in is pushed in the prologue and
 *    popped again at .Ldone, so it may be clobbered in between.
 *  - tmp3b is the low byte of n (%ecx); it is only written on the final
 *    one-byte path, after the last read of n.
 */
.set dest_in, %eax
.set dest, %edi
.set src_in, %edx
.set src, %esi
.set n, %ecx
.set tmp0, %edx
.set tmp0w, %dx
.set tmp1, %ebx
.set tmp1w, %bx
.set tmp2, %eax
.set tmp3b, %cl

/*
 * Save all callee-saved registers, because this function is going to clobber
 * all of them:
 */
	pushl	%ebp
	movl	%esp, %ebp	// set standard frame pointer

	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushl	%eax		// save 'dest_in' parameter [eax] as the return value

	movl	src_in, src
	movl	dest_in, dest

	/* Fewer than 16 bytes: go straight to the small-size tail handlers. */
	cmpl	$0x10, n
	jb	.Lmove_16B

	/*
	 * Decide forward/backward copy mode: if src is below dest (unsigned),
	 * copy from the tail towards the head; otherwise copy forwards.
	 */
	cmpl	dest, src
	jb	.Lbackwards_header

	/*
	 * The movs instruction has a high startup latency, so small sizes
	 * are copied through general-purpose registers instead.
	 */
	cmpl	$MEMMOVE_MOVS_MIN_BYTES, n
	jb	.Ltoo_small_forwards
	/* Use movs only when src and dest agree in their low address bits. */
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$MEMMOVE_MOVS_ALIGN_MASK, tmp0
	jz	.Lforward_movs
.Ltoo_small_forwards:
	/*
	 * Bias n by -16 so that the subl at the top of the loop borrows
	 * (sets CF) exactly when the chunk being copied is the last full one.
	 */
	subl	$0x10, n

	/*
	 * Copy 16 bytes forward per iteration.  leal does not modify the
	 * flags, so the jae at the bottom still sees the carry from the subl
	 * at the top.
	 */
.Lmove_16B_forwards_loop:
	subl	$0x10, n
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	2*4(src), tmp0
	movl	3*4(src), tmp1
	movl	tmp0, 2*4(dest)
	movl	tmp1, 3*4(dest)
	leal	0x10(src), src
	leal	0x10(dest), dest
	jae	.Lmove_16B_forwards_loop
	/* Undo the last subtraction: n is now the 0..15 bytes still to copy. */
	addl	$0x10, n
	jmp	.Lmove_16B

	/*
	 * Forward copy using movs.  The last 4 source bytes are loaded before
	 * the string copy starts and stored afterwards; that store covers the
	 * up to 3 trailing bytes that n / 4 whole dwords do not reach.
	 */
.p2align 4
.Lforward_movs:
	movl	-4(src, n), tmp0
	leal	-4(dest, n), tmp1
	shrl	$2, n
	rep	movsl
	movl	tmp0, (tmp1)
	jmp	.Ldone

	/*
	 * Backward copy using movs.  The first 4 source bytes are loaded
	 * before the string copy starts and stored afterwards; that store
	 * covers the up to 3 leading bytes that n / 4 whole dwords, copied
	 * downwards from the tail, do not reach.  The direction flag is set
	 * only around the rep movsl and cleared again right after it.
	 */
.p2align 4
.Lbackwards_movs:
	movl	(src), tmp0
	movl	dest, tmp1
	leal	-4(src, n), src
	leal	-4(dest, n), dest
	shrl	$2, n
	std
	rep	movsl
	movl	tmp0,(tmp1)
	cld
	jmp	.Ldone

	/* Backward copy: apply the same movs eligibility tests as above. */
.p2align 4
.Lbackwards_header:
	cmpl	$MEMMOVE_MOVS_MIN_BYTES, n
	jb	.Ltoo_small_backwards
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$MEMMOVE_MOVS_ALIGN_MASK, tmp0
	jz	.Lbackwards_movs

	/*
	 * Point src and dest one past the end of their regions, and bias n by
	 * -16 in the same way as the forward loop does.
	 */
.Ltoo_small_backwards:
	addl	n, src
	addl	n, dest
	subl	$0x10, n

	/*
	 * Copy 16 bytes backward per iteration.  As in the forward loop, the
	 * jae consumes the carry of the subl at the top; the leal instructions
	 * in between leave the flags untouched.
	 */
.Lmove_16B_backwards_loop:
	subl	$0x10, n

	movl	-1*4(src), tmp0
	movl	-2*4(src), tmp1
	movl	tmp0, -1*4(dest)
	movl	tmp1, -2*4(dest)
	movl	-3*4(src), tmp0
	movl	-4*4(src), tmp1
	movl	tmp0, -3*4(dest)
	movl	tmp1, -4*4(dest)
	leal	-0x10(src), src
	leal	-0x10(dest), dest
	jae	.Lmove_16B_backwards_loop
	/*
	 * n becomes the 0..15 bytes still to copy; move src and dest back to
	 * the head of that remainder and fall through to the tail handlers.
	 */
	addl	$0x10, n
	subl	n, src
	subl	n, dest

	/*
	 * Copy 8..15 bytes: the first 8 and the last 8 bytes (which may
	 * overlap each other) are all loaded before anything is stored, so
	 * overlapping src/dest regions are handled.  src itself serves as the
	 * fourth temporary because it is not needed afterwards.
	 */
.p2align 4
.Lmove_16B:
	cmpl	$8, n
	jb	.Lmove_8B
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	-2*4(src, n), tmp2
	movl	-1*4(src, n), src

	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	tmp2, -2*4(dest, n)
	movl	src, -1*4(dest, n)
	jmp	.Ldone

	/* Copy 4..7 bytes: first and last dword, both loaded before storing. */
.p2align 4
.Lmove_8B:
	cmpl	$4, n
	jb	.Lmove_4B
	movl	0*4(src), tmp0
	movl	-1*4(src, n), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, -1*4(dest, n)
	jmp	.Ldone

	/* Copy 2..3 bytes: first and last word, both loaded before storing. */
.p2align 4
.Lmove_4B:
	cmpl	$2, n
	jb	.Lmove_1B
	movw	0*2(src), tmp0w
	movw	-1*2(src, n), tmp1w
	movw	tmp0w, 0*2(dest)
	movw	tmp1w, -1*2(dest, n)
	jmp	.Ldone

	/* Copy the final byte, or nothing at all when n is 0. */
.p2align 4
.Lmove_1B:
	cmpl	$1, n
	jb	.Ldone
	movb	(src), tmp3b
	movb	tmp3b, (dest)
.p2align 4
.Ldone:
	popl	dest_in	// restore 'dest_in' [eax] as the return value
	/* Restore all callee-saved registers: */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	RET
SYM_FUNC_END(memmove)
EXPORT_SYMBOL(memmove)
201