/*
 * Normally compiler builtins are used, but sometimes the compiler calls
 * out-of-line code. Based on asm-i386/string.h.
 *
 * This assembly file is rewritten from the memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

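/* Drop any macro definition of memmove so the real symbol is defined below */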
#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
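
/*
 * For orientation, a rough C equivalent of the dispatch implemented
 * below (an illustrative sketch only, not code taken from the kernel;
 * copy_forward/copy_backward are hypothetical helpers):
 *
 *	void *memmove(void *dest, const void *src, size_t count)
 *	{
 *		const char *s = src;
 *		char *d = dest;
 *
 *		if (s >= d || s + count <= d)
 *			copy_forward(d, s, count);	// forward copy is safe
 *		else
 *			copy_backward(d, s, count);	// dest overlaps past src
 *		return dest;
 *	}
 */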
ENTRY(memmove)
	CFI_STARTPROC

	/* Counts of 32 bytes and up take the bulk copy paths below */
	mov %rdi, %rax
	cmp $0x20, %rdx
	jb	1f

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f
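	/*
	 * Fall-through: src < dest but src + count <= dest, so the regions
	 * are disjoint and the forward copy below is safe (the jge above
	 * already routed src >= dest here as well).
	 */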

.Lmemmove_begin_forward:
	/*
	 * The movsq instruction has a noticeable startup latency, so we
	 * handle small sizes with plain general-purpose register moves.
	 */
	cmp  $680, %rdx
	jb	3f
	/*
	 * movsq is only a win when source and destination are mutually
	 * aligned (their low address bytes match).
	 */

	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop iteration.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	addq $0x20, %rdx
	jmp 1f
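	/*
	 * Note on loop termination: movq and leaq leave RFLAGS untouched,
	 * so the jae above still sees the carry from the sub at 5. The
	 * extra sub before the loop biases rdx by one 32-byte chunk, and
	 * the addq after the loop restores rdx to the 0-31 bytes left for
	 * the shared tail code at 1.
	 */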
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
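	/*
	 * rep movsq moves count/8 qwords, so up to 7 trailing bytes would
	 * be missed; the last source qword was saved in r11 before rsi was
	 * advanced, and the store to (%r10) rewrites the final 8 bytes of
	 * dest, covering the remainder with one overlapping store.
	 */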
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f
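	/*
	 * std makes rep movsq walk downward from the buffer tails; cld
	 * restores DF=0, which the rest of the kernel relies on. The first
	 * source qword was saved in r11 before rsi was repointed, and the
	 * final store to (%r10) rewrites the first 8 bytes of dest to
	 * cover the 0-7 byte remainder at the head.
	 */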

	/*
	 * Prepare for the backward copy.
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Advance both pointers to the tail of the buffers.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop iteration.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Step the pointers back to the head of the remaining bytes.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
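	/*
	 * rsi and rdi now point at the first of the rdx (< 32) remaining
	 * bytes, so the tail code at 1 below is shared by the forward and
	 * backward paths. Every tail case performs all of its loads before
	 * any store, which keeps it safe for either direction of overlap.
	 */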
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Copy 16 to 31 bytes as two 16-byte blocks, one at each end,
	 * which may overlap in the middle.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Copy 8 to 15 bytes as two possibly overlapping qwords.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Copy 4 to 7 bytes as two possibly overlapping dwords.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Copy 2 or 3 bytes as two possibly overlapping words.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Copy the last remaining byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq
	CFI_ENDPROC
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward copy using rep movsb (substituted on ERMS CPUs) */
	movq %rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	.section .altinstructions,"a"
	.align 8
	.quad .Lmemmove_begin_forward
	.quad .Lmemmove_begin_forward_efs
	.word X86_FEATURE_ERMS
	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
	.previous
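	/*
	 * The record above feeds apply_alternatives(): when the CPU has
	 * ERMS (Enhanced REP MOVSB/STOSB), the tuned forward path between
	 * .Lmemmove_begin_forward and .Lmemmove_end_forward is patched at
	 * boot with the short rep movsb sequence from the section above.
	 */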
ENDPROC(memmove)