xref: /linux/arch/loongarch/lib/copy_user.S (revision 2697b79a469b68e3ad3640f55284359c1396278d)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
5
6#include <linux/export.h>
7#include <asm/alternative-asm.h>
8#include <asm/asm.h>
9#include <asm/asmmacro.h>
10#include <asm/asm-extable.h>
11#include <asm/cpu.h>
12#include <asm/regdef.h>
13#include <asm/unwind_hints.h>
14
/*
 * __copy_user: common entry point for the user-copy routines in this file.
 *
 * The body is a single branch emitted through ALTERNATIVE: the default
 * instruction is "b __copy_user_generic", and "b __copy_user_fast" is the
 * replacement associated with CPU_FEATURE_UAL. Nothing is modified before
 * the branch, so a0/a1/a2 (to, from, n) reach the chosen routine unchanged
 * and that routine's "jr ra" returns straight to our caller with its
 * result in a0.
 */
SYM_FUNC_START(__copy_user)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __copy_user_generic",	\
			"b __copy_user_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__copy_user)

EXPORT_SYMBOL(__copy_user)

/*
 * unsigned long __copy_user_generic(void *to, const void *from, size_t n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Copies n bytes one byte at a time, so no access is ever wider than a
 * byte regardless of how the pointers are aligned.
 *
 * Returns in a0 the number of bytes that were NOT copied: 0 when the whole
 * range was copied, otherwise the count still outstanding when the load at
 * 1: or the store at 2: faulted (both are listed in the exception table
 * with label 3 as their fixup).
 *
 * Modifies: a0, a1, a2, t0.
 */
SYM_FUNC_START(__copy_user_generic)
	beqz	a2, 3f			/* n == 0: return 0 immediately */

1:	ld.b	t0, a1, 0		/* load one byte from 'from' */
2:	st.b	t0, a0, 0		/* store it to 'to' */
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1		/* counted only after the store completed */
	bgtz	a2, 1b

3:	move	a0, a2			/* a2 = bytes left (0 on normal exit) */
	jr	ra

	/* a fault at 1: or 2: resumes at 3: with a2 still = bytes not copied */
	_asm_extable 1b, 3b
	_asm_extable 2b, 3b
SYM_FUNC_END(__copy_user_generic)

/*
 * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Copy routine built from 2/4/8-byte loads and stores issued at whatever
 * alignment the pointers happen to have (only the destination is rounded
 * up to an 8-byte boundary on the large path).
 *
 *   n <= 8: jump into a table of stubs, one per length, each placed on its
 *           own 32-byte boundary.
 *   n >= 9: copy the first 8 bytes, advance both pointers so that the
 *           destination is 8-byte aligned, copy 64-byte blocks, then at
 *           most one 32-, 16- and 8-byte block, and finish with a single
 *           8-byte copy of the last 8 bytes of the buffer (which may
 *           overlap bytes that were already copied).
 *
 * Register use on the large path (after the first 8-byte copy):
 *   a0 = current destination     a2 = to + n   (end of destination)
 *   a1 = current source          a3 = from + n (end of source)
 *   a4 = a3 - <block size>, the limit for the current block size
 *   t0-t7 = data
 *
 * Returns in a0: 0 on success. If an access faults, the fixup code retries
 * from the current a0/a1 one byte at a time and returns the number of
 * bytes that could not be copied.
 */
SYM_FUNC_START(__copy_user_fast)
	sltui	t0, a2, 9		/* t0 = (n < 9) */
	bnez	t0, .Lsmall		/* 0..8 bytes: use the per-length stubs */

	/*
	 * n >= 9: copy the first 8 bytes. a2 still holds n here, which is what
	 * .Lsmall_fixup expects if 0: or 1: faults.
	 */
0:	ld.d	t0, a1, 0
1:	st.d	t0, a0, 0
	add.d	a3, a1, a2		/* a3 = from + n */
	add.d	a2, a0, a2		/* a2 = to + n (no longer a count) */

	/* align up destination address */
	andi	t1, a0, 7		/* t1 = to & 7 */
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8		/* t0 = 8 - t1, in 1..8 (covered by the copy above) */
	add.d	a1, a1, t0
	add.d	a0, a0, t0		/* a0 is now a multiple of 8 */

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64		/* 64 bytes or fewer left: skip the loop */

	/* copy 64 bytes at a time */
.Lloop64:
2:	ld.d	t0, a1, 0
3:	ld.d	t1, a1, 8
4:	ld.d	t2, a1, 16
5:	ld.d	t3, a1, 24
6:	ld.d	t4, a1, 32
7:	ld.d	t5, a1, 40
8:	ld.d	t6, a1, 48
9:	ld.d	t7, a1, 56
10:	st.d	t0, a0, 0
11:	st.d	t1, a0, 8
12:	st.d	t2, a0, 16
13:	st.d	t3, a0, 24
14:	st.d	t4, a0, 32
15:	st.d	t5, a0, 40
16:	st.d	t6, a0, 48
17:	st.d	t7, a0, 56
	addi.d	a1, a1, 64		/* pointers move only after all 8 stores */
	addi.d	a0, a0, 64
	bltu	a1, a4, .Lloop64	/* loop while more than 64 bytes remain */

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32		/* 32 bytes or fewer left */
18:	ld.d	t0, a1, 0
19:	ld.d	t1, a1, 8
20:	ld.d	t2, a1, 16
21:	ld.d	t3, a1, 24
22:	st.d	t0, a0, 0
23:	st.d	t1, a0, 8
24:	st.d	t2, a0, 16
25:	st.d	t3, a0, 24
	addi.d	a1, a1, 32
	addi.d	a0, a0, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16		/* 16 bytes or fewer left */
26:	ld.d	t0, a1, 0
27:	ld.d	t1, a1, 8
28:	st.d	t0, a0, 0
29:	st.d	t1, a0, 8
	addi.d	a1, a1, 16
	addi.d	a0, a0, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8		/* 8 bytes or fewer left */
30:	ld.d	t0, a1, 0
31:	st.d	t0, a0, 0
	addi.d	a1, a1, 8
	addi.d	a0, a0, 8

	/*
	 * 1..8 bytes are left. Copy the final 8 bytes of the buffer, addressed
	 * from the end pointers, instead of branching on the exact count.
	 */
.Llt8:
32:	ld.d	t0, a3, -8
33:	st.d	t0, a2, -8

	/* return */
	move	a0, zero
	jr	ra

	/*
	 * Small copies, n in 0..8. The dispatch code and every stub start on a
	 * 32-byte boundary (.align 5) and each fits within 32 bytes, so the
	 * stub for length n sits at .Lsmall + 32 + n * 32.
	 * pcaddi adds its immediate shifted left by 2 to the PC, so with the
	 * immediate 8 it yields .Lsmall + 32, the address of the n == 0 stub.
	 */
	.align	5
.Lsmall:
	pcaddi	t0, 8			/* t0 = address of the n == 0 stub */
	slli.d	a3, a2, 5		/* a3 = n * 32 */
	add.d	t0, t0, a3
	jr	t0

	/* n == 0 */
	.align	5
	move	a0, zero
	jr	ra

	/* n == 1 */
	.align	5
34:	ld.b	t0, a1, 0
35:	st.b	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 2 */
	.align	5
36:	ld.h	t0, a1, 0
37:	st.h	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 3: 2 + 1 bytes */
	.align	5
38:	ld.h	t0, a1, 0
39:	ld.b	t1, a1, 2
40:	st.h	t0, a0, 0
41:	st.b	t1, a0, 2
	move	a0, zero
	jr	ra

	/* n == 4 */
	.align	5
42:	ld.w	t0, a1, 0
43:	st.w	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 5: 4 + 1 bytes */
	.align	5
44:	ld.w	t0, a1, 0
45:	ld.b	t1, a1, 4
46:	st.w	t0, a0, 0
47:	st.b	t1, a0, 4
	move	a0, zero
	jr	ra

	/* n == 6: 4 + 2 bytes */
	.align	5
48:	ld.w	t0, a1, 0
49:	ld.h	t1, a1, 4
50:	st.w	t0, a0, 0
51:	st.h	t1, a0, 4
	move	a0, zero
	jr	ra

	/* n == 7: two 4-byte accesses at offsets 0 and 3, overlapping by one byte */
	.align	5
52:	ld.w	t0, a1, 0
53:	ld.w	t1, a1, 3
54:	st.w	t0, a0, 0
55:	st.w	t1, a0, 3
	move	a0, zero
	jr	ra

	/* n == 8 */
	.align	5
56:	ld.d	t0, a1, 0
57:	st.d	t0, a0, 0
	move	a0, zero
	jr	ra

	/* fixup and ex_table */

	/*
	 * Fault on the large path after the setup: a2 holds the end of the
	 * destination there, so turn it back into the number of bytes between
	 * the current destination and the end.
	 */
.Llarge_fixup:
	sub.d	a2, a2, a0

	/*
	 * a2 = bytes still to copy starting at a0/a1 (at least 1). Retry one
	 * byte at a time; if 58: or 59: faults, control goes to .Lexit with a2
	 * equal to the number of bytes that were not copied.
	 */
.Lsmall_fixup:
58:	ld.b	t0, a1, 0
59:	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 58b

.Lexit:
	move	a0, a2			/* return bytes not copied */
	jr	ra

	/* first 8-byte copy of the large path: a2 is still the byte count */
	_asm_extable 0b, .Lsmall_fixup
	_asm_extable 1b, .Lsmall_fixup
	/* rest of the large path: a2 is the destination end pointer */
	_asm_extable 2b, .Llarge_fixup
	_asm_extable 3b, .Llarge_fixup
	_asm_extable 4b, .Llarge_fixup
	_asm_extable 5b, .Llarge_fixup
	_asm_extable 6b, .Llarge_fixup
	_asm_extable 7b, .Llarge_fixup
	_asm_extable 8b, .Llarge_fixup
	_asm_extable 9b, .Llarge_fixup
	_asm_extable 10b, .Llarge_fixup
	_asm_extable 11b, .Llarge_fixup
	_asm_extable 12b, .Llarge_fixup
	_asm_extable 13b, .Llarge_fixup
	_asm_extable 14b, .Llarge_fixup
	_asm_extable 15b, .Llarge_fixup
	_asm_extable 16b, .Llarge_fixup
	_asm_extable 17b, .Llarge_fixup
	_asm_extable 18b, .Llarge_fixup
	_asm_extable 19b, .Llarge_fixup
	_asm_extable 20b, .Llarge_fixup
	_asm_extable 21b, .Llarge_fixup
	_asm_extable 22b, .Llarge_fixup
	_asm_extable 23b, .Llarge_fixup
	_asm_extable 24b, .Llarge_fixup
	_asm_extable 25b, .Llarge_fixup
	_asm_extable 26b, .Llarge_fixup
	_asm_extable 27b, .Llarge_fixup
	_asm_extable 28b, .Llarge_fixup
	_asm_extable 29b, .Llarge_fixup
	_asm_extable 30b, .Llarge_fixup
	_asm_extable 31b, .Llarge_fixup
	_asm_extable 32b, .Llarge_fixup
	_asm_extable 33b, .Llarge_fixup
	/* n == 1 stub: a2 is 1 and nothing was copied, so return it as is */
	_asm_extable 34b, .Lexit
	_asm_extable 35b, .Lexit
	/* n == 2..8 stubs: a0/a1/a2 are unmodified, retry byte by byte */
	_asm_extable 36b, .Lsmall_fixup
	_asm_extable 37b, .Lsmall_fixup
	_asm_extable 38b, .Lsmall_fixup
	_asm_extable 39b, .Lsmall_fixup
	_asm_extable 40b, .Lsmall_fixup
	_asm_extable 41b, .Lsmall_fixup
	_asm_extable 42b, .Lsmall_fixup
	_asm_extable 43b, .Lsmall_fixup
	_asm_extable 44b, .Lsmall_fixup
	_asm_extable 45b, .Lsmall_fixup
	_asm_extable 46b, .Lsmall_fixup
	_asm_extable 47b, .Lsmall_fixup
	_asm_extable 48b, .Lsmall_fixup
	_asm_extable 49b, .Lsmall_fixup
	_asm_extable 50b, .Lsmall_fixup
	_asm_extable 51b, .Lsmall_fixup
	_asm_extable 52b, .Lsmall_fixup
	_asm_extable 53b, .Lsmall_fixup
	_asm_extable 54b, .Lsmall_fixup
	_asm_extable 55b, .Lsmall_fixup
	_asm_extable 56b, .Lsmall_fixup
	_asm_extable 57b, .Lsmall_fixup
	/* the byte-wise retry itself faulted: give up and return a2 */
	_asm_extable 58b, .Lexit
	_asm_extable 59b, .Lexit
SYM_FUNC_END(__copy_user_fast)

/* NOTE(review): presumably needed because of the computed "jr t0" dispatch in .Lsmall - confirm */
STACK_FRAME_NON_STANDARD __copy_user_fast
