/* xref: /linux/arch/loongarch/lib/copy_user.S (revision c31f4aa8fed048fa70e742c4bb49bb48dc489ab3) */
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4 */
5
6#include <linux/export.h>
7#include <asm/alternative-asm.h>
8#include <asm/asm.h>
9#include <asm/asmmacro.h>
10#include <asm/asm-extable.h>
11#include <asm/cpu.h>
12#include <asm/regdef.h>
13#include <asm/unwind_hints.h>
14
/*
 * unsigned long __copy_user(void *to, const void *from, size_t n)
 *
 * Top-level user-copy entry point.  Returns the number of bytes that
 * could NOT be copied (0 on full success).  This stub only dispatches
 * to one of the two implementations below.
 */
SYM_FUNC_START(__copy_user)
#ifdef CONFIG_32BIT
	/* 32-bit kernels always use the byte-at-a-time generic copy. */
	b		__copy_user_generic
#else
	/*
	 * Some CPUs support hardware unaligned access (UAL).  At boot the
	 * alternatives mechanism patches this site: on CPUs with
	 * CPU_FEATURE_UAL the branch targets the fast word-based copy,
	 * otherwise the generic byte-at-a-time copy is kept.
	 */
	ALTERNATIVE	"b __copy_user_generic",	\
			"b __copy_user_fast", CPU_FEATURE_UAL
#endif
SYM_FUNC_END(__copy_user)

EXPORT_SYMBOL(__copy_user)
28
/*
 * unsigned long __copy_user_generic(void *to, const void *from, size_t n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Byte-at-a-time copy, correct for any alignment.  Returns (in a0) the
 * number of bytes left uncopied: 0 on success, or the remaining count
 * if a load or store faults partway through.
 */
SYM_FUNC_START(__copy_user_generic)
	beqz		a2, 3f			/* nothing to do for n == 0 */

1:	ld.b		t0, a1, 0		/* may fault: source byte */
2:	st.b		t0, a0, 0		/* may fault: destination byte */
	PTR_ADDI	a0, a0, 1
	PTR_ADDI	a1, a1, 1
	PTR_ADDI	a2, a2, -1		/* a2 = bytes still to copy */
	bgtz		a2, 1b

3:	move		a0, a2			/* return value = remaining count */
	jr		ra

	/*
	 * Fault fixup: a fault at 1b/2b resumes at 3b, where a2 still
	 * holds the not-yet-copied byte count, which becomes the return.
	 */
	_asm_extable	1b, 3b
	_asm_extable	2b, 3b
SYM_FUNC_END(__copy_user_generic)
52
#ifdef CONFIG_64BIT
/*
 * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Fast copy for CPUs with hardware unaligned access.  Returns (in a0)
 * the number of bytes NOT copied: 0 on success.
 *
 * Register roles on the large-copy (n >= 9) path, after the prologue:
 *   a0 = current dst    a1 = current src
 *   a2 = dst end (to+n) a3 = src end (from+n)
 *   a4 = loop bound     t0..t7 = data
 * Note a2 is repurposed from "count" to "dst end" — the fixup code
 * below depends on which of the two meanings is live at the fault site.
 */
SYM_FUNC_START(__copy_user_fast)
	sltui	t0, a2, 9
	bnez	t0, .Lsmall		/* n < 9: use the jump table */

	/*
	 * Copy the first 8 bytes up front (possibly unaligned), so the
	 * alignment step below may skip past up to 8 head bytes without
	 * losing data.
	 */
0:	ld.d	t0, a1, 0
1:	st.d	t0, a0, 0
	add.d	a3, a1, a2		/* a3 = src end */
	add.d	a2, a0, a2		/* a2 = dst end (a2 no longer holds n!) */

	/* align up destination address: advance both by 8 - (to & 7) */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8
	add.d	a1, a1, t0
	add.d	a0, a0, t0

	addi.d	a4, a3, -64		/* a4 = last src address safe for a 64B block */
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time; all loads before stores so a fault
	 * leaves the destination untouched for this block */
.Lloop64:
2:	ld.d	t0, a1, 0
3:	ld.d	t1, a1, 8
4:	ld.d	t2, a1, 16
5:	ld.d	t3, a1, 24
6:	ld.d	t4, a1, 32
7:	ld.d	t5, a1, 40
8:	ld.d	t6, a1, 48
9:	ld.d	t7, a1, 56
10:	st.d	t0, a0, 0
11:	st.d	t1, a0, 8
12:	st.d	t2, a0, 16
13:	st.d	t3, a0, 24
14:	st.d	t4, a0, 32
15:	st.d	t5, a0, 40
16:	st.d	t6, a0, 48
17:	st.d	t7, a0, 56
	addi.d	a1, a1, 64
	addi.d	a0, a0, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes: 32, then 16, then 8, by halving */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
18:	ld.d	t0, a1, 0
19:	ld.d	t1, a1, 8
20:	ld.d	t2, a1, 16
21:	ld.d	t3, a1, 24
22:	st.d	t0, a0, 0
23:	st.d	t1, a0, 8
24:	st.d	t2, a0, 16
25:	st.d	t3, a0, 24
	addi.d	a1, a1, 32
	addi.d	a0, a0, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
26:	ld.d	t0, a1, 0
27:	ld.d	t1, a1, 8
28:	st.d	t0, a0, 0
29:	st.d	t1, a0, 8
	addi.d	a1, a1, 16
	addi.d	a0, a0, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
30:	ld.d	t0, a1, 0
31:	st.d	t0, a0, 0
	addi.d	a1, a1, 8
	addi.d	a0, a0, 8

	/*
	 * Final tail: copy the last 8 bytes relative to the end pointers
	 * (a3 = src end, a2 = dst end).  This may overlap bytes already
	 * copied above, which is harmless — same data, same direction.
	 */
.Llt8:
32:	ld.d	t0, a3, -8
33:	st.d	t0, a2, -8

	/* return */
	move	a0, zero
	jr	ra

	/*
	 * Small-copy jump table for n in [0, 8].  pcaddi t0, 8 yields the
	 * address of the n == 0 entry (8 instructions = 32 bytes ahead);
	 * each .align 5 entry is exactly 32 bytes, so target = t0 + n*32.
	 */
	.align	5
.Lsmall:
	pcaddi	t0, 8
	slli.d	a3, a2, 5		/* a3 = n * 32 */
	add.d	t0, t0, a3
	jr	t0

	/* n == 0 */
	.align	5
	move	a0, zero
	jr	ra

	/* n == 1 */
	.align	5
34:	ld.b	t0, a1, 0
35:	st.b	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 2 */
	.align	5
36:	ld.h	t0, a1, 0
37:	st.h	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 3 */
	.align	5
38:	ld.h	t0, a1, 0
39:	ld.b	t1, a1, 2
40:	st.h	t0, a0, 0
41:	st.b	t1, a0, 2
	move	a0, zero
	jr	ra

	/* n == 4 */
	.align	5
42:	ld.w	t0, a1, 0
43:	st.w	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 5 */
	.align	5
44:	ld.w	t0, a1, 0
45:	ld.b	t1, a1, 4
46:	st.w	t0, a0, 0
47:	st.b	t1, a0, 4
	move	a0, zero
	jr	ra

	/* n == 6 */
	.align	5
48:	ld.w	t0, a1, 0
49:	ld.h	t1, a1, 4
50:	st.w	t0, a0, 0
51:	st.h	t1, a0, 4
	move	a0, zero
	jr	ra

	/* n == 7: two overlapping word copies (bytes 0-3 and 3-6) */
	.align	5
52:	ld.w	t0, a1, 0
53:	ld.w	t1, a1, 3
54:	st.w	t0, a0, 0
55:	st.w	t1, a0, 3
	move	a0, zero
	jr	ra

	/* n == 8 */
	.align	5
56:	ld.d	t0, a1, 0
57:	st.d	t0, a0, 0
	move	a0, zero
	jr	ra

	/*
	 * Fixup and ex_table.
	 *
	 * .Llarge_fixup: fault on the large path, where a2 = dst end and
	 * a0 = current dst, so a2 - a0 = bytes still to copy.
	 * .Lsmall_fixup: a2 already holds the remaining count (small path
	 * and labels 0/1, which fault before a2 is repurposed); retry the
	 * remainder byte by byte so every readable/writable byte is copied
	 * before giving up.
	 * .Lexit: a2 is the final "not copied" count — return it.
	 */
.Llarge_fixup:
	sub.d	a2, a2, a0

.Lsmall_fixup:
58:	ld.b	t0, a1, 0
59:	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 58b

.Lexit:
	move	a0, a2
	jr	ra

	_asm_extable 0b, .Lsmall_fixup
	_asm_extable 1b, .Lsmall_fixup
	_asm_extable 2b, .Llarge_fixup
	_asm_extable 3b, .Llarge_fixup
	_asm_extable 4b, .Llarge_fixup
	_asm_extable 5b, .Llarge_fixup
	_asm_extable 6b, .Llarge_fixup
	_asm_extable 7b, .Llarge_fixup
	_asm_extable 8b, .Llarge_fixup
	_asm_extable 9b, .Llarge_fixup
	_asm_extable 10b, .Llarge_fixup
	_asm_extable 11b, .Llarge_fixup
	_asm_extable 12b, .Llarge_fixup
	_asm_extable 13b, .Llarge_fixup
	_asm_extable 14b, .Llarge_fixup
	_asm_extable 15b, .Llarge_fixup
	_asm_extable 16b, .Llarge_fixup
	_asm_extable 17b, .Llarge_fixup
	_asm_extable 18b, .Llarge_fixup
	_asm_extable 19b, .Llarge_fixup
	_asm_extable 20b, .Llarge_fixup
	_asm_extable 21b, .Llarge_fixup
	_asm_extable 22b, .Llarge_fixup
	_asm_extable 23b, .Llarge_fixup
	_asm_extable 24b, .Llarge_fixup
	_asm_extable 25b, .Llarge_fixup
	_asm_extable 26b, .Llarge_fixup
	_asm_extable 27b, .Llarge_fixup
	_asm_extable 28b, .Llarge_fixup
	_asm_extable 29b, .Llarge_fixup
	_asm_extable 30b, .Llarge_fixup
	_asm_extable 31b, .Llarge_fixup
	_asm_extable 32b, .Llarge_fixup
	_asm_extable 33b, .Llarge_fixup
	_asm_extable 34b, .Lexit
	_asm_extable 35b, .Lexit
	_asm_extable 36b, .Lsmall_fixup
	_asm_extable 37b, .Lsmall_fixup
	_asm_extable 38b, .Lsmall_fixup
	_asm_extable 39b, .Lsmall_fixup
	_asm_extable 40b, .Lsmall_fixup
	_asm_extable 41b, .Lsmall_fixup
	_asm_extable 42b, .Lsmall_fixup
	_asm_extable 43b, .Lsmall_fixup
	_asm_extable 44b, .Lsmall_fixup
	_asm_extable 45b, .Lsmall_fixup
	_asm_extable 46b, .Lsmall_fixup
	_asm_extable 47b, .Lsmall_fixup
	_asm_extable 48b, .Lsmall_fixup
	_asm_extable 49b, .Lsmall_fixup
	_asm_extable 50b, .Lsmall_fixup
	_asm_extable 51b, .Lsmall_fixup
	_asm_extable 52b, .Lsmall_fixup
	_asm_extable 53b, .Lsmall_fixup
	_asm_extable 54b, .Lsmall_fixup
	_asm_extable 55b, .Lsmall_fixup
	_asm_extable 56b, .Lsmall_fixup
	_asm_extable 57b, .Lsmall_fixup
	_asm_extable 58b, .Lexit
	_asm_extable 59b, .Lexit
SYM_FUNC_END(__copy_user_fast)

/* Computed jump above defeats objtool's stack-flow analysis; opt out. */
STACK_FRAME_NON_STANDARD __copy_user_fast
#endif
290