xref: /linux/arch/xtensa/lib/usercopy.S (revision cdd5b5a9761fd66d17586e4f4ba6588c70e640ea)
1249ac17eSChris Zankel/*
2249ac17eSChris Zankel *  arch/xtensa/lib/usercopy.S
3249ac17eSChris Zankel *
4249ac17eSChris Zankel *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
5249ac17eSChris Zankel *
6249ac17eSChris Zankel *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
7249ac17eSChris Zankel *  It needs to remain separate and distinct.  The hal files are part
84b3f686dSMatt LaPlante *  of the Xtensa link-time HAL, and those files may differ per
9249ac17eSChris Zankel *  processor configuration.  Patching the kernel for another
10249ac17eSChris Zankel *  processor configuration includes replacing the hal files, and we
114b3f686dSMatt LaPlante *  could lose the special functionality for accessing user-space
12249ac17eSChris Zankel *  memory during such a patch.  We sacrifice a little code space here
13249ac17eSChris Zankel *  in favor to simplify code maintenance.
14249ac17eSChris Zankel *
15249ac17eSChris Zankel *  This file is subject to the terms and conditions of the GNU General
16249ac17eSChris Zankel *  Public License.  See the file "COPYING" in the main directory of
17249ac17eSChris Zankel *  this archive for more details.
18249ac17eSChris Zankel *
19249ac17eSChris Zankel *  Copyright (C) 2002 Tensilica Inc.
20249ac17eSChris Zankel */
21249ac17eSChris Zankel
22249ac17eSChris Zankel
23249ac17eSChris Zankel/*
24249ac17eSChris Zankel * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
25249ac17eSChris Zankel *
26249ac17eSChris Zankel * The returned value is the number of bytes not copied.  Implies zero
27249ac17eSChris Zankel * is success.
28249ac17eSChris Zankel *
29249ac17eSChris Zankel * The general case algorithm is as follows:
30249ac17eSChris Zankel *   If the destination and source are both aligned,
31249ac17eSChris Zankel *     do 16B chunks with a loop, and then finish up with
32249ac17eSChris Zankel *     8B, 4B, 2B, and 1B copies conditional on the length.
33249ac17eSChris Zankel *   If destination is aligned and source unaligned,
34249ac17eSChris Zankel *     do the same, but use SRC to align the source data.
35249ac17eSChris Zankel *   If destination is unaligned, align it by conditionally
36249ac17eSChris Zankel *     copying 1B and 2B and then retest.
37249ac17eSChris Zankel *   This code tries to use fall-through branches for the common
38249ac17eSChris Zankel *     case of aligned destinations (except for the branches to
39249ac17eSChris Zankel *     the alignment label).
40249ac17eSChris Zankel *
41249ac17eSChris Zankel * Register use:
42249ac17eSChris Zankel *	a0/ return address
43249ac17eSChris Zankel *	a1/ stack pointer
44249ac17eSChris Zankel *	a2/ return value
45249ac17eSChris Zankel *	a3/ src
46249ac17eSChris Zankel *	a4/ length
47249ac17eSChris Zankel *	a5/ dst
48249ac17eSChris Zankel *	a6/ tmp
49249ac17eSChris Zankel *	a7/ tmp
50249ac17eSChris Zankel *	a8/ tmp
51249ac17eSChris Zankel *	a9/ tmp
52249ac17eSChris Zankel *	a10/ tmp
53249ac17eSChris Zankel *	a11/ original length
54249ac17eSChris Zankel */
55249ac17eSChris Zankel
565cf97ebdSMax Filippov#include <linux/linkage.h>
570013acebSMax Filippov#include <asm/asmmacro.h>
588f8d5745SMax Filippov#include <asm/core.h>
59249ac17eSChris Zankel
60249ac17eSChris Zankel	.text
615cf97ebdSMax FilippovENTRY(__xtensa_copy_user)
625cf97ebdSMax Filippov
	# Every user-memory access below is wrapped in EX(10f): the macro
	# records an exception-table entry so that a fault in that single
	# instruction jumps to the fixup code at label "10:" (in .fixup,
	# end of file), which returns the number of bytes NOT copied.
	#
	# With the call0 ABI and no hardware loop option, .Loop2 below must
	# spill a10 to the stack ("s32i a10, a1, 0"), so one 4-byte frame
	# word is reserved; in all other configurations no stack is needed.
6361a6b912SMax Filippov#if !XCHAL_HAVE_LOOPS && defined(__XTENSA_CALL0_ABI__)
6461a6b912SMax Filippov#define STACK_SIZE 4
6561a6b912SMax Filippov#else
6661a6b912SMax Filippov#define STACK_SIZE 0
6761a6b912SMax Filippov#endif
6861a6b912SMax Filippov	abi_entry(STACK_SIZE)
69249ac17eSChris Zankel	# a2/ dst, a3/ src, a4/ len
70249ac17eSChris Zankel	mov	a5, a2		# copy dst so that a2 is return value
71249ac17eSChris Zankel	mov	a11, a4		# preserve original len for error case
72249ac17eSChris Zankel.Lcommon:
73249ac17eSChris Zankel	bbsi.l	a2, 0, .Ldst1mod2 # if dst is 1 mod 2
74249ac17eSChris Zankel	bbsi.l	a2, 1, .Ldst2mod4 # if dst is 2 mod 4
75249ac17eSChris Zankel.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
76249ac17eSChris Zankel	srli	a7, a4, 4	# number of loop iterations with 16B
77249ac17eSChris Zankel				# per iteration
78249ac17eSChris Zankel	movi	a8, 3		  # if source is also aligned,
79249ac17eSChris Zankel	bnone	a3, a8, .Laligned # then use word copy
80fbb871e2SMax Filippov	__ssa8	a3		# set shift amount from byte offset
81249ac17eSChris Zankel	bnez	a4, .Lsrcunaligned
82249ac17eSChris Zankel	movi	a2, 0		# return success for len==0
8361a6b912SMax Filippov	abi_ret(STACK_SIZE)
84249ac17eSChris Zankel
85249ac17eSChris Zankel/*
86249ac17eSChris Zankel * Destination is unaligned
87249ac17eSChris Zankel */
88249ac17eSChris Zankel
89249ac17eSChris Zankel.Ldst1mod2:	# dst is only byte aligned
90249ac17eSChris Zankel	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte
91249ac17eSChris Zankel
92249ac17eSChris Zankel	# copy 1 byte
930013acebSMax FilippovEX(10f)	l8ui	a6, a3, 0
94249ac17eSChris Zankel	addi	a3, a3,  1
950013acebSMax FilippovEX(10f)	s8i	a6, a5,  0
96249ac17eSChris Zankel	addi	a5, a5,  1
97249ac17eSChris Zankel	addi	a4, a4, -1
98249ac17eSChris Zankel	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
99249ac17eSChris Zankel					# return to main algorithm
100249ac17eSChris Zankel.Ldst2mod4:	# dst 16-bit aligned
101249ac17eSChris Zankel	# copy 2 bytes
102249ac17eSChris Zankel	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
1030013acebSMax FilippovEX(10f)	l8ui	a6, a3, 0
1040013acebSMax FilippovEX(10f)	l8ui	a7, a3, 1
105249ac17eSChris Zankel	addi	a3, a3,  2
1060013acebSMax FilippovEX(10f)	s8i	a6, a5,  0
1070013acebSMax FilippovEX(10f)	s8i	a7, a5,  1
108249ac17eSChris Zankel	addi	a5, a5,  2
109249ac17eSChris Zankel	addi	a4, a4, -2
110249ac17eSChris Zankel	j	.Ldstaligned	# dst is now aligned, return to main algorithm
111249ac17eSChris Zankel
112249ac17eSChris Zankel/*
113249ac17eSChris Zankel * Byte by byte copy
114249ac17eSChris Zankel */
115249ac17eSChris Zankel	.align	4
116249ac17eSChris Zankel	.byte	0		# 1 mod 4 alignment for LOOPNEZ
117249ac17eSChris Zankel				# (0 mod 4 alignment for LBEG)
118249ac17eSChris Zankel.Lbytecopy:
119249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS
120249ac17eSChris Zankel	loopnez	a4, .Lbytecopydone
121249ac17eSChris Zankel#else /* !XCHAL_HAVE_LOOPS */
122249ac17eSChris Zankel	beqz	a4, .Lbytecopydone
123249ac17eSChris Zankel	add	a7, a3, a4	# a7 = end address for source
124249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */
125249ac17eSChris Zankel.Lnextbyte:
1260013acebSMax FilippovEX(10f)	l8ui	a6, a3, 0
127249ac17eSChris Zankel	addi	a3, a3, 1
1280013acebSMax FilippovEX(10f)	s8i	a6, a5, 0
129249ac17eSChris Zankel	addi	a5, a5, 1
130249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS
131249ac17eSChris Zankel	blt	a3, a7, .Lnextbyte
132249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */
133249ac17eSChris Zankel.Lbytecopydone:
134249ac17eSChris Zankel	movi	a2, 0		# return success for len bytes copied
13561a6b912SMax Filippov	abi_ret(STACK_SIZE)
136249ac17eSChris Zankel
137249ac17eSChris Zankel/*
138249ac17eSChris Zankel * Destination and source are word-aligned.
139249ac17eSChris Zankel */
140249ac17eSChris Zankel	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
141249ac17eSChris Zankel	.align	4		# 1 mod 4 alignment for LOOPNEZ
142249ac17eSChris Zankel	.byte	0		# (0 mod 4 alignment for LBEG)
143249ac17eSChris Zankel.Laligned:
144249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS
145249ac17eSChris Zankel	loopnez	a7, .Loop1done
146249ac17eSChris Zankel#else /* !XCHAL_HAVE_LOOPS */
147249ac17eSChris Zankel	beqz	a7, .Loop1done
148249ac17eSChris Zankel	slli	a8, a7, 4
149249ac17eSChris Zankel	add	a8, a8, a3	# a8 = end of last 16B source chunk
150249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */
	# Loads and stores are interleaved (load N+1 before store N) to
	# overlap memory latency within the 16B/iteration unrolled body.
151249ac17eSChris Zankel.Loop1:
1520013acebSMax FilippovEX(10f)	l32i	a6, a3,  0
1530013acebSMax FilippovEX(10f)	l32i	a7, a3,  4
1540013acebSMax FilippovEX(10f)	s32i	a6, a5,  0
1550013acebSMax FilippovEX(10f)	l32i	a6, a3,  8
1560013acebSMax FilippovEX(10f)	s32i	a7, a5,  4
1570013acebSMax FilippovEX(10f)	l32i	a7, a3, 12
1580013acebSMax FilippovEX(10f)	s32i	a6, a5,  8
159249ac17eSChris Zankel	addi	a3, a3, 16
1600013acebSMax FilippovEX(10f)	s32i	a7, a5, 12
161249ac17eSChris Zankel	addi	a5, a5, 16
162249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS
163249ac17eSChris Zankel	blt	a3, a8, .Loop1
164249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */
	# Finish the tail by testing the remaining length bits: 8B, 4B,
	# 2B, 1B, each copied at most once.
165249ac17eSChris Zankel.Loop1done:
166249ac17eSChris Zankel	bbci.l	a4, 3, .L2
167249ac17eSChris Zankel	# copy 8 bytes
1680013acebSMax FilippovEX(10f)	l32i	a6, a3,  0
1690013acebSMax FilippovEX(10f)	l32i	a7, a3,  4
170249ac17eSChris Zankel	addi	a3, a3,  8
1710013acebSMax FilippovEX(10f)	s32i	a6, a5,  0
1720013acebSMax FilippovEX(10f)	s32i	a7, a5,  4
173249ac17eSChris Zankel	addi	a5, a5,  8
174249ac17eSChris Zankel.L2:
175249ac17eSChris Zankel	bbci.l	a4, 2, .L3
176249ac17eSChris Zankel	# copy 4 bytes
1770013acebSMax FilippovEX(10f)	l32i	a6, a3,  0
178249ac17eSChris Zankel	addi	a3, a3,  4
1790013acebSMax FilippovEX(10f)	s32i	a6, a5,  0
180249ac17eSChris Zankel	addi	a5, a5,  4
181249ac17eSChris Zankel.L3:
182249ac17eSChris Zankel	bbci.l	a4, 1, .L4
183249ac17eSChris Zankel	# copy 2 bytes
1840013acebSMax FilippovEX(10f)	l16ui	a6, a3,  0
185249ac17eSChris Zankel	addi	a3, a3,  2
1860013acebSMax FilippovEX(10f)	s16i	a6, a5,  0
187249ac17eSChris Zankel	addi	a5, a5,  2
188249ac17eSChris Zankel.L4:
189249ac17eSChris Zankel	bbci.l	a4, 0, .L5
190249ac17eSChris Zankel	# copy 1 byte
1910013acebSMax FilippovEX(10f)	l8ui	a6, a3,  0
1920013acebSMax FilippovEX(10f)	s8i	a6, a5,  0
193249ac17eSChris Zankel.L5:
194249ac17eSChris Zankel	movi	a2, 0		# return success for len bytes copied
19561a6b912SMax Filippov	abi_ret(STACK_SIZE)
196249ac17eSChris Zankel
197249ac17eSChris Zankel/*
198249ac17eSChris Zankel * Destination is aligned, Source is unaligned
199249ac17eSChris Zankel */
200249ac17eSChris Zankel
201249ac17eSChris Zankel	.align	4
202249ac17eSChris Zankel	.byte	0		# 1 mod 4 alignment for LOOPNEZ
203249ac17eSChris Zankel				# (0 mod 4 alignment for LBEG)
204249ac17eSChris Zankel.Lsrcunaligned:
205249ac17eSChris Zankel	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	# SAR was set from the source byte offset by __ssa8 above; each
	# __src_b below merges two consecutive source words through that
	# shift amount to produce one aligned destination word, so a6
	# always carries the word straddling the current position.
206249ac17eSChris Zankel	and	a10, a3, a8	# save unalignment offset for below
207249ac17eSChris Zankel	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
2080013acebSMax FilippovEX(10f)	l32i	a6, a3, 0	# load first word
209249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS
210249ac17eSChris Zankel	loopnez	a7, .Loop2done
211249ac17eSChris Zankel#else /* !XCHAL_HAVE_LOOPS */
212249ac17eSChris Zankel	beqz	a7, .Loop2done
21361a6b912SMax Filippov#if defined(__XTENSA_CALL0_ABI__)
	# call0 ABI: spill the misalignment offset (a10) to the reserved
	# stack word and reuse a10 as the loop bound; it is reloaded after
	# the loop.  Windowed ABI uses a12 instead, with no spill.
21461a6b912SMax Filippov	s32i	a10, a1, 0
21561a6b912SMax Filippov	slli	a10, a7, 4
21661a6b912SMax Filippov	add	a10, a10, a3	# a10 = end of last 16B source chunk
21761a6b912SMax Filippov#else
2185029615eSMax Filippov	slli	a12, a7, 4
2195029615eSMax Filippov	add	a12, a12, a3	# a12 = end of last 16B source chunk
22061a6b912SMax Filippov#endif
221249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */
222249ac17eSChris Zankel.Loop2:
2230013acebSMax FilippovEX(10f)	l32i	a7, a3,  4
2240013acebSMax FilippovEX(10f)	l32i	a8, a3,  8
225fbb871e2SMax Filippov	__src_b	a6, a6, a7
2260013acebSMax FilippovEX(10f)	s32i	a6, a5,  0
2270013acebSMax FilippovEX(10f)	l32i	a9, a3, 12
228fbb871e2SMax Filippov	__src_b	a7, a7, a8
2290013acebSMax FilippovEX(10f)	s32i	a7, a5,  4
2300013acebSMax FilippovEX(10f)	l32i	a6, a3, 16
231fbb871e2SMax Filippov	__src_b	a8, a8, a9
2320013acebSMax FilippovEX(10f)	s32i	a8, a5,  8
233249ac17eSChris Zankel	addi	a3, a3, 16
234fbb871e2SMax Filippov	__src_b	a9, a9, a6
2350013acebSMax FilippovEX(10f)	s32i	a9, a5, 12
236249ac17eSChris Zankel	addi	a5, a5, 16
237249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS
23861a6b912SMax Filippov#if defined(__XTENSA_CALL0_ABI__)
23961a6b912SMax Filippov	blt	a3, a10, .Loop2
24061a6b912SMax Filippov	l32i	a10, a1, 0
24161a6b912SMax Filippov#else
2425029615eSMax Filippov	blt	a3, a12, .Loop2
24361a6b912SMax Filippov#endif
244249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */
	# Tail for the unaligned-src path: 8B and 4B chunks still use the
	# __src_b merge (a6 keeps the pending straddling word); once a3 is
	# readjusted to its true misaligned value, the final 2B/1B are
	# copied with plain byte accesses.
245249ac17eSChris Zankel.Loop2done:
246249ac17eSChris Zankel	bbci.l	a4, 3, .L12
247249ac17eSChris Zankel	# copy 8 bytes
2480013acebSMax FilippovEX(10f)	l32i	a7, a3,  4
2490013acebSMax FilippovEX(10f)	l32i	a8, a3,  8
250fbb871e2SMax Filippov	__src_b	a6, a6, a7
2510013acebSMax FilippovEX(10f)	s32i	a6, a5,  0
252249ac17eSChris Zankel	addi	a3, a3,  8
253fbb871e2SMax Filippov	__src_b	a7, a7, a8
2540013acebSMax FilippovEX(10f)	s32i	a7, a5,  4
255249ac17eSChris Zankel	addi	a5, a5,  8
256249ac17eSChris Zankel	mov	a6, a8
257249ac17eSChris Zankel.L12:
258249ac17eSChris Zankel	bbci.l	a4, 2, .L13
259249ac17eSChris Zankel	# copy 4 bytes
2600013acebSMax FilippovEX(10f)	l32i	a7, a3,  4
261249ac17eSChris Zankel	addi	a3, a3,  4
262fbb871e2SMax Filippov	__src_b	a6, a6, a7
2630013acebSMax FilippovEX(10f)	s32i	a6, a5,  0
264249ac17eSChris Zankel	addi	a5, a5,  4
265249ac17eSChris Zankel	mov	a6, a7
266249ac17eSChris Zankel.L13:
267249ac17eSChris Zankel	add	a3, a3, a10	# readjust a3 with correct misalignment
268249ac17eSChris Zankel	bbci.l	a4, 1, .L14
269249ac17eSChris Zankel	# copy 2 bytes
2700013acebSMax FilippovEX(10f)	l8ui	a6, a3,  0
2710013acebSMax FilippovEX(10f)	l8ui	a7, a3,  1
272249ac17eSChris Zankel	addi	a3, a3,  2
2730013acebSMax FilippovEX(10f)	s8i	a6, a5,  0
2740013acebSMax FilippovEX(10f)	s8i	a7, a5,  1
275249ac17eSChris Zankel	addi	a5, a5,  2
276249ac17eSChris Zankel.L14:
277249ac17eSChris Zankel	bbci.l	a4, 0, .L15
278249ac17eSChris Zankel	# copy 1 byte
2790013acebSMax FilippovEX(10f)	l8ui	a6, a3,  0
2800013acebSMax FilippovEX(10f)	s8i	a6, a5,  0
281249ac17eSChris Zankel.L15:
282249ac17eSChris Zankel	movi	a2, 0		# return success for len bytes copied
28361a6b912SMax Filippov	abi_ret(STACK_SIZE)
284249ac17eSChris Zankel
2855cf97ebdSMax FilippovENDPROC(__xtensa_copy_user)
286*338d9150SMax FilippovEXPORT_SYMBOL(__xtensa_copy_user)
287249ac17eSChris Zankel
	# Fixup landing pad: every EX(10f)-tagged access above faults to
	# the label "10:" below.
288249ac17eSChris Zankel	.section .fixup, "ax"
289249ac17eSChris Zankel	.align	4
290249ac17eSChris Zankel
291249ac17eSChris Zankel/* a2 = original dst; a5 = current dst; a11= original len
292249ac17eSChris Zankel * bytes_copied = a5 - a2
293249ac17eSChris Zankel * retval = bytes_not_copied = original len - bytes_copied
294249ac17eSChris Zankel * retval = a11 - (a5 - a2)
295249ac17eSChris Zankel */
296249ac17eSChris Zankel
297249ac17eSChris Zankel
2980013acebSMax Filippov10:
299249ac17eSChris Zankel	sub	a2, a5, a2	/* a2 <-- bytes copied */
300249ac17eSChris Zankel	sub	a2, a11, a2	/* a2 <-- bytes not copied */
30161a6b912SMax Filippov	abi_ret(STACK_SIZE)
302