xref: /linux/arch/xtensa/lib/usercopy.S (revision fbb871e220672a8e9e4e7870da5b206fe05904b2)
1/*
2 *  arch/xtensa/lib/usercopy.S
3 *
4 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
5 *
6 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
7 *  It needs to remain separate and distinct.  The hal files are part
8 *  of the Xtensa link-time HAL, and those files may differ per
9 *  processor configuration.  Patching the kernel for another
10 *  processor configuration includes replacing the hal files, and we
11 *  could lose the special functionality for accessing user-space
12 *  memory during such a patch.  We sacrifice a little code space here
13 *  in favor of simpler code maintenance.
14 *
15 *  This file is subject to the terms and conditions of the GNU General
16 *  Public License.  See the file "COPYING" in the main directory of
17 *  this archive for more details.
18 *
19 *  Copyright (C) 2002 Tensilica Inc.
20 */
21
22
23/*
24 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
25 *
26 * The returned value is the number of bytes not copied.  Implies zero
27 * is success.
28 *
29 * The general case algorithm is as follows:
30 *   If the destination and source are both aligned,
31 *     do 16B chunks with a loop, and then finish up with
32 *     8B, 4B, 2B, and 1B copies conditional on the length.
33 *   If destination is aligned and source unaligned,
34 *     do the same, but use SRC to align the source data.
35 *   If destination is unaligned, align it by conditionally
36 *     copying 1B and 2B and then retest.
37 *   This code tries to use fall-through branches for the common
38 *     case of aligned destinations (except for the branches to
39 *     the alignment label).
40 *
41 * Register use:
42 *	a0/ return address
43 *	a1/ stack pointer
44 *	a2/ return value
45 *	a3/ src
46 *	a4/ length
47 *	a5/ dst
48 *	a6/ tmp
49 *	a7/ tmp
50 *	a8/ tmp
51 *	a9/ tmp
52 *	a10/ tmp
53 *	a11/ original length
54 */
55
56#include <variant/core.h>
57#include <asm/asmmacro.h>
58
59	.text
60	.align	4
61	.global	__xtensa_copy_user
62	.type	__xtensa_copy_user,@function

# NOTE(review): EX(10f) — presumably defined in <asm/asmmacro.h>; confirm —
# appears to attach an exception-table entry to the tagged user-memory
# access, so that a fault during that access branches to the fixup code
# at label "10:" (in the .fixup section at the end of this file), which
# returns in a2 the number of bytes NOT copied.
63__xtensa_copy_user:
64	entry	sp, 16		# minimal stack frame
65	# a2/ dst, a3/ src, a4/ len
66	mov	a5, a2		# copy dst so that a2 is return value
67	mov	a11, a4		# preserve original len for error case
68.Lcommon:
69	bbsi.l	a2, 0, .Ldst1mod2 # if dst is 1 mod 2
70	bbsi.l	a2, 1, .Ldst2mod4 # if dst is 2 mod 4
71.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
72	srli	a7, a4, 4	# number of loop iterations with 16B
73				# per iteration
74	movi	a8, 3		  # if source is also aligned,
75	bnone	a3, a8, .Laligned # then use word copy
76	__ssa8	a3		# set shift amount from byte offset
77	bnez	a4, .Lsrcunaligned
78	movi	a2, 0		# return success for len==0
79	retw
80
81/*
82 * Destination is unaligned
83 */
84
85.Ldst1mod2:	# dst is only byte aligned
86	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte
87
88	# copy 1 byte
89EX(10f)	l8ui	a6, a3, 0
90	addi	a3, a3,  1
91EX(10f)	s8i	a6, a5,  0
92	addi	a5, a5,  1
93	addi	a4, a4, -1
94	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
95					# return to main algorithm
96.Ldst2mod4:	# dst 16-bit aligned
97	# copy 2 bytes
98	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
99EX(10f)	l8ui	a6, a3, 0
100EX(10f)	l8ui	a7, a3, 1
101	addi	a3, a3,  2
102EX(10f)	s8i	a6, a5,  0
103EX(10f)	s8i	a7, a5,  1
104	addi	a5, a5,  2
105	addi	a4, a4, -2
106	j	.Ldstaligned	# dst is now aligned, return to main algorithm
107
108/*
109 * Byte by byte copy
110 */
111	.align	4
112	.byte	0		# 1 mod 4 alignment for LOOPNEZ
113				# (0 mod 4 alignment for LBEG)
114.Lbytecopy:
115#if XCHAL_HAVE_LOOPS
116	loopnez	a4, .Lbytecopydone
117#else /* !XCHAL_HAVE_LOOPS */
118	beqz	a4, .Lbytecopydone
119	add	a7, a3, a4	# a7 = end address for source
120#endif /* !XCHAL_HAVE_LOOPS */
121.Lnextbyte:
122EX(10f)	l8ui	a6, a3, 0
123	addi	a3, a3, 1
124EX(10f)	s8i	a6, a5, 0
125	addi	a5, a5, 1
126#if !XCHAL_HAVE_LOOPS
127	blt	a3, a7, .Lnextbyte
128#endif /* !XCHAL_HAVE_LOOPS */
129.Lbytecopydone:
130	movi	a2, 0		# return success for len bytes copied
131	retw
132
133/*
134 * Destination and source are word-aligned.
135 */
136	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
137	.align	4		# 1 mod 4 alignment for LOOPNEZ
138	.byte	0		# (0 mod 4 alignment for LBEG)
139.Laligned:
140#if XCHAL_HAVE_LOOPS
141	loopnez	a7, .Loop1done
142#else /* !XCHAL_HAVE_LOOPS */
143	beqz	a7, .Loop1done
144	slli	a8, a7, 4
145	add	a8, a8, a3	# a8 = end of last 16B source chunk
146#endif /* !XCHAL_HAVE_LOOPS */
# Loads and stores are interleaved (load ahead of the matching store)
# to keep both streams busy; do not reorder.
147.Loop1:
148EX(10f)	l32i	a6, a3,  0
149EX(10f)	l32i	a7, a3,  4
150EX(10f)	s32i	a6, a5,  0
151EX(10f)	l32i	a6, a3,  8
152EX(10f)	s32i	a7, a5,  4
153EX(10f)	l32i	a7, a3, 12
154EX(10f)	s32i	a6, a5,  8
155	addi	a3, a3, 16
156EX(10f)	s32i	a7, a5, 12
157	addi	a5, a5, 16
158#if !XCHAL_HAVE_LOOPS
159	blt	a3, a8, .Loop1
160#endif /* !XCHAL_HAVE_LOOPS */
	# Remaining length: bits 3..0 of a4 select 8B/4B/2B/1B tail copies.
161.Loop1done:
162	bbci.l	a4, 3, .L2
163	# copy 8 bytes
164EX(10f)	l32i	a6, a3,  0
165EX(10f)	l32i	a7, a3,  4
166	addi	a3, a3,  8
167EX(10f)	s32i	a6, a5,  0
168EX(10f)	s32i	a7, a5,  4
169	addi	a5, a5,  8
170.L2:
171	bbci.l	a4, 2, .L3
172	# copy 4 bytes
173EX(10f)	l32i	a6, a3,  0
174	addi	a3, a3,  4
175EX(10f)	s32i	a6, a5,  0
176	addi	a5, a5,  4
177.L3:
178	bbci.l	a4, 1, .L4
179	# copy 2 bytes
180EX(10f)	l16ui	a6, a3,  0
181	addi	a3, a3,  2
182EX(10f)	s16i	a6, a5,  0
183	addi	a5, a5,  2
184.L4:
185	bbci.l	a4, 0, .L5
186	# copy 1 byte
187EX(10f)	l8ui	a6, a3,  0
188EX(10f)	s8i	a6, a5,  0
189.L5:
190	movi	a2, 0		# return success for len bytes copied
191	retw
192
193/*
194 * Destination is aligned, Source is unaligned
195 */
196
197	.align	4
198	.byte	0		# 1 mod 4 alignment for LOOPNEZ
199				# (0 mod 4 alignment for LBEG)
200.Lsrcunaligned:
201	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	# a8 is still 3 here (set at .Ldstaligned), so a10 = a3 & 3.
202	and	a10, a3, a8	# save unalignment offset for below
203	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
204EX(10f)	l32i	a6, a3, 0	# load first word
205#if XCHAL_HAVE_LOOPS
206	loopnez	a7, .Loop2done
207#else /* !XCHAL_HAVE_LOOPS */
208	beqz	a7, .Loop2done
209	slli	a12, a7, 4
210	add	a12, a12, a3	# a12 = end of last 16B source chunk
211#endif /* !XCHAL_HAVE_LOOPS */
# Invariant: a6 holds the previous (partial) source word at loop entry.
# Each __src_b combines two adjacent source words using the shift amount
# set by __ssa8 above to produce one aligned destination word.
212.Loop2:
213EX(10f)	l32i	a7, a3,  4
214EX(10f)	l32i	a8, a3,  8
215	__src_b	a6, a6, a7
216EX(10f)	s32i	a6, a5,  0
217EX(10f)	l32i	a9, a3, 12
218	__src_b	a7, a7, a8
219EX(10f)	s32i	a7, a5,  4
220EX(10f)	l32i	a6, a3, 16
221	__src_b	a8, a8, a9
222EX(10f)	s32i	a8, a5,  8
223	addi	a3, a3, 16
224	__src_b	a9, a9, a6
225EX(10f)	s32i	a9, a5, 12
226	addi	a5, a5, 16
227#if !XCHAL_HAVE_LOOPS
228	blt	a3, a12, .Loop2
229#endif /* !XCHAL_HAVE_LOOPS */
230.Loop2done:
231	bbci.l	a4, 3, .L12
232	# copy 8 bytes
233EX(10f)	l32i	a7, a3,  4
234EX(10f)	l32i	a8, a3,  8
235	__src_b	a6, a6, a7
236EX(10f)	s32i	a6, a5,  0
237	addi	a3, a3,  8
238	__src_b	a7, a7, a8
239EX(10f)	s32i	a7, a5,  4
240	addi	a5, a5,  8
241	mov	a6, a8		# carry last partial word forward
242.L12:
243	bbci.l	a4, 2, .L13
244	# copy 4 bytes
245EX(10f)	l32i	a7, a3,  4
246	addi	a3, a3,  4
247	__src_b	a6, a6, a7
248EX(10f)	s32i	a6, a5,  0
249	addi	a5, a5,  4
250	mov	a6, a7		# carry last partial word forward
251.L13:
	# Tail bytes are copied directly from the (re-misaligned) source.
252	add	a3, a3, a10	# readjust a3 with correct misalignment
253	bbci.l	a4, 1, .L14
254	# copy 2 bytes
255EX(10f)	l8ui	a6, a3,  0
256EX(10f)	l8ui	a7, a3,  1
257	addi	a3, a3,  2
258EX(10f)	s8i	a6, a5,  0
259EX(10f)	s8i	a7, a5,  1
260	addi	a5, a5,  2
261.L14:
262	bbci.l	a4, 0, .L15
263	# copy 1 byte
264EX(10f)	l8ui	a6, a3,  0
265EX(10f)	s8i	a6, a5,  0
266.L15:
267	movi	a2, 0		# return success for len bytes copied
268	retw
269

270
271	.section .fixup, "ax"
272	.align	4
273
274/* a2 = original dst; a5 = current dst; a11= original len
275 * bytes_copied = a5 - a2
276 * retval = bytes_not_copied = original len - bytes_copied
277 * retval = a11 - (a5 - a2)
278 */

279
# Fixup target for every EX(10f) above: a user access faulted.
280
28110:
282	sub	a2, a5, a2	/* a2 <-- bytes copied */
283	sub	a2, a11, a2	/* a2 <-- bytes not copied */
284	retw
285