/*
 *  arch/xtensa/lib/usercopy.S
 *
 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 *  It needs to remain separate and distinct.  The hal files are part
 *  of the Xtensa link-time HAL, and those files may differ per
 *  processor configuration.  Patching the kernel for another
 *  processor configuration includes replacing the hal files, and we
 *  could lose the special functionality for accessing user-space
 *  memory during such a patch.  We sacrifice a little code space here
 *  in favor of simpler code maintenance.
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The return value is the number of bytes not copied, so a return
 * value of zero means the whole buffer was copied successfully.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */
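
/*
 * For reference, a rough C-level sketch of the fast path for an aligned
 * destination (illustrative only, not part of the kernel sources: it
 * omits the EX() fault handling and the funnel-shift path used for
 * unaligned sources, and assumes <string.h> for memcpy):
 *
 *	size_t copy_sketch(unsigned char *dst, const unsigned char *src,
 *			   size_t len)
 *	{
 *		size_t i;
 *
 *		for (i = 0; i + 16 <= len; i += 16)	// 16B main loop
 *			memcpy(dst + i, src + i, 16);
 *		if (len & 8) { memcpy(dst + i, src + i, 8); i += 8; }
 *		if (len & 4) { memcpy(dst + i, src + i, 4); i += 4; }
 *		if (len & 2) { memcpy(dst + i, src + i, 2); i += 2; }
 *		if (len & 1)   memcpy(dst + i, src + i, 1);
 *		return 0;	// all bytes copied
 *	}
 *
 * On a faulting user access the real routine instead returns the number
 * of bytes not copied (see the .fixup handlers at the end of this file).
 */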

#include <xtensa/coreasm.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l R
#endif
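
/* SSA8 sets the shift amount register (SAR) from the low two bits of the
 * source address, and ALIGN uses the SRC funnel-shift instruction to
 * extract an aligned word from two consecutive source words; the operand
 * order and the ssa8l/ssa8b choice depend on the configured endianness. */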

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
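
/* Each EX() use records a (faulting instruction, fixup handler) pair in
 * the __ex_table section; when a user-space access faults, the kernel's
 * exception handler looks up the faulting address there and resumes
 * execution at the matching fixup routine below. */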


	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2 # if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4 # if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		  # if source is also aligned,
	bnone	a3, a8, .Laligned # then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, l_fixup)
	addi	a3, a3,  1
	EX(s8i, a6, a5,  0, s_fixup)
	addi	a5, a5,  1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, l_fixup)
	EX(l8ui, a7, a3, 1, l_fixup)
	addi	a3, a3,  2
	EX(s8i, a6, a5,  0, s_fixup)
	EX(s8i, a7, a5,  1, s_fixup)
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(l8ui, a6, a3, 0, l_fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, s_fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
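	# In the loop below, each load is issued ahead of the store that
	# consumes its result, which helps hide load-use latency.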
.Loop1:
	EX(l32i, a6, a3,  0, l_fixup)
	EX(l32i, a7, a3,  4, l_fixup)
	EX(s32i, a6, a5,  0, s_fixup)
	EX(l32i, a6, a3,  8, l_fixup)
	EX(s32i, a7, a5,  4, s_fixup)
	EX(l32i, a7, a3, 12, l_fixup)
	EX(s32i, a6, a5,  8, s_fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, s_fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3,  0, l_fixup)
	EX(l32i, a7, a3,  4, l_fixup)
	addi	a3, a3,  8
	EX(s32i, a6, a5,  0, s_fixup)
	EX(s32i, a7, a5,  4, s_fixup)
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3,  0, l_fixup)
	addi	a3, a3,  4
	EX(s32i, a6, a5,  0, s_fixup)
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3,  0, l_fixup)
	addi	a3, a3,  2
	EX(s16i,  a6, a5,  0, s_fixup)
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3,  0, l_fixup)
	EX(s8i,  a6, a5,  0, s_fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination is aligned, source is unaligned
 */

	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, l_fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
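	# The loop below is software pipelined: a6 always holds the source
	# word loaded on the previous step, and each ALIGN() combines it
	# with the next word (using the shift amount set by SSA8 above) to
	# produce one aligned word for the destination.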
.Loop2:
	EX(l32i, a7, a3,  4, l_fixup)
	EX(l32i, a8, a3,  8, l_fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5,  0, s_fixup)
	EX(l32i, a9, a3, 12, l_fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5,  4, s_fixup)
	EX(l32i, a6, a3, 16, l_fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5,  8, s_fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, s_fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a10, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
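	# Here a6 still holds the source word loaded one step ahead
	# (either by the loop or by the initial preload), so the tail
	# copies below can keep using the same ALIGN() scheme.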
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3,  4, l_fixup)
	EX(l32i, a8, a3,  8, l_fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5,  0, s_fixup)
	addi	a3, a3,  8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5,  4, s_fixup)
	addi	a5, a5,  8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3,  4, l_fixup)
	addi	a3, a3,  4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5,  0, s_fixup)
	addi	a5, a5,  4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3,  0, l_fixup)
	EX(l8ui, a7, a3,  1, l_fixup)
	addi	a3, a3,  2
	EX(s8i, a6, a5,  0, s_fixup)
	EX(s8i, a7, a5,  1, s_fixup)
	addi	a5, a5,  2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3,  0, l_fixup)
	EX(s8i,  a6, a5,  0, s_fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw


	.section .fixup, "ax"
	.align	4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 *
 * Clearing the remaining pieces of kernel memory plugs security
 * holes.  This functionality is the equivalent of the *_zeroing
 * functions that some architectures provide.
 */

.Lmemset:
	.word	memset

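/* A store to the user-space destination faulted (typically the
 * copy_to_user case): nothing needs to be zeroed, just report how many
 * bytes were not copied. */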
s_fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw

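/* A load from the user-space source faulted (typically the
 * copy_from_user case): report the bytes not copied and zero the
 * uncopied tail of the kernel destination buffer, as described above. */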
l_fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied == return value */

	/* void *memset(void *s, int c, size_t n); */
	mov	a6, a5		/* s */
	movi	a7, 0		/* c */
	mov	a8, a2		/* n */
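	/* Call memset through a literal loaded with l32r; presumably this
	 * avoids relying on the direct-call range between the .fixup
	 * section and memset (assumption, not stated in the original). */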
	l32r	a4, .Lmemset
	callx4	a4
	/* Ignore memset return value in a6. */
	/* a2 still contains bytes not copied. */
	retw
