/* xref: /linux/arch/powerpc/lib/copyuser_64.S (revision f8db2007ff5838aff696bd4297eefcc77af2cf46) */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
1470d64ceaSPaul Mackerras
/*
 * SELFTEST_CASE selects which feature-fixup alternative is kept when
 * this file is assembled for the user-copy selftests; it is compared
 * against in the "test_feature = ..." assignments below.
 */
#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

/*
 * The unaligned-source path merges two adjacent aligned doublewords by
 * shifting them towards each other.  Which machine shift moves bytes
 * "towards the low-numbered address" depends on endianness, so the code
 * below uses these endian-neutral mnemonics.
 */
#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif
2720151169SPaul E. McKenney
/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm
49a7c81ce3SPaul Mackerras
/*
 * __copy_tofrom_user(to, from, n)
 * In:  r3 = destination, r4 = source, r5 = byte count.
 * Out: r3 = number of bytes NOT copied (0 on complete success) —
 *      the non-zero values come from the exception handlers below.
 * The original arguments are stashed at r1-24/-16/-8 so those handlers
 * can reconstruct how far the copy got.
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7	/* taken when CPU_FTR_VMX_COPY is set */
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)	/* save original dest for the exception handlers */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* EQ = both page-aligned && count == 4096 */
	std	r4,-16(r1)	/* save original source */
	std	r5,-8(r1)	/* save original count */
	dcbt	0,r4		/* prefetch first source cache block */
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)	/* low 4 bits of count -> cr7 for the tail/short paths */
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
/*
 * Destination is 8-byte aligned here.  Main path copies 32 bytes per
 * iteration (ctr = count / 32), software-pipelined so the loads run one
 * iteration ahead of the stores.  A 16-byte chunk is peeled off first
 * when count & 0x10 is set; the last 0-15 bytes fall into .Ldo_tail.
 * r3_offset tracks how far r3 lags the first unmodified dest byte, for
 * the lex/stex exception-table trick described above.
 */
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* source 8-byte aligned too? */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5		/* ctr = count / 32 */
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10	/* odd 16-byte chunk to peel off? */
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:	/* drain the two doublewords still in flight */
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf	/* any tail bytes left? */
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	/* copy the final 0-15 bytes; cr7 holds the low 4 bits of count */
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f	/* bit 3 of count: 8 bytes */
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f	/* bit 2 of count: 4 bytes */
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f	/* bit 1 of count: 2 bytes */
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f	/* bit 0 of count: 1 byte */
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0		/* success: 0 bytes not copied */
	blr
15970d64ceaSPaul Mackerras
/*
 * Source is not 8-byte aligned (destination is).  r0 = source
 * misalignment in bytes (1-7).  Back r4 up to the previous 8-byte
 * boundary, then reconstruct each output doubleword by merging two
 * adjacent aligned doublewords:
 *   r10 = misalignment * 8  (bit shift towards low addresses)
 *   r11 = 64 - r10          (complementary shift)
 * using the endian-neutral sLd/sHd mnemonics defined at the top.
 */
.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3		/* r6 = count / 8 (doublewords) */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* align r4 down to 8-byte boundary */
	srdi	r7,r5,4		/* ctr = 16-byte iterations */
	sldi	r10,r0,3	/* shift amount in bits */
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64	/* complementary shift */
	add	r5,r5,r0	/* r5 = tail bytes incl. misalignment */
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6	/* r7 = merged output doubleword */
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

	/* main loop: two merged doublewords stored per iteration */
1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f		/* any tail bytes (r5 != 0)? */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10	/* position remaining source bytes in r9 */
	ble	cr1,7f
lex;	ld	r0,8(r4)	/* tail spans another source doubleword */
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	/*
	 * Store the tail from r9, widest first.  The rotates position the
	 * next chunk in the low (LE) / high (BE) end of r9 before each
	 * narrower store; the rotrdi after each LE store undoes it.
	 */
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0		/* success */
	blr
26870d64ceaSPaul Mackerras
/*
 * Destination is not 8-byte aligned.  r6 = # bytes (1-7) needed to reach
 * the next 8-byte boundary; copy them as 1/2/4-byte pieces, then rejoin
 * the aligned path.  r7 counts bytes copied in this prologue so that the
 * .L*_exc_r7 handlers can add it to r3 (which is not advanced here).
 */
.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5		/* count -= alignment bytes */
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)	/* refresh cr7 with low bits of remaining count */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned
29670d64ceaSPaul Mackerras
/*
 * Copy of fewer than 16 bytes: one pass over the bits of the count,
 * which PPC_MTOCRF(0x01,r5) placed in cr7 (8/4/2/1-byte chunks).
 */
.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)	/* 8 bytes, as two word loads/stores */
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)	/* 4 bytes */
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)	/* 2 bytes */
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)	/* 1 byte */
stex;	stb	r0,0(r3)
4:	li	r3,0		/* success */
	blr
32170d64ceaSPaul Mackerras
/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * NOTE(review): an older comment here said the rest of the destination
 * is zeroed on a load fault; the code visible below does not zero — it
 * keeps copying byte-by-byte from .Lld_exc and returns the residue.
 * Confirm against callers before relying on zeroing.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7	/* r7 = bytes copied by the .Ldst_unaligned prologue */
	b	.Lld_exc

	/*
	 * Entry ladder for lex faults: execution lands r3_offset bytes
	 * before .Lld_exc, so each addi+nop pair below must occupy
	 * exactly 8 bytes.  Do not reorder or resize these.
	 */
	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop
34470d64ceaSPaul Mackerras
/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)	/* original destination */
	ld	r4,-16(r1)	/* original source */
	ld	r5,-8(r1)	/* original count */
	subf	r6,r6,r3	/* r6 = #bytes already copied */
	add	r4,r4,r6	/* advance source past the copied part */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)	/* a second load fault ends the copy */
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr
37370d64ceaSPaul Mackerras
/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3		/* return #bytes not copied */
	blr
38070d64ceaSPaul Mackerras
/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7	/* r7 = bytes copied by the .Ldst_unaligned prologue */
	b	.Lst_exc

	/*
	 * Entry ladder for stex faults: execution lands r3_offset bytes
	 * before .Lst_exc, so the instruction bytes below must sum to the
	 * adjustment at each entry point.  Do not reorder or resize.
	 */
	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5	/* r7 = one past the end of the destination */
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr
42570d64ceaSPaul Mackerras
/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	/* exception-table entry for the page copy: any fault aborts to .Labort */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
/*
 * Entered when both pointers are page-aligned and count == 4096.
 * Saves r20-r31 below the stack pointer, then copies the page with a
 * heavily unrolled loop reading six streams spaced 128 bytes apart
 * (offsets 0, 128, 256, 384, 512, 640 from r4).  The leftover lines
 * are finished by the 32-bytes-per-iteration loop at 3:.  Any fault
 * aborts to .Labort via the exc macro.
 */
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1	/* r5 counts 32-byte lines remaining */
	addi	r3,r3,-8
	li	r0,5		/* 5 inner iterations per outer pass */
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
	/* drain the doublewords still in registers */
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	/* final r5 lines: simple 32-byte software-pipelined loop */
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:	/* drain */
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)	/* restore callee-saved registers */
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0		/* success */
	blr
54670d64ceaSPaul Mackerras
/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)	/* restore callee-saved registers first */
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)	/* reload original dest */
	ld	r4,-16(r1)	/* reload original source */
	li	r5,4096		/* retry the full page via the generic path */
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)
569