/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif
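
/*
 * SELFTEST_CASE and the test_feature assignments below are presumably
 * only meaningful when this file is built by the powerpc copyloops
 * selftests, which appear to build one object per value so that each
 * feature-section alternative can be exercised regardless of the CPU
 * the test runs on.
 */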

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

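/*
 * Illustrative example: in the unaligned-source loop below, r10 holds
 * 8 * (source offset within a doubleword) and r11 = 64 - r10.  With a
 * source 3 bytes past an 8-byte boundary, r10 = 24 and r11 = 40, and
 * each aligned destination doubleword is assembled roughly as
 *
 *	sLd	rA,<this source dword>,r10
 *	sHd	rB,<next source dword>,r11
 *	or	rDest,rA,rB
 *
 * The sLd/sHd names hide the endian difference: the bytes that move
 * towards the lower-numbered address are shifted with sld on
 * big-endian and with srd on little-endian.
 */
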
/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

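/*
 * For example, with r3_offset = 16 a lex entry resolves to
 * .Lld_exc - 16, i.e. four instructions before .Lld_exc, which is the
 * "adjust by 16" entry point.  From there two addi r3,r3,8
 * instructions (interleaved with nops) execute before control falls
 * into .Lld_exc, so the handler sees r3 pointing at the first
 * unmodified destination byte.
 */
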
	.align	7
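/*
 * __copy_tofrom_user(to, from, n):
 *	r3 = destination, r4 = source, r5 = byte count.
 * Returns (in r3) the number of bytes that could not be copied;
 * 0 means the whole buffer was copied.
 */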
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
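	/*
	 * Save the original dest, src and count below the stack pointer;
	 * the fault handlers at .Lld_exc/.Lst_exc reload them to work
	 * out how many bytes are left.
	 */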
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/*
 * Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.  At the time of writing, the only CPU with that combination
 * of feature bits is POWER6.
 */
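/*
 * (The BEGIN_FTR_SECTION/FTR_SECTION_ELSE/ALT_FTR_SECTION_END machinery
 * selects one of the two alternatives at boot by patching the kernel
 * text according to the CPU feature bits, so only one path is ever
 * executed at run time.)
 */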
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
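/*
 * Tail copy: PPC_MTOCRF(0x01,r5) at entry copied the low four bits of
 * the count into cr7, so cr7 bits 0..3 select one trailing 8-, 4-, 2-
 * and 1-byte move respectively.
 */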
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

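/*
 * Unaligned source, aligned destination: the shifted-pair scheme
 * described with the sLd/sHd macros above.  r6 = count / 8 (number of
 * doublewords), r10 = 8 * (source misalignment), r11 = 64 - r10, and
 * the source pointer is rounded down to an 8-byte boundary before the
 * loop, so every load is aligned and each stored doubleword is built
 * from two adjacent source doublewords.
 */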
.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

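/*
 * Destination not 8-byte aligned: copy 1, 2 and/or 4 bytes (selected by
 * cr7, loaded from r6 = bytes to the boundary) until r3 is aligned,
 * keeping a running byte count in r7.  The explicit EX_TABLE entries
 * point at .Lld_exc_r7/.Lst_exc_r7, which add r7 to r3 before falling
 * into the normal handlers.
 */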
.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

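/*
 * Fewer than 16 bytes in total: cr7 (the low four bits of the count)
 * again selects 8 (done as two word copies), 4, 2 and 1 byte chunks.
 */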
.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * Exception handlers follow.
 * We have to return the number of bytes not copied.  For a fault on a
 * load we do not zero the destination; instead we keep copying
 * byte-by-byte for as long as we can (see .Lld_exc below).
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */
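
/*
 * Each EX_TABLE() entry above maps a possibly-faulting instruction to a
 * fixup address in the kernel's __ex_table; when a user access faults,
 * the page-fault handler redirects execution to that fixup.  Here the
 * fixups are the "adjust by N" stubs below, chosen via the
 * .Lld_exc/.Lst_exc - r3_offset trick described at the top of the file.
 */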

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
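
/*
 * As with the load stubs, .Lst_exc - 24/-16/-8/-4 land on the entry
 * points below.  Whichever entry point is chosen, all of the following
 * addi instructions execute before .Lst_exc, and the instruction bytes
 * from that point equal the total added to r3 (the 8-byte adjustment
 * is done as two addi r3,r3,4 so the byte counts still match).
 */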
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
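	/* r7 = dest + count = first byte past the destination buffer */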
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
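/*
 * The unrolled loop below keeps six read/write streams in flight,
 * spaced 128 bytes apart (offsets 0, 128, 256, 384, 512 and 640 from
 * r4/r3), presumably so that several cache-line fetches overlap.  The
 * nonvolatile registers r20-r31 it uses are saved below the stack
 * pointer and restored on both the normal and the .Labort exit paths.
 */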
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * On an exception, restore the saved registers and the original
 * arguments, then redo the whole copy through the standard
 * __copy_tofrom_user path (at .Ldst_aligned) so that its exception
 * handling works out how much was actually copied.
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)