xref: /linux/arch/powerpc/lib/copyuser_64.S (revision a4e22f02f5b6518c1484faea1f88d81802b9feac)
170d64ceaSPaul Mackerras/*
270d64ceaSPaul Mackerras * Copyright (C) 2002 Paul Mackerras, IBM Corp.
370d64ceaSPaul Mackerras *
470d64ceaSPaul Mackerras * This program is free software; you can redistribute it and/or
570d64ceaSPaul Mackerras * modify it under the terms of the GNU General Public License
670d64ceaSPaul Mackerras * as published by the Free Software Foundation; either version
770d64ceaSPaul Mackerras * 2 of the License, or (at your option) any later version.
870d64ceaSPaul Mackerras */
970d64ceaSPaul Mackerras#include <asm/processor.h>
1070d64ceaSPaul Mackerras#include <asm/ppc_asm.h>
1170d64ceaSPaul Mackerras
/*
 * __copy_tofrom_user(to, from, n)
 *   r3 = destination, r4 = source, r5 = byte count.
 *   Returns 0 in r3 on success; on a fault the handlers below return the
 *   number of bytes NOT copied.  r3/r4/r5 are stashed at r1-24/-16/-8 so
 *   the exception fixups can recompute progress after a fault.
 *   Numeric labels (20:, 70:, ...) mark faulting loads/stores; each is
 *   paired with a fixup label in the __ex_table section further down.
 */
1270d64ceaSPaul Mackerras	.align	7
1370d64ceaSPaul Mackerras_GLOBAL(__copy_tofrom_user)
1470d64ceaSPaul Mackerras	/* first check for a whole page copy on a page boundary */
1570d64ceaSPaul Mackerras	cmpldi	cr1,r5,16
1670d64ceaSPaul Mackerras	cmpdi	cr6,r5,4096
1770d64ceaSPaul Mackerras	or	r0,r3,r4
1870d64ceaSPaul Mackerras	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	/* EQ in cr0 iff both src and dest are 4K-page aligned */
1970d64ceaSPaul Mackerras	andi.	r0,r0,4095
2070d64ceaSPaul Mackerras	std	r3,-24(r1)
	/* cr0.EQ &= cr6.EQ: EQ now means "page aligned AND len == 4096" */
2170d64ceaSPaul Mackerras	crand	cr0*4+2,cr0*4+2,cr6*4+2
2270d64ceaSPaul Mackerras	std	r4,-16(r1)
2370d64ceaSPaul Mackerras	std	r5,-8(r1)
	/* touch first source cache line */
2470d64ceaSPaul Mackerras	dcbt	0,r4
253c726f8dSBenjamin Herrenschmidt	beq	.Lcopy_page_4K
2670d64ceaSPaul Mackerras	andi.	r6,r6,7
	/* cr7 = low 4 bits of length; tested bit-by-bit in the tail code */
273467bfd3SOlof Johansson	PPC_MTOCRF	0x01,r5
2870d64ceaSPaul Mackerras	blt	cr1,.Lshort_copy
29*a4e22f02SMark Nelson/* Below we want to nop out the bne if we're on a CPU that has the
30*a4e22f02SMark Nelson * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
31*a4e22f02SMark Nelson * cleared.
32*a4e22f02SMark Nelson * At the time of writing the only CPU that has this combination of bits
33*a4e22f02SMark Nelson * set is Power6.
34*a4e22f02SMark Nelson */
35*a4e22f02SMark NelsonBEGIN_FTR_SECTION
36*a4e22f02SMark Nelson	nop
37*a4e22f02SMark NelsonFTR_SECTION_ELSE
3870d64ceaSPaul Mackerras	bne	.Ldst_unaligned
39*a4e22f02SMark NelsonALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
40*a4e22f02SMark Nelson		    CPU_FTR_UNALIGNED_LD_STD)
4170d64ceaSPaul Mackerras.Ldst_aligned:
4270d64ceaSPaul Mackerras	addi	r3,r3,-16
	/* on CPUs without fast unaligned ld/std, divert if src misaligned */
43*a4e22f02SMark NelsonBEGIN_FTR_SECTION
44*a4e22f02SMark Nelson	andi.	r0,r4,7
4570d64ceaSPaul Mackerras	bne	.Lsrc_unaligned
46*a4e22f02SMark NelsonEND_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	/* aligned main loop: two doublewords (16 bytes) per iteration,
	 * software-pipelined through r8/r9 */
4770d64ceaSPaul Mackerras	srdi	r7,r5,4
4870d64ceaSPaul Mackerras20:	ld	r9,0(r4)
4970d64ceaSPaul Mackerras	addi	r4,r4,-8
5070d64ceaSPaul Mackerras	mtctr	r7
5170d64ceaSPaul Mackerras	andi.	r5,r5,7
	/* odd doubleword in the count? handle it before the 16B loop */
5270d64ceaSPaul Mackerras	bf	cr7*4+0,22f
5370d64ceaSPaul Mackerras	addi	r3,r3,8
5470d64ceaSPaul Mackerras	addi	r4,r4,8
5570d64ceaSPaul Mackerras	mr	r8,r9
5670d64ceaSPaul Mackerras	blt	cr1,72f
5770d64ceaSPaul Mackerras21:	ld	r9,8(r4)
5870d64ceaSPaul Mackerras70:	std	r8,8(r3)
5970d64ceaSPaul Mackerras22:	ldu	r8,16(r4)
6070d64ceaSPaul Mackerras71:	stdu	r9,16(r3)
6170d64ceaSPaul Mackerras	bdnz	21b
6270d64ceaSPaul Mackerras72:	std	r8,8(r3)
	/* r5 & 7 == 0 means no tail bytes left */
6370d64ceaSPaul Mackerras	beq+	3f
6470d64ceaSPaul Mackerras	addi	r3,r3,16
6570d64ceaSPaul Mackerras23:	ld	r9,8(r4)
	/* store the final 0-7 bytes from r9, 4/2/1 at a time as the low
	 * bits of the length (in cr7) dictate */
6670d64ceaSPaul Mackerras.Ldo_tail:
6770d64ceaSPaul Mackerras	bf	cr7*4+1,1f
6870d64ceaSPaul Mackerras	rotldi	r9,r9,32
6970d64ceaSPaul Mackerras73:	stw	r9,0(r3)
7070d64ceaSPaul Mackerras	addi	r3,r3,4
7170d64ceaSPaul Mackerras1:	bf	cr7*4+2,2f
7270d64ceaSPaul Mackerras	rotldi	r9,r9,16
7370d64ceaSPaul Mackerras74:	sth	r9,0(r3)
7470d64ceaSPaul Mackerras	addi	r3,r3,2
7570d64ceaSPaul Mackerras2:	bf	cr7*4+3,3f
7670d64ceaSPaul Mackerras	rotldi	r9,r9,8
7770d64ceaSPaul Mackerras75:	stb	r9,0(r3)
	/* success: return 0 bytes not copied */
7870d64ceaSPaul Mackerras3:	li	r3,0
7970d64ceaSPaul Mackerras	blr
8070d64ceaSPaul Mackerras
/*
 * Source is not 8-byte aligned (dest is).  Round the source pointer down
 * to a doubleword boundary, load aligned doublewords and merge adjacent
 * pairs with sld/srd:  r10 = 8 * (src misalignment), r11 = 64 - r10.
 * r0 on entry holds src & 7.  cr6 counts whether >= 4 doublewords remain.
 */
8170d64ceaSPaul Mackerras.Lsrc_unaligned:
8270d64ceaSPaul Mackerras	srdi	r6,r5,3
8370d64ceaSPaul Mackerras	addi	r5,r5,-16
8470d64ceaSPaul Mackerras	subf	r4,r0,r4
8570d64ceaSPaul Mackerras	srdi	r7,r5,4
8670d64ceaSPaul Mackerras	sldi	r10,r0,3
8770d64ceaSPaul Mackerras	cmpldi	cr6,r6,3
8870d64ceaSPaul Mackerras	andi.	r5,r5,7
8970d64ceaSPaul Mackerras	mtctr	r7
9070d64ceaSPaul Mackerras	subfic	r11,r10,64
	/* r5 = tail byte count including the skipped misalignment bytes */
9170d64ceaSPaul Mackerras	add	r5,r5,r0
	/* odd number of doublewords? take the 28: prologue instead */
9270d64ceaSPaul Mackerras	bt	cr7*4+0,28f
9370d64ceaSPaul Mackerras
9470d64ceaSPaul Mackerras24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
9570d64ceaSPaul Mackerras25:	ld	r0,8(r4)
9670d64ceaSPaul Mackerras	sld	r6,r9,r10
9770d64ceaSPaul Mackerras26:	ldu	r9,16(r4)
9870d64ceaSPaul Mackerras	srd	r7,r0,r11
9970d64ceaSPaul Mackerras	sld	r8,r0,r10
10070d64ceaSPaul Mackerras	or	r7,r7,r6
10170d64ceaSPaul Mackerras	blt	cr6,79f
10270d64ceaSPaul Mackerras27:	ld	r0,8(r4)
10370d64ceaSPaul Mackerras	b	2f
10470d64ceaSPaul Mackerras
10570d64ceaSPaul Mackerras28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
10670d64ceaSPaul Mackerras29:	ldu	r9,8(r4)
10770d64ceaSPaul Mackerras	sld	r8,r0,r10
10870d64ceaSPaul Mackerras	addi	r3,r3,-8
10970d64ceaSPaul Mackerras	blt	cr6,5f
11070d64ceaSPaul Mackerras30:	ld	r0,8(r4)
11170d64ceaSPaul Mackerras	srd	r12,r9,r11
11270d64ceaSPaul Mackerras	sld	r6,r9,r10
11370d64ceaSPaul Mackerras31:	ldu	r9,16(r4)
11470d64ceaSPaul Mackerras	or	r12,r8,r12
11570d64ceaSPaul Mackerras	srd	r7,r0,r11
11670d64ceaSPaul Mackerras	sld	r8,r0,r10
11770d64ceaSPaul Mackerras	addi	r3,r3,16
11870d64ceaSPaul Mackerras	beq	cr6,78f
11970d64ceaSPaul Mackerras
	/* steady state: merge and store two doublewords per iteration */
12070d64ceaSPaul Mackerras1:	or	r7,r7,r6
12170d64ceaSPaul Mackerras32:	ld	r0,8(r4)
12270d64ceaSPaul Mackerras76:	std	r12,8(r3)
12370d64ceaSPaul Mackerras2:	srd	r12,r9,r11
12470d64ceaSPaul Mackerras	sld	r6,r9,r10
12570d64ceaSPaul Mackerras33:	ldu	r9,16(r4)
12670d64ceaSPaul Mackerras	or	r12,r8,r12
12770d64ceaSPaul Mackerras77:	stdu	r7,16(r3)
12870d64ceaSPaul Mackerras	srd	r7,r0,r11
12970d64ceaSPaul Mackerras	sld	r8,r0,r10
13070d64ceaSPaul Mackerras	bdnz	1b
13170d64ceaSPaul Mackerras
	/* drain the software pipeline */
13270d64ceaSPaul Mackerras78:	std	r12,8(r3)
13370d64ceaSPaul Mackerras	or	r7,r7,r6
13470d64ceaSPaul Mackerras79:	std	r7,16(r3)
13570d64ceaSPaul Mackerras5:	srd	r12,r9,r11
13670d64ceaSPaul Mackerras	or	r12,r8,r12
13770d64ceaSPaul Mackerras80:	std	r12,24(r3)
	/* no tail bytes (cr0 still holds r5&7 from above)? then done */
13870d64ceaSPaul Mackerras	bne	6f
13970d64ceaSPaul Mackerras	li	r3,0
14070d64ceaSPaul Mackerras	blr
14170d64ceaSPaul Mackerras6:	cmpwi	cr1,r5,8
14270d64ceaSPaul Mackerras	addi	r3,r3,32
14370d64ceaSPaul Mackerras	sld	r9,r9,r10
	/* tail fits in the bytes already loaded? go store them */
14470d64ceaSPaul Mackerras	ble	cr1,.Ldo_tail
	/* need one more source doubleword to complete the tail */
14570d64ceaSPaul Mackerras34:	ld	r0,8(r4)
14670d64ceaSPaul Mackerras	srd	r7,r0,r11
14770d64ceaSPaul Mackerras	or	r9,r7,r9
14870d64ceaSPaul Mackerras	b	.Ldo_tail
14970d64ceaSPaul Mackerras
/*
 * Destination not 8-byte aligned: copy 1, 2 and/or 4 bytes (r6 = number
 * of bytes to the boundary, its bits steering via cr7) so the dest
 * becomes aligned, then rejoin the aligned path.  r7 accumulates the
 * offset copied so far; the fixups at 136/137 use it.
 */
15070d64ceaSPaul Mackerras.Ldst_unaligned:
1513467bfd3SOlof Johansson	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
15270d64ceaSPaul Mackerras	subf	r5,r6,r5
15370d64ceaSPaul Mackerras	li	r7,0
	/* recheck length remaining after alignment against 16 */
154*a4e22f02SMark Nelson	cmpldi	cr1,r5,16
15570d64ceaSPaul Mackerras	bf	cr7*4+3,1f
15670d64ceaSPaul Mackerras35:	lbz	r0,0(r4)
15770d64ceaSPaul Mackerras81:	stb	r0,0(r3)
15870d64ceaSPaul Mackerras	addi	r7,r7,1
15970d64ceaSPaul Mackerras1:	bf	cr7*4+2,2f
16070d64ceaSPaul Mackerras36:	lhzx	r0,r7,r4
16170d64ceaSPaul Mackerras82:	sthx	r0,r7,r3
16270d64ceaSPaul Mackerras	addi	r7,r7,2
16370d64ceaSPaul Mackerras2:	bf	cr7*4+1,3f
16470d64ceaSPaul Mackerras37:	lwzx	r0,r7,r4
16570d64ceaSPaul Mackerras83:	stwx	r0,r7,r3
	/* reload cr7 with the low bits of the remaining length */
1663467bfd3SOlof Johansson3:	PPC_MTOCRF	0x01,r5
16770d64ceaSPaul Mackerras	add	r4,r6,r4
16870d64ceaSPaul Mackerras	add	r3,r6,r3
16970d64ceaSPaul Mackerras	b	.Ldst_aligned
17070d64ceaSPaul Mackerras
/*
 * Copy of fewer than 16 bytes: transfer 8, 4, 2 and 1 bytes as the
 * corresponding low bits of the length (held in cr7) dictate, then
 * return 0 (all copied).
 */
17170d64ceaSPaul Mackerras.Lshort_copy:
17270d64ceaSPaul Mackerras	bf	cr7*4+0,1f
17370d64ceaSPaul Mackerras38:	lwz	r0,0(r4)
17470d64ceaSPaul Mackerras39:	lwz	r9,4(r4)
17570d64ceaSPaul Mackerras	addi	r4,r4,8
17670d64ceaSPaul Mackerras84:	stw	r0,0(r3)
17770d64ceaSPaul Mackerras85:	stw	r9,4(r3)
17870d64ceaSPaul Mackerras	addi	r3,r3,8
17970d64ceaSPaul Mackerras1:	bf	cr7*4+1,2f
18070d64ceaSPaul Mackerras40:	lwz	r0,0(r4)
18170d64ceaSPaul Mackerras	addi	r4,r4,4
18270d64ceaSPaul Mackerras86:	stw	r0,0(r3)
18370d64ceaSPaul Mackerras	addi	r3,r3,4
18470d64ceaSPaul Mackerras2:	bf	cr7*4+2,3f
18570d64ceaSPaul Mackerras41:	lhz	r0,0(r4)
18670d64ceaSPaul Mackerras	addi	r4,r4,2
18770d64ceaSPaul Mackerras87:	sth	r0,0(r3)
18870d64ceaSPaul Mackerras	addi	r3,r3,2
18970d64ceaSPaul Mackerras3:	bf	cr7*4+3,4f
19070d64ceaSPaul Mackerras42:	lbz	r0,0(r4)
19170d64ceaSPaul Mackerras88:	stb	r0,0(r3)
19270d64ceaSPaul Mackerras4:	li	r3,0
19370d64ceaSPaul Mackerras	blr
19470d64ceaSPaul Mackerras
19570d64ceaSPaul Mackerras/*
19670d64ceaSPaul Mackerras * exception handlers follow
19770d64ceaSPaul Mackerras * we have to return the number of bytes not copied
19870d64ceaSPaul Mackerras * for an exception on a load, we set the rest of the destination to 0
19970d64ceaSPaul Mackerras */
20070d64ceaSPaul Mackerras
/*
 * Fixup label 1NN handles a fault at copy label NN (see the __ex_table
 * below).  The fall-through chains of "addi r3,r3,8" advance r3 past
 * destination bytes that were already stored before the faulting load,
 * so that r3 ends up pointing at the first unmodified dest byte.
 */
	/* 136/137: faults in .Ldst_unaligned; r7 = bytes already copied */
20170d64ceaSPaul Mackerras136:
20270d64ceaSPaul Mackerras137:
20370d64ceaSPaul Mackerras	add	r3,r3,r7
20470d64ceaSPaul Mackerras	b	1f
20570d64ceaSPaul Mackerras130:
20670d64ceaSPaul Mackerras131:
20770d64ceaSPaul Mackerras	addi	r3,r3,8
20870d64ceaSPaul Mackerras120:
20970d64ceaSPaul Mackerras122:
21070d64ceaSPaul Mackerras124:
21170d64ceaSPaul Mackerras125:
21270d64ceaSPaul Mackerras126:
21370d64ceaSPaul Mackerras127:
21470d64ceaSPaul Mackerras128:
21570d64ceaSPaul Mackerras129:
21670d64ceaSPaul Mackerras133:
21770d64ceaSPaul Mackerras	addi	r3,r3,8
21870d64ceaSPaul Mackerras121:
21970d64ceaSPaul Mackerras132:
22070d64ceaSPaul Mackerras	addi	r3,r3,8
22170d64ceaSPaul Mackerras123:
22270d64ceaSPaul Mackerras134:
22370d64ceaSPaul Mackerras135:
22470d64ceaSPaul Mackerras138:
22570d64ceaSPaul Mackerras139:
22670d64ceaSPaul Mackerras140:
22770d64ceaSPaul Mackerras141:
22870d64ceaSPaul Mackerras142:
22970d64ceaSPaul Mackerras
23070d64ceaSPaul Mackerras/*
23170d64ceaSPaul Mackerras * here we have had a fault on a load and r3 points to the first
23270d64ceaSPaul Mackerras * unmodified byte of the destination
23370d64ceaSPaul Mackerras */
	/* recover original dest/src/len from the save area and work out
	 * how far we got: r6 = bytes done, r5 = bytes left, r4 = new src */
23470d64ceaSPaul Mackerras1:	ld	r6,-24(r1)
23570d64ceaSPaul Mackerras	ld	r4,-16(r1)
23670d64ceaSPaul Mackerras	ld	r5,-8(r1)
23770d64ceaSPaul Mackerras	subf	r6,r6,r3
23870d64ceaSPaul Mackerras	add	r4,r4,r6
23970d64ceaSPaul Mackerras	subf	r5,r6,r5	/* #bytes left to go */
24070d64ceaSPaul Mackerras
24170d64ceaSPaul Mackerras/*
24270d64ceaSPaul Mackerras * first see if we can copy any more bytes before hitting another exception
24370d64ceaSPaul Mackerras */
24470d64ceaSPaul Mackerras	mtctr	r5
24570d64ceaSPaul Mackerras43:	lbz	r0,0(r4)
24670d64ceaSPaul Mackerras	addi	r4,r4,1
24770d64ceaSPaul Mackerras89:	stb	r0,0(r3)
24870d64ceaSPaul Mackerras	addi	r3,r3,1
24970d64ceaSPaul Mackerras	bdnz	43b
25070d64ceaSPaul Mackerras	li	r3,0		/* huh? all copied successfully this time? */
25170d64ceaSPaul Mackerras	blr
25270d64ceaSPaul Mackerras
25370d64ceaSPaul Mackerras/*
25470d64ceaSPaul Mackerras * here we have trapped again, need to clear ctr bytes starting at r3
25570d64ceaSPaul Mackerras */
	/* second fault in the byte loop: zero the rest of the destination
	 * (bytewise to an 8-byte boundary, then doublewords, then bytes)
	 * and return the outstanding count */
25670d64ceaSPaul Mackerras143:	mfctr	r5
25770d64ceaSPaul Mackerras	li	r0,0
25870d64ceaSPaul Mackerras	mr	r4,r3
25970d64ceaSPaul Mackerras	mr	r3,r5		/* return the number of bytes not copied */
26070d64ceaSPaul Mackerras1:	andi.	r9,r4,7
26170d64ceaSPaul Mackerras	beq	3f
26270d64ceaSPaul Mackerras90:	stb	r0,0(r4)
26370d64ceaSPaul Mackerras	addic.	r5,r5,-1
26470d64ceaSPaul Mackerras	addi	r4,r4,1
26570d64ceaSPaul Mackerras	bne	1b
26670d64ceaSPaul Mackerras	blr
26770d64ceaSPaul Mackerras3:	cmpldi	cr1,r5,8
26870d64ceaSPaul Mackerras	srdi	r9,r5,3
26970d64ceaSPaul Mackerras	andi.	r5,r5,7
27070d64ceaSPaul Mackerras	blt	cr1,93f
27170d64ceaSPaul Mackerras	mtctr	r9
27270d64ceaSPaul Mackerras91:	std	r0,0(r4)
27370d64ceaSPaul Mackerras	addi	r4,r4,8
27470d64ceaSPaul Mackerras	bdnz	91b
27570d64ceaSPaul Mackerras93:	beqlr
27670d64ceaSPaul Mackerras	mtctr	r5
27770d64ceaSPaul Mackerras92:	stb	r0,0(r4)
27870d64ceaSPaul Mackerras	addi	r4,r4,1
27970d64ceaSPaul Mackerras	bdnz	92b
28070d64ceaSPaul Mackerras	blr
28170d64ceaSPaul Mackerras
28270d64ceaSPaul Mackerras/*
28370d64ceaSPaul Mackerras * exception handlers for stores: we just need to work
28470d64ceaSPaul Mackerras * out how many bytes weren't copied
28570d64ceaSPaul Mackerras */
/*
 * As with the load fixups, label 1NN handles a fault at store label NN;
 * the addi chains advance r3 past dest bytes already written, then the
 * common code at 1: computes (orig_dest + len) - r3 = bytes not copied.
 */
	/* 182/183: faults in .Ldst_unaligned; r7 = bytes already copied */
28670d64ceaSPaul Mackerras182:
28770d64ceaSPaul Mackerras183:
28870d64ceaSPaul Mackerras	add	r3,r3,r7
28970d64ceaSPaul Mackerras	b	1f
29070d64ceaSPaul Mackerras180:
29170d64ceaSPaul Mackerras	addi	r3,r3,8
29270d64ceaSPaul Mackerras171:
29370d64ceaSPaul Mackerras177:
29470d64ceaSPaul Mackerras	addi	r3,r3,8
29570d64ceaSPaul Mackerras170:
29670d64ceaSPaul Mackerras172:
29770d64ceaSPaul Mackerras176:
29870d64ceaSPaul Mackerras178:
29970d64ceaSPaul Mackerras	addi	r3,r3,4
30070d64ceaSPaul Mackerras185:
30170d64ceaSPaul Mackerras	addi	r3,r3,4
30270d64ceaSPaul Mackerras173:
30370d64ceaSPaul Mackerras174:
30470d64ceaSPaul Mackerras175:
30570d64ceaSPaul Mackerras179:
30670d64ceaSPaul Mackerras181:
30770d64ceaSPaul Mackerras184:
30870d64ceaSPaul Mackerras186:
30970d64ceaSPaul Mackerras187:
31070d64ceaSPaul Mackerras188:
31170d64ceaSPaul Mackerras189:
31270d64ceaSPaul Mackerras1:
	/* r6 = saved dest + saved len = one past the end of the copy */
31370d64ceaSPaul Mackerras	ld	r6,-24(r1)
31470d64ceaSPaul Mackerras	ld	r5,-8(r1)
31570d64ceaSPaul Mackerras	add	r6,r6,r5
31670d64ceaSPaul Mackerras	subf	r3,r3,r6	/* #bytes not copied */
	/* 190/191/192: faults while zero-filling after a load fault;
	 * r3 already holds the not-copied count, so just return */
31770d64ceaSPaul Mackerras190:
31870d64ceaSPaul Mackerras191:
31970d64ceaSPaul Mackerras192:
32070d64ceaSPaul Mackerras	blr			/* #bytes not copied in r3 */
32170d64ceaSPaul Mackerras
/*
 * Exception table for __copy_tofrom_user: 8-byte pairs of
 * (address of faulting instruction, address of its fixup handler).
 * Entry "Nb,1NNb" pairs copy label N with handler label 1NN above.
 */
32270d64ceaSPaul Mackerras	.section __ex_table,"a"
32370d64ceaSPaul Mackerras	.align	3
32470d64ceaSPaul Mackerras	.llong	20b,120b
32570d64ceaSPaul Mackerras	.llong	21b,121b
32670d64ceaSPaul Mackerras	.llong	70b,170b
32770d64ceaSPaul Mackerras	.llong	22b,122b
32870d64ceaSPaul Mackerras	.llong	71b,171b
32970d64ceaSPaul Mackerras	.llong	72b,172b
33070d64ceaSPaul Mackerras	.llong	23b,123b
33170d64ceaSPaul Mackerras	.llong	73b,173b
33270d64ceaSPaul Mackerras	.llong	74b,174b
33370d64ceaSPaul Mackerras	.llong	75b,175b
33470d64ceaSPaul Mackerras	.llong	24b,124b
33570d64ceaSPaul Mackerras	.llong	25b,125b
33670d64ceaSPaul Mackerras	.llong	26b,126b
33770d64ceaSPaul Mackerras	.llong	27b,127b
33870d64ceaSPaul Mackerras	.llong	28b,128b
33970d64ceaSPaul Mackerras	.llong	29b,129b
34070d64ceaSPaul Mackerras	.llong	30b,130b
34170d64ceaSPaul Mackerras	.llong	31b,131b
34270d64ceaSPaul Mackerras	.llong	32b,132b
34370d64ceaSPaul Mackerras	.llong	76b,176b
34470d64ceaSPaul Mackerras	.llong	33b,133b
34570d64ceaSPaul Mackerras	.llong	77b,177b
34670d64ceaSPaul Mackerras	.llong	78b,178b
34770d64ceaSPaul Mackerras	.llong	79b,179b
34870d64ceaSPaul Mackerras	.llong	80b,180b
34970d64ceaSPaul Mackerras	.llong	34b,134b
35070d64ceaSPaul Mackerras	.llong	35b,135b
35170d64ceaSPaul Mackerras	.llong	81b,181b
35270d64ceaSPaul Mackerras	.llong	36b,136b
35370d64ceaSPaul Mackerras	.llong	82b,182b
35470d64ceaSPaul Mackerras	.llong	37b,137b
35570d64ceaSPaul Mackerras	.llong	83b,183b
35670d64ceaSPaul Mackerras	.llong	38b,138b
35770d64ceaSPaul Mackerras	.llong	39b,139b
35870d64ceaSPaul Mackerras	.llong	84b,184b
35970d64ceaSPaul Mackerras	.llong	85b,185b
36070d64ceaSPaul Mackerras	.llong	40b,140b
36170d64ceaSPaul Mackerras	.llong	86b,186b
36270d64ceaSPaul Mackerras	.llong	41b,141b
36370d64ceaSPaul Mackerras	.llong	87b,187b
36470d64ceaSPaul Mackerras	.llong	42b,142b
36570d64ceaSPaul Mackerras	.llong	88b,188b
36670d64ceaSPaul Mackerras	.llong	43b,143b
36770d64ceaSPaul Mackerras	.llong	89b,189b
36870d64ceaSPaul Mackerras	.llong	90b,190b
36970d64ceaSPaul Mackerras	.llong	91b,191b
37070d64ceaSPaul Mackerras	.llong	92b,192b
37170d64ceaSPaul Mackerras
37270d64ceaSPaul Mackerras	.text
37370d64ceaSPaul Mackerras
37470d64ceaSPaul Mackerras/*
37570d64ceaSPaul Mackerras * Routine to copy a whole page of data, optimized for POWER4.
37670d64ceaSPaul Mackerras * On POWER4 it is more than 50% faster than the simple loop
37770d64ceaSPaul Mackerras * above (following the .Ldst_aligned label) but it runs slightly
37870d64ceaSPaul Mackerras * slower on POWER3.
37970d64ceaSPaul Mackerras */
/*
 * Reached only when src and dest are page-aligned and len == 4096.
 * Loads/stores are spread across six 128-byte-apart streams (offsets
 * 0,128,256,384,512,640) to keep multiple cache lines in flight.
 * Bare "4"/"3" operands are r4 (src) and r3 (dest).  r20-r31 are saved
 * at negative offsets from r1 and restored before returning.
 * On any fault, handler 100: restores the registers and redoes the whole
 * copy via the fault-tolerant .Ldst_aligned path.
 */
3803c726f8dSBenjamin Herrenschmidt.Lcopy_page_4K:
38170d64ceaSPaul Mackerras	std	r31,-32(1)
38270d64ceaSPaul Mackerras	std	r30,-40(1)
38370d64ceaSPaul Mackerras	std	r29,-48(1)
38470d64ceaSPaul Mackerras	std	r28,-56(1)
38570d64ceaSPaul Mackerras	std	r27,-64(1)
38670d64ceaSPaul Mackerras	std	r26,-72(1)
38770d64ceaSPaul Mackerras	std	r25,-80(1)
38870d64ceaSPaul Mackerras	std	r24,-88(1)
38970d64ceaSPaul Mackerras	std	r23,-96(1)
39070d64ceaSPaul Mackerras	std	r22,-104(1)
39170d64ceaSPaul Mackerras	std	r21,-112(1)
39270d64ceaSPaul Mackerras	std	r20,-120(1)
	/* r5 counts 32-byte chunks remaining (4096/32), biased by -1 */
39370d64ceaSPaul Mackerras	li	r5,4096/32 - 1
39470d64ceaSPaul Mackerras	addi	r3,r3,-8
39570d64ceaSPaul Mackerras	li	r0,5
39670d64ceaSPaul Mackerras0:	addi	r5,r5,-24
39770d64ceaSPaul Mackerras	mtctr	r0
	/* prime the pipeline: one doubleword from each of the 6 streams */
39870d64ceaSPaul Mackerras20:	ld	r22,640(4)
39970d64ceaSPaul Mackerras21:	ld	r21,512(4)
40070d64ceaSPaul Mackerras22:	ld	r20,384(4)
40170d64ceaSPaul Mackerras23:	ld	r11,256(4)
40270d64ceaSPaul Mackerras24:	ld	r9,128(4)
40370d64ceaSPaul Mackerras25:	ld	r7,0(4)
40470d64ceaSPaul Mackerras26:	ld	r25,648(4)
40570d64ceaSPaul Mackerras27:	ld	r24,520(4)
40670d64ceaSPaul Mackerras28:	ld	r23,392(4)
40770d64ceaSPaul Mackerras29:	ld	r10,264(4)
40870d64ceaSPaul Mackerras30:	ld	r8,136(4)
40970d64ceaSPaul Mackerras31:	ldu	r6,8(4)
41070d64ceaSPaul Mackerras	cmpwi	r5,24
	/* inner loop: store previous doublewords, load the next batch */
41170d64ceaSPaul Mackerras1:
41270d64ceaSPaul Mackerras32:	std	r22,648(3)
41370d64ceaSPaul Mackerras33:	std	r21,520(3)
41470d64ceaSPaul Mackerras34:	std	r20,392(3)
41570d64ceaSPaul Mackerras35:	std	r11,264(3)
41670d64ceaSPaul Mackerras36:	std	r9,136(3)
41770d64ceaSPaul Mackerras37:	std	r7,8(3)
41870d64ceaSPaul Mackerras38:	ld	r28,648(4)
41970d64ceaSPaul Mackerras39:	ld	r27,520(4)
42070d64ceaSPaul Mackerras40:	ld	r26,392(4)
42170d64ceaSPaul Mackerras41:	ld	r31,264(4)
42270d64ceaSPaul Mackerras42:	ld	r30,136(4)
42370d64ceaSPaul Mackerras43:	ld	r29,8(4)
42470d64ceaSPaul Mackerras44:	std	r25,656(3)
42570d64ceaSPaul Mackerras45:	std	r24,528(3)
42670d64ceaSPaul Mackerras46:	std	r23,400(3)
42770d64ceaSPaul Mackerras47:	std	r10,272(3)
42870d64ceaSPaul Mackerras48:	std	r8,144(3)
42970d64ceaSPaul Mackerras49:	std	r6,16(3)
43070d64ceaSPaul Mackerras50:	ld	r22,656(4)
43170d64ceaSPaul Mackerras51:	ld	r21,528(4)
43270d64ceaSPaul Mackerras52:	ld	r20,400(4)
43370d64ceaSPaul Mackerras53:	ld	r11,272(4)
43470d64ceaSPaul Mackerras54:	ld	r9,144(4)
43570d64ceaSPaul Mackerras55:	ld	r7,16(4)
43670d64ceaSPaul Mackerras56:	std	r28,664(3)
43770d64ceaSPaul Mackerras57:	std	r27,536(3)
43870d64ceaSPaul Mackerras58:	std	r26,408(3)
43970d64ceaSPaul Mackerras59:	std	r31,280(3)
44070d64ceaSPaul Mackerras60:	std	r30,152(3)
44170d64ceaSPaul Mackerras61:	stdu	r29,24(3)
44270d64ceaSPaul Mackerras62:	ld	r25,664(4)
44370d64ceaSPaul Mackerras63:	ld	r24,536(4)
44470d64ceaSPaul Mackerras64:	ld	r23,408(4)
44570d64ceaSPaul Mackerras65:	ld	r10,280(4)
44670d64ceaSPaul Mackerras66:	ld	r8,152(4)
44770d64ceaSPaul Mackerras67:	ldu	r6,24(4)
44870d64ceaSPaul Mackerras	bdnz	1b
	/* flush the last batch of loaded doublewords */
44970d64ceaSPaul Mackerras68:	std	r22,648(3)
45070d64ceaSPaul Mackerras69:	std	r21,520(3)
45170d64ceaSPaul Mackerras70:	std	r20,392(3)
45270d64ceaSPaul Mackerras71:	std	r11,264(3)
45370d64ceaSPaul Mackerras72:	std	r9,136(3)
45470d64ceaSPaul Mackerras73:	std	r7,8(3)
45570d64ceaSPaul Mackerras74:	addi	r4,r4,640
45670d64ceaSPaul Mackerras75:	addi	r3,r3,648
	/* more outer-loop passes left? (cmpwi r5,24 above) */
45770d64ceaSPaul Mackerras	bge	0b
	/* final tail: r5 chunks of 32 bytes, simple pipelined loop */
45870d64ceaSPaul Mackerras	mtctr	r5
45970d64ceaSPaul Mackerras76:	ld	r7,0(4)
46070d64ceaSPaul Mackerras77:	ld	r8,8(4)
46170d64ceaSPaul Mackerras78:	ldu	r9,16(4)
46270d64ceaSPaul Mackerras3:
46370d64ceaSPaul Mackerras79:	ld	r10,8(4)
46470d64ceaSPaul Mackerras80:	std	r7,8(3)
46570d64ceaSPaul Mackerras81:	ld	r7,16(4)
46670d64ceaSPaul Mackerras82:	std	r8,16(3)
46770d64ceaSPaul Mackerras83:	ld	r8,24(4)
46870d64ceaSPaul Mackerras84:	std	r9,24(3)
46970d64ceaSPaul Mackerras85:	ldu	r9,32(4)
47070d64ceaSPaul Mackerras86:	stdu	r10,32(3)
47170d64ceaSPaul Mackerras	bdnz	3b
47270d64ceaSPaul Mackerras4:
47370d64ceaSPaul Mackerras87:	ld	r10,8(4)
47470d64ceaSPaul Mackerras88:	std	r7,8(3)
47570d64ceaSPaul Mackerras89:	std	r8,16(3)
47670d64ceaSPaul Mackerras90:	std	r9,24(3)
47770d64ceaSPaul Mackerras91:	std	r10,32(3)
	/* restore callee-saved registers and return success */
47870d64ceaSPaul Mackerras9:	ld	r20,-120(1)
47970d64ceaSPaul Mackerras	ld	r21,-112(1)
48070d64ceaSPaul Mackerras	ld	r22,-104(1)
48170d64ceaSPaul Mackerras	ld	r23,-96(1)
48270d64ceaSPaul Mackerras	ld	r24,-88(1)
48370d64ceaSPaul Mackerras	ld	r25,-80(1)
48470d64ceaSPaul Mackerras	ld	r26,-72(1)
48570d64ceaSPaul Mackerras	ld	r27,-64(1)
48670d64ceaSPaul Mackerras	ld	r28,-56(1)
48770d64ceaSPaul Mackerras	ld	r29,-48(1)
48870d64ceaSPaul Mackerras	ld	r30,-40(1)
48970d64ceaSPaul Mackerras	ld	r31,-32(1)
49070d64ceaSPaul Mackerras	li	r3,0
49170d64ceaSPaul Mackerras	blr
49270d64ceaSPaul Mackerras
49370d64ceaSPaul Mackerras/*
49470d64ceaSPaul Mackerras * on an exception, reset to the beginning and jump back into the
49570d64ceaSPaul Mackerras * standard __copy_tofrom_user
49670d64ceaSPaul Mackerras */
49770d64ceaSPaul Mackerras100:	ld	r20,-120(1)
49870d64ceaSPaul Mackerras	ld	r21,-112(1)
49970d64ceaSPaul Mackerras	ld	r22,-104(1)
50070d64ceaSPaul Mackerras	ld	r23,-96(1)
50170d64ceaSPaul Mackerras	ld	r24,-88(1)
50270d64ceaSPaul Mackerras	ld	r25,-80(1)
50370d64ceaSPaul Mackerras	ld	r26,-72(1)
50470d64ceaSPaul Mackerras	ld	r27,-64(1)
50570d64ceaSPaul Mackerras	ld	r28,-56(1)
50670d64ceaSPaul Mackerras	ld	r29,-48(1)
50770d64ceaSPaul Mackerras	ld	r30,-40(1)
50870d64ceaSPaul Mackerras	ld	r31,-32(1)
	/* reload original dest/src, force the full 4096-byte length, and
	 * rerun via the fully fault-handled aligned path */
50970d64ceaSPaul Mackerras	ld	r3,-24(r1)
51070d64ceaSPaul Mackerras	ld	r4,-16(r1)
51170d64ceaSPaul Mackerras	li	r5,4096
51270d64ceaSPaul Mackerras	b	.Ldst_aligned
51370d64ceaSPaul Mackerras
/*
 * Exception table for .Lcopy_page_4K: every faulting load/store label
 * maps to the single fixup at 100:, which restarts the copy through the
 * fault-tolerant __copy_tofrom_user aligned path.
 */
51470d64ceaSPaul Mackerras	.section __ex_table,"a"
51570d64ceaSPaul Mackerras	.align	3
51670d64ceaSPaul Mackerras	.llong	20b,100b
51770d64ceaSPaul Mackerras	.llong	21b,100b
51870d64ceaSPaul Mackerras	.llong	22b,100b
51970d64ceaSPaul Mackerras	.llong	23b,100b
52070d64ceaSPaul Mackerras	.llong	24b,100b
52170d64ceaSPaul Mackerras	.llong	25b,100b
52270d64ceaSPaul Mackerras	.llong	26b,100b
52370d64ceaSPaul Mackerras	.llong	27b,100b
52470d64ceaSPaul Mackerras	.llong	28b,100b
52570d64ceaSPaul Mackerras	.llong	29b,100b
52670d64ceaSPaul Mackerras	.llong	30b,100b
52770d64ceaSPaul Mackerras	.llong	31b,100b
52870d64ceaSPaul Mackerras	.llong	32b,100b
52970d64ceaSPaul Mackerras	.llong	33b,100b
53070d64ceaSPaul Mackerras	.llong	34b,100b
53170d64ceaSPaul Mackerras	.llong	35b,100b
53270d64ceaSPaul Mackerras	.llong	36b,100b
53370d64ceaSPaul Mackerras	.llong	37b,100b
53470d64ceaSPaul Mackerras	.llong	38b,100b
53570d64ceaSPaul Mackerras	.llong	39b,100b
53670d64ceaSPaul Mackerras	.llong	40b,100b
53770d64ceaSPaul Mackerras	.llong	41b,100b
53870d64ceaSPaul Mackerras	.llong	42b,100b
53970d64ceaSPaul Mackerras	.llong	43b,100b
54070d64ceaSPaul Mackerras	.llong	44b,100b
54170d64ceaSPaul Mackerras	.llong	45b,100b
54270d64ceaSPaul Mackerras	.llong	46b,100b
54370d64ceaSPaul Mackerras	.llong	47b,100b
54470d64ceaSPaul Mackerras	.llong	48b,100b
54570d64ceaSPaul Mackerras	.llong	49b,100b
54670d64ceaSPaul Mackerras	.llong	50b,100b
54770d64ceaSPaul Mackerras	.llong	51b,100b
54870d64ceaSPaul Mackerras	.llong	52b,100b
54970d64ceaSPaul Mackerras	.llong	53b,100b
55070d64ceaSPaul Mackerras	.llong	54b,100b
55170d64ceaSPaul Mackerras	.llong	55b,100b
55270d64ceaSPaul Mackerras	.llong	56b,100b
55370d64ceaSPaul Mackerras	.llong	57b,100b
55470d64ceaSPaul Mackerras	.llong	58b,100b
55570d64ceaSPaul Mackerras	.llong	59b,100b
55670d64ceaSPaul Mackerras	.llong	60b,100b
55770d64ceaSPaul Mackerras	.llong	61b,100b
55870d64ceaSPaul Mackerras	.llong	62b,100b
55970d64ceaSPaul Mackerras	.llong	63b,100b
56070d64ceaSPaul Mackerras	.llong	64b,100b
56170d64ceaSPaul Mackerras	.llong	65b,100b
56270d64ceaSPaul Mackerras	.llong	66b,100b
56370d64ceaSPaul Mackerras	.llong	67b,100b
56470d64ceaSPaul Mackerras	.llong	68b,100b
56570d64ceaSPaul Mackerras	.llong	69b,100b
56670d64ceaSPaul Mackerras	.llong	70b,100b
56770d64ceaSPaul Mackerras	.llong	71b,100b
56870d64ceaSPaul Mackerras	.llong	72b,100b
56970d64ceaSPaul Mackerras	.llong	73b,100b
57070d64ceaSPaul Mackerras	.llong	74b,100b
57170d64ceaSPaul Mackerras	.llong	75b,100b
57270d64ceaSPaul Mackerras	.llong	76b,100b
57370d64ceaSPaul Mackerras	.llong	77b,100b
57470d64ceaSPaul Mackerras	.llong	78b,100b
57570d64ceaSPaul Mackerras	.llong	79b,100b
57670d64ceaSPaul Mackerras	.llong	80b,100b
57770d64ceaSPaul Mackerras	.llong	81b,100b
57870d64ceaSPaul Mackerras	.llong	82b,100b
57970d64ceaSPaul Mackerras	.llong	83b,100b
58070d64ceaSPaul Mackerras	.llong	84b,100b
58170d64ceaSPaul Mackerras	.llong	85b,100b
58270d64ceaSPaul Mackerras	.llong	86b,100b
58370d64ceaSPaul Mackerras	.llong	87b,100b
58470d64ceaSPaul Mackerras	.llong	88b,100b
58570d64ceaSPaul Mackerras	.llong	89b,100b
58670d64ceaSPaul Mackerras	.llong	90b,100b
58770d64ceaSPaul Mackerras	.llong	91b,100b
588