/*
 * arch/powerpc/lib/copyuser_64.S
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

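/*
 * C-level contract (a sketch; the actual wrappers live in the powerpc
 * uaccess headers):
 *
 *	unsigned long __copy_tofrom_user(void *to, const void *from,
 *					 unsigned long n);
 *
 * Returns 0 on success, otherwise the number of bytes NOT copied.
 * Callers such as copy_from_user() use the return value to tell how
 * much of the destination is valid after a fault.
 */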
	.align	7
_GLOBAL(__copy_tofrom_user)
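	/* r3 = dest, r4 = src, r5 = byte count (the standard argument
	 * registers); r3 also carries the return value */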
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page
	andi.	r6,r6,7
	mtcrf	0x01,r5
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
.Ldst_aligned:
	andi.	r0,r4,7
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,22f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f
	addi	r3,r3,16
23:	ld	r9,8(r4)
.Ldo_tail:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

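/*
 * Unaligned source: read aligned doublewords and merge each adjacent
 * pair with sld/srd through r10 (= 8 * source misalignment) and
 * r11 (= 64 - r10), so every store is still a full aligned doubleword.
 */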
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

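/*
 * Unaligned destination: copy 1-7 bytes (as a byte, halfword and/or
 * word, selected by the low bits of r6) to reach an 8-byte destination
 * boundary, then rejoin the aligned path.
 */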
.Ldst_unaligned:
	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	mtcrf	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

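/*
 * Short copy (< 16 bytes): cr7 holds the low four bits of the count,
 * so each set bit selects one 8/4/2/1-byte move.
 */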
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * Exception handlers follow.
 * We have to return the number of bytes not copied; for an exception
 * on a load, we also zero the rest of the destination.
 */

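/*
 * Convention: the fixup for the load/store at numeric label NN is at
 * label 1NN; the __ex_table entries below pair them up.
 */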
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */
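	/* e.g. a fault after storing 40 of 100 bytes gives r6 = 40,
	 * so we retry from src+40 with 60 bytes remaining */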

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
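/*
 * Zero byte-by-byte up to an 8-byte boundary, then a doubleword at a
 * time, then any tail bytes.
 */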
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
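/*
 * Each addi below credits r3 with the bytes that had already been
 * stored when the trapping store was reached; the code at 1: then
 * computes (orig dest + count) - r3 = bytes not copied.
 */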
182:
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

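/*
 * __ex_table: (faulting instruction, fixup handler) address pairs,
 * searched by the page-fault code to redirect a faulting access.
 */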
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
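/*
 * The main loop keeps six load/store streams in flight, 128 bytes
 * apart, so several cache lines are being fetched at once; leftover
 * 32-byte chunks are handled by the small loop at 3: below.
 */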
.Lcopy_page:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
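/*
 * Simpler than byte-accurate fixups for the heavily unrolled loop:
 * restore the original arguments and let the standard path (and its
 * own exception handlers) redo the whole page.
 */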
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b