xref: /linux/arch/powerpc/lib/copyuser_power7.S (revision f46e374c1ea7fafce70a838f09fbd67de3e4d49f)
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

/*
 * err1: tag the user-access instruction that follows the "err1;" macro
 * invocation (it shares the source line, so it sits at local label 100:)
 * with an exception-table entry.  A fault on that instruction branches
 * to the .Ldo_err1 fixup, which needs no register/stack restoration.
 */
	.macro err1
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Ldo_err1
	.previous
	.endm
29
/*
 * err2: like err1, but for faults taken inside the unrolled scalar loop
 * after a stack frame has been pushed and r14-r22 saved; the .Ldo_err2
 * fixup restores those before unwinding.
 */
	.macro err2
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldo_err2
	.previous
	.endm
37
#ifdef CONFIG_ALTIVEC
/*
 * err3: fixup for faults in the VMX paths taken before the extra GPRs
 * (r14-r16) are saved; only VMX state needs to be torn down.
 */
	.macro err3
300:
	.section __ex_table,"a"
	.align 3
	.llong 300b,.Ldo_err3
	.previous
	.endm

/*
 * err4: fixup for faults inside the VMX cacheline loops, after r14-r16
 * have been saved to the stack frame.
 */
	.macro err4
400:
	.section __ex_table,"a"
	.align 3
	.llong 400b,.Ldo_err4
	.previous
	.endm


/* Restore the saved GPRs, then fall through to the common VMX teardown. */
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	.exit_vmx_usercopy	/* give the VMX unit back before unwinding */
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */
66
/*
 * Fault fixup with the stack frame live: restore all callee-saved GPRs
 * used by the unrolled loops, pop the frame, then retry the whole copy
 * with the original arguments (stashed at 48/56/64(r1) on entry) via
 * the fallback __copy_tofrom_user_base routine.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,48(r1)		/* original dest */
	ld	r4,56(r1)		/* original src */
	ld	r5,64(r1)		/* original length */
	b	__copy_tofrom_user_base
84
85
/*
 * unsigned long __copy_tofrom_user_power7(void *dest, const void *src,
 *					   unsigned long len)
 * In:  r3 = dest, r4 = src, r5 = len
 * Out: r3 = 0 on success (fault path falls back to
 *      __copy_tofrom_user_base, which sets the return value).
 *
 * The arguments are stashed in the caller's frame (48/56/64(r1)) so the
 * exception fixups can restart the copy from scratch.  Copies < 16B go
 * straight to the byte/word tail; with ALTIVEC, copies > 4096B take the
 * VMX path.
 */
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,4096

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
	bgt	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
#endif
106
/*
 * Scalar (GPR-only) copy path.  Aligns the source to 8B, moves whole
 * cachelines with an unrolled 16x8B loop, then peels the sub-128B tail
 * with progressively smaller moves selected by the bits of the
 * remaining length (tested via cr7 after mtocrf 0x01).
 */
.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)		/* r6 = bytes to reach 8B alignment */

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f			/* less than a cacheline left */

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7			/* loop count = len / 128 */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = remaining bytes (< 128) */

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0		/* success */
	blr
280
/* VMX was unavailable: drop the frame pushed by .Lvmx_copy and fall
 * back to the scalar path. */
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy
284
#ifdef CONFIG_ALTIVEC
/*
 * VMX (AltiVec) copy path for large copies.  Sets up hardware stream
 * prefetch for source and destination, then copies with 16B vector
 * loads/stores.  Relatively-misaligned buffers go via
 * .Lvmx_unaligned_copy, which merges pairs of aligned loads with vperm.
 */
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	.enter_vmx_usercopy
	cmpwi	cr1,r3,0		/* cr1 = did we get the VMX unit? */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STACKFRAMESIZE+48(r1)
	ld	r4,STACKFRAMESIZE+56(r1)
	ld	r5,STACKFRAMESIZE+64(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

.machine push
.machine "power4"
	dcbt	r0,r6,0b01000
	dcbt	r0,r7,0b01010
	dcbtst	r0,r9,0b01000
	dcbtst	r0,r10,0b01010
	eieio
	dcbt	r0,r8,0b01010	/* GO */
.machine pop

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7			/* loop count = len / 128 */

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
err4;	lvx	vr6,r4,r9
err4;	lvx	vr5,r4,r10
err4;	lvx	vr4,r4,r11
err4;	lvx	vr3,r4,r12
err4;	lvx	vr2,r4,r14
err4;	lvx	vr1,r4,r15
err4;	lvx	vr0,r4,r16
	addi	r4,r4,128
err4;	stvx	vr7,r0,r3
err4;	stvx	vr6,r3,r9
err4;	stvx	vr5,r3,r10
err4;	stvx	vr4,r3,r11
err4;	stvx	vr3,r3,r12
err4;	stvx	vr2,r3,r14
err4;	stvx	vr1,r3,r15
err4;	stvx	vr0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_usercopy	/* tail call optimise */

/*
 * Relatively-misaligned copy: keep one 16B-aligned load ahead of the
 * stores and stitch adjacent vectors together with vperm, using the
 * permute control vector from lvsl on the original source address.
 */
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	lvsl	vr16,0,r4	/* Setup permute control vector */
err3;	lvx	vr0,0,r4
	addi	r4,r4,16	/* src now runs one 16B load ahead */

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16
	vor	vr0,vr1,vr1	/* carry last load into next merge */

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7			/* loop count = len / 128 */

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
	vperm	vr8,vr0,vr7,vr16
err4;	lvx	vr6,r4,r9
	vperm	vr9,vr7,vr6,vr16
err4;	lvx	vr5,r4,r10
	vperm	vr10,vr6,vr5,vr16
err4;	lvx	vr4,r4,r11
	vperm	vr11,vr5,vr4,vr16
err4;	lvx	vr3,r4,r12
	vperm	vr12,vr4,vr3,vr16
err4;	lvx	vr2,r4,r14
	vperm	vr13,vr3,vr2,vr16
err4;	lvx	vr1,r4,r15
	vperm	vr14,vr2,vr1,vr16
err4;	lvx	vr0,r4,r16
	vperm	vr15,vr1,vr0,vr16
	addi	r4,r4,128
err4;	stvx	vr8,r0,r3
err4;	stvx	vr9,r3,r9
err4;	stvx	vr10,r3,r10
err4;	stvx	vr11,r3,r11
err4;	stvx	vr12,r3,r12
err4;	stvx	vr13,r3,r14
err4;	stvx	vr14,r3,r15
err4;	stvx	vr15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */
712