/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

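/*
 * Each errN macro tags the user-access instruction that follows it:
 * it drops a local label on that instruction and emits an __ex_table
 * entry pairing the label with the matching .Ldo_errN fixup, so a
 * fault there is steered to the right unwind path.
 */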
	.macro err1
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Ldo_err1
	.previous
	.endm

	.macro err2
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldo_err2
	.previous
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	.section __ex_table,"a"
	.align 3
	.llong 300b,.Ldo_err3
	.previous
	.endm

	.macro err4
400:
	.section __ex_table,"a"
	.align 3
	.llong 400b,.Ldo_err4
	.previous
	.endm


.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	.exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

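/*
 * The fixup handlers unwind whatever their path had set up: err4/err3
 * (above) restore the clobbered non-volatiles and leave VMX, err2
 * restores the non-volatiles, and .Lexit pops the stack frame. All of
 * them end up at .Ldo_err1, which reloads the original dest/src/len
 * and punts to __copy_tofrom_user_base to finish the copy and work
 * out how many bytes were left uncopied.
 */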
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,48(r1)
	ld	r4,56(r1)
	ld	r5,64(r1)
	b	__copy_tofrom_user_base


_GLOBAL(__copy_tofrom_user_power7)
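	/*
	 * r3/r4/r5 (dest, src, len) are stashed in the caller's frame so
	 * the fault handlers above can replay the copy from the start.
	 * Copies under 16 bytes take .Lshort_copy; with Altivec built in,
	 * copies larger than 4kB take the VMX path.
	 */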
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,4096

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
	bgt	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

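	/*
	 * r6 = (-src) & 7 is the number of bytes needed to reach 8B
	 * alignment. mtocrf put its low bits in cr7, so each bf below
	 * skips or takes one power-of-two sized piece (1B, 2B, 4B).
	 */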
	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

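	/*
	 * The non-volatiles r14-r22 saved above provide enough scratch
	 * registers for a full cacheline per iteration (r13 is left
	 * alone: it holds the PACA pointer). ctr = length / 128.
	 */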
	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

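	/* All done: return 0, ie. no bytes left uncopied */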
15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

#ifdef CONFIG_ALTIVEC
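/*
 * enter_vmx_usercopy() disables preemption and makes the VMX unit
 * usable; it returns 0 when VMX cannot be used (eg. from interrupt
 * context), which the cr1 test below turns into a scalar fallback.
 */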
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	.enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STACKFRAMESIZE+48(r1)
	ld	r4,STACKFRAMESIZE+56(r1)
	ld	r5,STACKFRAMESIZE+64(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

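	/*
	 * The stream variants of dcbt/dcbtst take an extra TH operand,
	 * which the assembler only accepts in a POWER4-or-later dialect,
	 * hence the .machine override.
	 */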
.machine push
.machine "power4"
	/* setup read stream 0 */
	dcbt	r0,r6,0b01000   /* addr from */
	dcbt	r0,r7,0b01010   /* length and depth from */
	/* setup write stream 1 */
	dcbtst	r0,r9,0b01000   /* addr to */
	dcbtst	r0,r10,0b01010  /* length and depth to */
	eieio
	dcbt	r0,r8,0b01010	/* all streams GO */
.machine pop

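	/* VMX not usable: pop the frame and do the copy with GPRs */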
	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

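	/* r9-r16 hold offsets 16..112, indexing the eight lvx/stvx pairs per cacheline */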
	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
err4;	lvx	vr6,r4,r9
err4;	lvx	vr5,r4,r10
err4;	lvx	vr4,r4,r11
err4;	lvx	vr3,r4,r12
err4;	lvx	vr2,r4,r14
err4;	lvx	vr1,r4,r15
err4;	lvx	vr0,r4,r16
	addi	r4,r4,128
err4;	stvx	vr7,r0,r3
err4;	stvx	vr6,r3,r9
err4;	stvx	vr5,r3,r10
err4;	stvx	vr4,r3,r11
err4;	stvx	vr3,r3,r12
err4;	stvx	vr2,r3,r14
err4;	stvx	vr1,r3,r15
err4;	stvx	vr0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

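	/*
	 * Done: exit_vmx_usercopy() returns 0, which the tail call
	 * passes straight back as our "bytes not copied" result.
	 */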
15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_usercopy	/* tail call optimise */

.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

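	/*
	 * lvx ignores the low four address bits, so the misaligned source
	 * is read as aligned quadwords and each 16B of output is built by
	 * vperm-merging two adjacent loads under the control vector that
	 * lvsl derives from the source's misalignment.
	 */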
	lvsl	vr16,0,r4	/* Setup permute control vector */
err3;	lvx	vr0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16
	vor	vr0,vr1,vr1

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
	vperm	vr8,vr0,vr7,vr16
err4;	lvx	vr6,r4,r9
	vperm	vr9,vr7,vr6,vr16
err4;	lvx	vr5,r4,r10
	vperm	vr10,vr6,vr5,vr16
err4;	lvx	vr4,r4,r11
	vperm	vr11,vr5,vr4,vr16
err4;	lvx	vr3,r4,r12
	vperm	vr12,vr4,vr3,vr16
err4;	lvx	vr2,r4,r14
	vperm	vr13,vr3,vr2,vr16
err4;	lvx	vr1,r4,r15
	vperm	vr14,vr2,vr1,vr16
err4;	lvx	vr0,r4,r16
	vperm	vr15,vr1,vr0,vr16
	addi	r4,r4,128
err4;	stvx	vr8,r0,r3
err4;	stvx	vr9,r3,r9
err4;	stvx	vr10,r3,r10
err4;	stvx	vr11,r3,r11
err4;	stvx	vr12,r3,r12
err4;	stvx	vr13,r3,r14
err4;	stvx	vr14,r3,r15
err4;	stvx	vr15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */