xref: /linux/arch/powerpc/lib/copyuser_64.S (revision 2c86cd188f8a5631f3d75a1dea14d22df85189b4)
1/*
2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <asm/processor.h>
10#include <asm/ppc_asm.h>
11#include <asm/export.h>
12#include <asm/asm-compat.h>
13#include <asm/feature-fixups.h>
14
/*
 * sLd/sHd name a 64-bit shift by the direction the data moves in memory
 * rather than by bit significance.  The unaligned-source merge code
 * further down uses them so that the same instruction sequence works
 * for both byte orders.
 */
15#ifdef __BIG_ENDIAN__
16#define sLd sld		/* Shift towards low-numbered address. */
17#define sHd srd		/* Shift towards high-numbered address. */
18#else
19#define sLd srd		/* Shift towards low-numbered address. */
20#define sHd sld		/* Shift towards high-numbered address. */
21#endif
22
/*
 * Public entry point (exported at the bottom of this file).
 * With CONFIG_PPC_BOOK3S_64 a feature section holds either a nop, which
 * falls through into __copy_tofrom_user_base, or a branch to
 * __copy_tofrom_user_power7 (defined outside this file).
 * NOTE(review): the selection is keyed on CPU_FTR_VMX_COPY; presumably
 * the nop is kept when that feature is clear - confirm against
 * asm/feature-fixups.h.
 * Without CONFIG_PPC_BOOK3S_64 execution always falls through.
 */
23	.align	7
24_GLOBAL_TOC(__copy_tofrom_user)
25#ifdef CONFIG_PPC_BOOK3S_64
26BEGIN_FTR_SECTION
27	nop
28FTR_SECTION_ELSE
29	b	__copy_tofrom_user_power7
30ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
31#endif
/*
 * Generic copy routine.  As used by the code below: r3 = destination,
 * r4 = source, r5 = byte count.  Returns 0 in r3 when everything was
 * copied, otherwise the number of bytes not copied (computed by the
 * fault fixups further down).
 *
 * This prologue saves the three arguments below the stack pointer for
 * the fixups and then picks one of four paths: whole-page copy, short
 * copy (fewer than 16 bytes), destination alignment, or the aligned
 * main loop.
 */
32_GLOBAL(__copy_tofrom_user_base)
33	/* first check for a whole page copy on a page boundary */
34	cmpldi	cr1,r5,16	/* cr1: count vs 16, consumed by blt cr1 below */
35	cmpdi	cr6,r5,4096	/* cr6.eq: count is exactly 4096 */
36	or	r0,r3,r4
37	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
38	andi.	r0,r0,4095	/* cr0.eq: dest and source both 4K aligned */
39	std	r3,-24(r1)	/* original dest, reloaded by the fault fixups */
40	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq = aligned AND count == 4096 */
41	std	r4,-16(r1)	/* original source, likewise */
42	std	r5,-8(r1)	/* original count, likewise */
43	dcbt	0,r4
44	beq	.Lcopy_page_4K
45	andi.	r6,r6,7		/* cr0.eq: dest already 8-byte aligned */
46	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of count, for the bf/bt tests */
47	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
48/* Below we want to nop out the bne if we're on a CPU that has the
49 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
50 * cleared.
51 * At the time of writing the only CPU that has this combination of bits
52 * set is Power6.
53 */
54BEGIN_FTR_SECTION
55	nop
56FTR_SECTION_ELSE
57	bne	.Ldst_unaligned
58ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
59		    CPU_FTR_UNALIGNED_LD_STD)
/*
 * Main path.  Reached by fall-through, from .Ldst_unaligned once the
 * destination has been brought to an 8-byte boundary, and from the
 * page-copy fault fixup (label 100).
 *
 * r3 is biased by -16 while in the loop; the fault fixups (labels 1NN
 * and 3NN) add the matching offset back.
 * The loop moves 32 bytes per iteration with ctr = count / 32 and is
 * software pipelined: 16 bytes are always loaded before the previous
 * 16 are stored.  It is entered either at 22: (no odd 16-byte piece) or
 * by falling into 21: with the first 16 bytes pending in r9/r8.
 */
60.Ldst_aligned:
61	addi	r3,r3,-16
/* NOTE(review): presumably kept only on CPUs without unaligned ld/std */
62BEGIN_FTR_SECTION
63	andi.	r0,r4,7		/* r0 = source misalignment, used at .Lsrc_unaligned */
64	bne	.Lsrc_unaligned
65END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
66	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
67	srdi	r0,r5,5		/* r0 = number of whole 32-byte chunks */
68	cmpdi	cr1,r0,0	/* cr1.eq: no whole 32-byte chunk */
6920:	ld	r7,0(r4)
70220:	ld	r6,8(r4)
71	addi	r4,r4,16
72	mtctr	r0
73	andi.	r0,r5,0x10	/* is there an odd 16-byte piece? */
74	beq	22f
	/* odd piece: the 16 bytes just loaded become the pending store */
75	addi	r3,r3,16
76	addi	r4,r4,-16
77	mr	r9,r7
78	mr	r8,r6
79	beq	cr1,72f		/* count < 32: store them and go to the tail */
8021:	ld	r7,16(r4)
81221:	ld	r6,24(r4)
82	addi	r4,r4,32
8370:	std	r9,0(r3)
84270:	std	r8,8(r3)
8522:	ld	r9,0(r4)
86222:	ld	r8,8(r4)
8771:	std	r7,16(r3)
88271:	std	r6,24(r3)
89	addi	r3,r3,32
90	bdnz	21b
	/* drain the last 16 bytes still held in r9/r8 */
9172:	std	r9,0(r3)
92272:	std	r8,8(r3)
93	andi.	r5,r5,0xf
94	beq+	3f		/* no tail bytes: done */
95	addi	r4,r4,16	/* step source past the 16 bytes just stored */
/*
 * Copy the final 0-15 bytes.  cr7 holds the low 4 bits of the count;
 * its bits select an 8, 4, 2 and 1 byte move in turn.
 */
96.Ldo_tail:
97	addi	r3,r3,16	/* undo the -16 bias */
98	bf	cr7*4+0,246f
99244:	ld	r9,0(r4)
100	addi	r4,r4,8
101245:	std	r9,0(r3)
102	addi	r3,r3,8
103246:	bf	cr7*4+1,1f
10423:	lwz	r9,0(r4)
105	addi	r4,r4,4
10673:	stw	r9,0(r3)
107	addi	r3,r3,4
1081:	bf	cr7*4+2,2f
10944:	lhz	r9,0(r4)
110	addi	r4,r4,2
11174:	sth	r9,0(r3)
112	addi	r3,r3,2
1132:	bf	cr7*4+3,3f
11445:	lbz	r9,0(r4)
11575:	stb	r9,0(r3)
1163:	li	r3,0		/* success: nothing left uncopied */
117	blr
118
/*
 * Destination is 8-byte aligned but the source is not.
 * On entry r0 = source & 7 (from the andi. at .Ldst_aligned) and
 * r3 = dest - 16.  Because the branch here is taken before the
 * "fewer than 16 bytes" test at .Ldst_aligned, the count can be below
 * 16 on entry (after .Ldst_unaligned trimmed it); that case leaves via
 * the blt cr6,5f on the 28: path and never uses ctr.
 *
 * r4 is rounded down to an 8-byte boundary and every destination
 * doubleword is built from two neighbouring aligned source doublewords:
 *	r10 = 8 * (source & 7)	shift count for sLd
 *	r11 = 64 - r10		shift count for sHd
 * cr6 compares count / 8 with 3 and picks the early exits.
 * ctr = (count - 16) >> 4 drives the two-doubleword loop at 1:.
 * r5 becomes (count & 7) + (source & 7); at 6: a value above 8 means
 * one more source doubleword has to be fetched for the tail.
 * cr0 set by the andi. below (any tail bytes?) has to stay untouched
 * until the bne 6f after label 80.
 */
119.Lsrc_unaligned:
120	srdi	r6,r5,3		/* r6 = count / 8 */
121	addi	r5,r5,-16
122	subf	r4,r0,r4	/* round source down to 8 bytes */
123	srdi	r7,r5,4
124	sldi	r10,r0,3
125	cmpldi	cr6,r6,3
126	andi.	r5,r5,7		/* cr0.eq: no tail bytes */
127	mtctr	r7
128	subfic	r11,r10,64
129	add	r5,r5,r0
130	bt	cr7*4+0,28f	/* count bit 8 set: odd number of doublewords */
131
13224:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
13325:	ld	r0,8(r4)
134	sLd	r6,r9,r10
13526:	ldu	r9,16(r4)
136	sHd	r7,r0,r11
137	sLd	r8,r0,r10
138	or	r7,r7,r6
139	blt	cr6,79f		/* only two doublewords to store */
14027:	ld	r0,8(r4)
141	b	2f
142
14328:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
14429:	ldu	r9,8(r4)
145	sLd	r8,r0,r10
146	addi	r3,r3,-8
147	blt	cr6,5f		/* a single doubleword to store */
14830:	ld	r0,8(r4)
149	sHd	r12,r9,r11
150	sLd	r6,r9,r10
15131:	ldu	r9,16(r4)
152	or	r12,r8,r12
153	sHd	r7,r0,r11
154	sLd	r8,r0,r10
155	addi	r3,r3,16
156	beq	cr6,78f		/* exactly three doublewords to store */
157
	/* steady state: two loads, two merges, two stores per iteration */
1581:	or	r7,r7,r6
15932:	ld	r0,8(r4)
16076:	std	r12,8(r3)
1612:	sHd	r12,r9,r11
162	sLd	r6,r9,r10
16333:	ldu	r9,16(r4)
164	or	r12,r8,r12
16577:	stdu	r7,16(r3)
166	sHd	r7,r0,r11
167	sLd	r8,r0,r10
168	bdnz	1b
169
	/* drain the doublewords still in flight */
17078:	std	r12,8(r3)
171	or	r7,r7,r6
17279:	std	r7,16(r3)
1735:	sHd	r12,r9,r11
174	or	r12,r8,r12
17580:	std	r12,24(r3)
176	bne	6f		/* cr0 from the andi. at the top: tail bytes left */
177	li	r3,0
178	blr
1796:	cmpwi	cr1,r5,8
180	addi	r3,r3,32	/* r3 = first byte not yet written */
181	sLd	r9,r9,r10
182	ble	cr1,7f		/* tail fits in what is left of r9 */
18334:	ld	r0,8(r4)
184	sHd	r7,r0,r11
185	or	r9,r7,r9
	/*
	 * Store the last 4/2/1 bytes out of r9 as selected by cr7.  The
	 * rotates move the next bytes into the low-order end that
	 * stw/sth/stb write: before the store on big-endian, after it on
	 * little-endian.
	 */
1867:
187	bf	cr7*4+1,1f
188#ifdef __BIG_ENDIAN__
189	rotldi	r9,r9,32
190#endif
19194:	stw	r9,0(r3)
192#ifdef __LITTLE_ENDIAN__
193	rotrdi	r9,r9,32
194#endif
195	addi	r3,r3,4
1961:	bf	cr7*4+2,2f
197#ifdef __BIG_ENDIAN__
198	rotldi	r9,r9,16
199#endif
20095:	sth	r9,0(r3)
201#ifdef __LITTLE_ENDIAN__
202	rotrdi	r9,r9,16
203#endif
204	addi	r3,r3,2
2052:	bf	cr7*4+3,3f
206#ifdef __BIG_ENDIAN__
207	rotldi	r9,r9,8
208#endif
20996:	stb	r9,0(r3)
210#ifdef __LITTLE_ENDIAN__
211	rotrdi	r9,r9,8
212#endif
2133:	li	r3,0
214	blr
215
/*
 * Bring the destination up to an 8-byte boundary.
 * r6 = number of bytes to that boundary (computed at entry, 1 to 7 here).
 * Copies 1, 2 and 4 bytes as selected by the bits of r6, using r7 as the
 * running offset from r3/r4 (the fixups 136/137 and 182/183 rely on r7).
 * Afterwards the count is reduced by r6, cr1 and cr7 are refreshed from
 * the new count, r3/r4 are advanced and control joins .Ldst_aligned.
 */
216.Ldst_unaligned:
217	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
218	subf	r5,r6,r5	/* count left once the dest is aligned */
219	li	r7,0
220	cmpldi	cr1,r5,16	/* redo the "fewer than 16" test for the new count */
221	bf	cr7*4+3,1f
22235:	lbz	r0,0(r4)
22381:	stb	r0,0(r3)
224	addi	r7,r7,1
2251:	bf	cr7*4+2,2f
22636:	lhzx	r0,r7,r4
22782:	sthx	r0,r7,r3
228	addi	r7,r7,2
2292:	bf	cr7*4+1,3f
23037:	lwzx	r0,r7,r4
23183:	stwx	r0,r7,r3
2323:	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the remaining count */
233	add	r4,r6,r4
234	add	r3,r6,r3
235	b	.Ldst_aligned
236
/*
 * Copy fewer than 16 bytes without any alignment handling.
 * cr7 holds the low 4 bits of the count; its bits select an 8 byte move
 * (done as two word accesses), then 4, 2 and 1 byte moves.
 */
237.Lshort_copy:
238	bf	cr7*4+0,1f
23938:	lwz	r0,0(r4)
24039:	lwz	r9,4(r4)
241	addi	r4,r4,8
24284:	stw	r0,0(r3)
24385:	stw	r9,4(r3)
244	addi	r3,r3,8
2451:	bf	cr7*4+1,2f
24640:	lwz	r0,0(r4)
247	addi	r4,r4,4
24886:	stw	r0,0(r3)
249	addi	r3,r3,4
2502:	bf	cr7*4+2,3f
25141:	lhz	r0,0(r4)
252	addi	r4,r4,2
25387:	sth	r0,0(r3)
254	addi	r3,r3,2
2553:	bf	cr7*4+3,4f
25642:	lbz	r0,0(r4)
25788:	stb	r0,0(r3)
2584:	li	r3,0		/* success: nothing left uncopied */
259	blr
260
261/*
262 * exception handlers follow
263 * we have to return the number of bytes not copied
264 * for an exception on a load, we retry the rest of the copy one byte
 *     at a time and report whatever is still left if that faults too
 *     (the destination is not zeroed here)
265 */
266
/*
 * Load fixups.  Label 1NN (or 3NN) is the fixup for the load at label NN
 * (or 2NN), as paired up by the EX_TABLE entries below.  The labels are
 * stacked so that falling through the addi chain leaves r3 pointing at
 * the first destination byte that has not been written:
 *	136, 137		r3 += r7 (offset used by .Ldst_unaligned)
 *	130, 131		r3 += 24
 *	120 ... 133		r3 += 16
 *	132			r3 += 8
 *	121 ... 145		r3 unchanged
 */
267136:
268137:
269	add	r3,r3,r7
270	b	1f
271130:
272131:
273	addi	r3,r3,8
274120:
275320:
276122:
277322:
278124:
279125:
280126:
281127:
282128:
283129:
284133:
285	addi	r3,r3,8
286132:
287	addi	r3,r3,8
288121:
289321:
290344:
291134:
292135:
293138:
294139:
295140:
296141:
297142:
298123:
299144:
300145:
301
302/*
303 * here we have had a fault on a load and r3 points to the first
304 * unmodified byte of the destination
305 */
3061:	ld	r6,-24(r1)	/* original dest */
307	ld	r4,-16(r1)	/* original source */
308	ld	r5,-8(r1)	/* original count */
309	subf	r6,r6,r3	/* r6 = bytes already copied */
310	add	r4,r4,r6	/* source advanced by the same amount */
311	subf	r5,r6,r5	/* #bytes left to go */
312
313/*
314 * first see if we can copy any more bytes before hitting another exception
315 */
316	mtctr	r5
31743:	lbz	r0,0(r4)
318	addi	r4,r4,1
31989:	stb	r0,0(r3)
320	addi	r3,r3,1
321	bdnz	43b
322	li	r3,0		/* huh? all copied successfully this time? */
323	blr
324
325/*
326 * here we have trapped again, amount remaining is in ctr.
327 */
328143:	mfctr	r3
329	blr
330
331/*
332 * exception handlers for stores: we just need to work
333 * out how many bytes weren't copied
334 */
/*
 * Label 1NN (or 3NN) is the fixup for the store at label NN (or 2NN).
 * The stacked addi chain turns r3 into the address the faulting store
 * was aimed at, i.e. the first destination byte not written:
 *	182, 183		r3 += r7 (offset used by .Ldst_unaligned)
 *	371, 180		r3 += 24
 *	171, 177, 179		r3 += 16
 *	370, 372, 176, 178	r3 += 8
 *	185			r3 += 4
 *	170 ... 196		r3 unchanged
 * The result is (original dest + original count) - r3.
 */
335182:
336183:
337	add	r3,r3,r7
338	b	1f
339371:
340180:
341	addi	r3,r3,8
342171:
343177:
344179:
345	addi	r3,r3,8
346370:
347372:
348176:
349178:
350	addi	r3,r3,4
351185:
352	addi	r3,r3,4
353170:
354172:
355345:
356173:
357174:
358175:
359181:
360184:
361186:
362187:
363188:
364189:
365194:
366195:
367196:
3681:
369	ld	r6,-24(r1)	/* original dest */
370	ld	r5,-8(r1)	/* original count */
371	add	r6,r6,r5	/* r6 = one past the end of the destination */
372	subf	r3,r3,r6	/* #bytes not copied */
373	blr
374
/*
 * Fault table for the generic copy paths above.  Each entry pairs the
 * label of an instruction that touches the source or destination with
 * the label of its fixup; the fixup label is always the instruction
 * label plus 100.
 * These entries have to stay ahead of .Lcopy_page_4K: that routine
 * reuses the local labels 20-91, and a backward reference such as 20b
 * binds to the closest earlier definition.
 * NOTE(review): EX_TABLE itself is defined outside this file.
 */
375	EX_TABLE(20b,120b)
376	EX_TABLE(220b,320b)
377	EX_TABLE(21b,121b)
378	EX_TABLE(221b,321b)
379	EX_TABLE(70b,170b)
380	EX_TABLE(270b,370b)
381	EX_TABLE(22b,122b)
382	EX_TABLE(222b,322b)
383	EX_TABLE(71b,171b)
384	EX_TABLE(271b,371b)
385	EX_TABLE(72b,172b)
386	EX_TABLE(272b,372b)
387	EX_TABLE(244b,344b)
388	EX_TABLE(245b,345b)
389	EX_TABLE(23b,123b)
390	EX_TABLE(73b,173b)
391	EX_TABLE(44b,144b)
392	EX_TABLE(74b,174b)
393	EX_TABLE(45b,145b)
394	EX_TABLE(75b,175b)
395	EX_TABLE(24b,124b)
396	EX_TABLE(25b,125b)
397	EX_TABLE(26b,126b)
398	EX_TABLE(27b,127b)
399	EX_TABLE(28b,128b)
400	EX_TABLE(29b,129b)
401	EX_TABLE(30b,130b)
402	EX_TABLE(31b,131b)
403	EX_TABLE(32b,132b)
404	EX_TABLE(76b,176b)
405	EX_TABLE(33b,133b)
406	EX_TABLE(77b,177b)
407	EX_TABLE(78b,178b)
408	EX_TABLE(79b,179b)
409	EX_TABLE(80b,180b)
410	EX_TABLE(34b,134b)
411	EX_TABLE(94b,194b)
412	EX_TABLE(95b,195b)
413	EX_TABLE(96b,196b)
414	EX_TABLE(35b,135b)
415	EX_TABLE(81b,181b)
416	EX_TABLE(36b,136b)
417	EX_TABLE(82b,182b)
418	EX_TABLE(37b,137b)
419	EX_TABLE(83b,183b)
420	EX_TABLE(38b,138b)
421	EX_TABLE(39b,139b)
422	EX_TABLE(84b,184b)
423	EX_TABLE(85b,185b)
424	EX_TABLE(40b,140b)
425	EX_TABLE(86b,186b)
426	EX_TABLE(41b,141b)
427	EX_TABLE(87b,187b)
428	EX_TABLE(42b,142b)
429	EX_TABLE(88b,188b)
430	EX_TABLE(43b,143b)
431	EX_TABLE(89b,189b)
432
433/*
434 * Routine to copy a whole page of data, optimized for POWER4.
435 * On POWER4 it is more than 50% faster than the simple loop
436 * above (following the .Ldst_aligned label).
437 */
/*
 * Reached only when the count is exactly 4096 and both pointers are 4K
 * aligned.  r20-r31 are parked below the stack pointer (r1 itself is
 * not moved), next to the r3/r4/r5 copies saved at entry, and are
 * restored at 9: or by the fault fixup at label 100.
 *
 * r5 counts the 32-byte units still to copy, minus one.
 * Each pass of the outer loop at 0: moves 768 bytes (24 units) as six
 * streams 128 bytes apart, 16 doublewords per stream; the inner loop at
 * 1: runs five times with loads issued ahead of the matching stores.
 * After five outer passes r5 is 7, and the loop at 3: together with the
 * epilogue at 4: copies the last 8 * 32 = 256 bytes.
 * r3 is biased by -8 throughout, so stores use offsets starting at 8.
 */
438.Lcopy_page_4K:
439	std	r31,-32(1)
440	std	r30,-40(1)
441	std	r29,-48(1)
442	std	r28,-56(1)
443	std	r27,-64(1)
444	std	r26,-72(1)
445	std	r25,-80(1)
446	std	r24,-88(1)
447	std	r23,-96(1)
448	std	r22,-104(1)
449	std	r21,-112(1)
450	std	r20,-120(1)
451	li	r5,4096/32 - 1
452	addi	r3,r3,-8
453	li	r0,5		/* inner loop trip count */
4540:	addi	r5,r5,-24	/* this pass consumes 24 units */
455	mtctr	r0
	/* prime the pipeline: two doublewords from each of the six streams */
45620:	ld	r22,640(4)
45721:	ld	r21,512(4)
45822:	ld	r20,384(4)
45923:	ld	r11,256(4)
46024:	ld	r9,128(4)
46125:	ld	r7,0(4)
46226:	ld	r25,648(4)
46327:	ld	r24,520(4)
46428:	ld	r23,392(4)
46529:	ld	r10,264(4)
46630:	ld	r8,136(4)
46731:	ldu	r6,8(4)
468	cmpwi	r5,24		/* cr0 for the bge 0b at the end of the pass */
	/* inner loop: three doublewords per stream, r3 and r4 advance by 24 */
4691:
47032:	std	r22,648(3)
47133:	std	r21,520(3)
47234:	std	r20,392(3)
47335:	std	r11,264(3)
47436:	std	r9,136(3)
47537:	std	r7,8(3)
47638:	ld	r28,648(4)
47739:	ld	r27,520(4)
47840:	ld	r26,392(4)
47941:	ld	r31,264(4)
48042:	ld	r30,136(4)
48143:	ld	r29,8(4)
48244:	std	r25,656(3)
48345:	std	r24,528(3)
48446:	std	r23,400(3)
48547:	std	r10,272(3)
48648:	std	r8,144(3)
48749:	std	r6,16(3)
48850:	ld	r22,656(4)
48951:	ld	r21,528(4)
49052:	ld	r20,400(4)
49153:	ld	r11,272(4)
49254:	ld	r9,144(4)
49355:	ld	r7,16(4)
49456:	std	r28,664(3)
49557:	std	r27,536(3)
49658:	std	r26,408(3)
49759:	std	r31,280(3)
49860:	std	r30,152(3)
49961:	stdu	r29,24(3)
50062:	ld	r25,664(4)
50163:	ld	r24,536(4)
50264:	ld	r23,408(4)
50365:	ld	r10,280(4)
50466:	ld	r8,152(4)
50567:	ldu	r6,24(4)
506	bdnz	1b
	/* store the sixteenth doubleword of each stream */
50768:	std	r22,648(3)
50869:	std	r21,520(3)
50970:	std	r20,392(3)
51071:	std	r11,264(3)
51172:	std	r9,136(3)
51273:	std	r7,8(3)
	/* step both pointers on to the next 768-byte group */
51374:	addi	r4,r4,640
51475:	addi	r3,r3,648
515	bge	0b		/* r5 was at least 24: another full pass */
	/* remaining 256 bytes: r5 iterations of 32 bytes, then 32 more at 4: */
516	mtctr	r5
51776:	ld	r7,0(4)
51877:	ld	r8,8(4)
51978:	ldu	r9,16(4)
5203:
52179:	ld	r10,8(4)
52280:	std	r7,8(3)
52381:	ld	r7,16(4)
52482:	std	r8,16(3)
52583:	ld	r8,24(4)
52684:	std	r9,24(3)
52785:	ldu	r9,32(4)
52886:	stdu	r10,32(3)
529	bdnz	3b
5304:
53187:	ld	r10,8(4)
53288:	std	r7,8(3)
53389:	std	r8,16(3)
53490:	std	r9,24(3)
53591:	std	r10,32(3)
	/* restore the registers saved on the way in and report success */
5369:	ld	r20,-120(1)
537	ld	r21,-112(1)
538	ld	r22,-104(1)
539	ld	r23,-96(1)
540	ld	r24,-88(1)
541	ld	r25,-80(1)
542	ld	r26,-72(1)
543	ld	r27,-64(1)
544	ld	r28,-56(1)
545	ld	r29,-48(1)
546	ld	r30,-40(1)
547	ld	r31,-32(1)
548	li	r3,0
549	blr
550
551/*
552 * on an exception, reset to the beginning and jump back into the
553 * standard __copy_tofrom_user
554 */
/*
 * Restores r20-r31, reloads the original dest and source saved at
 * entry, sets the count back to 4096 and redoes the whole copy through
 * .Ldst_aligned, whose own fixups then work out the not-copied count.
 * .Ldst_aligned tests cr1 from the cmpldi at entry; the page-copy code
 * only writes cr0 (its cmpwi), so that value is still the one for 4096.
 * NOTE(review): cr7 was not loaded on this path, because the branch to
 * .Lcopy_page_4K precedes the PPC_MTOCRF at entry.  With a 4K-aligned
 * source and a count of 4096 the tail code that reads cr7 is skipped -
 * confirm this stays true if the entry conditions change.
 */
555100:	ld	r20,-120(1)
556	ld	r21,-112(1)
557	ld	r22,-104(1)
558	ld	r23,-96(1)
559	ld	r24,-88(1)
560	ld	r25,-80(1)
561	ld	r26,-72(1)
562	ld	r27,-64(1)
563	ld	r28,-56(1)
564	ld	r29,-48(1)
565	ld	r30,-40(1)
566	ld	r31,-32(1)
567	ld	r3,-24(r1)
568	ld	r4,-16(r1)
569	li	r5,4096
570	b	.Ldst_aligned
571
/*
 * Fault table for .Lcopy_page_4K: every labelled instruction 20-91 of
 * the page-copy routine is routed to the single fixup at label 100.
 * (Labels 74 and 75 sit on addi instructions, so their entries are
 * there only to keep the list regular.)
 */
572	EX_TABLE(20b,100b)
573	EX_TABLE(21b,100b)
574	EX_TABLE(22b,100b)
575	EX_TABLE(23b,100b)
576	EX_TABLE(24b,100b)
577	EX_TABLE(25b,100b)
578	EX_TABLE(26b,100b)
579	EX_TABLE(27b,100b)
580	EX_TABLE(28b,100b)
581	EX_TABLE(29b,100b)
582	EX_TABLE(30b,100b)
583	EX_TABLE(31b,100b)
584	EX_TABLE(32b,100b)
585	EX_TABLE(33b,100b)
586	EX_TABLE(34b,100b)
587	EX_TABLE(35b,100b)
588	EX_TABLE(36b,100b)
589	EX_TABLE(37b,100b)
590	EX_TABLE(38b,100b)
591	EX_TABLE(39b,100b)
592	EX_TABLE(40b,100b)
593	EX_TABLE(41b,100b)
594	EX_TABLE(42b,100b)
595	EX_TABLE(43b,100b)
596	EX_TABLE(44b,100b)
597	EX_TABLE(45b,100b)
598	EX_TABLE(46b,100b)
599	EX_TABLE(47b,100b)
600	EX_TABLE(48b,100b)
601	EX_TABLE(49b,100b)
602	EX_TABLE(50b,100b)
603	EX_TABLE(51b,100b)
604	EX_TABLE(52b,100b)
605	EX_TABLE(53b,100b)
606	EX_TABLE(54b,100b)
607	EX_TABLE(55b,100b)
608	EX_TABLE(56b,100b)
609	EX_TABLE(57b,100b)
610	EX_TABLE(58b,100b)
611	EX_TABLE(59b,100b)
612	EX_TABLE(60b,100b)
613	EX_TABLE(61b,100b)
614	EX_TABLE(62b,100b)
615	EX_TABLE(63b,100b)
616	EX_TABLE(64b,100b)
617	EX_TABLE(65b,100b)
618	EX_TABLE(66b,100b)
619	EX_TABLE(67b,100b)
620	EX_TABLE(68b,100b)
621	EX_TABLE(69b,100b)
622	EX_TABLE(70b,100b)
623	EX_TABLE(71b,100b)
624	EX_TABLE(72b,100b)
625	EX_TABLE(73b,100b)
626	EX_TABLE(74b,100b)
627	EX_TABLE(75b,100b)
628	EX_TABLE(76b,100b)
629	EX_TABLE(77b,100b)
630	EX_TABLE(78b,100b)
631	EX_TABLE(79b,100b)
632	EX_TABLE(80b,100b)
633	EX_TABLE(81b,100b)
634	EX_TABLE(82b,100b)
635	EX_TABLE(83b,100b)
636	EX_TABLE(84b,100b)
637	EX_TABLE(85b,100b)
638	EX_TABLE(86b,100b)
639	EX_TABLE(87b,100b)
640	EX_TABLE(88b,100b)
641	EX_TABLE(89b,100b)
642	EX_TABLE(90b,100b)
643	EX_TABLE(91b,100b)
644
/*
 * Only __copy_tofrom_user is exported from this file;
 * __copy_tofrom_user_base is declared with _GLOBAL above but has no
 * EXPORT_SYMBOL here.
 */
645EXPORT_SYMBOL(__copy_tofrom_user)
646