xref: /linux/arch/powerpc/lib/copypage_power7.S (revision bd628c1bed7902ec1f24ba0fe70758949146abbe)
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2012
17 *
18 * Author: Anton Blanchard <anton@au.ibm.com>
19 */
20#include <asm/page.h>
21#include <asm/ppc_asm.h>
22
/*
 * copypage_power7: copy PAGE_SIZE bytes from the address in r4 ("from")
 * to the address in r3 ("to"), 128 bytes per loop iteration.
 *
 * With CONFIG_ALTIVEC the copy is done with lvx/stvx when enter_vmx_ops
 * returns non-zero in r3; when it returns zero, or without CONFIG_ALTIVEC,
 * the copy is done with ld/std through general purpose registers.
 *
 * Registers written by the code visible here: r0, r5-r12, ctr, cr0
 * (CONFIG_ALTIVEC only) and v0-v7 (VMX loop only); r3 and r4 are advanced
 * as the copy proceeds. r14-r20 are used by the GPR loop but are saved
 * and restored around it.
 *
 * NOTE(review): enter_vmx_ops and exit_vmx_ops are defined elsewhere; what
 * they do and which registers they clobber is not visible from this file.
 */
23_GLOBAL(copypage_power7)
24	/*
25	 * We prefetch both the source and destination using enhanced touch
26	 * instructions. We use a stream ID of 0 for the load side and
27	 * 1 for the store side. Since source and destination are page
28	 * aligned we don't need to clear the bottom 7 bits of either
29	 * address.
30	 */
31	ori	r9,r3,1		/* stream=1 => to */
32
33#ifdef CONFIG_PPC_64K_PAGES
34	lis	r7,0x0E01	/* depth=7
35				 * units/cachelines=512 */
36#else
37	lis	r7,0x0E00	/* depth=7 */
38	ori	r7,r7,0x1000	/* units/cachelines=32 */
39#endif
40	ori	r10,r7,1	/* stream=1 */
41
42	lis	r8,0x8000	/* GO=1 */
43	clrldi	r8,r8,32	/* keep only the low 32 bits (lis sign-extends 0x8000) */
44
45	/* setup read stream 0  */
46	dcbt	0,r4,0b01000  	/* addr from */
47	dcbt	0,r7,0b01010   /* length and depth from */
48	/* setup write stream 1 */
49	dcbtst	0,r9,0b01000   /* addr to */
50	dcbtst	0,r10,0b01010  /* length and depth to */
51	eieio			/* NOTE(review): presumably orders the stream setup above before the GO below - confirm */
52	dcbt	0,r8,0b01010	/* all streams GO */
53
54#ifdef CONFIG_ALTIVEC
	/*
	 * Call enter_vmx_ops. r3 ("to") and r4 ("from") are stored below
	 * the current r1, at the addresses that become STK_REG(R31) and
	 * STK_REG(R30) of the new frame once the stdu has moved r1 down by
	 * STACKFRAMESIZE. LR is stored at 16(r1) before the frame is pushed,
	 * which is why it is reloaded from STACKFRAMESIZE+16(r1) afterwards.
	 */
55	mflr	r0
56	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
57	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
58	std	r0,16(r1)
59	stdu	r1,-STACKFRAMESIZE(r1)
60	bl	enter_vmx_ops
	/*
	 * Test the result now, because r3 is about to be reloaded with the
	 * saved "to" pointer. The beq further down consumes this compare.
	 */
61	cmpwi	r3,0
62	ld	r0,STACKFRAMESIZE+16(r1)
63	ld	r3,STK_REG(R31)(r1)
64	ld	r4,STK_REG(R30)(r1)
65	mtlr	r0
66
	/*
	 * ctr = number of 128-byte chunks in a page. Loaded before the
	 * branch so that both copy loops find it already set up.
	 */
67	li	r0,(PAGE_SIZE/128)
68	mtctr	r0
69
	/* enter_vmx_ops returned 0: use the GPR copy, frame still allocated */
70	beq	.Lnonvmx_copy
71
	/* The VMX loop does not touch the stack, so pop the frame now */
72	addi	r1,r1,STACKFRAMESIZE
73
	/*
	 * Index registers holding the byte offsets of the 2nd to 8th
	 * 16-byte vector within each 128-byte chunk.
	 */
74	li	r6,16
75	li	r7,32
76	li	r8,48
77	li	r9,64
78	li	r10,80
79	li	r11,96
80	li	r12,112
81
	/*
	 * Per iteration: load eight vectors from the source, advance r4,
	 * store the eight vectors to the destination, advance r3.
	 */
82	.align	5
831:	lvx	v7,0,r4
84	lvx	v6,r4,r6
85	lvx	v5,r4,r7
86	lvx	v4,r4,r8
87	lvx	v3,r4,r9
88	lvx	v2,r4,r10
89	lvx	v1,r4,r11
90	lvx	v0,r4,r12
91	addi	r4,r4,128
92	stvx	v7,0,r3
93	stvx	v6,r3,r6
94	stvx	v5,r3,r7
95	stvx	v4,r3,r8
96	stvx	v3,r3,r9
97	stvx	v2,r3,r10
98	stvx	v1,r3,r11
99	stvx	v0,r3,r12
100	addi	r3,r3,128
101	bdnz	1b
102
	/*
	 * LR was already restored above and this is a plain branch, so a
	 * return from exit_vmx_ops goes straight back to our caller.
	 */
103	b	exit_vmx_ops		/* tail call optimise */
104
105#else
	/*
	 * No VMX support built in: load the chunk count and allocate the
	 * frame whose save slots .Lnonvmx_copy uses for r14-r20.
	 */
106	li	r0,(PAGE_SIZE/128)
107	mtctr	r0
108
109	stdu	r1,-STACKFRAMESIZE(r1)
110#endif
111
	/*
	 * Reached in both configurations with ctr = PAGE_SIZE/128 and a
	 * STACKFRAMESIZE frame allocated. Save r14-r20 first, because the
	 * loop below overwrites them.
	 */
112.Lnonvmx_copy:
113	std	r14,STK_REG(R14)(r1)
114	std	r15,STK_REG(R15)(r1)
115	std	r16,STK_REG(R16)(r1)
116	std	r17,STK_REG(R17)(r1)
117	std	r18,STK_REG(R18)(r1)
118	std	r19,STK_REG(R19)(r1)
119	std	r20,STK_REG(R20)(r1)
120
	/*
	 * Per iteration: load 16 doublewords (128 bytes) from the source,
	 * advance r4, store them to the destination, advance r3.
	 */
1211:	ld	r0,0(r4)
122	ld	r5,8(r4)
123	ld	r6,16(r4)
124	ld	r7,24(r4)
125	ld	r8,32(r4)
126	ld	r9,40(r4)
127	ld	r10,48(r4)
128	ld	r11,56(r4)
129	ld	r12,64(r4)
130	ld	r14,72(r4)
131	ld	r15,80(r4)
132	ld	r16,88(r4)
133	ld	r17,96(r4)
134	ld	r18,104(r4)
135	ld	r19,112(r4)
136	ld	r20,120(r4)
137	addi	r4,r4,128
138	std	r0,0(r3)
139	std	r5,8(r3)
140	std	r6,16(r3)
141	std	r7,24(r3)
142	std	r8,32(r3)
143	std	r9,40(r3)
144	std	r10,48(r3)
145	std	r11,56(r3)
146	std	r12,64(r3)
147	std	r14,72(r3)
148	std	r15,80(r3)
149	std	r16,88(r3)
150	std	r17,96(r3)
151	std	r18,104(r3)
152	std	r19,112(r3)
153	std	r20,120(r3)
154	addi	r3,r3,128
155	bdnz	1b
156
	/* Restore r14-r20, pop the frame and return to the caller */
157	ld	r14,STK_REG(R14)(r1)
158	ld	r15,STK_REG(R15)(r1)
159	ld	r16,STK_REG(R16)(r1)
160	ld	r17,STK_REG(R17)(r1)
161	ld	r18,STK_REG(R18)(r1)
162	ld	r19,STK_REG(R19)(r1)
163	ld	r20,STK_REG(R20)(r1)
164	addi	r1,r1,STACKFRAMESIZE
165	blr
166