/* xref: /linux/arch/powerpc/lib/copypage_power7.S (revision e3b9f1e81de2083f359bacd2a94bf1c024f2ede0) */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

/*
 * copypage_power7 - copy one page, 128 bytes per loop iteration.
 *
 * In:	r3 = destination page ("to")
 *	r4 = source page ("from")
 *	Both must be page aligned: the prefetch stream setup ORs the
 *	stream ID into the low address bits without masking them first.
 *
 * Flow:
 *	1. Describe and start two prefetch streams (0 = loads from r4,
 *	   1 = stores to r3).
 *	2. With CONFIG_ALTIVEC, call enter_vmx_copy; a non-zero return
 *	   selects the VMX loop, zero falls back to .Lnonvmx_copy.
 *	3. Without CONFIG_ALTIVEC, go straight to .Lnonvmx_copy.
 *
 * In both copy loops ctr holds the number of 128-byte chunks left
 * (PAGE_SIZE/128 initially) and r3/r4 advance by 128 per iteration.
 */
_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	/*
	 * lis sign-extends, so clear the upper 32 bits to leave only the
	 * GO bit (0x80000000) set in r8.
	 */
	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

.machine push
.machine "power4"
	/*
	 * The TH=0b01000 form passes a stream's start address, the
	 * TH=0b01010 form passes its control word (length/depth, or GO).
	 */
	/* setup read stream 0  */
	dcbt	0,r4,0b01000  	/* addr from */
	dcbt	0,r7,0b01010   /* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000   /* addr to */
	dcbtst	0,r10,0b01010  /* length and depth to */
	eieio			/* presumably orders the setup ahead of GO */
	dcbt	0,r8,0b01010	/* all streams GO */
.machine pop

#ifdef CONFIG_ALTIVEC
	/*
	 * enter_vmx_copy is a real call, so LR and both pointers have to
	 * survive it. r3/r4 are written into the R31/R30 slots of the frame
	 * that the stdu below is about to create (hence the -STACKFRAMESIZE
	 * bias); LR goes into the caller's frame at 16(r1).
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_copy
	/*
	 * cr0 is set here and consumed by the beq below; none of the
	 * intervening ld/mtlr/li/mtctr instructions modify it.
	 */
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/* r3 was 0: use the GPR loop. The frame stays allocated for it. */
	beq	.Lnonvmx_copy

	/* The VMX loop does not touch the stack, so drop the frame now. */
	addi	r1,r1,STACKFRAMESIZE

	/* r6..r12 = byte offsets of vectors 1..7 within a 128-byte chunk */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
	/* Load eight 16-byte vectors from the source, then store all eight. */
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	/*
	 * LR was restored above, so exit_vmx_copy returns directly to our
	 * caller.
	 */
	b	exit_vmx_copy		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/*
	 * Allocate the frame so .Lnonvmx_copy sees the same stack layout as
	 * on the CONFIG_ALTIVEC fallback path.
	 */
	stdu	r1,-STACKFRAMESIZE(r1)
#endif

	/*
	 * GPR copy loop. Entered with a STACKFRAMESIZE frame allocated and
	 * ctr = PAGE_SIZE/128. r14-r20 are used as extra temporaries, so they
	 * are saved in the frame here and restored before returning.
	 */
.Lnonvmx_copy:
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* Load sixteen doublewords from the source, then store all sixteen. */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the saved registers and release the frame. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr