xref: /linux/arch/powerpc/lib/copypage_power7.S (revision 4b132aacb0768ac1e652cf517097ea6f237214b9)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
8#include <asm/page.h>
9#include <asm/ppc_asm.h>
10
/*
 * copypage_power7 - copy PAGE_SIZE bytes from the page at r4 to the page
 * at r3.
 *
 * In:	r3 = destination ("to") page address
 *	r4 = source page address
 *	(presumably the arguments of a C-level copypage_power7(to, from);
 *	the prototype is not visible in this file - confirm against it)
 *
 * Both copy loops move 128 bytes per iteration and run PAGE_SIZE/128
 * times, with the iteration count held in CTR.
 *
 * With CONFIG_ALTIVEC the routine first calls enter_vmx_ops(). A non-zero
 * return selects the VMX loop (8 x 16-byte lvx/stvx per iteration), which
 * finishes by tail-calling exit_vmx_ops(). A zero return falls back to
 * the GPR loop (16 x 8-byte ld/std per iteration) at .Lnonvmx_copy.
 * Without CONFIG_ALTIVEC only the GPR loop is built.
 *
 * Stack: one STACKFRAMESIZE frame is pushed. The VMX path pops it before
 * its copy loop; the GPR path keeps it to hold r14-r20 and pops it just
 * before returning.
 *
 * Registers written by the code visible here: r0, r5-r12, CTR, plus cr0
 * and v0-v7 when CONFIG_ALTIVEC is set. r14-r20 are used by the GPR loop
 * but saved and restored around it. Whatever DCBT_SETUP_STREAMS,
 * enter_vmx_ops() and exit_vmx_ops() touch is not visible from this file.
 */
_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	/*
	 * The macro is defined outside this file. It is handed the source
	 * address (r4), the two control words built above (r7, r10), the
	 * tagged destination address (r9) and r8 (presumably a scratch
	 * register - confirm against the macro definition).
	 */
	DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)

#ifdef CONFIG_ALTIVEC
	/*
	 * Preserve r3, r4 and LR across the call to enter_vmx_ops(). The
	 * two pointer saves are addressed relative to the frame that the
	 * stdu below creates, so once r1 has been lowered they are found
	 * at STK_REG(R31)(r1) and STK_REG(R30)(r1). LR is stored at
	 * 16(r1) of the incoming frame, i.e. STACKFRAMESIZE+16(r1) after
	 * the stdu.
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_ops)
	cmpwi	r3,0		/* tested by the beq further down */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	/* One loop pass copies 128 bytes, so PAGE_SIZE/128 passes. */
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/*
	 * enter_vmx_ops() returned 0: take the GPR loop. The frame pushed
	 * above is still in place and is reused there for r14-r20.
	 */
	beq	.Lnonvmx_copy

	/* VMX path: the frame is no longer needed, pop it now. */
	addi	r1,r1,STACKFRAMESIZE

	/* Index registers for the eight 16-byte slots of a 128-byte chunk. */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	/* Load a full 128-byte chunk into v7..v0, then store it out. */
	.align	5
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	/*
	 * LR was restored by the mtlr above and the frame is already
	 * popped, so a plain branch lets exit_vmx_ops() return straight
	 * to our caller.
	 */
	b	CFUNC(exit_vmx_ops)		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/* Push the frame that .Lnonvmx_copy uses to save r14-r20. */
	stdu	r1,-STACKFRAMESIZE(r1)
#endif

	/*
	 * GPR copy. On entry: r3 = destination, r4 = source, CTR =
	 * PAGE_SIZE/128 and a STACKFRAMESIZE frame is pushed on r1.
	 * r14-r20 are used as data registers by the loop, so they are
	 * saved in that frame first and restored afterwards. r13 is not
	 * used.
	 */
.Lnonvmx_copy:
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* Load sixteen doublewords (128 bytes), then store them out. */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the saved registers, pop the frame and return. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr
144