xref: /linux/tools/testing/selftests/powerpc/math/vmx_asm.S (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1/*
2 * Copyright 2015, Cyril Bur, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include "../basic_asm.h"
11
/*
 * PUSH_VMX(pos, reg): store v20-v31 into twelve consecutive 16-byte slots
 * starting at byte offset pos from sp.
 *
 *   pos - byte offset from sp of the slot that receives v20. sp + pos must
 *         be 16-byte aligned: stvx ignores the low four bits of the
 *         effective address, so a misaligned slot is silently rounded down
 *         instead of faulting.
 *   reg - scratch GPR used as the running offset; left holding pos + 176.
 *         Must not be r0, because addi and the first address operand of
 *         stvx read r0 as the constant zero.
 *
 * The caller must have reserved 12 * 16 = 192 bytes at pos.
 */
#define __PUSH_VMX_NEXT(vr,reg) \
	addi	reg,reg,16; \
	stvx	vr,reg,sp;

#define PUSH_VMX(pos,reg) \
	li	reg,pos; \
	stvx	v20,reg,sp; \
	__PUSH_VMX_NEXT(v21,reg) \
	__PUSH_VMX_NEXT(v22,reg) \
	__PUSH_VMX_NEXT(v23,reg) \
	__PUSH_VMX_NEXT(v24,reg) \
	__PUSH_VMX_NEXT(v25,reg) \
	__PUSH_VMX_NEXT(v26,reg) \
	__PUSH_VMX_NEXT(v27,reg) \
	__PUSH_VMX_NEXT(v28,reg) \
	__PUSH_VMX_NEXT(v29,reg) \
	__PUSH_VMX_NEXT(v30,reg) \
	__PUSH_VMX_NEXT(v31,reg)
38
/*
 * POP_VMX(pos, reg): reload v20-v31 from twelve consecutive 16-byte slots
 * starting at byte offset pos from sp.
 *
 *   pos - byte offset from sp of the slot that holds v20. sp + pos must be
 *         16-byte aligned: lvx ignores the low four bits of the effective
 *         address, so a misaligned slot is silently rounded down instead
 *         of faulting.
 *   reg - scratch GPR used as the running offset; left holding pos + 176.
 *         Must not be r0, because addi and the first address operand of
 *         lvx read r0 as the constant zero.
 *
 * The slot order is v20 first, v31 last, 16 bytes apart.
 */
#define __POP_VMX_NEXT(vr,reg) \
	addi	reg,reg,16; \
	lvx	vr,reg,sp;

#define POP_VMX(pos,reg) \
	li	reg,pos; \
	lvx	v20,reg,sp; \
	__POP_VMX_NEXT(v21,reg) \
	__POP_VMX_NEXT(v22,reg) \
	__POP_VMX_NEXT(v23,reg) \
	__POP_VMX_NEXT(v24,reg) \
	__POP_VMX_NEXT(v25,reg) \
	__POP_VMX_NEXT(v26,reg) \
	__POP_VMX_NEXT(v27,reg) \
	__POP_VMX_NEXT(v28,reg) \
	__POP_VMX_NEXT(v29,reg) \
	__POP_VMX_NEXT(v30,reg) \
	__POP_VMX_NEXT(v31,reg)
65
/*
 * load_vmx: fill v20-v31 from twelve consecutive 16-byte vectors.
 *
 *   r3 - address of the first vector; it goes into v20, the next one into
 *        v21, and so on up to v31. Expected to be 16-byte aligned, since
 *        lvx ignores the low four bits of the effective address.
 *
 * Clobbers r5 (left holding 176) and v20-v31.
 *
 * Careful: v20-v31 are overwritten on purpose and are not saved here, so
 * this must not be called from C. The callers in this file wrap it in
 * PUSH_VMX / POP_VMX.
 */
FUNC_START(load_vmx)
	li	r5,0
	lvx	v20,r5,r3
	/* One addi/lvx pair per remaining register, 16 bytes further each time. */
	.irp	vreg,v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31
	addi	r5,r5,16
	lvx	\vreg,r5,r3
	.endr
	blr
FUNC_END(load_vmx)
95
/*
 * check_vmx: compare v20-v31 against twelve consecutive 16-byte vectors.
 *
 *   r3 - address of the first expected vector; it is compared with v20,
 *        the next one with v21, and so on up to v31.
 *
 * Returns 0 in r3 when every register matches its vector, 1 otherwise.
 *
 * Should be safe from C: besides the r3 result it touches r0, r4, r5,
 * v0, v1, v2, cr0 and cr6 (plus whatever PUSH_BASIC_STACK and
 * POP_BASIC_STACK use). v20-v31 are only read.
 */
FUNC_START(check_vmx)
	PUSH_BASIC_STACK(32)
	mr	r4,r3
	li	r3,1			/* assume a bad result */

	/* v2 accumulates the AND of every per-register comparison mask. */
	li	r5,0
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v20
	vmr	v2,v1

	.irp	vreg,v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,\vreg
	vand	v2,v2,v1
	.endr

	/*
	 * vcmpequd. yields one 64-bit mask per doubleword, so v2 holds two
	 * independent masks. Spill v2 and AND both halves together: checking
	 * only the first doubleword would miss a mismatch that is confined
	 * to the other half of a register.
	 */
	li	r5,STACK_FRAME_LOCAL(0,0)
	stvx	v2,r5,sp
	ldx	r0,r5,sp
	addi	r5,r5,8
	ldx	r4,r5,sp		/* r4 is free: the last vector is already loaded */
	and	r0,r0,r4
	cmpdi	r0,-1			/* all ones in both halves means every compare matched */
	bne	1f
	li	r3,0
1:	POP_BASIC_STACK(32)
	blr
FUNC_END(check_vmx)
170
/*
 * test_vmx: load v20-v31 from an array, issue a raw fork syscall, then
 * check that the registers still match the array.
 *
 *   r3 - address of varray, twelve consecutive 16-byte vectors
 *   r4 - address that receives the value the fork syscall leaves in r3
 *
 * r3 on return is whatever check_vmx produced (0 = all registers match,
 * 1 = mismatch), assuming POP_BASIC_STACK leaves r3 alone.
 *
 * Safe from C: v20-v31 are non-volatile, so they are saved on entry and
 * restored before returning.
 */
FUNC_START(test_vmx)
	PUSH_BASIC_STACK(512)
	std	r4,STACK_FRAME_PARAM(1)(sp)	/* address for the fork result */
	std	r3,STACK_FRAME_PARAM(0)(sp)	/* address of varray */
	PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)

	bl	load_vmx
	nop

	li	r0,__NR_fork
	sc

	/*
	 * Pass the result of fork back to the caller.
	 * NOTE(review): std writes 8 bytes through the pointer. If the C side
	 * passes a 32-bit pid_t this should be stw - confirm against the
	 * prototype used by the caller.
	 */
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	std	r3,0(r9)

	/* Check the registers against varray after the syscall. */
	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_vmx
	nop

	POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(test_vmx)
198
/*
 * int preempt_vmx(vector int *varray, int *threads_starting, int *running)
 *
 *   r3 - varray: twelve consecutive 16-byte vectors to load into v20-v31
 *   r4 - threads_starting: counter decremented once, atomically, as the
 *        signal that the VMX registers have been loaded with varray
 *   r5 - running: polled after every successful check; the loop ends when
 *        it reads as zero
 *
 * After signalling, v20-v31 are checked against varray over and over
 * while *running is not zero.
 *
 * r3 on return is the result of the last check_vmx call: 0 when the loop
 * ended because *running became zero, 1 when a check found a mismatch.
 */
FUNC_START(preempt_vmx)
	PUSH_BASIC_STACK(512)
	std	r3,STACK_FRAME_PARAM(0)(sp)	/* vector int *varray */
	std	r4,STACK_FRAME_PARAM(1)(sp)	/* int *threads_starting */
	std	r5,STACK_FRAME_PARAM(2)(sp)	/* int *running */
	/* VMX stores need 16-byte aligned addresses, so skip STACK_FRAME_LOCAL(3,0). */
	PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)

	bl	load_vmx
	nop

	sync
	/* Atomic decrement of *threads_starting; retry if the reservation was lost. */
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b

2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_vmx
	nop
	cmpdi	r3,0
	bne	3f
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	/*
	 * running is an int, so load exactly 32 bits. A 64-bit ld would read
	 * past the object, and on big-endian the word tested by cmpwi would
	 * be the four bytes that follow *running.
	 */
	lwz	r5,0(r4)
	cmpwi	r5,0
	bne	2b

3:	POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(preempt_vmx)
236