xref: /linux/tools/testing/selftests/arm64/fp/za-test.S (revision 694e2803fece8d066bd85ce8607c630ce2b69859)
1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2021 ARM Limited.
3// Original author: Mark Brown <broonie@kernel.org>
4//
5// Scalable Matrix Extension ZA context switch test
6// Repeatedly writes unique test patterns into each ZA tile
7// and reads them back to verify integrity.
8//
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
10// (leave it running for as long as you want...)
11// kill $pids
12
13#include <asm/unistd.h>
14#include "assembler.h"
15#include "asm-offsets.h"
16#include "sme-inst.h"
17
18.arch_extension sve
19
20#define MAXVL     2048
21#define MAXVL_B   (MAXVL / 8)
22
// Backing storage, placed in .data:
//   zaref   - in-memory shadow of the ZA contents, MAXVL_B * MAXVL_B bytes
//   scratch - staging buffer for a single vector, MAXVL_B bytes
.pushsection .data
.p2align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection
33
// memcpy: byte-at-a-time memory copy
//   x0: destination address
//   x1: source address
//   x2: number of bytes to copy (zero is allowed)
// Clobbers x0-x3: x0 and x1 end up just past the copied range, x2 ends at 0.
function memcpy
	b	.Lmemcpy_test		// test first so a zero length copies nothing

.Lmemcpy_byte:
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	sub	x2, x2, #1

.Lmemcpy_test:
	cmp	x2, #0
	b.ne	.Lmemcpy_byte

	ret
endfunction
45
// pattern: generate a test pattern for storage in ZA
//   x0: pid
//   x1: row in ZA
//   x2: generation
//
// The inputs are packed into a 32-bit word which is written to every
// 32-bit lane of the scratch buffer, with the lane index counting up
// from zero:
//   bits 31:28	generation number (increments once per test_loop)
//   bits 27:16	pid
//   bits 15: 8	row number
//   bits  7: 0	32-bit lane index
//
// Overwrites x0, x1 and x3.
function pattern
	ubfiz	w3, w0, #16, #12	// bits 27:16 = pid, all other bits zero
	bfi	w3, w2, #28, #4		// bits 31:28 = generation
	bfi	w3, w1, #8, #8		// bits 15: 8 = row

	mov	w1, #(MAXVL_B / 4)	// number of 32-bit lanes in scratch
	ldr	x0, =scratch

.Lpattern_lane:
	str	w3, [x0], #4
	add	w3, w3, #1		// step the lane index in bits 7:0
	subs	w1, w1, #1
	b.ne	.Lpattern_lane

	ret
endfunction
74
// _adrza xd, xn, nrtmp
// Compute the address of the shadow data for ZA horizontal vector \xn:
//   \xd = zaref + \xn * (value returned by rdsvl with a multiplier of 1)
// \nrtmp is the bare number of a temporary X register. It is left holding
// the rdsvl result, which callers reuse as the length of one vector.
.macro _adrza xd, xn, nrtmp
	rdsvl	\nrtmp, 1
	ldr	\xd, =zaref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
81
// setup_za: write a fresh test pattern to one ZA horizontal vector and to
// its shadow copy in zaref
//   x0: pid
//   x1: row number
//   x2: generation
// The return address is parked in x4 across the nested calls.
// Overwrites x0-x5 and x12.
function setup_za
	mov	x12, x1			// w12 selects the ZA vector below
	mov	x4, x30			// bl overwrites x30

	bl	pattern			// scratch = pattern(pid, row, generation)

	_adrza	x0, x12, 2		// x0 = shadow row, x2 = row length in bytes
	ldr	x1, =scratch
	mov	x5, x0			// memcpy advances x0, keep the row address
	bl	memcpy			// shadow row = first x2 bytes of scratch

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
100
// memcmp: byte-wise memory compare
//   x0: first buffer
//   x1: second buffer
//   x2: number of bytes to compare (zero returns immediately)
// Returns only if all bytes match. On the first mismatch control passes
// to barf with x0-x2 restored to their values on entry.
// Clobbers x3-x5.
function memcmp
	cbz	x2, .Lmemcmp_ret

	sub	sp, sp, #0x20		// spill the arguments for barf
	stp	x0, x1, [sp]
	str	x2, [sp, #0x10]

	mov	x5, xzr			// byte offset into both buffers
.Lmemcmp_next:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	.Lmemcmp_finish		// leave with NE set
	subs	x2, x2, #1
	b.ne	.Lmemcmp_next		// falls through with EQ set when done

.Lmemcmp_finish:
	// Nothing below touches the flags until the conditional branch
	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp]
	add	sp, sp, #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction
126
// check_za: verify that a ZA vector matches its shadow in memory, else abort
//   x0: row number
// The vector is stored to scratch and compared against its zaref row by
// tail-calling memcmp, which only returns (to our caller) on a match.
// Clobbers x0-x7 and x12.
function check_za
	mov	x12, x0			// w12 selects the ZA vector to read back
	mov	x3, x30			// bl overwrites x30

	ldr	x7, =scratch		// x7 is scratch
	_adrza	x5, x0, 6		// x5 = expected data, x6 = its length in bytes
	mov	x4, x0			// copy of the row number (not read again here)

	mov	x1, x6			// Poison scratch before the store
	mov	x0, x7
	bl	memfill_ae

	mov	x30, x3			// memcmp returns straight to our caller
	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
150
// irritator_handler: SIGUSR1 handler, installed by _start
//   x2: ucontext of the interrupted code
// Modifies the live SME register state; signal return will undo our changes.
function irritator_handler
	// Bump the signal count, which the interrupted code keeps in x23
	ldr	x0, [x2, #(ucontext_regs + 23 * 8)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 23 * 8)]

	// This will reset ZA to all bits 0
	smstop
	smstart_za

	ret
endfunction
164
// tickle_handler: SIGUSR2 handler, installed by _start
//   x2: ucontext of the interrupted code
// Only counts the signal.
function tickle_handler
	// Bump the signal count, which the interrupted code keeps in x23
	ldr	x0, [x2, #(ucontext_regs + 23 * 8)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 23 * 8)]

	ret
endfunction
173
// terminate_handler: SIGINT/SIGTERM handler, installed by _start
//   w0: signal number
//   x2: ucontext of the interrupted code
// Prints the signal number plus the iteration count (saved x22) and signal
// count (saved x23) of the interrupted code, then exits with status 0.
// Does not return.
function terminate_handler
	mov	x20, x2			// ucontext pointer
	mov	w21, w0			// signal number

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #(ucontext_regs + 22 * 8)]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #(ucontext_regs + 23 * 8)]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, xzr			// exit status 0
	svc	#0
endfunction
192
// setsignal: install a signal handler with rt_sigaction
//   w0: signal number
//   x1: sa_action
//   w2: sa_flags
// A zeroed sigaction structure is built on the stack with only the handler
// and flags filled in. If the syscall reports failure a message is printed
// and the program aborts.
// Clobbers x0-x6,x8

// Stack frame: 16 bytes holding the saved x30, then the sigaction structure
// rounded up to a multiple of 16 bytes
#define SETSIGNAL_FRAME	((sa_sz + 15) / 16 * 16 + 16)

function setsignal
	str	x30, [sp, #-SETSIGNAL_FRAME]!

	mov	w4, w0			// signal number
	mov	x5, x1			// handler
	mov	w6, w2			// flags

	add	x0, sp, #16		// clear the sigaction structure
	mov	x1, #sa_sz
	bl	memclr

	add	x1, sp, #16		// act
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4			// signum
	mov	x2, xzr			// oldact = NULL
	mov	x3, #sa_mask_sz		// sigsetsize
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_fail

	ldr	x30, [sp], #SETSIGNAL_FRAME
	ret

.Lsetsignal_fail:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
225
// Main program entry point
//
// Installs the signal handlers, enables ZA, then loops forever: fill every
// ZA row and its shadow with a per-generation pattern, yield the CPU, and
// check that every row still matches its shadow.
//
// Registers that stay live across the loop:
//   x19: vector length in bits, as read at startup
//   x20: our pid
//   x21: countdown counter for the per-row loops
//   x22: generation number (iteration count)
//   x23: signal count, incremented by the handlers through the saved context
//   x24: number of rows, used to turn the countdown into a row number
.globl _start
function _start
	mov	x23, #0		// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	.Lbad_vl
	cmp	x19, #2048
	b.hi	.Lbad_vl
	tst	x19, #(8 - 1)
	b.eq	.Lvl_ok

.Lbad_vl:
	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

.Lvl_ok:
	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8		// The vector length must not change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
.Lsetup_row:
	mov	x0, x20
	sub	x1, x21, #1	// rows are filled from the last down to row 0
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	.Lsetup_row

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// Row count: row number = x24 - x21
.Lcheck_row:
	sub	x0, x24, x21		// rows are checked from row 0 upwards
	bl	check_za
	subs	x21, x21, #1
	b.ne	.Lcheck_row

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

	// Abort the program. Also reached from setsignal.
.Labort:
	// Look up our own pid rather than passing 0 to kill, which would
	// deliver SIGABRT to every process in our process group.
	mov	x8, #__NR_getpid
	svc	#0			// x0 = our pid
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction
324
// barf: report a ZA data mismatch and abort the process
//   x0: address of the expected data
//   x1: address of the data actually read
//   x2: size of the data in bytes
// Reached from memcmp while _start is verifying rows, so it also reads the
// main loop's registers: x20 (pid), x22 (iteration), and x21/x24, from which
// the row under test is recovered as x24 - x21.
// Does not return: the process sends itself SIGABRT.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack

	mrs	x13, S3_3_C4_C2_2	// capture SVCR before smstop changes it

	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	sub	x0, x24, x21	// x21 is only a countdown; this is the row passed to check_za
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"
	puts	"\tSVCR: "
	mov	x0, x13
	bl	putdecn

	mov	x8, #__NR_getpid	// kill(getpid(), SIGABRT)
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction
375
// vl_barf: report an unexpected active vector length and exit with status 1
//   x0: the vector length that was read (in bits, as compared by _start)
// Does not return.
function vl_barf
	mov	x10, x0			// keep the value while the message is printed

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
387
// svcr_barf: report an unexpected SVCR value and exit with status 1
//   x0: the SVCR value that was read
// Does not return.
function svcr_barf
	mov	x10, x0			// keep the value while the message is printed

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
399