xref: /linux/tools/testing/selftests/arm64/fp/sve-test.S (revision 02091cbe9cc4f18167208eec1d6de636cc731817)
1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2015-2019 ARM Limited.
3// Original author: Dave Martin <Dave.Martin@arm.com>
4//
5// Simple Scalable Vector Extension context switch test
6// Repeatedly writes unique test patterns into each SVE register
7// and reads them back to verify integrity.
8//
9// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
10// (leave it running for as long as you want...)
11// kill $pids
12
13#include <asm/unistd.h>
14#include "assembler.h"
15#include "asm-offsets.h"
16#include "sme-inst.h"
17
// Number of SVE Z (vector) registers exercised by the test
#define NZR	32
// Number of SVE P (predicate) registers exercised by the test
#define NPR	16
// Size in bytes of one Z register at the largest vector length catered
// for (2048 bits); used to size the shadow and scratch buffers
#define MAXVL_B	(2048 / 8)

// Allow SVE instructions to be assembled in this file
.arch_extension sve
23
// Load SVE vector register Z<zt> from the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_ldr_v zt, xn
	ldr	z\zt, [x\xn]
.endm
27
// Store SVE vector register Z<zt> to the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_str_v zt, xn
	str	z\zt, [x\xn]
.endm
31
// Load SVE predicate register P<pt> from the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_ldr_p pt, xn
	ldr	p\pt, [x\xn]
.endm
35
// Store SVE predicate register P<pt> to the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_str_p pt, xn
	str	p\pt, [x\xn]
.endm
39
// Generate accessor functions to read/write programmatically selected
// SVE registers.
// x0 is the register index to access
// x1 is the memory address to read from (setz,setp) or store to (getz,getp)
// All clobber x0-x2
// Z-register accessors: setz is built on the load macro, getz on the
// store macro, each covering NZR registers.
define_accessor setz, NZR, _sve_ldr_v
define_accessor getz, NZR, _sve_str_v

// P-register accessors: setp is built on the load macro, getp on the
// store macro, each covering NPR registers.
define_accessor setp, NPR, _sve_ldr_p
define_accessor getp, NPR, _sve_str_p
49
// Declare some storage space to shadow the SVE register contents.
// Every area is sized for the largest vector length catered for:
// MAXVL_B bytes per Z register and MAXVL_B / 8 bytes per predicate
// register (and for the FFR).
.pushsection .data
.p2align 4
zref:				// shadow of the NZR Z registers
	.space	MAXVL_B * NZR
pref:				// shadow of the NPR P registers
	.space	MAXVL_B / 8 * NPR
ffrref:				// shadow of the FFR
	.space	MAXVL_B / 8
scratch:			// staging buffer for patterns and read-back data
	.space	MAXVL_B
.popsection
63
// Generate a test pattern for storage in SVE registers
// x0: pid	(only the low 16 bits are used)
// x1: register number (6 bits)
// x2: generation (4 bits)
//
// These values are used to construct a 32-bit pattern that is repeated in the
// scratch buffer as many times as will fit:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:22	32-bit lane index
// bits 21:16	register number
// bits 15: 0	pid
//
// Clobbers x0-x2.
function pattern
	// A pid may be wider than 16 bits. Without this mask its upper bits
	// would be ORed into the register number and lane index fields, so
	// different registers could end up with the same pattern.
	and	w0, w0, #0xffff
	orr	w1, w0, w1, lsl #16
	orr	w2, w1, w2, lsl #28

	ldr	x0, =scratch
	mov	w1, #MAXVL_B / 4	// number of 32-bit lanes in scratch

0:	str	w2, [x0], #4
	add	w2, w2, #(1 << 22)	// next lane index
	subs	w1, w1, #1
	bne	0b

	ret
endfunction
90
// Compute the address of the shadow data for SVE Z-register Z<xn>:
//   \xd = zref + \xn * (vector length in bytes)
// \xd and \xn are full register names; \nrtmp is the bare number of a
// scratch X register, which is left holding the vector length in bytes.
.macro _adrz xd, xn, nrtmp
	rdvl	x\nrtmp, #1
	ldr	\xd, =zref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
97
// Compute the address of the shadow data for SVE P-register P<xn - NZR>:
//   \xn -= NZR
//   \xd  = pref + \xn * (vector length in bytes / 8)
// \xd and \xn are full register names; \nrtmp is the bare number of a
// scratch X register, which is left holding the predicate size in bytes.
// Note that \xn is modified: on exit it holds the P-register index.
.macro _adrp xd, xn, nrtmp
	rdvl	x\nrtmp, #1
	lsr	x\nrtmp, x\nrtmp, #3
	ldr	\xd, =pref
	sub	\xn, \xn, #NZR
	madd	\xd, x\nrtmp, \xn, \xd
.endm
106
// Fill a SVE Z-register and its shadow in memory with a fresh test pattern
// x0: pid
// x1: register number
// x2: generation
// Keeps its state in x4-x6 across the calls to pattern, memcpy and setz,
// so relies on those leaving x4-x6 untouched.
function setup_zreg
	mov	x6, x1			// register number
	mov	x4, x30			// return address

	bl	pattern			// build the pattern in scratch

	_adrz	x5, x6, 2		// x5 = shadow address, x2 = VL in bytes
	mov	x0, x5
	ldr	x1, =scratch
	bl	memcpy			// presumably (dst, src, len): shadow <- scratch

	mov	x1, x5
	mov	x0, x6
	bl	setz			// load Z<x6> from its shadow

	ret	x4
endfunction
127
// Fill a SVE P-register and its shadow in memory with a fresh test pattern
// x0: pid
// x1: register number (NZR-based: P<x1 - NZR> is the register written)
// x2: generation
// Keeps its state in x4-x6 across the calls to pattern, memcpy and setp,
// so relies on those leaving x4-x6 untouched.
function setup_preg
	mov	x6, x1			// register number
	mov	x4, x30			// return address

	bl	pattern			// build the pattern in scratch

	// x5 = shadow address, x2 = predicate size in bytes; the macro also
	// rebases x6 so that it now holds the P-register index
	_adrp	x5, x6, 2
	mov	x0, x5
	ldr	x1, =scratch
	bl	memcpy			// presumably (dst, src, len): shadow <- scratch

	mov	x1, x5
	mov	x0, x6
	bl	setp			// load P<x6> from its shadow

	ret	x4
endfunction
148
// Set up test pattern in the FFR
// x0: pid
// x2: generation
//
// The FFR must hold a canonical value: a run of low "1" bits followed
// only by zeros. That allows 17 distinct values per 16 bits of FFR, so a
// 4 bit signature is formed from the PID and the generation and used as
// the number of leading ones. The remaining lanes of the FFR are zeroed.
// Beware: corrupts P0.
// When built with SSVE defined this function does nothing.
function setup_ffr
#ifndef SSVE
	// w0 = signature: generation[1:0] in bits 3:2, pid[1:0] in bits 1:0
	and	w0, w0, #0x3
	bfi	w0, w2, #2, #2

	// w1 = (1 << signature) - 1, i.e. "signature" low one bits
	mov	w1, #1
	lsl	w1, w1, w0
	sub	w1, w1, #1

	mov	x4, x30			// return address

	// First 16 bits of the shadow take the pattern...
	ldr	x0, =ffrref
	strh	w1, [x0], #2

	// ...and the rest of the shadow (VL / 8 - 2 bytes) is cleared
	rdvl	x1, #1
	lsr	x1, x1, #3
	sub	x1, x1, #2
	bl	memclr

	// Load the shadow into P0, then move P0 into the FFR
	ldr	x1, =ffrref
	mov	x0, #0
	bl	setp

	wrffr	p0.b

	ret	x4
#else
	ret
#endif
endfunction
187
// Trivial memory compare
// x0: address of the first buffer
// x1: address of the second buffer
// x2: number of bytes to compare
// Returns only if all bytes match; on the first difference it branches to
// barf with the original x0-x2 restored, and the program is aborted.
// Clobbers x0-x5.
function memcmp
	cbz	x2, .Lmemcmp_done

	// Stash the arguments so that they can be handed to barf unchanged
	stp	x0, x1, [sp, #-32]!
	str	x2, [sp, #16]

	mov	x5, xzr				// byte offset
.Lmemcmp_next:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	.Lmemcmp_restore		// mismatch: leave with NE set
	subs	x2, x2, #1
	b.ne	.Lmemcmp_next			// falls through with EQ when done

.Lmemcmp_restore:
	// The loads leave the condition flags alone, so NE here still means
	// that a mismatching byte was found
	ldr	x2, [sp, #16]
	ldp	x0, x1, [sp], #32
	b.ne	barf

.Lmemcmp_done:
	ret
endfunction
213
// Check a SVE Z-register against its shadow in memory; abort on mismatch
// x0: reg number
// Clobbers x0-x7.
function check_zreg
	mov	x3, x30			// return address
	mov	x4, x0			// register number

	ldr	x7, =scratch
	_adrz	x5, x0, 6		// x5 = shadow address, x6 = VL in bytes

	// Pre-fill scratch so stale data cannot pass for a register dump
	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae

	// Dump the live register into scratch
	mov	x1, x7
	mov	x0, x4
	bl	getz

	// Tail-call memcmp(shadow, scratch, VL): it returns straight to our
	// caller on a match
	mov	x30, x3
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
238
// Check a SVE P-register against its shadow in memory; abort on mismatch
// x0: reg number (NZR-based: P<x0 - NZR> is the register checked)
// Clobbers x0-x7.
function check_preg
	mov	x3, x30			// return address

	// x5 = shadow address, x6 = predicate size in bytes. The macro also
	// rebases x0 to the P-register index, so x4 must be copied afterwards.
	_adrp	x5, x0, 6
	mov	x4, x0
	ldr	x7, =scratch

	// Pre-fill scratch so stale data cannot pass for a register dump
	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae

	// Dump the live register into scratch
	mov	x1, x7
	mov	x0, x4
	bl	getp

	// Tail-call memcmp(shadow, scratch, size): it returns straight to our
	// caller on a match
	mov	x30, x3
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
263
// Check the FFR against its shadow in memory; abort on mismatch
// Beware -- corrupts P0.
// Clobbers x0-x5.
// When built with SSVE defined this function does nothing.
function check_ffr
#ifndef SSVE
	mov	x3, x30			// return address

	ldr	x4, =scratch
	rdvl	x5, #1
	lsr	x5, x5, #3		// x5 = FFR size in bytes (VL / 8)

	// Pre-fill scratch so stale data cannot pass for an FFR dump
	mov	x1, x5
	mov	x0, x4
	bl	memfill_ae

	// Read the FFR through P0 and dump P0 into scratch
	rdffr	p0.b
	mov	x1, x4
	mov	x0, #0
	bl	getp

	// Tail-call memcmp(ffrref, scratch, size): it returns straight to our
	// caller on a match
	mov	x30, x3
	mov	x2, x5
	mov	x1, x4
	ldr	x0, =ffrref
	b	memcmp
#else
	ret
#endif
endfunction
293
// Handler installed for SIGUSR1 by _start.
// Any SVE register modified here can cause corruption in the main
// thread -- but *only* the registers modified here.
// x2: pointer to the signal ucontext (third SA_SIGINFO handler argument)
function irritator_handler
	// Bump the signal count that the interrupted code keeps in x23,
	// by editing the saved copy of x23 in the signal context
	ldr	x0, [x2, #(ucontext_regs + 23 * 8)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 23 * 8)]

	// Corrupt some random Z-regs
	// (x0 = this handler's address rounded down to a multiple of 16;
	// the value is not used again in this function)
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v31.8b, #3
	movi	v9.16b, #2
	movi	v0.8b, #1
#ifndef SSVE
	// And P0
	rdffr	p0.b
	// And FFR
	wrffr	p15.b
#endif

	ret
endfunction
316
// Handler installed for SIGUSR2 by _start: counts the signal and does
// nothing else.
// x2: pointer to the signal ucontext (third SA_SIGINFO handler argument)
function tickle_handler
	// Bump the signal count that the interrupted code keeps in x23,
	// by editing the saved copy of x23 in the signal context
	ldr	x0, [x2, #(ucontext_regs + 23 * 8)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 23 * 8)]

	ret
endfunction
325
// Handler installed for SIGINT and SIGTERM by _start: prints a summary
// taken from the interrupted context and exits with status 0.
// w0: signal number
// x2: pointer to the signal ucontext (third SA_SIGINFO handler argument)
// Does not return.
function terminate_handler
	// Move the arguments out of the way of the output helpers
	mov	x20, x2			// ucontext pointer
	mov	w21, w0			// signal number

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec

	// Saved x22 is the main loop's iteration counter
	puts	", no error, iterations="
	ldr	x0, [x20, #(ucontext_regs + 22 * 8)]
	bl	putdec

	// Saved x23 is the signal counter maintained by the other handlers
	puts	", signals="
	ldr	x0, [x20, #(ucontext_regs + 23 * 8)]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #0
	svc	#0
endfunction
344
// Install a signal handler with rt_sigaction; abort the program on failure
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
function setsignal
	// Frame: x30 at [sp], then a struct sigaction (sa_sz bytes, rounded
	// up to a multiple of 16) at [sp, #16]
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	// Park the arguments while the structure is being cleared
	mov	w6, w2			// sa_flags
	mov	x5, x1			// sa_action
	mov	w4, w0			// signal number

	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr

	// rt_sigaction(signal, &act, NULL, sa_mask_sz)
	add	x1, sp, #16
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4
	mov	x2, xzr
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_failed

	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret

.Lsetsignal_failed:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
377
// Main program entry point
//
// Installs the signal handlers, reports the vector length and PID, then
// loops forever: every iteration loads a fresh pattern into all Z and P
// registers and the FFR, then reads them back and compares them with
// their shadow copies. The loop only ends through a signal or an abort.
//
// Long-lived registers:
// x19: vector length in bits, as read at startup
// x20: pid
// x21: index of the register currently being set up or checked
//      (0..NZR-1 are Z registers, NZR..NZR+NPR-1 are P registers)
// x22: iteration count; its low 4 bits are the pattern generation
// x23: signal count, incremented by the SIGUSR1/SIGUSR2 handlers
.globl _start
function _start
	mov	x23, #0		// Irritation signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

#ifdef SSVE
	puts	"Streaming mode "
	smstart_sm
#endif

	// Sanity-check and report the vector length

	rdvl	x19, #8		// x19 = vector length in bits
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"Bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"Vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes

	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

#ifdef SSVE
	smstart_sm		// syscalls will have exited streaming mode
#endif

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	// The vector length must not change behind our back
	rdvl	x0, #8
	cmp	x0, x19
	b.ne	vl_barf

	mov	x21, #0		// Set up Z-regs & shadow with test pattern
0:	mov	x0, x20
	mov	x1, x21
	and	x2, x22, #0xf
	bl	setup_zreg
	add	x21, x21, #1
	cmp	x21, #NZR
	b.lo	0b

	mov	x0, x20		// Set up FFR & shadow with test pattern
	mov	x1, #NZR + NPR
	and	x2, x22, #0xf
	bl	setup_ffr

	// setup_ffr corrupts P0, so the P-regs are loaded after it.
	// x21 carries on from NZR here.
0:	mov	x0, x20		// Set up P-regs & shadow with test pattern
	mov	x1, x21
	and	x2, x22, #0xf
	bl	setup_preg
	add	x21, x21, #1
	cmp	x21, #NZR + NPR
	b.lo	0b

// Can't do this when SVE state is volatile across SVC:
//	mov	x8, #__NR_sched_yield	// Encourage preemption
//	svc	#0

	mov	x21, #0
0:	mov	x0, x21
	bl	check_zreg
	add	x21, x21, #1
	cmp	x21, #NZR
	b.lo	0b

0:	mov	x0, x21
	bl	check_preg
	add	x21, x21, #1
	cmp	x21, #NZR + NPR
	b.lo	0b

	// check_ffr corrupts P0, so it runs after the P-regs were checked
	bl	check_ffr

	add	x22, x22, #1
	b	.Ltest_loop

.Labort:
	// Raise SIGABRT in this process only. Passing a pid of 0 to kill
	// would instead signal every process in our process group, taking
	// down any test harness and sibling test processes with us.
	mov	x8, #__NR_getpid
	svc	#0			// x0 = our pid
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if SIGABRT did not terminate us (e.g. it is being
	// ignored): exit with a failure status rather than running off the
	// end of this function.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
500
// Report a mismatch between a register and its shadow, then abort
// x0: address of the expected data
// x1: address of the actual data
// x2: data size in bytes
// Also reports x20 (pid), x22 (iteration) and x21 (register index) as
// left by the main test loop.
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", reg="
	mov	x0, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	// kill(getpid(), SIGABRT)
	mov	x8, #__NR_getpid
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if SIGABRT did not terminate us (e.g. it is being
	// ignored): exit with a failure status rather than running off the
	// end of this function. The exit status is the first syscall
	// argument, so it goes in x0.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
544
// Report that the active vector length changed unexpectedly, then exit
// with status 1
// x0: the vector length that was read (in bits, as compared by the main loop)
// Does not return.
function vl_barf
	mov	x10, x0		// keep the value clear of the output helpers

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	// The exit status is the first syscall argument, so it goes in x0.
	// Setting x1 instead would exit with whatever putdecn left in x0.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
556