xref: /linux/tools/testing/selftests/arm64/fp/sve-test.S (revision 172cdcaefea5c297fdb3d20b7d5aff60ae4fbce6)
1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2015-2019 ARM Limited.
3// Original author: Dave Martin <Dave.Martin@arm.com>
4//
5// Simple Scalable Vector Extension context switch test
6// Repeatedly writes unique test patterns into each SVE register
7// and reads them back to verify integrity.
8//
9// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
10// (leave it running for as long as you want...)
11// kill $pids
12
13#include <asm/unistd.h>
14#include "assembler.h"
15#include "asm-offsets.h"
16
// Number of SVE vector (Z) registers exercised by the test
#define NZR	32
// Number of SVE predicate (P) registers exercised by the test
#define NPR	16
// Largest vector length the shadow buffers are sized for, in bytes
#define MAXVL_B	(2048 / 8)

// Let the assembler accept the SVE instructions used below
.arch_extension sve
22
// Load SVE vector register Z<zt> from the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_ldr_v zt, xn
	ldr	z\zt, [x\xn, #0, mul vl]
.endm
26
// Store SVE vector register Z<zt> to the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_str_v zt, xn
	str	z\zt, [x\xn, #0, mul vl]
.endm
30
// Load SVE predicate register P<pt> from the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_ldr_p pt, xn
	ldr	p\pt, [x\xn, #0, mul vl]
.endm
34
// Store SVE predicate register P<pt> to the memory addressed by X<xn>.
// Both arguments are bare register numbers.
.macro _sve_str_p pt, xn
	str	p\pt, [x\xn, #0, mul vl]
.endm
38
// Generate accessor functions to read/write programmatically selected
// SVE registers.
// x0 is the register index to access
// x1 is the memory address to read from (setz,setp) or store to (getz,getp)
// All clobber x0-x2
//
// The set* accessors are built from the load macros and so fill a register
// from memory; the get* accessors are built from the store macros and so
// dump a register to memory.
define_accessor setz, NZR, _sve_ldr_v
define_accessor getz, NZR, _sve_str_v
define_accessor setp, NPR, _sve_ldr_p
define_accessor getp, NPR, _sve_str_p
48
// Write the single character held in x0 to stdout.
// The value is spilled to a temporary 16-byte stack slot so that write(2)
// has a buffer to read from; only the byte at the lowest address is
// written.
// Clobbers x0-x2,x8
function putc
	sub	sp, sp, #16		// keep sp 16-byte aligned
	str	x0, [sp]

	mov	x8, #__NR_write
	mov	x2, #1			// one byte
	mov	x1, sp			// buffer
	mov	x0, #1			// STDOUT_FILENO
	svc	#0			// write(1, sp, 1)

	add	sp, sp, #16
	ret
endfunction
63
// Write the NUL-terminated string at address x0 to stdout.
// The length is found by scanning for the terminator; the string is then
// emitted with a single write(2) call.
// Clobbers x0-x3,x8
function puts
	mov	x1, x0			// x1 = start of string (write buffer)
	mov	x2, #0			// x2 = number of bytes before the NUL
	b	1f

0:	add	x2, x2, #1		// count the non-NUL byte just seen
1:	ldrb	w3, [x0], #1
	cbnz	w3, 0b

	mov	x8, #__NR_write
	mov	w0, #1			// STDOUT_FILENO
	svc	#0			// write(1, x1, x2)

	ret
endfunction
81
// Utility macro to print a literal string to stdout.
// The text is placed in the mergeable string section and its address is
// passed to the puts function.
// Clobbers x0-x3,x8 (via the puts function) and x30 (the macro does a bl)
.macro puts string
	.pushsection .rodata.str1.1, "aMS", 1
.L__puts_text\@:
	.asciz	"\string"
	.popsection

	ldr	x0, =.L__puts_text\@
	bl	puts
.endm
92
// Print an unsigned decimal number x0 to stdout
// The digits are generated least-significant first into a small stack
// buffer, working downwards from the NUL terminator, and the finished
// string is then handed to puts.
// Clobbers x0-x3,x8
function putdec
	mov	x1, sp			// x1 = one past the end of the buffer
	str	x30, [sp, #-32]!	// Result can't be > 20 digits

	mov	x2, #0
	strb	w2, [x1, #-1]!		// Write the NUL terminator

	mov	x2, #10
	// The loop body always runs at least once, so an input of 0 is
	// printed as "0" without needing a special case.
0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
	msub	x0, x3, x2, x0		// x0 = x0 % 10
	add	w0, w0, #'0'
	strb	w0, [x1, #-1]!
	mov	x0, x3			// carry on with the quotient
	cbnz	x3, 0b

	mov	x0, x1			// x0 = address of the first digit
	bl	puts

	ldr	x30, [sp], #32
	ret
endfunction
121
// Print the unsigned decimal number in x0 to stdout, then a newline.
// The return address is parked in x5 while putdec runs.
// Clobbers x0-x5,x8
function putdecn
	mov	x5, x30

	bl	putdec

	mov	x0, #'\n'
	mov	x30, x5			// let putc return straight to our caller
	b	putc
endfunction
133
// Print the byte in w0 to stdout as two lowercase hex digits, high nibble
// first.
// Clobbers x0-x3,x8
function puthexb
	str	x30, [sp, #-0x10]!

	mov	w3, w0			// w3 survives the call below
	lsr	w0, w3, #4		// high nibble first
	bl	puthexnibble

	mov	w0, w3			// now the low nibble
	ldr	x30, [sp], #0x10
	// fall through to puthexnibble
endfunction
// Print the low four bits of w0 to stdout as one lowercase hex digit.
// Clobbers x0-x2,x8
function puthexnibble
	and	w0, w0, #0xf
	add	w1, w0, #'0'		// digit to use for 0-9
	add	w2, w0, #'a' - 10	// digit to use for 10-15
	cmp	w0, #10
	csel	w0, w1, w2, lo
	b	putc			// tail call
endfunction
155
// Hex-dump a memory buffer to stdout, two digits per byte, no separators.
// x0: address of the data
// x1: number of bytes (may be zero)
// Clobbers x0-x5,x8
function dumphex
	str	x30, [sp, #-0x10]!

	mov	x4, x0			// x4 = read cursor
	mov	x5, x1			// x5 = bytes left
	cbz	x5, 1f			// nothing to print

0:	ldrb	w0, [x4], #1
	bl	puthexb
	subs	x5, x5, #1
	b.ne	0b

1:	ldr	x30, [sp], #0x10
	ret
endfunction
172
// Storage that shadows the SVE register contents, sized for the largest
// supported vector length, followed by a scratch buffer of one vector:
.pushsection .data
.p2align 4
zref:				// one MAXVL_B-sized slot per Z-register
	.space	NZR * MAXVL_B
pref:				// one (MAXVL_B / 8)-sized slot per P-register
	.space	NPR * (MAXVL_B / 8)
ffrref:				// shadow of the FFR
	.space	MAXVL_B / 8
scratch:			// pattern generation / register read-back
	.space	MAXVL_B
.popsection
186
// Byte-by-byte memory copy.
// x0: destination address
// x1: source address
// x2: number of bytes to copy (may be zero)
// Clobbers x0-x3
function memcpy
	cbz	x2, 1f			// empty copy

0:	ldrb	w3, [x1], #1
	subs	x2, x2, #1
	strb	w3, [x0], #1		// stores leave the flags alone
	b.ne	0b

1:	ret
endfunction
198
// Generate a test pattern for storage in SVE registers
// x0: pid	(folded down to 16 bits, see below)
// x1: register number (6 bits)
// x2: generation (4 bits)
//
// These values are used to construct a 32-bit pattern that is repeated in
// the scratch buffer as many times as will fit:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:22	32-bit lane index
// bits 21:16	register number
// bits 15: 0	pid
//
// A pid is not guaranteed to fit in 16 bits.  To keep a large pid from
// leaking into the register number and lane fields (which would let
// different registers end up with the same pattern), its upper half is
// XORed into its lower half and the result is truncated to 16 bits.  Pids
// below 65536 are used unchanged.
// Clobbers x0-x2
function pattern
	eor	w0, w0, w0, lsr #16	// fold pid bits 31:16 into bits 15:0
	and	w0, w0, #0xffff		// and keep the pid out of the other fields
	orr	w1, w0, w1, lsl #16
	orr	w2, w1, w2, lsl #28

	ldr	x0, =scratch
	mov	w1, #MAXVL_B / 4	// number of 32-bit lanes to fill

0:	str	w2, [x0], #4
	add	w2, w2, #(1 << 22)	// next lane index
	subs	w1, w1, #1
	bne	0b

	ret
endfunction
225
// Compute the address of the shadow data for SVE Z-register Z<xn>
// xd:    X register that receives the address
// xn:    X register holding the Z-register index (left unchanged)
// nrtmp: number of a scratch X register; on exit it holds the vector
//        length in bytes
// The three registers are expected to be distinct.
.macro _adrz xd, xn, nrtmp
	rdvl	x\nrtmp, #1
	ldr	\xd, =zref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
232
// Compute the address of the shadow data for SVE P-register P<xn - NZR>
// xd:    X register that receives the address
// xn:    X register holding NZR + the P-register index; NZR is subtracted
//        from it in place, so on exit it holds the plain P-register index
// nrtmp: number of a scratch X register; on exit it holds the size of one
//        predicate register in bytes (vector length / 8)
// The three registers are expected to be distinct.
.macro _adrp xd, xn, nrtmp
	sub	\xn, \xn, #NZR
	rdvl	x\nrtmp, #1
	lsr	x\nrtmp, x\nrtmp, #3
	ldr	\xd, =pref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
241
// Write a fresh test pattern to one SVE Z-register and to its shadow
// x0: pid
// x1: register number
// x2: generation
// The pattern is built in scratch, copied to the register's shadow slot and
// then loaded into the register from there.
// Clobbers x0-x6
function setup_zreg
	mov	x4, x30			// the callees below only touch x0-x3
	mov	x6, x1			// x6 = register number

	bl	pattern

	ldr	x1, =scratch
	_adrz	x5, x6, 2		// x5 = shadow slot, x2 = vector length in bytes
	mov	x0, x5
	bl	memcpy			// shadow = pattern

	mov	x1, x5
	mov	x0, x6
	bl	setz			// Z<x6> = shadow

	ret	x4
endfunction
262
// Write a fresh test pattern to one SVE P-register and to its shadow
// x0: pid
// x1: register number (NZR + the P-register index)
// x2: generation
// The pattern is built in scratch, its leading bytes are copied to the
// register's shadow slot and the register is then loaded from there.
// Clobbers x0-x6
function setup_preg
	mov	x4, x30			// the callees below only touch x0-x3
	mov	x6, x1			// x6 = register number

	bl	pattern

	ldr	x1, =scratch
	_adrp	x5, x6, 2		// x5 = shadow slot, x6 = P index, x2 = predicate bytes
	mov	x0, x5
	bl	memcpy			// shadow = start of pattern

	mov	x1, x5
	mov	x0, x6
	bl	setp			// P<x6> = shadow

	ret	x4
endfunction
283
// Set up test pattern in the FFR
// x0: pid
// x2: generation
//
// We need to generate a canonical FFR value, which consists of a number of
// low "1" bits, followed by a number of zeros. This gives us 17 unique values
// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
// generation, and use that as the initial number of ones in the pattern
// (so between 0 and 15 ones are actually produced).
// We fill the upper lanes of FFR with zeros.
// Beware: corrupts P0.
// Clobbers x0-x2,x4
function setup_ffr
	mov	x4, x30

	and	w0, w0, #0x3		// signature bits 1:0 = pid bits 1:0
	bfi	w0, w2, #2, #2		// signature bits 3:2 = generation bits 1:0
	mov	w1, #-1
	lsl	w1, w1, w0
	mvn	w1, w1			// w1 = (1 << signature) - 1

	ldr	x0, =ffrref
	strh	w1, [x0]		// first 16 bits of the shadow
	add	x0, x0, #2
	rdvl	x1, #1
	lsr	x1, x1, #3		// bytes per predicate register
	sub	x1, x1, #2		// less the two bytes just written
	bl	memclr			// zero the rest of the shadow

	ldr	x1, =ffrref
	mov	x0, #0
	bl	setp			// P0 = shadow

	wrffr	p0.b			// FFR = P0

	ret	x4
endfunction
318
// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
// Clobbers x0-x2.
function memfill_ae
	mov	w2, #0xae
	b	memfill
endfunction

// Fill x1 bytes starting at x0 with 0.
// Clobbers x0-x2.
function memclr
	mov	w2, wzr
	// fall through to memfill
endfunction

// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
// x1 may be zero, in which case nothing is written.
// Clobbers x0 (left pointing just past the filled area) and x1
function memfill
	cbz	x1, 1f			// empty fill

0:	strb	w2, [x0], #1
	subs	x1, x1, #1
	b.ne	0b

1:	ret
endfunction
345
// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise control passes to barf with
// x0, x1 and x2 holding the values this function was entered with, and the
// program is aborted.
// Clobbers x0-x5.
function memcmp
	cbz	x2, 2f			// nothing to compare

	// Keep the original arguments so that barf can report them
	stp	x0, x1, [sp, #-0x20]!
	str	x2, [sp, #0x10]

	mov	x5, xzr			// x5 = byte index
0:	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	1f			// mismatch: leave with NE still set
	subs	x2, x2, #1
	b.ne	0b			// drops out with EQ once every byte matched

	// None of the instructions up to the branch below modify the flags,
	// so EQ/NE still says whether the buffers matched.
1:	ldp	x0, x1, [sp]
	ldr	x2, [sp, #0x10]
	add	sp, sp, #0x20
	b.eq	2f
	b	barf

2:	ret
endfunction
371
// Verify that a SVE Z-register matches its shadow in memory, else abort
// x0: reg number
// The register is dumped into scratch (pre-filled with 0xae bytes) and
// scratch is then compared with the shadow copy.
// Clobbers x0-x7.
function check_zreg
	mov	x3, x30			// memfill_ae and getz only touch x0-x2
	mov	x4, x0			// x4 = register number

	ldr	x7, =scratch
	_adrz	x5, x4, 6		// x5 = shadow slot, x6 = vector length in bytes

	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae		// poison scratch

	mov	x1, x7
	mov	x0, x4
	bl	getz			// scratch = Z<x4>

	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	mov	x30, x3
	b	memcmp			// tail call: returns to our caller on a match
endfunction
396
// Verify that a SVE P-register matches its shadow in memory, else abort
// x0: reg number (NZR + the P-register index)
// The register is dumped into scratch (pre-filled with 0xae bytes) and
// scratch is then compared with the shadow copy.
// Clobbers x0-x7.
function check_preg
	mov	x3, x30			// memfill_ae and getp only touch x0-x2
	mov	x4, x0

	ldr	x7, =scratch
	_adrp	x5, x4, 6		// x5 = shadow slot, x4 = P index, x6 = predicate bytes

	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae		// poison scratch

	mov	x1, x7
	mov	x0, x4
	bl	getp			// scratch = P<x4>

	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	mov	x30, x3
	b	memcmp			// tail call: returns to our caller on a match
endfunction
421
// Verify that the FFR matches its shadow in memory, else abort
// The FFR is read into P0, P0 is dumped into scratch (pre-filled with 0xae
// bytes) and scratch is then compared with the shadow copy.
// Beware -- corrupts P0.
// Clobbers x0-x5.
function check_ffr
	mov	x3, x30			// memfill_ae and getp only touch x0-x2

	rdvl	x5, #1
	lsr	x5, x5, #3		// x5 = bytes per predicate register
	ldr	x4, =scratch

	mov	x1, x5
	mov	x0, x4
	bl	memfill_ae		// poison scratch

	rdffr	p0.b			// P0 = FFR
	mov	x1, x4
	mov	x0, #0
	bl	getp			// scratch = P0

	mov	x2, x5
	mov	x1, x4
	ldr	x0, =ffrref
	mov	x30, x3
	b	memcmp			// tail call: returns to our caller on a match
endfunction
447
// SIGUSR1 handler: count the signal and scribble over some SVE state.
// x2: ucontext of the interrupted code (the handler is installed with
//     SA_SIGINFO)
//
// Any SVE register modified here can cause corruption in the main
// thread -- but *only* the registers modified here.
function irritator_handler
	// Increment the irritation signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random Z-regs
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
	// And P0
	rdffr	p0.b
	// And FFR
	wrffr	p15.b

	ret
endfunction
468
// SIGINT/SIGTERM handler: print the run totals and exit with status 0.
// w0: signal number
// x2: ucontext of the interrupted code; the saved x22 and x23 are reported
//     as the iteration and signal counts
// Does not return.
function terminate_handler
	mov	x20, x2			// ucontext pointer, kept across the calls below
	mov	w21, w0			// signal number, likewise

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, xzr			// exit status 0
	svc	#0
endfunction
487
// Install a signal handler using rt_sigaction
// w0: signal number
// x1: sa_action
// w2: sa_flags
// A zero-filled sigaction structure is built on the stack, above the saved
// x30, with only the handler and flags filled in.
// Branches to .Labort if the system call fails.
// Clobbers x0-x6,x8
function setsignal
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	mov	w6, w2			// sa_flags
	mov	x5, x1			// sa_action
	mov	w4, w0			// signal number

	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr			// start from an all-zero structure

	add	x1, sp, #16		// x1 = new action
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	x3, #sa_mask_sz
	mov	x2, #0			// old action is not wanted
	mov	w0, w4
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, 1f			// non-zero return: failure

	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret

1:	puts	"sigaction failure\n"
	b	.Labort
endfunction
520
// Main program entry point
//
// Checks and reports the vector length, installs the signal handlers and
// then loops forever: every iteration loads a new test pattern into all
// Z-registers, the FFR and all P-registers, then reads everything back and
// compares it with the shadow copies.  Only a signal or a detected error
// ends the loop.
//
// Register use for the lifetime of the program:
// x19: vector length in bits, as sampled at startup
// x20: our PID
// x21: index of the register being set up or checked
// x22: generation number (iteration count)
// x23: irritation signal count (updated by irritator_handler)
//
// The function macro already defines the _start label, so it is not
// defined a second time here.
.globl _start
function _start
	// Sanity-check and report the vector length

	rdvl	x19, #8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"Bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"Vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes

	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x23, #0		// Irritation signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdvl	x0, #8		// The vector length must never change under us
	cmp	x0, x19
	b.ne	vl_barf

	mov	x21, #0		// Set up Z-regs & shadow with test pattern
0:	mov	x0, x20
	mov	x1, x21
	and	x2, x22, #0xf
	bl	setup_zreg
	add	x21, x21, #1
	cmp	x21, #NZR
	b.lo	0b

	// setup_ffr corrupts P0, so it has to run before the P-regs are set up
	mov	x0, x20		// Set up FFR & shadow with test pattern
	and	x2, x22, #0xf
	bl	setup_ffr

	// x21 == NZR here, which is the number of the first P-reg
0:	mov	x0, x20		// Set up P-regs & shadow with test pattern
	mov	x1, x21
	and	x2, x22, #0xf
	bl	setup_preg
	add	x21, x21, #1
	cmp	x21, #NZR + NPR
	b.lo	0b

// Can't do this when SVE state is volatile across SVC:
//	mov	x8, #__NR_sched_yield	// Encourage preemption
//	svc	#0

	mov	x21, #0		// Check the Z-regs against their shadows
0:	mov	x0, x21
	bl	check_zreg
	add	x21, x21, #1
	cmp	x21, #NZR
	b.lo	0b

0:	mov	x0, x21		// Check the P-regs against their shadows
	bl	check_preg
	add	x21, x21, #1
	cmp	x21, #NZR + NPR
	b.lo	0b

	bl	check_ffr	// Corrupts P0, so this check comes last

	add	x22, x22, #1
	b	.Ltest_loop

	// Abort this process only: passing 0 as the pid to kill would signal
	// every process in our process group.
.Labort:
	mov	x8, #__NR_getpid
	svc	#0		// x0 = our PID
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if SIGABRT did not terminate us: exit with a failure
	// status instead of running off the end of the function.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
629
// Report a mismatch between a register and its shadow, then abort.
// x0: address of the expected data
// x1: address of the actual data
// x2: data size in bytes
// x20, x22 and x21 are printed as the PID, iteration and register number.
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", reg="
	mov	x0, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid
	svc	#0		// x0 = our PID
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0		// kill(getpid(), SIGABRT)

	// Only reached if SIGABRT did not terminate us: exit with a failure
	// status instead of running on into whatever code follows.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
673
// Report that the active vector length is not the expected one and exit
// with status 1.
// x0: the unexpected vector length (the caller passes it in bits)
// Does not return.
function vl_barf
	mov	x10, x0		// keep the value across the puts below

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1		// exit status: first syscall argument, so x0
	mov	x8, #__NR_exit
	svc	#0
endfunction
685