// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021 ARM Limited.
// Original author: Mark Brown <broonie@kernel.org>
//
// Scalable Matrix Extension ZA context switch test
// Repeatedly writes unique test patterns into each ZA tile
// and reads them back to verify integrity.
//
// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
// (leave it running for as long as you want...)
// kill $pids

#include <asm/unistd.h>
#include "assembler.h"
#include "asm-offsets.h"
#include "sme-inst.h"

.arch_extension sve

#define MAXVL     2048
#define MAXVL_B   (MAXVL / 8)

// Declare some storage space to shadow ZA register contents and a
// scratch buffer for a vector.
//   zaref:   MAXVL_B rows of MAXVL_B bytes (largest possible ZA array)
//   scratch: one row of MAXVL_B bytes
.pushsection .text
.data
.align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection

// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
// A length of zero copies nothing.
// Clobbers x0-x3
function memcpy
	cbz	x2, 1f			// nothing to copy
0:	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	0b
1:	ret
endfunction

// Generate a test pattern for storage in ZA
// x0: pid
// x1: row in ZA
// x2: generation
//
// These values are used to construct a 32-bit pattern that is repeated in the
// scratch buffer as many times as will fit:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index
//
// Always fills the whole MAXVL_B-byte scratch buffer.
// Clobbers x0, x1 and x3.
function pattern
	mov	w3, wzr
	bfi	w3, w0, #16, #12	// PID
	bfi	w3, w1, #8, #8		// Row
	bfi	w3, w2, #28, #4		// Generation

	ldr	x0, =scratch
	mov	w1, #MAXVL_B / 4	// number of 32-bit lanes in scratch

0:	str	w3, [x0], #4
	add	w3, w3, #1		// Lane
	subs	w1, w1, #1
	b.ne	0b

	ret
endfunction

// Get the address of shadow data for ZA horizontal vector xn
//   \xd:    destination register, receives zaref + (row length * \xn)
//   \xn:    register holding the row number
//   \nrtmp: register *number*; x\nrtmp is left holding the value produced
//           by "rdsvl \nrtmp, 1", which callers use as the row length in
//           bytes
.macro _adrza xd, xn, nrtmp
	ldr	\xd, =zaref
	rdsvl	\nrtmp, 1
	madd	\xd, x\nrtmp, \xn, \xd
.endm

// Set up test pattern in a ZA horizontal vector
// x0: pid
// x1: row number
// x2: generation
//
// Writes the pattern to the row's shadow copy in zaref and loads the same
// bytes into the live ZA row.
// Clobbers x0-x5 and x12.
function setup_za
	mov	x4, x30			// pattern/memcpy do not touch x4
	mov	x12, x1			// Use x12 for vector select

	bl	pattern			// Get pattern in scratch buffer
	_adrza	x0, x12, 2		// Shadow buffer pointer to x0 and x5
	mov	x5, x0
	ldr	x1, =scratch
	bl	memcpy			// length set up in x2 by _adrza

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction

// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted.
// On a mismatch, barf is entered with x0-x2 restored to the values passed in.
// Clobbers x0-x5.
function memcmp
	cbz	x2, 2f

	stp	x0, x1, [sp, #-0x20]!	// keep the arguments for barf
	str	x2, [sp, #0x10]

	mov	x5, #0			// byte index
0:	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	1f
	subs	x2, x2, #1
	b.ne	0b

	// The loads below leave the flags alone: NE here means the loop was
	// left on a mismatch, EQ means the byte count reached zero.
1:	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

2:	ret
endfunction

// Verify that a ZA vector matches its shadow in memory, else abort
// x0: row number
// Clobbers x0-x7 and x12.
function check_za
	mov	x3, x30

	mov	x12, x0			// vector select for _str_za
	_adrza	x5, x0, 6		// pointer to expected value in x5
	mov	x4, x0
	ldr	x7, =scratch		// x7 is scratch

	mov	x0, x7			// Poison scratch
	mov	x1, x6			// row length left in x6 by _adrza
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x0, x5			// expected
	mov	x1, x7			// actual
	mov	x2, x6			// length
	mov	x30, x3
	b	memcmp			// tail call: memcmp returns to our caller
endfunction

// Modify the live SME register state, signal return will undo our changes
// x2: ucontext pointer
function irritator_handler
	// Increment the irritation signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// This will reset ZA to all bits 0
	smstop
	smstart_za

	ret
endfunction

// Count the signal without disturbing any register state
// x2: ucontext pointer
function tickle_handler
	// Increment the signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	ret
endfunction

// Report the iteration and signal counts saved in the interrupted context,
// then exit with status 0.  Does not return.
// w0: signal number
// x2: ucontext pointer
function terminate_handler
	mov	w21, w0
	mov	x20, x2

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]	// saved x22: generation
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]	// saved x23: signal count
	bl	putdecn

	mov	x0, #0
	mov	x8, #__NR_exit
	svc	#0
endfunction

// Install a signal handler via rt_sigaction; aborts the program on failure.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
function setsignal
	// Frame: x30 at [sp], sigaction structure (sa_sz bytes, rounded up to
	// a multiple of 16) at [sp, #16].
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	mov	w4, w0
	mov	x5, x1
	mov	w6, w2

	add	x0, sp, #16
	mov	x1, #sa_sz
	bl	memclr

	mov	w0, w4
	add	x1, sp, #16
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	x2, #0			// no old action wanted
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbz	w0, 1f

	puts	"sigaction failure\n"
	b	.Labort

1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret
endfunction

// Main program entry point
//
// Long-lived registers:
//   x19: vector length in bits, as read at startup
//   x20: pid
//   x21: row countdown for the setup and verify loops
//   x22: generation number (iteration count)
//   x23: signal count, incremented by the signal handlers
//   x24: number of ZA rows, used to turn the countdown into a row index
.globl _start
function _start
	enable_gcs

	mov	x23, #0			// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "	// prefix for "vector length:" below
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #MAXVL
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8			// VL must not change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1		// rows are filled from last to first
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	mov	x8, #__NR_sched_yield	// encourage preemption
1:
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1		// Verify that the data made it through
	rdsvl	24, 1		// Row count: row index = x24 - x21
0:	sub	x0, x24, x21		// rows are checked from first to last
	bl	check_za
	subs	x21, x21, #1
	bne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

.Labort:
	mov	x0, #0			// pid 0: signal our whole process group
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction

// Report a ZA/shadow mismatch and abort.  Entered from memcmp.
// x0: expected data
// x1: actual data
// x2: data size
// Also reads x20 (pid), x22 (iteration) and x21/x24 (verify loop state)
// as set up by _start.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack

	mrs	x13, S3_3_C4_C2_2	// capture SVCR before smstop changes it

	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	sub	x0, x24, x21		// row index, same as the verify loop
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"
	puts	"\tSVCR: "
	mov	x0, x13
	bl	putdecn

	mov	x8, #__NR_getpid
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT		// x0 = our pid from getpid above
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction

// Report an unexpected change of vector length and exit with status 1.
// x0: the vector length (in bits) that was read
function vl_barf
	mov	x10, x0

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1			// exit status goes in x0
	svc	#0
endfunction

// Report an unexpected SVCR value and exit with status 1.
// x0: the SVCR value that was read
function svcr_barf
	mov	x10, x0

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1			// exit status goes in x0
	svc	#0
endfunction