/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_FP_H
#define __ASM_FP_H

#include <asm/errno.h>
#include <asm/percpu.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>

#ifndef __ASSEMBLER__

#include <linux/bitmap.h>
#include <linux/build_bug.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/types.h>

/*
 * Assembler directives placed at the start of the inline asm blocks in this
 * file. Each one is a string of ".arch_extension" directives naming the
 * extension(s) whose instructions the asm block uses.
 */
#define __FPSIMD_PREAMBLE	".arch_extension fp\n" \
				".arch_extension simd\n"
#define __SVE_PREAMBLE		".arch_extension sve\n"
#define __SME_PREAMBLE		".arch_extension sme\n"

/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK	0xf800009f
#define VFP_FPSCR_CTRL_MASK	0x07f79f00
/*
 * The VFP state has 32x64-bit registers and a single 32-bit
 * control/status register.
 */
#define VFP_STATE_SIZE		((32 * 8) + 4)

/*
 * Set CPACR_EL1.FPEN_EL1EN and CPACR_EL1.ZEN_EL1EN on top of whatever is
 * currently programmed, then issue an ISB.
 *
 * Returns the CPACR_EL1 value read before the update, in a form that can be
 * passed to cpacr_restore().
 */
static inline unsigned long cpacr_save_enable_kernel_sve(void)
{
	unsigned long old = read_sysreg(cpacr_el1);
	unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;

	write_sysreg(old | set, cpacr_el1);
	isb();
	return old;
}

/*
 * Set CPACR_EL1.FPEN_EL1EN and CPACR_EL1.SMEN_EL1EN on top of whatever is
 * currently programmed, then issue an ISB.
 *
 * Returns the CPACR_EL1 value read before the update, in a form that can be
 * passed to cpacr_restore().
 */
static inline unsigned long cpacr_save_enable_kernel_sme(void)
{
	unsigned long old = read_sysreg(cpacr_el1);
	unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;

	write_sysreg(old | set, cpacr_el1);
	isb();
	return old;
}

/*
 * Write @cpacr to CPACR_EL1 verbatim and issue an ISB. No bits of the current
 * register value are preserved.
 */
static inline void cpacr_restore(unsigned long cpacr)
{
	write_sysreg(cpacr, cpacr_el1);
	isb();
}

/*
 * When we defined the maximum SVE vector length we defined the ABI so
 * that the maximum vector length included all the reserved for future
 * expansion bits in ZCR rather than those just currently defined by
 * the architecture.
Using this length to allocate worst-case size buffers
 * results in excessively large allocations, and this effect is even
 * more pronounced for SME due to ZA. Define more suitable VLs for
 * these situations.
 */
#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1)
#define SME_VQ_MAX	((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1)

struct task_struct;

/* Read FPSR and FPCR into @state->fpsr and @state->fpcr. */
static inline void fpsimd_save_common(struct user_fpsimd_state *state)
{
	state->fpsr = read_sysreg_s(SYS_FPSR);
	state->fpcr = read_sysreg_s(SYS_FPCR);
}

/* Write @state->fpsr and @state->fpcr to FPSR and FPCR. */
static inline void fpsimd_load_common(const struct user_fpsimd_state *state)
{
	write_sysreg_s(state->fpsr, SYS_FPSR);
	write_sysreg_s(state->fpcr, SYS_FPCR);
}

/*
 * Store Q0-Q31 to @state->vregs as sixteen STP pairs, 16 bytes per register.
 *
 * The "=Q" output operand tells the compiler that the asm writes
 * @state->vregs, so no "memory" clobber is used here.
 */
static inline void fpsimd_save_vregs(struct user_fpsimd_state *state)
{
	instrument_write(state->vregs, sizeof(state->vregs));
	asm volatile(
	__FPSIMD_PREAMBLE
	" stp q0, q1, [%[vregs], #16 * 0]\n"
	" stp q2, q3, [%[vregs], #16 * 2]\n"
	" stp q4, q5, [%[vregs], #16 * 4]\n"
	" stp q6, q7, [%[vregs], #16 * 6]\n"
	" stp q8, q9, [%[vregs], #16 * 8]\n"
	" stp q10, q11, [%[vregs], #16 * 10]\n"
	" stp q12, q13, [%[vregs], #16 * 12]\n"
	" stp q14, q15, [%[vregs], #16 * 14]\n"
	" stp q16, q17, [%[vregs], #16 * 16]\n"
	" stp q18, q19, [%[vregs], #16 * 18]\n"
	" stp q20, q21, [%[vregs], #16 * 20]\n"
	" stp q22, q23, [%[vregs], #16 * 22]\n"
	" stp q24, q25, [%[vregs], #16 * 24]\n"
	" stp q26, q27, [%[vregs], #16 * 26]\n"
	" stp q28, q29, [%[vregs], #16 * 28]\n"
	" stp q30, q31, [%[vregs], #16 * 30]\n"
	: "=Q" (state->vregs)
	: [vregs] "r" (state->vregs)
	);
}

/*
 * Load Q0-Q31 from @state->vregs as sixteen LDP pairs, 16 bytes per register.
 *
 * The "Q" input operand tells the compiler that the asm reads @state->vregs.
 */
static inline void fpsimd_load_vregs(const struct user_fpsimd_state *state)
{
	instrument_read(state->vregs, sizeof(state->vregs));
	asm volatile(
	__FPSIMD_PREAMBLE
	" ldp q0, q1, [%[vregs], #16 * 0]\n"
	" ldp q2, q3, [%[vregs], #16 * 2]\n"
	" ldp q4, q5, [%[vregs], #16 * 4]\n"
	" ldp q6, q7, [%[vregs], #16 * 6]\n"
	" ldp q8, q9, [%[vregs], #16 * 8]\n"
	" ldp q10, q11, [%[vregs], #16 * 10]\n"
	" ldp q12, q13, [%[vregs], #16 * 12]\n"
	" ldp q14, q15, [%[vregs], #16 * 14]\n"
	" ldp q16, q17, [%[vregs], #16 * 16]\n"
	" ldp q18, q19, [%[vregs], #16 * 18]\n"
	" ldp q20, q21, [%[vregs], #16 * 20]\n"
	" ldp q22, q23, [%[vregs], #16 * 22]\n"
	" ldp q24, q25, [%[vregs], #16 * 24]\n"
	" ldp q26, q27, [%[vregs], #16 * 26]\n"
	" ldp q28, q29, [%[vregs], #16 * 28]\n"
	" ldp q30, q31, [%[vregs], #16 * 30]\n"
	:
	: "Q" (state->vregs),
	  [vregs] "r" (state->vregs)
	);
}

/* Save the vector registers, then FPSR/FPCR, to @state. */
static inline void fpsimd_save_state(struct user_fpsimd_state *state)
{
	fpsimd_save_vregs(state);
	fpsimd_save_common(state);
}

/* Load the vector registers, then FPSR/FPCR, from @state. */
static inline void fpsimd_load_state(const struct user_fpsimd_state *state)
{
	fpsimd_load_vregs(state);
	fpsimd_load_common(state);
}

extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);

extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);

/*
 * Description of a set of FP state: pointers to the FPSIMD, SVE and SME
 * register images and to the SVCR/FPMR values, the SVE and SME vector
 * lengths, and the fp_type bookkeeping. One instance per CPU is declared
 * below as fpsimd_last_state.
 */
struct cpu_fp_state {
	struct user_fpsimd_state *st;
	struct arm64_sve_state *sve_state;
	struct arm64_sme_state *sme_state;
	u64 *svcr;
	u64 *fpmr;
	unsigned int sve_vl;
	unsigned int sme_vl;
	enum fp_type *fp_type;
	enum fp_type to_save;
};

DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);

extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_current_state(void);
extern void fpsimd_save_and_flush_cpu_state(void);

/* True if the system supports SME and SVCR_SM_MASK is set in @thread->svcr. */
static inline bool thread_sm_enabled(struct thread_struct *thread)
{
	return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
}

/* True if the system supports SME and SVCR_ZA_MASK is set in @thread->svcr. */
static inline bool thread_za_enabled(struct thread_struct *thread)
{
	return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
}

extern void task_smstop_sm(struct task_struct *task);

/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100

/*
 * Return a pointer to the ZT state inside @thread->sme_state, located
 * ZA_SIG_REGS_SIZE(vq) bytes from the start of the buffer, where vq is
 * derived from the thread's SME vector length.
 */
static inline void *thread_zt_state(struct thread_struct *thread)
{
	/* The ZT register state is stored immediately after the ZA state */
	unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
	return (void *)thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
}

/*
 * Return the result of "RDVL <Xd>, #1". The rest of this file uses the value
 * as the size in bytes of one Z register.
 */
static inline unsigned int sve_get_vl(void)
{
	unsigned int vl;

	asm volatile(
	__SVE_PREAMBLE
	" rdvl %x[vl], #1\n"
	: [vl] "=r" (vl)
	);

	return vl;
}

/*
 * Build an assembler ".irp" loop that expands @asm_str once per register
 * index (0-31 for Z registers, 0-15 for P registers). @idx_str names the loop
 * variable, which @asm_str references as "\<idx_str>".
 */
#define FOR_EACH_Z_REG(idx_str, asm_str) \
	" .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \
	asm_str "\n" \
	" .endr\n"

#define FOR_EACH_P_REG(idx_str, asm_str) \
	" .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n" \
	asm_str "\n" \
	" .endr\n"

/*
 * Store Z0-Z31 at the start of @state, register n at offset n * VL.
 * @vl is only used to size the instrumented region; the stores themselves
 * are scaled by the hardware's current VL ("MUL VL").
 */
static inline void __sve_save_z(struct arm64_sve_state *state, unsigned long vl)
{
	instrument_write(state, SVE_NUM_ZREGS * vl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_Z_REG("n", "str z\\n, [%[zregs], #\\n, MUL VL]")
	:
	: [zregs] "r" (state)
	: "memory"
	);
}

/*
 * Load Z0-Z31 from the start of @state, register n from offset n * VL.
 * @vl is only used to size the instrumented region.
 */
static inline void __sve_load_z(const struct arm64_sve_state *state, unsigned long vl)
{
	instrument_read(state, SVE_NUM_ZREGS * vl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_Z_REG("n", "ldr z\\n, [%[zregs], #\\n, MUL VL]")
	:
	: [zregs] "r" (state)
	: "memory"
	);
}

/*
 * Store P0-P15 and an FFR image to @state.
 *
 * The predicate area starts SVE_NUM_ZREGS * @vl bytes into @state and each
 * predicate slot is @vl / 8 bytes; the FFR slot follows the last predicate.
 * If @ffr is true the FFR slot receives the value read with RDFFR, otherwise
 * it receives an all-false predicate (PFALSE).
 */
static inline void __sve_save_p(struct arm64_sve_state *state, unsigned long vl, bool ffr)
{
	void *pregs = (void *)state + SVE_NUM_ZREGS * vl;
	unsigned long pl = vl / 8;
	void *pffr = pregs + SVE_NUM_PREGS * pl;

	instrument_write(pregs, SVE_NUM_PREGS * pl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_P_REG("n", "str p\\n, [%[pregs], #\\n, MUL VL]\n")
	:
	: [pregs] "r" (pregs)
	: "memory"
	);

	/*
	 * P0 is used as scratch to stage the FFR image, so it is reloaded from
	 * its freshly saved slot (the first predicate slot) afterwards.
	 */
	instrument_write(pffr, pl);
	if (ffr) {
		asm volatile(
		__SVE_PREAMBLE
		" rdffr p0.b\n"
		" str p0, [%[pffr]]\n"
		" ldr p0, [%[pregs]]\n"
		:
		: [pregs] "r" (pregs),
		  [pffr] "r" (pffr)
		: "memory"
		);
	} else {
		asm volatile(
		__SVE_PREAMBLE
		" pfalse p0.b\n"
		" str p0, [%[pffr]]\n"
		" ldr p0, [%[pregs]]\n"
		:
		: [pregs] "r" (pregs),
		  [pffr] "r" (pffr)
		: "memory"
		);
	}
}

/*
 * Load P0-P15, and FFR when @ffr is true, from @state. The layout is the one
 * written by __sve_save_p().
 *
 * FFR is written first because it is staged through P0; the subsequent
 * predicate loads then overwrite P0 with its real value.
 */
static inline void __sve_load_p(const struct arm64_sve_state *state, unsigned long vl, bool ffr)
{
	const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl;
	unsigned long pl = vl / 8;
	const void *pffr = pregs + SVE_NUM_PREGS * pl;

	if (ffr) {
		instrument_read(pffr, pl);
		asm volatile(
		__SVE_PREAMBLE
		" ldr p0, [%[pffr]]\n"
		" wrffr p0.b\n"
		:
		: [pffr] "r" (pffr)
		: "memory"
		);
	}

	instrument_read(pregs, SVE_NUM_PREGS * pl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_P_REG("n", "ldr p\\n, [%[pregs], #\\n, MUL VL]\n")
	:
	: [pregs] "r" (pregs)
	: "memory"
	);
}

/*
 * Save the Z registers, the P registers and an FFR image to @state, using
 * the vector length reported by sve_get_vl(). See __sve_save_p() for @ffr.
 */
static inline void sve_save_state(struct arm64_sve_state *state, bool ffr)
{
	unsigned long vl = sve_get_vl();
	__sve_save_z(state, vl);
	__sve_save_p(state, vl, ffr);
}

/*
 * Load the Z registers, the P registers and (when @ffr is true) FFR from
 * @state, using the vector length reported by sve_get_vl().
 */
static inline void sve_load_state(const struct arm64_sve_state *state, bool ffr)
{
	unsigned long vl = sve_get_vl();
	__sve_load_z(state, vl);
	__sve_load_p(state, vl, ffr);
}

/*
 * Zero all SVE registers except for the first 128 bits of each vector.
 *
 * The caller must ensure that the VL has been configured and the CPU must be
 * in non-streaming mode.
*/
static inline void sve_flush_live(void)
{
	unsigned long vl = sve_get_vl();

	/*
	 * Only needed when a Z register is wider than 128 bits. Moving each
	 * V register onto itself keeps the low 128 bits; this relies on the
	 * architecture zeroing the remaining Z register bits on a V register
	 * write.
	 */
	if (vl > sizeof(__uint128_t)) {
		asm volatile(
		__FPSIMD_PREAMBLE
		FOR_EACH_Z_REG("n", "mov v\\n\\().16b, v\\n\\().16b")
		);
	}

	/* Set P0-P15 to all-false, then copy the (all-false) P0 into FFR. */
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_P_REG("n", "pfalse p\\n\\().b")
	" wrffr p0.b\n"
	);
}

struct arm64_cpu_capabilities;
extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);

/*
 * Helpers to translate bit indices in sve_vq_map to VQ values (and
 * vice versa). This allows find_next_bit() to be used to find the
 * _maximum_ VQ not exceeding a certain value.
*/
/* Map @vq to its bitmap index: larger VQs get smaller bit indices. */
static inline unsigned int __vq_to_bit(unsigned int vq)
{
	return SVE_VQ_MAX - vq;
}

/* Inverse of __vq_to_bit(); the mapping is its own inverse. */
static inline unsigned int __bit_to_vq(unsigned int bit)
{
	return SVE_VQ_MAX - bit;
}


/* Vector length information for one vector type (see enum vec_type). */
struct vl_info {
	enum vec_type type;
	const char *name;		/* For display purposes */

	/* Minimum supported vector length across all CPUs */
	int min_vl;

	/* Maximum supported vector length across all CPUs */
	int max_vl;
	int max_virtualisable_vl;

	/*
	 * Set of available vector lengths,
	 * where length vq encoded as bit __vq_to_bit(vq):
	 */
	DECLARE_BITMAP(vq_map, SVE_VQ_MAX);

	/* Set of vector lengths present on at least one cpu: */
	DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
};

#ifdef CONFIG_ARM64_SVE

extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_from_effective_state(struct task_struct *task);
extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);

extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
				 unsigned long vl, unsigned long flags);

extern int sve_set_current_vl(unsigned long arg);
extern int sve_get_current_vl(void);

/* Clear CPACR_EL1.ZEN_EL0EN. No ISB is issued here. */
static inline void sve_user_disable(void)
{
	sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0);
}

/* Set CPACR_EL1.ZEN_EL0EN. No ISB is issued here. */
static inline void sve_user_enable(void)
{
	sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
}

/*
 * Update the LEN field of the ZCR register @reg to (@val & ZCR_ELx_LEN_MASK),
 * leaving the other bits unchanged. The register is only written when the
 * resulting value differs from the value read.
 */
#define sve_cond_update_zcr_vq(val, reg)		\
	do {						\
		u64 __zcr = read_sysreg_s((reg));	\
		u64 __new = __zcr & ~ZCR_ELx_LEN_MASK;	\
		__new |= (val) & ZCR_ELx_LEN_MASK;	\
		if (__zcr != __new)			\
			write_sysreg_s(__new, (reg));	\
	} while (0)

/*
 * Probing and setup functions.
 * Calls to these functions must be serialised with one another.
*/
enum vec_type;

extern void __init vec_init_vq_map(enum vec_type type);
extern void vec_update_vq_map(enum vec_type type);
extern int vec_verify_vq_map(enum vec_type type);
extern void __init sve_setup(void);

extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];

/*
 * Replace the LEN field of ZCR_EL1 (ARM64_VEC_SVE) or SMCR_EL1
 * (ARM64_VEC_SME) with @val, preserving the other bits of the register.
 *
 * @val is OR-ed in without being masked, so it must not have bits set
 * outside the LEN field. A @type that is not handled (including one whose
 * CONFIG option is disabled) triggers WARN_ON_ONCE() and writes nothing.
 */
static inline void write_vl(enum vec_type type, u64 val)
{
	u64 tmp;

	switch (type) {
#ifdef CONFIG_ARM64_SVE
	case ARM64_VEC_SVE:
		tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
		write_sysreg_s(tmp | val, SYS_ZCR_EL1);
		break;
#endif
#ifdef CONFIG_ARM64_SME
	case ARM64_VEC_SME:
		tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
		write_sysreg_s(tmp | val, SYS_SMCR_EL1);
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		break;
	}
}

/* Return vl_info[@type].max_vl. @type is used as an index unchecked. */
static inline int vec_max_vl(enum vec_type type)
{
	return vl_info[type].max_vl;
}

/* Return vl_info[@type].max_virtualisable_vl. @type is not range-checked. */
static inline int vec_max_virtualisable_vl(enum vec_type type)
{
	return vl_info[type].max_virtualisable_vl;
}

/* vec_max_vl() for ARM64_VEC_SVE. */
static inline int sve_max_vl(void)
{
	return vec_max_vl(ARM64_VEC_SVE);
}

/* vec_max_virtualisable_vl() for ARM64_VEC_SVE. */
static inline int sve_max_virtualisable_vl(void)
{
	return vec_max_virtualisable_vl(ARM64_VEC_SVE);
}

/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
static inline bool vq_available(enum vec_type type, unsigned int vq)
{
	return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
}

/* vq_available() for ARM64_VEC_SVE; the same precondition on @vq applies. */
static inline bool sve_vq_available(unsigned int vq)
{
	return vq_available(ARM64_VEC_SVE, vq);
}

/*
 * Return SVE_SIG_REGS_SIZE() for the larger of @sve_vl and @sme_vl, so the
 * result is big enough for either vector length.
 */
static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
{
	unsigned int vl = max(sve_vl, sme_vl);
	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}

/*
 * Return how many bytes of memory are required to store the full SVE
 * state for task, given task's currently configured vector length.
*/
static inline size_t sve_state_size(struct task_struct const *task)
{
	/* Sized for the larger of the task's SVE and SME vector lengths. */
	unsigned int sve_vl = task_get_sve_vl(task);
	unsigned int sme_vl = task_get_sme_vl(task);
	return __sve_state_size(sve_vl, sme_vl);
}

#else /* ! CONFIG_ARM64_SVE */

/*
 * Stubs used when CONFIG_ARM64_SVE is disabled: allocation, release and sync
 * helpers do nothing, sizes are 0 and no VQ is reported as available.
 */
static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }

static inline int sve_max_virtualisable_vl(void)
{
	return 0;
}

static inline int sve_set_current_vl(unsigned long arg)
{
	return -EINVAL;
}

static inline int sve_get_current_vl(void)
{
	return -EINVAL;
}

/* Note: returns -EINVAL rather than 0, unlike sve_max_virtualisable_vl(). */
static inline int sve_max_vl(void)
{
	return -EINVAL;
}

static inline bool sve_vq_available(unsigned int vq) { return false; }

/* BUILD_BUG(): any call that is not optimised away fails the build. */
static inline void sve_user_disable(void) { BUILD_BUG(); }
static inline void sve_user_enable(void) { BUILD_BUG(); }

#define sve_cond_update_zcr_vq(val, reg) do { } while (0)

static inline void vec_init_vq_map(enum vec_type t) { }
static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }

static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
{
	return 0;
}

static inline size_t sve_state_size(struct task_struct const *task)
{
	return 0;
}

#endif /* !
CONFIG_ARM64_SVE */ 570 571 #ifdef CONFIG_ARM64_SME 572 573 static inline void sme_user_disable(void) 574 { 575 sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0); 576 } 577 578 static inline void sme_user_enable(void) 579 { 580 sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN); 581 } 582 583 static inline void sme_smstart_sm(void) 584 { 585 asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); 586 } 587 588 static inline void sme_smstop_sm(void) 589 { 590 asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr")); 591 } 592 593 static inline void sme_smstop(void) 594 { 595 asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); 596 } 597 598 extern void __init sme_setup(void); 599 600 static inline int sme_max_vl(void) 601 { 602 return vec_max_vl(ARM64_VEC_SME); 603 } 604 605 static inline int sme_max_virtualisable_vl(void) 606 { 607 return vec_max_virtualisable_vl(ARM64_VEC_SME); 608 } 609 610 static inline unsigned int sme_get_vl(void) 611 { 612 unsigned int vl; 613 614 asm volatile( 615 __SME_PREAMBLE 616 " rdsvl %x[vl], #1\n" 617 : [vl] "=r" (vl) 618 ); 619 620 return vl; 621 } 622 623 extern void sme_alloc(struct task_struct *task, bool flush); 624 extern int sme_set_current_vl(unsigned long arg); 625 extern int sme_get_current_vl(void); 626 extern void sme_suspend_exit(void); 627 628 static inline size_t __sme_state_size(unsigned int sme_vl) 629 { 630 size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl)); 631 632 if (system_supports_sme2()) 633 size += ZT_SIG_REG_SIZE; 634 635 return size; 636 } 637 638 static inline void __sme_save_za(struct arm64_sme_state *state, unsigned long svl) 639 { 640 /* 641 * The <Wv> argument to LDR/STR (array vector) can only encode W12-W15. 642 * The "Ucj" constraint exists for this, but is only supported by GCC 643 * 14.1.0+ and LLVM 18.1.0+. 
644 */ 645 register unsigned int v asm ("w12"); 646 647 instrument_write(state, svl * svl); 648 for (v = 0; v < svl; v++) { 649 void *pav = (void *)state + v * svl; 650 651 asm volatile( 652 __SME_PREAMBLE 653 " str za[%w[v], #0], [%[pav]]\n" 654 : 655 : [v] "r" (v), 656 [pav] "r" (pav) 657 : "memory" 658 ); 659 } 660 } 661 662 static inline void __sme_load_za(const struct arm64_sme_state *state, unsigned long svl) 663 { 664 /* See comment in __sme_save_za */ 665 register unsigned int v asm ("w12"); 666 667 instrument_read(state, svl * svl); 668 for (v = 0; v < svl; v++) { 669 void *pav = (void *)state + v * svl; 670 671 asm volatile( 672 __SME_PREAMBLE 673 " ldr za[%w[v], #0], [%[pav]]\n" 674 : 675 : [v] "r" (v), 676 [pav] "r" (pav) 677 : "memory" 678 ); 679 } 680 } 681 682 static inline void __sme_save_zt(struct arm64_sme_state *state, unsigned long svl) 683 { 684 void *pzt = (void *)state + svl * svl; 685 686 instrument_write(pzt, 64); 687 asm volatile( 688 __DEFINE_ASM_GPR_NUMS 689 /* 690 * STR ZT0, [<Xn|SP>] 691 * Supported by binutils 2.41+. 692 * Supported by LLVM 16+ 693 */ 694 " .inst 0xe13f8000 | ((.L__gpr_num_%[pzt]) << 5)\n" 695 : 696 : [pzt] "r" (pzt) 697 : "memory" 698 ); 699 } 700 701 static inline void __sme_load_zt(const struct arm64_sme_state *state, unsigned long svl) 702 { 703 void *pzt = (void *)state + svl * svl; 704 705 instrument_read(pzt, 64); 706 asm volatile( 707 __DEFINE_ASM_GPR_NUMS 708 /* 709 * LDR ZT0, [<Xn|SP>] 710 * Supported by binutils 2.41+. 
711 * Supported by LLVM 16+ 712 */ 713 " .inst 0xe11f8000 | ((.L__gpr_num_%[pzt]) << 5)\n" 714 : 715 : [pzt] "r" (pzt) 716 : "memory" 717 ); 718 } 719 720 static inline void sme_save_state(struct arm64_sme_state *state, bool zt) 721 { 722 unsigned long svl = sme_get_vl(); 723 724 __sme_save_za(state, svl); 725 if (zt) 726 __sme_save_zt(state, svl); 727 } 728 729 static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) 730 { 731 unsigned long svl = sme_get_vl(); 732 733 __sme_load_za(state, svl); 734 if (zt) 735 __sme_load_zt(state, svl); 736 } 737 738 /* 739 * Return how many bytes of memory are required to store the full SME 740 * specific state for task, given task's currently configured vector 741 * length. 742 */ 743 static inline size_t sme_state_size(struct task_struct const *task) 744 { 745 return __sme_state_size(task_get_sme_vl(task)); 746 } 747 748 void sme_enable_dvmsync(void); 749 void sme_set_active(void); 750 void sme_clear_active(void); 751 752 static inline void sme_enter_from_user_mode(void) 753 { 754 if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) && 755 test_thread_flag(TIF_SME)) 756 sme_clear_active(); 757 } 758 759 static inline void sme_exit_to_user_mode(void) 760 { 761 if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) && 762 test_thread_flag(TIF_SME)) 763 sme_set_active(); 764 } 765 766 #else 767 768 static inline void sme_user_disable(void) { BUILD_BUG(); } 769 static inline void sme_user_enable(void) { BUILD_BUG(); } 770 771 static inline void sme_smstart_sm(void) { } 772 static inline void sme_smstop_sm(void) { } 773 static inline void sme_smstop(void) { } 774 775 static inline void sme_alloc(struct task_struct *task, bool flush) { } 776 static inline void sme_setup(void) { } 777 static inline unsigned int sme_get_vl(void) { return 0; } 778 static inline int sme_max_vl(void) { return 0; } 779 static inline int sme_max_virtualisable_vl(void) { return 0; } 780 static inline int 
sme_set_current_vl(unsigned long arg) { return -EINVAL; } 781 static inline int sme_get_current_vl(void) { return -EINVAL; } 782 static inline void sme_suspend_exit(void) { } 783 784 static inline size_t __sme_state_size(unsigned int sme_vl) 785 { 786 return 0; 787 } 788 789 static inline size_t sme_state_size(struct task_struct const *task) 790 { 791 return 0; 792 } 793 794 static inline void sme_save_state(struct arm64_sme_state *state, bool zt) { BUILD_BUG(); } 795 static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) { BUILD_BUG(); } 796 797 static inline void sme_enter_from_user_mode(void) { } 798 static inline void sme_exit_to_user_mode(void) { } 799 800 #endif /* ! CONFIG_ARM64_SME */ 801 802 /* For use by EFI runtime services calls only */ 803 extern void __efi_fpsimd_begin(void); 804 extern void __efi_fpsimd_end(void); 805 806 #endif 807 808 #endif 809