1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #ifndef _ASM_PPC_ENTRY_COMMON_H 4 #define _ASM_PPC_ENTRY_COMMON_H 5 6 #include <asm/cputime.h> 7 #include <asm/interrupt.h> 8 #include <asm/runlatch.h> 9 #include <asm/stacktrace.h> 10 #include <asm/switch_to.h> 11 #include <asm/tm.h> 12 13 #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG 14 /* 15 * WARN/BUG is handled with a program interrupt so minimise checks here to 16 * avoid recursion and maximise the chance of getting the first oops handled. 17 */ 18 #define INT_SOFT_MASK_BUG_ON(regs, cond) \ 19 do { \ 20 if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \ 21 BUG_ON(cond); \ 22 } while (0) 23 #else 24 #define INT_SOFT_MASK_BUG_ON(regs, cond) 25 #endif 26 27 #ifdef CONFIG_PPC_BOOK3S_64 28 extern char __end_soft_masked[]; 29 bool search_kernel_soft_mask_table(unsigned long addr); 30 unsigned long search_kernel_restart_table(unsigned long addr); 31 32 DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); 33 34 static inline bool is_implicit_soft_masked(struct pt_regs *regs) 35 { 36 if (user_mode(regs)) 37 return false; 38 39 if (regs->nip >= (unsigned long)__end_soft_masked) 40 return false; 41 42 return search_kernel_soft_mask_table(regs->nip); 43 } 44 45 static inline void srr_regs_clobbered(void) 46 { 47 local_paca->srr_valid = 0; 48 local_paca->hsrr_valid = 0; 49 } 50 #else 51 static inline unsigned long search_kernel_restart_table(unsigned long addr) 52 { 53 return 0; 54 } 55 56 static inline bool is_implicit_soft_masked(struct pt_regs *regs) 57 { 58 return false; 59 } 60 61 static inline void srr_regs_clobbered(void) 62 { 63 } 64 #endif 65 66 static inline void nap_adjust_return(struct pt_regs *regs) 67 { 68 #ifdef CONFIG_PPC_970_NAP 69 if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { 70 /* Can avoid a test-and-clear because NMIs do not call this */ 71 clear_thread_local_flags(_TLF_NAPPING); 72 regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return); 73 } 74 #endif 75 } 76 77 static __always_inline void booke_load_dbcr0(void) 78 { 79 #ifdef CONFIG_PPC_ADV_DEBUG_REGS 80 unsigned long dbcr0 = current->thread.debug.dbcr0; 81 82 if (likely(!(dbcr0 & DBCR0_IDM))) 83 return; 84 85 /* 86 * Check to see if the dbcr0 register is set up to debug. 87 * Use the internal debug mode bit to do this. 88 */ 89 mtmsr(mfmsr() & ~MSR_DE); 90 if (IS_ENABLED(CONFIG_PPC32)) { 91 isync(); 92 global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0); 93 } 94 mtspr(SPRN_DBCR0, dbcr0); 95 mtspr(SPRN_DBSR, -1); 96 #endif 97 } 98 99 static inline void booke_restore_dbcr0(void) 100 { 101 #ifdef CONFIG_PPC_ADV_DEBUG_REGS 102 unsigned long dbcr0 = current->thread.debug.dbcr0; 103 104 if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) { 105 mtspr(SPRN_DBSR, -1); 106 mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]); 107 } 108 #endif 109 } 110 111 static inline void check_return_regs_valid(struct pt_regs *regs) 112 { 113 #ifdef CONFIG_PPC_BOOK3S_64 114 unsigned long trap, srr0, srr1; 115 static bool warned; 116 u8 *validp; 117 char *h; 118 119 if (trap_is_scv(regs)) 120 return; 121 122 trap = TRAP(regs); 123 // EE in HV mode sets HSRRs like 0xea0 124 if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) 125 trap = 0xea0; 126 127 switch (trap) { 128 case 0x980: 129 case INTERRUPT_H_DATA_STORAGE: 130 case 0xe20: 131 case 0xe40: 132 case INTERRUPT_HMI: 133 case 0xe80: 134 case 0xea0: 135 case INTERRUPT_H_FAC_UNAVAIL: 136 case 0x1200: 137 case 0x1500: 138 case 0x1600: 139 case 0x1800: 140 validp = &local_paca->hsrr_valid; 141 if (!READ_ONCE(*validp)) 142 return; 143 144 srr0 = mfspr(SPRN_HSRR0); 145 srr1 = mfspr(SPRN_HSRR1); 146 h = "H"; 147 148 break; 149 default: 150 validp = &local_paca->srr_valid; 151 if (!READ_ONCE(*validp)) 152 return; 153 154 srr0 = mfspr(SPRN_SRR0); 155 srr1 = mfspr(SPRN_SRR1); 156 h = ""; 157 break; 158 } 159 160 if (srr0 == regs->nip && srr1 == regs->msr) 161 return; 162 163 /* 164 * A NMI / soft-NMI interrupt may have come in after we found 165 * srr_valid and before the SRRs are loaded. The interrupt then 166 * comes in and clobbers SRRs and clears srr_valid. Then we load 167 * the SRRs here and test them above and find they don't match. 168 * 169 * Test validity again after that, to catch such false positives. 170 * 171 * This test in general will have some window for false negatives 172 * and may not catch and fix all such cases if an NMI comes in 173 * later and clobbers SRRs without clearing srr_valid, but hopefully 174 * such things will get caught most of the time, statistically 175 * enough to be able to get a warning out. 176 */ 177 if (!READ_ONCE(*validp)) 178 return; 179 180 if (!data_race(warned)) { 181 data_race(warned = true); 182 pr_warn("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); 183 pr_warn("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); 184 show_regs(regs); 185 } 186 187 WRITE_ONCE(*validp, 0); /* fixup */ 188 #endif 189 } 190 191 static inline void arch_interrupt_enter_prepare(struct pt_regs *regs) 192 { 193 #ifdef CONFIG_PPC64 194 irq_soft_mask_set(IRQS_ALL_DISABLED); 195 196 /* 197 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE]. 198 * Asynchronous interrupts get here with HARD_DIS set (see below), so 199 * this enables MSR[EE] for synchronous interrupts. IRQs remain 200 * soft-masked. The interrupt handler may later call 201 * interrupt_cond_local_irq_enable() to achieve a regular process 202 * context. 203 */ 204 if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { 205 INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE)); 206 __hard_irq_enable(); 207 } else { 208 __hard_RI_enable(); 209 } 210 /* Enable MSR[RI] early, to support kernel SLB and hash faults */ 211 #endif 212 213 if (!regs_irqs_disabled(regs)) 214 trace_hardirqs_off(); 215 216 if (user_mode(regs)) { 217 kuap_lock(); 218 account_cpu_user_entry(); 219 account_stolen_time(); 220 } else { 221 kuap_save_and_lock(regs); 222 /* 223 * CT_WARN_ON comes here via program_check_exception, 224 * so avoid recursion. 225 */ 226 if (TRAP(regs) != INTERRUPT_PROGRAM) 227 CT_WARN_ON(ct_state() != CT_STATE_KERNEL && 228 ct_state() != CT_STATE_IDLE); 229 INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs)); 230 INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) && 231 search_kernel_restart_table(regs->nip)); 232 } 233 INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) && 234 !(regs->msr & MSR_EE)); 235 236 booke_restore_dbcr0(); 237 } 238 239 /* 240 * Care should be taken to note that arch_interrupt_exit_prepare and 241 * arch_interrupt_async_exit_prepare do not necessarily return immediately to 242 * regs context (e.g., if regs is usermode, we don't necessarily return to 243 * user mode). Other interrupts might be taken between here and return, 244 * context switch / preemption may occur in the exit path after this, or a 245 * signal may be delivered, etc. 246 * 247 * The real interrupt exit code is platform specific, e.g., 248 * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s. 249 * 250 * However arch_interrupt_nmi_exit_prepare does return directly to regs, because 251 * NMIs do not do "exit work" or replay soft-masked interrupts. 252 */ 253 static inline void arch_interrupt_exit_prepare(struct pt_regs *regs) 254 { 255 if (user_mode(regs)) { 256 BUG_ON(regs_is_unrecoverable(regs)); 257 BUG_ON(regs_irqs_disabled(regs)); 258 /* 259 * We don't need to restore AMR on the way back to userspace for KUAP. 260 * AMR can only have been unlocked if we interrupted the kernel. 261 */ 262 kuap_assert_locked(); 263 } 264 265 /* irqentry_exit expects to be called with interrupts disabled */ 266 local_irq_disable(); 267 } 268 269 static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs) 270 { 271 #ifdef CONFIG_PPC64 272 /* Ensure arch_interrupt_enter_prepare does not enable MSR[EE] */ 273 local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 274 #endif 275 arch_interrupt_enter_prepare(regs); 276 #ifdef CONFIG_PPC_BOOK3S_64 277 /* 278 * RI=1 is set by arch_interrupt_enter_prepare, so this thread flags access 279 * has to come afterward (it can cause SLB faults). 280 */ 281 if (cpu_has_feature(CPU_FTR_CTRL) && 282 !test_thread_local_flags(_TLF_RUNLATCH)) 283 __ppc64_runlatch_on(); 284 #endif 285 } 286 287 static inline void arch_interrupt_async_exit_prepare(struct pt_regs *regs) 288 { 289 /* 290 * Adjust at exit so the main handler sees the true NIA. This must 291 * come before irq_exit() because irq_exit can enable interrupts, and 292 * if another interrupt is taken before nap_adjust_return has run 293 * here, then that interrupt would return directly to idle nap return. 294 */ 295 nap_adjust_return(regs); 296 297 arch_interrupt_exit_prepare(regs); 298 } 299 300 struct interrupt_nmi_state { 301 #ifdef CONFIG_PPC64 302 u8 irq_soft_mask; 303 u8 irq_happened; 304 u8 ftrace_enabled; 305 u64 softe; 306 #endif 307 }; 308 309 static inline bool nmi_disables_ftrace(struct pt_regs *regs) 310 { 311 /* Allow DEC and PMI to be traced when they are soft-NMI */ 312 if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { 313 if (TRAP(regs) == INTERRUPT_DECREMENTER) 314 return false; 315 if (TRAP(regs) == INTERRUPT_PERFMON) 316 return false; 317 } 318 if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { 319 if (TRAP(regs) == INTERRUPT_PERFMON) 320 return false; 321 } 322 323 return true; 324 } 325 326 static inline void arch_interrupt_nmi_enter_prepare(struct pt_regs *regs, 327 struct interrupt_nmi_state *state) 328 { 329 #ifdef CONFIG_PPC64 330 state->irq_soft_mask = local_paca->irq_soft_mask; 331 state->irq_happened = local_paca->irq_happened; 332 state->softe = regs->softe; 333 334 /* 335 * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does 336 * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile 337 * because that goes through irq tracing which we don't want in NMI. 338 */ 339 local_paca->irq_soft_mask = IRQS_ALL_DISABLED; 340 local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 341 342 if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) { 343 /* 344 * Adjust regs->softe to be soft-masked if it had not been 345 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe 346 * not yet set disabled), or if it was in an implicit soft 347 * masked state. This makes regs_irqs_disabled(regs) 348 * behave as expected. 349 */ 350 regs->softe = IRQS_ALL_DISABLED; 351 } 352 353 __hard_RI_enable(); 354 355 /* Don't do any per-CPU operations until interrupt state is fixed */ 356 357 if (nmi_disables_ftrace(regs)) { 358 state->ftrace_enabled = this_cpu_get_ftrace_enabled(); 359 this_cpu_set_ftrace_enabled(0); 360 } 361 #endif 362 } 363 364 static inline void arch_interrupt_nmi_exit_prepare(struct pt_regs *regs, 365 struct interrupt_nmi_state *state) 366 { 367 /* 368 * nmi does not call nap_adjust_return because nmi should not create 369 * new work to do (must use irq_work for that). 370 */ 371 372 #ifdef CONFIG_PPC64 373 #ifdef CONFIG_PPC_BOOK3S 374 if (regs_irqs_disabled(regs)) { 375 unsigned long rst = search_kernel_restart_table(regs->nip); 376 377 if (rst) 378 regs_set_return_ip(regs, rst); 379 } 380 #endif 381 382 if (nmi_disables_ftrace(regs)) 383 this_cpu_set_ftrace_enabled(state->ftrace_enabled); 384 385 /* Check we didn't change the pending interrupt mask. */ 386 WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); 387 regs->softe = state->softe; 388 local_paca->irq_happened = state->irq_happened; 389 local_paca->irq_soft_mask = state->irq_soft_mask; 390 #endif 391 } 392 393 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) 394 { 395 kuap_lock(); 396 397 if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) 398 BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); 399 400 BUG_ON(regs_is_unrecoverable(regs)); 401 BUG_ON(!user_mode(regs)); 402 BUG_ON(regs_irqs_disabled(regs)); 403 404 #ifdef CONFIG_PPC_PKEY 405 if (mmu_has_feature(MMU_FTR_PKEY) && trap_is_syscall(regs)) { 406 unsigned long amr, iamr; 407 bool flush_needed = false; 408 /* 409 * When entering from userspace we mostly have the AMR/IAMR 410 * different from kernel default values. Hence don't compare. 411 */ 412 amr = mfspr(SPRN_AMR); 413 iamr = mfspr(SPRN_IAMR); 414 regs->amr = amr; 415 regs->iamr = iamr; 416 if (mmu_has_feature(MMU_FTR_KUAP)) { 417 mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); 418 flush_needed = true; 419 } 420 if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { 421 mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); 422 flush_needed = true; 423 } 424 if (flush_needed) 425 isync(); 426 } 427 #endif 428 kuap_assert_locked(); 429 booke_restore_dbcr0(); 430 account_cpu_user_entry(); 431 account_stolen_time(); 432 433 /* 434 * This is not required for the syscall exit path, but makes the 435 * stack frame look nicer. If this was initialised in the first stack 436 * frame, or if the unwinder was taught the first stack frame always 437 * returns to user with IRQS_ENABLED, this store could be avoided! 438 */ 439 irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); 440 441 /* 442 * If system call is called with TM active, set _TIF_RESTOREALL to 443 * prevent RFSCV being used to return to userspace, because POWER9 444 * TM implementation has problems with this instruction returning to 445 * transactional state. Final register values are not relevant because 446 * the transaction will be aborted upon return anyway. Or in the case 447 * of unsupported_scv SIGILL fault, the return state does not much 448 * matter because it's an edge case. 449 */ 450 if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && 451 unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) 452 set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); 453 454 /* 455 * If the system call was made with a transaction active, doom it and 456 * return without performing the system call. Unless it was an 457 * unsupported scv vector, in which case it's treated like an illegal 458 * instruction. 459 */ 460 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 461 if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && 462 !trap_is_unsupported_scv(regs)) { 463 /* Enable TM in the kernel, and disable EE (for scv) */ 464 hard_irq_disable(); 465 mtmsr(mfmsr() | MSR_TM); 466 467 /* tabort, this dooms the transaction, nothing else */ 468 asm volatile(".long 0x7c00071d | ((%0) << 16)" 469 :: "r"(TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT)); 470 471 /* 472 * Userspace will never see the return value. Execution will 473 * resume after the tbegin. of the aborted transaction with the 474 * checkpointed register state. A context switch could occur 475 * or signal delivered to the process before resuming the 476 * doomed transaction context, but that should all be handled 477 * as expected. 478 */ 479 return; 480 } 481 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 482 } 483 484 #define arch_enter_from_user_mode arch_enter_from_user_mode 485 486 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, 487 unsigned long ti_work) 488 { 489 unsigned long mathflags; 490 491 if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { 492 if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && 493 unlikely((ti_work & _TIF_RESTORE_TM))) { 494 restore_tm_state(regs); 495 } else { 496 mathflags = MSR_FP; 497 498 if (cpu_has_feature(CPU_FTR_VSX)) 499 mathflags |= MSR_VEC | MSR_VSX; 500 else if (cpu_has_feature(CPU_FTR_ALTIVEC)) 501 mathflags |= MSR_VEC; 502 503 /* 504 * If userspace MSR has all available FP bits set, 505 * then they are live and no need to restore. If not, 506 * it means the regs were given up and restore_math 507 * may decide to restore them (to avoid taking an FP 508 * fault). 509 */ 510 if ((regs->msr & mathflags) != mathflags) 511 restore_math(regs); 512 } 513 } 514 515 check_return_regs_valid(regs); 516 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 517 local_paca->tm_scratch = regs->msr; 518 #endif 519 /* Restore user access locks last */ 520 kuap_user_restore(regs); 521 } 522 523 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare 524 525 static __always_inline void arch_exit_to_user_mode(void) 526 { 527 booke_load_dbcr0(); 528 529 account_cpu_user_exit(); 530 } 531 532 #define arch_exit_to_user_mode arch_exit_to_user_mode 533 534 #endif /* _ASM_PPC_ENTRY_COMMON_H */ 535