// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *		 Ulrich Weigand (uweigand@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995  Linus Torvalds
 */

#include <linux/kernel_stat.h>
#include <linux/mmu_context.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
#include <linux/pagewalk.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/fault.h>
#include <asm/diag.h>
#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"

enum fault_type {
	KERNEL_FAULT,
	USER_FAULT,
	GMAP_FAULT,
};

static DEFINE_STATIC_KEY_FALSE(have_store_indication);

static int __init fault_init(void)
{
	if (test_facility(75))
		static_branch_enable(&have_store_indication);
	return 0;
}
early_initcall(fault_init);

/*
 * Find out which address space caused the exception.
 */
static enum fault_type get_fault_type(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };
	struct gmap *gmap;

	if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
		if (user_mode(regs))
			return USER_FAULT;
		if (!IS_ENABLED(CONFIG_PGSTE))
			return KERNEL_FAULT;
		gmap = (struct gmap *)get_lowcore()->gmap;
		if (gmap && gmap->asce == regs->cr1)
			return GMAP_FAULT;
		return KERNEL_FAULT;
	}
	if (teid.as == PSW_BITS_AS_SECONDARY)
		return USER_FAULT;
	/* Access register mode, not used in the kernel */
	if (teid.as == PSW_BITS_AS_ACCREG)
		return USER_FAULT;
	/* Home space -> access via kernel ASCE */
	return KERNEL_FAULT;
}

static unsigned long get_fault_address(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

	return teid.addr * PAGE_SIZE;
}

static __always_inline bool fault_is_write(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

	if (static_branch_likely(&have_store_indication))
		return teid.fsi == TEID_FSI_STORE;
	return false;
}

static void dump_pagetable(unsigned long asce, unsigned long address)
{
	unsigned long entry, *table = __va(asce & _ASCE_ORIGIN);

	pr_alert("AS:%016lx ", asce);
	switch (asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("R1:%016lx ", entry);
		if (entry & _REGION_ENTRY_INVALID)
			goto out;
		table = __va(entry & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("R2:%016lx ", entry);
		if (entry & _REGION_ENTRY_INVALID)
			goto out;
		table = __va(entry & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("R3:%016lx ", entry);
		if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
			goto out;
		table = __va(entry & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("S:%016lx ", entry);
		if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
			goto out;
		table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
	}
	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
	if (get_kernel_nofault(entry, table))
		goto bad;
	pr_cont("P:%016lx ", entry);
out:
	pr_cont("\n");
	return;
bad:
	pr_cont("BAD\n");
}

static void dump_fault_info(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };
	unsigned long asce;

	pr_alert("Failing address: %016lx TEID: %016lx\n",
		 get_fault_address(regs), teid.val);
	pr_alert("Fault in ");
	switch (teid.as) {
	case PSW_BITS_AS_HOME:
		pr_cont("home space ");
		break;
	case PSW_BITS_AS_SECONDARY:
		pr_cont("secondary space ");
		break;
	case PSW_BITS_AS_ACCREG:
		pr_cont("access register ");
		break;
	case PSW_BITS_AS_PRIMARY:
		pr_cont("primary space ");
		break;
	}
	pr_cont("mode while using ");
	switch (get_fault_type(regs)) {
	case USER_FAULT:
		asce = get_lowcore()->user_asce.val;
		pr_cont("user ");
		break;
	case GMAP_FAULT:
		asce = ((struct gmap *)get_lowcore()->gmap)->asce;
		pr_cont("gmap ");
		break;
	case KERNEL_FAULT:
		asce = get_lowcore()->kernel_asce.val;
		pr_cont("kernel ");
		break;
	default:
		unreachable();
	}
	pr_cont("ASCE.\n");
	dump_pagetable(asce, get_fault_address(regs));
}

int show_unhandled_signals = 1;

void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);

	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
		return;
	if (!unhandled_signal(current, signr))
		return;
	if (!__ratelimit(&rs))
		return;
	pr_alert("User process fault: interruption code %04x ilc:%d ",
		 regs->int_code & 0xffff, regs->int_code >> 17);
	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
	pr_cont("\n");
	if (is_mm_fault)
		dump_fault_info(regs);
	show_regs(regs);
}

static void do_sigsegv(struct pt_regs *regs, int si_code)
{
	report_user_fault(regs, SIGSEGV, 1);
	force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs));
}

static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
{
	enum fault_type fault_type;
	unsigned long address;
	bool is_write;

	if (user_mode(regs)) {
		if (WARN_ON_ONCE(!si_code))
			si_code = SEGV_MAPERR;
		return do_sigsegv(regs, si_code);
	}
	if (fixup_exception(regs))
		return;
	fault_type = get_fault_type(regs);
	if (fault_type == KERNEL_FAULT) {
		address = get_fault_address(regs);
		is_write = fault_is_write(regs);
		if (kfence_handle_page_fault(address, is_write, regs))
			return;
	}
	if (fault_type == KERNEL_FAULT)
		pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
	else
		pr_alert("Unable to handle kernel paging request in virtual user address space\n");
	dump_fault_info(regs);
	die(regs, "Oops");
}

static void handle_fault_error(struct pt_regs *regs, int si_code)
{
	struct mm_struct *mm = current->mm;

	mmap_read_unlock(mm);
	handle_fault_error_nolock(regs, si_code);
}

static void do_sigbus(struct pt_regs *regs)
{
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs));
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *	04	Protection		-> Write-Protection (suppression)
 *	10	Segment translation	-> Not present	    (nullification)
 *	11	Page translation	-> Not present	    (nullification)
 *	3b	Region third trans.	-> Not present	    (nullification)
 */
static void do_exception(struct pt_regs *regs, int access)
{
	struct vm_area_struct *vma;
	unsigned long address;
	struct mm_struct *mm;
	enum fault_type type;
	unsigned int flags;
	struct gmap *gmap;
	vm_fault_t fault;
	bool is_write;

	/*
	 * The instruction that caused the program check has
	 * been nullified. Don't signal single step via SIGTRAP.
	 */
	clear_thread_flag(TIF_PER_TRAP);
	if (kprobe_page_fault(regs, 14))
		return;
	mm = current->mm;
	address = get_fault_address(regs);
	is_write = fault_is_write(regs);
	type = get_fault_type(regs);
	switch (type) {
	case KERNEL_FAULT:
		return handle_fault_error_nolock(regs, 0);
	case USER_FAULT:
	case GMAP_FAULT:
		if (faulthandler_disabled() || !mm)
			return handle_fault_error_nolock(regs, 0);
		break;
	}
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	flags = FAULT_FLAG_DEFAULT;
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (is_write)
		access = VM_WRITE;
	if (access == VM_WRITE)
		flags |= FAULT_FLAG_WRITE;
	if (!(flags & FAULT_FLAG_USER))
		goto lock_mmap;
	vma = lock_vma_under_rcu(mm, address);
	if (!vma)
		goto lock_mmap;
	if (!(vma->vm_flags & access)) {
		vma_end_read(vma);
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
		return handle_fault_error_nolock(regs, SEGV_ACCERR);
	}
	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);
	if (!(fault & VM_FAULT_RETRY)) {
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
		if (unlikely(fault & VM_FAULT_ERROR))
			goto error;
		return;
	}
	count_vm_vma_lock_event(VMA_LOCK_RETRY);
	if (fault & VM_FAULT_MAJOR)
		flags |= FAULT_FLAG_TRIED;

	/* Quick path to respond to signals */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		return;
	}
lock_mmap:
	mmap_read_lock(mm);
	gmap = NULL;
	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
		gmap = (struct gmap *)get_lowcore()->gmap;
		current->thread.gmap_addr = address;
		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
		current->thread.gmap_int_code = regs->int_code & 0xffff;
		address = __gmap_translate(gmap, address);
		if (address == -EFAULT)
			return handle_fault_error(regs, SEGV_MAPERR);
		if (gmap->pfault_enabled)
			flags |= FAULT_FLAG_RETRY_NOWAIT;
	}
retry:
	vma = find_vma(mm, address);
	if (!vma)
		return handle_fault_error(regs, SEGV_MAPERR);
	if (unlikely(vma->vm_start > address)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			return handle_fault_error(regs, SEGV_MAPERR);
		vma = expand_stack(mm, address);
		if (!vma)
			return handle_fault_error_nolock(regs, SEGV_MAPERR);
	}
	if (unlikely(!(vma->vm_flags & access)))
		return handle_fault_error(regs, SEGV_ACCERR);
	fault = handle_mm_fault(vma, address, flags, regs);
	if (fault_signal_pending(fault, regs)) {
		if (flags & FAULT_FLAG_RETRY_NOWAIT)
			mmap_read_unlock(mm);
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		return;
	}
	/* The fault is fully completed (including releasing mmap lock) */
	if (fault & VM_FAULT_COMPLETED) {
		if (gmap) {
			mmap_read_lock(mm);
			goto gmap;
		}
		return;
	}
	if (unlikely(fault & VM_FAULT_ERROR)) {
		mmap_read_unlock(mm);
		goto error;
	}
	if (fault & VM_FAULT_RETRY) {
		if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
			/*
			 * FAULT_FLAG_RETRY_NOWAIT has been set,
			 * mmap_lock has not been released
			 */
			current->thread.gmap_pfault = 1;
			return handle_fault_error(regs, 0);
		}
		flags &= ~FAULT_FLAG_RETRY_NOWAIT;
		flags |= FAULT_FLAG_TRIED;
		mmap_read_lock(mm);
		goto retry;
	}
gmap:
	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
		address = __gmap_link(gmap, current->thread.gmap_addr,
				      address);
		if (address == -EFAULT)
			return handle_fault_error(regs, SEGV_MAPERR);
		if (address == -ENOMEM) {
			fault = VM_FAULT_OOM;
			mmap_read_unlock(mm);
			goto error;
		}
	}
	mmap_read_unlock(mm);
	return;
error:
	if (fault & VM_FAULT_OOM) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		else
			pagefault_out_of_memory();
	} else if (fault & VM_FAULT_SIGSEGV) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		else
			do_sigsegv(regs, SEGV_MAPERR);
	} else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		else
			do_sigbus(regs);
	} else {
		pr_emerg("Unexpected fault flags: %08x\n", fault);
		BUG();
	}
}

void do_protection_exception(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

	/*
	 * Protection exceptions are suppressing, decrement psw address.
	 * The exception to this rule are aborted transactions, for these
	 * the PSW already points to the correct location.
	 */
	if (!(regs->int_code & 0x200))
		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
	/*
	 * Check for low-address protection. This needs to be treated
	 * as a special case because the translation exception code
	 * field is not guaranteed to contain valid data in this case.
	 */
	if (unlikely(!teid.b61)) {
		if (user_mode(regs)) {
			/* Low-address protection in user mode: cannot happen */
			die(regs, "Low-address protection");
		}
		/*
		 * Low-address protection in kernel mode means
		 * NULL pointer write access in kernel mode.
		 */
		return handle_fault_error_nolock(regs, 0);
	}
	if (unlikely(MACHINE_HAS_NX && teid.b56)) {
		regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
		return handle_fault_error_nolock(regs, SEGV_ACCERR);
	}
	do_exception(regs, VM_WRITE);
}
NOKPROBE_SYMBOL(do_protection_exception);

void do_dat_exception(struct pt_regs *regs)
{
	do_exception(regs, VM_ACCESS_FLAGS);
}
NOKPROBE_SYMBOL(do_dat_exception);

#if IS_ENABLED(CONFIG_PGSTE)

void do_secure_storage_access(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };
	unsigned long addr = get_fault_address(regs);
	struct vm_area_struct *vma;
	struct folio_walk fw;
	struct mm_struct *mm;
	struct folio *folio;
	struct gmap *gmap;
	int rc;

	/*
	 * Bit 61 indicates if the address is valid, if it is not the
	 * kernel should be stopped or SIGSEGV should be sent to the
	 * process. Bit 61 is not reliable without the misc UV feature,
	 * therefore this needs to be checked too.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) {
		/*
		 * When this happens, userspace did something that it
		 * was not supposed to do, e.g. branching into secure
		 * memory. Trigger a segmentation fault.
		 */
		if (user_mode(regs)) {
			send_sig(SIGSEGV, current, 0);
			return;
		}
		/*
		 * The kernel should never run into this case and
		 * there is no way out of this situation.
		 */
		panic("Unexpected PGM 0x3d with TEID bit 61=0");
	}
	switch (get_fault_type(regs)) {
	case GMAP_FAULT:
		mm = current->mm;
		gmap = (struct gmap *)get_lowcore()->gmap;
		mmap_read_lock(mm);
		addr = __gmap_translate(gmap, addr);
		mmap_read_unlock(mm);
		if (IS_ERR_VALUE(addr))
			return handle_fault_error_nolock(regs, SEGV_MAPERR);
		fallthrough;
	case USER_FAULT:
		mm = current->mm;
		mmap_read_lock(mm);
		vma = find_vma(mm, addr);
		if (!vma)
			return handle_fault_error(regs, SEGV_MAPERR);
		folio = folio_walk_start(&fw, vma, addr, 0);
		if (!folio) {
			mmap_read_unlock(mm);
			break;
		}
		/* arch_make_folio_accessible() needs a raised refcount. */
		folio_get(folio);
		rc = arch_make_folio_accessible(folio);
		folio_put(folio);
		folio_walk_end(&fw, vma);
		if (rc)
			send_sig(SIGSEGV, current, 0);
		mmap_read_unlock(mm);
		break;
	case KERNEL_FAULT:
		folio = phys_to_folio(addr);
		if (unlikely(!folio_try_get(folio)))
			break;
		rc = arch_make_folio_accessible(folio);
		folio_put(folio);
		if (rc)
			BUG();
		break;
	default:
		unreachable();
	}
}
NOKPROBE_SYMBOL(do_secure_storage_access);

void do_non_secure_storage_access(struct pt_regs *regs)
{
	struct gmap *gmap = (struct gmap *)get_lowcore()->gmap;
	unsigned long gaddr = get_fault_address(regs);

	if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT))
		return handle_fault_error_nolock(regs, SEGV_MAPERR);
	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
		send_sig(SIGSEGV, current, 0);
}
NOKPROBE_SYMBOL(do_non_secure_storage_access);

void do_secure_storage_violation(struct pt_regs *regs)
{
	struct gmap *gmap = (struct gmap *)get_lowcore()->gmap;
	unsigned long gaddr = get_fault_address(regs);

	/*
	 * If the VM has been rebooted, its address space might still contain
	 * secure pages from the previous boot.
	 * Clear the page so it can be reused.
	 */
	if (!gmap_destroy_page(gmap, gaddr))
		return;
	/*
	 * Either KVM messed up the secure guest mapping or the same
	 * page is mapped into multiple secure guests.
	 *
	 * This exception is only triggered when a guest 2 is running
	 * and can therefore never occur in kernel context.
	 */
	pr_warn_ratelimited("Secure storage violation in task: %s, pid %d\n",
			    current->comm, current->pid);
	send_sig(SIGSEGV, current, 0);
}

#endif /* CONFIG_PGSTE */