// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/arch/arm/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Modifications for ARM processor (c) 1995-2004 Russell King
 */
#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/kfence.h>

#include <asm/system_misc.h>
#include <asm/system_info.h>
#include <asm/tlbflush.h>

#include "fault.h"

#ifdef CONFIG_MMU

bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
	unsigned long addr = (unsigned long)unsafe_src;

	return addr >= TASK_SIZE && ULONG_MAX - addr >= size;
}

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (!mm)
		mm = &init_mm;

	pgd = pgd_offset(mm, addr);
	printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));

	do {
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		p4d = p4d_offset(pgd, addr);
		if (p4d_none(*p4d))
			break;

		if (p4d_bad(*p4d)) {
			pr_cont("(bad)");
			break;
		}

		pud = pud_offset(p4d, addr);
		if (PTRS_PER_PUD != 1)
			pr_cont(", *pud=%08llx", (long long)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			pr_cont("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			pr_cont("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_map(pmd, addr);
		if (!pte)
			break;

		pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
		pr_cont(", *ppte=%08llx",
			(long long)pte_val(pte[PTE_HWTABLE_PTRS]));
#endif
		pte_unmap(pte);
	} while(0);

	pr_cont("\n");
}
#else	/* CONFIG_MMU */
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{ }
#endif	/* CONFIG_MMU */
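/*
 * Helpers below decode the fault status register (FSR).  is_write_fault()
 * only treats an abort as a write when the write bit is set and the
 * cache-maintenance (CM) bit is clear, since aborts taken on cache
 * maintenance operations report the write bit but should not be handled
 * as write accesses.  is_translation_fault() here and is_permission_fault()
 * further down decode the fault-status field, whose layout differs between
 * the LPAE (long-descriptor) and classic short-descriptor formats.
 */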
static inline bool is_write_fault(unsigned int fsr)
{
	return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
}

static inline bool is_translation_fault(unsigned int fsr)
{
	int fs = fsr_fs(fsr);
#ifdef CONFIG_ARM_LPAE
	if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL)
		return true;
#else
	if (fs == FS_L1_TRANS || fs == FS_L2_TRANS)
		return true;
#endif
	return false;
}

static void die_kernel_fault(const char *msg, struct mm_struct *mm,
			     unsigned long addr, unsigned int fsr,
			     struct pt_regs *regs)
{
	bust_spinlocks(1);
	pr_alert("8<--- cut here ---\n");
	pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n",
		 msg, addr, fsr & FSR_LNX_PF ? "execute" : str_write_read(fsr & FSR_WRITE));

	show_pte(KERN_ALERT, mm, addr);
	die("Oops", regs, fsr);
	bust_spinlocks(0);
	make_task_dead(SIGKILL);
}

/*
 * Oops. The kernel tried to access some page that wasn't present.
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
		  struct pt_regs *regs)
{
	const char *msg;
	/*
	 * Are we prepared to handle this kernel fault?
	 */
	if (fixup_exception(regs))
		return;

	/*
	 * No handler, we'll have to terminate things with extreme prejudice.
	 */
	if (addr < PAGE_SIZE) {
		msg = "NULL pointer dereference";
	} else {
		if (is_translation_fault(fsr) &&
		    kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
			return;

		msg = "paging request";
	}

	die_kernel_fault(msg, mm, addr, fsr, regs);
}

/*
 * Something tried to access memory that isn't in our memory map..
 * User mode accesses just cause a SIGSEGV
 */
static void
__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
		int code, struct pt_regs *regs)
{
	struct task_struct *tsk = current;

	if (addr > TASK_SIZE)
		harden_branch_predictor();

#ifdef CONFIG_DEBUG_USER
	if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
	    ((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
		pr_err("8<--- cut here ---\n");
		pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
		       tsk->comm, sig, addr, fsr);
		show_pte(KERN_ERR, tsk->mm, addr);
		show_regs(regs);
	}
#endif
#ifndef CONFIG_KUSER_HELPERS
	if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
		printk_ratelimited(KERN_DEBUG
				   "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
				   tsk->comm, addr);
#endif

	tsk->thread.address = addr;
	tsk->thread.error_code = fsr;
	tsk->thread.trap_no = 14;
	force_sig_fault(sig, code, (void __user *)addr);
}

void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->active_mm;

	/*
	 * If we are in kernel mode at this point, we
	 * have no context to handle this fault with.
	 */
	if (user_mode(regs))
		__do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
	else
		__do_kernel_fault(mm, addr, fsr, regs);
}

#ifdef CONFIG_MMU
static inline bool is_permission_fault(unsigned int fsr)
{
	int fs = fsr_fs(fsr);
#ifdef CONFIG_ARM_LPAE
	if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
		return true;
#else
	if (fs == FS_L1_PERM || fs == FS_L2_PERM)
		return true;
#endif
	return false;
}

#ifdef CONFIG_CPU_TTBR0_PAN
static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
{
	struct svc_pt_regs *svcregs;

	/* If we are in user mode: permission granted */
	if (user_mode(regs))
		return true;

	/* uaccess state saved above pt_regs on SVC exception entry */
	svcregs = to_svc_pt_regs(regs);

	return !(svcregs->ttbcr & TTBCR_EPD0);
}
#else
static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
{
	return true;
}
#endif
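/*
 * Handle a fault that may be resolvable from the current task's address
 * space.  Data aborts arrive here directly or via do_translation_fault();
 * prefetch aborts arrive tagged with FSR_LNX_PF.  User-mode faults are
 * first attempted under the per-VMA lock (lock_vma_under_rcu()); if that
 * is not possible or must be retried, we fall back to taking the mmap
 * read lock via lock_mm_and_find_vma().  Unresolvable faults end up as a
 * signal to the task or in __do_kernel_fault().
 */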
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int sig, code;
	vm_fault_t fault;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	vm_flags_t vm_flags = VM_ACCESS_FLAGS;

	if (kprobe_page_fault(regs, fsr))
		return 0;

	/* Enable interrupts if they were enabled in the parent context. */
	if (interrupts_enabled(regs))
		local_irq_enable();

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	if (is_write_fault(fsr)) {
		flags |= FAULT_FLAG_WRITE;
		vm_flags = VM_WRITE;
	}

	if (fsr & FSR_LNX_PF) {
		vm_flags = VM_EXEC;

		if (is_permission_fault(fsr) && !user_mode(regs))
			die_kernel_fault("execution of memory",
					 mm, addr, fsr, regs);
	}

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are
	 * routed via the translation fault mechanism. Check whether uaccess
	 * is disabled while in kernel mode.
	 */
	if (!ttbr0_usermode_access_allowed(regs))
		goto no_context;

	if (!(flags & FAULT_FLAG_USER))
		goto lock_mmap;

	vma = lock_vma_under_rcu(mm, addr);
	if (!vma)
		goto lock_mmap;

	if (!(vma->vm_flags & vm_flags)) {
		vma_end_read(vma);
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
		fault = 0;
		code = SEGV_ACCERR;
		goto bad_area;
	}
	fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);

	if (!(fault & VM_FAULT_RETRY)) {
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
		goto done;
	}
	count_vm_vma_lock_event(VMA_LOCK_RETRY);
	if (fault & VM_FAULT_MAJOR)
		flags |= FAULT_FLAG_TRIED;

	/* Quick path to respond to signals */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return 0;
	}
lock_mmap:

retry:
	vma = lock_mm_and_find_vma(mm, addr, regs);
	if (unlikely(!vma)) {
		fault = 0;
		code = SEGV_MAPERR;
		goto bad_area;
	}

	/*
	 * Ok, we have a good vm_area for this memory access; check that the
	 * permissions on the VMA allow the fault which occurred.
	 */
	if (!(vma->vm_flags & vm_flags)) {
		mmap_read_unlock(mm);
		fault = 0;
		code = SEGV_ACCERR;
		goto bad_area;
	}

	fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);

	/* If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because
	 * it would already be released in __lock_page_or_retry in
	 * mm/filemap.c. */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return 0;
	}

	/* The fault is fully completed (including releasing mmap lock) */
	if (fault & VM_FAULT_COMPLETED)
		return 0;

	if (!(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_RETRY) {
			flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	mmap_read_unlock(mm);
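	/*
	 * Both the per-VMA lock fast path and the mmap_lock path converge
	 * here with all locks already dropped; only VM_FAULT_ERROR results
	 * are left to turn into a signal (or a kernel oops).
	 */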
done:

	/* Handle the "normal" case first */
	if (likely(!(fault & VM_FAULT_ERROR)))
		return 0;

	code = SEGV_MAPERR;
bad_area:
	/*
	 * If we are in kernel mode at this point, we
	 * have no context to handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed)
		 */
		pagefault_out_of_memory();
		return 0;
	}

	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to
		 * successfully fix up this page fault.
		 */
		sig = SIGBUS;
		code = BUS_ADRERR;
	} else {
		/*
		 * Something tried to access memory that
		 * isn't in our memory map..
		 */
		sig = SIGSEGV;
	}

	__do_user_fault(addr, fsr, sig, code, regs);
	return 0;

no_context:
	__do_kernel_fault(mm, addr, fsr, regs);
	return 0;
}
#else	/* CONFIG_MMU */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	return 0;
}
#endif	/* CONFIG_MMU */
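/*
 * Background for the fixup below: each task has its own first-level (pgd)
 * table.  Kernel mappings created after that table was set up (e.g. new
 * vmalloc areas) only exist in init_mm's master table, so they are
 * propagated lazily, on first access, by copying the relevant first-level
 * entry.  Copying that one entry is enough because the lower-level tables
 * it points to are shared with init_mm.
 */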
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fix up the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
#ifdef CONFIG_MMU
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
		     struct pt_regs *regs)
{
	unsigned int index;
	pgd_t *pgd, *pgd_k;
	p4d_t *p4d, *p4d_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	if (addr < TASK_SIZE)
		return do_page_fault(addr, fsr, regs);

	if (user_mode(regs))
		goto bad_area;

	index = pgd_index(addr);

	pgd = cpu_get_pgd() + index;
	pgd_k = init_mm.pgd + index;

	p4d = p4d_offset(pgd, addr);
	p4d_k = p4d_offset(pgd_k, addr);

	if (p4d_none(*p4d_k))
		goto bad_area;
	if (!p4d_present(*p4d))
		set_p4d(p4d, *p4d_k);

	pud = pud_offset(p4d, addr);
	pud_k = pud_offset(p4d_k, addr);

	if (pud_none(*pud_k))
		goto bad_area;
	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, addr);
	pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
	/*
	 * Only one hardware entry per PMD with LPAE.
	 */
	index = 0;
#else
	/*
	 * On ARM one Linux PGD entry contains two hardware entries (see the
	 * page table layout in pgtable.h). We normally guarantee that we
	 * always fill both L1 entries. But create_mapping() doesn't follow
	 * the rule. It can create individual L1 entries, so here we have to
	 * do the pmd_none() check on the entry that really corresponds to
	 * the address, not on the first entry of the pair.
	 */
	index = (addr >> SECTION_SHIFT) & 1;
#endif
	if (pmd_none(pmd_k[index]))
		goto bad_area;

	copy_pmd(pmd, pmd_k);
	return 0;

bad_area:
	do_bad_area(addr, fsr, regs);
	return 0;
}
#else	/* CONFIG_MMU */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
		     struct pt_regs *regs)
{
	return 0;
}
#endif	/* CONFIG_MMU */

/*
 * Some section permission faults need to be handled gracefully.
 * They can happen due to a __{get,put}_user during an oops.
 */
#ifndef CONFIG_ARM_LPAE
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	do_bad_area(addr, fsr, regs);
	return 0;
}
#endif /* CONFIG_ARM_LPAE */

/*
 * This abort handler always returns "fault".
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	return 1;
}

struct fsr_info {
	int	(*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
	int	sig;
	int	code;
	const char *name;
};

/* FSR definition */
#ifdef CONFIG_ARM_LPAE
#include "fsr-3level.c"
#else
#include "fsr-2level.c"
#endif

void __init
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
		int sig, int code, const char *name)
{
	if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
		BUG();

	fsr_info[nr].fn = fn;
	fsr_info[nr].sig = sig;
	fsr_info[nr].code = code;
	fsr_info[nr].name = name;
}
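/*
 * FSR_LNX_PF is a Linux-defined flag, not a hardware FSR bit: it is ORed
 * into the fault status by do_PrefetchAbort() so that shared handlers
 * such as do_page_fault() can tell instruction fetches from data
 * accesses, and it is masked out again by do_DataAbort() before the
 * per-fault-status handler is called.
 */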
/*
 * Dispatch a data abort to the relevant handler.
 */
asmlinkage void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);

	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
		return;

	pr_alert("8<--- cut here ---\n");
	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
		 inf->name, fsr, addr);
	show_pte(KERN_ALERT, current->mm, addr);

	arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
		       fsr, 0);
}

void __init
hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
		 int sig, int code, const char *name)
{
	if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
		BUG();

	ifsr_info[nr].fn = fn;
	ifsr_info[nr].sig = sig;
	ifsr_info[nr].code = code;
	ifsr_info[nr].name = name;
}

asmlinkage void
do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
{
	const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);

	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
		return;

	pr_alert("8<--- cut here ---\n");
	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
		 inf->name, ifsr, addr);

	arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
		       ifsr, 0);
}

/*
 * Abort handler to be used only during first unmasking of asynchronous aborts
 * on the boot CPU. This makes sure that the machine will not die if the
 * firmware/bootloader left an imprecise abort pending for us to trip over.
 */
static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
				      struct pt_regs *regs)
{
	pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
		"first unmask, this is most likely caused by a "
		"firmware/bootloader bug.\n", fsr);

	return 0;
}

void __init early_abt_enable(void)
{
	fsr_info[FSR_FS_AEA].fn = early_abort_handler;
	local_abt_enable();
	fsr_info[FSR_FS_AEA].fn = do_bad;
}

#ifndef CONFIG_ARM_LPAE
static int __init exceptions_init(void)
{
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
				"I-cache maintenance fault");
	}

	if (cpu_architecture() >= CPU_ARCH_ARMv7) {
		/*
		 * TODO: Access flag faults introduced in ARMv6K.
		 * Runtime check for 'K' extension is needed
		 */
		hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
				"section access flag fault");
		hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
				"section access flag fault");
	}

	return 0;
}

arch_initcall(exceptions_init);
#endif