/*
 * Based on arch/arm/mm/fault.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 1995-2004 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>

#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

#include <acpi/ghes.h>

struct fault_info {
	int	(*fn)(unsigned long addr, unsigned int esr,
		      struct pt_regs *regs);
	int	sig;
	int	code;
	const char *name;
};

static const struct fault_info fault_info[];

static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
{
	return fault_info + (esr & 63);
}

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, esr))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
{
	return 0;
}
#endif

static void data_abort_decode(unsigned int esr)
{
	pr_alert("Data abort info:\n");

	if (esr & ESR_ELx_ISV) {
		pr_alert("  Access size = %u byte(s)\n",
			 1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
		pr_alert("  SSE = %lu, SRT = %lu\n",
			 (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
			 (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
		pr_alert("  SF = %lu, AR = %lu\n",
			 (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
			 (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
	} else {
		pr_alert("  ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
	}

	pr_alert("  CM = %lu, WnR = %lu\n",
		 (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}

static void mem_abort_decode(unsigned int esr)
{
	pr_alert("Mem abort info:\n");

	pr_alert("  ESR = 0x%08x\n", esr);
	pr_alert("  Exception class = %s, IL = %u bits\n",
		 esr_get_class_string(esr),
		 (esr & ESR_ELx_IL) ?
			32 : 16);
	pr_alert("  SET = %lu, FnV = %lu\n",
		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
		 (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
	pr_alert("  EA = %lu, S1PTW = %lu\n",
		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);

	if (esr_is_data_abort(esr))
		data_abort_decode(esr);
}

/*
 * Dump out the page tables associated with 'addr' in the currently active mm.
 */
void show_pte(unsigned long addr)
{
	struct mm_struct *mm;
	pgd_t *pgdp;
	pgd_t pgd;

	if (addr < TASK_SIZE) {
		/* TTBR0 */
		mm = current->active_mm;
		if (mm == &init_mm) {
			pr_alert("[%016lx] user address but active_mm is swapper\n",
				 addr);
			return;
		}
	} else if (addr >= VA_START) {
		/* TTBR1 */
		mm = &init_mm;
	} else {
		pr_alert("[%016lx] address between user and kernel address ranges\n",
			 addr);
		return;
	}

	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
		 VA_BITS, mm->pgd);
	pgdp = pgd_offset(mm, addr);
	pgd = READ_ONCE(*pgdp);
	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

	do {
		pud_t *pudp, pud;
		pmd_t *pmdp, pmd;
		pte_t *ptep, pte;

		if (pgd_none(pgd) || pgd_bad(pgd))
			break;

		pudp = pud_offset(pgdp, addr);
		pud = READ_ONCE(*pudp);
		pr_cont(", pud=%016llx", pud_val(pud));
		if (pud_none(pud) || pud_bad(pud))
			break;

		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		pr_cont(", pmd=%016llx", pmd_val(pmd));
		if (pmd_none(pmd) || pmd_bad(pmd))
			break;

		ptep = pte_offset_map(pmdp, addr);
		pte = READ_ONCE(*ptep);
		pr_cont(", pte=%016llx", pte_val(pte));
		pte_unmap(ptep);
	} while(0);

	pr_cont("\n");
}

/*
 * This function sets the access flags (dirty, accessed), as well as write
 * permission, and only to a more permissive setting.
 *
 * It needs to cope with hardware update of the accessed/dirty state by other
 * agents in the system and can safely skip the __sync_icache_dcache() call as,
 * like set_pte_at(), the PTE is never changed from no-exec to exec here.
 *
 * Returns whether or not the PTE actually changed.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	pteval_t old_pteval, pteval;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_same(pte, entry))
		return 0;

	/* only preserve the access flags and write permission */
	pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;

	/*
	 * Setting the flags must be done atomically to avoid racing with the
	 * hardware update of the access/dirty state. The PTE_RDONLY bit must
	 * be set to the most permissive (lowest value) of *ptep and entry
	 * (calculated as: a & b == ~(~a | ~b)).
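	 * Temporarily inverting PTE_RDONLY in both values turns the OR in
	 * the loop below into an AND for that one bit, while every other
	 * flag bit is still simply OR-ed in.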
	 */
	pte_val(entry) ^= PTE_RDONLY;
	pteval = pte_val(pte);
	do {
		old_pteval = pteval;
		pteval ^= PTE_RDONLY;
		pteval |= pte_val(entry);
		pteval ^= PTE_RDONLY;
		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
	} while (pteval != old_pteval);

	flush_tlb_fix_spurious_fault(vma, address);
	return 1;
}

static bool is_el1_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}

static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
				       unsigned long addr)
{
	unsigned int ec = ESR_ELx_EC(esr);
	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;

	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
		return false;

	if (fsc_type == ESR_ELx_FSC_PERM)
		return true;

	if (addr < TASK_SIZE && system_uses_ttbr0_pan())
		return fsc_type == ESR_ELx_FSC_FAULT &&
			(regs->pstate & PSR_PAN_BIT);

	return false;
}

static void __do_kernel_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	const char *msg;

	/*
	 * Are we prepared to handle this kernel fault?
	 * We are almost certainly not prepared to handle instruction faults.
	 */
	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
		return;

	bust_spinlocks(1);

	if (is_permission_fault(esr, regs, addr)) {
		if (esr & ESR_ELx_WNR)
			msg = "write to read-only memory";
		else
			msg = "read from unreadable memory";
	} else if (addr < PAGE_SIZE) {
		msg = "NULL pointer dereference";
	} else {
		msg = "paging request";
	}

	pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
		 addr);

	mem_abort_decode(esr);

	show_pte(addr);
	die("Oops", regs, esr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

static void __do_user_fault(struct siginfo *info, unsigned int esr)
{
	current->thread.fault_address = (unsigned long)info->si_addr;

	/*
	 * If the faulting address is in the kernel, we must sanitize the ESR.
	 * From userspace's point of view, kernel-only mappings don't exist
	 * at all, so we report them as level 0 translation faults.
	 * (This is not quite the way that "no mapping there at all" behaves:
	 * an alignment fault not caused by the memory type would take
	 * precedence over translation fault for a real access to empty
	 * space. Unfortunately we can't easily distinguish "alignment fault
	 * not caused by memory type" from "alignment fault caused by memory
	 * type", so we ignore this wrinkle and just return the translation
	 * fault.)
	 */
	if (current->thread.fault_address >= TASK_SIZE) {
		switch (ESR_ELx_EC(esr)) {
		case ESR_ELx_EC_DABT_LOW:
			/*
			 * These bits provide only information about the
			 * faulting instruction, which userspace knows already.
			 * We explicitly clear bits which are architecturally
			 * RES0 in case they are given meanings in future.
			 * We always report the ESR as if the fault was taken
			 * to EL1 and so ISV and the bits in ISS[23:14] are
			 * clear. (In fact it always will be a fault to EL1.)
			 */
			esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
				ESR_ELx_CM | ESR_ELx_WNR;
			esr |= ESR_ELx_FSC_FAULT;
			break;
		case ESR_ELx_EC_IABT_LOW:
			/*
			 * Claim a level 0 translation fault.
			 * All other bits are architecturally RES0 for faults
			 * reported with that DFSC value, so we clear them.
			 */
			esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
			esr |= ESR_ELx_FSC_FAULT;
			break;
		default:
			/*
			 * This should never happen (entry.S only brings us
			 * into this code for insn and data aborts from a lower
			 * exception level). Fail safe by not providing an ESR
			 * context record at all.
			 */
			WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
			esr = 0;
			break;
		}
	}

	current->thread.fault_code = esr;
	arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
}

static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (user_mode(regs)) {
		const struct fault_info *inf = esr_to_fault_info(esr);
		struct siginfo si;

		clear_siginfo(&si);
		si.si_signo = inf->sig;
		si.si_code = inf->code;
		si.si_addr = (void __user *)addr;

		__do_user_fault(&si, esr);
	} else {
		__do_kernel_fault(addr, esr, regs);
	}
}

#define VM_FAULT_BADMAP		0x010000
#define VM_FAULT_BADACCESS	0x020000

static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
			   unsigned int mm_flags, unsigned long vm_flags,
			   struct task_struct *tsk)
{
	struct vm_area_struct *vma;
	int fault;

	vma = find_vma(mm, addr);
	fault = VM_FAULT_BADMAP;
	if (unlikely(!vma))
		goto out;
	if (unlikely(vma->vm_start > addr))
		goto check_stack;

	/*
	 * Ok, we have a good vm_area for this memory access, so we can handle
	 * it.
	 */
good_area:
	/*
	 * Check that the permissions on the VMA allow for the fault which
	 * occurred.
	 */
	if (!(vma->vm_flags & vm_flags)) {
		fault = VM_FAULT_BADACCESS;
		goto out;
	}

	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);

check_stack:
	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
		goto good_area;
out:
	return fault;
}

static bool is_el0_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}

static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
				   struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct siginfo si;
	int fault, major = 0;
	unsigned long vm_flags = VM_READ | VM_WRITE;
	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	if (notify_page_fault(regs, esr))
		return 0;

	tsk = current;
	mm = tsk->mm;

	/*
	 * If we're in an interrupt or have no user context, we must not take
	 * the fault.
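	 * (faulthandler_disabled() is true both in atomic context and inside
	 * explicit pagefault_disable() sections.)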
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		vm_flags = VM_EXEC;
	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	}

	if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) {
		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
		if (regs->orig_addr_limit == KERNEL_DS)
			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);

		if (is_el1_instruction_abort(esr))
			die("Attempting to execute userspace memory", regs, esr);

		if (!search_exception_tables(regs->pc))
			die("Accessing user space memory outside uaccess.h routines", regs, esr);
	}

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in which
		 * case, we'll have missed the might_sleep() from down_read().
		 */
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
#endif
	}

	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
	major |= fault & VM_FAULT_MAJOR;

	if (fault & VM_FAULT_RETRY) {
		/*
		 * If we need to retry but a fatal signal is pending,
		 * handle the signal first. We do not need to release
		 * the mmap_sem because it would already be released
		 * in __lock_page_or_retry in mm/filemap.c.
		 */
		if (fatal_signal_pending(current)) {
			if (!user_mode(regs))
				goto no_context;
			return 0;
		}

		/*
		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
		 * starvation.
		 */
		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			mm_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}
	up_read(&mm->mmap_sem);

	/*
	 * Handle the "normal" (no error) case first.
	 */
	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
			      VM_FAULT_BADACCESS)))) {
		/*
		 * Major/minor page fault accounting is only done
		 * once. If we go through a retry, it is extremely
		 * likely that the page will be found in page cache at
		 * that point.
		 */
		if (major) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
				      addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
				      addr);
		}

		return 0;
	}

	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we got
		 * oom-killed).
		 */
		pagefault_out_of_memory();
		return 0;
	}

	clear_siginfo(&si);
	si.si_addr = (void __user *)addr;

	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to successfully fix up
		 * this page fault.
		 */
		si.si_signo = SIGBUS;
		si.si_code = BUS_ADRERR;
	} else if (fault & VM_FAULT_HWPOISON_LARGE) {
		unsigned int hindex = VM_FAULT_GET_HINDEX(fault);

		si.si_signo = SIGBUS;
		si.si_code = BUS_MCEERR_AR;
		si.si_addr_lsb = hstate_index_to_shift(hindex);
	} else if (fault & VM_FAULT_HWPOISON) {
		si.si_signo = SIGBUS;
		si.si_code = BUS_MCEERR_AR;
		si.si_addr_lsb = PAGE_SHIFT;
	} else {
		/*
		 * Something tried to access memory that isn't in our memory
		 * map.
		 */
		si.si_signo = SIGSEGV;
		si.si_code = fault == VM_FAULT_BADACCESS ?
			     SEGV_ACCERR : SEGV_MAPERR;
	}

	__do_user_fault(&si, esr);
	return 0;

no_context:
	__do_kernel_fault(addr, esr, regs);
	return 0;
}

static int __kprobes do_translation_fault(unsigned long addr,
					  unsigned int esr,
					  struct pt_regs *regs)
{
	if (addr < TASK_SIZE)
		return do_page_fault(addr, esr, regs);

	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_alignment_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	return 1; /* "fault" */
}

static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	struct siginfo info;
	const struct fault_info *inf;

	inf = esr_to_fault_info(esr);

	/*
	 * Synchronous aborts may interrupt code which had interrupts masked.
	 * Before calling out into the wider kernel tell the interested
	 * subsystems.
	 */
	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
		if (interrupts_enabled(regs))
			nmi_enter();

		ghes_notify_sea();

		if (interrupts_enabled(regs))
			nmi_exit();
	}

	clear_siginfo(&info);
	info.si_signo = inf->sig;
	info.si_errno = 0;
	info.si_code = inf->code;
	if (esr & ESR_ELx_FnV)
		info.si_addr = NULL;
	else
		info.si_addr = (void __user *)addr;
	arm64_notify_die(inf->name, regs, &info, esr);

	return 0;
}

static const struct fault_info fault_info[] = {
	{ do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 2 address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 3 address size fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 8" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 12" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 17" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
	{ do_sea, SIGKILL,
	  SI_KERNEL, "level 0 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 1 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 2 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 3 (translation table walk)" },
	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous parity or ECC error" },	// Reserved when RAS is implemented
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 25" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 26" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 27" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 0 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 1 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 2 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 3 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 32" },
	{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 34" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 35" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 36" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 37" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 38" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 39" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 40" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 41" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 42" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 43" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 44" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 45" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 46" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 47" },
	{ do_bad, SIGKILL, SI_KERNEL, "TLB conflict abort" },
	{ do_bad, SIGKILL, SI_KERNEL, "Unsupported atomic hardware update fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 50" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 51" },
	{ do_bad, SIGKILL, SI_KERNEL, "implementation fault (lockdown abort)" },
	{ do_bad, SIGBUS, BUS_OBJERR, "implementation fault (unsupported exclusive)" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 54" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 55" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 56" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 57" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 58" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 59" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 60" },
	{ do_bad, SIGKILL, SI_KERNEL, "section domain fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "page domain fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
};

int handle_guest_sea(phys_addr_t addr, unsigned int esr)
{
	int ret = -ENOENT;

	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
		ret = ghes_notify_sea();

	return ret;
}

asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
					 struct pt_regs *regs)
{
	const struct fault_info *inf = esr_to_fault_info(esr);
	struct siginfo info;

	if (!inf->fn(addr, esr, regs))
		return;

	if (!user_mode(regs)) {
		pr_alert("Unhandled fault at 0x%016lx\n", addr);
		mem_abort_decode(esr);
		show_pte(addr);
	}

	clear_siginfo(&info);
	info.si_signo = inf->sig;
	info.si_errno = 0;
	info.si_code = inf->code;
	info.si_addr = (void __user *)addr;
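	/*
	 * No handler fixed this up: report it via arm64_notify_die(), which
	 * delivers the signal for user-mode faults and die()s otherwise.
	 */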
	arm64_notify_die(inf->name, regs, &info, esr);
}

asmlinkage void __exception do_el0_irq_bp_hardening(void)
{
	/* PC has already been checked in entry.S */
	arm64_apply_bp_hardening();
}

asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
						   unsigned int esr,
						   struct pt_regs *regs)
{
	/*
	 * We've taken an instruction abort from userspace and not yet
	 * re-enabled IRQs. If the address is a kernel address, apply
	 * BP hardening prior to enabling IRQs and pre-emption.
	 */
	if (addr > TASK_SIZE)
		arm64_apply_bp_hardening();

	local_irq_enable();
	do_mem_abort(addr, esr, regs);
}

asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
					   unsigned int esr,
					   struct pt_regs *regs)
{
	struct siginfo info;

	if (user_mode(regs)) {
		if (instruction_pointer(regs) > TASK_SIZE)
			arm64_apply_bp_hardening();
		local_irq_enable();
	}

	clear_siginfo(&info);
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRALN;
	info.si_addr = (void __user *)addr;
	arm64_notify_die("SP/PC alignment exception", regs, &info, esr);
}

int __init early_brk64(unsigned long addr, unsigned int esr,
		       struct pt_regs *regs);

/*
 * __refdata because early_brk64 is __init, but the reference to it is
 * clobbered at arch_initcall time.
 * See traps.c and debug-monitors.c:debug_traps_init().
 */
static struct fault_info __refdata debug_fault_info[] = {
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" },
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" },
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 3" },
	{ do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
	{ do_bad, SIGKILL, SI_KERNEL, "aarch32 vector catch" },
	{ early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 7" },
};

void __init hook_debug_fault_code(int nr,
				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
				  int sig, int code, const char *name)
{
	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

	debug_fault_info[nr].fn = fn;
	debug_fault_info[nr].sig = sig;
	debug_fault_info[nr].code = code;
	debug_fault_info[nr].name = name;
}

asmlinkage int __exception do_debug_exception(unsigned long addr,
					      unsigned int esr,
					      struct pt_regs *regs)
{
	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
	int rv;

	/*
	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
	 * already disabled to preserve the last enabled/disabled addresses.
	 */
	if (interrupts_enabled(regs))
		trace_hardirqs_off();

	if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
		arm64_apply_bp_hardening();

	if (!inf->fn(addr, esr, regs)) {
		rv = 1;
	} else {
		struct siginfo info;

		clear_siginfo(&info);
		info.si_signo = inf->sig;
		info.si_errno = 0;
		info.si_code = inf->code;
		info.si_addr = (void __user *)addr;
		arm64_notify_die(inf->name, regs, &info, esr);
		rv = 0;
	}

	if (interrupts_enabled(regs))
		trace_hardirqs_on();

	return rv;
}
NOKPROBE_SYMBOL(do_debug_exception);

#ifdef CONFIG_ARM64_PAN
void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
	/*
	 * We modify PSTATE.
	 * This won't work from irq context as the PSTATE
	 * is discarded once we return from the exception.
	 */
	WARN_ON_ONCE(in_interrupt());

	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
	asm(SET_PSTATE_PAN(1));
}
#endif /* CONFIG_ARM64_PAN */