/*
 * Based on arch/arm/mm/fault.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 1995-2004 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>

#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

#include <acpi/ghes.h>

struct fault_info {
	int (*fn)(unsigned long addr, unsigned int esr,
		  struct pt_regs *regs);
	int sig;
	int code;
	const char *name;
};

static const struct fault_info fault_info[];

static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
{
	return fault_info + (esr & 63);
}

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, esr))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
{
	return 0;
}
#endif

static void data_abort_decode(unsigned int esr)
{
	pr_alert("Data abort info:\n");

	if (esr & ESR_ELx_ISV) {
		pr_alert("  Access size = %u byte(s)\n",
			 1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
		pr_alert("  SSE = %lu, SRT = %lu\n",
			 (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
			 (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
		pr_alert("  SF = %lu, AR = %lu\n",
			 (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
			 (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
	} else {
		pr_alert("  ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
	}

	pr_alert("  CM = %lu, WnR = %lu\n",
		 (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}

static void mem_abort_decode(unsigned int esr)
{
	pr_alert("Mem abort info:\n");

	pr_alert("  ESR = 0x%08x\n", esr);
	pr_alert("  Exception class = %s, IL = %u bits\n",
		 esr_get_class_string(esr),
		 (esr & ESR_ELx_IL) ? 32 : 16);
	pr_alert("  SET = %lu, FnV = %lu\n",
		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
		 (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
	pr_alert("  EA = %lu, S1PTW = %lu\n",
		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);

	if (esr_is_data_abort(esr))
		data_abort_decode(esr);
}

/*
 * Dump out the page tables associated with 'addr' in the currently active mm.
 */
void show_pte(unsigned long addr)
{
	struct mm_struct *mm;
	pgd_t *pgdp;
	pgd_t pgd;

	if (addr < TASK_SIZE) {
		/* TTBR0 */
		mm = current->active_mm;
		if (mm == &init_mm) {
			pr_alert("[%016lx] user address but active_mm is swapper\n",
				 addr);
			return;
		}
	} else if (addr >= VA_START) {
		/* TTBR1 */
		mm = &init_mm;
	} else {
		pr_alert("[%016lx] address between user and kernel address ranges\n",
			 addr);
		return;
	}

	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
		 VA_BITS, mm->pgd);
	pgdp = pgd_offset(mm, addr);
	pgd = READ_ONCE(*pgdp);
	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

	do {
		pud_t *pudp, pud;
		pmd_t *pmdp, pmd;
		pte_t *ptep, pte;

		if (pgd_none(pgd) || pgd_bad(pgd))
			break;

		pudp = pud_offset(pgdp, addr);
		pud = READ_ONCE(*pudp);
		pr_cont(", pud=%016llx", pud_val(pud));
		if (pud_none(pud) || pud_bad(pud))
			break;

		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		pr_cont(", pmd=%016llx", pmd_val(pmd));
		if (pmd_none(pmd) || pmd_bad(pmd))
			break;

		ptep = pte_offset_map(pmdp, addr);
		pte = READ_ONCE(*ptep);
		pr_cont(", pte=%016llx", pte_val(pte));
		pte_unmap(ptep);
	} while(0);

	pr_cont("\n");
}
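
/*
 * Note on the walk above: with fewer than four configured page-table levels,
 * the pud (and, with two levels, the pmd) is folded into the level above, so
 * pud_offset()/pmd_offset() hand back the entry that was already printed and
 * the dump simply repeats that value for the folded level(s).
 */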

/*
 * This function sets the access flags (dirty, accessed), as well as write
 * permission, and only to a more permissive setting.
 *
 * It needs to cope with hardware update of the accessed/dirty state by other
 * agents in the system and can safely skip the __sync_icache_dcache() call as,
 * like set_pte_at(), the PTE is never changed from no-exec to exec here.
 *
 * Returns whether or not the PTE actually changed.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	pteval_t old_pteval, pteval;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_same(pte, entry))
		return 0;

	/* only preserve the access flags and write permission */
	pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;

	/*
	 * Setting the flags must be done atomically to avoid racing with the
	 * hardware update of the access/dirty state. The PTE_RDONLY bit must
	 * be set to the most permissive (lowest value) of *ptep and entry
	 * (calculated as: a & b == ~(~a | ~b)).
	 */
	pte_val(entry) ^= PTE_RDONLY;
	pteval = pte_val(pte);
	do {
		old_pteval = pteval;
		pteval ^= PTE_RDONLY;
		pteval |= pte_val(entry);
		pteval ^= PTE_RDONLY;
		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
	} while (pteval != old_pteval);

	flush_tlb_fix_spurious_fault(vma, address);
	return 1;
}
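
/*
 * Worked example for the update above (illustrative, not part of the original
 * logic): to mark a clean, read-only PTE writable and dirty, 'entry' arrives
 * with PTE_RDONLY clear and PTE_WRITE/PTE_DIRTY set. Flipping PTE_RDONLY in
 * 'entry' and in the old value turns the OR in the loop into an AND on that
 * one bit (~(~a | ~b) == a & b), so:
 *
 *	old RDONLY=1, entry RDONLY=0  ->  new RDONLY=0	(more permissive wins)
 *	old RDONLY=1, entry RDONLY=1  ->  new RDONLY=1	(stays read-only)
 *
 * while PTE_AF, PTE_WRITE and PTE_DIRTY are simply accumulated by the OR.
 */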

static bool is_el1_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}

static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
				       unsigned long addr)
{
	unsigned int ec = ESR_ELx_EC(esr);
	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;

	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
		return false;

	if (fsc_type == ESR_ELx_FSC_PERM)
		return true;

	if (addr < TASK_SIZE && system_uses_ttbr0_pan())
		return fsc_type == ESR_ELx_FSC_FAULT &&
			(regs->pstate & PSR_PAN_BIT);

	return false;
}

static void __do_kernel_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	const char *msg;

	/*
	 * Are we prepared to handle this kernel fault?
	 * We are almost certainly not prepared to handle instruction faults.
	 */
	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
		return;

	bust_spinlocks(1);

	if (is_permission_fault(esr, regs, addr)) {
		if (esr & ESR_ELx_WNR)
			msg = "write to read-only memory";
		else
			msg = "read from unreadable memory";
	} else if (addr < PAGE_SIZE) {
		msg = "NULL pointer dereference";
	} else {
		msg = "paging request";
	}

	pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
		 addr);

	mem_abort_decode(esr);

	show_pte(addr);
	die("Oops", regs, esr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

static void __do_user_fault(struct siginfo *info, unsigned int esr)
{
	current->thread.fault_address = (unsigned long)info->si_addr;
	current->thread.fault_code = esr;
	arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
}

static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (user_mode(regs)) {
		const struct fault_info *inf = esr_to_fault_info(esr);
		struct siginfo si = {
			.si_signo = inf->sig,
			.si_code = inf->code,
			.si_addr = (void __user *)addr,
		};

		__do_user_fault(&si, esr);
	} else {
		__do_kernel_fault(addr, esr, regs);
	}
}
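
/*
 * Fault codes private to this file (they are never returned by
 * handle_mm_fault()): __do_page_fault() returns VM_FAULT_BADMAP when no
 * usable VMA covers the address, and VM_FAULT_BADACCESS when a VMA exists but
 * its vm_flags do not permit the access. do_page_fault() later uses this
 * distinction to pick SEGV_MAPERR vs SEGV_ACCERR for the user signal.
 */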
#define VM_FAULT_BADMAP		0x010000
#define VM_FAULT_BADACCESS	0x020000

static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
			   unsigned int mm_flags, unsigned long vm_flags,
			   struct task_struct *tsk)
{
	struct vm_area_struct *vma;
	int fault;

	vma = find_vma(mm, addr);
	fault = VM_FAULT_BADMAP;
	if (unlikely(!vma))
		goto out;
	if (unlikely(vma->vm_start > addr))
		goto check_stack;

	/*
	 * Ok, we have a good vm_area for this memory access, so we can handle
	 * it.
	 */
good_area:
	/*
	 * Check that the permissions on the VMA allow for the fault which
	 * occurred.
	 */
	if (!(vma->vm_flags & vm_flags)) {
		fault = VM_FAULT_BADACCESS;
		goto out;
	}

	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);

check_stack:
	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
		goto good_area;
out:
	return fault;
}

static bool is_el0_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}

static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
				   struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct siginfo si;
	int fault, major = 0;
	unsigned long vm_flags = VM_READ | VM_WRITE;
	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	if (notify_page_fault(regs, esr))
		return 0;

	tsk = current;
	mm = tsk->mm;

	/*
	 * If we're in an interrupt or have no user context, we must not take
	 * the fault.
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		vm_flags = VM_EXEC;
	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	}

	if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) {
		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
		if (regs->orig_addr_limit == KERNEL_DS)
			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);

		if (is_el1_instruction_abort(esr))
			die("Attempting to execute userspace memory", regs, esr);

		if (!search_exception_tables(regs->pc))
			die("Accessing user space memory outside uaccess.h routines", regs, esr);
	}

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in which
		 * case, we'll have missed the might_sleep() from down_read().
		 */
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
#endif
	}

	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
	major |= fault & VM_FAULT_MAJOR;

	if (fault & VM_FAULT_RETRY) {
		/*
		 * If we need to retry but a fatal signal is pending,
		 * handle the signal first. We do not need to release
		 * the mmap_sem because it would already be released
		 * in __lock_page_or_retry in mm/filemap.c.
		 */
		if (fatal_signal_pending(current)) {
			if (!user_mode(regs))
				goto no_context;
			return 0;
		}

		/*
		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
		 * starvation.
		 */
		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			mm_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}
	up_read(&mm->mmap_sem);

	/*
	 * Handle the "normal" (no error) case first.
	 */
	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
			      VM_FAULT_BADACCESS)))) {
		/*
		 * Major/minor page fault accounting is only done
		 * once. If we go through a retry, it is extremely
		 * likely that the page will be found in page cache at
		 * that point.
		 */
		if (major) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
				      addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
				      addr);
		}

		return 0;
	}

	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we got
		 * oom-killed).
		 */
		pagefault_out_of_memory();
		return 0;
	}

	clear_siginfo(&si);
	si.si_addr = (void __user *)addr;

	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to successfully fix up
		 * this page fault.
		 */
		si.si_signo = SIGBUS;
		si.si_code = BUS_ADRERR;
	} else if (fault & VM_FAULT_HWPOISON_LARGE) {
		unsigned int hindex = VM_FAULT_GET_HINDEX(fault);

		si.si_signo = SIGBUS;
		si.si_code = BUS_MCEERR_AR;
		si.si_addr_lsb = hstate_index_to_shift(hindex);
	} else if (fault & VM_FAULT_HWPOISON) {
		si.si_signo = SIGBUS;
		si.si_code = BUS_MCEERR_AR;
		si.si_addr_lsb = PAGE_SHIFT;
	} else {
		/*
		 * Something tried to access memory that isn't in our memory
		 * map.
		 */
		si.si_signo = SIGSEGV;
		si.si_code = fault == VM_FAULT_BADACCESS ?
			     SEGV_ACCERR : SEGV_MAPERR;
	}

	__do_user_fault(&si, esr);
	return 0;

no_context:
	__do_kernel_fault(addr, esr, regs);
	return 0;
}

static int __kprobes do_translation_fault(unsigned long addr,
					  unsigned int esr,
					  struct pt_regs *regs)
{
	if (addr < TASK_SIZE)
		return do_page_fault(addr, esr, regs);

	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_alignment_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	return 1; /* "fault" */
}

static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	struct siginfo info;
	const struct fault_info *inf;

	inf = esr_to_fault_info(esr);

	/*
	 * Synchronous aborts may interrupt code which had interrupts masked.
	 * Before calling out into the wider kernel tell the interested
	 * subsystems.
	 */
	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
		if (interrupts_enabled(regs))
			nmi_enter();

		ghes_notify_sea();

		if (interrupts_enabled(regs))
			nmi_exit();
	}

	info.si_signo = inf->sig;
	info.si_errno = 0;
	info.si_code = inf->code;
	if (esr & ESR_ELx_FnV)
		info.si_addr = NULL;
	else
		info.si_addr = (void __user *)addr;
	arm64_notify_die(inf->name, regs, &info, esr);

	return 0;
}
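
/*
 * This table is indexed by the low six bits of the ESR (the DFSC/IFSC fault
 * status code); see esr_to_fault_info() above. The entries must therefore
 * stay in FSC order and there must be exactly 64 of them.
 */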
static const struct fault_info fault_info[] = {
	{ do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 2 address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 3 address size fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 8" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 12" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 17" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 1 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 2 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 3 (translation table walk)" },
	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous parity or ECC error" },	// Reserved when RAS is implemented
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 25" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 26" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 27" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 0 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 1 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 2 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 3 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 32" },
	{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 34" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 35" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 36" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 37" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 38" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 39" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 40" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 41" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 42" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 43" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 44" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 45" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 46" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 47" },
	{ do_bad, SIGKILL, SI_KERNEL, "TLB conflict abort" },
	{ do_bad, SIGKILL, SI_KERNEL, "Unsupported atomic hardware update fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 50" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 51" },
	{ do_bad, SIGKILL, SI_KERNEL, "implementation fault (lockdown abort)" },
	{ do_bad, SIGBUS, BUS_OBJERR, "implementation fault (unsupported exclusive)" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 54" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 55" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 56" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 57" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 58" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 59" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 60" },
	{ do_bad, SIGKILL, SI_KERNEL, "section domain fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "page domain fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
};
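
/*
 * handle_guest_sea() is called from KVM's guest-abort handling path (outside
 * this file) when a vCPU takes a synchronous external abort: if APEI SEA
 * support is enabled it gives the firmware-first RAS code a chance to claim
 * the error, and the -ENOENT fallback tells the caller nothing claimed it.
 */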
int handle_guest_sea(phys_addr_t addr, unsigned int esr)
{
	int ret = -ENOENT;

	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
		ret = ghes_notify_sea();

	return ret;
}

asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
					 struct pt_regs *regs)
{
	const struct fault_info *inf = esr_to_fault_info(esr);
	struct siginfo info;

	if (!inf->fn(addr, esr, regs))
		return;

	if (!user_mode(regs)) {
		pr_alert("Unhandled fault at 0x%016lx\n", addr);
		mem_abort_decode(esr);
		show_pte(addr);
	}

	info.si_signo = inf->sig;
	info.si_errno = 0;
	info.si_code = inf->code;
	info.si_addr = (void __user *)addr;
	arm64_notify_die(inf->name, regs, &info, esr);
}

asmlinkage void __exception do_el0_irq_bp_hardening(void)
{
	/* PC has already been checked in entry.S */
	arm64_apply_bp_hardening();
}

asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
						   unsigned int esr,
						   struct pt_regs *regs)
{
	/*
	 * We've taken an instruction abort from userspace and not yet
	 * re-enabled IRQs. If the address is a kernel address, apply
	 * BP hardening prior to enabling IRQs and pre-emption.
	 */
	if (addr > TASK_SIZE)
		arm64_apply_bp_hardening();

	local_irq_enable();
	do_mem_abort(addr, esr, regs);
}

asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
					   unsigned int esr,
					   struct pt_regs *regs)
{
	struct siginfo info;

	if (user_mode(regs)) {
		if (instruction_pointer(regs) > TASK_SIZE)
			arm64_apply_bp_hardening();
		local_irq_enable();
	}

	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRALN;
	info.si_addr = (void __user *)addr;
	arm64_notify_die("SP/PC alignment exception", regs, &info, esr);
}

int __init early_brk64(unsigned long addr, unsigned int esr,
		       struct pt_regs *regs);

/*
 * __refdata because early_brk64 is __init, but the reference to it is
 * clobbered at arch_initcall time.
 * See traps.c and debug-monitors.c:debug_traps_init().
 */
static struct fault_info __refdata debug_fault_info[] = {
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" },
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" },
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 3" },
	{ do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
	{ do_bad, SIGKILL, SI_KERNEL, "aarch32 vector catch" },
	{ early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 7" },
};

void __init hook_debug_fault_code(int nr,
				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
				  int sig, int code, const char *name)
{
	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

	debug_fault_info[nr].fn = fn;
	debug_fault_info[nr].sig = sig;
	debug_fault_info[nr].code = code;
	debug_fault_info[nr].name = name;
}
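
/*
 * Sketch of how this hook is used (illustrative; see
 * arch/arm64/kernel/hw_breakpoint.c and debug-monitors.c for the real
 * callers, whose exact arguments may differ by kernel version):
 *
 *	hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
 *			      TRAP_HWBKPT, "hw-breakpoint handler");
 *
 * After that, a hardware breakpoint debug exception (debug_fault_info[0])
 * is routed to breakpoint_handler() by do_debug_exception() below.
 */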

asmlinkage int __exception do_debug_exception(unsigned long addr,
					      unsigned int esr,
					      struct pt_regs *regs)
{
	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
	struct siginfo info;
	int rv;

	/*
	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
	 * already disabled to preserve the last enabled/disabled addresses.
	 */
	if (interrupts_enabled(regs))
		trace_hardirqs_off();

	if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
		arm64_apply_bp_hardening();

	if (!inf->fn(addr, esr, regs)) {
		rv = 1;
	} else {
		info.si_signo = inf->sig;
		info.si_errno = 0;
		info.si_code = inf->code;
		info.si_addr = (void __user *)addr;
		arm64_notify_die(inf->name, regs, &info, esr);
		rv = 0;
	}

	if (interrupts_enabled(regs))
		trace_hardirqs_on();

	return rv;
}
NOKPROBE_SYMBOL(do_debug_exception);

#ifdef CONFIG_ARM64_PAN
void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
	/*
	 * We modify PSTATE. This won't work from irq context as the PSTATE
	 * is discarded once we return from the exception.
	 */
	WARN_ON_ONCE(in_interrupt());

	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
	asm(SET_PSTATE_PAN(1));
}
#endif /* CONFIG_ARM64_PAN */