// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/kfence.h>
#include <linux/entry-common.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

static void die_kernel_fault(const char *msg, unsigned long addr,
                struct pt_regs *regs)
{
        bust_spinlocks(1);

        pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
                 addr);

        bust_spinlocks(0);
        die(regs, "Oops");
        make_task_dead(SIGKILL);
}

static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
        const char *msg;

        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        if (addr < PAGE_SIZE)
                msg = "NULL pointer dereference";
        else {
                if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs))
                        return;

                msg = "paging request";
        }

        die_kernel_fault(msg, addr, regs);
}

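/*
 * Handle a VM_FAULT_ERROR result from handle_mm_fault(): invoke the OOM
 * handling for VM_FAULT_OOM and deliver SIGBUS or SIGSEGV for the other
 * error types.  Faults taken in kernel mode are routed to no_context().
 */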
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
        if (!user_mode(regs)) {
                no_context(regs, addr);
                return;
        }

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed).
                 */
                pagefault_out_of_memory();
                return;
        } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
                /* Send a SIGBUS for a bus error or hardware-poisoned page */
                do_trap(regs, SIGBUS, BUS_ADRERR, addr);
                return;
        } else if (fault & VM_FAULT_SIGSEGV) {
                do_trap(regs, SIGSEGV, SEGV_MAPERR, addr);
                return;
        }

        BUG();
}

static inline void
bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr)
{
        /*
         * Something tried to access memory that isn't in our memory map.
         * Fix it, but check if it's kernel or user first.
         */
        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs)) {
                do_trap(regs, SIGSEGV, code, addr);
                return;
        }

        no_context(regs, addr);
}

static inline void
bad_area(struct pt_regs *regs, struct mm_struct *mm, int code,
         unsigned long addr)
{
        mmap_read_unlock(mm);

        bad_area_nosemaphore(regs, code, addr);
}

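/*
 * Handle a fault on a kernel (vmalloc) address by copying the missing
 * entries from the reference page table, init_mm.pgd, into the page
 * table currently installed in the satp CSR, then flushing the local
 * TLB entry for the faulting address.
 */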
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
        pgd_t *pgd, *pgd_k;
        pud_t *pud_k;
        p4d_t *p4d_k;
        pmd_t *pmd_k;
        pte_t *pte_k;
        int index;
        unsigned long pfn;

        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs))
                return do_trap(regs, SIGSEGV, code, addr);

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "tsk->active_mm->pgd" here.
         * We might be inside an interrupt in the middle
         * of a task switch.
         */
        index = pgd_index(addr);
        pfn = csr_read(CSR_SATP) & SATP_PPN;
        pgd = (pgd_t *)pfn_to_virt(pfn) + index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(pgdp_get(pgd_k))) {
                no_context(regs, addr);
                return;
        }
        set_pgd(pgd, pgdp_get(pgd_k));

        p4d_k = p4d_offset(pgd_k, addr);
        if (!p4d_present(p4dp_get(p4d_k))) {
                no_context(regs, addr);
                return;
        }

        pud_k = pud_offset(p4d_k, addr);
        if (!pud_present(pudp_get(pud_k))) {
                no_context(regs, addr);
                return;
        }
        if (pud_leaf(pudp_get(pud_k)))
                goto flush_tlb;

        /*
         * Since the vmalloc area is global, it is unnecessary
         * to copy individual PTEs
         */
        pmd_k = pmd_offset(pud_k, addr);
        if (!pmd_present(pmdp_get(pmd_k))) {
                no_context(regs, addr);
                return;
        }
        if (pmd_leaf(pmdp_get(pmd_k)))
                goto flush_tlb;

        /*
         * Make sure the actual PTE exists as well to
         * catch kernel vmalloc-area accesses to non-mapped
         * addresses. If we don't do this, this will just
         * silently loop forever.
         */
        pte_k = pte_offset_kernel(pmd_k, addr);
        if (!pte_present(ptep_get(pte_k))) {
                no_context(regs, addr);
                return;
        }

        /*
         * The kernel assumes that TLBs don't cache invalid
         * entries, but in RISC-V, SFENCE.VMA specifies an
         * ordering constraint, not a cache flush; it is
         * necessary even after writing invalid entries.
         */
flush_tlb:
        local_flush_tlb_page(addr);
}

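/*
 * Return true if the access described by @cause (instruction fetch,
 * load, or store) is not permitted by the protection flags of @vma.
 */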
static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
        switch (cause) {
        case EXC_INST_PAGE_FAULT:
                if (!(vma->vm_flags & VM_EXEC)) {
                        return true;
                }
                break;
        case EXC_LOAD_PAGE_FAULT:
                /* Write implies read */
                if (!(vma->vm_flags & (VM_READ | VM_WRITE))) {
                        return true;
                }
                break;
        case EXC_STORE_PAGE_FAULT:
                if (!(vma->vm_flags & VM_WRITE)) {
                        return true;
                }
                break;
        default:
                panic("%s: unhandled cause %lu", __func__, cause);
        }
        return false;
}

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
void handle_page_fault(struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        unsigned long addr, cause;
        unsigned int flags = FAULT_FLAG_DEFAULT;
        int code = SEGV_MAPERR;
        vm_fault_t fault;

        cause = regs->cause;
        addr = regs->badaddr;

        tsk = current;
        mm = tsk->mm;

        if (kprobe_page_fault(regs, cause))
                return;

        /*
         * Fault-in kernel-space virtual memory on-demand.
         * The 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) &&
            unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) {
                vmalloc_fault(regs, code, addr);
                return;
        }

        /* Enable interrupts if they were enabled in the parent context. */
        if (!regs_irqs_disabled(regs))
                local_irq_enable();

        /*
         * If we're in an interrupt, have no user context, or are running
         * in an atomic region, then we must not take the fault.
         */
        if (unlikely(faulthandler_disabled() || !mm)) {
                tsk->thread.bad_cause = cause;
                no_context(regs, addr);
                return;
        }

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;

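        /*
         * Kernel-mode accesses to user memory are only legal through the
         * uaccess routines, which set the SUM bit in sstatus; a fault on
         * a user address with SUM clear is a kernel bug unless an
         * exception fixup entry applies.
         */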
        if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) {
                if (fixup_exception(regs))
                        return;

                die_kernel_fault("access to user memory without uaccess routines", addr, regs);
        }

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

        if (cause == EXC_STORE_PAGE_FAULT)
                flags |= FAULT_FLAG_WRITE;
        else if (cause == EXC_INST_PAGE_FAULT)
                flags |= FAULT_FLAG_INSTRUCTION;

        /*
         * Try the lockless per-VMA fault path first for user-mode faults;
         * fall back to the mmap_lock path if the VMA cannot be locked or
         * if the fault needs to be retried.
         */
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;

        vma = lock_vma_under_rcu(mm, addr);
        if (!vma)
                goto lock_mmap;

        if (unlikely(access_error(cause, vma))) {
                vma_end_read(vma);
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
                tsk->thread.bad_cause = cause;
                bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
                return;
        }

        fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
        if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
                vma_end_read(vma);

        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
                goto done;
        }
        count_vm_vma_lock_event(VMA_LOCK_RETRY);
        if (fault & VM_FAULT_MAJOR)
                flags |= FAULT_FLAG_TRIED;

        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        no_context(regs, addr);
                return;
        }
lock_mmap:

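        /*
         * Slow path: take mmap_lock, look up the VMA with
         * lock_mm_and_find_vma(), and retry the fault if
         * handle_mm_fault() returns VM_FAULT_RETRY.
         */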
retry:
        vma = lock_mm_and_find_vma(mm, addr, regs);
        if (unlikely(!vma)) {
                tsk->thread.bad_cause = cause;
                bad_area_nosemaphore(regs, code, addr);
                return;
        }

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it.
         */
        code = SEGV_ACCERR;

        if (unlikely(access_error(cause, vma))) {
                tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }

        /*
         * If for any reason at all we could not handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(vma, addr, flags, regs);

        /*
         * If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_lock because it
         * would already be released in __lock_page_or_retry in
         * mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        no_context(regs, addr);
                return;
        }

        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
                return;

        if (unlikely(fault & VM_FAULT_RETRY)) {
                flags |= FAULT_FLAG_TRIED;

                /*
                 * No need to mmap_read_unlock(mm) as we would
                 * have already released it in __lock_page_or_retry
                 * in mm/filemap.c.
                 */
                goto retry;
        }

        mmap_read_unlock(mm);

done:
        if (unlikely(fault & VM_FAULT_ERROR)) {
                tsk->thread.bad_cause = cause;
                mm_fault_error(regs, addr, fault);
                return;
        }
        return;
}