// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

static void die_kernel_fault(const char *msg, unsigned long addr,
			     struct pt_regs *regs)
{
	bust_spinlocks(1);

	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
		 addr);

	bust_spinlocks(0);
	die(regs, "Oops");
	do_exit(SIGKILL);
}

static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
	const char *msg;

	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	msg = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request";
	die_kernel_fault(msg, addr, regs);
}

static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed).
		 */
		if (!user_mode(regs)) {
			no_context(regs, addr);
			return;
		}
		pagefault_out_of_memory();
		return;
	} else if (fault & VM_FAULT_SIGBUS) {
		/* Kernel mode? Handle exceptions or die */
		if (!user_mode(regs)) {
			no_context(regs, addr);
			return;
		}
		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
		return;
	}
	BUG();
}

static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
	mmap_read_unlock(mm);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr);
		return;
	}

	no_context(regs, addr);
}

static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	p4d_t *p4d, *p4d_k;
	pmd_t *pmd, *pmd_k;
	pte_t *pte_k;
	int index;
	unsigned long pfn;

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs))
		return do_trap(regs, SIGSEGV, code, addr);

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "tsk->active_mm->pgd" here.
	 * We might be inside an interrupt in the middle
	 * of a task switch.
	 */
	index = pgd_index(addr);
	pfn = csr_read(CSR_SATP) & SATP_PPN;
	pgd = (pgd_t *)pfn_to_virt(pfn) + index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k)) {
		no_context(regs, addr);
		return;
	}
	set_pgd(pgd, *pgd_k);

	p4d = p4d_offset(pgd, addr);
	p4d_k = p4d_offset(pgd_k, addr);
	if (!p4d_present(*p4d_k)) {
		no_context(regs, addr);
		return;
	}

	pud = pud_offset(p4d, addr);
	pud_k = pud_offset(p4d_k, addr);
	if (!pud_present(*pud_k)) {
		no_context(regs, addr);
		return;
	}

	/*
	 * Since the vmalloc area is global, it is unnecessary
	 * to copy individual PTEs.
	 */
	pmd = pmd_offset(pud, addr);
	pmd_k = pmd_offset(pud_k, addr);
	if (!pmd_present(*pmd_k)) {
		no_context(regs, addr);
		return;
	}
	set_pmd(pmd, *pmd_k);

	/*
	 * Make sure the actual PTE exists as well to
	 * catch kernel vmalloc-area accesses to non-mapped
	 * addresses. If we don't do this, this will just
	 * silently loop forever.
	 */
	pte_k = pte_offset_kernel(pmd_k, addr);
	if (!pte_present(*pte_k)) {
		no_context(regs, addr);
		return;
	}

	/*
	 * The kernel assumes that TLBs don't cache invalid
	 * entries, but in RISC-V, SFENCE.VMA specifies an
	 * ordering constraint, not a cache flush; it is
	 * necessary even after writing invalid entries.
	 */
	local_flush_tlb_page(addr);
}

static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			return true;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			return true;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			return true;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}
	return false;
}

/*
 * This routine handles page faults. It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
		vmalloc_fault(regs, code, addr);
		return;
	}

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		no_context(regs, addr);
		return;
	}

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	if (!user_mode(regs) && addr < TASK_SIZE &&
	    unlikely(!(regs->status & SR_SUM)))
		die_kernel_fault("access to user memory without uaccess routines",
				 addr, regs);

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	if (cause == EXC_STORE_PAGE_FAULT)
		flags |= FAULT_FLAG_WRITE;
	else if (cause == EXC_INST_PAGE_FAULT)
		flags |= FAULT_FLAG_INSTRUCTION;
retry:
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (unlikely(!vma)) {
		bad_area(regs, mm, code, addr);
		return;
	}
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, mm, code, addr);
		return;
	}
	if (unlikely(expand_stack(vma, addr))) {
		bad_area(regs, mm, code, addr);
		return;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	if (unlikely(access_error(cause, vma))) {
		bad_area(regs, mm, code, addr);
		return;
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags, regs);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}

	mmap_read_unlock(mm);

	if (unlikely(fault & VM_FAULT_ERROR)) {
		mm_fault_error(regs, addr, fault);
		return;
	}
	return;
}