// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);
}

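/*
 * Handle the VM_FAULT_ERROR results from handle_mm_fault(): out-of-memory
 * faults invoke the OOM killer, bus errors raise SIGBUS, and kernel-mode
 * faults fall back to the exception-fixup/oops path in no_context().
 */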
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed).
		 */
		if (!user_mode(regs)) {
			no_context(regs, addr);
			return;
		}
		pagefault_out_of_memory();
		return;
	} else if (fault & VM_FAULT_SIGBUS) {
		/* Kernel mode? Handle exceptions or die */
		if (!user_mode(regs)) {
			no_context(regs, addr);
			return;
		}
		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
		return;
	}
	BUG();
}

static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
	mmap_read_unlock(mm);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr);
		return;
	}

	no_context(regs, addr);
}

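/*
 * Faults in the vmalloc area: mappings created by vmalloc() are installed
 * only in the reference page table (init_mm.pgd), so the first kernel
 * access through the current page table can fault. Lazily copy the
 * missing entries from init_mm into the page table the hardware is using.
 */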
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	p4d_t *p4d, *p4d_k;
	pmd_t *pmd, *pmd_k;
	pte_t *pte_k;
	int index;

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs))
		return do_trap(regs, SIGSEGV, code, addr);

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "tsk->active_mm->pgd" here.
	 * We might be inside an interrupt in the middle
	 * of a task switch.
	 */
	index = pgd_index(addr);
	pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k)) {
		no_context(regs, addr);
		return;
	}
	set_pgd(pgd, *pgd_k);

	p4d = p4d_offset(pgd, addr);
	p4d_k = p4d_offset(pgd_k, addr);
	if (!p4d_present(*p4d_k)) {
		no_context(regs, addr);
		return;
	}

	pud = pud_offset(p4d, addr);
	pud_k = pud_offset(p4d_k, addr);
	if (!pud_present(*pud_k)) {
		no_context(regs, addr);
		return;
	}

	/*
	 * Since the vmalloc area is global, it is unnecessary
	 * to copy individual PTEs
	 */
	pmd = pmd_offset(pud, addr);
	pmd_k = pmd_offset(pud_k, addr);
	if (!pmd_present(*pmd_k)) {
		no_context(regs, addr);
		return;
	}
	set_pmd(pmd, *pmd_k);

	/*
	 * Make sure the actual PTE exists as well to
	 * catch kernel vmalloc-area accesses to non-mapped
	 * addresses. If we don't do this, this will just
	 * silently loop forever.
	 */
	pte_k = pte_offset_kernel(pmd_k, addr);
	if (!pte_present(*pte_k)) {
		no_context(regs, addr);
		return;
	}

	/*
	 * The kernel assumes that TLBs don't cache invalid
	 * entries, but in RISC-V, SFENCE.VMA specifies an
	 * ordering constraint, not a cache flush; it is
	 * necessary even after writing invalid entries.
	 */
	local_flush_tlb_page(addr);
}

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
		vmalloc_fault(regs, code, addr);
		return;
	}

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		no_context(regs, addr);
		return;
	}

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (unlikely(!vma)) {
		bad_area(regs, mm, code, addr);
		return;
	}
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, mm, code, addr);
		return;
	}
	if (unlikely(expand_stack(vma, addr))) {
		bad_area(regs, mm, code, addr);
		return;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

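	/*
	 * 'cause' is the scause value the CPU reported for this trap:
	 * instruction, load, and store/AMO page faults are distinct
	 * exception causes, so check the VMA permission that matches
	 * the type of access that faulted.
	 */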
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC)) {
			bad_area(regs, mm, code, addr);
			return;
		}
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ)) {
			bad_area(regs, mm, code, addr);
			return;
		}
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE)) {
			bad_area(regs, mm, code, addr);
			return;
		}
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags, regs);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}

	mmap_read_unlock(mm);

	if (unlikely(fault & VM_FAULT_ERROR)) {
		mm_fault_error(regs, addr, fault);
		return;
	}
	return;
}