// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */


#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags, regs);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}

	mmap_read_unlock(mm);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	mmap_read_unlock(mm);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	mmap_read_unlock(mm);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	mmap_read_unlock(mm);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		p4d_t *p4d, *p4d_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		/* User mode accesses just cause a SIGSEGV */
		if (user_mode(regs))
			return do_trap(regs, SIGSEGV, code, addr);

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		/*
		 * The kernel assumes that TLBs don't cache invalid
		 * entries, but in RISC-V, SFENCE.VMA specifies an
		 * ordering constraint, not a cache flush; it is
		 * necessary even after writing invalid entries.
		 */
		local_flush_tlb_page(addr);

		return;
	}
}