// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/pgalloc.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

/*
 * This routine handles page faults. It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

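	/*
	 * regs->status holds the status CSR (sstatus, or mstatus on
	 * M-mode kernels) saved at trap entry; SR_PIE is the matching
	 * "previous interrupt enable" bit (SR_SPIE/SR_MPIE), i.e.
	 * whether interrupts were enabled in the context that faulted.
	 */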
	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

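	/*
	 * The scause value distinguishes instruction, load and store/AMO
	 * page faults (EXC_INST_PAGE_FAULT, EXC_LOAD_PAGE_FAULT,
	 * EXC_STORE_PAGE_FAULT), so the access type can be checked
	 * against the VMA's permissions before handing the fault to the
	 * core VM.
	 */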
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr);
		return;
	}

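	/*
	 * Kernel-mode faults fall through to here. fixup_exception()
	 * searches the exception table for the faulting instruction;
	 * uaccess routines register their accesses there, so a fault on
	 * a bad user pointer resumes at the fixup code instead of
	 * taking the kernel down.
	 */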
no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory. Call the OOM killer and return to userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		p4d_t *p4d, *p4d_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		/* User mode accesses just cause a SIGSEGV */
		if (user_mode(regs))
			return do_trap(regs, SIGSEGV, code, addr);

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

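		/*
		 * With the Sv32/Sv39 page tables used here, the p4d and
		 * pud levels are folded into the pgd, so the offset calls
		 * below just pass the entry through; walking them anyway
		 * keeps this code correct should deeper table layouts be
		 * supported.
		 */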
		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs.
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		/*
		 * The kernel assumes that TLBs don't cache invalid
		 * entries, but in RISC-V, SFENCE.VMA specifies an
		 * ordering constraint, not a cache flush; it is
		 * necessary even after writing invalid entries.
		 */
		local_flush_tlb_page(addr);

		return;
	}
}