// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/pgalloc.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

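	/*
	 * The trap entry code saved the trap cause (scause) and the
	 * faulting virtual address into pt_regs for us.
	 */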
	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

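	/* Let the mm core know whether the fault came from user mode. */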
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

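	/*
	 * Check the vma's permissions against the type of access
	 * that the trap cause encodes.
	 */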
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

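	/* The mm core failed outright; pick the matching error path. */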
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return to userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		p4d_t *p4d, *p4d_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		/* User mode accesses just cause a SIGSEGV */
		if (user_mode(regs))
			return do_trap(regs, SIGSEGV, code, addr);

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
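		/*
		 * The satp CSR points at the root page table the MMU is
		 * walking right now, so derive the faulting pgd from it
		 * rather than from any mm pointer.
		 */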
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		/*
		 * The kernel assumes that TLBs don't cache invalid
		 * entries, but in RISC-V, SFENCE.VMA specifies an
		 * ordering constraint, not a cache flush; it is
		 * necessary even after writing invalid entries.
		 */
		local_flush_tlb_page(addr);

		return;
	}
}