xref: /linux/arch/riscv/mm/fault.c (revision 4363287178a85e41cd59f9f1d423fbe1f9048ec8)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */


#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

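	/*
	 * The trap entry code saves the scause and stval CSRs into the
	 * register frame: the exception cause and the faulting virtual
	 * address, respectively.
	 */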
	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault in kernel-space virtual memory on demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

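	/* Tell the MM core whether the fault came from user mode. */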
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

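	/* Account this fault to the PERF_COUNT_SW_PAGE_FAULTS software event. */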
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

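	/*
	 * Take the mmap lock for reading and find the VMA covering the
	 * faulting address, growing a stack VMA downwards if need be.
	 */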
retry:
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

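	/*
	 * The exception cause distinguishes instruction fetches, loads and
	 * stores; check that the VMA permits the attempted access.
	 */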
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
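	 *
	 * The register state is passed down so that the MM core can
	 * account major and minor faults against this task.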
	 */
	fault = handle_mm_fault(vma, addr, flags, regs);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * will already have been released by __lock_page_or_retry() in
	 * mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

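	/*
	 * A hard error from the MM core: out-of-memory and bus-error
	 * conditions get their own handling; anything else is a bug.
	 */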
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as __lock_page_or_retry()
		 * in mm/filemap.c has already released it.
		 */
		goto retry;
	}

14007037db5SPalmer Dabbelt 
141d8ed45c5SMichel Lespinasse 	mmap_read_unlock(mm);
14207037db5SPalmer Dabbelt 	return;
14307037db5SPalmer Dabbelt 
14407037db5SPalmer Dabbelt 	/*
14507037db5SPalmer Dabbelt 	 * Something tried to access memory that isn't in our memory map.
14607037db5SPalmer Dabbelt 	 * Fix it, but check if it's kernel or user first.
14707037db5SPalmer Dabbelt 	 */
14807037db5SPalmer Dabbelt bad_area:
149d8ed45c5SMichel Lespinasse 	mmap_read_unlock(mm);
15007037db5SPalmer Dabbelt 	/* User mode accesses just cause a SIGSEGV */
15107037db5SPalmer Dabbelt 	if (user_mode(regs)) {
1526f25a967SEric W. Biederman 		do_trap(regs, SIGSEGV, code, addr);
15307037db5SPalmer Dabbelt 		return;
15407037db5SPalmer Dabbelt 	}
15507037db5SPalmer Dabbelt 
15607037db5SPalmer Dabbelt no_context:
15707037db5SPalmer Dabbelt 	/* Are we prepared to handle this kernel fault? */
15807037db5SPalmer Dabbelt 	if (fixup_exception(regs))
15907037db5SPalmer Dabbelt 		return;
16007037db5SPalmer Dabbelt 
16107037db5SPalmer Dabbelt 	/*
16207037db5SPalmer Dabbelt 	 * Oops. The kernel tried to access some bad page. We'll have to
16307037db5SPalmer Dabbelt 	 * terminate things with extreme prejudice.
16407037db5SPalmer Dabbelt 	 */
16507037db5SPalmer Dabbelt 	bust_spinlocks(1);
16607037db5SPalmer Dabbelt 	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
16707037db5SPalmer Dabbelt 		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
16807037db5SPalmer Dabbelt 		"paging request", addr);
16907037db5SPalmer Dabbelt 	die(regs, "Oops");
17007037db5SPalmer Dabbelt 	do_exit(SIGKILL);
17107037db5SPalmer Dabbelt 
	/*
	 * We ran out of memory, call the OOM killer, and return to
	 * userspace (which will retry the fault, or kill us if we got
	 * oom-killed).
	 */
out_of_memory:
	mmap_read_unlock(mm);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	mmap_read_unlock(mm);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		p4d_t *p4d, *p4d_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		/* User mode accesses just cause a SIGSEGV */
		if (user_mode(regs))
			return do_trap(regs, SIGSEGV, code, addr);

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
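		/*
		 * The satp CSR points at the root page table the hardware
		 * is actually walking, which is what must be synchronized
		 * here.
		 */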
		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

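		/*
		 * Walk the faulting table and the reference table in
		 * lockstep; a level missing from the reference table means
		 * the kernel never mapped this address.
		 */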
		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, the fault will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		/*
		 * The kernel assumes that TLBs don't cache invalid
		 * entries, but in RISC-V, SFENCE.VMA specifies an
		 * ordering constraint, not a cache flush; it is
		 * necessary even after writing invalid entries.
		 */
		local_flush_tlb_page(addr);

		return;
	}
}