// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

static void die_kernel_fault(const char *msg, unsigned long addr,
		struct pt_regs *regs)
{
	bust_spinlocks(1);

	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
		addr);

	bust_spinlocks(0);
	die(regs, "Oops");
	do_exit(SIGKILL);
}

static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
	const char *msg;

	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	msg = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request";
	die_kernel_fault(msg, addr, regs);
}
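
/*
 * A sketch of what fixup_exception() does on riscv around this revision
 * (simplified from arch/riscv/mm/extable.c; illustrative, not
 * authoritative):
 *
 *	const struct exception_table_entry *fixup;
 *
 *	fixup = search_exception_tables(regs->epc);
 *	if (fixup) {
 *		regs->epc = fixup->fixup;
 *		return 1;
 *	}
 *	return 0;
 *
 * i.e. a fault raised at a whitelisted instruction (the uaccess helpers
 * and friends) branches to its recovery code instead of oopsing.
 */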

static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed).
		 */
		if (!user_mode(regs)) {
			no_context(regs, addr);
			return;
		}
		pagefault_out_of_memory();
		return;
	} else if (fault & VM_FAULT_SIGBUS) {
		/* Kernel mode? Handle exceptions or die */
		if (!user_mode(regs)) {
			no_context(regs, addr);
			return;
		}
		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
		return;
	}
	BUG();
}
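
/*
 * Note: pagefault_out_of_memory() defers to the core OOM machinery; by
 * the time the faulting task runs again, either memory was reclaimed
 * (so the retried fault succeeds) or the task itself was selected by
 * the OOM killer.
 */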

static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
	mmap_read_unlock(mm);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr);
		return;
	}

	no_context(regs, addr);
}

static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	p4d_t *p4d, *p4d_k;
	pmd_t *pmd, *pmd_k;
	pte_t *pte_k;
	int index;
	unsigned long pfn;

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs))
		return do_trap(regs, SIGSEGV, code, addr);

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "tsk->active_mm->pgd" here.
	 * We might be inside an interrupt in the middle
	 * of a task switch.
	 */
	index = pgd_index(addr);
	pfn = csr_read(CSR_SATP) & SATP_PPN;
	pgd = (pgd_t *)pfn_to_virt(pfn) + index;
	pgd_k = init_mm.pgd + index;
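
	/*
	 * Sketch of the lookup above (illustrative): SATP.PPN holds the
	 * physical page number of the hardware-active root page table, so
	 *
	 *	pgd   = __va(SATP.PPN << PAGE_SHIFT) + pgd_index(addr)
	 *	pgd_k = init_mm.pgd + pgd_index(addr)
	 *
	 * and the repair below is simply copying the kernel ("reference")
	 * entry into the active table.
	 */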

	if (!pgd_present(*pgd_k)) {
		no_context(regs, addr);
		return;
	}
	set_pgd(pgd, *pgd_k);

	p4d = p4d_offset(pgd, addr);
	p4d_k = p4d_offset(pgd_k, addr);
	if (!p4d_present(*p4d_k)) {
		no_context(regs, addr);
		return;
	}

	pud = pud_offset(p4d, addr);
	pud_k = pud_offset(p4d_k, addr);
	if (!pud_present(*pud_k)) {
		no_context(regs, addr);
		return;
	}

	/*
	 * Since the vmalloc area is global, it is unnecessary
	 * to copy individual PTEs
	 */
	pmd = pmd_offset(pud, addr);
	pmd_k = pmd_offset(pud_k, addr);
	if (!pmd_present(*pmd_k)) {
		no_context(regs, addr);
		return;
	}
	set_pmd(pmd, *pmd_k);
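
	/*
	 * In other words, the leaf PTE tables for the vmalloc area are
	 * shared with init_mm: set_pmd() above installs a pointer to the
	 * shared PTE table, so no per-PTE copying is needed.
	 */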

	/*
	 * Make sure the actual PTE exists as well to
	 * catch kernel vmalloc-area accesses to non-mapped
	 * addresses. If we don't do this, this will just
	 * silently loop forever.
	 */
	pte_k = pte_offset_kernel(pmd_k, addr);
	if (!pte_present(*pte_k)) {
		no_context(regs, addr);
		return;
	}

	/*
	 * The kernel assumes that TLBs don't cache invalid
	 * entries, but in RISC-V, SFENCE.VMA specifies an
	 * ordering constraint, not a cache flush; it is
	 * necessary even after writing invalid entries.
	 */
	local_flush_tlb_page(addr);
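
	/*
	 * For reference, local_flush_tlb_page() at this revision is
	 * (roughly) a single address-scoped fence:
	 *
	 *	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
	 *
	 * which orders the page-table writes above against the hardware
	 * walker's subsequent implicit reads.
	 */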
}

static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			return true;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			return true;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			return true;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}
	return false;
}
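
/*
 * Worked example: a load from a write-only (or PROT_NONE) mapping traps
 * with cause == EXC_LOAD_PAGE_FAULT; the vma lacks VM_READ, so this
 * returns true and do_page_fault() raises SIGSEGV with SEGV_ACCERR.
 */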

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

	cause = regs->cause;
	addr = regs->badaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
		vmalloc_fault(regs, code, addr);
		return;
	}
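
	/*
	 * Typical case for the branch above: a vmalloc() or module mapping
	 * was created after this task's pgd was allocated, so the first
	 * touch through this pgd faults and the root entry is copied from
	 * init_mm on demand.
	 */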

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->status & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		no_context(regs, addr);
		return;
	}

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	if (!user_mode(regs) && addr < TASK_SIZE &&
			unlikely(!(regs->status & SR_SUM)))
		die_kernel_fault("access to user memory without uaccess routines",
				addr, regs);
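
	/*
	 * SR_SUM ("permit Supervisor User Memory access") is only raised
	 * around explicit uaccess routines, so a kernel-mode fault on a
	 * user address without it is a kernel bug, e.g. (sketch):
	 *
	 *	get_user(val, uptr);	// SUM set around the access: OK
	 *	val = *uptr;		// direct dereference, no SUM: dies here
	 */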

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	if (cause == EXC_STORE_PAGE_FAULT)
		flags |= FAULT_FLAG_WRITE;
	else if (cause == EXC_INST_PAGE_FAULT)
		flags |= FAULT_FLAG_INSTRUCTION;
retry:
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (unlikely(!vma)) {
		bad_area(regs, mm, code, addr);
		return;
	}
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, mm, code, addr);
		return;
	}
	if (unlikely(expand_stack(vma, addr))) {
		bad_area(regs, mm, code, addr);
		return;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	if (unlikely(access_error(cause, vma))) {
		bad_area(regs, mm, code, addr);
		return;
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags, regs);
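
	/*
	 * Note that since handle_mm_fault() grew a pt_regs argument it does
	 * the per-fault PERF_COUNT_SW_PAGE_FAULTS_{MAJ,MIN} accounting
	 * itself; only the overall PERF_COUNT_SW_PAGE_FAULTS event is
	 * counted explicitly above.
	 */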

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs))
		return;

	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}

	mmap_read_unlock(mm);

	if (unlikely(fault & VM_FAULT_ERROR)) {
		mm_fault_error(regs, addr, fault);
		return;
	}
	return;
}