xref: /linux/arch/riscv/mm/fault.c (revision 48dea9a700c8728cc31a1dd44588b97578de86ee)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
4  *  Lennox Wu <lennox.wu@sunplusct.com>
5  *  Chen Liqin <liqin.chen@sunplusct.com>
6  * Copyright (C) 2012 Regents of the University of California
7  */
8 
9 
10 #include <linux/mm.h>
11 #include <linux/kernel.h>
12 #include <linux/interrupt.h>
13 #include <linux/perf_event.h>
14 #include <linux/signal.h>
15 #include <linux/uaccess.h>
16 
17 #include <asm/ptrace.h>
18 #include <asm/tlbflush.h>
19 
20 #include "../kernel/head.h"
21 
22 /*
23  * This routine handles page faults.  It determines the address and the
24  * problem, and then passes it off to one of the appropriate routines.
25  */
26 asmlinkage void do_page_fault(struct pt_regs *regs)
27 {
28 	struct task_struct *tsk;
29 	struct vm_area_struct *vma;
30 	struct mm_struct *mm;
31 	unsigned long addr, cause;
32 	unsigned int flags = FAULT_FLAG_DEFAULT;
33 	int code = SEGV_MAPERR;
34 	vm_fault_t fault;
35 
36 	cause = regs->cause;
37 	addr = regs->badaddr;
38 
39 	tsk = current;
40 	mm = tsk->mm;
41 
42 	/*
43 	 * Fault-in kernel-space virtual memory on-demand.
44 	 * The 'reference' page table is init_mm.pgd.
45 	 *
46 	 * NOTE! We MUST NOT take any locks for this case. We may
47 	 * be in an interrupt or a critical region, and should
48 	 * only copy the information from the master page table,
49 	 * nothing more.
50 	 */
51 	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
52 		goto vmalloc_fault;
53 
54 	/* Enable interrupts if they were enabled in the parent context. */
55 	if (likely(regs->status & SR_PIE))
56 		local_irq_enable();
57 
58 	/*
59 	 * If we're in an interrupt, have no user context, or are running
60 	 * in an atomic region, then we must not take the fault.
61 	 */
62 	if (unlikely(faulthandler_disabled() || !mm))
63 		goto no_context;
64 
65 	if (user_mode(regs))
66 		flags |= FAULT_FLAG_USER;
67 
68 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
69 
70 retry:
71 	mmap_read_lock(mm);
72 	vma = find_vma(mm, addr);
73 	if (unlikely(!vma))
74 		goto bad_area;
75 	if (likely(vma->vm_start <= addr))
76 		goto good_area;
77 	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
78 		goto bad_area;
79 	if (unlikely(expand_stack(vma, addr)))
80 		goto bad_area;
81 
82 	/*
83 	 * Ok, we have a good vm_area for this memory access, so
84 	 * we can handle it.
85 	 */
86 good_area:
87 	code = SEGV_ACCERR;
88 
89 	switch (cause) {
90 	case EXC_INST_PAGE_FAULT:
91 		if (!(vma->vm_flags & VM_EXEC))
92 			goto bad_area;
93 		break;
94 	case EXC_LOAD_PAGE_FAULT:
95 		if (!(vma->vm_flags & VM_READ))
96 			goto bad_area;
97 		break;
98 	case EXC_STORE_PAGE_FAULT:
99 		if (!(vma->vm_flags & VM_WRITE))
100 			goto bad_area;
101 		flags |= FAULT_FLAG_WRITE;
102 		break;
103 	default:
104 		panic("%s: unhandled cause %lu", __func__, cause);
105 	}
106 
107 	/*
108 	 * If for any reason at all we could not handle the fault,
109 	 * make sure we exit gracefully rather than endlessly redo
110 	 * the fault.
111 	 */
112 	fault = handle_mm_fault(vma, addr, flags, regs);
113 
114 	/*
115 	 * If we need to retry but a fatal signal is pending, handle the
116 	 * signal first. We do not need to release the mmap_lock because it
117 	 * would already be released in __lock_page_or_retry in mm/filemap.c.
118 	 */
119 	if (fault_signal_pending(fault, regs))
120 		return;
121 
122 	if (unlikely(fault & VM_FAULT_ERROR)) {
123 		if (fault & VM_FAULT_OOM)
124 			goto out_of_memory;
125 		else if (fault & VM_FAULT_SIGBUS)
126 			goto do_sigbus;
127 		BUG();
128 	}
129 
130 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
131 		if (fault & VM_FAULT_RETRY) {
132 			flags |= FAULT_FLAG_TRIED;
133 
134 			/*
135 			 * No need to mmap_read_unlock(mm) as we would
136 			 * have already released it in __lock_page_or_retry
137 			 * in mm/filemap.c.
138 			 */
139 			goto retry;
140 		}
141 	}
142 
143 	mmap_read_unlock(mm);
144 	return;
145 
146 	/*
147 	 * Something tried to access memory that isn't in our memory map.
148 	 * Fix it, but check if it's kernel or user first.
149 	 */
150 bad_area:
151 	mmap_read_unlock(mm);
152 	/* User mode accesses just cause a SIGSEGV */
153 	if (user_mode(regs)) {
154 		do_trap(regs, SIGSEGV, code, addr);
155 		return;
156 	}
157 
158 no_context:
159 	/* Are we prepared to handle this kernel fault? */
160 	if (fixup_exception(regs))
161 		return;
162 
163 	/*
164 	 * Oops. The kernel tried to access some bad page. We'll have to
165 	 * terminate things with extreme prejudice.
166 	 */
167 	bust_spinlocks(1);
168 	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
169 		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
170 		"paging request", addr);
171 	die(regs, "Oops");
172 	do_exit(SIGKILL);
173 
174 	/*
175 	 * We ran out of memory, call the OOM killer, and return the userspace
176 	 * (which will retry the fault, or kill us if we got oom-killed).
177 	 */
178 out_of_memory:
179 	mmap_read_unlock(mm);
180 	if (!user_mode(regs))
181 		goto no_context;
182 	pagefault_out_of_memory();
183 	return;
184 
185 do_sigbus:
186 	mmap_read_unlock(mm);
187 	/* Kernel mode? Handle exceptions or die */
188 	if (!user_mode(regs))
189 		goto no_context;
190 	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
191 	return;
192 
193 vmalloc_fault:
194 	{
195 		pgd_t *pgd, *pgd_k;
196 		pud_t *pud, *pud_k;
197 		p4d_t *p4d, *p4d_k;
198 		pmd_t *pmd, *pmd_k;
199 		pte_t *pte_k;
200 		int index;
201 
202 		/* User mode accesses just cause a SIGSEGV */
203 		if (user_mode(regs))
204 			return do_trap(regs, SIGSEGV, code, addr);
205 
206 		/*
207 		 * Synchronize this task's top level page-table
208 		 * with the 'reference' page table.
209 		 *
210 		 * Do _not_ use "tsk->active_mm->pgd" here.
211 		 * We might be inside an interrupt in the middle
212 		 * of a task switch.
213 		 */
214 		index = pgd_index(addr);
215 		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
216 		pgd_k = init_mm.pgd + index;
217 
218 		if (!pgd_present(*pgd_k))
219 			goto no_context;
220 		set_pgd(pgd, *pgd_k);
221 
222 		p4d = p4d_offset(pgd, addr);
223 		p4d_k = p4d_offset(pgd_k, addr);
224 		if (!p4d_present(*p4d_k))
225 			goto no_context;
226 
227 		pud = pud_offset(p4d, addr);
228 		pud_k = pud_offset(p4d_k, addr);
229 		if (!pud_present(*pud_k))
230 			goto no_context;
231 
232 		/*
233 		 * Since the vmalloc area is global, it is unnecessary
234 		 * to copy individual PTEs
235 		 */
236 		pmd = pmd_offset(pud, addr);
237 		pmd_k = pmd_offset(pud_k, addr);
238 		if (!pmd_present(*pmd_k))
239 			goto no_context;
240 		set_pmd(pmd, *pmd_k);
241 
242 		/*
243 		 * Make sure the actual PTE exists as well to
244 		 * catch kernel vmalloc-area accesses to non-mapped
245 		 * addresses. If we don't do this, this will just
246 		 * silently loop forever.
247 		 */
248 		pte_k = pte_offset_kernel(pmd_k, addr);
249 		if (!pte_present(*pte_k))
250 			goto no_context;
251 
252 		/*
253 		 * The kernel assumes that TLBs don't cache invalid
254 		 * entries, but in RISC-V, SFENCE.VMA specifies an
255 		 * ordering constraint, not a cache flush; it is
256 		 * necessary even after writing invalid entries.
257 		 */
258 		local_flush_tlb_page(addr);
259 
260 		return;
261 	}
262 }
263