// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 *
 * Derived from MIPS:
 * Copyright (C) 1995 - 2000 by Ralf Baechle
 */
#include <linux/context_tracking.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/entry-common.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/ratelimit.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/kfence.h>

#include <asm/branch.h>
#include <asm/exception.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>

int show_unhandled_signals = 1;

/*
 * A fault on a kernel address may be spurious: the master kernel page
 * table (init_mm.pgd) already holds a valid entry, but this CPU still
 * has a stale TLB entry for the address. Walk the kernel page table
 * and report whether the access would have been permitted; if so, the
 * caller can simply return and let the TLB be refilled.
 */
static int __kprobes spurious_fault(unsigned long write, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	/* Only kernel addresses can be spurious */
	if (!(address & __UA_LIMIT))
		return 0;

	pgd = pgd_offset_k(address);
	if (!pgd_present(pgdp_get(pgd)))
		return 0;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(p4dp_get(p4d)))
		return 0;

	pud = pud_offset(p4d, address);
	if (!pud_present(pudp_get(pud)))
		return 0;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(pmdp_get(pmd)))
		return 0;

	if (pmd_leaf(*pmd)) {
		return write ? pmd_write(pmdp_get(pmd)) : 1;
	} else {
		pte = pte_offset_kernel(pmd, address);
		if (!pte_present(ptep_get(pte)))
			return 0;

		return write ? pte_write(ptep_get(pte)) : 1;
	}
}

static void __kprobes no_context(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	const int field = sizeof(unsigned long) * 2;

	if (spurious_fault(write, address))
		return;

	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	if (kfence_handle_page_fault(address, write, regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	pr_alert("CPU %d Unable to handle kernel paging request at "
		 "virtual address %0*lx, era == %0*lx, ra == %0*lx\n",
		 raw_smp_processor_id(), field, address, field, regs->csr_era,
		 field, regs->regs[1]);
	die("Oops", regs);
}
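/*
 * The helpers below turn a fault that could not be handled into the
 * appropriate disposition: invoke the OOM killer, or deliver SIGBUS
 * or SIGSEGV to a user task. Kernel-mode faults are always routed
 * back through no_context() first, so exception fixups get a chance
 * to run before anything is killed.
 */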
static void __kprobes do_out_of_memory(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	/*
	 * We ran out of memory, call the OOM killer, and return to
	 * userspace (which will retry the fault, or kill us if we got
	 * oom-killed).
	 */
	if (!user_mode(regs)) {
		no_context(regs, write, address);
		return;
	}
	pagefault_out_of_memory();
}

static void __kprobes do_sigbus(struct pt_regs *regs,
		unsigned long write, unsigned long address, int si_code)
{
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs)) {
		no_context(regs, write, address);
		return;
	}

	/* Send a SIGBUS to the faulting user task */
	current->thread.csr_badvaddr = address;
	current->thread.trap_nr = read_csr_excode();
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}

static void __kprobes do_sigsegv(struct pt_regs *regs,
		unsigned long write, unsigned long address, int si_code)
{
	const int field = sizeof(unsigned long) * 2;
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs)) {
		no_context(regs, write, address);
		return;
	}

	/* User mode accesses just cause a SIGSEGV */
	current->thread.csr_badvaddr = address;
	/* error_code: 1 == read fault, 2 == write fault */
	if (!write)
		current->thread.error_code = 1;
	else
		current->thread.error_code = 2;
	current->thread.trap_nr = read_csr_excode();

	if (show_unhandled_signals &&
	    unhandled_signal(current, SIGSEGV) && __ratelimit(&ratelimit_state)) {
		pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n",
			current->comm,
			write ? "write access to" : "read access from",
			field, address);
		pr_info("era = %0*lx in", field,
			(unsigned long) regs->csr_era);
		print_vma_addr(KERN_CONT " ", regs->csr_era);
		pr_cont("\n");
		pr_info("ra  = %0*lx in", field,
			(unsigned long) regs->regs[1]);
		print_vma_addr(KERN_CONT " ", regs->regs[1]);
		pr_cont("\n");
	}
	force_sig_fault(SIGSEGV, si_code, (void __user *)address);
}

/*
 * This routine handles page faults. It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
static void __kprobes __do_page_fault(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	int si_code = SEGV_MAPERR;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct vm_area_struct *vma = NULL;
	vm_fault_t fault;

	if (kprobe_page_fault(regs, current->thread.trap_nr))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (address & __UA_LIMIT) {
		if (!user_mode(regs))
			no_context(regs, write, address);
		else
			do_sigsegv(regs, write, address, si_code);
		return;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (faulthandler_disabled() || !mm) {
		do_sigsegv(regs, write, address, si_code);
		return;
	}

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
	vma = lock_mm_and_find_vma(mm, address, regs);
	if (unlikely(!vma))
		goto bad_area_nosemaphore;
	goto good_area;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	mmap_read_unlock(mm);
bad_area_nosemaphore:
	do_sigsegv(regs, write, address, si_code);
	return;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;

	if (write) {
		flags |= FAULT_FLAG_WRITE;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		/* An instruction fetch faults with address == exception_era() */
		if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
			goto bad_area;
		/* A data read needs a readable (or at least writable) mapping */
		if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
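	/*
	 * The outcomes below mirror the generic VM contract: a pending
	 * fatal signal, a fully completed fault (the mmap lock already
	 * released by the core VM), a request to retry, or a hard error
	 * that must be turned into a signal.
	 */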
	fault = handle_mm_fault(vma, address, flags, regs);

	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			no_context(regs, write, address);
		return;
	}

	/* The fault is fully completed (including releasing mmap lock) */
	if (fault & VM_FAULT_COMPLETED)
		return;

	if (unlikely(fault & VM_FAULT_RETRY)) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}
	if (unlikely(fault & VM_FAULT_ERROR)) {
		mmap_read_unlock(mm);
		if (fault & VM_FAULT_OOM) {
			do_out_of_memory(regs, write, address);
			return;
		} else if (fault & VM_FAULT_SIGSEGV) {
			do_sigsegv(regs, write, address, si_code);
			return;
		} else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
			do_sigbus(regs, write, address, si_code);
			return;
		}
		BUG();
	}

	mmap_read_unlock(mm);
}

asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	irqentry_state_t state = irqentry_enter(regs);

	/*
	 * Enable interrupts if they were enabled in the parent context:
	 * CSR.PRMD.PIE holds the interrupt-enable state at the time the
	 * exception was taken.
	 */
	if (likely(regs->csr_prmd & CSR_PRMD_PIE))
		local_irq_enable();

	__do_page_fault(regs, write, address);

	local_irq_disable();

	irqentry_exit(regs, state);
}