/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1995 - 2000 by Ralf Baechle
 */
#include <linux/context_tracking.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>

#include <asm/branch.h>
#include <asm/mmu_context.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/highmem.h>	/* For VMALLOC_END */
#include <linux/kdebug.h>

/*
 * This routine handles page faults. It determines the address and the
 * problem, then passes it off to the appropriate routine.
 */
static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
	unsigned long address)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const int field = sizeof(unsigned long) * 2;
	siginfo_t info;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
			     (write ? FAULT_FLAG_WRITE : 0);

#if 0
	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
	       current->comm, current->pid, field, address, write,
	       field, regs->cp0_epc);
#endif

#ifdef CONFIG_KPROBES
	/*
	 * Notify any kprobes fault handlers. The exception code is
	 * redundant, as it is also carried in regs, but we pass it anyway.
	 */
	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
		       (regs->cp0_cause >> 2) & 0x1f, SIGSEGV) == NOTIFY_STOP)
		return;
#endif

	info.si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
#ifdef CONFIG_64BIT
# define VMALLOC_FAULT_TARGET no_context
#else
# define VMALLOC_FAULT_TARGET vmalloc_fault
#endif

	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
		goto VMALLOC_FAULT_TARGET;
#ifdef MODULE_START
	if (unlikely(address >= MODULE_START && address < MODULE_END))
		goto VMALLOC_FAULT_TARGET;
#endif

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

retry:
	down_read(&mm->mmap_sem);
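	/*
	 * find_vma() returns the first VMA whose vm_end lies above
	 * 'address', or NULL if there is none. Such a VMA only covers
	 * the fault when vm_start <= address; otherwise the fault hit
	 * the gap below the VMA, which is tolerable only for a stack
	 * mapping that may grow down to meet it.
	 */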
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	info.si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (cpu_has_rixi) {
			if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
#if 0
				pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n",
					  raw_smp_processor_id(),
					  current->comm, current->pid,
					  field, address, write,
					  field, regs->cp0_epc);
#endif
				goto bad_area;
			}
			if (!(vma->vm_flags & VM_READ)) {
#if 0
				pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n",
					  raw_smp_processor_id(),
					  current->comm, current->pid,
					  field, address, write,
					  field, regs->cp0_epc);
#endif
				goto bad_area;
			}
		} else {
			if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
				goto bad_area;
		}
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
			tsk->maj_flt++;
		} else {
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
			tsk->min_flt++;
		}
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */

			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.cp0_badvaddr = address;
		tsk->thread.error_code = write;
#if 0
		printk("do_page_fault() #2: sending SIGSEGV to %s for "
		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
		       tsk->comm,
		       write ? "write access to" : "read access from",
		       field, address,
		       field, (unsigned long) regs->cp0_epc,
		       field, (unsigned long) regs->regs[31]);
#endif
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
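	/*
	 * fixup_exception() searches the kernel exception table for an
	 * entry matching the faulting EPC; the uaccess helpers record
	 * their faulting instructions there, so that e.g. copy_from_user()
	 * on a bad user pointer lands in its fixup stub and returns
	 * -EFAULT instead of oopsing.
	 */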
	if (fixup_exception(regs)) {
		current->thread.cp0_baduaddr = address;
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
	       "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
	       raw_smp_processor_id(), field, address, field, regs->cp0_epc,
	       field, regs->regs[31]);
	die("Oops", regs);

out_of_memory:
	/*
	 * We ran out of memory, call the OOM killer, and return to
	 * userspace (which will retry the fault, or kill us if we got
	 * oom-killed).
	 */
	up_read(&mm->mmap_sem);
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/* Otherwise send a SIGBUS to the faulting task. */
#if 0
	printk("do_page_fault() #3: sending SIGBUS to %s for "
	       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
	       tsk->comm,
	       write ? "write access to" : "read access from",
	       field, address,
	       field, (unsigned long) regs->cp0_epc,
	       field, (unsigned long) regs->regs[31]);
#endif
	tsk->thread.cp0_badvaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &info, tsk);

	return;
#ifndef CONFIG_64BIT
vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
#endif
}

asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
	unsigned long write, unsigned long address)
{
	enum ctx_state prev_state;

	prev_state = exception_enter();
	__do_page_fault(regs, write, address);
	exception_exit(prev_state);
}
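/*
 * do_page_fault() above is the C entry point for page faults. On MIPS
 * it is reached from the low-level exception glue (in kernels of this
 * vintage, the tlb_do_page_fault_0/1 stubs in arch/mips/mm/tlbex-fault.S),
 * which passes CP0 BadVAddr as 'address' and a 0/1 write flag. The
 * exception_enter()/exception_exit() pair informs the context tracking
 * code (CONFIG_CONTEXT_TRACKING, used by NO_HZ_FULL) that the CPU has
 * left user context, so RCU keeps watching it for the duration of the
 * fault.
 */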