1 /* 2 * linux/arch/arm/mm/fault.c 3 * 4 * Copyright (C) 1995 Linus Torvalds 5 * Modifications for ARM processor (c) 1995-2004 Russell King 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/module.h> 12 #include <linux/signal.h> 13 #include <linux/mm.h> 14 #include <linux/hardirq.h> 15 #include <linux/init.h> 16 #include <linux/kprobes.h> 17 #include <linux/uaccess.h> 18 #include <linux/page-flags.h> 19 #include <linux/sched.h> 20 #include <linux/highmem.h> 21 #include <linux/perf_event.h> 22 23 #include <asm/exception.h> 24 #include <asm/system.h> 25 #include <asm/pgtable.h> 26 #include <asm/tlbflush.h> 27 28 #include "fault.h" 29 30 #ifdef CONFIG_MMU 31 32 #ifdef CONFIG_KPROBES 33 static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) 34 { 35 int ret = 0; 36 37 if (!user_mode(regs)) { 38 /* kprobe_running() needs smp_processor_id() */ 39 preempt_disable(); 40 if (kprobe_running() && kprobe_fault_handler(regs, fsr)) 41 ret = 1; 42 preempt_enable(); 43 } 44 45 return ret; 46 } 47 #else 48 static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) 49 { 50 return 0; 51 } 52 #endif 53 54 /* 55 * This is useful to dump out the page tables associated with 56 * 'addr' in mm 'mm'. 57 */ 58 void show_pte(struct mm_struct *mm, unsigned long addr) 59 { 60 pgd_t *pgd; 61 62 if (!mm) 63 mm = &init_mm; 64 65 printk(KERN_ALERT "pgd = %p\n", mm->pgd); 66 pgd = pgd_offset(mm, addr); 67 printk(KERN_ALERT "[%08lx] *pgd=%08llx", 68 addr, (long long)pgd_val(*pgd)); 69 70 do { 71 pud_t *pud; 72 pmd_t *pmd; 73 pte_t *pte; 74 75 if (pgd_none(*pgd)) 76 break; 77 78 if (pgd_bad(*pgd)) { 79 printk("(bad)"); 80 break; 81 } 82 83 pud = pud_offset(pgd, addr); 84 if (PTRS_PER_PUD != 1) 85 printk(", *pud=%08llx", (long long)pud_val(*pud)); 86 87 if (pud_none(*pud)) 88 break; 89 90 if (pud_bad(*pud)) { 91 printk("(bad)"); 92 break; 93 } 94 95 pmd = pmd_offset(pud, addr); 96 if (PTRS_PER_PMD != 1) 97 printk(", *pmd=%08llx", (long long)pmd_val(*pmd)); 98 99 if (pmd_none(*pmd)) 100 break; 101 102 if (pmd_bad(*pmd)) { 103 printk("(bad)"); 104 break; 105 } 106 107 /* We must not map this if we have highmem enabled */ 108 if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) 109 break; 110 111 pte = pte_offset_map(pmd, addr); 112 printk(", *pte=%08llx", (long long)pte_val(*pte)); 113 #ifndef CONFIG_ARM_LPAE 114 printk(", *ppte=%08llx", 115 (long long)pte_val(pte[PTE_HWTABLE_PTRS])); 116 #endif 117 pte_unmap(pte); 118 } while(0); 119 120 printk("\n"); 121 } 122 #else /* CONFIG_MMU */ 123 void show_pte(struct mm_struct *mm, unsigned long addr) 124 { } 125 #endif /* CONFIG_MMU */ 126 127 /* 128 * Oops. The kernel tried to access some page that wasn't present. 129 */ 130 static void 131 __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, 132 struct pt_regs *regs) 133 { 134 /* 135 * Are we prepared to handle this kernel fault? 136 */ 137 if (fixup_exception(regs)) 138 return; 139 140 /* 141 * No handler, we'll have to terminate things with extreme prejudice. 142 */ 143 bust_spinlocks(1); 144 printk(KERN_ALERT 145 "Unable to handle kernel %s at virtual address %08lx\n", 146 (addr < PAGE_SIZE) ? "NULL pointer dereference" : 147 "paging request", addr); 148 149 show_pte(mm, addr); 150 die("Oops", regs, fsr); 151 bust_spinlocks(0); 152 do_exit(SIGKILL); 153 } 154 155 /* 156 * Something tried to access memory that isn't in our memory map.. 157 * User mode accesses just cause a SIGSEGV 158 */ 159 static void 160 __do_user_fault(struct task_struct *tsk, unsigned long addr, 161 unsigned int fsr, unsigned int sig, int code, 162 struct pt_regs *regs) 163 { 164 struct siginfo si; 165 166 #ifdef CONFIG_DEBUG_USER 167 if (user_debug & UDBG_SEGV) { 168 printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", 169 tsk->comm, sig, addr, fsr); 170 show_pte(tsk->mm, addr); 171 show_regs(regs); 172 } 173 #endif 174 175 tsk->thread.address = addr; 176 tsk->thread.error_code = fsr; 177 tsk->thread.trap_no = 14; 178 si.si_signo = sig; 179 si.si_errno = 0; 180 si.si_code = code; 181 si.si_addr = (void __user *)addr; 182 force_sig_info(sig, &si, tsk); 183 } 184 185 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) 186 { 187 struct task_struct *tsk = current; 188 struct mm_struct *mm = tsk->active_mm; 189 190 /* 191 * If we are in kernel mode at this point, we 192 * have no context to handle this fault with. 193 */ 194 if (user_mode(regs)) 195 __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs); 196 else 197 __do_kernel_fault(mm, addr, fsr, regs); 198 } 199 200 #ifdef CONFIG_MMU 201 #define VM_FAULT_BADMAP 0x010000 202 #define VM_FAULT_BADACCESS 0x020000 203 204 /* 205 * Check that the permissions on the VMA allow for the fault which occurred. 206 * If we encountered a write fault, we must have write permission, otherwise 207 * we allow any permission. 208 */ 209 static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) 210 { 211 unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; 212 213 if (fsr & FSR_WRITE) 214 mask = VM_WRITE; 215 if (fsr & FSR_LNX_PF) 216 mask = VM_EXEC; 217 218 return vma->vm_flags & mask ? false : true; 219 } 220 221 static int __kprobes 222 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, 223 unsigned int flags, struct task_struct *tsk) 224 { 225 struct vm_area_struct *vma; 226 int fault; 227 228 vma = find_vma(mm, addr); 229 fault = VM_FAULT_BADMAP; 230 if (unlikely(!vma)) 231 goto out; 232 if (unlikely(vma->vm_start > addr)) 233 goto check_stack; 234 235 /* 236 * Ok, we have a good vm_area for this 237 * memory access, so we can handle it. 238 */ 239 good_area: 240 if (access_error(fsr, vma)) { 241 fault = VM_FAULT_BADACCESS; 242 goto out; 243 } 244 245 return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); 246 247 check_stack: 248 if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) 249 goto good_area; 250 out: 251 return fault; 252 } 253 254 static int __kprobes 255 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) 256 { 257 struct task_struct *tsk; 258 struct mm_struct *mm; 259 int fault, sig, code; 260 int write = fsr & FSR_WRITE; 261 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | 262 (write ? FAULT_FLAG_WRITE : 0); 263 264 if (notify_page_fault(regs, fsr)) 265 return 0; 266 267 tsk = current; 268 mm = tsk->mm; 269 270 /* Enable interrupts if they were enabled in the parent context. */ 271 if (interrupts_enabled(regs)) 272 local_irq_enable(); 273 274 /* 275 * If we're in an interrupt or have no user 276 * context, we must not take the fault.. 277 */ 278 if (in_atomic() || !mm) 279 goto no_context; 280 281 /* 282 * As per x86, we may deadlock here. However, since the kernel only 283 * validly references user space from well defined areas of the code, 284 * we can bug out early if this is from code which shouldn't. 285 */ 286 if (!down_read_trylock(&mm->mmap_sem)) { 287 if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) 288 goto no_context; 289 retry: 290 down_read(&mm->mmap_sem); 291 } else { 292 /* 293 * The above down_read_trylock() might have succeeded in 294 * which case, we'll have missed the might_sleep() from 295 * down_read() 296 */ 297 might_sleep(); 298 #ifdef CONFIG_DEBUG_VM 299 if (!user_mode(regs) && 300 !search_exception_tables(regs->ARM_pc)) 301 goto no_context; 302 #endif 303 } 304 305 fault = __do_page_fault(mm, addr, fsr, flags, tsk); 306 307 /* If we need to retry but a fatal signal is pending, handle the 308 * signal first. We do not need to release the mmap_sem because 309 * it would already be released in __lock_page_or_retry in 310 * mm/filemap.c. */ 311 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) 312 return 0; 313 314 /* 315 * Major/minor page fault accounting is only done on the 316 * initial attempt. If we go through a retry, it is extremely 317 * likely that the page will be found in page cache at that point. 318 */ 319 320 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); 321 if (flags & FAULT_FLAG_ALLOW_RETRY) { 322 if (fault & VM_FAULT_MAJOR) { 323 tsk->maj_flt++; 324 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 325 regs, addr); 326 } else { 327 tsk->min_flt++; 328 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 329 regs, addr); 330 } 331 if (fault & VM_FAULT_RETRY) { 332 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 333 * of starvation. */ 334 flags &= ~FAULT_FLAG_ALLOW_RETRY; 335 goto retry; 336 } 337 } 338 339 up_read(&mm->mmap_sem); 340 341 /* 342 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR 343 */ 344 if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) 345 return 0; 346 347 if (fault & VM_FAULT_OOM) { 348 /* 349 * We ran out of memory, call the OOM killer, and return to 350 * userspace (which will retry the fault, or kill us if we 351 * got oom-killed) 352 */ 353 pagefault_out_of_memory(); 354 return 0; 355 } 356 357 /* 358 * If we are in kernel mode at this point, we 359 * have no context to handle this fault with. 360 */ 361 if (!user_mode(regs)) 362 goto no_context; 363 364 if (fault & VM_FAULT_SIGBUS) { 365 /* 366 * We had some memory, but were unable to 367 * successfully fix up this page fault. 368 */ 369 sig = SIGBUS; 370 code = BUS_ADRERR; 371 } else { 372 /* 373 * Something tried to access memory that 374 * isn't in our memory map.. 375 */ 376 sig = SIGSEGV; 377 code = fault == VM_FAULT_BADACCESS ? 378 SEGV_ACCERR : SEGV_MAPERR; 379 } 380 381 __do_user_fault(tsk, addr, fsr, sig, code, regs); 382 return 0; 383 384 no_context: 385 __do_kernel_fault(mm, addr, fsr, regs); 386 return 0; 387 } 388 #else /* CONFIG_MMU */ 389 static int 390 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) 391 { 392 return 0; 393 } 394 #endif /* CONFIG_MMU */ 395 396 /* 397 * First Level Translation Fault Handler 398 * 399 * We enter here because the first level page table doesn't contain 400 * a valid entry for the address. 401 * 402 * If the address is in kernel space (>= TASK_SIZE), then we are 403 * probably faulting in the vmalloc() area. 404 * 405 * If the init_task's first level page tables contains the relevant 406 * entry, we copy the it to this task. If not, we send the process 407 * a signal, fixup the exception, or oops the kernel. 408 * 409 * NOTE! We MUST NOT take any locks for this case. We may be in an 410 * interrupt or a critical region, and should only copy the information 411 * from the master page table, nothing more. 412 */ 413 #ifdef CONFIG_MMU 414 static int __kprobes 415 do_translation_fault(unsigned long addr, unsigned int fsr, 416 struct pt_regs *regs) 417 { 418 unsigned int index; 419 pgd_t *pgd, *pgd_k; 420 pud_t *pud, *pud_k; 421 pmd_t *pmd, *pmd_k; 422 423 if (addr < TASK_SIZE) 424 return do_page_fault(addr, fsr, regs); 425 426 if (user_mode(regs)) 427 goto bad_area; 428 429 index = pgd_index(addr); 430 431 /* 432 * FIXME: CP15 C1 is write only on ARMv3 architectures. 433 */ 434 pgd = cpu_get_pgd() + index; 435 pgd_k = init_mm.pgd + index; 436 437 if (pgd_none(*pgd_k)) 438 goto bad_area; 439 if (!pgd_present(*pgd)) 440 set_pgd(pgd, *pgd_k); 441 442 pud = pud_offset(pgd, addr); 443 pud_k = pud_offset(pgd_k, addr); 444 445 if (pud_none(*pud_k)) 446 goto bad_area; 447 if (!pud_present(*pud)) 448 set_pud(pud, *pud_k); 449 450 pmd = pmd_offset(pud, addr); 451 pmd_k = pmd_offset(pud_k, addr); 452 453 #ifdef CONFIG_ARM_LPAE 454 /* 455 * Only one hardware entry per PMD with LPAE. 456 */ 457 index = 0; 458 #else 459 /* 460 * On ARM one Linux PGD entry contains two hardware entries (see page 461 * tables layout in pgtable.h). We normally guarantee that we always 462 * fill both L1 entries. But create_mapping() doesn't follow the rule. 463 * It can create inidividual L1 entries, so here we have to call 464 * pmd_none() check for the entry really corresponded to address, not 465 * for the first of pair. 466 */ 467 index = (addr >> SECTION_SHIFT) & 1; 468 #endif 469 if (pmd_none(pmd_k[index])) 470 goto bad_area; 471 472 copy_pmd(pmd, pmd_k); 473 return 0; 474 475 bad_area: 476 do_bad_area(addr, fsr, regs); 477 return 0; 478 } 479 #else /* CONFIG_MMU */ 480 static int 481 do_translation_fault(unsigned long addr, unsigned int fsr, 482 struct pt_regs *regs) 483 { 484 return 0; 485 } 486 #endif /* CONFIG_MMU */ 487 488 /* 489 * Some section permission faults need to be handled gracefully. 490 * They can happen due to a __{get,put}_user during an oops. 491 */ 492 static int 493 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) 494 { 495 do_bad_area(addr, fsr, regs); 496 return 0; 497 } 498 499 /* 500 * This abort handler always returns "fault". 501 */ 502 static int 503 do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) 504 { 505 return 1; 506 } 507 508 struct fsr_info { 509 int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); 510 int sig; 511 int code; 512 const char *name; 513 }; 514 515 /* FSR definition */ 516 #ifdef CONFIG_ARM_LPAE 517 #include "fsr-3level.c" 518 #else 519 #include "fsr-2level.c" 520 #endif 521 522 void __init 523 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), 524 int sig, int code, const char *name) 525 { 526 if (nr < 0 || nr >= ARRAY_SIZE(fsr_info)) 527 BUG(); 528 529 fsr_info[nr].fn = fn; 530 fsr_info[nr].sig = sig; 531 fsr_info[nr].code = code; 532 fsr_info[nr].name = name; 533 } 534 535 /* 536 * Dispatch a data abort to the relevant handler. 537 */ 538 asmlinkage void __exception 539 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) 540 { 541 const struct fsr_info *inf = fsr_info + fsr_fs(fsr); 542 struct siginfo info; 543 544 if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) 545 return; 546 547 printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", 548 inf->name, fsr, addr); 549 550 info.si_signo = inf->sig; 551 info.si_errno = 0; 552 info.si_code = inf->code; 553 info.si_addr = (void __user *)addr; 554 arm_notify_die("", regs, &info, fsr, 0); 555 } 556 557 void __init 558 hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), 559 int sig, int code, const char *name) 560 { 561 if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info)) 562 BUG(); 563 564 ifsr_info[nr].fn = fn; 565 ifsr_info[nr].sig = sig; 566 ifsr_info[nr].code = code; 567 ifsr_info[nr].name = name; 568 } 569 570 asmlinkage void __exception 571 do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) 572 { 573 const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); 574 struct siginfo info; 575 576 if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) 577 return; 578 579 printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", 580 inf->name, ifsr, addr); 581 582 info.si_signo = inf->sig; 583 info.si_errno = 0; 584 info.si_code = inf->code; 585 info.si_addr = (void __user *)addr; 586 arm_notify_die("", regs, &info, ifsr, 0); 587 } 588 589 #ifndef CONFIG_ARM_LPAE 590 static int __init exceptions_init(void) 591 { 592 if (cpu_architecture() >= CPU_ARCH_ARMv6) { 593 hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR, 594 "I-cache maintenance fault"); 595 } 596 597 if (cpu_architecture() >= CPU_ARCH_ARMv7) { 598 /* 599 * TODO: Access flag faults introduced in ARMv6K. 600 * Runtime check for 'K' extension is needed 601 */ 602 hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR, 603 "section access flag fault"); 604 hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR, 605 "section access flag fault"); 606 } 607 608 return 0; 609 } 610 611 arch_initcall(exceptions_init); 612 #endif 613