/*
 * User-space Probes (UProbes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2008-2012
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>	/* read_mapping_page */
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/rmap.h>		/* anon_vma_prepare */
#include <linux/mmu_notifier.h>	/* set_pte_at_notify */
#include <linux/swap.h>		/* try_to_free_swap */
#include <linux/ptrace.h>	/* user_enable_single_step */
#include <linux/kdebug.h>	/* notifier mechanism */
#include "../../mm/internal.h"	/* munlock_vma_page */

#include <linux/uprobes.h>

#define UINSNS_PER_PAGE		(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
#define MAX_UPROBE_XOL_SLOTS	UINSNS_PER_PAGE

static struct rb_root uprobes_tree = RB_ROOT;

static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */

#define UPROBES_HASH_SZ	13

/*
 * We need separate register/unregister and mmap/munmap lock hashes because
 * of mmap_sem nesting.
 *
 * uprobe_register() needs to install probes on (potentially) all processes
 * and thus needs to acquire multiple mmap_sems (consecutively, not
 * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
 * for the particular process doing the mmap.
 *
 * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
 * because of lock order against i_mmap_mutex. This means there's a hole in
 * the register vma iteration where a mmap() can happen.
 *
 * Thus uprobe_register() can race with uprobe_mmap() and we can try and
 * install a probe where one is already installed.
 */

/* serialize (un)register */
static struct mutex uprobes_mutex[UPROBES_HASH_SZ];

#define uprobes_hash(v)		(&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])

/* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])

/*
 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
 * events active at this time.  Probably a fine grained per inode count is
 * better?
 */
static atomic_t uprobe_events = ATOMIC_INIT(0);

/* Have a copy of original instruction */
#define UPROBE_COPY_INSN	0
/* Don't run handlers when first register / last unregister is in progress */
#define UPROBE_RUN_HANDLER	1
/* Can skip singlestep */
#define UPROBE_SKIP_SSTEP	2

struct uprobe {
	struct rb_node		rb_node;	/* node in the rb tree */
	atomic_t		ref;
	struct rw_semaphore	consumer_rwsem;
	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */
	struct list_head	pending_list;
	struct uprobe_consumer	*consumers;
	struct inode		*inode;		/* Also hold a ref to inode */
	loff_t			offset;
	unsigned long		flags;
	struct arch_uprobe	arch;
};

/*
 * valid_vma: Verify if the specified vma is an executable vma
 * Relax restrictions while unregistering: vm_flags might have
 * changed after breakpoint was inserted.
 *	- is_register: indicates if we are in register context.
 *	- Return 1 if the specified virtual address is in an
 *	  executable vma.
 */
static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{
	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;

	if (is_register)
		flags |= VM_WRITE;

	return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;
}

static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
{
	return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
}

static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
{
	return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
}

/**
 * __replace_page - replace page in vma by new page.
 * based on replace_page in mm/ksm.c
 *
 * @vma:   vma that holds the pte pointing to page
 * @addr:  address the old @page is mapped at
 * @page:  the COWed page we are replacing by kpage
 * @kpage: the modified page we replace page by
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
				struct page *page, struct page *kpage)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	pte_t *ptep;
	int err;
	/* For mmu_notifiers */
	const unsigned long mmun_start = addr;
	const unsigned long mmun_end   = addr + PAGE_SIZE;

	/* For try_to_free_swap() and munlock_vma_page() below */
	lock_page(page);

	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
	err = -EAGAIN;
	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto unlock;

	get_page(kpage);
	page_add_new_anon_rmap(kpage, vma, addr);

	if (!PageAnon(page)) {
		dec_mm_counter(mm, MM_FILEPAGES);
		inc_mm_counter(mm, MM_ANONPAGES);
	}

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	pte_unmap_unlock(ptep, ptl);

	if (vma->vm_flags & VM_LOCKED)
		munlock_vma_page(page);
	put_page(page);

	err = 0;
unlock:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	unlock_page(page);
	return err;
}

/**
 * is_swbp_insn - check if instruction is breakpoint instruction.
 * @insn: instruction to be checked.
 * Default implementation of is_swbp_insn
 * Returns true if @insn is a breakpoint instruction.
 */
bool __weak is_swbp_insn(uprobe_opcode_t *insn)
{
	return *insn == UPROBE_SWBP_INSN;
}

static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
{
	void *kaddr = kmap_atomic(page);
	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
	kunmap_atomic(kaddr);
}

static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
{
	uprobe_opcode_t old_opcode;
	bool is_swbp;

	copy_opcode(page, vaddr, &old_opcode);
	is_swbp = is_swbp_insn(&old_opcode);

	if (is_swbp_insn(new_opcode)) {
		if (is_swbp)		/* register: already installed? */
			return 0;
	} else {
		if (!is_swbp)		/* unregister: was it changed by us? */
			return 0;
	}

	return 1;
}

/*
 * NOTE:
 * Expect the breakpoint instruction to be the smallest size instruction for
 * the architecture. If an arch has variable length instructions and the
 * breakpoint instruction is not the smallest instruction supported by that
 * architecture, then we need to modify is_swbp_at_addr and write_opcode
 * accordingly. This would never be a problem for archs that have fixed
 * length instructions.
 */

/*
 * write_opcode - write the opcode at a given virtual address.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to store the opcode.
 * @opcode: opcode to be written at @vaddr.
 *
 * Called with mm->mmap_sem held (for read and with a reference to
 * mm).
 *
 * For mm @mm, write the opcode at @vaddr.
 * Return 0 (success) or a negative errno.
 */
static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
			uprobe_opcode_t opcode)
{
	struct page *old_page, *new_page;
	void *vaddr_old, *vaddr_new;
	struct vm_area_struct *vma;
	int ret;

retry:
	/* Read the page with vaddr into memory */
	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
	if (ret <= 0)
		return ret;

	ret = verify_opcode(old_page, vaddr, &opcode);
	if (ret <= 0)
		goto put_old;

	ret = -ENOMEM;
	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
	if (!new_page)
		goto put_old;

	__SetPageUptodate(new_page);

	/* copy the page now that we've got it stable */
	vaddr_old = kmap_atomic(old_page);
	vaddr_new = kmap_atomic(new_page);

	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);

	kunmap_atomic(vaddr_new);
	kunmap_atomic(vaddr_old);

	ret = anon_vma_prepare(vma);
	if (ret)
		goto put_new;

	ret = __replace_page(vma, vaddr, old_page, new_page);

put_new:
	page_cache_release(new_page);
put_old:
	put_page(old_page);

	if (unlikely(ret == -EAGAIN))
		goto retry;
	return ret;
}

/**
 * set_swbp - store breakpoint at a given address.
 * @auprobe: arch specific probepoint information.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to insert the opcode.
 *
 * For mm @mm, store the breakpoint instruction at @vaddr.
 * Return 0 (success) or a negative errno.
 */
int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{
	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
}

/**
 * set_orig_insn - Restore the original instruction.
 * @mm: the probed process address space.
 * @auprobe: arch specific probepoint information.
 * @vaddr: the virtual address to insert the opcode.
 *
 * For mm @mm, restore the original opcode (opcode) at @vaddr.
 * Return 0 (success) or a negative errno.
 */
int __weak
set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{
	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
}

static int match_uprobe(struct uprobe *l, struct uprobe *r)
{
	if (l->inode < r->inode)
		return -1;

	if (l->inode > r->inode)
		return 1;

	if (l->offset < r->offset)
		return -1;

	if (l->offset > r->offset)
		return 1;

	return 0;
}

static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
{
	struct uprobe u = { .inode = inode, .offset = offset };
	struct rb_node *n = uprobes_tree.rb_node;
	struct uprobe *uprobe;
	int match;

	while (n) {
		uprobe = rb_entry(n, struct uprobe, rb_node);
		match = match_uprobe(&u, uprobe);
		if (!match) {
			atomic_inc(&uprobe->ref);
			return uprobe;
		}

		if (match < 0)
			n = n->rb_left;
		else
			n = n->rb_right;
	}
	return NULL;
}

/*
 * Find a uprobe corresponding to a given inode:offset
 * Acquires uprobes_treelock
 */
static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
{
	struct uprobe *uprobe;

	spin_lock(&uprobes_treelock);
	uprobe = __find_uprobe(inode, offset);
	spin_unlock(&uprobes_treelock);

	return uprobe;
}

static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
{
	struct rb_node **p = &uprobes_tree.rb_node;
	struct rb_node *parent = NULL;
	struct uprobe *u;
	int match;

	while (*p) {
		parent = *p;
		u = rb_entry(parent, struct uprobe, rb_node);
		match = match_uprobe(uprobe, u);
		if (!match) {
			atomic_inc(&u->ref);
			return u;
		}

		if (match < 0)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;

	}

	u = NULL;
	rb_link_node(&uprobe->rb_node, parent, p);
	rb_insert_color(&uprobe->rb_node, &uprobes_tree);
	/* get access + creation ref */
	atomic_set(&uprobe->ref, 2);

	return u;
}

/*
 * Acquire uprobes_treelock.
 * Matching uprobe already exists in rbtree;
 * increment (access refcount) and return the matching uprobe.
 *
 * No matching uprobe; insert the uprobe in rb_tree;
 * get a double refcount (access + creation) and return NULL.
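 *
 * For example, the first uprobe_register() for an inode:offset pair gets
 * here with a freshly allocated uprobe and leaves it in the tree with
 * ->ref == 2, while a later register for the same pair only takes the
 * access reference on the node that is already there.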
 */
static struct uprobe *insert_uprobe(struct uprobe *uprobe)
{
	struct uprobe *u;

	spin_lock(&uprobes_treelock);
	u = __insert_uprobe(uprobe);
	spin_unlock(&uprobes_treelock);

	/* For now assume that the instruction need not be single-stepped */
	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);

	return u;
}

static void put_uprobe(struct uprobe *uprobe)
{
	if (atomic_dec_and_test(&uprobe->ref))
		kfree(uprobe);
}

static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
{
	struct uprobe *uprobe, *cur_uprobe;

	uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
	if (!uprobe)
		return NULL;

	uprobe->inode = igrab(inode);
	uprobe->offset = offset;
	init_rwsem(&uprobe->consumer_rwsem);
	mutex_init(&uprobe->copy_mutex);

	/* add to uprobes_tree, sorted on inode:offset */
	cur_uprobe = insert_uprobe(uprobe);

	/* a uprobe exists for this inode:offset combination */
	if (cur_uprobe) {
		kfree(uprobe);
		uprobe = cur_uprobe;
		iput(inode);
	} else {
		atomic_inc(&uprobe_events);
	}

	return uprobe;
}

static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{
	struct uprobe_consumer *uc;

	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
		return;

	down_read(&uprobe->consumer_rwsem);
	for (uc = uprobe->consumers; uc; uc = uc->next) {
		if (!uc->filter || uc->filter(uc, current))
			uc->handler(uc, regs);
	}
	up_read(&uprobe->consumer_rwsem);
}

/* Returns the previous consumer */
static struct uprobe_consumer *
consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
	down_write(&uprobe->consumer_rwsem);
	uc->next = uprobe->consumers;
	uprobe->consumers = uc;
	up_write(&uprobe->consumer_rwsem);

	return uc->next;
}

/*
 * For uprobe @uprobe, delete the consumer @uc.
 * Return true if @uc was found and deleted, false otherwise.
 */
static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
	struct uprobe_consumer **con;
	bool ret = false;

	down_write(&uprobe->consumer_rwsem);
	for (con = &uprobe->consumers; *con; con = &(*con)->next) {
		if (*con == uc) {
			*con = uc->next;
			ret = true;
			break;
		}
	}
	up_write(&uprobe->consumer_rwsem);

	return ret;
}

static int
__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
			unsigned long nbytes, loff_t offset)
{
	struct page *page;
	void *vaddr;
	unsigned long off;
	pgoff_t idx;

	if (!filp)
		return -EINVAL;

	if (!mapping->a_ops->readpage)
		return -EIO;

	idx = offset >> PAGE_CACHE_SHIFT;
	off = offset & ~PAGE_MASK;

	/*
	 * Ensure that the page that has the original instruction is
	 * populated and in page-cache.
	 */
	page = read_mapping_page(mapping, idx, filp);
	if (IS_ERR(page))
		return PTR_ERR(page);

	vaddr = kmap_atomic(page);
	memcpy(insn, vaddr + off, nbytes);
	kunmap_atomic(vaddr);
	page_cache_release(page);

	return 0;
}

static int copy_insn(struct uprobe *uprobe, struct file *filp)
{
	struct address_space *mapping;
	unsigned long nbytes;
	int bytes;

	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
	mapping = uprobe->inode->i_mapping;

	/* Instruction at end of binary; copy only available bytes */
	if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
		bytes = uprobe->inode->i_size - uprobe->offset;
	else
		bytes = MAX_UINSN_BYTES;

	/* Instruction at the page-boundary; copy bytes in second page */
	if (nbytes < bytes) {
		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
				bytes - nbytes, uprobe->offset + nbytes);
		if (err)
			return err;
		bytes = nbytes;
	}
	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
}

static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
				struct mm_struct *mm, unsigned long vaddr)
{
	int ret = 0;

	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
		return ret;

	mutex_lock(&uprobe->copy_mutex);
	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
		goto out;

	ret = copy_insn(uprobe, file);
	if (ret)
		goto out;

	ret = -ENOTSUPP;
	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
		goto out;

	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
	if (ret)
		goto out;

	/* write_opcode() assumes we don't cross page boundary */
	BUG_ON((uprobe->offset & ~PAGE_MASK) +
			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);

	smp_wmb(); /* pairs with rmb() in find_active_uprobe() */
	set_bit(UPROBE_COPY_INSN, &uprobe->flags);

out:
	mutex_unlock(&uprobe->copy_mutex);

	return ret;
}

static int
install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long vaddr)
{
	bool first_uprobe;
	int ret;

	/*
	 * If probe is being deleted, unregister thread could be done with
	 * the vma-rmap-walk through. Adding a probe now can be fatal since
	 * nobody will be able to cleanup. Also we could be from fork or
	 * mremap path, where the probe might have already been inserted.
	 * Hence behave as if probe already existed.
	 */
	if (!uprobe->consumers)
		return 0;

	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
	if (ret)
		return ret;

	/*
	 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(),
	 * the task can hit this breakpoint right after __replace_page().
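	 * If set_swbp() then fails and this was the first probe in this mm,
	 * the flag is cleared again below.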
	 */
	first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags);
	if (first_uprobe)
		set_bit(MMF_HAS_UPROBES, &mm->flags);

	ret = set_swbp(&uprobe->arch, mm, vaddr);
	if (!ret)
		clear_bit(MMF_RECALC_UPROBES, &mm->flags);
	else if (first_uprobe)
		clear_bit(MMF_HAS_UPROBES, &mm->flags);

	return ret;
}

static int
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
{
	/* can happen if uprobe_register() fails */
	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
		return 0;

	set_bit(MMF_RECALC_UPROBES, &mm->flags);
	return set_orig_insn(&uprobe->arch, mm, vaddr);
}

/*
 * There could be threads that have already hit the breakpoint. They
 * will recheck the current insn and restart if find_uprobe() fails.
 * See find_active_uprobe().
 */
static void delete_uprobe(struct uprobe *uprobe)
{
	spin_lock(&uprobes_treelock);
	rb_erase(&uprobe->rb_node, &uprobes_tree);
	spin_unlock(&uprobes_treelock);
	iput(uprobe->inode);
	put_uprobe(uprobe);
	atomic_dec(&uprobe_events);
}

struct map_info {
	struct map_info *next;
	struct mm_struct *mm;
	unsigned long vaddr;
};

static inline struct map_info *free_map_info(struct map_info *info)
{
	struct map_info *next = info->next;
	kfree(info);
	return next;
}

static struct map_info *
build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
{
	unsigned long pgoff = offset >> PAGE_SHIFT;
	struct vm_area_struct *vma;
	struct map_info *curr = NULL;
	struct map_info *prev = NULL;
	struct map_info *info;
	int more = 0;

again:
	mutex_lock(&mapping->i_mmap_mutex);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		if (!valid_vma(vma, is_register))
			continue;

		if (!prev && !more) {
			/*
			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
			 * reclaim. This is optimistic, no harm done if it fails.
			 */
			prev = kmalloc(sizeof(struct map_info),
					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
			if (prev)
				prev->next = NULL;
		}
		if (!prev) {
			more++;
			continue;
		}

		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
			continue;

		info = prev;
		prev = prev->next;
		info->next = curr;
		curr = info;

		info->mm = vma->vm_mm;
		info->vaddr = offset_to_vaddr(vma, offset);
	}
	mutex_unlock(&mapping->i_mmap_mutex);

	if (!more)
		goto out;

	prev = curr;
	while (curr) {
		mmput(curr->mm);
		curr = curr->next;
	}

	do {
		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
		if (!info) {
			curr = ERR_PTR(-ENOMEM);
			goto out;
		}
		info->next = prev;
		prev = info;
	} while (--more);

	goto again;
out:
	while (prev)
		prev = free_map_info(prev);
	return curr;
}

static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
{
	struct map_info *info;
	int err = 0;

	info = build_map_info(uprobe->inode->i_mapping,
					uprobe->offset, is_register);
	if (IS_ERR(info))
		return PTR_ERR(info);

	while (info) {
		struct mm_struct *mm = info->mm;
		struct vm_area_struct *vma;

		if (err && is_register)
			goto free;

		down_write(&mm->mmap_sem);
		vma = find_vma(mm, info->vaddr);
		if (!vma || !valid_vma(vma, is_register) ||
		    vma->vm_file->f_mapping->host != uprobe->inode)
			goto unlock;

		if (vma->vm_start > info->vaddr ||
		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
			goto unlock;

		if (is_register)
			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
		else
			err |= remove_breakpoint(uprobe, mm, info->vaddr);

unlock:
		up_write(&mm->mmap_sem);
free:
		mmput(mm);
		info = free_map_info(info);
	}

	return err;
}

static int __uprobe_register(struct uprobe *uprobe)
{
	return register_for_each_vma(uprobe, true);
}

static void __uprobe_unregister(struct uprobe *uprobe)
{
	if (!register_for_each_vma(uprobe, false))
		delete_uprobe(uprobe);

	/* TODO: can't unregister? schedule a worker thread */
}

/*
 * uprobe_register - register a probe
 * @inode: the file in which the probe has to be placed.
 * @offset: offset from the start of the file.
 * @uc: information on how to handle the probe.
 *
 * Apart from the access refcount, uprobe_register() takes a creation
 * refcount (through alloc_uprobe) if and only if this @uprobe is getting
 * inserted into the rbtree (i.e. first consumer for a @inode:@offset
 * tuple). Creation refcount stops uprobe_unregister from freeing the
 * @uprobe even before the register operation is complete. Creation
 * refcount is released when the last @uc for the @uprobe
 * unregisters.
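 *
 * A minimal usage sketch; sample_handler/sample_consumer below are
 * hypothetical, not part of this file. The handler may inspect @regs
 * before returning:
 *
 *	static int sample_handler(struct uprobe_consumer *self,
 *				  struct pt_regs *regs)
 *	{
 *		return 0;
 *	}
 *
 *	static struct uprobe_consumer sample_consumer = {
 *		.handler = sample_handler,
 *	};
 *
 *	err = uprobe_register(inode, offset, &sample_consumer);
 *	...
 *	uprobe_unregister(inode, offset, &sample_consumer);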
 *
 * Return errno if it cannot successfully install probes
 * else return 0 (success)
 */
int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
	struct uprobe *uprobe;
	int ret;

	if (!inode || !uc || uc->next)
		return -EINVAL;

	if (offset > i_size_read(inode))
		return -EINVAL;

	ret = 0;
	mutex_lock(uprobes_hash(inode));
	uprobe = alloc_uprobe(inode, offset);

	if (!uprobe) {
		ret = -ENOMEM;
	} else if (!consumer_add(uprobe, uc)) {
		ret = __uprobe_register(uprobe);
		if (ret) {
			uprobe->consumers = NULL;
			__uprobe_unregister(uprobe);
		} else {
			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
		}
	}

	mutex_unlock(uprobes_hash(inode));
	if (uprobe)
		put_uprobe(uprobe);

	return ret;
}

/*
 * uprobe_unregister - unregister an already registered probe.
 * @inode: the file in which the probe has to be removed.
 * @offset: offset from the start of the file.
 * @uc: identify which probe if multiple probes are colocated.
 */
void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
	struct uprobe *uprobe;

	if (!inode || !uc)
		return;

	uprobe = find_uprobe(inode, offset);
	if (!uprobe)
		return;

	mutex_lock(uprobes_hash(inode));

	if (consumer_del(uprobe, uc)) {
		if (!uprobe->consumers) {
			__uprobe_unregister(uprobe);
			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
		}
	}

	mutex_unlock(uprobes_hash(inode));
	if (uprobe)
		put_uprobe(uprobe);
}

static struct rb_node *
find_node_in_range(struct inode *inode, loff_t min, loff_t max)
{
	struct rb_node *n = uprobes_tree.rb_node;

	while (n) {
		struct uprobe *u = rb_entry(n, struct uprobe, rb_node);

		if (inode < u->inode) {
			n = n->rb_left;
		} else if (inode > u->inode) {
			n = n->rb_right;
		} else {
			if (max < u->offset)
				n = n->rb_left;
			else if (min > u->offset)
				n = n->rb_right;
			else
				break;
		}
	}

	return n;
}

/*
 * For a given range in vma, build a list of probes that need to be inserted.
 */
static void build_probe_list(struct inode *inode,
				struct vm_area_struct *vma,
				unsigned long start, unsigned long end,
				struct list_head *head)
{
	loff_t min, max;
	struct rb_node *n, *t;
	struct uprobe *u;

	INIT_LIST_HEAD(head);
	min = vaddr_to_offset(vma, start);
	max = min + (end - start) - 1;

	spin_lock(&uprobes_treelock);
	n = find_node_in_range(inode, min, max);
	if (n) {
		for (t = n; t; t = rb_prev(t)) {
			u = rb_entry(t, struct uprobe, rb_node);
			if (u->inode != inode || u->offset < min)
				break;
			list_add(&u->pending_list, head);
			atomic_inc(&u->ref);
		}
		for (t = n; (t = rb_next(t)); ) {
			u = rb_entry(t, struct uprobe, rb_node);
			if (u->inode != inode || u->offset > max)
				break;
			list_add(&u->pending_list, head);
			atomic_inc(&u->ref);
		}
	}
	spin_unlock(&uprobes_treelock);
}

/*
 * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
 *
 * Currently we ignore all errors and always return 0, the callers
 * can't handle the failure anyway.
 */
int uprobe_mmap(struct vm_area_struct *vma)
{
	struct list_head tmp_list;
	struct uprobe *uprobe, *u;
	struct inode *inode;

	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
		return 0;

	inode = vma->vm_file->f_mapping->host;
	if (!inode)
		return 0;

	mutex_lock(uprobes_mmap_hash(inode));
	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);

	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
		if (!fatal_signal_pending(current)) {
			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
		}
		put_uprobe(uprobe);
	}
	mutex_unlock(uprobes_mmap_hash(inode));

	return 0;
}

static bool
vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	loff_t min, max;
	struct inode *inode;
	struct rb_node *n;

	inode = vma->vm_file->f_mapping->host;

	min = vaddr_to_offset(vma, start);
	max = min + (end - start) - 1;

	spin_lock(&uprobes_treelock);
	n = find_node_in_range(inode, min, max);
	spin_unlock(&uprobes_treelock);

	return !!n;
}

/*
 * Called in context of a munmap of a vma.
 */
void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
		return;

	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
		return;

	if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
	     test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
		return;

	if (vma_has_uprobes(vma, start, end))
		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
}

/* Slot allocation for XOL */
static int xol_add_vma(struct xol_area *area)
{
	struct mm_struct *mm;
	int ret;

	area->page = alloc_page(GFP_HIGHUSER);
	if (!area->page)
		return -ENOMEM;

	ret = -EALREADY;
	mm = current->mm;

	down_write(&mm->mmap_sem);
	if (mm->uprobes_state.xol_area)
		goto fail;

	ret = -ENOMEM;

	/* Try to map as high as possible, this is only a hint. */
	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
	if (area->vaddr & ~PAGE_MASK) {
		ret = area->vaddr;
		goto fail;
	}

	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page);
	if (ret)
		goto fail;

	smp_wmb();	/* pairs with get_xol_area() */
	mm->uprobes_state.xol_area = area;
	ret = 0;

fail:
	up_write(&mm->mmap_sem);
	if (ret)
		__free_page(area->page);

	return ret;
}

static struct xol_area *get_xol_area(struct mm_struct *mm)
{
	struct xol_area *area;

	area = mm->uprobes_state.xol_area;
	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */

	return area;
}

/*
 * xol_alloc_area - Allocate process's xol_area.
 * This area will be used for storing instructions for execution out of
 * line.
 *
 * Returns the allocated area or NULL.
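 *
 * The area is a single page carved into UINSNS_PER_PAGE slots of
 * UPROBE_XOL_SLOT_BYTES each, tracked by area->bitmap; slot n lives at
 * area->vaddr + n * UPROBE_XOL_SLOT_BYTES (see xol_take_insn_slot()).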
 */
static struct xol_area *xol_alloc_area(void)
{
	struct xol_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL);
	if (unlikely(!area))
		return NULL;

	area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);

	if (!area->bitmap)
		goto fail;

	init_waitqueue_head(&area->wq);
	if (!xol_add_vma(area))
		return area;

fail:
	kfree(area->bitmap);
	kfree(area);

	return get_xol_area(current->mm);
}

/*
 * uprobe_clear_state - Free the area allocated for slots.
 */
void uprobe_clear_state(struct mm_struct *mm)
{
	struct xol_area *area = mm->uprobes_state.xol_area;

	if (!area)
		return;

	put_page(area->page);
	kfree(area->bitmap);
	kfree(area);
}

void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
	newmm->uprobes_state.xol_area = NULL;

	if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
		set_bit(MMF_HAS_UPROBES, &newmm->flags);
		/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
		set_bit(MMF_RECALC_UPROBES, &newmm->flags);
	}
}

/*
 * - search for a free slot.
 */
static unsigned long xol_take_insn_slot(struct xol_area *area)
{
	unsigned long slot_addr;
	int slot_nr;

	do {
		slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
		if (slot_nr < UINSNS_PER_PAGE) {
			if (!test_and_set_bit(slot_nr, area->bitmap))
				break;

			slot_nr = UINSNS_PER_PAGE;
			continue;
		}
		wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
	} while (slot_nr >= UINSNS_PER_PAGE);

	slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
	atomic_inc(&area->slot_count);

	return slot_addr;
}

/*
 * xol_get_insn_slot - allocate a slot for the current task if it was not
 * allocated one already.
 * Returns the allocated slot address or 0.
 */
static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
{
	struct xol_area *area;
	unsigned long offset;
	void *vaddr;

	area = get_xol_area(current->mm);
	if (!area) {
		area = xol_alloc_area();
		if (!area)
			return 0;
	}
	current->utask->xol_vaddr = xol_take_insn_slot(area);

	/*
	 * Initialize the slot if xol_vaddr points to valid
	 * instruction slot.
	 */
	if (unlikely(!current->utask->xol_vaddr))
		return 0;

	current->utask->vaddr = slot_addr;
	offset = current->utask->xol_vaddr & ~PAGE_MASK;
	vaddr = kmap_atomic(area->page);
	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
	kunmap_atomic(vaddr);

	return current->utask->xol_vaddr;
}

/*
 * xol_free_insn_slot - If slot was earlier allocated by
 * @xol_get_insn_slot(), make the slot available for
 * subsequent requests.
 */
static void xol_free_insn_slot(struct task_struct *tsk)
{
	struct xol_area *area;
	unsigned long vma_end;
	unsigned long slot_addr;

	if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
		return;

	slot_addr = tsk->utask->xol_vaddr;

	if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
		return;

	area = tsk->mm->uprobes_state.xol_area;
	vma_end = area->vaddr + PAGE_SIZE;
	if (area->vaddr <= slot_addr && slot_addr < vma_end) {
		unsigned long offset;
		int slot_nr;

		offset = slot_addr - area->vaddr;
		slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
		if (slot_nr >= UINSNS_PER_PAGE)
			return;

		clear_bit(slot_nr, area->bitmap);
		atomic_dec(&area->slot_count);
		if (waitqueue_active(&area->wq))
			wake_up(&area->wq);

		tsk->utask->xol_vaddr = 0;
	}
}

/**
 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
 * @regs: Reflects the saved state of the task after it has hit a breakpoint
 * instruction.
 * Return the address of the breakpoint instruction.
 */
unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
{
	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
}

/*
 * Called with no locks held.
 * Called in context of an exiting or an exec-ing thread.
 */
void uprobe_free_utask(struct task_struct *t)
{
	struct uprobe_task *utask = t->utask;

	if (!utask)
		return;

	if (utask->active_uprobe)
		put_uprobe(utask->active_uprobe);

	xol_free_insn_slot(t);
	kfree(utask);
	t->utask = NULL;
}

/*
 * Called in context of a new clone/fork from copy_process.
 */
void uprobe_copy_process(struct task_struct *t)
{
	t->utask = NULL;
}

/*
 * Allocate a uprobe_task object for the task.
 * Called when the thread hits a breakpoint for the first time.
 *
 * Returns:
 * - pointer to new uprobe_task on success
 * - NULL otherwise
 */
static struct uprobe_task *add_utask(void)
{
	struct uprobe_task *utask;

	utask = kzalloc(sizeof *utask, GFP_KERNEL);
	if (unlikely(!utask))
		return NULL;

	current->utask = utask;
	return utask;
}

/* Prepare to single-step probed instruction out of line. */
static int
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
{
	if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
		return 0;

	return -EFAULT;
}

/*
 * If we are singlestepping, then ensure this thread is not connected to
 * non-fatal signals until completion of singlestep.  When xol insn itself
 * triggers the signal, restart the original insn even if the task is
 * already SIGKILL'ed (since coredump should report the correct ip).  This
 * is even more important if the task has a handler for SIGSEGV/etc.  The
 * _same_ instruction should be repeated again after return from the signal
 * handler, and SSTEP can never finish in this case.
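 *
 * TIF_SIGPENDING is cleared here; handle_singlestep() calls
 * recalc_sigpending() once the single-step has finished, so the delayed
 * signals are delivered then.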
 */
bool uprobe_deny_signal(void)
{
	struct task_struct *t = current;
	struct uprobe_task *utask = t->utask;

	if (likely(!utask || !utask->active_uprobe))
		return false;

	WARN_ON_ONCE(utask->state != UTASK_SSTEP);

	if (signal_pending(t)) {
		spin_lock_irq(&t->sighand->siglock);
		clear_tsk_thread_flag(t, TIF_SIGPENDING);
		spin_unlock_irq(&t->sighand->siglock);

		if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
			utask->state = UTASK_SSTEP_TRAPPED;
			set_tsk_thread_flag(t, TIF_UPROBE);
			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
		}
	}

	return true;
}

/*
 * Avoid singlestepping the original instruction if the original instruction
 * is a NOP or can be emulated.
 */
static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
{
	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
			return true;
		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
	}
	return false;
}

static void mmf_recalc_uprobes(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (!valid_vma(vma, false))
			continue;
		/*
		 * This is not strictly accurate, we can race with
		 * uprobe_unregister() and see the already removed
		 * uprobe if delete_uprobe() was not yet called.
		 */
		if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
			return;
	}

	clear_bit(MMF_HAS_UPROBES, &mm->flags);
}

static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
{
	struct page *page;
	uprobe_opcode_t opcode;
	int result;

	pagefault_disable();
	result = __copy_from_user_inatomic(&opcode, (void __user *)vaddr,
							sizeof(opcode));
	pagefault_enable();

	if (likely(result == 0))
		goto out;

	result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
	if (result < 0)
		return result;

	copy_opcode(page, vaddr, &opcode);
	put_page(page);
out:
	return is_swbp_insn(&opcode);
}

static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
{
	struct mm_struct *mm = current->mm;
	struct uprobe *uprobe = NULL;
	struct vm_area_struct *vma;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, bp_vaddr);
	if (vma && vma->vm_start <= bp_vaddr) {
		if (valid_vma(vma, false)) {
			struct inode *inode = vma->vm_file->f_mapping->host;
			loff_t offset = vaddr_to_offset(vma, bp_vaddr);

			uprobe = find_uprobe(inode, offset);
		}

		if (!uprobe)
			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
	} else {
		*is_swbp = -EFAULT;
	}

	if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
		mmf_recalc_uprobes(mm);
	up_read(&mm->mmap_sem);

	return uprobe;
}

void __weak arch_uprobe_enable_step(struct arch_uprobe *arch)
{
	user_enable_single_step(current);
}

void __weak arch_uprobe_disable_step(struct arch_uprobe *arch)
{
	user_disable_single_step(current);
}

/*
 * Run handler and ask thread to singlestep.
 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
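 *
 * In brief: find_active_uprobe() looks up the uprobe for the trapping
 * address, handler_chain() runs the registered consumers, and the probed
 * instruction is then either skipped/emulated (can_skip_sstep()) or
 * single-stepped out of line via pre_ssout(); handle_singlestep() completes
 * the job once the single-step exception comes back.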
 */
static void handle_swbp(struct pt_regs *regs)
{
	struct uprobe_task *utask;
	struct uprobe *uprobe;
	unsigned long bp_vaddr;
	int uninitialized_var(is_swbp);

	bp_vaddr = uprobe_get_swbp_addr(regs);
	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);

	if (!uprobe) {
		if (is_swbp > 0) {
			/* No matching uprobe; signal SIGTRAP. */
			send_sig(SIGTRAP, current, 0);
		} else {
			/*
			 * Either we raced with uprobe_unregister() or we can't
			 * access this memory. The latter is only possible if
			 * another thread plays with our ->mm. In both cases
			 * we can simply restart. If this vma was unmapped we
			 * can pretend this insn was not executed yet and get
			 * the (correct) SIGSEGV after restart.
			 */
			instruction_pointer_set(regs, bp_vaddr);
		}
		return;
	}
	/*
	 * TODO: move copy_insn/etc into _register and remove this hack.
	 * After we hit the bp, _unregister + _register can install the
	 * new and not-yet-analyzed uprobe at the same address, restart.
	 */
	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
		goto restart;

	utask = current->utask;
	if (!utask) {
		utask = add_utask();
		/* Cannot allocate; re-execute the instruction. */
		if (!utask)
			goto restart;
	}

	handler_chain(uprobe, regs);
	if (can_skip_sstep(uprobe, regs))
		goto out;

	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
		arch_uprobe_enable_step(&uprobe->arch);
		utask->active_uprobe = uprobe;
		utask->state = UTASK_SSTEP;
		return;
	}

restart:
	/*
	 * cannot singlestep; cannot skip instruction;
	 * re-execute the instruction.
	 */
	instruction_pointer_set(regs, bp_vaddr);
out:
	put_uprobe(uprobe);
}

/*
 * Perform required fix-ups and disable singlestep.
 * Allow pending signals to take effect.
 */
static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
{
	struct uprobe *uprobe;

	uprobe = utask->active_uprobe;
	if (utask->state == UTASK_SSTEP_ACK)
		arch_uprobe_post_xol(&uprobe->arch, regs);
	else if (utask->state == UTASK_SSTEP_TRAPPED)
		arch_uprobe_abort_xol(&uprobe->arch, regs);
	else
		WARN_ON_ONCE(1);

	arch_uprobe_disable_step(&uprobe->arch);
	put_uprobe(uprobe);
	utask->active_uprobe = NULL;
	utask->state = UTASK_RUNNING;
	xol_free_insn_slot(current);

	spin_lock_irq(&current->sighand->siglock);
	recalc_sigpending(); /* see uprobe_deny_signal() */
	spin_unlock_irq(&current->sighand->siglock);
}

/*
 * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and
 * allows the thread to return from interrupt. After that handle_swbp()
 * sets utask->active_uprobe.
 *
 * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag
 * and allows the thread to return from interrupt.
 *
 * While returning to userspace, thread notices the TIF_UPROBE flag and calls
 * uprobe_notify_resume().
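 *
 * uprobe_notify_resume() then dispatches to handle_singlestep() when a
 * single-step is in flight (utask->active_uprobe is set), and to
 * handle_swbp() otherwise.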
 */
void uprobe_notify_resume(struct pt_regs *regs)
{
	struct uprobe_task *utask;

	clear_thread_flag(TIF_UPROBE);

	utask = current->utask;
	if (utask && utask->active_uprobe)
		handle_singlestep(utask, regs);
	else
		handle_swbp(regs);
}

/*
 * uprobe_pre_sstep_notifier gets called from interrupt context as part of
 * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit.
 */
int uprobe_pre_sstep_notifier(struct pt_regs *regs)
{
	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
		return 0;

	set_thread_flag(TIF_UPROBE);
	return 1;
}

/*
 * uprobe_post_sstep_notifier gets called in interrupt context as part of
 * notifier mechanism. Set TIF_UPROBE flag and indicate completion of
 * singlestep.
 */
int uprobe_post_sstep_notifier(struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	if (!current->mm || !utask || !utask->active_uprobe)
		/* task is currently not uprobed */
		return 0;

	utask->state = UTASK_SSTEP_ACK;
	set_thread_flag(TIF_UPROBE);
	return 1;
}

static struct notifier_block uprobe_exception_nb = {
	.notifier_call		= arch_uprobe_exception_notify,
	.priority		= INT_MAX-1,	/* notified after kprobes, kgdb */
};

static int __init init_uprobes(void)
{
	int i;

	for (i = 0; i < UPROBES_HASH_SZ; i++) {
		mutex_init(&uprobes_mutex[i]);
		mutex_init(&uprobes_mmap_mutex[i]);
	}

	return register_die_notifier(&uprobe_exception_nb);
}
module_init(init_uprobes);

static void __exit exit_uprobes(void)
{
}
module_exit(exit_uprobes);