// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Kernel Probes (KProbes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes suggestions from
 *		Rusty Russell).
 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
 *		hlists and exceptions notifier as suggested by Andi Kleen.
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
 *		exceptions notifier to be first on the priority list.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */

#define pr_fmt(fmt) "kprobes: " fmt

#include <linux/kprobes.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sysctl.h>
#include <linux/kdebug.h>
#include <linux/memory.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/jump_label.h>
#include <linux/static_call.h>
#include <linux/perf_event.h>
#include <linux/execmem.h>

#include <asm/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <linux/uaccess.h>

#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)

#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL)
#define kprobe_sysctls_init() do { } while (0)
#endif

static int kprobes_initialized;
/*
 * 'kprobe_table' can be accessed by
 * - Normal hlist traversal and RCU add/del while 'kprobe_mutex' is held,
 * or
 * - RCU hlist traversal with preemption disabled (breakpoint handlers).
 */
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with 'kprobe_mutex' held */
static bool kprobes_all_disarmed;

/* This protects 'kprobe_table' and 'optimizing_list' */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);

kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
					unsigned int __unused)
{
	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
}

/*
 * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where
 * kprobes can not probe.
 */
static LIST_HEAD(kprobe_blacklist);
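/*
 * Illustrative sketch of how code usually ends up on this blacklist:
 * functions that must never be probed are annotated with NOKPROBE_SYMBOL(),
 * and their address ranges are later folded into 'kprobe_blacklist'. The
 * handler name below is hypothetical:
 *
 *	static int my_nmi_helper(struct pt_regs *regs) { ... }
 *	NOKPROBE_SYMBOL(my_nmi_helper);
 */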
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * 'kprobe::ainsn.insn' points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster.
 */
struct kprobe_insn_page {
	struct list_head list;
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	struct kprobe_insn_cache *cache;
	int nused;
	int ngarbage;
	char slot_used[];
};

static int slots_per_page(struct kprobe_insn_cache *c)
{
	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
}

enum kprobe_slot_state {
	SLOT_CLEAN = 0,
	SLOT_DIRTY = 1,
	SLOT_USED = 2,
};

void __weak *alloc_insn_page(void)
{
	/*
	 * Use execmem_alloc() so this page is within +/- 2GB of where the
	 * kernel image and loaded module images reside. This is required
	 * for most of the architectures.
	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
	 */
	return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
}

static void free_insn_page(void *page)
{
	execmem_free(page);
}

struct kprobe_insn_cache kprobe_insn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
	.alloc = alloc_insn_page,
	.free = free_insn_page,
	.sym = KPROBE_INSN_PAGE_SYM,
	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
	.insn_size = MAX_INSN_SIZE,
	.nr_garbage = 0,
};
static int collect_garbage_slots(struct kprobe_insn_cache *c);

/**
 * __get_insn_slot() - Find a slot on an executable page for an instruction.
 * We allocate an executable page if there's no room on existing ones.
 */
kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
{
	struct kprobe_insn_page *kip;
	kprobe_opcode_t *slot = NULL;

	/* Since the slot array is not protected by rcu, we need a mutex */
	mutex_lock(&c->mutex);
 retry:
	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		if (kip->nused < slots_per_page(c)) {
			int i;

			for (i = 0; i < slots_per_page(c); i++) {
				if (kip->slot_used[i] == SLOT_CLEAN) {
					kip->slot_used[i] = SLOT_USED;
					kip->nused++;
					slot = kip->insns + (i * c->insn_size);
					rcu_read_unlock();
					goto out;
				}
			}
			/* kip->nused is broken. Fix it. */
			kip->nused = slots_per_page(c);
			WARN_ON(1);
		}
	}
	rcu_read_unlock();

	/* If there are any garbage slots, collect them and try again. */
	if (c->nr_garbage && collect_garbage_slots(c) == 0)
		goto retry;

	/* All out of space. Need to allocate a new page. */
	kip = kmalloc(struct_size(kip, slot_used, slots_per_page(c)), GFP_KERNEL);
	if (!kip)
		goto out;

	kip->insns = c->alloc();
	if (!kip->insns) {
		kfree(kip);
		goto out;
	}
	INIT_LIST_HEAD(&kip->list);
	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
	kip->slot_used[0] = SLOT_USED;
	kip->nused = 1;
	kip->ngarbage = 0;
	kip->cache = c;
	list_add_rcu(&kip->list, &c->pages);
	slot = kip->insns;

	/* Record the perf ksymbol register event after adding the page */
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
			   PAGE_SIZE, false, c->sym);
out:
	mutex_unlock(&c->mutex);
	return slot;
}
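/*
 * Illustrative sketch of how architecture code typically consumes this cache.
 * Arch ports usually go through the get_insn_slot()/free_insn_slot() wrappers
 * (declared in <linux/kprobes.h>) rather than calling __get_insn_slot()
 * directly; the exact field names in arch_prepare_kprobe() vary per arch and
 * are shown here only as an assumed example:
 *
 *	int arch_prepare_kprobe(struct kprobe *p)
 *	{
 *		p->ainsn.insn = get_insn_slot();
 *		if (!p->ainsn.insn)
 *			return -ENOMEM;
 *		// ... copy and fix up the original instruction ...
 *		return 0;
 *	}
 *
 *	void arch_remove_kprobe(struct kprobe *p)
 *	{
 *		if (p->ainsn.insn)
 *			free_insn_slot(p->ainsn.insn, 0);
 *	}
 */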
/* Return true if all garbage slots are collected, otherwise false. */
static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
	kip->slot_used[idx] = SLOT_CLEAN;
	kip->nused--;
	if (kip->nused != 0)
		return false;

	/*
	 * Page is no longer in use. Free it unless
	 * it's the last one. We keep the last one
	 * so as not to have to set it up again the
	 * next time somebody inserts a probe.
	 */
	if (!list_is_singular(&kip->list)) {
		/*
		 * Record perf ksymbol unregister event before removing
		 * the page.
		 */
		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
				   (unsigned long)kip->insns, PAGE_SIZE, true,
				   kip->cache->sym);
		list_del_rcu(&kip->list);
		synchronize_rcu();
		kip->cache->free(kip->insns);
		kfree(kip);
	}
	return true;
}

static int collect_garbage_slots(struct kprobe_insn_cache *c)
{
	struct kprobe_insn_page *kip, *next;

	/* Ensure no one is still running on the garbage slots. */
	synchronize_rcu();

	list_for_each_entry_safe(kip, next, &c->pages, list) {
		int i;

		if (kip->ngarbage == 0)
			continue;
		kip->ngarbage = 0;	/* we will collect all garbage slots */
		for (i = 0; i < slots_per_page(c); i++) {
			if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
				break;
		}
	}
	c->nr_garbage = 0;
	return 0;
}

void __free_insn_slot(struct kprobe_insn_cache *c,
		      kprobe_opcode_t *slot, int dirty)
{
	struct kprobe_insn_page *kip;
	long idx;

	mutex_lock(&c->mutex);
	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		idx = ((long)slot - (long)kip->insns) /
			(c->insn_size * sizeof(kprobe_opcode_t));
		if (idx >= 0 && idx < slots_per_page(c))
			goto out;
	}
	/* Could not find this slot. */
	WARN_ON(1);
	kip = NULL;
out:
	rcu_read_unlock();
	/* Mark and sweep: this may sleep */
	if (kip) {
		/* Check double free */
		WARN_ON(kip->slot_used[idx] != SLOT_USED);
		if (dirty) {
			kip->slot_used[idx] = SLOT_DIRTY;
			kip->ngarbage++;
			if (++c->nr_garbage > slots_per_page(c))
				collect_garbage_slots(c);
		} else {
			collect_one_slot(kip, idx);
		}
	}
	mutex_unlock(&c->mutex);
}
/*
 * Check whether the given address is on a page of kprobe instruction slots.
 * This will be used for checking whether the address on a stack
 * is on a text area or not.
 */
bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
{
	struct kprobe_insn_page *kip;
	bool ret = false;

	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		if (addr >= (unsigned long)kip->insns &&
		    addr < (unsigned long)kip->insns + PAGE_SIZE) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}

int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
			     unsigned long *value, char *type, char *sym)
{
	struct kprobe_insn_page *kip;
	int ret = -ERANGE;

	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		if ((*symnum)--)
			continue;
		strscpy(sym, c->sym, KSYM_NAME_LEN);
		*type = 't';
		*value = (unsigned long)kip->insns;
		ret = 0;
		break;
	}
	rcu_read_unlock();

	return ret;
}

#ifdef CONFIG_OPTPROBES
void __weak *alloc_optinsn_page(void)
{
	return alloc_insn_page();
}

void __weak free_optinsn_page(void *page)
{
	free_insn_page(page);
}

/* For optimized_kprobe buffer */
struct kprobe_insn_cache kprobe_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
	.alloc = alloc_optinsn_page,
	.free = free_optinsn_page,
	.sym = KPROBE_OPTINSN_PAGE_SYM,
	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
	/* .insn_size is initialized later */
	.nr_garbage = 0,
};
#endif /* CONFIG_OPTPROBES */
#endif /* __ARCH_WANT_KPROBES_INSN_SLOT */

/* We have preemption disabled.. so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	__this_cpu_write(kprobe_instance, kp);
}

static inline void reset_kprobe_instance(void)
{
	__this_cpu_write(kprobe_instance, NULL);
}

/*
 * This routine is called either:
 *	- under the 'kprobe_mutex' - during kprobe_[un]register().
 *				OR
 *	- with preemption disabled - from architecture specific code.
 */
struct kprobe *get_kprobe(void *addr)
{
	struct hlist_head *head;
	struct kprobe *p;

	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(p, head, hlist,
				 lockdep_is_held(&kprobe_mutex)) {
		if (p->addr == addr)
			return p;
	}

	return NULL;
}
NOKPROBE_SYMBOL(get_kprobe);

static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);

/* Return true if 'p' is an aggregator */
static inline bool kprobe_aggrprobe(struct kprobe *p)
{
	return p->pre_handler == aggr_pre_handler;
}

/* Return true if 'p' is unused */
static inline bool kprobe_unused(struct kprobe *p)
{
	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
	       list_empty(&p->list);
}

/* Keep all fields in the kprobe consistent. */
static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
{
	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
}

#ifdef CONFIG_OPTPROBES
/* NOTE: This is protected by 'kprobe_mutex'. */
static bool kprobes_allow_optimization;

/*
 * Call all 'kprobe::pre_handler' handlers on the list, but ignore their
 * return values. This must be called from the arch-dependent optimized caller.
 */
void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			kp->pre_handler(kp, regs);
		}
		reset_kprobe_instance();
	}
}
NOKPROBE_SYMBOL(opt_pre_handler);

/* Free optimized instructions and optimized_kprobe */
static void free_aggr_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	arch_remove_optimized_kprobe(op);
	arch_remove_kprobe(p);
	kfree(op);
}

/* Return true if the kprobe is ready for optimization. */
static inline int kprobe_optready(struct kprobe *p)
{
	struct optimized_kprobe *op;

	if (kprobe_aggrprobe(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		return arch_prepared_optinsn(&op->optinsn);
	}

	return 0;
}

/* Return true if the kprobe is disarmed. Note: p must be on hash list */
bool kprobe_disarmed(struct kprobe *p)
{
	struct optimized_kprobe *op;

	/* If kprobe is not aggr/opt probe, just return kprobe is disabled */
	if (!kprobe_aggrprobe(p))
		return kprobe_disabled(p);

	op = container_of(p, struct optimized_kprobe, kp);

	return kprobe_disabled(p) && list_empty(&op->list);
}

/* Return true if the probe is queued on (un)optimizing lists */
static bool kprobe_queued(struct kprobe *p)
{
	struct optimized_kprobe *op;

	if (kprobe_aggrprobe(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		if (!list_empty(&op->list))
			return true;
	}
	return false;
}

/*
 * Return an optimized kprobe whose optimizing code replaces
 * instructions including 'addr' (excluding the breakpoint).
 */
static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
{
	int i;
	struct kprobe *p = NULL;
	struct optimized_kprobe *op;

	/* Don't check i == 0, since that is a breakpoint case. */
	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
		p = get_kprobe(addr - i);

	if (p && kprobe_optready(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		if (arch_within_optimized_kprobe(op, addr))
			return p;
	}

	return NULL;
}

/* Optimization staging list, protected by 'kprobe_mutex' */
static LIST_HEAD(optimizing_list);
static LIST_HEAD(unoptimizing_list);
static LIST_HEAD(freeing_list);

static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
#define OPTIMIZE_DELAY 5
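/*
 * Note that OPTIMIZE_DELAY is in jiffies (it is passed straight to
 * schedule_delayed_work() by kick_kprobe_optimizer()). Deferring the work
 * lets (un)optimization requests that arrive close together be batched into
 * a single pass over the staging lists above.
 */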
/*
 * Optimize (replace a breakpoint with a jump) kprobes listed on
 * 'optimizing_list'.
 */
static void do_optimize_kprobes(void)
{
	lockdep_assert_held(&text_mutex);
	/*
	 * The optimization/unoptimization refers to 'online_cpus' via
	 * stop_machine(), and cpu-hotplug modifies 'online_cpus'. At the
	 * same time, 'text_mutex' will be held both in cpu-hotplug and here.
	 * This combination can cause a deadlock (cpu-hotplug tries to lock
	 * 'text_mutex' but stop_machine() can not be done because
	 * 'online_cpus' has been changed).
	 * To avoid this deadlock, the caller must have locked cpu-hotplug,
	 * preventing cpu-hotplug outside of 'text_mutex' locking.
	 */
	lockdep_assert_cpus_held();

	/* Optimization is never done when all kprobes are disarmed. */
	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
	    list_empty(&optimizing_list))
		return;

	arch_optimize_kprobes(&optimizing_list);
}

/*
 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
 * if needed) kprobes listed on 'unoptimizing_list'.
 */
static void do_unoptimize_kprobes(void)
{
	struct optimized_kprobe *op, *tmp;

	lockdep_assert_held(&text_mutex);
	/* See comment in do_optimize_kprobes() */
	lockdep_assert_cpus_held();

	if (!list_empty(&unoptimizing_list))
		arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);

	/* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
		/* Switching from detour code to origin */
		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
		/* Disarm probes if marked disabled and not gone */
		if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
			arch_disarm_kprobe(&op->kp);
		if (kprobe_unused(&op->kp)) {
			/*
			 * Remove unused probes from hash list. After waiting
			 * for synchronization, these probes are reclaimed.
			 * (reclaiming is done by do_free_cleaned_kprobes().)
			 */
			hlist_del_rcu(&op->kp.hlist);
		} else
			list_del_init(&op->list);
	}
}

/* Reclaim all kprobes on the 'freeing_list' */
static void do_free_cleaned_kprobes(void)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
		list_del_init(&op->list);
		if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
			/*
			 * This must not happen, but if there is a kprobe
			 * still in use, keep it on the kprobes hash list.
			 */
			continue;
		}
		free_aggr_kprobe(&op->kp);
	}
}

/* Start the optimizer after OPTIMIZE_DELAY has passed */
static void kick_kprobe_optimizer(void)
{
	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
}

/* Kprobe jump optimizer */
static void kprobe_optimizer(struct work_struct *work)
{
	mutex_lock(&kprobe_mutex);
	cpus_read_lock();
	mutex_lock(&text_mutex);

	/*
	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
	 * kprobes before waiting for the quiescence period.
	 */
	do_unoptimize_kprobes();

	/*
	 * Step 2: Wait for the quiescence period to ensure that all potentially
	 * preempted tasks have been scheduled normally. Because an optprobe
	 * may modify multiple instructions, there is a chance that the Nth
	 * instruction is preempted. In that case, such tasks could return
	 * to the 2nd-Nth byte of the jump instruction. This wait avoids that.
	 * Note that on a non-preemptive kernel, this is transparently converted
	 * to synchronize_sched() to wait for all interrupts to have completed.
	 */
	synchronize_rcu_tasks();

	/* Step 3: Optimize kprobes after the quiescence period */
	do_optimize_kprobes();

	/* Step 4: Free cleaned kprobes after the quiescence period */
	do_free_cleaned_kprobes();

	mutex_unlock(&text_mutex);
	cpus_read_unlock();

	/* Step 5: Kick the optimizer again if needed */
	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
		kick_kprobe_optimizer();

	mutex_unlock(&kprobe_mutex);
}

/* Wait for optimization and unoptimization to complete */
void wait_for_kprobe_optimizer(void)
{
	mutex_lock(&kprobe_mutex);

	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
		mutex_unlock(&kprobe_mutex);

		/* This will also make 'optimizing_work' execute immediately */
		flush_delayed_work(&optimizing_work);
		/* 'optimizing_work' might not have been queued yet, relax */
		cpu_relax();

		mutex_lock(&kprobe_mutex);
	}

	mutex_unlock(&kprobe_mutex);
}

bool optprobe_queued_unopt(struct optimized_kprobe *op)
{
	struct optimized_kprobe *_op;

	list_for_each_entry(_op, &unoptimizing_list, list) {
		if (op == _op)
			return true;
	}

	return false;
}

/* Optimize kprobe if p is ready to be optimized */
static void optimize_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	/* Check if the kprobe is disabled or not ready for optimization. */
	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
	    (kprobe_disabled(p) || kprobes_all_disarmed))
		return;

	/* kprobes with 'post_handler' can not be optimized */
	if (p->post_handler)
		return;

	op = container_of(p, struct optimized_kprobe, kp);

	/* Check that there are no other kprobes at the optimized instructions */
	if (arch_check_optimized_kprobe(op) < 0)
		return;

	/* Check if it is already optimized. */
	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
		if (optprobe_queued_unopt(op)) {
			/* This is under unoptimizing. Just dequeue the probe */
			list_del_init(&op->list);
		}
		return;
	}
	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;

	/*
	 * On the 'unoptimizing_list' and 'optimizing_list',
	 * 'op' must have the OPTIMIZED flag.
	 */
	if (WARN_ON_ONCE(!list_empty(&op->list)))
		return;

	list_add(&op->list, &optimizing_list);
	kick_kprobe_optimizer();
}

/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
	lockdep_assert_cpus_held();
	arch_unoptimize_kprobe(op);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
}

/* Unoptimize a kprobe if p is optimized */
static void unoptimize_kprobe(struct kprobe *p, bool force)
{
	struct optimized_kprobe *op;

	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
		return; /* This is not an optprobe nor optimized */

	op = container_of(p, struct optimized_kprobe, kp);
	if (!kprobe_optimized(p))
		return;

	if (!list_empty(&op->list)) {
		if (optprobe_queued_unopt(op)) {
			/* Queued in unoptimizing queue */
			if (force) {
				/*
				 * Forcibly unoptimize the kprobe here, and queue it
				 * in the freeing list for release afterwards.
				 */
				force_unoptimize_kprobe(op);
				list_move(&op->list, &freeing_list);
			}
		} else {
			/* Dequeue from the optimizing queue */
			list_del_init(&op->list);
			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
		}
		return;
	}

	/* Optimized kprobe case */
	if (force) {
		/* Forcibly update the code: this is a special case */
		force_unoptimize_kprobe(op);
	} else {
		list_add(&op->list, &unoptimizing_list);
		kick_kprobe_optimizer();
	}
}

/* Cancel unoptimizing for reusing */
static int reuse_unused_kprobe(struct kprobe *ap)
{
	struct optimized_kprobe *op;

	/*
	 * An unused kprobe MUST be in the middle of delayed unoptimizing
	 * (meaning there is still a relative jump) and disabled.
	 */
	op = container_of(ap, struct optimized_kprobe, kp);
	WARN_ON_ONCE(list_empty(&op->list));
	/* Enable the probe again */
	ap->flags &= ~KPROBE_FLAG_DISABLED;
	/* Optimize it again. (remove from 'op->list') */
	if (!kprobe_optready(ap))
		return -EINVAL;

	optimize_kprobe(ap);
	return 0;
}

/* Remove optimized instructions */
static void kill_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	if (!list_empty(&op->list))
		/* Dequeue from the (un)optimization queue */
		list_del_init(&op->list);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;

	if (kprobe_unused(p)) {
		/*
		 * An unused kprobe is on the unoptimizing or freeing list. We
		 * move it to the freeing_list and let kprobe_optimizer() remove
		 * it from the kprobe hash list and free it.
		 */
		if (optprobe_queued_unopt(op))
			list_move(&op->list, &freeing_list);
	}

	/* Don't touch the code, because it is already freed. */
	arch_remove_optimized_kprobe(op);
}

static inline
void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	if (!kprobe_ftrace(p))
		arch_prepare_optimized_kprobe(op, p);
}

/* Try to prepare optimized instructions */
static void prepare_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	__prepare_optimized_kprobe(op, p);
}

/* Allocate a new optimized_kprobe and try to prepare optimized instructions. */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
	if (!op)
		return NULL;

	INIT_LIST_HEAD(&op->list);
	op->kp.addr = p->addr;
	__prepare_optimized_kprobe(op, p);

	return &op->kp;
}

static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);

/*
 * Prepare an optimized_kprobe and optimize it.
 * NOTE: 'p' must be a normal registered kprobe.
 */
static void try_to_optimize_kprobe(struct kprobe *p)
{
	struct kprobe *ap;
	struct optimized_kprobe *op;

	/* Impossible to optimize ftrace-based kprobe. */
	if (kprobe_ftrace(p))
		return;

	/* For preparing optimization, jump_label_text_reserved() is called. */
	cpus_read_lock();
	jump_label_lock();
	mutex_lock(&text_mutex);

	ap = alloc_aggr_kprobe(p);
	if (!ap)
		goto out;

	op = container_of(ap, struct optimized_kprobe, kp);
	if (!arch_prepared_optinsn(&op->optinsn)) {
		/* If we failed to set up the optimized instructions, fall back to a kprobe. */
		arch_remove_optimized_kprobe(op);
		kfree(op);
		goto out;
	}

	init_aggr_kprobe(ap, p);
	optimize_kprobe(ap);	/* This just kicks the optimizer thread. */

out:
	mutex_unlock(&text_mutex);
	jump_label_unlock();
	cpus_read_unlock();
}

static void optimize_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);
	/* If optimization is already allowed, just return. */
	if (kprobes_allow_optimization)
		goto out;

	cpus_read_lock();
	kprobes_allow_optimization = true;
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist)
			if (!kprobe_disabled(p))
				optimize_kprobe(p);
	}
	cpus_read_unlock();
	pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
out:
	mutex_unlock(&kprobe_mutex);
}

#ifdef CONFIG_SYSCTL
static void unoptimize_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);
	/* If optimization is already prohibited, just return. */
	if (!kprobes_allow_optimization) {
		mutex_unlock(&kprobe_mutex);
		return;
	}

	cpus_read_lock();
	kprobes_allow_optimization = false;
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist) {
			if (!kprobe_disabled(p))
				unoptimize_kprobe(p, false);
		}
	}
	cpus_read_unlock();
	mutex_unlock(&kprobe_mutex);

	/* Wait for unoptimizing completion. */
	wait_for_kprobe_optimizer();
	pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
}

static DEFINE_MUTEX(kprobe_sysctl_mutex);
static int sysctl_kprobes_optimization;
static int proc_kprobes_optimization_handler(const struct ctl_table *table,
					     int write, void *buffer,
					     size_t *length, loff_t *ppos)
{
	int ret;

	mutex_lock(&kprobe_sysctl_mutex);
	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);

	if (sysctl_kprobes_optimization)
		optimize_all_kprobes();
	else
		unoptimize_all_kprobes();
	mutex_unlock(&kprobe_sysctl_mutex);

	return ret;
}

static struct ctl_table kprobe_sysctls[] = {
	{
		.procname	= "kprobes-optimization",
		.data		= &sysctl_kprobes_optimization,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_kprobes_optimization_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

static void __init kprobe_sysctls_init(void)
{
	register_sysctl_init("debug", kprobe_sysctls);
}
#endif /* CONFIG_SYSCTL */
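/*
 * Usage note: with CONFIG_SYSCTL, jump optimization can be toggled at
 * runtime through the knob registered above, e.g.:
 *
 *	sysctl debug.kprobes-optimization=0	# fall back to breakpoints
 *	sysctl debug.kprobes-optimization=1	# re-enable jump optimization
 *
 * (equivalently, by writing to /proc/sys/debug/kprobes-optimization).
 */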
/* Put a breakpoint for a probe. */
static void __arm_kprobe(struct kprobe *p)
{
	struct kprobe *_p;

	lockdep_assert_held(&text_mutex);

	/* Find the overlapping optimized kprobes. */
	_p = get_optimized_kprobe(p->addr);
	if (unlikely(_p))
		/* Fallback to unoptimized kprobe */
		unoptimize_kprobe(_p, true);

	arch_arm_kprobe(p);
	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
}

/* Remove the breakpoint of a probe. */
static void __disarm_kprobe(struct kprobe *p, bool reopt)
{
	struct kprobe *_p;

	lockdep_assert_held(&text_mutex);

	/* Try to unoptimize */
	unoptimize_kprobe(p, kprobes_all_disarmed);

	if (!kprobe_queued(p)) {
		arch_disarm_kprobe(p);
		/* If another kprobe was blocked, re-optimize it. */
		_p = get_optimized_kprobe(p->addr);
		if (unlikely(_p) && reopt)
			optimize_kprobe(_p);
	}
	/*
	 * TODO: Since unoptimization and real disarming will be done by
	 * the worker thread, we can not check here whether another probe
	 * was unoptimized because of this probe. It should be re-optimized
	 * by the worker thread.
	 */
}

#else /* !CONFIG_OPTPROBES */

#define optimize_kprobe(p)			do {} while (0)
#define unoptimize_kprobe(p, f)			do {} while (0)
#define kill_optimized_kprobe(p)		do {} while (0)
#define prepare_optimized_kprobe(p)		do {} while (0)
#define try_to_optimize_kprobe(p)		do {} while (0)
#define __arm_kprobe(p)				arch_arm_kprobe(p)
#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
#define kprobe_disarmed(p)			kprobe_disabled(p)
#define wait_for_kprobe_optimizer()		do {} while (0)

static int reuse_unused_kprobe(struct kprobe *ap)
{
	/*
	 * If the optimized kprobe is NOT supported, the aggr kprobe is
	 * released at the same time that the last aggregated kprobe is
	 * unregistered.
	 * Thus there should be no chance to reuse an unused kprobe.
	 */
	WARN_ON_ONCE(1);
	return -EINVAL;
}

static void free_aggr_kprobe(struct kprobe *p)
{
	arch_remove_kprobe(p);
	kfree(p);
}

static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
}
#endif /* CONFIG_OPTPROBES */

#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS,
};

static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
bool kprobe_ftrace_disabled;

static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
			       int *cnt)
{
	int ret;

	lockdep_assert_held(&kprobe_mutex);

	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
	if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
		return ret;

	if (*cnt == 0) {
		ret = register_ftrace_function(ops);
		if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret))
			goto err_ftrace;
	}

	(*cnt)++;
	return ret;

err_ftrace:
	/*
	 * At this point, since ops is not registered, we should be safe from
	 * registering an empty filter.
	 */
	ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
	return ret;
}

static int arm_kprobe_ftrace(struct kprobe *p)
{
	bool ipmodify = (p->post_handler != NULL);

	return __arm_kprobe_ftrace(p,
		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
}

static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
				  int *cnt)
{
	int ret;

	lockdep_assert_held(&kprobe_mutex);

	if (*cnt == 1) {
		ret = unregister_ftrace_function(ops);
		if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
			return ret;
	}

	(*cnt)--;

	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
	WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
		  p->addr, ret);
	return ret;
}

static int disarm_kprobe_ftrace(struct kprobe *p)
{
	bool ipmodify = (p->post_handler != NULL);

	return __disarm_kprobe_ftrace(p,
		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
}

void kprobe_ftrace_kill(void)
{
	kprobe_ftrace_disabled = true;
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
static inline int arm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}

static inline int disarm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}
#endif

static int prepare_kprobe(struct kprobe *p)
{
	/* Must ensure p->addr is really on ftrace */
	if (kprobe_ftrace(p))
		return arch_prepare_kprobe_ftrace(p);

	return arch_prepare_kprobe(p);
}

static int arm_kprobe(struct kprobe *kp)
{
	if (unlikely(kprobe_ftrace(kp)))
		return arm_kprobe_ftrace(kp);

	cpus_read_lock();
	mutex_lock(&text_mutex);
	__arm_kprobe(kp);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();

	return 0;
}

static int disarm_kprobe(struct kprobe *kp, bool reopt)
{
	if (unlikely(kprobe_ftrace(kp)))
		return disarm_kprobe_ftrace(kp);

	cpus_read_lock();
	mutex_lock(&text_mutex);
	__disarm_kprobe(kp, reopt);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();

	return 0;
}

/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list
 */
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			if (kp->pre_handler(kp, regs))
				return 1;
		}
		reset_kprobe_instance();
	}
	return 0;
}
NOKPROBE_SYMBOL(aggr_pre_handler);

static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
			      unsigned long flags)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			kp->post_handler(kp, regs, flags);
			reset_kprobe_instance();
		}
	}
}
NOKPROBE_SYMBOL(aggr_post_handler);
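/*
 * Illustrative sketch: when a second kprobe is registered at an address that
 * already has one, the original probe is replaced in the hash table by an
 * aggregator whose handlers are the aggr_* functions above, and both user
 * probes hang off its ->list:
 *
 *	kprobe_table[hash] --> aggr kprobe (pre_handler == aggr_pre_handler)
 *	                           ->list: user kprobe A <-> user kprobe B
 *
 * See register_aggr_kprobe() and init_aggr_kprobe() below.
 */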
/* Increment 'nmissed' of 'p', or of each child probe if 'p' is an aggregator. */
void kprobes_inc_nmissed_count(struct kprobe *p)
{
	struct kprobe *kp;

	if (!kprobe_aggrprobe(p)) {
		p->nmissed++;
	} else {
		list_for_each_entry_rcu(kp, &p->list, list)
			kp->nmissed++;
	}
}
NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);

static struct kprobe kprobe_busy = {
	.addr = (void *) get_kprobe,
};

void kprobe_busy_begin(void)
{
	struct kprobe_ctlblk *kcb;

	preempt_disable();
	__this_cpu_write(current_kprobe, &kprobe_busy);
	kcb = get_kprobe_ctlblk();
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
}

void kprobe_busy_end(void)
{
	__this_cpu_write(current_kprobe, NULL);
	preempt_enable();
}

/* Add the new probe to 'ap->list'. */
static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
{
	if (p->post_handler)
		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */

	list_add_rcu(&p->list, &ap->list);
	if (p->post_handler && !ap->post_handler)
		ap->post_handler = aggr_post_handler;

	return 0;
}

/*
 * Fill in the required fields of the aggregator kprobe. Replace the
 * earlier kprobe in the hlist with the aggregator kprobe.
 */
static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
	/* Copy the insn slot of 'p' to 'ap'. */
	copy_kprobe(p, ap);
	flush_insn_slot(ap);
	ap->addr = p->addr;
	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
	ap->pre_handler = aggr_pre_handler;
	/* We don't care about a kprobe which has already gone. */
	if (p->post_handler && !kprobe_gone(p))
		ap->post_handler = aggr_post_handler;

	INIT_LIST_HEAD(&ap->list);
	INIT_HLIST_NODE(&ap->hlist);

	list_add_rcu(&p->list, &ap->list);
	hlist_replace_rcu(&p->hlist, &ap->hlist);
}

/*
 * This registers the second or subsequent kprobe at the same address.
 */
static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
{
	int ret = 0;
	struct kprobe *ap = orig_p;

	cpus_read_lock();

	/* For preparing optimization, jump_label_text_reserved() is called */
	jump_label_lock();
	mutex_lock(&text_mutex);

	if (!kprobe_aggrprobe(orig_p)) {
		/* If 'orig_p' is not an 'aggr_kprobe', create a new one. */
		ap = alloc_aggr_kprobe(orig_p);
		if (!ap) {
			ret = -ENOMEM;
			goto out;
		}
		init_aggr_kprobe(ap, orig_p);
	} else if (kprobe_unused(ap)) {
		/* This probe is going to die. Rescue it. */
		ret = reuse_unused_kprobe(ap);
		if (ret)
			goto out;
	}

	if (kprobe_gone(ap)) {
		/*
		 * Attempting to insert a new probe at the same location that
		 * had a probe in the module vaddr area which has already been
		 * freed. So, the instruction slot has already been
		 * released. We need a new slot for the new probe.
		 */
		ret = arch_prepare_kprobe(ap);
		if (ret)
			/*
			 * Even if we fail to allocate a new slot, we don't need
			 * to free the 'ap'. It will be used next time, or
			 * freed by unregister_kprobe().
			 */
			goto out;

		/* Prepare optimized instructions if possible. */
		prepare_optimized_kprobe(ap);

		/*
		 * Clear the gone flag to prevent allocating a new slot again,
		 * and set the disabled flag because it is not armed yet.
		 */
		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
			    | KPROBE_FLAG_DISABLED;
	}

	/* Copy the insn slot of 'p' to 'ap'. */
	copy_kprobe(ap, p);
	ret = add_new_kprobe(ap, p);

out:
	mutex_unlock(&text_mutex);
	jump_label_unlock();
	cpus_read_unlock();

	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
		ap->flags &= ~KPROBE_FLAG_DISABLED;
		if (!kprobes_all_disarmed) {
			/* Arm the breakpoint again. */
			ret = arm_kprobe(ap);
			if (ret) {
				ap->flags |= KPROBE_FLAG_DISABLED;
				list_del_rcu(&p->list);
				synchronize_rcu();
			}
		}
	}
	return ret;
}

bool __weak arch_within_kprobe_blacklist(unsigned long addr)
{
	/* The '__kprobes' functions and entry code must not be probed. */
	return addr >= (unsigned long)__kprobes_text_start &&
	       addr < (unsigned long)__kprobes_text_end;
}

static bool __within_kprobe_blacklist(unsigned long addr)
{
	struct kprobe_blacklist_entry *ent;

	if (arch_within_kprobe_blacklist(addr))
		return true;
	/*
	 * If 'kprobe_blacklist' is defined, check the address and
	 * reject any probe registration in the prohibited area.
	 */
	list_for_each_entry(ent, &kprobe_blacklist, list) {
		if (addr >= ent->start_addr && addr < ent->end_addr)
			return true;
	}
	return false;
}

bool within_kprobe_blacklist(unsigned long addr)
{
	char symname[KSYM_NAME_LEN], *p;

	if (__within_kprobe_blacklist(addr))
		return true;

	/* Check if the address is on a suffixed-symbol */
	if (!lookup_symbol_name(addr, symname)) {
		p = strchr(symname, '.');
		if (!p)
			return false;
		*p = '\0';
		addr = (unsigned long)kprobe_lookup_name(symname, 0);
		if (addr)
			return __within_kprobe_blacklist(addr);
	}
	return false;
}

/*
 * arch_adjust_kprobe_addr - adjust the address
 * @addr: symbol base address
 * @offset: offset within the symbol
 * @on_func_entry: was this @addr+@offset on the function entry
 *
 * Typically returns @addr + @offset, except for special cases where the
 * function might be prefixed by a CFI landing pad, in that case any offset
 * inside the landing pad is mapped to the first 'real' instruction of the
 * symbol.
 *
 * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C
 * instruction at +0.
 */
kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr,
						unsigned long offset,
						bool *on_func_entry)
{
	*on_func_entry = !offset;
	return (kprobe_opcode_t *)(addr + offset);
}
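/*
 * Illustrative example of the address resolution done by _kprobe_addr()
 * below: for symbol_name = "vfs_read" and offset = 4, the symbol is first
 * resolved with kprobe_lookup_name(), the offset is added, and the result is
 * re-split into symbol start + offset via kallsyms so that
 * arch_adjust_kprobe_addr() can decide whether the location is a function
 * entry (here it is not, since offset != 0). "vfs_read" is just an assumed
 * example symbol.
 */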
/*
 * If 'symbol_name' is specified, look it up and add the 'offset'
 * to it. This way, we can specify a relative address to a symbol.
 * This returns encoded errors if it fails to look up the symbol or if an
 * invalid combination of parameters is given.
 */
static kprobe_opcode_t *
_kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name,
	     unsigned long offset, bool *on_func_entry)
{
	if ((symbol_name && addr) || (!symbol_name && !addr))
		goto invalid;

	if (symbol_name) {
		/*
		 * Input: @sym + @offset
		 * Output: @addr + @offset
		 *
		 * NOTE: kprobe_lookup_name() does *NOT* fold the offset
		 *       argument into its output!
		 */
		addr = kprobe_lookup_name(symbol_name, offset);
		if (!addr)
			return ERR_PTR(-ENOENT);
	}

	/*
	 * So here we have @addr + @offset, displace it into a new
	 * @addr' + @offset' where @addr' is the symbol start address.
	 */
	addr = (void *)addr + offset;
	if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
		return ERR_PTR(-ENOENT);
	addr = (void *)addr - offset;

	/*
	 * Then ask the architecture to re-combine them, taking care of
	 * magical function entry details while telling us if this was indeed
	 * at the start of the function.
	 */
	addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry);
	if (addr)
		return addr;

invalid:
	return ERR_PTR(-EINVAL);
}

static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
{
	bool on_func_entry;
	return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
}

/*
 * Check that 'p' is valid and return the aggregator kprobe
 * at the same address.
 */
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
	struct kprobe *ap, *list_p;

	lockdep_assert_held(&kprobe_mutex);

	ap = get_kprobe(p->addr);
	if (unlikely(!ap))
		return NULL;

	if (p != ap) {
		list_for_each_entry(list_p, &ap->list, list)
			if (list_p == p)
				/* kprobe p is a valid probe */
				goto valid;
		return NULL;
	}
valid:
	return ap;
}

/*
 * Warn and return an error if the kprobe is being re-registered, since
 * there must be a software bug.
 */
static inline int warn_kprobe_rereg(struct kprobe *p)
{
	int ret = 0;

	mutex_lock(&kprobe_mutex);
	if (WARN_ON_ONCE(__get_valid_kprobe(p)))
		ret = -EINVAL;
	mutex_unlock(&kprobe_mutex);

	return ret;
}

static int check_ftrace_location(struct kprobe *p)
{
	unsigned long addr = (unsigned long)p->addr;

	if (ftrace_location(addr) == addr) {
#ifdef CONFIG_KPROBES_ON_FTRACE
		p->flags |= KPROBE_FLAG_FTRACE;
#else
		return -EINVAL;
#endif
	}
	return 0;
}

static bool is_cfi_preamble_symbol(unsigned long addr)
{
	char symbuf[KSYM_NAME_LEN];

	if (lookup_symbol_name(addr, symbuf))
		return false;

	return str_has_prefix(symbuf, "__cfi_") ||
	       str_has_prefix(symbuf, "__pfx_");
}

static int check_kprobe_address_safe(struct kprobe *p,
				     struct module **probed_mod)
{
	int ret;

	ret = check_ftrace_location(p);
	if (ret)
		return ret;
	jump_label_lock();
	preempt_disable();

	/* Ensure the address is in a text area, and find a module if it exists. */
	*probed_mod = NULL;
	if (!core_kernel_text((unsigned long) p->addr)) {
		*probed_mod = __module_text_address((unsigned long) p->addr);
		if (!(*probed_mod)) {
			ret = -EINVAL;
			goto out;
		}
	}
	/* Ensure it is not in a reserved area. */
	if (in_gate_area_no_mm((unsigned long) p->addr) ||
	    within_kprobe_blacklist((unsigned long) p->addr) ||
	    jump_label_text_reserved(p->addr, p->addr) ||
	    static_call_text_reserved(p->addr, p->addr) ||
	    find_bug((unsigned long)p->addr) ||
	    is_cfi_preamble_symbol((unsigned long)p->addr)) {
		ret = -EINVAL;
		goto out;
	}

	/* Get the module refcount and reject __init functions for loaded modules. */
	if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
		/*
		 * We must hold a refcount of the probed module while updating
		 * its code to prohibit unexpected unloading.
		 */
		if (unlikely(!try_module_get(*probed_mod))) {
			ret = -ENOENT;
			goto out;
		}

		/*
		 * If the module freed its '.init.text', we can't insert
		 * kprobes in there.
		 */
		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
		    !module_is_coming(*probed_mod)) {
			module_put(*probed_mod);
			*probed_mod = NULL;
			ret = -ENOENT;
		}
	}

out:
	preempt_enable();
	jump_label_unlock();

	return ret;
}

int register_kprobe(struct kprobe *p)
{
	int ret;
	struct kprobe *old_p;
	struct module *probed_mod;
	kprobe_opcode_t *addr;
	bool on_func_entry;

	/* Adjust probe address from symbol */
	addr = _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
	if (IS_ERR(addr))
		return PTR_ERR(addr);
	p->addr = addr;

	ret = warn_kprobe_rereg(p);
	if (ret)
		return ret;

	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
	p->flags &= KPROBE_FLAG_DISABLED;
	p->nmissed = 0;
	INIT_LIST_HEAD(&p->list);

	ret = check_kprobe_address_safe(p, &probed_mod);
	if (ret)
		return ret;

	mutex_lock(&kprobe_mutex);

	if (on_func_entry)
		p->flags |= KPROBE_FLAG_ON_FUNC_ENTRY;

	old_p = get_kprobe(p->addr);
	if (old_p) {
		/* Since this may unoptimize 'old_p', we must lock 'text_mutex'. */
		ret = register_aggr_kprobe(old_p, p);
		goto out;
	}

	cpus_read_lock();
	/* Prevent text modification */
	mutex_lock(&text_mutex);
	ret = prepare_kprobe(p);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();
	if (ret)
		goto out;

	INIT_HLIST_NODE(&p->hlist);
	hlist_add_head_rcu(&p->hlist,
			   &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
		ret = arm_kprobe(p);
		if (ret) {
			hlist_del_rcu(&p->hlist);
			synchronize_rcu();
			goto out;
		}
	}

	/* Try to optimize the kprobe */
	try_to_optimize_kprobe(p);
out:
	mutex_unlock(&kprobe_mutex);

	if (probed_mod)
		module_put(probed_mod);

	return ret;
}
EXPORT_SYMBOL_GPL(register_kprobe);

/* Check if all probes on the 'ap' are disabled. */
static bool aggr_kprobe_disabled(struct kprobe *ap)
{
	struct kprobe *kp;

	lockdep_assert_held(&kprobe_mutex);

	list_for_each_entry(kp, &ap->list, list)
		if (!kprobe_disabled(kp))
			/*
			 * Since there is an active probe on the list,
			 * we can't disable this 'ap'.
			 */
			return false;

	return true;
}

static struct kprobe *__disable_kprobe(struct kprobe *p)
{
	struct kprobe *orig_p;
	int ret;

	lockdep_assert_held(&kprobe_mutex);

	/* Get an original kprobe for return */
	orig_p = __get_valid_kprobe(p);
	if (unlikely(orig_p == NULL))
		return ERR_PTR(-EINVAL);

	if (kprobe_disabled(p))
		return orig_p;

	/* Disable the probe if it is a child probe */
	if (p != orig_p)
		p->flags |= KPROBE_FLAG_DISABLED;

	/* Try to disarm and disable this/parent probe */
	if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
		/*
		 * Don't be lazy here. Even if 'kprobes_all_disarmed'
		 * is false, 'orig_p' might not have been armed yet.
		 * Note arm_all_kprobes() __tries__ to arm all kprobes
		 * on the best effort basis.
		 */
		if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
			ret = disarm_kprobe(orig_p, true);
			if (ret) {
				p->flags &= ~KPROBE_FLAG_DISABLED;
				return ERR_PTR(ret);
			}
		}
		orig_p->flags |= KPROBE_FLAG_DISABLED;
	}

	return orig_p;
}

/*
 * Unregister a kprobe without a scheduler synchronization.
 */
static int __unregister_kprobe_top(struct kprobe *p)
{
	struct kprobe *ap, *list_p;

	/* Disable the kprobe. This will disarm it if needed. */
	ap = __disable_kprobe(p);
	if (IS_ERR(ap))
		return PTR_ERR(ap);

	if (ap == p)
		/*
		 * This probe is an independent (and non-optimized) kprobe
		 * (not an aggrprobe). Remove it from the hash list.
		 */
		goto disarmed;

	/* The following process expects this probe to be an aggrprobe. */
	WARN_ON(!kprobe_aggrprobe(ap));

	if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
		/*
		 * '!disarmed' can happen if the probe is under delayed
		 * unoptimizing.
		 */
		goto disarmed;
	else {
		/* If the disabled probe has special handlers, update the aggrprobe. */
		if (p->post_handler && !kprobe_gone(p)) {
			list_for_each_entry(list_p, &ap->list, list) {
				if ((list_p != p) && (list_p->post_handler))
					goto noclean;
			}
			/*
			 * For the kprobe-on-ftrace case, we keep the
			 * post_handler setting to identify this aggrprobe
			 * armed with kprobe_ipmodify_ops.
			 */
			if (!kprobe_ftrace(ap))
				ap->post_handler = NULL;
		}
noclean:
		/*
		 * Remove from the aggrprobe: this path will do nothing in
		 * __unregister_kprobe_bottom().
		 */
		list_del_rcu(&p->list);
		if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
			/*
			 * Try to optimize this probe again, because the post
			 * handler may have been changed.
			 */
			optimize_kprobe(ap);
	}
	return 0;

disarmed:
	hlist_del_rcu(&ap->hlist);
	return 0;
}

static void __unregister_kprobe_bottom(struct kprobe *p)
{
	struct kprobe *ap;

	if (list_empty(&p->list))
		/* This is an independent kprobe */
		arch_remove_kprobe(p);
	else if (list_is_singular(&p->list)) {
		/* This is the last child of an aggrprobe */
		ap = list_entry(p->list.next, struct kprobe, list);
		list_del(&p->list);
		free_aggr_kprobe(ap);
	}
	/* Otherwise, do nothing. */
}
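/*
 * Illustrative usage sketch (hypothetical handler and symbol, not part of
 * this file): a typical caller fills in either ->symbol_name (plus an
 * optional ->offset) or ->addr, along with the handlers, then calls
 * register_kprobe()/unregister_kprobe():
 *
 *	static int my_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		pr_info("hit %ps\n", p->addr);
 *		return 0;	// 0: let the probed instruction run
 *	}
 *
 *	static struct kprobe my_kp = {
 *		.symbol_name	= "kernel_clone",
 *		.pre_handler	= my_pre,
 *	};
 *
 *	ret = register_kprobe(&my_kp);	// arms the probe unless DISABLED
 *	// ...
 *	unregister_kprobe(&my_kp);
 */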
int register_kprobes(struct kprobe **kps, int num)
{
	int i, ret = 0;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		ret = register_kprobe(kps[i]);
		if (ret < 0) {
			if (i > 0)
				unregister_kprobes(kps, i);
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(register_kprobes);

void unregister_kprobe(struct kprobe *p)
{
	unregister_kprobes(&p, 1);
}
EXPORT_SYMBOL_GPL(unregister_kprobe);

void unregister_kprobes(struct kprobe **kps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(kps[i]) < 0)
			kps[i]->addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_rcu();
	for (i = 0; i < num; i++)
		if (kps[i]->addr)
			__unregister_kprobe_bottom(kps[i]);
}
EXPORT_SYMBOL_GPL(unregister_kprobes);

int __weak kprobe_exceptions_notify(struct notifier_block *self,
				    unsigned long val, void *data)
{
	return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);

static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};

#ifdef CONFIG_KRETPROBES

#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)

/* callbacks for objpool of kretprobe instances */
static int kretprobe_init_inst(void *nod, void *context)
{
	struct kretprobe_instance *ri = nod;

	ri->rph = context;
	return 0;
}
static int kretprobe_fini_pool(struct objpool_head *head, void *context)
{
	kfree(context);
	return 0;
}

static void free_rp_inst_rcu(struct rcu_head *head)
{
	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
	struct kretprobe_holder *rph = ri->rph;

	objpool_drop(ri, &rph->pool);
}
NOKPROBE_SYMBOL(free_rp_inst_rcu);

static void recycle_rp_inst(struct kretprobe_instance *ri)
{
	struct kretprobe *rp = get_kretprobe(ri);

	if (likely(rp))
		objpool_push(ri, &rp->rph->pool);
	else
		call_rcu(&ri->rcu, free_rp_inst_rcu);
}
NOKPROBE_SYMBOL(recycle_rp_inst);

/*
 * This function is called from delayed_put_task_struct() when a task is
 * dead and cleaned up to recycle any kretprobe instances associated with
 * this task. These left over instances represent probed functions that
 * have been called but will never return.
 */
void kprobe_flush_task(struct task_struct *tk)
{
	struct kretprobe_instance *ri;
	struct llist_node *node;

	/* Early boot, not yet initialized. */
	if (unlikely(!kprobes_initialized))
		return;

	kprobe_busy_begin();

	node = __llist_del_all(&tk->kretprobe_instances);
	while (node) {
		ri = container_of(node, struct kretprobe_instance, llist);
		node = node->next;

		recycle_rp_inst(ri);
	}

	kprobe_busy_end();
}
NOKPROBE_SYMBOL(kprobe_flush_task);

static inline void free_rp_inst(struct kretprobe *rp)
{
	struct kretprobe_holder *rph = rp->rph;

	if (!rph)
		return;
	rp->rph = NULL;
	objpool_fini(&rph->pool);
}

/* This assumes the 'tsk' is the current task or a task that is not running. */
static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
						  struct llist_node **cur)
{
	struct kretprobe_instance *ri = NULL;
	struct llist_node *node = *cur;

	if (!node)
		node = tsk->kretprobe_instances.first;
	else
		node = node->next;

	while (node) {
		ri = container_of(node, struct kretprobe_instance, llist);
		if (ri->ret_addr != kretprobe_trampoline_addr()) {
			*cur = node;
			return ri->ret_addr;
		}
		node = node->next;
	}
	return NULL;
}
NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);

/**
 * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
 * @tsk: Target task
 * @fp: A frame pointer
 * @cur: a storage of the loop cursor llist_node pointer for next call
 *
 * Find the correct return address modified by a kretprobe on @tsk as an
 * unsigned long value. If it finds the return address, this returns that
 * address value, otherwise this returns 0.
 * The @tsk must be 'current' or a task which is not running. @fp is a hint
 * to get the correct return address - which is compared with the
 * kretprobe_instance::fp field. The @cur is a loop cursor for searching the
 * kretprobe return addresses on the @tsk. The '*@cur' should be NULL at the
 * first call, but '@cur' itself must NOT be NULL.
 */
unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
				      struct llist_node **cur)
{
	struct kretprobe_instance *ri;
	kprobe_opcode_t *ret;

	if (WARN_ON_ONCE(!cur))
		return 0;

	do {
		ret = __kretprobe_find_ret_addr(tsk, cur);
		if (!ret)
			break;
		ri = container_of(*cur, struct kretprobe_instance, llist);
	} while (ri->fp != fp);

	return (unsigned long)ret;
}
NOKPROBE_SYMBOL(kretprobe_find_ret_addr);

void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
					kprobe_opcode_t *correct_ret_addr)
{
	/*
	 * Do nothing by default. Please fill this to update the fake return
	 * address on the stack with the correct one on each arch if possible.
	 */
}

unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
					     void *frame_pointer)
{
	struct kretprobe_instance *ri = NULL;
	struct llist_node *first, *node = NULL;
	kprobe_opcode_t *correct_ret_addr;
	struct kretprobe *rp;

	/* Find the correct address and all nodes for this frame. */
	correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
	if (!correct_ret_addr) {
		pr_err("kretprobe: Return address not found, cannot execute the handler. Maybe there is a bug in the kernel.\n");
		BUG_ON(1);
	}

	/*
	 * Set the return address as the instruction pointer, because if the
	 * user handler calls stack_trace_save_regs() with this 'regs',
	 * the stack trace will start from the instruction pointer.
	 */
	instruction_pointer_set(regs, (unsigned long)correct_ret_addr);

	/* Run the user handlers of the nodes. */
void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
					kprobe_opcode_t *correct_ret_addr)
{
	/*
	 * Do nothing by default. Please fill this to update the fake return
	 * address on the stack with the correct one on each arch if possible.
	 */
}

unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
					     void *frame_pointer)
{
	struct kretprobe_instance *ri = NULL;
	struct llist_node *first, *node = NULL;
	kprobe_opcode_t *correct_ret_addr;
	struct kretprobe *rp;

	/* Find correct address and all nodes for this frame. */
	correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
	if (!correct_ret_addr) {
		pr_err("kretprobe: Return address not found, not executing the handler. There may be a bug in the kernel.\n");
		BUG_ON(1);
	}

	/*
	 * Set the return address as the instruction pointer, because if the
	 * user handler calls stack_trace_save_regs() with this 'regs',
	 * the stack trace will start from the instruction pointer.
	 */
	instruction_pointer_set(regs, (unsigned long)correct_ret_addr);

	/* Run the user handler of the nodes. */
	first = current->kretprobe_instances.first;
	while (first) {
		ri = container_of(first, struct kretprobe_instance, llist);

		if (WARN_ON_ONCE(ri->fp != frame_pointer))
			break;

		rp = get_kretprobe(ri);
		if (rp && rp->handler) {
			struct kprobe *prev = kprobe_running();

			__this_cpu_write(current_kprobe, &rp->kp);
			ri->ret_addr = correct_ret_addr;
			rp->handler(ri, regs);
			__this_cpu_write(current_kprobe, prev);
		}
		if (first == node)
			break;

		first = first->next;
	}

	arch_kretprobe_fixup_return(regs, correct_ret_addr);

	/* Unlink all nodes for this frame. */
	first = current->kretprobe_instances.first;
	current->kretprobe_instances.first = node->next;
	node->next = NULL;

	/* Recycle free instances. */
	while (first) {
		ri = container_of(first, struct kretprobe_instance, llist);
		first = first->next;

		recycle_rp_inst(ri);
	}

	return (unsigned long)correct_ret_addr;
}
NOKPROBE_SYMBOL(__kretprobe_trampoline_handler);

/*
 * This kprobe pre_handler is registered with every kretprobe. When the
 * probe hits, it sets up the return probe.
 */
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
	struct kretprobe_holder *rph = rp->rph;
	struct kretprobe_instance *ri;

	ri = objpool_pop(&rph->pool);
	if (!ri) {
		rp->nmissed++;
		return 0;
	}

	if (rp->entry_handler && rp->entry_handler(ri, regs)) {
		objpool_push(ri, &rph->pool);
		return 0;
	}

	arch_prepare_kretprobe(ri, regs);

	__llist_add(&ri->llist, &current->kretprobe_instances);

	return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
#else /* CONFIG_KRETPROBE_ON_RETHOOK */
/*
 * This kprobe pre_handler is registered with every kretprobe. When the
 * probe hits, it sets up the return probe.
 */
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
	struct kretprobe_instance *ri;
	struct rethook_node *rhn;

	rhn = rethook_try_get(rp->rh);
	if (!rhn) {
		rp->nmissed++;
		return 0;
	}

	ri = container_of(rhn, struct kretprobe_instance, node);

	if (rp->entry_handler && rp->entry_handler(ri, regs))
		rethook_recycle(rhn);
	else
		rethook_hook(rhn, regs, kprobe_ftrace(p));

	return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
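
/*
 * Example (illustrative sketch, not part of this file): a kretprobe that
 * hands per-instance data from its entry handler to its return handler.
 * 'data_size' bytes are reserved in each kretprobe_instance and reachable
 * via ri->data. The probed symbol and all names are assumptions for
 * illustration only.
 *
 *	struct my_data {
 *		ktime_t entry_stamp;
 *	};
 *
 *	static int my_entry_handler(struct kretprobe_instance *ri,
 *				    struct pt_regs *regs)
 *	{
 *		struct my_data *d = (struct my_data *)ri->data;
 *
 *		d->entry_stamp = ktime_get();
 *		return 0;	// non-zero would skip this return instance
 *	}
 *
 *	static int my_ret_handler(struct kretprobe_instance *ri,
 *				  struct pt_regs *regs)
 *	{
 *		struct my_data *d = (struct my_data *)ri->data;
 *
 *		pr_info("latency: %lld ns\n",
 *			ktime_to_ns(ktime_sub(ktime_get(), d->entry_stamp)));
 *		return 0;
 *	}
 *
 *	static struct kretprobe my_kretprobe = {
 *		.kp.symbol_name	= "kernel_clone",
 *		.entry_handler	= my_entry_handler,
 *		.handler	= my_ret_handler,
 *		.data_size	= sizeof(struct my_data),
 *		.maxactive	= 20,
 *	};
 *
 *	// register_kretprobe(&my_kretprobe) from module init;
 *	// unregister_kretprobe(&my_kretprobe) from module exit.
 */
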
static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
				      unsigned long ret_addr,
				      struct pt_regs *regs)
{
	struct kretprobe *rp = (struct kretprobe *)data;
	struct kretprobe_instance *ri;
	struct kprobe_ctlblk *kcb;

	/* The data must not be NULL; otherwise the rethook data structure is broken. */
	if (WARN_ON_ONCE(!data) || !rp->handler)
		return;

	__this_cpu_write(current_kprobe, &rp->kp);
	kcb = get_kprobe_ctlblk();
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;

	ri = container_of(rh, struct kretprobe_instance, node);
	rp->handler(ri, regs);

	__this_cpu_write(current_kprobe, NULL);
}
NOKPROBE_SYMBOL(kretprobe_rethook_handler);

#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */

/**
 * kprobe_on_func_entry() -- check whether given address is function entry
 * @addr: Target address
 * @sym: Target symbol name
 * @offset: The offset from the symbol or the address
 *
 * This checks whether the given @addr+@offset or @sym+@offset is on the
 * function entry address or not.
 * This returns 0 if it is the function entry, or -EINVAL if it is not.
 * It returns -ENOENT if the symbol or address lookup fails.
 * The caller must pass either @addr or @sym (the other must be NULL);
 * otherwise this returns -EINVAL.
 */
int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
{
	bool on_func_entry;
	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry);

	if (IS_ERR(kp_addr))
		return PTR_ERR(kp_addr);

	if (!on_func_entry)
		return -EINVAL;

	return 0;
}

int register_kretprobe(struct kretprobe *rp)
{
	int ret;
	int i;
	void *addr;

	ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
	if (ret)
		return ret;

	/* If only 'rp->kp.addr' is specified, check reregistering kprobes */
	if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
		return -EINVAL;

	if (kretprobe_blacklist_size) {
		addr = kprobe_addr(&rp->kp);
		if (IS_ERR(addr))
			return PTR_ERR(addr);

		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			if (kretprobe_blacklist[i].addr == addr)
				return -EINVAL;
		}
	}

	if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
		return -E2BIG;

	rp->kp.pre_handler = pre_handler_kretprobe;
	rp->kp.post_handler = NULL;

	/* Pre-allocate memory for max kretprobe instances */
	if (rp->maxactive <= 0)
		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());

#ifdef CONFIG_KRETPROBE_ON_RETHOOK
	rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler,
			       sizeof(struct kretprobe_instance) +
			       rp->data_size, rp->maxactive);
	if (IS_ERR(rp->rh))
		return PTR_ERR(rp->rh);

	rp->nmissed = 0;
	/* Establish function entry probe point */
	ret = register_kprobe(&rp->kp);
	if (ret != 0) {
		rethook_free(rp->rh);
		rp->rh = NULL;
	}
#else /* !CONFIG_KRETPROBE_ON_RETHOOK */
	rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
	if (!rp->rph)
		return -ENOMEM;

	if (objpool_init(&rp->rph->pool, rp->maxactive, rp->data_size +
			 sizeof(struct kretprobe_instance), GFP_KERNEL,
			 rp->rph, kretprobe_init_inst, kretprobe_fini_pool)) {
		kfree(rp->rph);
		rp->rph = NULL;
		return -ENOMEM;
	}
	rcu_assign_pointer(rp->rph->rp, rp);
	rp->nmissed = 0;
	/* Establish function entry probe point */
	ret = register_kprobe(&rp->kp);
	if (ret != 0)
		free_rp_inst(rp);
#endif
	return ret;
}
EXPORT_SYMBOL_GPL(register_kretprobe);

int register_kretprobes(struct kretprobe **rps, int num)
{
	int ret = 0,
i; 2278 2279 if (num <= 0) 2280 return -EINVAL; 2281 for (i = 0; i < num; i++) { 2282 ret = register_kretprobe(rps[i]); 2283 if (ret < 0) { 2284 if (i > 0) 2285 unregister_kretprobes(rps, i); 2286 break; 2287 } 2288 } 2289 return ret; 2290 } 2291 EXPORT_SYMBOL_GPL(register_kretprobes); 2292 2293 void unregister_kretprobe(struct kretprobe *rp) 2294 { 2295 unregister_kretprobes(&rp, 1); 2296 } 2297 EXPORT_SYMBOL_GPL(unregister_kretprobe); 2298 2299 void unregister_kretprobes(struct kretprobe **rps, int num) 2300 { 2301 int i; 2302 2303 if (num <= 0) 2304 return; 2305 mutex_lock(&kprobe_mutex); 2306 for (i = 0; i < num; i++) { 2307 if (__unregister_kprobe_top(&rps[i]->kp) < 0) 2308 rps[i]->kp.addr = NULL; 2309 #ifdef CONFIG_KRETPROBE_ON_RETHOOK 2310 rethook_free(rps[i]->rh); 2311 #else 2312 rcu_assign_pointer(rps[i]->rph->rp, NULL); 2313 #endif 2314 } 2315 mutex_unlock(&kprobe_mutex); 2316 2317 synchronize_rcu(); 2318 for (i = 0; i < num; i++) { 2319 if (rps[i]->kp.addr) { 2320 __unregister_kprobe_bottom(&rps[i]->kp); 2321 #ifndef CONFIG_KRETPROBE_ON_RETHOOK 2322 free_rp_inst(rps[i]); 2323 #endif 2324 } 2325 } 2326 } 2327 EXPORT_SYMBOL_GPL(unregister_kretprobes); 2328 2329 #else /* CONFIG_KRETPROBES */ 2330 int register_kretprobe(struct kretprobe *rp) 2331 { 2332 return -EOPNOTSUPP; 2333 } 2334 EXPORT_SYMBOL_GPL(register_kretprobe); 2335 2336 int register_kretprobes(struct kretprobe **rps, int num) 2337 { 2338 return -EOPNOTSUPP; 2339 } 2340 EXPORT_SYMBOL_GPL(register_kretprobes); 2341 2342 void unregister_kretprobe(struct kretprobe *rp) 2343 { 2344 } 2345 EXPORT_SYMBOL_GPL(unregister_kretprobe); 2346 2347 void unregister_kretprobes(struct kretprobe **rps, int num) 2348 { 2349 } 2350 EXPORT_SYMBOL_GPL(unregister_kretprobes); 2351 2352 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 2353 { 2354 return 0; 2355 } 2356 NOKPROBE_SYMBOL(pre_handler_kretprobe); 2357 2358 #endif /* CONFIG_KRETPROBES */ 2359 2360 /* Set the kprobe gone and remove its instruction buffer. */ 2361 static void kill_kprobe(struct kprobe *p) 2362 { 2363 struct kprobe *kp; 2364 2365 lockdep_assert_held(&kprobe_mutex); 2366 2367 /* 2368 * The module is going away. We should disarm the kprobe which 2369 * is using ftrace, because ftrace framework is still available at 2370 * 'MODULE_STATE_GOING' notification. 2371 */ 2372 if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) 2373 disarm_kprobe_ftrace(p); 2374 2375 p->flags |= KPROBE_FLAG_GONE; 2376 if (kprobe_aggrprobe(p)) { 2377 /* 2378 * If this is an aggr_kprobe, we have to list all the 2379 * chained probes and mark them GONE. 2380 */ 2381 list_for_each_entry(kp, &p->list, list) 2382 kp->flags |= KPROBE_FLAG_GONE; 2383 p->post_handler = NULL; 2384 kill_optimized_kprobe(p); 2385 } 2386 /* 2387 * Here, we can remove insn_slot safely, because no thread calls 2388 * the original probed function (which will be freed soon) any more. 
2389 */ 2390 arch_remove_kprobe(p); 2391 } 2392 2393 /* Disable one kprobe */ 2394 int disable_kprobe(struct kprobe *kp) 2395 { 2396 int ret = 0; 2397 struct kprobe *p; 2398 2399 mutex_lock(&kprobe_mutex); 2400 2401 /* Disable this kprobe */ 2402 p = __disable_kprobe(kp); 2403 if (IS_ERR(p)) 2404 ret = PTR_ERR(p); 2405 2406 mutex_unlock(&kprobe_mutex); 2407 return ret; 2408 } 2409 EXPORT_SYMBOL_GPL(disable_kprobe); 2410 2411 /* Enable one kprobe */ 2412 int enable_kprobe(struct kprobe *kp) 2413 { 2414 int ret = 0; 2415 struct kprobe *p; 2416 2417 mutex_lock(&kprobe_mutex); 2418 2419 /* Check whether specified probe is valid. */ 2420 p = __get_valid_kprobe(kp); 2421 if (unlikely(p == NULL)) { 2422 ret = -EINVAL; 2423 goto out; 2424 } 2425 2426 if (kprobe_gone(kp)) { 2427 /* This kprobe has gone, we couldn't enable it. */ 2428 ret = -EINVAL; 2429 goto out; 2430 } 2431 2432 if (p != kp) 2433 kp->flags &= ~KPROBE_FLAG_DISABLED; 2434 2435 if (!kprobes_all_disarmed && kprobe_disabled(p)) { 2436 p->flags &= ~KPROBE_FLAG_DISABLED; 2437 ret = arm_kprobe(p); 2438 if (ret) { 2439 p->flags |= KPROBE_FLAG_DISABLED; 2440 if (p != kp) 2441 kp->flags |= KPROBE_FLAG_DISABLED; 2442 } 2443 } 2444 out: 2445 mutex_unlock(&kprobe_mutex); 2446 return ret; 2447 } 2448 EXPORT_SYMBOL_GPL(enable_kprobe); 2449 2450 /* Caller must NOT call this in usual path. This is only for critical case */ 2451 void dump_kprobe(struct kprobe *kp) 2452 { 2453 pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n", 2454 kp->symbol_name, kp->offset, kp->addr); 2455 } 2456 NOKPROBE_SYMBOL(dump_kprobe); 2457 2458 int kprobe_add_ksym_blacklist(unsigned long entry) 2459 { 2460 struct kprobe_blacklist_entry *ent; 2461 unsigned long offset = 0, size = 0; 2462 2463 if (!kernel_text_address(entry) || 2464 !kallsyms_lookup_size_offset(entry, &size, &offset)) 2465 return -EINVAL; 2466 2467 ent = kmalloc(sizeof(*ent), GFP_KERNEL); 2468 if (!ent) 2469 return -ENOMEM; 2470 ent->start_addr = entry; 2471 ent->end_addr = entry + size; 2472 INIT_LIST_HEAD(&ent->list); 2473 list_add_tail(&ent->list, &kprobe_blacklist); 2474 2475 return (int)size; 2476 } 2477 2478 /* Add all symbols in given area into kprobe blacklist */ 2479 int kprobe_add_area_blacklist(unsigned long start, unsigned long end) 2480 { 2481 unsigned long entry; 2482 int ret = 0; 2483 2484 for (entry = start; entry < end; entry += ret) { 2485 ret = kprobe_add_ksym_blacklist(entry); 2486 if (ret < 0) 2487 return ret; 2488 if (ret == 0) /* In case of alias symbol */ 2489 ret = 1; 2490 } 2491 return 0; 2492 } 2493 2494 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, 2495 char *type, char *sym) 2496 { 2497 return -ERANGE; 2498 } 2499 2500 int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, 2501 char *sym) 2502 { 2503 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT 2504 if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym)) 2505 return 0; 2506 #ifdef CONFIG_OPTPROBES 2507 if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym)) 2508 return 0; 2509 #endif 2510 #endif 2511 if (!arch_kprobe_get_kallsym(&symnum, value, type, sym)) 2512 return 0; 2513 return -ERANGE; 2514 } 2515 2516 int __init __weak arch_populate_kprobe_blacklist(void) 2517 { 2518 return 0; 2519 } 2520 2521 /* 2522 * Lookup and populate the kprobe_blacklist. 
2523 * 2524 * Unlike the kretprobe blacklist, we'll need to determine 2525 * the range of addresses that belong to the said functions, 2526 * since a kprobe need not necessarily be at the beginning 2527 * of a function. 2528 */ 2529 static int __init populate_kprobe_blacklist(unsigned long *start, 2530 unsigned long *end) 2531 { 2532 unsigned long entry; 2533 unsigned long *iter; 2534 int ret; 2535 2536 for (iter = start; iter < end; iter++) { 2537 entry = (unsigned long)dereference_symbol_descriptor((void *)*iter); 2538 ret = kprobe_add_ksym_blacklist(entry); 2539 if (ret == -EINVAL) 2540 continue; 2541 if (ret < 0) 2542 return ret; 2543 } 2544 2545 /* Symbols in '__kprobes_text' are blacklisted */ 2546 ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, 2547 (unsigned long)__kprobes_text_end); 2548 if (ret) 2549 return ret; 2550 2551 /* Symbols in 'noinstr' section are blacklisted */ 2552 ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, 2553 (unsigned long)__noinstr_text_end); 2554 2555 return ret ? : arch_populate_kprobe_blacklist(); 2556 } 2557 2558 #ifdef CONFIG_MODULES 2559 /* Remove all symbols in given area from kprobe blacklist */ 2560 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) 2561 { 2562 struct kprobe_blacklist_entry *ent, *n; 2563 2564 list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) { 2565 if (ent->start_addr < start || ent->start_addr >= end) 2566 continue; 2567 list_del(&ent->list); 2568 kfree(ent); 2569 } 2570 } 2571 2572 static void kprobe_remove_ksym_blacklist(unsigned long entry) 2573 { 2574 kprobe_remove_area_blacklist(entry, entry + 1); 2575 } 2576 2577 static void add_module_kprobe_blacklist(struct module *mod) 2578 { 2579 unsigned long start, end; 2580 int i; 2581 2582 if (mod->kprobe_blacklist) { 2583 for (i = 0; i < mod->num_kprobe_blacklist; i++) 2584 kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]); 2585 } 2586 2587 start = (unsigned long)mod->kprobes_text_start; 2588 if (start) { 2589 end = start + mod->kprobes_text_size; 2590 kprobe_add_area_blacklist(start, end); 2591 } 2592 2593 start = (unsigned long)mod->noinstr_text_start; 2594 if (start) { 2595 end = start + mod->noinstr_text_size; 2596 kprobe_add_area_blacklist(start, end); 2597 } 2598 } 2599 2600 static void remove_module_kprobe_blacklist(struct module *mod) 2601 { 2602 unsigned long start, end; 2603 int i; 2604 2605 if (mod->kprobe_blacklist) { 2606 for (i = 0; i < mod->num_kprobe_blacklist; i++) 2607 kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]); 2608 } 2609 2610 start = (unsigned long)mod->kprobes_text_start; 2611 if (start) { 2612 end = start + mod->kprobes_text_size; 2613 kprobe_remove_area_blacklist(start, end); 2614 } 2615 2616 start = (unsigned long)mod->noinstr_text_start; 2617 if (start) { 2618 end = start + mod->noinstr_text_size; 2619 kprobe_remove_area_blacklist(start, end); 2620 } 2621 } 2622 2623 /* Module notifier call back, checking kprobes on the module */ 2624 static int kprobes_module_callback(struct notifier_block *nb, 2625 unsigned long val, void *data) 2626 { 2627 struct module *mod = data; 2628 struct hlist_head *head; 2629 struct kprobe *p; 2630 unsigned int i; 2631 int checkcore = (val == MODULE_STATE_GOING); 2632 2633 if (val == MODULE_STATE_COMING) { 2634 mutex_lock(&kprobe_mutex); 2635 add_module_kprobe_blacklist(mod); 2636 mutex_unlock(&kprobe_mutex); 2637 } 2638 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) 2639 return NOTIFY_DONE; 2640 2641 /* 2642 * When 
'MODULE_STATE_GOING' is notified, both the module '.text' and
 * '.init.text' sections will be freed. When 'MODULE_STATE_LIVE' is
 * notified, only the '.init.text' section will be freed. We need to
 * disable kprobes which have been inserted in those sections.
 */
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist)
			if (within_module_init((unsigned long)p->addr, mod) ||
			    (checkcore &&
			     within_module_core((unsigned long)p->addr, mod))) {
				/*
				 * The vaddr at which this probe is installed
				 * will soon be vfreed, but not synced to disk.
				 * Hence, disarming the breakpoint isn't needed.
				 *
				 * Note, this will also move any optimized probes
				 * that are pending to be removed from their
				 * corresponding lists to the 'freeing_list' and
				 * will not be touched by the delayed
				 * kprobe_optimizer() work handler.
				 */
				kill_kprobe(p);
			}
	}
	if (val == MODULE_STATE_GOING)
		remove_module_kprobe_blacklist(mod);
	mutex_unlock(&kprobe_mutex);
	return NOTIFY_DONE;
}

static struct notifier_block kprobe_module_nb = {
	.notifier_call = kprobes_module_callback,
	.priority = 0
};

static int kprobe_register_module_notifier(void)
{
	return register_module_notifier(&kprobe_module_nb);
}
#else
static int kprobe_register_module_notifier(void)
{
	return 0;
}
#endif /* CONFIG_MODULES */

void kprobe_free_init_mem(void)
{
	void *start = (void *)(&__init_begin);
	void *end = (void *)(&__init_end);
	struct hlist_head *head;
	struct kprobe *p;
	int i;

	mutex_lock(&kprobe_mutex);

	/* Kill all kprobes on initmem because the target code has been freed. */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist) {
			if (start <= (void *)p->addr && (void *)p->addr < end)
				kill_kprobe(p);
		}
	}

	mutex_unlock(&kprobe_mutex);
}

static int __init init_kprobes(void)
{
	int i, err;

	/* FIXME allocate the probe table, currently defined statically */
	/* initialize all list heads */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&kprobe_table[i]);

	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
					__stop_kprobe_blacklist);
	if (err)
		pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);

	if (kretprobe_blacklist_size) {
		/* lookup the function address from its name */
		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			kretprobe_blacklist[i].addr =
				kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
			if (!kretprobe_blacklist[i].addr)
				pr_err("Failed to lookup symbol '%s' for kretprobe blacklist.
Maybe the target function is removed or renamed.\n", 2733 kretprobe_blacklist[i].name); 2734 } 2735 } 2736 2737 /* By default, kprobes are armed */ 2738 kprobes_all_disarmed = false; 2739 2740 #if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) 2741 /* Init 'kprobe_optinsn_slots' for allocation */ 2742 kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; 2743 #endif 2744 2745 err = arch_init_kprobes(); 2746 if (!err) 2747 err = register_die_notifier(&kprobe_exceptions_nb); 2748 if (!err) 2749 err = kprobe_register_module_notifier(); 2750 2751 kprobes_initialized = (err == 0); 2752 kprobe_sysctls_init(); 2753 return err; 2754 } 2755 early_initcall(init_kprobes); 2756 2757 #if defined(CONFIG_OPTPROBES) 2758 static int __init init_optprobes(void) 2759 { 2760 /* 2761 * Enable kprobe optimization - this kicks the optimizer which 2762 * depends on synchronize_rcu_tasks() and ksoftirqd, that is 2763 * not spawned in early initcall. So delay the optimization. 2764 */ 2765 optimize_all_kprobes(); 2766 2767 return 0; 2768 } 2769 subsys_initcall(init_optprobes); 2770 #endif 2771 2772 #ifdef CONFIG_DEBUG_FS 2773 static void report_probe(struct seq_file *pi, struct kprobe *p, 2774 const char *sym, int offset, char *modname, struct kprobe *pp) 2775 { 2776 char *kprobe_type; 2777 void *addr = p->addr; 2778 2779 if (p->pre_handler == pre_handler_kretprobe) 2780 kprobe_type = "r"; 2781 else 2782 kprobe_type = "k"; 2783 2784 if (!kallsyms_show_value(pi->file->f_cred)) 2785 addr = NULL; 2786 2787 if (sym) 2788 seq_printf(pi, "%px %s %s+0x%x %s ", 2789 addr, kprobe_type, sym, offset, 2790 (modname ? modname : " ")); 2791 else /* try to use %pS */ 2792 seq_printf(pi, "%px %s %pS ", 2793 addr, kprobe_type, p->addr); 2794 2795 if (!pp) 2796 pp = p; 2797 seq_printf(pi, "%s%s%s%s\n", 2798 (kprobe_gone(p) ? "[GONE]" : ""), 2799 ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), 2800 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""), 2801 (kprobe_ftrace(pp) ? "[FTRACE]" : "")); 2802 } 2803 2804 static void *kprobe_seq_start(struct seq_file *f, loff_t *pos) 2805 { 2806 return (*pos < KPROBE_TABLE_SIZE) ? 
pos : NULL; 2807 } 2808 2809 static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) 2810 { 2811 (*pos)++; 2812 if (*pos >= KPROBE_TABLE_SIZE) 2813 return NULL; 2814 return pos; 2815 } 2816 2817 static void kprobe_seq_stop(struct seq_file *f, void *v) 2818 { 2819 /* Nothing to do */ 2820 } 2821 2822 static int show_kprobe_addr(struct seq_file *pi, void *v) 2823 { 2824 struct hlist_head *head; 2825 struct kprobe *p, *kp; 2826 const char *sym; 2827 unsigned int i = *(loff_t *) v; 2828 unsigned long offset = 0; 2829 char *modname, namebuf[KSYM_NAME_LEN]; 2830 2831 head = &kprobe_table[i]; 2832 preempt_disable(); 2833 hlist_for_each_entry_rcu(p, head, hlist) { 2834 sym = kallsyms_lookup((unsigned long)p->addr, NULL, 2835 &offset, &modname, namebuf); 2836 if (kprobe_aggrprobe(p)) { 2837 list_for_each_entry_rcu(kp, &p->list, list) 2838 report_probe(pi, kp, sym, offset, modname, p); 2839 } else 2840 report_probe(pi, p, sym, offset, modname, NULL); 2841 } 2842 preempt_enable(); 2843 return 0; 2844 } 2845 2846 static const struct seq_operations kprobes_sops = { 2847 .start = kprobe_seq_start, 2848 .next = kprobe_seq_next, 2849 .stop = kprobe_seq_stop, 2850 .show = show_kprobe_addr 2851 }; 2852 2853 DEFINE_SEQ_ATTRIBUTE(kprobes); 2854 2855 /* kprobes/blacklist -- shows which functions can not be probed */ 2856 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) 2857 { 2858 mutex_lock(&kprobe_mutex); 2859 return seq_list_start(&kprobe_blacklist, *pos); 2860 } 2861 2862 static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos) 2863 { 2864 return seq_list_next(v, &kprobe_blacklist, pos); 2865 } 2866 2867 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) 2868 { 2869 struct kprobe_blacklist_entry *ent = 2870 list_entry(v, struct kprobe_blacklist_entry, list); 2871 2872 /* 2873 * If '/proc/kallsyms' is not showing kernel address, we won't 2874 * show them here either. 2875 */ 2876 if (!kallsyms_show_value(m->file->f_cred)) 2877 seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL, 2878 (void *)ent->start_addr); 2879 else 2880 seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr, 2881 (void *)ent->end_addr, (void *)ent->start_addr); 2882 return 0; 2883 } 2884 2885 static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v) 2886 { 2887 mutex_unlock(&kprobe_mutex); 2888 } 2889 2890 static const struct seq_operations kprobe_blacklist_sops = { 2891 .start = kprobe_blacklist_seq_start, 2892 .next = kprobe_blacklist_seq_next, 2893 .stop = kprobe_blacklist_seq_stop, 2894 .show = kprobe_blacklist_seq_show, 2895 }; 2896 DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist); 2897 2898 static int arm_all_kprobes(void) 2899 { 2900 struct hlist_head *head; 2901 struct kprobe *p; 2902 unsigned int i, total = 0, errors = 0; 2903 int err, ret = 0; 2904 2905 mutex_lock(&kprobe_mutex); 2906 2907 /* If kprobes are armed, just return */ 2908 if (!kprobes_all_disarmed) 2909 goto already_enabled; 2910 2911 /* 2912 * optimize_kprobe() called by arm_kprobe() checks 2913 * kprobes_all_disarmed, so set kprobes_all_disarmed before 2914 * arm_kprobe. 
2915 */ 2916 kprobes_all_disarmed = false; 2917 /* Arming kprobes doesn't optimize kprobe itself */ 2918 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2919 head = &kprobe_table[i]; 2920 /* Arm all kprobes on a best-effort basis */ 2921 hlist_for_each_entry(p, head, hlist) { 2922 if (!kprobe_disabled(p)) { 2923 err = arm_kprobe(p); 2924 if (err) { 2925 errors++; 2926 ret = err; 2927 } 2928 total++; 2929 } 2930 } 2931 } 2932 2933 if (errors) 2934 pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n", 2935 errors, total); 2936 else 2937 pr_info("Kprobes globally enabled\n"); 2938 2939 already_enabled: 2940 mutex_unlock(&kprobe_mutex); 2941 return ret; 2942 } 2943 2944 static int disarm_all_kprobes(void) 2945 { 2946 struct hlist_head *head; 2947 struct kprobe *p; 2948 unsigned int i, total = 0, errors = 0; 2949 int err, ret = 0; 2950 2951 mutex_lock(&kprobe_mutex); 2952 2953 /* If kprobes are already disarmed, just return */ 2954 if (kprobes_all_disarmed) { 2955 mutex_unlock(&kprobe_mutex); 2956 return 0; 2957 } 2958 2959 kprobes_all_disarmed = true; 2960 2961 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2962 head = &kprobe_table[i]; 2963 /* Disarm all kprobes on a best-effort basis */ 2964 hlist_for_each_entry(p, head, hlist) { 2965 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) { 2966 err = disarm_kprobe(p, false); 2967 if (err) { 2968 errors++; 2969 ret = err; 2970 } 2971 total++; 2972 } 2973 } 2974 } 2975 2976 if (errors) 2977 pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n", 2978 errors, total); 2979 else 2980 pr_info("Kprobes globally disabled\n"); 2981 2982 mutex_unlock(&kprobe_mutex); 2983 2984 /* Wait for disarming all kprobes by optimizer */ 2985 wait_for_kprobe_optimizer(); 2986 2987 return ret; 2988 } 2989 2990 /* 2991 * XXX: The debugfs bool file interface doesn't allow for callbacks 2992 * when the bool state is switched. We can reuse that facility when 2993 * available 2994 */ 2995 static ssize_t read_enabled_file_bool(struct file *file, 2996 char __user *user_buf, size_t count, loff_t *ppos) 2997 { 2998 char buf[3]; 2999 3000 if (!kprobes_all_disarmed) 3001 buf[0] = '1'; 3002 else 3003 buf[0] = '0'; 3004 buf[1] = '\n'; 3005 buf[2] = 0x00; 3006 return simple_read_from_buffer(user_buf, count, ppos, buf, 2); 3007 } 3008 3009 static ssize_t write_enabled_file_bool(struct file *file, 3010 const char __user *user_buf, size_t count, loff_t *ppos) 3011 { 3012 bool enable; 3013 int ret; 3014 3015 ret = kstrtobool_from_user(user_buf, count, &enable); 3016 if (ret) 3017 return ret; 3018 3019 ret = enable ? arm_all_kprobes() : disarm_all_kprobes(); 3020 if (ret) 3021 return ret; 3022 3023 return count; 3024 } 3025 3026 static const struct file_operations fops_kp = { 3027 .read = read_enabled_file_bool, 3028 .write = write_enabled_file_bool, 3029 .llseek = default_llseek, 3030 }; 3031 3032 static int __init debugfs_kprobe_init(void) 3033 { 3034 struct dentry *dir; 3035 3036 dir = debugfs_create_dir("kprobes", NULL); 3037 3038 debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops); 3039 3040 debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp); 3041 3042 debugfs_create_file("blacklist", 0400, dir, NULL, 3043 &kprobe_blacklist_fops); 3044 3045 return 0; 3046 } 3047 3048 late_initcall(debugfs_kprobe_init); 3049 #endif /* CONFIG_DEBUG_FS */ 3050