1 /* 2 * Copyright (C) 2008 Mathieu Desnoyers 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 */ 18 #include <linux/module.h> 19 #include <linux/mutex.h> 20 #include <linux/types.h> 21 #include <linux/jhash.h> 22 #include <linux/list.h> 23 #include <linux/rcupdate.h> 24 #include <linux/tracepoint.h> 25 #include <linux/err.h> 26 #include <linux/slab.h> 27 #include <linux/sched.h> 28 #include <linux/jump_label.h> 29 30 extern struct tracepoint * const __start___tracepoints_ptrs[]; 31 extern struct tracepoint * const __stop___tracepoints_ptrs[]; 32 33 /* Set to 1 to enable tracepoint debug output */ 34 static const int tracepoint_debug; 35 36 /* 37 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the 38 * builtin and module tracepoints and the hash table. 39 */ 40 static DEFINE_MUTEX(tracepoints_mutex); 41 42 /* 43 * Tracepoint hash table, containing the active tracepoints. 44 * Protected by tracepoints_mutex. 45 */ 46 #define TRACEPOINT_HASH_BITS 6 47 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 48 static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 49 50 /* 51 * Note about RCU : 52 * It is used to delay the free of multiple probes array until a quiescent 53 * state is reached. 54 * Tracepoint entries modifications are protected by the tracepoints_mutex. 55 */ 56 struct tracepoint_entry { 57 struct hlist_node hlist; 58 struct tracepoint_func *funcs; 59 int refcount; /* Number of times armed. 0 if disarmed. */ 60 char name[0]; 61 }; 62 63 struct tp_probes { 64 union { 65 struct rcu_head rcu; 66 struct list_head list; 67 } u; 68 struct tracepoint_func probes[0]; 69 }; 70 71 static inline void *allocate_probes(int count) 72 { 73 struct tp_probes *p = kmalloc(count * sizeof(struct tracepoint_func) 74 + sizeof(struct tp_probes), GFP_KERNEL); 75 return p == NULL ? NULL : p->probes; 76 } 77 78 static void rcu_free_old_probes(struct rcu_head *head) 79 { 80 kfree(container_of(head, struct tp_probes, u.rcu)); 81 } 82 83 static inline void release_probes(struct tracepoint_func *old) 84 { 85 if (old) { 86 struct tp_probes *tp_probes = container_of(old, 87 struct tp_probes, probes[0]); 88 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); 89 } 90 } 91 92 static void debug_print_probes(struct tracepoint_entry *entry) 93 { 94 int i; 95 96 if (!tracepoint_debug || !entry->funcs) 97 return; 98 99 for (i = 0; entry->funcs[i].func; i++) 100 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func); 101 } 102 103 static struct tracepoint_func * 104 tracepoint_entry_add_probe(struct tracepoint_entry *entry, 105 void *probe, void *data) 106 { 107 int nr_probes = 0; 108 struct tracepoint_func *old, *new; 109 110 WARN_ON(!probe); 111 112 debug_print_probes(entry); 113 old = entry->funcs; 114 if (old) { 115 /* (N -> N+1), (N != 0, 1) probes */ 116 for (nr_probes = 0; old[nr_probes].func; nr_probes++) 117 if (old[nr_probes].func == probe && 118 old[nr_probes].data == data) 119 return ERR_PTR(-EEXIST); 120 } 121 /* + 2 : one for new probe, one for NULL func */ 122 new = allocate_probes(nr_probes + 2); 123 if (new == NULL) 124 return ERR_PTR(-ENOMEM); 125 if (old) 126 memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); 127 new[nr_probes].func = probe; 128 new[nr_probes].data = data; 129 new[nr_probes + 1].func = NULL; 130 entry->refcount = nr_probes + 1; 131 entry->funcs = new; 132 debug_print_probes(entry); 133 return old; 134 } 135 136 static void * 137 tracepoint_entry_remove_probe(struct tracepoint_entry *entry, 138 void *probe, void *data) 139 { 140 int nr_probes = 0, nr_del = 0, i; 141 struct tracepoint_func *old, *new; 142 143 old = entry->funcs; 144 145 if (!old) 146 return ERR_PTR(-ENOENT); 147 148 debug_print_probes(entry); 149 /* (N -> M), (N > 1, M >= 0) probes */ 150 for (nr_probes = 0; old[nr_probes].func; nr_probes++) { 151 if (!probe || 152 (old[nr_probes].func == probe && 153 old[nr_probes].data == data)) 154 nr_del++; 155 } 156 157 if (nr_probes - nr_del == 0) { 158 /* N -> 0, (N > 1) */ 159 entry->funcs = NULL; 160 entry->refcount = 0; 161 debug_print_probes(entry); 162 return old; 163 } else { 164 int j = 0; 165 /* N -> M, (N > 1, M > 0) */ 166 /* + 1 for NULL */ 167 new = allocate_probes(nr_probes - nr_del + 1); 168 if (new == NULL) 169 return ERR_PTR(-ENOMEM); 170 for (i = 0; old[i].func; i++) 171 if (probe && 172 (old[i].func != probe || old[i].data != data)) 173 new[j++] = old[i]; 174 new[nr_probes - nr_del].func = NULL; 175 entry->refcount = nr_probes - nr_del; 176 entry->funcs = new; 177 } 178 debug_print_probes(entry); 179 return old; 180 } 181 182 /* 183 * Get tracepoint if the tracepoint is present in the tracepoint hash table. 184 * Must be called with tracepoints_mutex held. 185 * Returns NULL if not present. 186 */ 187 static struct tracepoint_entry *get_tracepoint(const char *name) 188 { 189 struct hlist_head *head; 190 struct hlist_node *node; 191 struct tracepoint_entry *e; 192 u32 hash = jhash(name, strlen(name), 0); 193 194 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; 195 hlist_for_each_entry(e, node, head, hlist) { 196 if (!strcmp(name, e->name)) 197 return e; 198 } 199 return NULL; 200 } 201 202 /* 203 * Add the tracepoint to the tracepoint hash table. Must be called with 204 * tracepoints_mutex held. 205 */ 206 static struct tracepoint_entry *add_tracepoint(const char *name) 207 { 208 struct hlist_head *head; 209 struct hlist_node *node; 210 struct tracepoint_entry *e; 211 size_t name_len = strlen(name) + 1; 212 u32 hash = jhash(name, name_len-1, 0); 213 214 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; 215 hlist_for_each_entry(e, node, head, hlist) { 216 if (!strcmp(name, e->name)) { 217 printk(KERN_NOTICE 218 "tracepoint %s busy\n", name); 219 return ERR_PTR(-EEXIST); /* Already there */ 220 } 221 } 222 /* 223 * Using kmalloc here to allocate a variable length element. Could 224 * cause some memory fragmentation if overused. 225 */ 226 e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); 227 if (!e) 228 return ERR_PTR(-ENOMEM); 229 memcpy(&e->name[0], name, name_len); 230 e->funcs = NULL; 231 e->refcount = 0; 232 hlist_add_head(&e->hlist, head); 233 return e; 234 } 235 236 /* 237 * Remove the tracepoint from the tracepoint hash table. Must be called with 238 * mutex_lock held. 239 */ 240 static inline void remove_tracepoint(struct tracepoint_entry *e) 241 { 242 hlist_del(&e->hlist); 243 kfree(e); 244 } 245 246 /* 247 * Sets the probe callback corresponding to one tracepoint. 248 */ 249 static void set_tracepoint(struct tracepoint_entry **entry, 250 struct tracepoint *elem, int active) 251 { 252 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 253 254 if (elem->regfunc && !jump_label_enabled(&elem->key) && active) 255 elem->regfunc(); 256 else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) 257 elem->unregfunc(); 258 259 /* 260 * rcu_assign_pointer has a smp_wmb() which makes sure that the new 261 * probe callbacks array is consistent before setting a pointer to it. 262 * This array is referenced by __DO_TRACE from 263 * include/linux/tracepoints.h. A matching smp_read_barrier_depends() 264 * is used. 265 */ 266 rcu_assign_pointer(elem->funcs, (*entry)->funcs); 267 if (active && !jump_label_enabled(&elem->key)) 268 jump_label_inc(&elem->key); 269 else if (!active && jump_label_enabled(&elem->key)) 270 jump_label_dec(&elem->key); 271 } 272 273 /* 274 * Disable a tracepoint and its probe callback. 275 * Note: only waiting an RCU period after setting elem->call to the empty 276 * function insures that the original callback is not used anymore. This insured 277 * by preempt_disable around the call site. 278 */ 279 static void disable_tracepoint(struct tracepoint *elem) 280 { 281 if (elem->unregfunc && jump_label_enabled(&elem->key)) 282 elem->unregfunc(); 283 284 if (jump_label_enabled(&elem->key)) 285 jump_label_dec(&elem->key); 286 rcu_assign_pointer(elem->funcs, NULL); 287 } 288 289 /** 290 * tracepoint_update_probe_range - Update a probe range 291 * @begin: beginning of the range 292 * @end: end of the range 293 * 294 * Updates the probe callback corresponding to a range of tracepoints. 295 */ 296 void tracepoint_update_probe_range(struct tracepoint * const *begin, 297 struct tracepoint * const *end) 298 { 299 struct tracepoint * const *iter; 300 struct tracepoint_entry *mark_entry; 301 302 if (!begin) 303 return; 304 305 mutex_lock(&tracepoints_mutex); 306 for (iter = begin; iter < end; iter++) { 307 mark_entry = get_tracepoint((*iter)->name); 308 if (mark_entry) { 309 set_tracepoint(&mark_entry, *iter, 310 !!mark_entry->refcount); 311 } else { 312 disable_tracepoint(*iter); 313 } 314 } 315 mutex_unlock(&tracepoints_mutex); 316 } 317 318 /* 319 * Update probes, removing the faulty probes. 320 */ 321 static void tracepoint_update_probes(void) 322 { 323 /* Core kernel tracepoints */ 324 tracepoint_update_probe_range(__start___tracepoints_ptrs, 325 __stop___tracepoints_ptrs); 326 /* tracepoints in modules. */ 327 module_update_tracepoints(); 328 } 329 330 static struct tracepoint_func * 331 tracepoint_add_probe(const char *name, void *probe, void *data) 332 { 333 struct tracepoint_entry *entry; 334 struct tracepoint_func *old; 335 336 entry = get_tracepoint(name); 337 if (!entry) { 338 entry = add_tracepoint(name); 339 if (IS_ERR(entry)) 340 return (struct tracepoint_func *)entry; 341 } 342 old = tracepoint_entry_add_probe(entry, probe, data); 343 if (IS_ERR(old) && !entry->refcount) 344 remove_tracepoint(entry); 345 return old; 346 } 347 348 /** 349 * tracepoint_probe_register - Connect a probe to a tracepoint 350 * @name: tracepoint name 351 * @probe: probe handler 352 * 353 * Returns 0 if ok, error value on error. 354 * The probe address must at least be aligned on the architecture pointer size. 355 */ 356 int tracepoint_probe_register(const char *name, void *probe, void *data) 357 { 358 struct tracepoint_func *old; 359 360 mutex_lock(&tracepoints_mutex); 361 old = tracepoint_add_probe(name, probe, data); 362 mutex_unlock(&tracepoints_mutex); 363 if (IS_ERR(old)) 364 return PTR_ERR(old); 365 366 tracepoint_update_probes(); /* may update entry */ 367 release_probes(old); 368 return 0; 369 } 370 EXPORT_SYMBOL_GPL(tracepoint_probe_register); 371 372 static struct tracepoint_func * 373 tracepoint_remove_probe(const char *name, void *probe, void *data) 374 { 375 struct tracepoint_entry *entry; 376 struct tracepoint_func *old; 377 378 entry = get_tracepoint(name); 379 if (!entry) 380 return ERR_PTR(-ENOENT); 381 old = tracepoint_entry_remove_probe(entry, probe, data); 382 if (IS_ERR(old)) 383 return old; 384 if (!entry->refcount) 385 remove_tracepoint(entry); 386 return old; 387 } 388 389 /** 390 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 391 * @name: tracepoint name 392 * @probe: probe function pointer 393 * 394 * We do not need to call a synchronize_sched to make sure the probes have 395 * finished running before doing a module unload, because the module unload 396 * itself uses stop_machine(), which insures that every preempt disabled section 397 * have finished. 398 */ 399 int tracepoint_probe_unregister(const char *name, void *probe, void *data) 400 { 401 struct tracepoint_func *old; 402 403 mutex_lock(&tracepoints_mutex); 404 old = tracepoint_remove_probe(name, probe, data); 405 mutex_unlock(&tracepoints_mutex); 406 if (IS_ERR(old)) 407 return PTR_ERR(old); 408 409 tracepoint_update_probes(); /* may update entry */ 410 release_probes(old); 411 return 0; 412 } 413 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 414 415 static LIST_HEAD(old_probes); 416 static int need_update; 417 418 static void tracepoint_add_old_probes(void *old) 419 { 420 need_update = 1; 421 if (old) { 422 struct tp_probes *tp_probes = container_of(old, 423 struct tp_probes, probes[0]); 424 list_add(&tp_probes->u.list, &old_probes); 425 } 426 } 427 428 /** 429 * tracepoint_probe_register_noupdate - register a probe but not connect 430 * @name: tracepoint name 431 * @probe: probe handler 432 * 433 * caller must call tracepoint_probe_update_all() 434 */ 435 int tracepoint_probe_register_noupdate(const char *name, void *probe, 436 void *data) 437 { 438 struct tracepoint_func *old; 439 440 mutex_lock(&tracepoints_mutex); 441 old = tracepoint_add_probe(name, probe, data); 442 if (IS_ERR(old)) { 443 mutex_unlock(&tracepoints_mutex); 444 return PTR_ERR(old); 445 } 446 tracepoint_add_old_probes(old); 447 mutex_unlock(&tracepoints_mutex); 448 return 0; 449 } 450 EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); 451 452 /** 453 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect 454 * @name: tracepoint name 455 * @probe: probe function pointer 456 * 457 * caller must call tracepoint_probe_update_all() 458 */ 459 int tracepoint_probe_unregister_noupdate(const char *name, void *probe, 460 void *data) 461 { 462 struct tracepoint_func *old; 463 464 mutex_lock(&tracepoints_mutex); 465 old = tracepoint_remove_probe(name, probe, data); 466 if (IS_ERR(old)) { 467 mutex_unlock(&tracepoints_mutex); 468 return PTR_ERR(old); 469 } 470 tracepoint_add_old_probes(old); 471 mutex_unlock(&tracepoints_mutex); 472 return 0; 473 } 474 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); 475 476 /** 477 * tracepoint_probe_update_all - update tracepoints 478 */ 479 void tracepoint_probe_update_all(void) 480 { 481 LIST_HEAD(release_probes); 482 struct tp_probes *pos, *next; 483 484 mutex_lock(&tracepoints_mutex); 485 if (!need_update) { 486 mutex_unlock(&tracepoints_mutex); 487 return; 488 } 489 if (!list_empty(&old_probes)) 490 list_replace_init(&old_probes, &release_probes); 491 need_update = 0; 492 mutex_unlock(&tracepoints_mutex); 493 494 tracepoint_update_probes(); 495 list_for_each_entry_safe(pos, next, &release_probes, u.list) { 496 list_del(&pos->u.list); 497 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); 498 } 499 } 500 EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); 501 502 /** 503 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 504 * @tracepoint: current tracepoints (in), next tracepoint (out) 505 * @begin: beginning of the range 506 * @end: end of the range 507 * 508 * Returns whether a next tracepoint has been found (1) or not (0). 509 * Will return the first tracepoint in the range if the input tracepoint is 510 * NULL. 511 */ 512 int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, 513 struct tracepoint * const *begin, struct tracepoint * const *end) 514 { 515 if (!*tracepoint && begin != end) { 516 *tracepoint = begin; 517 return 1; 518 } 519 if (*tracepoint >= begin && *tracepoint < end) 520 return 1; 521 return 0; 522 } 523 EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); 524 525 static void tracepoint_get_iter(struct tracepoint_iter *iter) 526 { 527 int found = 0; 528 529 /* Core kernel tracepoints */ 530 if (!iter->module) { 531 found = tracepoint_get_iter_range(&iter->tracepoint, 532 __start___tracepoints_ptrs, 533 __stop___tracepoints_ptrs); 534 if (found) 535 goto end; 536 } 537 /* tracepoints in modules. */ 538 found = module_get_iter_tracepoints(iter); 539 end: 540 if (!found) 541 tracepoint_iter_reset(iter); 542 } 543 544 void tracepoint_iter_start(struct tracepoint_iter *iter) 545 { 546 tracepoint_get_iter(iter); 547 } 548 EXPORT_SYMBOL_GPL(tracepoint_iter_start); 549 550 void tracepoint_iter_next(struct tracepoint_iter *iter) 551 { 552 iter->tracepoint++; 553 /* 554 * iter->tracepoint may be invalid because we blindly incremented it. 555 * Make sure it is valid by marshalling on the tracepoints, getting the 556 * tracepoints from following modules if necessary. 557 */ 558 tracepoint_get_iter(iter); 559 } 560 EXPORT_SYMBOL_GPL(tracepoint_iter_next); 561 562 void tracepoint_iter_stop(struct tracepoint_iter *iter) 563 { 564 } 565 EXPORT_SYMBOL_GPL(tracepoint_iter_stop); 566 567 void tracepoint_iter_reset(struct tracepoint_iter *iter) 568 { 569 iter->module = NULL; 570 iter->tracepoint = NULL; 571 } 572 EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 573 574 #ifdef CONFIG_MODULES 575 576 int tracepoint_module_notify(struct notifier_block *self, 577 unsigned long val, void *data) 578 { 579 struct module *mod = data; 580 581 switch (val) { 582 case MODULE_STATE_COMING: 583 case MODULE_STATE_GOING: 584 tracepoint_update_probe_range(mod->tracepoints_ptrs, 585 mod->tracepoints_ptrs + mod->num_tracepoints); 586 break; 587 } 588 return 0; 589 } 590 591 struct notifier_block tracepoint_module_nb = { 592 .notifier_call = tracepoint_module_notify, 593 .priority = 0, 594 }; 595 596 static int init_tracepoints(void) 597 { 598 return register_module_notifier(&tracepoint_module_nb); 599 } 600 __initcall(init_tracepoints); 601 602 #endif /* CONFIG_MODULES */ 603 604 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS 605 606 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 607 static int sys_tracepoint_refcount; 608 609 void syscall_regfunc(void) 610 { 611 unsigned long flags; 612 struct task_struct *g, *t; 613 614 if (!sys_tracepoint_refcount) { 615 read_lock_irqsave(&tasklist_lock, flags); 616 do_each_thread(g, t) { 617 /* Skip kernel threads. */ 618 if (t->mm) 619 set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 620 } while_each_thread(g, t); 621 read_unlock_irqrestore(&tasklist_lock, flags); 622 } 623 sys_tracepoint_refcount++; 624 } 625 626 void syscall_unregfunc(void) 627 { 628 unsigned long flags; 629 struct task_struct *g, *t; 630 631 sys_tracepoint_refcount--; 632 if (!sys_tracepoint_refcount) { 633 read_lock_irqsave(&tasklist_lock, flags); 634 do_each_thread(g, t) { 635 clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 636 } while_each_thread(g, t); 637 read_unlock_irqrestore(&tasklist_lock, flags); 638 } 639 } 640 #endif 641