1 /* 2 * Copyright (C) 2008 Mathieu Desnoyers 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 */ 18 #include <linux/module.h> 19 #include <linux/mutex.h> 20 #include <linux/types.h> 21 #include <linux/jhash.h> 22 #include <linux/list.h> 23 #include <linux/rcupdate.h> 24 #include <linux/tracepoint.h> 25 #include <linux/err.h> 26 #include <linux/slab.h> 27 #include <linux/sched.h> 28 29 extern struct tracepoint __start___tracepoints[]; 30 extern struct tracepoint __stop___tracepoints[]; 31 32 /* Set to 1 to enable tracepoint debug output */ 33 static const int tracepoint_debug; 34 35 /* 36 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the 37 * builtin and module tracepoints and the hash table. 38 */ 39 static DEFINE_MUTEX(tracepoints_mutex); 40 41 /* 42 * Tracepoint hash table, containing the active tracepoints. 43 * Protected by tracepoints_mutex. 44 */ 45 #define TRACEPOINT_HASH_BITS 6 46 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 47 static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 48 49 /* 50 * Note about RCU : 51 * It is used to delay the free of multiple probes array until a quiescent 52 * state is reached. 53 * Tracepoint entries modifications are protected by the tracepoints_mutex. 54 */ 55 struct tracepoint_entry { 56 struct hlist_node hlist; 57 struct tracepoint_func *funcs; 58 int refcount; /* Number of times armed. 0 if disarmed. */ 59 char name[0]; 60 }; 61 62 struct tp_probes { 63 union { 64 struct rcu_head rcu; 65 struct list_head list; 66 } u; 67 struct tracepoint_func probes[0]; 68 }; 69 70 static inline void *allocate_probes(int count) 71 { 72 struct tp_probes *p = kmalloc(count * sizeof(struct tracepoint_func) 73 + sizeof(struct tp_probes), GFP_KERNEL); 74 return p == NULL ? NULL : p->probes; 75 } 76 77 static void rcu_free_old_probes(struct rcu_head *head) 78 { 79 kfree(container_of(head, struct tp_probes, u.rcu)); 80 } 81 82 static inline void release_probes(struct tracepoint_func *old) 83 { 84 if (old) { 85 struct tp_probes *tp_probes = container_of(old, 86 struct tp_probes, probes[0]); 87 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); 88 } 89 } 90 91 static void debug_print_probes(struct tracepoint_entry *entry) 92 { 93 int i; 94 95 if (!tracepoint_debug || !entry->funcs) 96 return; 97 98 for (i = 0; entry->funcs[i].func; i++) 99 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func); 100 } 101 102 static struct tracepoint_func * 103 tracepoint_entry_add_probe(struct tracepoint_entry *entry, 104 void *probe, void *data) 105 { 106 int nr_probes = 0; 107 struct tracepoint_func *old, *new; 108 109 WARN_ON(!probe); 110 111 debug_print_probes(entry); 112 old = entry->funcs; 113 if (old) { 114 /* (N -> N+1), (N != 0, 1) probes */ 115 for (nr_probes = 0; old[nr_probes].func; nr_probes++) 116 if (old[nr_probes].func == probe && 117 old[nr_probes].data == data) 118 return ERR_PTR(-EEXIST); 119 } 120 /* + 2 : one for new probe, one for NULL func */ 121 new = allocate_probes(nr_probes + 2); 122 if (new == NULL) 123 return ERR_PTR(-ENOMEM); 124 if (old) 125 memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); 126 new[nr_probes].func = probe; 127 new[nr_probes].data = data; 128 new[nr_probes + 1].func = NULL; 129 entry->refcount = nr_probes + 1; 130 entry->funcs = new; 131 debug_print_probes(entry); 132 return old; 133 } 134 135 static void * 136 tracepoint_entry_remove_probe(struct tracepoint_entry *entry, 137 void *probe, void *data) 138 { 139 int nr_probes = 0, nr_del = 0, i; 140 struct tracepoint_func *old, *new; 141 142 old = entry->funcs; 143 144 if (!old) 145 return ERR_PTR(-ENOENT); 146 147 debug_print_probes(entry); 148 /* (N -> M), (N > 1, M >= 0) probes */ 149 for (nr_probes = 0; old[nr_probes].func; nr_probes++) { 150 if (!probe || 151 (old[nr_probes].func == probe && 152 old[nr_probes].data == data)) 153 nr_del++; 154 } 155 156 if (nr_probes - nr_del == 0) { 157 /* N -> 0, (N > 1) */ 158 entry->funcs = NULL; 159 entry->refcount = 0; 160 debug_print_probes(entry); 161 return old; 162 } else { 163 int j = 0; 164 /* N -> M, (N > 1, M > 0) */ 165 /* + 1 for NULL */ 166 new = allocate_probes(nr_probes - nr_del + 1); 167 if (new == NULL) 168 return ERR_PTR(-ENOMEM); 169 for (i = 0; old[i].func; i++) 170 if (probe && 171 (old[i].func != probe || old[i].data != data)) 172 new[j++] = old[i]; 173 new[nr_probes - nr_del].func = NULL; 174 entry->refcount = nr_probes - nr_del; 175 entry->funcs = new; 176 } 177 debug_print_probes(entry); 178 return old; 179 } 180 181 /* 182 * Get tracepoint if the tracepoint is present in the tracepoint hash table. 183 * Must be called with tracepoints_mutex held. 184 * Returns NULL if not present. 185 */ 186 static struct tracepoint_entry *get_tracepoint(const char *name) 187 { 188 struct hlist_head *head; 189 struct hlist_node *node; 190 struct tracepoint_entry *e; 191 u32 hash = jhash(name, strlen(name), 0); 192 193 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; 194 hlist_for_each_entry(e, node, head, hlist) { 195 if (!strcmp(name, e->name)) 196 return e; 197 } 198 return NULL; 199 } 200 201 /* 202 * Add the tracepoint to the tracepoint hash table. Must be called with 203 * tracepoints_mutex held. 204 */ 205 static struct tracepoint_entry *add_tracepoint(const char *name) 206 { 207 struct hlist_head *head; 208 struct hlist_node *node; 209 struct tracepoint_entry *e; 210 size_t name_len = strlen(name) + 1; 211 u32 hash = jhash(name, name_len-1, 0); 212 213 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; 214 hlist_for_each_entry(e, node, head, hlist) { 215 if (!strcmp(name, e->name)) { 216 printk(KERN_NOTICE 217 "tracepoint %s busy\n", name); 218 return ERR_PTR(-EEXIST); /* Already there */ 219 } 220 } 221 /* 222 * Using kmalloc here to allocate a variable length element. Could 223 * cause some memory fragmentation if overused. 224 */ 225 e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); 226 if (!e) 227 return ERR_PTR(-ENOMEM); 228 memcpy(&e->name[0], name, name_len); 229 e->funcs = NULL; 230 e->refcount = 0; 231 hlist_add_head(&e->hlist, head); 232 return e; 233 } 234 235 /* 236 * Remove the tracepoint from the tracepoint hash table. Must be called with 237 * mutex_lock held. 238 */ 239 static inline void remove_tracepoint(struct tracepoint_entry *e) 240 { 241 hlist_del(&e->hlist); 242 kfree(e); 243 } 244 245 /* 246 * Sets the probe callback corresponding to one tracepoint. 247 */ 248 static void set_tracepoint(struct tracepoint_entry **entry, 249 struct tracepoint *elem, int active) 250 { 251 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 252 253 if (elem->regfunc && !elem->state && active) 254 elem->regfunc(); 255 else if (elem->unregfunc && elem->state && !active) 256 elem->unregfunc(); 257 258 /* 259 * rcu_assign_pointer has a smp_wmb() which makes sure that the new 260 * probe callbacks array is consistent before setting a pointer to it. 261 * This array is referenced by __DO_TRACE from 262 * include/linux/tracepoints.h. A matching smp_read_barrier_depends() 263 * is used. 264 */ 265 rcu_assign_pointer(elem->funcs, (*entry)->funcs); 266 elem->state = active; 267 } 268 269 /* 270 * Disable a tracepoint and its probe callback. 271 * Note: only waiting an RCU period after setting elem->call to the empty 272 * function insures that the original callback is not used anymore. This insured 273 * by preempt_disable around the call site. 274 */ 275 static void disable_tracepoint(struct tracepoint *elem) 276 { 277 if (elem->unregfunc && elem->state) 278 elem->unregfunc(); 279 280 elem->state = 0; 281 rcu_assign_pointer(elem->funcs, NULL); 282 } 283 284 /** 285 * tracepoint_update_probe_range - Update a probe range 286 * @begin: beginning of the range 287 * @end: end of the range 288 * 289 * Updates the probe callback corresponding to a range of tracepoints. 290 */ 291 void 292 tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) 293 { 294 struct tracepoint *iter; 295 struct tracepoint_entry *mark_entry; 296 297 if (!begin) 298 return; 299 300 mutex_lock(&tracepoints_mutex); 301 for (iter = begin; iter < end; iter++) { 302 mark_entry = get_tracepoint(iter->name); 303 if (mark_entry) { 304 set_tracepoint(&mark_entry, iter, 305 !!mark_entry->refcount); 306 } else { 307 disable_tracepoint(iter); 308 } 309 } 310 mutex_unlock(&tracepoints_mutex); 311 } 312 313 /* 314 * Update probes, removing the faulty probes. 315 */ 316 static void tracepoint_update_probes(void) 317 { 318 /* Core kernel tracepoints */ 319 tracepoint_update_probe_range(__start___tracepoints, 320 __stop___tracepoints); 321 /* tracepoints in modules. */ 322 module_update_tracepoints(); 323 } 324 325 static struct tracepoint_func * 326 tracepoint_add_probe(const char *name, void *probe, void *data) 327 { 328 struct tracepoint_entry *entry; 329 struct tracepoint_func *old; 330 331 entry = get_tracepoint(name); 332 if (!entry) { 333 entry = add_tracepoint(name); 334 if (IS_ERR(entry)) 335 return (struct tracepoint_func *)entry; 336 } 337 old = tracepoint_entry_add_probe(entry, probe, data); 338 if (IS_ERR(old) && !entry->refcount) 339 remove_tracepoint(entry); 340 return old; 341 } 342 343 /** 344 * tracepoint_probe_register - Connect a probe to a tracepoint 345 * @name: tracepoint name 346 * @probe: probe handler 347 * 348 * Returns 0 if ok, error value on error. 349 * The probe address must at least be aligned on the architecture pointer size. 350 */ 351 int tracepoint_probe_register(const char *name, void *probe, void *data) 352 { 353 struct tracepoint_func *old; 354 355 mutex_lock(&tracepoints_mutex); 356 old = tracepoint_add_probe(name, probe, data); 357 mutex_unlock(&tracepoints_mutex); 358 if (IS_ERR(old)) 359 return PTR_ERR(old); 360 361 tracepoint_update_probes(); /* may update entry */ 362 release_probes(old); 363 return 0; 364 } 365 EXPORT_SYMBOL_GPL(tracepoint_probe_register); 366 367 static struct tracepoint_func * 368 tracepoint_remove_probe(const char *name, void *probe, void *data) 369 { 370 struct tracepoint_entry *entry; 371 struct tracepoint_func *old; 372 373 entry = get_tracepoint(name); 374 if (!entry) 375 return ERR_PTR(-ENOENT); 376 old = tracepoint_entry_remove_probe(entry, probe, data); 377 if (IS_ERR(old)) 378 return old; 379 if (!entry->refcount) 380 remove_tracepoint(entry); 381 return old; 382 } 383 384 /** 385 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 386 * @name: tracepoint name 387 * @probe: probe function pointer 388 * 389 * We do not need to call a synchronize_sched to make sure the probes have 390 * finished running before doing a module unload, because the module unload 391 * itself uses stop_machine(), which insures that every preempt disabled section 392 * have finished. 393 */ 394 int tracepoint_probe_unregister(const char *name, void *probe, void *data) 395 { 396 struct tracepoint_func *old; 397 398 mutex_lock(&tracepoints_mutex); 399 old = tracepoint_remove_probe(name, probe, data); 400 mutex_unlock(&tracepoints_mutex); 401 if (IS_ERR(old)) 402 return PTR_ERR(old); 403 404 tracepoint_update_probes(); /* may update entry */ 405 release_probes(old); 406 return 0; 407 } 408 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 409 410 static LIST_HEAD(old_probes); 411 static int need_update; 412 413 static void tracepoint_add_old_probes(void *old) 414 { 415 need_update = 1; 416 if (old) { 417 struct tp_probes *tp_probes = container_of(old, 418 struct tp_probes, probes[0]); 419 list_add(&tp_probes->u.list, &old_probes); 420 } 421 } 422 423 /** 424 * tracepoint_probe_register_noupdate - register a probe but not connect 425 * @name: tracepoint name 426 * @probe: probe handler 427 * 428 * caller must call tracepoint_probe_update_all() 429 */ 430 int tracepoint_probe_register_noupdate(const char *name, void *probe, 431 void *data) 432 { 433 struct tracepoint_func *old; 434 435 mutex_lock(&tracepoints_mutex); 436 old = tracepoint_add_probe(name, probe, data); 437 if (IS_ERR(old)) { 438 mutex_unlock(&tracepoints_mutex); 439 return PTR_ERR(old); 440 } 441 tracepoint_add_old_probes(old); 442 mutex_unlock(&tracepoints_mutex); 443 return 0; 444 } 445 EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); 446 447 /** 448 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect 449 * @name: tracepoint name 450 * @probe: probe function pointer 451 * 452 * caller must call tracepoint_probe_update_all() 453 */ 454 int tracepoint_probe_unregister_noupdate(const char *name, void *probe, 455 void *data) 456 { 457 struct tracepoint_func *old; 458 459 mutex_lock(&tracepoints_mutex); 460 old = tracepoint_remove_probe(name, probe, data); 461 if (IS_ERR(old)) { 462 mutex_unlock(&tracepoints_mutex); 463 return PTR_ERR(old); 464 } 465 tracepoint_add_old_probes(old); 466 mutex_unlock(&tracepoints_mutex); 467 return 0; 468 } 469 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); 470 471 /** 472 * tracepoint_probe_update_all - update tracepoints 473 */ 474 void tracepoint_probe_update_all(void) 475 { 476 LIST_HEAD(release_probes); 477 struct tp_probes *pos, *next; 478 479 mutex_lock(&tracepoints_mutex); 480 if (!need_update) { 481 mutex_unlock(&tracepoints_mutex); 482 return; 483 } 484 if (!list_empty(&old_probes)) 485 list_replace_init(&old_probes, &release_probes); 486 need_update = 0; 487 mutex_unlock(&tracepoints_mutex); 488 489 tracepoint_update_probes(); 490 list_for_each_entry_safe(pos, next, &release_probes, u.list) { 491 list_del(&pos->u.list); 492 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); 493 } 494 } 495 EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); 496 497 /** 498 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 499 * @tracepoint: current tracepoints (in), next tracepoint (out) 500 * @begin: beginning of the range 501 * @end: end of the range 502 * 503 * Returns whether a next tracepoint has been found (1) or not (0). 504 * Will return the first tracepoint in the range if the input tracepoint is 505 * NULL. 506 */ 507 int tracepoint_get_iter_range(struct tracepoint **tracepoint, 508 struct tracepoint *begin, struct tracepoint *end) 509 { 510 if (!*tracepoint && begin != end) { 511 *tracepoint = begin; 512 return 1; 513 } 514 if (*tracepoint >= begin && *tracepoint < end) 515 return 1; 516 return 0; 517 } 518 EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); 519 520 static void tracepoint_get_iter(struct tracepoint_iter *iter) 521 { 522 int found = 0; 523 524 /* Core kernel tracepoints */ 525 if (!iter->module) { 526 found = tracepoint_get_iter_range(&iter->tracepoint, 527 __start___tracepoints, __stop___tracepoints); 528 if (found) 529 goto end; 530 } 531 /* tracepoints in modules. */ 532 found = module_get_iter_tracepoints(iter); 533 end: 534 if (!found) 535 tracepoint_iter_reset(iter); 536 } 537 538 void tracepoint_iter_start(struct tracepoint_iter *iter) 539 { 540 tracepoint_get_iter(iter); 541 } 542 EXPORT_SYMBOL_GPL(tracepoint_iter_start); 543 544 void tracepoint_iter_next(struct tracepoint_iter *iter) 545 { 546 iter->tracepoint++; 547 /* 548 * iter->tracepoint may be invalid because we blindly incremented it. 549 * Make sure it is valid by marshalling on the tracepoints, getting the 550 * tracepoints from following modules if necessary. 551 */ 552 tracepoint_get_iter(iter); 553 } 554 EXPORT_SYMBOL_GPL(tracepoint_iter_next); 555 556 void tracepoint_iter_stop(struct tracepoint_iter *iter) 557 { 558 } 559 EXPORT_SYMBOL_GPL(tracepoint_iter_stop); 560 561 void tracepoint_iter_reset(struct tracepoint_iter *iter) 562 { 563 iter->module = NULL; 564 iter->tracepoint = NULL; 565 } 566 EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 567 568 #ifdef CONFIG_MODULES 569 570 int tracepoint_module_notify(struct notifier_block *self, 571 unsigned long val, void *data) 572 { 573 struct module *mod = data; 574 575 switch (val) { 576 case MODULE_STATE_COMING: 577 case MODULE_STATE_GOING: 578 tracepoint_update_probe_range(mod->tracepoints, 579 mod->tracepoints + mod->num_tracepoints); 580 break; 581 } 582 return 0; 583 } 584 585 struct notifier_block tracepoint_module_nb = { 586 .notifier_call = tracepoint_module_notify, 587 .priority = 0, 588 }; 589 590 static int init_tracepoints(void) 591 { 592 return register_module_notifier(&tracepoint_module_nb); 593 } 594 __initcall(init_tracepoints); 595 596 #endif /* CONFIG_MODULES */ 597 598 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS 599 600 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 601 static int sys_tracepoint_refcount; 602 603 void syscall_regfunc(void) 604 { 605 unsigned long flags; 606 struct task_struct *g, *t; 607 608 if (!sys_tracepoint_refcount) { 609 read_lock_irqsave(&tasklist_lock, flags); 610 do_each_thread(g, t) { 611 /* Skip kernel threads. */ 612 if (t->mm) 613 set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 614 } while_each_thread(g, t); 615 read_unlock_irqrestore(&tasklist_lock, flags); 616 } 617 sys_tracepoint_refcount++; 618 } 619 620 void syscall_unregfunc(void) 621 { 622 unsigned long flags; 623 struct task_struct *g, *t; 624 625 sys_tracepoint_refcount--; 626 if (!sys_tracepoint_refcount) { 627 read_lock_irqsave(&tasklist_lock, flags); 628 do_each_thread(g, t) { 629 clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 630 } while_each_thread(g, t); 631 read_unlock_irqrestore(&tasklist_lock, flags); 632 } 633 } 634 #endif 635