1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/cache.h" 6 #include "util/symbol.h" 7 #include "util/thread.h" 8 #include "util/header.h" 9 10 #include "util/parse-options.h" 11 #include "util/trace-event.h" 12 13 #include "util/debug.h" 14 #include "util/session.h" 15 16 #include <sys/types.h> 17 #include <sys/prctl.h> 18 #include <semaphore.h> 19 #include <pthread.h> 20 #include <math.h> 21 #include <limits.h> 22 23 #include <linux/list.h> 24 #include <linux/hash.h> 25 26 /* based on kernel/lockdep.c */ 27 #define LOCKHASH_BITS 12 28 #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) 29 30 static struct list_head lockhash_table[LOCKHASH_SIZE]; 31 32 #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) 33 #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) 34 35 #define LOCK_STATE_UNLOCKED 0 /* initial state */ 36 #define LOCK_STATE_LOCKED 1 37 38 struct lock_stat { 39 struct list_head hash_entry; 40 struct rb_node rb; /* used for sorting */ 41 42 /* 43 * FIXME: raw_field_value() returns unsigned long long, 44 * so address of lockdep_map should be dealed as 64bit. 45 * Is there more better solution? 46 */ 47 void *addr; /* address of lockdep_map, used as ID */ 48 char *name; /* for strcpy(), we cannot use const */ 49 50 int state; 51 u64 prev_event_time; /* timestamp of previous event */ 52 53 unsigned int nr_acquired; 54 unsigned int nr_acquire; 55 unsigned int nr_contended; 56 unsigned int nr_release; 57 58 /* these times are in nano sec. */ 59 u64 wait_time_total; 60 u64 wait_time_min; 61 u64 wait_time_max; 62 }; 63 64 /* build simple key function one is bigger than two */ 65 #define SINGLE_KEY(member) \ 66 static int lock_stat_key_ ## member(struct lock_stat *one, \ 67 struct lock_stat *two) \ 68 { \ 69 return one->member > two->member; \ 70 } 71 72 SINGLE_KEY(nr_acquired) 73 SINGLE_KEY(nr_contended) 74 SINGLE_KEY(wait_time_total) 75 SINGLE_KEY(wait_time_min) 76 SINGLE_KEY(wait_time_max) 77 78 struct lock_key { 79 /* 80 * name: the value for specify by user 81 * this should be simpler than raw name of member 82 * e.g. 
struct lock_key {
	/*
	 * name: the key name specified by the user.
	 * It should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char	*name;
	int		(*key)(struct lock_stat*, struct lock_stat*);
};

static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root result;	/* place to store sorted data */

#define DEF_KEY_LOCK(name, fn_suffix)	\
	{ #name, lock_stat_key_ ## fn_suffix }
struct lock_key keys[] = {
	DEF_KEY_LOCK(acquired, nr_acquired),
	DEF_KEY_LOCK(contended, nr_contended),
	DEF_KEY_LOCK(wait_total, wait_time_total),
	DEF_KEY_LOCK(wait_min, wait_time_min),
	DEF_KEY_LOCK(wait_max, wait_time_max),

	/* more complicated comparisons should go here */

	{ NULL, NULL }
};

static void select_key(void)
{
	int i;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;
			return;
		}
	}

	die("Unknown compare key: %s\n", sort_key);
}

static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}

/* return the leftmost element of result and erase it from the tree */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}

static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
{
	struct list_head *entry = lockhashentry(addr);
	struct lock_stat *ret, *new;

	list_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}

	new = zalloc(sizeof(struct lock_stat));
	if (!new)
		goto alloc_failed;

	new->addr = addr;
	new->name = zalloc(strlen(name) + 1);
	if (!new->name)
		goto alloc_failed;
	strcpy(new->name, name);

	/* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
	new->state = LOCK_STATE_UNLOCKED;
	new->wait_time_min = ULLONG_MAX;

	list_add(&new->hash_entry, entry);
	return new;

alloc_failed:
	die("memory allocation failed\n");
}

static char const *input_name = "perf.data";

static int profile_cpu = -1;

struct raw_event_sample {
	u32	size;
	char	data[0];
};

struct trace_acquire_event {
	void		*addr;
	const char	*name;
};

struct trace_acquired_event {
	void		*addr;
	const char	*name;
};

struct trace_contended_event {
	void		*addr;
	const char	*name;
};

struct trace_release_event {
	void		*addr;
	const char	*name;
};
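/*
 * Per-tracepoint callbacks. Only the lock oriented "report" handlers
 * are implemented below; CPU oriented or thread oriented handlers
 * could be plugged in through the same interface.
 */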
struct trace_lock_handler {
	void (*acquire_event)(struct trace_acquire_event *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*acquired_event)(struct trace_acquired_event *,
			       struct event *,
			       int cpu,
			       u64 timestamp,
			       struct thread *thread);

	void (*contended_event)(struct trace_contended_event *,
				struct event *,
				int cpu,
				u64 timestamp,
				struct thread *thread);

	void (*release_event)(struct trace_release_event *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);
};

static void
report_lock_acquire_event(struct trace_acquire_event *acquire_event,
			  struct event *__event __used,
			  int cpu __used,
			  u64 timestamp,
			  struct thread *thread __used)
{
	struct lock_stat *st;

	st = lock_stat_findnew(acquire_event->addr, acquire_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		break;
	case LOCK_STATE_LOCKED:
		break;
	default:
		BUG_ON(1);
		break;
	}

	st->prev_event_time = timestamp;
}

static void
report_lock_acquired_event(struct trace_acquired_event *acquired_event,
			   struct event *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct lock_stat *st;

	st = lock_stat_findnew(acquired_event->addr, acquired_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		st->state = LOCK_STATE_LOCKED;
		st->nr_acquired++;
		break;
	case LOCK_STATE_LOCKED:
		break;
	default:
		BUG_ON(1);
		break;
	}

	st->prev_event_time = timestamp;
}

static void
report_lock_contended_event(struct trace_contended_event *contended_event,
			    struct event *__event __used,
			    int cpu __used,
			    u64 timestamp,
			    struct thread *thread __used)
{
	struct lock_stat *st;

	st = lock_stat_findnew(contended_event->addr, contended_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		break;
	case LOCK_STATE_LOCKED:
		st->nr_contended++;
		break;
	default:
		BUG_ON(1);
		break;
	}

	st->prev_event_time = timestamp;
}

static void
report_lock_release_event(struct trace_release_event *release_event,
			  struct event *__event __used,
			  int cpu __used,
			  u64 timestamp,
			  struct thread *thread __used)
{
	struct lock_stat *st;
	u64 hold_time;

	st = lock_stat_findnew(release_event->addr, release_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		break;
	case LOCK_STATE_LOCKED:
		st->state = LOCK_STATE_UNLOCKED;

		if (timestamp < st->prev_event_time) {
			/* unfortunately, this can happen... just skip the sample */
			goto end;
		}

		hold_time = timestamp - st->prev_event_time;

		if (st->wait_time_min > hold_time)
			st->wait_time_min = hold_time;
		if (st->wait_time_max < hold_time)
			st->wait_time_max = hold_time;
		st->wait_time_total += hold_time;

		st->nr_release++;
		break;
	default:
		BUG_ON(1);
		break;
	}

end:
	st->prev_event_time = timestamp;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented views */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
};

static struct trace_lock_handler *trace_handler;
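/*
 * The process_lock_* functions below unmarshal the raw tracepoint
 * payload (the lockdep_addr and name fields) with raw_field_value()
 * and raw_field_ptr(), then dispatch to the registered trace_handler
 * callbacks.
 */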
static void
process_lock_acquire_event(void *data,
			   struct event *event __used,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_acquire_event acquire_event;
	u64 tmp;	/* intermediate variable, required for the 64-bit cast */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&acquire_event.addr, &tmp, sizeof(void *));
	acquire_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->acquire_event)
		trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
}

static void
process_lock_acquired_event(void *data,
			    struct event *event __used,
			    int cpu __used,
			    u64 timestamp __used,
			    struct thread *thread __used)
{
	struct trace_acquired_event acquired_event;
	u64 tmp;	/* intermediate variable, required for the 64-bit cast */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&acquired_event.addr, &tmp, sizeof(void *));
	acquired_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->acquired_event)
		trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread);
}

static void
process_lock_contended_event(void *data,
			     struct event *event __used,
			     int cpu __used,
			     u64 timestamp __used,
			     struct thread *thread __used)
{
	struct trace_contended_event contended_event;
	u64 tmp;	/* intermediate variable, required for the 64-bit cast */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&contended_event.addr, &tmp, sizeof(void *));
	contended_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->contended_event)
		trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread);
}

static void
process_lock_release_event(void *data,
			   struct event *event __used,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_release_event release_event;
	u64 tmp;	/* intermediate variable, required for the 64-bit cast */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&release_event.addr, &tmp, sizeof(void *));
	release_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->release_event)
		trace_handler->release_event(&release_event, event, cpu, timestamp, thread);
}

static void
process_raw_event(void *data, int cpu,
		  u64 timestamp, struct thread *thread)
{
	struct event *event;
	int type;

	type = trace_parse_common_type(data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "lock_acquire"))
		process_lock_acquire_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "lock_acquired"))
		process_lock_acquired_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "lock_contended"))
		process_lock_contended_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "lock_release"))
		process_lock_release_event(data, event, cpu, timestamp, thread);
}
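/*
 * Samples can arrive out of timestamp order across the per-CPU
 * buffers, so they are first inserted into this time-ordered queue
 * and only processed once a full FLUSH_PERIOD has safely elapsed
 * (see queue_raw_event() below).
 */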
struct raw_event_queue {
	u64			timestamp;
	int			cpu;
	void			*data;
	struct thread		*thread;
	struct list_head	list;
};

static LIST_HEAD(raw_event_head);

#define FLUSH_PERIOD	(5 * NSEC_PER_SEC)

static u64 flush_limit = ULLONG_MAX;
static u64 last_flush = 0;
static struct raw_event_queue *last_inserted;

static void flush_raw_event_queue(u64 limit)
{
	struct raw_event_queue *tmp, *iter;

	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
		if (iter->timestamp > limit)
			return;

		if (iter == last_inserted)
			last_inserted = NULL;

		process_raw_event(iter->data, iter->cpu, iter->timestamp,
				  iter->thread);

		last_flush = iter->timestamp;
		list_del(&iter->list);
		free(iter->data);
		free(iter);
	}
}

static void __queue_raw_event_end(struct raw_event_queue *new)
{
	struct raw_event_queue *iter;

	list_for_each_entry_reverse(iter, &raw_event_head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, &raw_event_head);
}

static void __queue_raw_event_before(struct raw_event_queue *new,
				     struct raw_event_queue *iter)
{
	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, &raw_event_head);
}

static void __queue_raw_event_after(struct raw_event_queue *new,
				    struct raw_event_queue *iter)
{
	list_for_each_entry_continue(iter, &raw_event_head, list) {
		if (iter->timestamp > new->timestamp) {
			list_add_tail(&new->list, &iter->list);
			return;
		}
	}
	list_add_tail(&new->list, &raw_event_head);
}

/* The queue is ordered by time */
static void __queue_raw_event(struct raw_event_queue *new)
{
	if (!last_inserted) {
		__queue_raw_event_end(new);
		return;
	}

	/*
	 * Most of the time the current event has a timestamp
	 * very close to the last inserted event, unless we just switched
	 * to another event buffer. Starting the sorted insertion from the
	 * last inserted event, which is close to the current one, is
	 * probably more efficient than an rbtree based sorting.
	 */
	if (last_inserted->timestamp >= new->timestamp)
		__queue_raw_event_before(new, last_inserted);
	else
		__queue_raw_event_after(new, last_inserted);
}
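/*
 * queue_raw_event() is the entry point called for every sample:
 * it copies the raw payload, inserts it in timestamp order and
 * periodically flushes everything older than the current timeslice.
 */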
static void queue_raw_event(void *data, int raw_size, int cpu,
			    u64 timestamp, struct thread *thread)
{
	struct raw_event_queue *new;

	if (flush_limit == ULLONG_MAX)
		flush_limit = timestamp + FLUSH_PERIOD;

	if (timestamp < last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return;
	}

	new = malloc(sizeof(*new));
	if (!new)
		die("Not enough memory\n");

	new->timestamp = timestamp;
	new->cpu = cpu;
	new->thread = thread;

	new->data = malloc(raw_size);
	if (!new->data)
		die("Not enough memory\n");

	memcpy(new->data, data, raw_size);

	__queue_raw_event(new);
	last_inserted = new;

	/*
	 * We want to have a slice of events covering 2 * FLUSH_PERIOD.
	 * If FLUSH_PERIOD is big enough, it ensures that every event that
	 * occurred in the first half of the timeslice has been buffered and
	 * that none are still pending (we need this because of the weakly
	 * ordered event recording we have). Once we reach the 2 * FLUSH_PERIOD
	 * timeslice, we flush the first half to be gentle with memory
	 * (the second half can still get new events in the middle, so wait
	 * another period before flushing it).
	 */
	if (new->timestamp > flush_limit &&
	    new->timestamp - flush_limit > FLUSH_PERIOD) {
		flush_limit += FLUSH_PERIOD;
		flush_raw_event_queue(flush_limit);
	}
}

static int process_sample_event(event_t *event, struct perf_session *session)
{
	struct thread *thread;
	struct sample_data data;

	bzero(&data, sizeof(struct sample_data));
	event__parse_sample(event, session->sample_type, &data);
	thread = perf_session__findnew(session, data.pid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
		return 0;

	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);

	return 0;
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	char cut_name[20];

	printf("%18s ", "ID");
	printf("%20s ", "Name");
	printf("%10s ", "acquired");
	printf("%10s ", "contended");

	printf("%15s ", "total wait (ns)");
	printf("%15s ", "max wait (ns)");
	printf("%15s ", "min wait (ns)");

	printf("\n\n");

	while ((st = pop_from_result())) {
		bzero(cut_name, 20);

		printf("%p ", st->addr);

		if (strlen(st->name) < 16) {
			/* output the raw name */
			printf("%20s ", st->name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* truncate the name to keep the output aligned */
			printf("%20s ", cut_name);
		}

		printf("%10u ", st->nr_acquired);
		printf("%10u ", st->nr_contended);

		printf("%15llu ", st->wait_time_total);
		printf("%15llu ", st->wait_time_max);
		printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
		       0 : st->wait_time_min);
		printf("\n");
	}
}
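/* used by the 'map' subcommand: dump every lock address/name pair we saw */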
static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
			printf("%p: %s\n", st->addr, st->name);
		}
	}
}

static struct perf_event_ops eops = {
	.sample			= process_sample_event,
	.comm			= event__process_comm,
};

static struct perf_session *session;

static int read_events(void)
{
	session = perf_session__new(input_name, O_RDONLY, 0);
	if (!session)
		die("Initializing perf session failed\n");

	return perf_session__process_events(session, &eops);
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static void __cmd_report(void)
{
	setup_pager();
	select_key();
	read_events();
	flush_raw_event_queue(ULLONG_MAX);
	sort_result();
	print_result();
}

static const char * const report_usage[] = {
	"perf lock report [<options>]",
	NULL
};

static const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		   "key for sorting"),
	/* TODO: type */
	OPT_END()
};

static const char * const lock_usage[] = {
	"perf lock [<options>] {record|trace|report}",
	NULL
};

static const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_END()
};

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "lock:lock_acquire:r",
	"-e", "lock:lock_acquired:r",
	"-e", "lock:lock_contended:r",
	"-e", "lock:lock_release:r",
};
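/*
 * Typical usage (a sketch, not an exhaustive reference):
 *
 *   # perf lock record sleep 10        # record lock events system-wide
 *   # perf lock report -k wait_total   # sort the report by total wait time
 *   # perf lock map                    # dump the lock address -> name table
 */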
static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_lock(int argc, const char **argv, const char *prefix __used)
{
	unsigned int i;

	symbol__init();
	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_LIST_HEAD(lockhash_table + i);

	argc = parse_options(argc, argv, lock_options, lock_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "report", 6)) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		__cmd_report();
	} else if (!strcmp(argv[0], "trace")) {
		/* Aliased to 'perf trace' */
		return cmd_trace(argc, argv, prefix);
	} else if (!strcmp(argv[0], "map")) {
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		setup_pager();
		read_events();
		dump_map();
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return 0;
}