// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <asm-generic/errno-base.h>

#include "lock_data.h"

/* for collect_lock_syms().  4096 was rejected by the verifier */
#define MAX_CPUS  1024

/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN	(1U << 0)
#define LCB_F_READ	(1U << 1)
#define LCB_F_WRITE	(1U << 2)
#define LCB_F_RT	(1U << 3)
#define LCB_F_PERCPU	(1U << 4)
#define LCB_F_MUTEX	(1U << 5)

/* callstack storage */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
	__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

/* buffer for owner stacktrace */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
	__uint(max_entries, 1);
} stack_buf SEC(".maps");

/* a map from owner stacktrace to owner stack id */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64)); // owner stacktrace
	__uint(value_size, sizeof(__s32)); // owner stack id
	__uint(max_entries, 1);
} owner_stacks SEC(".maps");

/* a map from lock address to owner data */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64)); // lock address
	__uint(value_size, sizeof(struct owner_tracing_data));
	__uint(max_entries, 1);
} owner_data SEC(".maps");

/* a map from contention_key (which stores the owner stack id) to contention data */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct contention_key));
	__uint(value_size, sizeof(struct contention_data));
	__uint(max_entries, 1);
} owner_stat SEC(".maps");

/* maintain timestamp at the beginning of contention */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, int);
	__type(value, struct tstamp_data);
	__uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");

/* maintain per-CPU timestamp at the beginning of contention */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct tstamp_data));
	__uint(max_entries, 1);
} tstamp_cpu SEC(".maps");

/* actual lock contention statistics */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct contention_key));
	__uint(value_size, sizeof(struct contention_data));
	__uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct contention_task_data));
	__uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, MAX_ENTRIES);
} lock_syms SEC(".maps");

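/*
 * Filter maps set from user space: when the corresponding has_* flag is
 * set, can_record() only accepts events that match an entry in the
 * corresponding *_filter map below.
 */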
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} type_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} addr_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} cgroup_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(long));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} slab_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(long));
	__uint(value_size, sizeof(struct slab_cache_data));
	__uint(max_entries, 1);
} slab_caches SEC(".maps");

struct rw_semaphore___old {
	struct task_struct *owner;
} __attribute__((preserve_access_index));

struct rw_semaphore___new {
	atomic_long_t owner;
} __attribute__((preserve_access_index));

struct mm_struct___old {
	struct rw_semaphore mmap_sem;
} __attribute__((preserve_access_index));

struct mm_struct___new {
	struct rw_semaphore mmap_lock;
} __attribute__((preserve_access_index));

extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;

/* control flags */
const volatile int has_cpu;
const volatile int has_task;
const volatile int has_type;
const volatile int has_addr;
const volatile int has_cgroup;
const volatile int has_slab;
const volatile int needs_callstack;
const volatile int stack_skip;
const volatile int lock_owner;
const volatile int use_cgroup_v2;
const volatile int max_stack;

/* determine the key of lock stat */
const volatile int aggr_mode;

int enabled;

int perf_subsys_id = -1;

__u64 end_ts;

__u32 slab_cache_id;

/* error stat */
int task_fail;
int stack_fail;
int time_fail;
int data_fail;

int task_map_full;
int data_map_full;

struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak;
void bpf_task_release(struct task_struct *p) __ksym __weak;

static inline __u64 get_current_cgroup_id(void)
{
	struct task_struct *task;
	struct cgroup *cgrp;

	if (use_cgroup_v2)
		return bpf_get_current_cgroup_id();

	task = bpf_get_current_task_btf();

	if (perf_subsys_id == -1) {
#if __has_builtin(__builtin_preserve_enum_value)
		perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
						     perf_event_cgrp_id);
#else
		perf_subsys_id = perf_event_cgrp_id;
#endif
	}

	cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
	return BPF_CORE_READ(cgrp, kn, id);
}

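/* apply the configured filters; return 1 if this contention event should be recorded */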
static inline int can_record(u64 *ctx)
{
	if (has_cpu) {
		__u32 cpu = bpf_get_smp_processor_id();
		__u8 *ok;

		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
		if (!ok)
			return 0;
	}

	if (has_task) {
		__u8 *ok;
		__u32 pid = bpf_get_current_pid_tgid();

		ok = bpf_map_lookup_elem(&task_filter, &pid);
		if (!ok)
			return 0;
	}

	if (has_type) {
		__u8 *ok;
		__u32 flags = (__u32)ctx[1];

		ok = bpf_map_lookup_elem(&type_filter, &flags);
		if (!ok)
			return 0;
	}

	if (has_addr) {
		__u8 *ok;
		__u64 addr = ctx[0];

		ok = bpf_map_lookup_elem(&addr_filter, &addr);
		if (!ok && !has_slab)
			return 0;
	}

	if (has_cgroup) {
		__u8 *ok;
		__u64 cgrp = get_current_cgroup_id();

		ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp);
		if (!ok)
			return 0;
	}

	if (has_slab && bpf_get_kmem_cache) {
		__u8 *ok;
		__u64 addr = ctx[0];
		long kmem_cache_addr;

		kmem_cache_addr = (long)bpf_get_kmem_cache(addr);
		ok = bpf_map_lookup_elem(&slab_filter, &kmem_cache_addr);
		if (!ok)
			return 0;
	}

	return 1;
}

static inline int update_task_data(struct task_struct *task)
{
	struct contention_task_data *p;
	int pid, err;

	err = bpf_core_read(&pid, sizeof(pid), &task->pid);
	if (err)
		return -1;

	p = bpf_map_lookup_elem(&task_data, &pid);
	if (p == NULL && !task_map_full) {
		struct contention_task_data data = {};

		BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
		if (bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST) == -E2BIG)
			task_map_full = 1;
	}

	return 0;
}

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
{
	struct task_struct *task;
	__u64 owner = 0;

	if (flags & LCB_F_MUTEX) {
		struct mutex *mutex = (void *)lock;
		owner = BPF_CORE_READ(mutex, owner.counter);
	} else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
		/*
		 * Support for the BPF_TYPE_MATCHES argument to the
		 * __builtin_preserve_type_info builtin was added at some point during
		 * development of clang 15 and it's what is needed for
		 * bpf_core_type_matches.
		 */
#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
		if (bpf_core_type_matches(struct rw_semaphore___old)) {
			struct rw_semaphore___old *rwsem = (void *)lock;
			owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
		} else if (bpf_core_type_matches(struct rw_semaphore___new)) {
			struct rw_semaphore___new *rwsem = (void *)lock;
			owner = BPF_CORE_READ(rwsem, owner.counter);
		}
#else
		/* assume new struct */
		struct rw_semaphore *rwsem = (void *)lock;
		owner = BPF_CORE_READ(rwsem, owner.counter);
#endif
	}

	if (!owner)
		return NULL;

	task = (void *)(owner & ~7UL);
	return task;
}

static inline __u32 check_lock_type(__u64 lock, __u32 flags)
{
	struct task_struct *curr;
	struct mm_struct___old *mm_old;
	struct mm_struct___new *mm_new;
	struct sighand_struct *sighand;

	switch (flags) {
	case LCB_F_READ:  /* rwsem */
	case LCB_F_WRITE:
		curr = bpf_get_current_task_btf();
		if (curr->mm == NULL)
			break;
		mm_new = (void *)curr->mm;
		if (bpf_core_field_exists(mm_new->mmap_lock)) {
			if (&mm_new->mmap_lock == (void *)lock)
				return LCD_F_MMAP_LOCK;
			break;
		}
		mm_old = (void *)curr->mm;
		if (bpf_core_field_exists(mm_old->mmap_sem)) {
			if (&mm_old->mmap_sem == (void *)lock)
				return LCD_F_MMAP_LOCK;
		}
		break;
	case LCB_F_SPIN:  /* spinlock */
		curr = bpf_get_current_task_btf();
		sighand = curr->sighand;

		if (sighand && &sighand->siglock == (void *)lock)
			return LCD_F_SIGHAND_LOCK;
		break;
	default:
		break;
	}
	return 0;
}

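/*
 * Find the tstamp entry for this contention: a per-cpu slot for spinning
 * locks, a per-task hash entry otherwise.  Returns NULL for nested locks.
 */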
static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
{
	__u32 pid;
	struct tstamp_data *pelem;

	/* Use per-cpu array map for spinlock and rwlock */
	if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) {
		__u32 idx = 0;

		pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
		/* Do not update the element for nested locks */
		if (pelem && pelem->lock)
			pelem = NULL;
		return pelem;
	}

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);
	/* Do not update the element for nested locks */
	if (pelem && pelem->lock)
		return NULL;

	if (pelem == NULL) {
		struct tstamp_data zero = {};

		if (bpf_map_update_elem(&tstamp, &pid, &zero, BPF_NOEXIST) < 0) {
			__sync_fetch_and_add(&task_fail, 1);
			return NULL;
		}

		pelem = bpf_map_lookup_elem(&tstamp, &pid);
		if (pelem == NULL) {
			__sync_fetch_and_add(&task_fail, 1);
			return NULL;
		}
	}
	return pelem;
}

static inline s32 get_owner_stack_id(u64 *stacktrace)
{
	s32 *id, new_id;
	static s64 id_gen = 1;

	id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
	if (id)
		return *id;

	new_id = (s32)__sync_fetch_and_add(&id_gen, 1);

	bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST);

	id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
	if (id)
		return *id;

	return -1;
}

static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
{
	__sync_fetch_and_add(&data->total_time, duration);
	__sync_fetch_and_add(&data->count, count);

	/* FIXME: need atomic operations */
	if (data->max_time < duration)
		data->max_time = duration;
	if (data->min_time > duration)
		data->min_time = duration;
}

static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
{
	struct contention_key key = {
		.stack_id = id,
		.pid = 0,
		.lock_addr_or_cgroup = 0,
	};
	struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key);

	if (!data) {
		struct contention_data first = {
			.total_time = duration,
			.max_time = duration,
			.min_time = duration,
			.count = 1,
			.flags = flags,
		};
		bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST);
	} else {
		update_contention_data(data, duration, 1);
	}
}

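/* record the timestamp (and, if requested, the owner) when a task starts waiting on a lock */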
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
	struct tstamp_data *pelem;

	if (!enabled || !can_record(ctx))
		return 0;

	pelem = get_tstamp_elem(ctx[1]);
	if (pelem == NULL)
		return 0;

	pelem->timestamp = bpf_ktime_get_ns();
	pelem->lock = (__u64)ctx[0];
	pelem->flags = (__u32)ctx[1];

	if (needs_callstack) {
		u32 i = 0;
		u32 id = 0;
		int owner_pid;
		u64 *buf;
		struct task_struct *task;
		struct owner_tracing_data *otdata;

		if (!lock_owner)
			goto skip_owner;

		task = get_lock_owner(pelem->lock, pelem->flags);
		if (!task)
			goto skip_owner;

		owner_pid = BPF_CORE_READ(task, pid);

		buf = bpf_map_lookup_elem(&stack_buf, &i);
		if (!buf)
			goto skip_owner;
		for (i = 0; i < max_stack; i++)
			buf[i] = 0x0;

		if (!bpf_task_from_pid)
			goto skip_owner;

		task = bpf_task_from_pid(owner_pid);
		if (!task)
			goto skip_owner;

		bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
		bpf_task_release(task);

		otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
		id = get_owner_stack_id(buf);

		/*
		 * Contention has just begun, or (corner case) `lock` is owned by a
		 * process other than `owner_pid`.  For the corner case, treat it as
		 * an unexpected internal error and just ignore the previous tracing
		 * record.
		 */
		if (!otdata || otdata->pid != owner_pid) {
			struct owner_tracing_data first = {
				.pid = owner_pid,
				.timestamp = pelem->timestamp,
				.count = 1,
				.stack_id = id,
			};
			bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
		}
		/* Contention is ongoing and a new waiter joins */
		else {
			__sync_fetch_and_add(&otdata->count, 1);

			/*
			 * The owner is the same, but its stacktrace might have changed.
			 * In this case, store/update `owner_stat` based on the current
			 * owner stack id.
			 */
			if (id != otdata->stack_id) {
				update_owner_stat(id, pelem->timestamp - otdata->timestamp,
						  pelem->flags);

				otdata->timestamp = pelem->timestamp;
				otdata->stack_id = id;
			}
		}
skip_owner:
		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
						  BPF_F_FAST_STACK_CMP | stack_skip);
		if (pelem->stack_id < 0)
			__sync_fetch_and_add(&stack_fail, 1);
	} else if (aggr_mode == LOCK_AGGR_TASK) {
		struct task_struct *task;

		if (lock_owner) {
			task = get_lock_owner(pelem->lock, pelem->flags);

			/* The flags field is not used anymore.  Pass the owner pid instead. */
			if (task)
				pelem->flags = BPF_CORE_READ(task, pid);
			else
				pelem->flags = -1U;

		} else {
			task = bpf_get_current_task_btf();
		}

		if (task) {
			if (update_task_data(task) < 0 && lock_owner)
				pelem->flags = -1U;
		}
	}

	return 0;
}

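/* compute the contention duration and update the statistics for the configured aggregation key */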
SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
	__u32 pid = 0, idx = 0;
	struct tstamp_data *pelem;
	struct contention_key key = {};
	struct contention_data *data;
	__u64 timestamp;
	__u64 duration;
	bool need_delete = false;

	if (!enabled)
		return 0;

	/*
	 * For spinlock and rwlock, the timestamp is kept in the per-cpu map.
	 * However, contention_end does not receive the flags, so it cannot
	 * know whether to read the per-cpu or the hash map.
	 *
	 * Try the per-cpu map first and check if there's an active contention.
	 * If there is, do not read the hash map because a task cannot take
	 * sleeping locks before releasing its spinning locks.
	 */
	pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
	if (pelem && pelem->lock) {
		if (pelem->lock != ctx[0])
			return 0;
	} else {
		pid = bpf_get_current_pid_tgid();
		pelem = bpf_map_lookup_elem(&tstamp, &pid);
		if (!pelem || pelem->lock != ctx[0])
			return 0;
		need_delete = true;
	}

	timestamp = bpf_ktime_get_ns();
	duration = timestamp - pelem->timestamp;
	if ((__s64)duration < 0) {
		__sync_fetch_and_add(&time_fail, 1);
		goto out;
	}

	if (needs_callstack && lock_owner) {
		struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);

		if (!otdata)
			goto skip_owner;

		/* Update `owner_stat` */
		update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags);

		/* No more contention on this lock; delete its entry in `owner_data` */
		if (otdata->count <= 1)
			bpf_map_delete_elem(&owner_data, &pelem->lock);
		/*
		 * Contention is still ongoing, with a new owner (the current task).
		 * `owner_data` should be updated accordingly.
		 */
		else {
			u32 i = 0;
			s32 ret = (s32)ctx[1];
			u64 *buf;

			otdata->timestamp = timestamp;
			__sync_fetch_and_add(&otdata->count, -1);

			buf = bpf_map_lookup_elem(&stack_buf, &i);
			if (!buf)
				goto skip_owner;
			for (i = 0; i < (u32)max_stack; i++)
				buf[i] = 0x0;

			/*
			 * `ret` has the return code of the lock function.
			 * If `ret` is negative, the current task stopped waiting without
			 * acquiring the lock.  The owner is unchanged, but we still need
			 * to update the owner stack.
			 */
			if (ret < 0) {
				s32 id = 0;
				struct task_struct *task;

				if (!bpf_task_from_pid)
					goto skip_owner;

				task = bpf_task_from_pid(otdata->pid);
				if (!task)
					goto skip_owner;

				bpf_get_task_stack(task, buf,
						   max_stack * sizeof(unsigned long), 0);
				bpf_task_release(task);

				id = get_owner_stack_id(buf);

				/*
				 * If the owner stack has changed, update the owner stack id
				 * for this lock.
				 */
				if (id != otdata->stack_id)
					otdata->stack_id = id;
			}
			/*
			 * Otherwise, update the tracing data with the current task, which
			 * is the new owner.
			 */
			else {
				otdata->pid = pid;
				/*
				 * We don't want to retrieve the callstack here, since it is
				 * where the current task acquires the lock and provides no
				 * additional information.  We simply assign -1 to invalidate
				 * it.
				 */
				otdata->stack_id = -1;
			}
		}
	}
skip_owner:
	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_TASK:
		if (lock_owner)
			key.pid = pelem->flags;
		else {
			if (!need_delete)
				pid = bpf_get_current_pid_tgid();
			key.pid = pid;
		}
		if (needs_callstack)
			key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_ADDR:
		key.lock_addr_or_cgroup = pelem->lock;
		if (needs_callstack)
			key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_CGROUP:
		key.lock_addr_or_cgroup = get_current_cgroup_id();
		break;
	default:
		/* should not happen */
		return 0;
	}

	data = bpf_map_lookup_elem(&lock_stat, &key);
	if (!data) {
		if (data_map_full) {
			__sync_fetch_and_add(&data_fail, 1);
			goto out;
		}

		struct contention_data first = {
			.total_time = duration,
			.max_time = duration,
			.min_time = duration,
			.count = 1,
			.flags = pelem->flags,
		};
		int err;

		if (aggr_mode == LOCK_AGGR_ADDR) {
			first.flags |= check_lock_type(pelem->lock,
						       pelem->flags & LCB_F_TYPE_MASK);

			/* Check if it's from a slab object */
			if (bpf_get_kmem_cache) {
				struct kmem_cache *s;
				struct slab_cache_data *d;

				s = bpf_get_kmem_cache(pelem->lock);
				if (s != NULL) {
					/*
					 * Save the ID of the slab cache in the flags
					 * (instead of the full address) to reduce the
					 * space in the contention_data.
					 */
					d = bpf_map_lookup_elem(&slab_caches, &s);
					if (d != NULL)
						first.flags |= d->id;
				}
			}
		}

		err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
		if (err < 0) {
			if (err == -EEXIST) {
				/* it lost the race, try to get it again */
				data = bpf_map_lookup_elem(&lock_stat, &key);
				if (data != NULL)
					goto found;
			}
			if (err == -E2BIG)
				data_map_full = 1;
			__sync_fetch_and_add(&data_fail, 1);
		}
		goto out;
	}

found:
	update_contention_data(data, duration, 1);

out:
	pelem->lock = 0;
	if (need_delete)
		bpf_map_delete_elem(&tstamp, &pid);
	return 0;
}

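/* record the address of each CPU's runqueue lock so user space can report it by name */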
extern struct rq runqueues __ksym;

struct rq___old {
	raw_spinlock_t lock;
} __attribute__((preserve_access_index));

struct rq___new {
	raw_spinlock_t __lock;
} __attribute__((preserve_access_index));

SEC("raw_tp/bpf_test_finish")
int BPF_PROG(collect_lock_syms)
{
	__u64 lock_addr, lock_off;
	__u32 lock_flag;

	if (bpf_core_field_exists(struct rq___new, __lock))
		lock_off = offsetof(struct rq___new, __lock);
	else
		lock_off = offsetof(struct rq___old, lock);

	for (int i = 0; i < MAX_CPUS; i++) {
		struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);

		if (rq == NULL)
			break;

		lock_addr = (__u64)(void *)rq + lock_off;
		lock_flag = LOCK_CLASS_RQLOCK;
		bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
	}
	return 0;
}

SEC("raw_tp/bpf_test_finish")
int BPF_PROG(end_timestamp)
{
	end_ts = bpf_ktime_get_ns();
	return 0;
}

/*
 * bpf_iter__kmem_cache was added recently, so old kernels don't have it in
 * vmlinux.h.  But we cannot add it here since that would cause a compiler
 * error due to redefinition of the struct on later kernels.
 *
 * So use a CO-RE trick to access the member only if the type exists.
 * This supports both old and new kernels without compiler errors.
 */
struct bpf_iter__kmem_cache___new {
	struct kmem_cache *s;
} __attribute__((preserve_access_index));

SEC("iter/kmem_cache")
int slab_cache_iter(void *ctx)
{
	struct kmem_cache *s = NULL;
	struct slab_cache_data d;
	const char *nameptr;

	if (bpf_core_type_exists(struct bpf_iter__kmem_cache)) {
		struct bpf_iter__kmem_cache___new *iter = ctx;

		s = iter->s;
	}

	if (s == NULL)
		return 0;

	nameptr = s->name;
	bpf_probe_read_kernel_str(d.name, sizeof(d.name), nameptr);

	d.id = ++slab_cache_id << LCB_F_SLAB_ID_SHIFT;
	if (d.id >= LCB_F_SLAB_ID_END)
		return 0;

	bpf_map_update_elem(&slab_caches, &s, &d, BPF_NOEXIST);
	return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";