// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

#include "lock_data.h"

/* default buffer size */
#define MAX_ENTRIES	10240

/* for collect_lock_syms().  4096 was rejected by the verifier */
#define MAX_CPUS	1024

/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN	(1U << 0)
#define LCB_F_READ	(1U << 1)
#define LCB_F_WRITE	(1U << 2)
#define LCB_F_RT	(1U << 3)
#define LCB_F_PERCPU	(1U << 4)
#define LCB_F_MUTEX	(1U << 5)

struct tstamp_data {
	__u64 timestamp;
	__u64 lock;
	__u32 flags;
	__s32 stack_id;
};

/* callstack storage */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
	__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

/* maintain timestamp at the beginning of contention */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, int);
	__type(value, struct tstamp_data);
	__uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");

/* actual lock contention statistics */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct contention_key));
	__uint(value_size, sizeof(struct contention_data));
	__uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

/* per-task information (e.g. comm) keyed by pid */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct contention_task_data));
	__uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");

/* lock address to lock class (e.g. run-queue lock) mapping */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 16384);
} lock_syms SEC(".maps");

/* filter maps, populated from user space; consulted only when the matching has_* flag is set */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} type_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} addr_filter SEC(".maps");

struct rw_semaphore___old {
	struct task_struct *owner;
} __attribute__((preserve_access_index));

struct rw_semaphore___new {
	atomic_long_t owner;
} __attribute__((preserve_access_index));

struct mm_struct___old {
	struct rw_semaphore mmap_sem;
} __attribute__((preserve_access_index));

struct mm_struct___new {
	struct rw_semaphore mmap_lock;
} __attribute__((preserve_access_index));

/* control flags */
int enabled;
int has_cpu;
int has_task;
int has_type;
int has_addr;
int needs_callstack;
int stack_skip;
int lock_owner;

/* determine the key of lock stat */
int aggr_mode;

/* error stat */
int task_fail;
int stack_fail;
int time_fail;

static inline int can_record(u64 *ctx)
{
	if (has_cpu) {
		__u32 cpu = bpf_get_smp_processor_id();
		__u8 *ok;

		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
		if (!ok)
			return 0;
	}

	if (has_task) {
		__u8 *ok;
		__u32 pid = bpf_get_current_pid_tgid();

		ok = bpf_map_lookup_elem(&task_filter, &pid);
		if (!ok)
			return 0;
	}

	if (has_type) {
		__u8 *ok;
		__u32 flags = (__u32)ctx[1];

		ok = bpf_map_lookup_elem(&type_filter, &flags);
		if (!ok)
			return 0;
	}

	if (has_addr) {
		__u8 *ok;
		__u64 addr = ctx[0];

		ok = bpf_map_lookup_elem(&addr_filter, &addr);
		if (!ok)
			return 0;
	}

	return 1;
}

static inline int update_task_data(struct task_struct *task)
{
	struct contention_task_data *p;
	int pid, err;

	err = bpf_core_read(&pid, sizeof(pid), &task->pid);
	if (err)
		return -1;

	p = bpf_map_lookup_elem(&task_data, &pid);
	if (p == NULL) {
		struct contention_task_data data = {};

		BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
		bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
	}

	return 0;
}

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
{
	struct task_struct *task;
	__u64 owner = 0;

	if (flags & LCB_F_MUTEX) {
		struct mutex *mutex = (void *)lock;
		owner = BPF_CORE_READ(mutex, owner.counter);
	} else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
	/*
	 * Support for the BPF_TYPE_MATCHES argument to the
	 * __builtin_preserve_type_info builtin was added at some point during
	 * development of clang 15 and it's what is needed for
	 * bpf_core_type_matches.
	 */
#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
		if (bpf_core_type_matches(struct rw_semaphore___old)) {
			struct rw_semaphore___old *rwsem = (void *)lock;
			owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
		} else if (bpf_core_type_matches(struct rw_semaphore___new)) {
			struct rw_semaphore___new *rwsem = (void *)lock;
			owner = BPF_CORE_READ(rwsem, owner.counter);
		}
#else
		/* assume new struct */
		struct rw_semaphore *rwsem = (void *)lock;
		owner = BPF_CORE_READ(rwsem, owner.counter);
#endif
	}

	if (!owner)
		return NULL;

	/* the lower bits of the owner word hold flags, not the task pointer */
	task = (void *)(owner & ~7UL);
	return task;
}

static inline __u32 check_lock_type(__u64 lock, __u32 flags)
{
	struct task_struct *curr;
	struct mm_struct___old *mm_old;
	struct mm_struct___new *mm_new;

	switch (flags) {
	case LCB_F_READ:  /* rwsem */
	case LCB_F_WRITE:
		curr = bpf_get_current_task_btf();
		if (curr->mm == NULL)
			break;
		mm_new = (void *)curr->mm;
		if (bpf_core_field_exists(mm_new->mmap_lock)) {
			if (&mm_new->mmap_lock == (void *)lock)
				return LCD_F_MMAP_LOCK;
			break;
		}
		mm_old = (void *)curr->mm;
		if (bpf_core_field_exists(mm_old->mmap_sem)) {
			if (&mm_old->mmap_sem == (void *)lock)
				return LCD_F_MMAP_LOCK;
		}
		break;
	case LCB_F_SPIN:  /* spinlock */
		curr = bpf_get_current_task_btf();
		if (&curr->sighand->siglock == (void *)lock)
			return LCD_F_SIGHAND_LOCK;
		break;
	default:
		break;
	}

	return 0;
}

SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
	__u32 pid;
	struct tstamp_data *pelem;

	if (!enabled || !can_record(ctx))
		return 0;

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);
	if (pelem && pelem->lock)
		return 0;

	if (pelem == NULL) {
		struct tstamp_data zero = {};

		bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
		pelem = bpf_map_lookup_elem(&tstamp, &pid);
		if (pelem == NULL) {
			__sync_fetch_and_add(&task_fail, 1);
			return 0;
		}
	}

	pelem->timestamp = bpf_ktime_get_ns();
	pelem->lock = (__u64)ctx[0];
	pelem->flags = (__u32)ctx[1];

	if (needs_callstack) {
		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
						  BPF_F_FAST_STACK_CMP | stack_skip);
		if (pelem->stack_id < 0)
			__sync_fetch_and_add(&stack_fail, 1);
	} else if (aggr_mode == LOCK_AGGR_TASK) {
		struct task_struct *task;

		if (lock_owner) {
			task = get_lock_owner(pelem->lock, pelem->flags);

			/* The flags field is not used anymore.  Pass the owner pid. */
			if (task)
				pelem->flags = BPF_CORE_READ(task, pid);
			else
				pelem->flags = -1U;

		} else {
			task = bpf_get_current_task_btf();
		}

		if (task) {
			if (update_task_data(task) < 0 && lock_owner)
				pelem->flags = -1U;
		}
	}

	return 0;
}

SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
	__u32 pid;
	struct tstamp_data *pelem;
	struct contention_key key = {};
	struct contention_data *data;
	__u64 duration;

	if (!enabled)
		return 0;

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);
	if (!pelem || pelem->lock != ctx[0])
		return 0;

	duration = bpf_ktime_get_ns() - pelem->timestamp;
	if ((__s64)duration < 0) {
		bpf_map_delete_elem(&tstamp, &pid);
		__sync_fetch_and_add(&time_fail, 1);
		return 0;
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_TASK:
		if (lock_owner)
			key.pid = pelem->flags;
		else
			key.pid = pid;
		if (needs_callstack)
			key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_ADDR:
		key.lock_addr = pelem->lock;
		if (needs_callstack)
			key.stack_id = pelem->stack_id;
		break;
	default:
		/* should not happen */
		return 0;
	}

	data = bpf_map_lookup_elem(&lock_stat, &key);
	if (!data) {
		struct contention_data first = {
			.total_time = duration,
			.max_time = duration,
			.min_time = duration,
			.count = 1,
			.flags = pelem->flags,
		};

		if (aggr_mode == LOCK_AGGR_ADDR)
			first.flags |= check_lock_type(pelem->lock, pelem->flags);

		bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
		bpf_map_delete_elem(&tstamp, &pid);
		return 0;
	}

	__sync_fetch_and_add(&data->total_time, duration);
	__sync_fetch_and_add(&data->count, 1);

	/* FIXME: need atomic operations */
	if (data->max_time < duration)
		data->max_time = duration;
	if (data->min_time > duration)
		data->min_time = duration;

	bpf_map_delete_elem(&tstamp, &pid);
	return 0;
}

extern struct rq runqueues __ksym;

/* collect the addresses of per-CPU run-queue locks so they can be symbolized */
SEC("raw_tp/bpf_test_finish")
int BPF_PROG(collect_lock_syms)
{
	__u64 lock_addr;
	__u32 lock_flag;

	for (int i = 0; i < MAX_CPUS; i++) {
		struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);

		if (rq == NULL)
			break;

		lock_addr = (__u64)&rq->__lock;
		lock_flag = LOCK_CLASS_RQLOCK;
		bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
	}
	return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";