// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/btf_ids.h>
#include <linux/buildid.h>
#include "percpu_freelist.h"
#include "mmap_unlock_work.h"

#define STACK_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
	 BPF_F_STACK_BUILD_ID)

struct stack_map_bucket {
	struct pcpu_freelist_node fnode;
	u32 hash;
	u32 nr;
	u64 data[];
};

struct bpf_stack_map {
	struct bpf_map map;
	void *elems;
	struct pcpu_freelist freelist;
	u32 n_buckets;
	struct stack_map_bucket *buckets[] __counted_by(n_buckets);
};

static inline bool stack_map_use_build_id(struct bpf_map *map)
{
	return (map->map_flags & BPF_F_STACK_BUILD_ID);
}

static inline int stack_map_data_size(struct bpf_map *map)
{
	return stack_map_use_build_id(map) ?
		sizeof(struct bpf_stack_build_id) : sizeof(u64);
}

static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
	u64 elem_size = sizeof(struct stack_map_bucket) +
			(u64)smap->map.value_size;
	int err;

	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
					 smap->map.numa_node);
	if (!smap->elems)
		return -ENOMEM;

	err = pcpu_freelist_init(&smap->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
			       smap->map.max_entries);
	return 0;

free_elems:
	bpf_map_area_free(smap->elems);
	return err;
}

/* Called from syscall */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
	u32 value_size = attr->value_size;
	struct bpf_stack_map *smap;
	u64 cost, n_buckets;
	int err;

	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
		return ERR_PTR(-EINVAL);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    value_size < 8 || value_size % 8)
		return ERR_PTR(-EINVAL);

	BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
	if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
		if (value_size % sizeof(struct bpf_stack_build_id) ||
		    value_size / sizeof(struct bpf_stack_build_id)
		    > sysctl_perf_event_max_stack)
			return ERR_PTR(-EINVAL);
	} else if (value_size / 8 > sysctl_perf_event_max_stack)
		return ERR_PTR(-EINVAL);

	/* hash table size must be power of 2; roundup_pow_of_two() can overflow
	 * into UB on 32-bit arches, so check that first
	 */
	if (attr->max_entries > 1UL << 31)
		return ERR_PTR(-E2BIG);

	n_buckets = roundup_pow_of_two(attr->max_entries);

	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
	if (!smap)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&smap->map, attr);
	smap->n_buckets = n_buckets;

	err = get_callchain_buffers(sysctl_perf_event_max_stack);
	if (err)
		goto free_smap;

	err = prealloc_elems_and_freelist(smap);
	if (err)
		goto put_buffers;

	return &smap->map;

put_buffers:
	put_callchain_buffers();
free_smap:
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}
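/* Resolve the build ID of the file backing @vma into @build_id. With
 * @may_fault set (sleepable callers), the parser is allowed to fault
 * pages in; otherwise the non-faulting variant is used.
 */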
static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault)
{
	return may_fault ? build_id_parse(vma, build_id, NULL)
			 : build_id_parse_nofault(vma, build_id, NULL);
}

/*
 * Expects all id_offs[i].ip values to be set to correct initial IPs.
 * They will be subsequently:
 * - either adjusted in place to a file offset, if build ID fetching
 *   succeeds; in this case id_offs[i].build_id is set to correct build ID,
 *   and id_offs[i].status is set to BPF_STACK_BUILD_ID_VALID;
 * - or IP will be kept intact, if build ID fetching failed; in this case
 *   id_offs[i].build_id is zeroed out and id_offs[i].status is set to
 *   BPF_STACK_BUILD_ID_IP.
 */
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
					  u32 trace_nr, bool user, bool may_fault)
{
	int i;
	struct mmap_unlock_irq_work *work = NULL;
	bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
	struct vm_area_struct *vma, *prev_vma = NULL;
	const char *prev_build_id;

	/* If the irq_work is in use, fall back to report ips. Same
	 * fallback is used for kernel stack (!user) on a stackmap with
	 * build_id.
	 */
	if (!user || !current || !current->mm || irq_work_busy ||
	    !mmap_read_trylock(current->mm)) {
		/* cannot access current->mm, fall back to ips */
		for (i = 0; i < trace_nr; i++) {
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
		}
		return;
	}

	for (i = 0; i < trace_nr; i++) {
		u64 ip = READ_ONCE(id_offs[i].ip);

		if (range_in_vma(prev_vma, ip, ip)) {
			vma = prev_vma;
			memcpy(id_offs[i].build_id, prev_build_id, BUILD_ID_SIZE_MAX);
			goto build_id_valid;
		}
		vma = find_vma(current->mm, ip);
		if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
			/* per entry fall back to ips */
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
			continue;
		}
build_id_valid:
		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start;
		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
		prev_vma = vma;
		prev_build_id = id_offs[i].build_id;
	}
	bpf_mmap_unlock_mm(work, current->mm);
}
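/* Capture a kernel stack trace for @task (possibly a sleeping task) via
 * stack_trace_save_tsk() into a perf callchain entry, so it can be consumed
 * by the same code paths as get_perf_callchain(). Returns NULL when
 * CONFIG_STACKTRACE is not enabled or no callchain entry is available.
 */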
static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
{
#ifdef CONFIG_STACKTRACE
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);

	if (!entry)
		return NULL;

	entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
					 max_depth, 0);

	/* stack_trace_save_tsk() works on unsigned long array, while
	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
	 * necessary to fix this mismatch.
	 */
	if (__BITS_PER_LONG != 64) {
		unsigned long *from = (unsigned long *) entry->ip;
		u64 *to = entry->ip;
		int i;

		/* copy data from the end to avoid using extra buffer */
		for (i = entry->nr - 1; i >= 0; i--)
			to[i] = (u64)(from[i]);
	}

	put_callchain_entry(rctx);

	return entry;
#else /* CONFIG_STACKTRACE */
	return NULL;
#endif
}
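/* Core of the bpf_get_stackid*() helpers: hash the trace (after dropping the
 * BPF_F_SKIP_FIELD_MASK entries) into a bucket index and either reuse the
 * matching bucket or install a new one popped from the per-cpu freelist.
 * Returns the bucket id on success, -EEXIST if a different stack already
 * occupies the bucket and BPF_F_REUSE_STACKID is not set, -ENOMEM if no free
 * bucket is available, or -EFAULT if the trace is shorter than the skip count.
 */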
static long __bpf_get_stackid(struct bpf_map *map,
			      struct perf_callchain_entry *trace, u64 flags)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	u32 hash, id, trace_nr, trace_len, i;
	bool user = flags & BPF_F_USER_STACK;
	u64 *ips;
	bool hash_matches;

	if (trace->nr <= skip)
		/* skipping more than usable stack trace */
		return -EFAULT;

	trace_nr = trace->nr - skip;
	trace_len = trace_nr * sizeof(u64);
	ips = trace->ip + skip;
	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
	id = hash & (smap->n_buckets - 1);
	bucket = READ_ONCE(smap->buckets[id]);

	hash_matches = bucket && bucket->hash == hash;
	/* fast cmp */
	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
		return id;

	if (stack_map_use_build_id(map)) {
		struct bpf_stack_build_id *id_offs;

		/* for build_id+offset, pop a bucket before slow cmp */
		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		new_bucket->nr = trace_nr;
		id_offs = (struct bpf_stack_build_id *)new_bucket->data;
		for (i = 0; i < trace_nr; i++)
			id_offs[i].ip = ips[i];
		stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */);
		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return id;
		}
		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return -EEXIST;
		}
	} else {
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, ips, trace_len) == 0)
			return id;
		if (bucket && !(flags & BPF_F_REUSE_STACKID))
			return -EEXIST;

		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		memcpy(new_bucket->data, ips, trace_len);
	}

	new_bucket->hash = hash;
	new_bucket->nr = trace_nr;

	old_bucket = xchg(&smap->buckets[id], new_bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return id;
}

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags)
{
	u32 max_depth = map->value_size / stack_map_data_size(map);
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	max_depth += skip;
	if (max_depth > sysctl_perf_event_max_stack)
		max_depth = sysctl_perf_event_max_stack;

	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
				   false, false);

	if (unlikely(!trace))
		/* couldn't fetch the stack trace */
		return -EFAULT;

	return __bpf_get_stackid(map, trace, flags);
}

const struct bpf_func_proto bpf_get_stackid_proto = {
	.func = bpf_get_stackid,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_CONST_MAP_PTR,
	.arg3_type = ARG_ANYTHING,
};

static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
{
	__u64 nr_kernel = 0;

	while (nr_kernel < trace->nr) {
		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
			break;
		nr_kernel++;
	}
	return nr_kernel;
}

BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_map *, map, u64, flags)
{
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	__u64 nr_kernel;
	int ret;

	/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
		return bpf_get_stackid((unsigned long)(ctx->regs),
				       (unsigned long) map, flags, 0, 0);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	trace = ctx->data->callchain;
	if (unlikely(!trace))
		return -EFAULT;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		ret = __bpf_get_stackid(map, trace, flags);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			return -EFAULT;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		ret = __bpf_get_stackid(map, trace, flags);
	}
	return ret;
}

const struct bpf_func_proto bpf_get_stackid_proto_pe = {
	.func = bpf_get_stackid_pe,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_CONST_MAP_PTR,
	.arg3_type = ARG_ANYTHING,
};
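/* Common backend of the bpf_get_stack*() helpers: capture (or reuse
 * @trace_in) a callchain, optionally translate user IPs into
 * build_id+offset records when BPF_F_USER_BUILD_ID is set, copy the
 * result into @buf, zero the rest of the buffer, and return the number
 * of bytes copied or a negative error. @may_fault selects the sleepable
 * build-ID lookup path.
 */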
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
			    struct perf_callchain_entry *trace_in,
			    void *buf, u32 size, u64 flags, bool may_fault)
{
	u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
	bool crosstask = task && task != current;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;
	int err = -EINVAL;
	u64 *ips;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;
	if (kernel && user_build_id)
		goto clear;

	elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64);
	if (unlikely(size % elem_size))
		goto clear;

	/* cannot get valid user stack for task without user_mode regs */
	if (task && user && !user_mode(regs))
		goto err_fault;

	/* get_perf_callchain does not support crosstask user stack walking
	 * but returns an empty stack instead of NULL.
	 */
	if (crosstask && user) {
		err = -EOPNOTSUPP;
		goto clear;
	}

	num_elem = size / elem_size;
	max_depth = num_elem + skip;
	if (sysctl_perf_event_max_stack < max_depth)
		max_depth = sysctl_perf_event_max_stack;

	if (may_fault)
		rcu_read_lock(); /* need RCU for perf's callchain below */

	if (trace_in)
		trace = trace_in;
	else if (kernel && task)
		trace = get_callchain_entry_for_task(task, max_depth);
	else
		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
					   crosstask, false);

	if (unlikely(!trace) || trace->nr < skip) {
		if (may_fault)
			rcu_read_unlock();
		goto err_fault;
	}

	trace_nr = trace->nr - skip;
	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
	copy_len = trace_nr * elem_size;

	ips = trace->ip + skip;
	if (user_build_id) {
		struct bpf_stack_build_id *id_offs = buf;
		u32 i;

		for (i = 0; i < trace_nr; i++)
			id_offs[i].ip = ips[i];
	} else {
		memcpy(buf, ips, copy_len);
	}

	/* trace/ips should not be dereferenced after this point */
	if (may_fault)
		rcu_read_unlock();

	if (user_build_id)
		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);

	if (size > copy_len)
		memset(buf + copy_len, 0, size - copy_len);
	return copy_len;

err_fault:
	err = -EFAULT;
clear:
	memset(buf, 0, size);
	return err;
}

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func = bpf_get_stack,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */);
}

const struct bpf_func_proto bpf_get_stack_sleepable_proto = {
	.func = bpf_get_stack_sleepable,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type = ARG_ANYTHING,
};
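/* Like __bpf_get_stack(), but for an arbitrary task: pin the task's stack,
 * derive its pt_regs via task_pt_regs(), and dump the trace into @buf.
 */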
static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size,
				 u64 flags, bool may_fault)
{
	struct pt_regs *regs;
	long res = -EINVAL;

	if (!try_get_task_stack(task))
		return -EFAULT;

	regs = task_pt_regs(task);
	if (regs)
		res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault);
	put_task_stack(task);

	return res;
}

BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */);
}

const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func = bpf_get_task_stack,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_BTF_ID,
	.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	return __bpf_get_task_stack(task, buf, size, flags, true /* may_fault */);
}

const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = {
	.func = bpf_get_task_stack_sleepable,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_BTF_ID,
	.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	int err = -EINVAL;
	__u64 nr_kernel;

	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	err = -EFAULT;
	trace = ctx->data->callchain;
	if (unlikely(!trace))
		goto clear;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			goto clear;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
	}
	return err;

clear:
	memset(buf, 0, size);
	return err;
}

const struct bpf_func_proto bpf_get_stack_proto_pe = {
	.func = bpf_get_stack_pe,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type = ARG_ANYTHING,
};

/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* Called from syscall */
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *old_bucket;
	u32 id = *(u32 *)key, trace_len;

	if (unlikely(id >= smap->n_buckets))
		return -ENOENT;

	bucket = xchg(&smap->buckets[id], NULL);
	if (!bucket)
		return -ENOENT;

	trace_len = bucket->nr * stack_map_data_size(map);
	memcpy(value, bucket->data, trace_len);
	memset(value + trace_len, 0, map->value_size - trace_len);

	old_bucket = xchg(&smap->buckets[id], bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return 0;
}
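/* Called from syscall to iterate bucket ids: a NULL or invalid *key restarts
 * the walk from bucket 0, otherwise scanning continues after *key; only
 * occupied buckets are reported, and -ENOENT marks the end of the walk.
 */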
static int stack_map_get_next_key(struct bpf_map *map, void *key,
				  void *next_key)
{
	struct bpf_stack_map *smap = container_of(map,
						  struct bpf_stack_map, map);
	u32 id;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!key) {
		id = 0;
	} else {
		id = *(u32 *)key;
		if (id >= smap->n_buckets || !smap->buckets[id])
			id = 0;
		else
			id++;
	}

	while (id < smap->n_buckets && !smap->buckets[id])
		id++;

	if (id >= smap->n_buckets)
		return -ENOENT;

	*(u32 *)next_key = id;
	return 0;
}

static long stack_map_update_elem(struct bpf_map *map, void *key, void *value,
				  u64 map_flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static long stack_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *old_bucket;
	u32 id = *(u32 *)key;

	if (unlikely(id >= smap->n_buckets))
		return -E2BIG;

	old_bucket = xchg(&smap->buckets[id], NULL);
	if (old_bucket) {
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
		return 0;
	} else {
		return -ENOENT;
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void stack_map_free(struct bpf_map *map)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);

	bpf_map_area_free(smap->elems);
	pcpu_freelist_destroy(&smap->freelist);
	bpf_map_area_free(smap);
	put_callchain_buffers();
}

static u64 stack_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	u64 value_size = map->value_size;
	u64 n_buckets = smap->n_buckets;
	u64 entries = map->max_entries;
	u64 usage = sizeof(*smap);

	usage += n_buckets * sizeof(struct stack_map_bucket *);
	usage += entries * (sizeof(struct stack_map_bucket) + value_size);
	return usage;
}

BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map)
const struct bpf_map_ops stack_trace_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = stack_map_alloc,
	.map_free = stack_map_free,
	.map_get_next_key = stack_map_get_next_key,
	.map_lookup_elem = stack_map_lookup_elem,
	.map_update_elem = stack_map_update_elem,
	.map_delete_elem = stack_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = stack_map_mem_usage,
	.map_btf_id = &stack_trace_map_btf_ids[0],
};