// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/btf_ids.h>
#include <linux/buildid.h>
#include "percpu_freelist.h"
#include "mmap_unlock_work.h"

#define STACK_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
	 BPF_F_STACK_BUILD_ID)

struct stack_map_bucket {
	struct pcpu_freelist_node fnode;
	u32 hash;
	u32 nr;
	u64 data[];
};

struct bpf_stack_map {
	struct bpf_map map;
	void *elems;
	struct pcpu_freelist freelist;
	u32 n_buckets;
	struct stack_map_bucket *buckets[] __counted_by(n_buckets);
};

static inline bool stack_map_use_build_id(struct bpf_map *map)
{
	return (map->map_flags & BPF_F_STACK_BUILD_ID);
}

static inline int stack_map_data_size(struct bpf_map *map)
{
	return stack_map_use_build_id(map) ?
		sizeof(struct bpf_stack_build_id) : sizeof(u64);
}

/**
 * stack_map_calculate_max_depth - Calculate maximum allowed stack trace depth
 * @size: Size of the buffer/map value in bytes
 * @elem_size: Size of each stack trace element
 * @flags: BPF stack trace flags (BPF_F_USER_STACK, BPF_F_USER_BUILD_ID, ...)
 *
 * Return: Maximum number of stack trace entries that can be safely stored
 */
static u32 stack_map_calculate_max_depth(u32 size, u32 elem_size, u64 flags)
{
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	u32 max_depth;
	u32 curr_sysctl_max_stack = READ_ONCE(sysctl_perf_event_max_stack);

	max_depth = size / elem_size;
	max_depth += skip;
	if (max_depth > curr_sysctl_max_stack)
		return curr_sysctl_max_stack;

	return max_depth;
}
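
/*
 * Worked example for stack_map_calculate_max_depth() above (illustrative
 * numbers only): with size = 1024, elem_size = 8 and skip = 2, the raw
 * depth is 1024 / 8 + 2 = 130; if sysctl_perf_event_max_stack is 127,
 * the result is clamped to 127.
 */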

static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
	u64 elem_size = sizeof(struct stack_map_bucket) +
			(u64)smap->map.value_size;
	int err;

	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
					 smap->map.numa_node);
	if (!smap->elems)
		return -ENOMEM;

	err = pcpu_freelist_init(&smap->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
			       smap->map.max_entries);
	return 0;

free_elems:
	bpf_map_area_free(smap->elems);
	return err;
}

/* Called from syscall */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
	u32 value_size = attr->value_size;
	struct bpf_stack_map *smap;
	u64 cost, n_buckets;
	int err;

	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
		return ERR_PTR(-EINVAL);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    value_size < 8 || value_size % 8)
		return ERR_PTR(-EINVAL);

	BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
	if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
		if (value_size % sizeof(struct bpf_stack_build_id) ||
		    value_size / sizeof(struct bpf_stack_build_id)
		    > sysctl_perf_event_max_stack)
			return ERR_PTR(-EINVAL);
	} else if (value_size / 8 > sysctl_perf_event_max_stack)
		return ERR_PTR(-EINVAL);

	/* hash table size must be power of 2; roundup_pow_of_two() can overflow
	 * into UB on 32-bit arches, so check that first
	 */
	if (attr->max_entries > 1UL << 31)
		return ERR_PTR(-E2BIG);

	n_buckets = roundup_pow_of_two(attr->max_entries);

	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
	if (!smap)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&smap->map, attr);
	smap->n_buckets = n_buckets;

	err = get_callchain_buffers(sysctl_perf_event_max_stack);
	if (err)
		goto free_smap;

	err = prealloc_elems_and_freelist(smap);
	if (err)
		goto put_buffers;

	return &smap->map;

put_buffers:
	put_callchain_buffers();
free_smap:
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}

static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault)
{
	return may_fault ? build_id_parse(vma, build_id, NULL)
			 : build_id_parse_nofault(vma, build_id, NULL);
}

/*
 * Expects all id_offs[i].ip values to be set to correct initial IPs.
 * They will be subsequently:
 * - either adjusted in place to a file offset, if build ID fetching
 *   succeeds; in this case id_offs[i].build_id is set to correct build ID,
 *   and id_offs[i].status is set to BPF_STACK_BUILD_ID_VALID;
 * - or IP will be kept intact, if build ID fetching failed; in this case
 *   id_offs[i].build_id is zeroed out and id_offs[i].status is set to
 *   BPF_STACK_BUILD_ID_IP.
 */
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
					  u32 trace_nr, bool user, bool may_fault)
{
	int i;
	struct mmap_unlock_irq_work *work = NULL;
	bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
	struct vm_area_struct *vma, *prev_vma = NULL;
	const char *prev_build_id;

	/* If the irq_work is in use, fall back to report ips. Same
	 * fallback is used for kernel stack (!user) on a stackmap with
	 * build_id.
	 */
	if (!user || !current || !current->mm || irq_work_busy ||
	    !mmap_read_trylock(current->mm)) {
		/* cannot access current->mm, fall back to ips */
		for (i = 0; i < trace_nr; i++) {
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
		}
		return;
	}

	for (i = 0; i < trace_nr; i++) {
		u64 ip = READ_ONCE(id_offs[i].ip);

		if (range_in_vma(prev_vma, ip, ip)) {
			vma = prev_vma;
			memcpy(id_offs[i].build_id, prev_build_id, BUILD_ID_SIZE_MAX);
			goto build_id_valid;
		}
		vma = find_vma(current->mm, ip);
		if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
			/* per entry fall back to ips */
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
			continue;
		}
build_id_valid:
		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start;
		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
		prev_vma = vma;
		prev_build_id = id_offs[i].build_id;
	}
	bpf_mmap_unlock_mm(work, current->mm);
}
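
/*
 * Illustrative offset calculation for stack_map_get_build_id_offset()
 * above (numbers are made up): if a VMA maps file offset 0x2000
 * (vm_pgoff << PAGE_SHIFT) at vm_start 0x7f0000402000 and the sampled
 * ip is 0x7f0000402abc, the reported offset is
 * 0x2000 + 0x7f0000402abc - 0x7f0000402000 = 0x2abc.
 */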

static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
{
#ifdef CONFIG_STACKTRACE
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);

	if (!entry)
		return NULL;

	entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
					 max_depth, 0);

	/* stack_trace_save_tsk() works on unsigned long array, while
	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
	 * necessary to fix this mismatch.
	 */
	if (__BITS_PER_LONG != 64) {
		unsigned long *from = (unsigned long *) entry->ip;
		u64 *to = entry->ip;
		int i;

		/* copy data from the end to avoid using extra buffer */
		for (i = entry->nr - 1; i >= 0; i--)
			to[i] = (u64)(from[i]);
	}

	put_callchain_entry(rctx);

	return entry;
#else /* CONFIG_STACKTRACE */
	return NULL;
#endif
}

static long __bpf_get_stackid(struct bpf_map *map,
			      struct perf_callchain_entry *trace, u64 flags)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
	u32 hash, id, trace_nr, trace_len, i, max_depth;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	u64 *ips;
	bool hash_matches;

	if (trace->nr <= skip)
		/* skipping more than usable stack trace */
		return -EFAULT;

	max_depth = stack_map_calculate_max_depth(map->value_size, stack_map_data_size(map), flags);
	trace_nr = min_t(u32, trace->nr - skip, max_depth - skip);
	trace_len = trace_nr * sizeof(u64);
	ips = trace->ip + skip;
	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
	id = hash & (smap->n_buckets - 1);
	bucket = READ_ONCE(smap->buckets[id]);

	hash_matches = bucket && bucket->hash == hash;
	/* fast cmp */
	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
		return id;

	if (stack_map_use_build_id(map)) {
		struct bpf_stack_build_id *id_offs;

		/* for build_id+offset, pop a bucket before slow cmp */
		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		new_bucket->nr = trace_nr;
		id_offs = (struct bpf_stack_build_id *)new_bucket->data;
		for (i = 0; i < trace_nr; i++)
			id_offs[i].ip = ips[i];
		stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */);
		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return id;
		}
		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return -EEXIST;
		}
	} else {
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, ips, trace_len) == 0)
			return id;
		if (bucket && !(flags & BPF_F_REUSE_STACKID))
			return -EEXIST;

		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		memcpy(new_bucket->data, ips, trace_len);
	}

	new_bucket->hash = hash;
	new_bucket->nr = trace_nr;

	old_bucket = xchg(&smap->buckets[id], new_bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return id;
}
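
/*
 * Illustrative BPF-program-side use of the bpf_get_stackid() helper
 * defined below (not part of this file; the map and program names are
 * hypothetical, and the kprobe target is just an example):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 *		__uint(max_entries, 1024);
 *		__uint(key_size, sizeof(__u32));
 *		__uint(value_size, sizeof(__u64) * PERF_MAX_STACK_DEPTH);
 *	} stacks SEC(".maps");
 *
 *	SEC("kprobe/some_kernel_func")
 *	int collect(struct pt_regs *ctx)
 *	{
 *		long id = bpf_get_stackid(ctx, &stacks, 0);
 *		return 0;
 *	}
 */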

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags)
{
	u32 elem_size = stack_map_data_size(map);
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;
	u32 max_depth;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
	trace = get_perf_callchain(regs, kernel, user, max_depth,
				   false, false, 0);

	if (unlikely(!trace))
		/* couldn't fetch the stack trace */
		return -EFAULT;

	return __bpf_get_stackid(map, trace, flags);
}

const struct bpf_func_proto bpf_get_stackid_proto = {
	.func		= bpf_get_stackid,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
{
	__u64 nr_kernel = 0;

	while (nr_kernel < trace->nr) {
		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
			break;
		nr_kernel++;
	}
	return nr_kernel;
}

BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_map *, map, u64, flags)
{
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	__u64 nr_kernel;
	int ret;

	/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
		return bpf_get_stackid((unsigned long)(ctx->regs),
				       (unsigned long) map, flags, 0, 0);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	trace = ctx->data->callchain;
	if (unlikely(!trace))
		return -EFAULT;

	nr_kernel = count_kernel_ip(trace);
	__u64 nr = trace->nr; /* save original */

	if (kernel) {
		trace->nr = nr_kernel;
		ret = __bpf_get_stackid(map, trace, flags);
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			return -EFAULT;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		ret = __bpf_get_stackid(map, trace, flags);
	}

	/* restore nr */
	trace->nr = nr;

	return ret;
}

const struct bpf_func_proto bpf_get_stackid_proto_pe = {
	.func		= bpf_get_stackid_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
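
/*
 * Skip handling in the user branch of bpf_get_stackid_pe() above, by way
 * of example (numbers are made up): with nr_kernel = 10 and a requested
 * skip of 3, the flags passed to __bpf_get_stackid() carry skip = 13, so
 * the kernel part of the shared callchain is dropped before the user
 * frames are hashed.
 */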

static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
			    struct perf_callchain_entry *trace_in,
			    void *buf, u32 size, u64 flags, bool may_fault)
{
	u32 trace_nr, copy_len, elem_size, max_depth;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
	bool crosstask = task && task != current;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;
	int err = -EINVAL;
	u64 *ips;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;
	if (kernel && user_build_id)
		goto clear;

	elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64);
	if (unlikely(size % elem_size))
		goto clear;

	/* cannot get valid user stack for task without user_mode regs */
	if (task && user && !user_mode(regs))
		goto err_fault;

	/* get_perf_callchain does not support crosstask user stack walking
	 * but returns an empty stack instead of NULL.
	 */
	if (crosstask && user) {
		err = -EOPNOTSUPP;
		goto clear;
	}

	max_depth = stack_map_calculate_max_depth(size, elem_size, flags);

	if (may_fault)
		rcu_read_lock(); /* need RCU for perf's callchain below */

	if (trace_in) {
		trace = trace_in;
		trace->nr = min_t(u32, trace->nr, max_depth);
	} else if (kernel && task) {
		trace = get_callchain_entry_for_task(task, max_depth);
	} else {
		trace = get_perf_callchain(regs, kernel, user, max_depth,
					   crosstask, false, 0);
	}

	if (unlikely(!trace) || trace->nr < skip) {
		if (may_fault)
			rcu_read_unlock();
		goto err_fault;
	}

	trace_nr = trace->nr - skip;
	copy_len = trace_nr * elem_size;

	ips = trace->ip + skip;
	if (user_build_id) {
		struct bpf_stack_build_id *id_offs = buf;
		u32 i;

		for (i = 0; i < trace_nr; i++)
			id_offs[i].ip = ips[i];
	} else {
		memcpy(buf, ips, copy_len);
	}

	/* trace/ips should not be dereferenced after this point */
	if (may_fault)
		rcu_read_unlock();

	if (user_build_id)
		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);

	if (size > copy_len)
		memset(buf + copy_len, 0, size - copy_len);
	return copy_len;

err_fault:
	err = -EFAULT;
clear:
	memset(buf, 0, size);
	return err;
}

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func		= bpf_get_stack,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */);
}

const struct bpf_func_proto bpf_get_stack_sleepable_proto = {
	.func		= bpf_get_stack_sleepable,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size,
				 u64 flags, bool may_fault)
{
	struct pt_regs *regs;
	long res = -EINVAL;

	if (!try_get_task_stack(task))
		return -EFAULT;

	regs = task_pt_regs(task);
	if (regs)
		res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault);
	put_task_stack(task);

	return res;
}

BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */);
}

const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func		= bpf_get_task_stack,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
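
/*
 * The _sleepable variant below passes may_fault = true, so
 * fetch_build_id() can take the faulting build_id_parse() path instead
 * of the nofault one. A minimal sketch of calling the helper from a
 * sleepable task iterator (not part of this file; helper-to-proto
 * wiring lives elsewhere and the program name is hypothetical):
 *
 *	static __u64 ips[PERF_MAX_STACK_DEPTH];
 *
 *	SEC("iter.s/task")
 *	int dump_stack(struct bpf_iter__task *ctx)
 *	{
 *		struct task_struct *task = ctx->task;
 *
 *		if (task)
 *			bpf_get_task_stack(task, ips, sizeof(ips), 0);
 *		return 0;
 *	}
 */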

BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	return __bpf_get_task_stack(task, buf, size, flags, true /* may_fault */);
}

const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = {
	.func		= bpf_get_task_stack_sleepable,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	int err = -EINVAL;
	__u64 nr_kernel;

	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	err = -EFAULT;
	trace = ctx->data->callchain;
	if (unlikely(!trace))
		goto clear;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			goto clear;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
	}
	return err;

clear:
	memset(buf, 0, size);
	return err;
}

const struct bpf_func_proto bpf_get_stack_proto_pe = {
	.func		= bpf_get_stack_pe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* Called from syscall */
static int stack_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					    void *value, u64 flags)
{
	return bpf_stackmap_extract(map, key, value, true);
}

/* Called from syscall */
int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
			 bool delete)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *old_bucket;
	u32 id = *(u32 *)key, trace_len;

	if (unlikely(id >= smap->n_buckets))
		return -ENOENT;

	bucket = xchg(&smap->buckets[id], NULL);
	if (!bucket)
		return -ENOENT;

	trace_len = bucket->nr * stack_map_data_size(map);
	memcpy(value, bucket->data, trace_len);
	memset(value + trace_len, 0, map->value_size - trace_len);

	if (delete)
		old_bucket = bucket;
	else
		old_bucket = xchg(&smap->buckets[id], bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return 0;
}
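
/*
 * Illustrative user-space read-out of a stack id produced by
 * bpf_get_stackid() (not part of this file; fd and variable names are
 * hypothetical):
 *
 *	__u64 ips[PERF_MAX_STACK_DEPTH] = {};
 *	__u32 stack_id = ...;	/. value read from a companion map ./
 *
 *	if (!bpf_map_lookup_elem(stacks_fd, &stack_id, ips)) {
 *		/. ips[] now holds the trace, zero-padded to value_size ./
 *	}
 */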

static int stack_map_get_next_key(struct bpf_map *map, void *key,
				  void *next_key)
{
	struct bpf_stack_map *smap = container_of(map,
						  struct bpf_stack_map, map);
	u32 id;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!key) {
		id = 0;
	} else {
		id = *(u32 *)key;
		if (id >= smap->n_buckets || !smap->buckets[id])
			id = 0;
		else
			id++;
	}

	while (id < smap->n_buckets && !smap->buckets[id])
		id++;

	if (id >= smap->n_buckets)
		return -ENOENT;

	*(u32 *)next_key = id;
	return 0;
}

static long stack_map_update_elem(struct bpf_map *map, void *key, void *value,
				  u64 map_flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static long stack_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *old_bucket;
	u32 id = *(u32 *)key;

	if (unlikely(id >= smap->n_buckets))
		return -E2BIG;

	old_bucket = xchg(&smap->buckets[id], NULL);
	if (old_bucket) {
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
		return 0;
	} else {
		return -ENOENT;
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void stack_map_free(struct bpf_map *map)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);

	bpf_map_area_free(smap->elems);
	pcpu_freelist_destroy(&smap->freelist);
	bpf_map_area_free(smap);
	put_callchain_buffers();
}

static u64 stack_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	u64 value_size = map->value_size;
	u64 n_buckets = smap->n_buckets;
	u64 entries = map->max_entries;
	u64 usage = sizeof(*smap);

	usage += n_buckets * sizeof(struct stack_map_bucket *);
	usage += entries * (sizeof(struct stack_map_bucket) + value_size);
	return usage;
}

BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map)
const struct bpf_map_ops stack_trace_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = stack_map_alloc,
	.map_free = stack_map_free,
	.map_get_next_key = stack_map_get_next_key,
	.map_lookup_elem = stack_map_lookup_elem,
	.map_lookup_and_delete_elem = stack_map_lookup_and_delete_elem,
	.map_update_elem = stack_map_update_elem,
	.map_delete_elem = stack_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = stack_map_mem_usage,
	.map_btf_id = &stack_trace_map_btf_ids[0],
};