// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
#include <linux/bpf_lsm.h>

#include <net/bpf_sk_storage.h>

#include <uapi/linux/bpf.h>
#include <uapi/linux/btf.h>

#include <asm/tlb.h>

#include "trace_probe.h"
#include "trace.h"

#define CREATE_TRACE_POINTS
#include "bpf_trace.h"

#define bpf_event_rcu_dereference(p)					\
	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))

#ifdef CONFIG_MODULES
struct bpf_trace_module {
	struct module *module;
	struct list_head list;
};

static LIST_HEAD(bpf_trace_modules);
static DEFINE_MUTEX(bpf_module_mutex);

static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	struct bpf_raw_event_map *btp, *ret = NULL;
	struct bpf_trace_module *btm;
	unsigned int i;

	mutex_lock(&bpf_module_mutex);
	list_for_each_entry(btm, &bpf_trace_modules, list) {
		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
			btp = &btm->module->bpf_raw_events[i];
			if (!strcmp(btp->tp->name, name)) {
				if (try_module_get(btm->module))
					ret = btp;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&bpf_module_mutex);
	return ret;
}
#else
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
#endif /* CONFIG_MODULES */

u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id);

/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	cant_sleep();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we did a bpf_prog_array_valid() there to check
	 * whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If the prog_array fetched by bpf_prog_array_valid() was non-NULL,
	 * we come into trace_call_bpf() and do the proper rcu_dereference()
	 * under RCU lock. If it turns out that prog_array is NULL, we bail
	 * out here.
	 * Conversely, if the fetched pointer was NULL, the call site skips
	 * the prog_array entirely, at the risk of missing events that were
	 * added between that check and the rcu_dereference(); this is an
	 * accepted risk.
	 */
	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);

 out:
	__this_cpu_dec(bpf_prog_active);

	return ret;
}

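/*
 * With CONFIG_BPF_KPROBE_OVERRIDE, a kprobe BPF program attached to an
 * error-injectable function can force an early return from the probed
 * function with the given return value.
 */
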
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
#endif

static __always_inline int
bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
	int ret;

	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_proto = {
	.func		= bpf_probe_read_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

static __always_inline int
bpf_probe_read_user_str_common(void *dst, u32 size,
			       const void __user *unsafe_ptr)
{
	int ret;

	/*
	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
	 * terminator into `dst`.
	 *
	 * strncpy_from_user() does long-sized strides in the fast path. If the
	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
	 * then there could be junk after the NUL in `dst`. If user takes `dst`
	 * and keys a hash map with it, then semantically identical strings can
	 * occupy multiple entries in the map.
	 */
	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_str_proto = {
	.func		= bpf_probe_read_user_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

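/*
 * Kernel-space counterparts of the probe_read helpers above: read from
 * arbitrary kernel addresses via the non-faulting copy primitives.
 */
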
static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
	int ret;

	ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_proto = {
	.func		= bpf_probe_read_kernel,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

static __always_inline int
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_kernel_nofault() call will likely not fill the
	 * entire buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
	.func		= bpf_probe_read_kernel_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

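/*
 * Compat variants back the legacy bpf_probe_read()/bpf_probe_read_str()
 * helpers: the pointer is guessed to be a user or kernel address based on
 * TASK_SIZE, which is only safe on architectures where the two address
 * spaces do not overlap.
 */
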
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_proto = {
	.func		= bpf_probe_read_compat,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_str_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
	.func		= bpf_probe_read_compat_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */

BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the additional checks below.
	 *
	 * nmi_uaccess_okay() ensures the probe is not run in an interim
	 * state, when the task or mm are switched. This is specifically
	 * required to prevent the use of temporary mm.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;

	return copy_to_user_nofault(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	if (!capable(CAP_SYS_ADMIN))
		return NULL;

	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}

static DEFINE_RAW_SPINLOCK(trace_printk_lock);

#define MAX_TRACE_PRINTK_VARARGS	3
#define BPF_TRACE_PRINTK_SIZE		1024

BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
	u32 *bin_args;
	static char buf[BPF_TRACE_PRINTK_SIZE];
	unsigned long flags;
	int ret;

	ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
				  MAX_TRACE_PRINTK_VARARGS);
	if (ret < 0)
		return ret;

	raw_spin_lock_irqsave(&trace_printk_lock, flags);
	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);

	trace_bpf_trace_printk(buf);
	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);

	bpf_bprintf_cleanup();

	return ret;
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
};

static void __set_printk_clr_event(void)
{
	/*
	 * This program might be calling bpf_trace_printk,
	 * so enable the associated bpf_trace/bpf_trace_printk event.
	 * Repeat this each time as it is possible a user has
	 * disabled bpf_trace_printk events.  By loading a program
	 * calling bpf_trace_printk() however the user has expressed
	 * the intent to see such events.
	 */
	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
		pr_warn_ratelimited("could not enable bpf_trace_printk events");
}

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	__set_printk_clr_event();
	return &bpf_trace_printk_proto;
}

BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
	   u32, data_len)
{
	static char buf[BPF_TRACE_PRINTK_SIZE];
	unsigned long flags;
	int ret, num_args;
	u32 *bin_args;

	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !data))
		return -EINVAL;
	num_args = data_len / 8;

	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
	if (ret < 0)
		return ret;

	raw_spin_lock_irqsave(&trace_printk_lock, flags);
	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);

	trace_bpf_trace_printk(buf);
	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);

	bpf_bprintf_cleanup();

	return ret;
}

static const struct bpf_func_proto bpf_trace_vprintk_proto = {
	.func		= bpf_trace_vprintk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
};

const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
	__set_printk_clr_event();
	return &bpf_trace_vprintk_proto;
}

BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
	   const void *, data, u32, data_len)
{
	int err, num_args;
	u32 *bin_args;

	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !data))
		return -EINVAL;
	num_args = data_len / 8;

	err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
	if (err < 0)
		return err;

	seq_bprintf(m, fmt, bin_args);

	bpf_bprintf_cleanup();

	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}

BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)

static const struct bpf_func_proto bpf_seq_printf_proto = {
	.func		= bpf_seq_printf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
{
	return seq_write(m, data, len) ? -EOVERFLOW : 0;
}

static const struct bpf_func_proto bpf_seq_write_proto = {
	.func		= bpf_seq_write,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *btf;
	s32 btf_id;
	int ret;

	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	if (ret)
		return ret;

	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
}

static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
	.func		= bpf_seq_printf_btf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

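/*
 * Shared by bpf_perf_event_read() and bpf_perf_event_read_value(): resolve
 * the perf event stored in the BPF_MAP_TYPE_PERF_EVENT_ARRAY slot (or the
 * current CPU's slot for BPF_F_CURRENT_CPU) and read its counter locally.
 */
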
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}

BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_sample_data *sd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	return perf_event_output(event, sd, regs);
}

/*
 * Support executing tracepoints in normal, irq, and nmi context that each call
 * bpf_perf_event_output
 */
struct bpf_trace_sample_data {
	struct perf_sample_data sds[3];
};

static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
	int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};
	struct perf_sample_data *sd;
	int err;

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
		err = -EBUSY;
		goto out;
	}

	sd = &sds->sds[nest_level - 1];

	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
		err = -EINVAL;
		goto out;
	}

	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	err = __bpf_perf_event_output(regs, map, flags, sd);

out:
	this_cpu_dec(bpf_trace_nest_level);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

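/*
 * bpf_event_output() below is the backend for output wrappers that supply
 * their own metadata plus a context copy callback (e.g. bpf_skb_output and
 * bpf_xdp_output); nesting is handled the same way as above via per-CPU
 * pt_regs and sample storage.
 */
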
static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
struct bpf_nested_pt_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			{
				.next	= ctx_size ? &frag : NULL,
			},
			.size	= meta_size,
			.data	= meta,
		},
	};
	struct perf_sample_data *sd;
	struct pt_regs *regs;
	u64 ret;

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
		ret = -EBUSY;
		goto out;
	}
	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
	this_cpu_dec(bpf_event_output_nest_level);
	return ret;
}

BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}

const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_task_btf)
{
	return (unsigned long) current;
}

const struct bpf_func_proto bpf_get_current_task_btf_proto = {
	.func		= bpf_get_current_task_btf,
	.gpl_only	= true,
	.ret_type	= RET_PTR_TO_BTF_ID,
	.ret_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};

BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
{
	return (unsigned long) task_pt_regs(task);
}

BTF_ID_LIST(bpf_task_pt_regs_ids)
BTF_ID(struct, pt_regs)

const struct bpf_func_proto bpf_task_pt_regs_proto = {
	.func		= bpf_task_pt_regs,
	.gpl_only	= true,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.ret_type	= RET_PTR_TO_BTF_ID,
	.ret_btf_id	= &bpf_task_pt_regs_ids[0],
};

BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
	.func		= bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

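/*
 * bpf_send_signal() and bpf_send_signal_thread() may run with IRQs disabled
 * (e.g. in NMI context), where group_send_sig_info() cannot be called
 * directly; in that case delivery is deferred to irq_work.
 */
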
struct send_signal_irq_work {
	struct irq_work irq_work;
	struct task_struct *task;
	u32 sig;
	enum pid_type type;
};

static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);

static void do_bpf_send_signal(struct irq_work *entry)
{
	struct send_signal_irq_work *work;

	work = container_of(entry, struct send_signal_irq_work, irq_work);
	group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
}

static int bpf_send_signal_common(u32 sig, enum pid_type type)
{
	struct send_signal_irq_work *work = NULL;

	/* Similar to bpf_probe_write_user, task needs to be
	 * in a sound condition and kernel memory access be
	 * permitted in order to send signal to the current
	 * task.
	 */
	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;

	if (irqs_disabled()) {
		/* Do an early check on signal validity. Otherwise,
		 * the error is lost in deferred irq_work.
		 */
		if (unlikely(!valid_signal(sig)))
			return -EINVAL;

		work = this_cpu_ptr(&send_signal_work);
		if (irq_work_is_busy(&work->irq_work))
			return -EBUSY;

		/* Add the current task, which is the target of sending signal,
		 * to the irq_work. The current task may change when queued
		 * irq works get executed.
		 */
		work->task = current;
		work->sig = sig;
		work->type = type;
		irq_work_queue(&work->irq_work);
		return 0;
	}

	return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
}

BPF_CALL_1(bpf_send_signal, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_TGID);
}

static const struct bpf_func_proto bpf_send_signal_proto = {
	.func		= bpf_send_signal,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_PID);
}

static const struct bpf_func_proto bpf_send_signal_thread_proto = {
	.func		= bpf_send_signal_thread,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
{
	long len;
	char *p;

	if (!sz)
		return 0;

	p = d_path(path, buf, sz);
	if (IS_ERR(p)) {
		len = PTR_ERR(p);
	} else {
		len = buf + sz - p;
		memmove(buf, p, len);
	}

	return len;
}

BTF_SET_START(btf_allowlist_d_path)
#ifdef CONFIG_SECURITY
BTF_ID(func, security_file_permission)
BTF_ID(func, security_inode_getattr)
BTF_ID(func, security_file_open)
#endif
#ifdef CONFIG_SECURITY_PATH
BTF_ID(func, security_path_truncate)
#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
BTF_ID(func, vfs_getattr)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)

static bool bpf_d_path_allowed(const struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_TRACING &&
	    prog->expected_attach_type == BPF_TRACE_ITER)
		return true;

	if (prog->type == BPF_PROG_TYPE_LSM)
		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);

	return btf_id_set_contains(&btf_allowlist_d_path,
				   prog->aux->attach_btf_id);
}

BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)

static const struct bpf_func_proto bpf_d_path_proto = {
	.func		= bpf_d_path,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.allowed	= bpf_d_path_allowed,
};

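/*
 * bpf_btf_printf_prepare() validates the btf_ptr argument and flags shared
 * by bpf_snprintf_btf() and bpf_seq_printf_btf(), and resolves the vmlinux
 * BTF plus the type id to be printed.
 */
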
#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
			 BTF_F_PTR_RAW | BTF_F_ZERO)

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id)
{
	const struct btf_type *t;

	if (unlikely(flags & ~(BTF_F_ALL)))
		return -EINVAL;

	if (btf_ptr_size != sizeof(struct btf_ptr))
		return -EINVAL;

	*btf = bpf_get_btf_vmlinux();

	if (IS_ERR_OR_NULL(*btf))
		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

	if (ptr->type_id > 0)
		*btf_id = ptr->type_id;
	else
		return -EINVAL;

	if (*btf_id > 0)
		t = btf_type_by_id(*btf, *btf_id);
	if (*btf_id <= 0 || !t)
		return -ENOENT;

	return 0;
}

BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *btf;
	s32 btf_id;
	int ret;

	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	if (ret)
		return ret;

	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
				      flags);
}

const struct bpf_func_proto bpf_snprintf_btf_proto = {
	.func		= bpf_snprintf_btf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-2];
}

static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
	.func		= bpf_get_func_ip_tracing,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
	struct kprobe *kp = kprobe_running();

	return kp ? (uintptr_t)kp->addr : 0;
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
	.func		= bpf_get_func_ip_kprobe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
	.func		= bpf_get_attach_cookie_trace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
{
	return ctx->event->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
	.func		= bpf_get_attach_cookie_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
#ifndef CONFIG_X86
	return -ENOENT;
#else
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	u32 entry_cnt = size / br_entry_size;

	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);

	if (unlikely(flags))
		return -EINVAL;

	if (!entry_cnt)
		return -ENOENT;

	return entry_cnt * br_entry_size;
#endif
}

static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
	.func		= bpf_get_branch_snapshot,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1];

	if ((u64) n >= nr_args)
		return -EINVAL;
	*value = ((u64 *)ctx)[n];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_arg_proto = {
	.func		= get_func_arg,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1];

	*value = ((u64 *)ctx)[nr_args];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_ret_proto = {
	.func		= get_func_ret,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-1];
}

static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
	.func		= get_func_arg_cnt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

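/*
 * Helpers available to all tracing-style program types; the more specific
 * *_func_proto() callbacks below fall back to this function.
 */
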
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_task_btf:
		return &bpf_get_current_task_btf_proto;
	case BPF_FUNC_task_pt_regs:
		return &bpf_task_pt_regs_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_probe_write_user:
		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
		       NULL : bpf_get_probe_write_proto();
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	case BPF_FUNC_probe_read:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_proto;
	case BPF_FUNC_probe_read_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_get_current_ancestor_cgroup_id:
		return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
	case BPF_FUNC_send_signal:
		return &bpf_send_signal_proto;
	case BPF_FUNC_send_signal_thread:
		return &bpf_send_signal_thread_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
	case BPF_FUNC_get_ns_current_pid_tgid:
		return &bpf_get_ns_current_pid_tgid_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_get_task_stack:
		return &bpf_get_task_stack_proto;
	case BPF_FUNC_copy_from_user:
		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	case BPF_FUNC_task_storage_get:
		return &bpf_task_storage_get_proto;
	case BPF_FUNC_task_storage_delete:
		return &bpf_task_storage_delete_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	case BPF_FUNC_get_func_ip:
		return &bpf_get_func_ip_proto_tracing;
	case BPF_FUNC_get_branch_snapshot:
		return &bpf_get_branch_snapshot_proto;
	case BPF_FUNC_find_vma:
		return &bpf_find_vma_proto;
	case BPF_FUNC_trace_vprintk:
		return bpf_get_trace_vprintk_proto();
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	case BPF_FUNC_get_func_ip:
		return &bpf_get_func_ip_proto_kprobe;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

const struct bpf_prog_ops kprobe_prog_ops = {
};

BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

const struct bpf_prog_ops tracepoint_prog_ops = {
};

BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
	.func		= bpf_perf_prog_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	struct perf_branch_stack *br_stack = ctx->data->br_stack;
	u32 to_copy;

	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
		return -EINVAL;

	if (unlikely(!br_stack))
		return -ENOENT;

	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
		return br_stack->nr * br_entry_size;

	if (!buf || (size % br_entry_size != 0))
		return -EINVAL;

	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
	memcpy(buf, br_stack->entries, to_copy);

	return to_copy;
}

static const struct bpf_func_proto bpf_read_branch_records_proto = {
	.func		= bpf_read_branch_records,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_pe;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_pe;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto;
	case BPF_FUNC_read_branch_records:
		return &bpf_read_branch_records_proto;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_pe;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context.
 */
struct bpf_raw_tp_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
static struct pt_regs *get_bpf_raw_tp_regs(void)
{
	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
		this_cpu_dec(bpf_raw_tp_nest_level);
		return ERR_PTR(-EBUSY);
	}

	return &tp_regs->regs[nest_level - 1];
}

static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}

BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;

BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			      flags, 0, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			    (unsigned long) size, flags, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
	.func		= bpf_get_stack_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_raw_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_raw_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_raw_tp;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

const struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_skb_output:
		return &bpf_skb_output_proto;
	case BPF_FUNC_xdp_output:
		return &bpf_xdp_output_proto;
	case BPF_FUNC_skc_to_tcp6_sock:
		return &bpf_skc_to_tcp6_sock_proto;
	case BPF_FUNC_skc_to_tcp_sock:
		return &bpf_skc_to_tcp_sock_proto;
	case BPF_FUNC_skc_to_tcp_timewait_sock:
		return &bpf_skc_to_tcp_timewait_sock_proto;
	case BPF_FUNC_skc_to_tcp_request_sock:
		return &bpf_skc_to_tcp_request_sock_proto;
	case BPF_FUNC_skc_to_udp6_sock:
		return &bpf_skc_to_udp6_sock_proto;
	case BPF_FUNC_skc_to_unix_sock:
		return &bpf_skc_to_unix_sock_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_tracing_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_tracing_proto;
	case BPF_FUNC_sock_from_file:
		return &bpf_sock_from_file_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_ptr_cookie_proto;
#endif
	case BPF_FUNC_seq_printf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_proto :
		       NULL;
	case BPF_FUNC_seq_write:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_write_proto :
		       NULL;
	case BPF_FUNC_seq_printf_btf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_btf_proto :
		       NULL;
	case BPF_FUNC_d_path:
		return &bpf_d_path_proto;
	case BPF_FUNC_get_func_arg:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
	case BPF_FUNC_get_func_ret:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
	case BPF_FUNC_get_func_arg_cnt:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
	default:
		fn = raw_tp_prog_func_proto(func_id, prog);
		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
			fn = bpf_iter_get_func_proto(func_id, prog);
		return fn;
	}
}

static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}

static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}

int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}

const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_prog_test_run_raw_tp,
#endif
};

const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto  = tracing_prog_func_proto,
	.is_valid_access = tracing_prog_is_valid_access,
};

const struct bpf_prog_ops tracing_prog_ops = {
	.test_run = bpf_prog_test_run_tracing,
};

static bool raw_tp_writable_prog_is_valid_access(int off, int size,
						 enum bpf_access_type type,
						 const struct bpf_prog *prog,
						 struct bpf_insn_access_aux *info)
{
	if (off == 0) {
		if (size != sizeof(u64) || type != BPF_READ)
			return false;
		info->reg_type = PTR_TO_TP_BUFFER;
	}
	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}

const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};

static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const int size_u64 = sizeof(u64);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0) {
		if (sizeof(unsigned long) != 4)
			return false;
		if (size != 8)
			return false;
		if (off % size != 4)
			return false;
	}

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	case bpf_ctx_range(struct bpf_perf_event_data, addr):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	default:
		if (size != sizeof(long))
			return false;
	}

	return true;
}

static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	case offsetof(struct bpf_perf_event_data, addr):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, addr, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

static DEFINE_MUTEX(bpf_event_mutex);

#define BPF_TRACE_MAX_PROGS 64

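/*
 * Attach @prog to the perf event's tp_event prog_array, serialized by
 * bpf_event_mutex. At most BPF_TRACE_MAX_PROGS programs may be attached to
 * one prog_array, and a given perf event carries at most one program.
 */
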
int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog,
			       u64 bpf_cookie)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	/*
	 * Kprobe override only works if the probe is on the function entry,
	 * and only if the probed function is on the error-injection opt-in
	 * list.
	 */
	if (prog->kprobe_override &&
	    (!trace_kprobe_on_func_entry(event->tp_event) ||
	     !trace_kprobe_error_injectable(event->tp_event)))
		return -EINVAL;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	event->bpf_cookie = bpf_cookie;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
	if (ret == -ENOENT)
		goto unlock;
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}

int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	struct bpf_prog_array *progs;
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!perfmon_capable())
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
	 */

	mutex_lock(&bpf_event_mutex);
	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
	mutex_unlock(&bpf_event_mutex);

	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
		ret = -EFAULT;

	kfree(ids);
	return ret;
}

extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

	for (; btp < __stop__bpf_raw_tp; btp++) {
		if (!strcmp(btp->tp->name, name))
			return btp;
	}

	return bpf_get_raw_tracepoint_module(name);
}

void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
	struct module *mod;

	preempt_disable();
	mod = __module_address((unsigned long)btp);
	module_put(mod);
	preempt_enable();
}

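/*
 * __bpf_trace_run() and the REPEAT() machinery below expand into
 * bpf_trace_run1() .. bpf_trace_run12(), which marshal up to twelve u64
 * tracepoint arguments into an array and run the attached program under RCU.
 */
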
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	struct bpf_prog_array *progs;
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!perfmon_capable())
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when the user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
	 */

	mutex_lock(&bpf_event_mutex);
	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
	mutex_unlock(&bpf_event_mutex);

	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
		ret = -EFAULT;

	kfree(ids);
	return ret;
}

extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

	for (; btp < __stop__bpf_raw_tp; btp++) {
		if (!strcmp(btp->tp->name, name))
			return btp;
	}

	return bpf_get_raw_tracepoint_module(name);
}

void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
	struct module *mod;

	preempt_disable();
	mod = __module_address((unsigned long)btp);
	module_put(mod);
	preempt_enable();
}

static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
	cant_sleep();
	rcu_read_lock();
	(void) bpf_prog_run(prog, args);
	rcu_read_unlock();
}

#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(prog, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
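
/*
 * Illustrative expansion (modulo whitespace) of one of the thunks defined
 * above, e.g. BPF_TRACE_DEFN_x(2):
 *
 *	void bpf_trace_run2(struct bpf_prog *prog, u64 arg0, u64 arg1)
 *	{
 *		u64 args[2];
 *		args[0] = arg0; args[1] = arg1;
 *		__bpf_trace_run(prog, args);
 *	}
 *	EXPORT_SYMBOL_GPL(bpf_trace_run2);
 *
 * i.e. one exported helper per argument count (1..12) that packs the raw
 * tracepoint arguments into a u64 array and hands that array to the
 * program as its context.
 */
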
static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	struct tracepoint *tp = btp->tp;

	/*
	 * check that the program doesn't access arguments beyond what's
	 * available in this tracepoint
	 */
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	if (prog->aux->max_tp_access > btp->writable_size)
		return -EINVAL;

	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
						   prog);
}

int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return __bpf_probe_register(btp, prog);
}

int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
}
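
/*
 * Sketch of the userspace side that lands in bpf_probe_register() above
 * (the kernel entry point is the bpf() syscall handler in
 * kernel/bpf/syscall.c):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.raw_tracepoint.name = (__u64)(unsigned long)"sched_switch";
 *	attr.raw_tracepoint.prog_fd = prog_fd;
 *	syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
 *
 * The tracepoint name is resolved via bpf_get_raw_tracepoint() and the
 * program is then hooked up with bpf_probe_register(); "sched_switch"
 * and prog_fd above are only placeholders.
 */
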
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		*fd_type = BPF_FD_TYPE_TRACEPOINT;
		*probe_offset = 0x0;
		*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE)
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
	}

	return err;
}

static int __init send_signal_irq_work_init(void)
{
	int cpu;
	struct send_signal_irq_work *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&send_signal_work, cpu);
		init_irq_work(&work->irq_work, do_bpf_send_signal);
	}
	return 0;
}

subsys_initcall(send_signal_irq_work_init);

#ifdef CONFIG_MODULES
static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
			    void *module)
{
	struct bpf_trace_module *btm, *tmp;
	struct module *mod = module;
	int ret = 0;

	if (mod->num_bpf_raw_events == 0 ||
	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
		goto out;

	mutex_lock(&bpf_module_mutex);

	switch (op) {
	case MODULE_STATE_COMING:
		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
		if (btm) {
			btm->module = module;
			list_add(&btm->list, &bpf_trace_modules);
		} else {
			ret = -ENOMEM;
		}
		break;
	case MODULE_STATE_GOING:
		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
			if (btm->module == module) {
				list_del(&btm->list);
				kfree(btm);
				break;
			}
		}
		break;
	}

	mutex_unlock(&bpf_module_mutex);

out:
	return notifier_from_errno(ret);
}

static struct notifier_block bpf_module_nb = {
	.notifier_call = bpf_event_notify,
};

static int __init bpf_event_init(void)
{
	register_module_notifier(&bpf_module_nb);
	return 0;
}

fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */
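
/*
 * Background on the module notifier above (rough sketch): a module that
 * defines tracepoints with CREATE_TRACE_POINTS also carries a
 * "__bpf_raw_tp" section of struct bpf_raw_event_map entries, which the
 * module loader exposes as mod->bpf_raw_events/num_bpf_raw_events.
 * Registering the module on MODULE_STATE_COMING and dropping it on
 * MODULE_STATE_GOING is what lets bpf_get_raw_tracepoint() resolve those
 * entries by name only while the module is loaded.
 */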