1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016 Facebook 4 */ 5 #include <linux/kernel.h> 6 #include <linux/types.h> 7 #include <linux/slab.h> 8 #include <linux/bpf.h> 9 #include <linux/bpf_verifier.h> 10 #include <linux/bpf_perf_event.h> 11 #include <linux/btf.h> 12 #include <linux/filter.h> 13 #include <linux/uaccess.h> 14 #include <linux/ctype.h> 15 #include <linux/kprobes.h> 16 #include <linux/spinlock.h> 17 #include <linux/syscalls.h> 18 #include <linux/error-injection.h> 19 #include <linux/btf_ids.h> 20 #include <linux/bpf_lsm.h> 21 #include <linux/fprobe.h> 22 #include <linux/bsearch.h> 23 #include <linux/sort.h> 24 #include <linux/key.h> 25 #include <linux/namei.h> 26 27 #include <net/bpf_sk_storage.h> 28 29 #include <uapi/linux/bpf.h> 30 #include <uapi/linux/btf.h> 31 32 #include <asm/tlb.h> 33 34 #include "trace_probe.h" 35 #include "trace.h" 36 37 #define CREATE_TRACE_POINTS 38 #include "bpf_trace.h" 39 40 #define bpf_event_rcu_dereference(p) \ 41 rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) 42 43 #define MAX_UPROBE_MULTI_CNT (1U << 20) 44 #define MAX_KPROBE_MULTI_CNT (1U << 20) 45 46 #ifdef CONFIG_MODULES 47 struct bpf_trace_module { 48 struct module *module; 49 struct list_head list; 50 }; 51 52 static LIST_HEAD(bpf_trace_modules); 53 static DEFINE_MUTEX(bpf_module_mutex); 54 55 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) 56 { 57 struct bpf_raw_event_map *btp, *ret = NULL; 58 struct bpf_trace_module *btm; 59 unsigned int i; 60 61 mutex_lock(&bpf_module_mutex); 62 list_for_each_entry(btm, &bpf_trace_modules, list) { 63 for (i = 0; i < btm->module->num_bpf_raw_events; ++i) { 64 btp = &btm->module->bpf_raw_events[i]; 65 if (!strcmp(btp->tp->name, name)) { 66 if (try_module_get(btm->module)) 67 ret = btp; 68 goto out; 69 } 70 } 71 } 72 out: 73 mutex_unlock(&bpf_module_mutex); 74 return ret; 75 } 76 #else 77 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) 78 { 79 return NULL; 80 } 81 #endif /* CONFIG_MODULES */ 82 83 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 84 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 85 86 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, 87 u64 flags, const struct btf **btf, 88 s32 *btf_id); 89 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx); 90 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 91 92 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx); 93 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 94 95 /** 96 * trace_call_bpf - invoke BPF program 97 * @call: tracepoint event 98 * @ctx: opaque context pointer 99 * 100 * kprobe handlers execute BPF programs via this helper. 101 * Can be used from static tracepoints in the future. 102 * 103 * Return: BPF programs always return an integer which is interpreted by 104 * kprobe handler as: 105 * 0 - return from kprobe (event is filtered out) 106 * 1 - store kprobe event into ring buffer 107 * Other values are reserved and currently alias to 1 108 */ 109 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 110 { 111 unsigned int ret; 112 113 cant_sleep(); 114 115 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 116 /* 117 * since some bpf program is already running on this cpu, 118 * don't call into another bpf program (same or different) 119 * and don't send kprobe event into ring-buffer, 120 * so return zero here 121 */ 122 rcu_read_lock(); 123 bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); 124 rcu_read_unlock(); 125 ret = 0; 126 goto out; 127 } 128 129 /* 130 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock 131 * to all call sites, we did a bpf_prog_array_valid() there to check 132 * whether call->prog_array is empty or not, which is 133 * a heuristic to speed up execution. 134 * 135 * If bpf_prog_array_valid() fetched prog_array was 136 * non-NULL, we go into trace_call_bpf() and do the actual 137 * proper rcu_dereference() under RCU lock. 138 * If it turns out that prog_array is NULL then, we bail out. 139 * For the opposite, if the bpf_prog_array_valid() fetched pointer 140 * was NULL, you'll skip the prog_array with the risk of missing 141 * out of events when it was updated in between this and the 142 * rcu_dereference() which is accepted risk. 143 */ 144 rcu_read_lock(); 145 ret = bpf_prog_run_array(rcu_dereference(call->prog_array), 146 ctx, bpf_prog_run); 147 rcu_read_unlock(); 148 149 out: 150 __this_cpu_dec(bpf_prog_active); 151 152 return ret; 153 } 154 155 /** 156 * trace_call_bpf_faultable - invoke BPF program in faultable context 157 * @call: tracepoint event 158 * @ctx: opaque context pointer 159 * 160 * Variant of trace_call_bpf() for faultable tracepoints (syscall 161 * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace 162 * for lifetime protection and bpf_prog_run_array_sleepable() for per-program 163 * RCU flavor selection, following the uprobe pattern. 164 * 165 * Per-program recursion protection is provided by 166 * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not 167 * needed because syscall tracepoints cannot self-recurse. 168 * 169 * Must be called from a faultable/preemptible context. 170 */ 171 unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx) 172 { 173 struct bpf_prog_array *prog_array; 174 175 might_fault(); 176 guard(rcu_tasks_trace)(); 177 178 prog_array = rcu_dereference_check(call->prog_array, 179 rcu_read_lock_trace_held()); 180 return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run); 181 } 182 183 #ifdef CONFIG_BPF_KPROBE_OVERRIDE 184 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) 185 { 186 regs_set_return_value(regs, rc); 187 override_function_with_return(regs); 188 return 0; 189 } 190 191 static const struct bpf_func_proto bpf_override_return_proto = { 192 .func = bpf_override_return, 193 .gpl_only = true, 194 .ret_type = RET_INTEGER, 195 .arg1_type = ARG_PTR_TO_CTX, 196 .arg2_type = ARG_ANYTHING, 197 }; 198 #endif 199 200 static __always_inline int 201 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) 202 { 203 int ret; 204 205 ret = copy_from_user_nofault(dst, unsafe_ptr, size); 206 if (unlikely(ret < 0)) 207 memset(dst, 0, size); 208 return ret; 209 } 210 211 BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size, 212 const void __user *, unsafe_ptr) 213 { 214 return bpf_probe_read_user_common(dst, size, unsafe_ptr); 215 } 216 217 const struct bpf_func_proto bpf_probe_read_user_proto = { 218 .func = bpf_probe_read_user, 219 .gpl_only = true, 220 .ret_type = RET_INTEGER, 221 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 222 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 223 .arg3_type = ARG_ANYTHING, 224 }; 225 226 static __always_inline int 227 bpf_probe_read_user_str_common(void *dst, u32 size, 228 const void __user *unsafe_ptr) 229 { 230 int ret; 231 232 /* 233 * NB: We rely on strncpy_from_user() not copying junk past the NUL 234 * terminator into `dst`. 235 * 236 * strncpy_from_user() does long-sized strides in the fast path. If the 237 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`, 238 * then there could be junk after the NUL in `dst`. If user takes `dst` 239 * and keys a hash map with it, then semantically identical strings can 240 * occupy multiple entries in the map. 241 */ 242 ret = strncpy_from_user_nofault(dst, unsafe_ptr, size); 243 if (unlikely(ret < 0)) 244 memset(dst, 0, size); 245 return ret; 246 } 247 248 BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size, 249 const void __user *, unsafe_ptr) 250 { 251 return bpf_probe_read_user_str_common(dst, size, unsafe_ptr); 252 } 253 254 const struct bpf_func_proto bpf_probe_read_user_str_proto = { 255 .func = bpf_probe_read_user_str, 256 .gpl_only = true, 257 .ret_type = RET_INTEGER, 258 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 259 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 260 .arg3_type = ARG_ANYTHING, 261 }; 262 263 BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size, 264 const void *, unsafe_ptr) 265 { 266 return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); 267 } 268 269 const struct bpf_func_proto bpf_probe_read_kernel_proto = { 270 .func = bpf_probe_read_kernel, 271 .gpl_only = true, 272 .ret_type = RET_INTEGER, 273 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 274 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 275 .arg3_type = ARG_ANYTHING, 276 }; 277 278 static __always_inline int 279 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr) 280 { 281 int ret; 282 283 /* 284 * The strncpy_from_kernel_nofault() call will likely not fill the 285 * entire buffer, but that's okay in this circumstance as we're probing 286 * arbitrary memory anyway similar to bpf_probe_read_*() and might 287 * as well probe the stack. Thus, memory is explicitly cleared 288 * only in error case, so that improper users ignoring return 289 * code altogether don't copy garbage; otherwise length of string 290 * is returned that can be used for bpf_perf_event_output() et al. 291 */ 292 ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size); 293 if (unlikely(ret < 0)) 294 memset(dst, 0, size); 295 return ret; 296 } 297 298 BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size, 299 const void *, unsafe_ptr) 300 { 301 return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); 302 } 303 304 const struct bpf_func_proto bpf_probe_read_kernel_str_proto = { 305 .func = bpf_probe_read_kernel_str, 306 .gpl_only = true, 307 .ret_type = RET_INTEGER, 308 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 309 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 310 .arg3_type = ARG_ANYTHING, 311 }; 312 313 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 314 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size, 315 const void *, unsafe_ptr) 316 { 317 if ((unsigned long)unsafe_ptr < TASK_SIZE) { 318 return bpf_probe_read_user_common(dst, size, 319 (__force void __user *)unsafe_ptr); 320 } 321 return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); 322 } 323 324 static const struct bpf_func_proto bpf_probe_read_compat_proto = { 325 .func = bpf_probe_read_compat, 326 .gpl_only = true, 327 .ret_type = RET_INTEGER, 328 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 329 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 330 .arg3_type = ARG_ANYTHING, 331 }; 332 333 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size, 334 const void *, unsafe_ptr) 335 { 336 if ((unsigned long)unsafe_ptr < TASK_SIZE) { 337 return bpf_probe_read_user_str_common(dst, size, 338 (__force void __user *)unsafe_ptr); 339 } 340 return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); 341 } 342 343 static const struct bpf_func_proto bpf_probe_read_compat_str_proto = { 344 .func = bpf_probe_read_compat_str, 345 .gpl_only = true, 346 .ret_type = RET_INTEGER, 347 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 348 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 349 .arg3_type = ARG_ANYTHING, 350 }; 351 #endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ 352 353 BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, 354 u32, size) 355 { 356 /* 357 * Ensure we're in user context which is safe for the helper to 358 * run. This helper has no business in a kthread. 359 * 360 * access_ok() should prevent writing to non-user memory, but in 361 * some situations (nommu, temporary switch, etc) access_ok() does 362 * not provide enough validation, hence the check on KERNEL_DS. 363 * 364 * nmi_uaccess_okay() ensures the probe is not run in an interim 365 * state, when the task or mm are switched. This is specifically 366 * required to prevent the use of temporary mm. 367 */ 368 369 if (unlikely(in_interrupt() || 370 current->flags & (PF_KTHREAD | PF_EXITING))) 371 return -EPERM; 372 if (unlikely(!nmi_uaccess_okay())) 373 return -EPERM; 374 375 return copy_to_user_nofault(unsafe_ptr, src, size); 376 } 377 378 static const struct bpf_func_proto bpf_probe_write_user_proto = { 379 .func = bpf_probe_write_user, 380 .gpl_only = true, 381 .ret_type = RET_INTEGER, 382 .arg1_type = ARG_ANYTHING, 383 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 384 .arg3_type = ARG_CONST_SIZE, 385 }; 386 387 #define MAX_TRACE_PRINTK_VARARGS 3 388 #define BPF_TRACE_PRINTK_SIZE 1024 389 390 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, 391 u64, arg2, u64, arg3) 392 { 393 u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; 394 struct bpf_bprintf_data data = { 395 .get_bin_args = true, 396 .get_buf = true, 397 }; 398 int ret; 399 400 ret = bpf_bprintf_prepare(fmt, fmt_size, args, 401 MAX_TRACE_PRINTK_VARARGS, &data); 402 if (ret < 0) 403 return ret; 404 405 ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); 406 407 trace_bpf_trace_printk(data.buf); 408 409 bpf_bprintf_cleanup(&data); 410 411 return ret; 412 } 413 414 static const struct bpf_func_proto bpf_trace_printk_proto = { 415 .func = bpf_trace_printk, 416 .gpl_only = true, 417 .ret_type = RET_INTEGER, 418 .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 419 .arg2_type = ARG_CONST_SIZE, 420 }; 421 422 static void __set_printk_clr_event(struct work_struct *work) 423 { 424 /* 425 * This program might be calling bpf_trace_printk, 426 * so enable the associated bpf_trace/bpf_trace_printk event. 427 * Repeat this each time as it is possible a user has 428 * disabled bpf_trace_printk events. By loading a program 429 * calling bpf_trace_printk() however the user has expressed 430 * the intent to see such events. 431 */ 432 if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) 433 pr_warn_ratelimited("could not enable bpf_trace_printk events"); 434 } 435 static DECLARE_WORK(set_printk_work, __set_printk_clr_event); 436 437 const struct bpf_func_proto *bpf_get_trace_printk_proto(void) 438 { 439 schedule_work(&set_printk_work); 440 return &bpf_trace_printk_proto; 441 } 442 443 BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, 444 u32, data_len) 445 { 446 struct bpf_bprintf_data data = { 447 .get_bin_args = true, 448 .get_buf = true, 449 }; 450 int ret, num_args; 451 452 if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || 453 (data_len && !args)) 454 return -EINVAL; 455 num_args = data_len / 8; 456 457 ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); 458 if (ret < 0) 459 return ret; 460 461 ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); 462 463 trace_bpf_trace_printk(data.buf); 464 465 bpf_bprintf_cleanup(&data); 466 467 return ret; 468 } 469 470 static const struct bpf_func_proto bpf_trace_vprintk_proto = { 471 .func = bpf_trace_vprintk, 472 .gpl_only = true, 473 .ret_type = RET_INTEGER, 474 .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 475 .arg2_type = ARG_CONST_SIZE, 476 .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 477 .arg4_type = ARG_CONST_SIZE_OR_ZERO, 478 }; 479 480 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) 481 { 482 schedule_work(&set_printk_work); 483 return &bpf_trace_vprintk_proto; 484 } 485 486 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, 487 const void *, args, u32, data_len) 488 { 489 struct bpf_bprintf_data data = { 490 .get_bin_args = true, 491 }; 492 int err, num_args; 493 494 if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || 495 (data_len && !args)) 496 return -EINVAL; 497 num_args = data_len / 8; 498 499 err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); 500 if (err < 0) 501 return err; 502 503 seq_bprintf(m, fmt, data.bin_args); 504 505 bpf_bprintf_cleanup(&data); 506 507 return seq_has_overflowed(m) ? -EOVERFLOW : 0; 508 } 509 510 BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file) 511 512 static const struct bpf_func_proto bpf_seq_printf_proto = { 513 .func = bpf_seq_printf, 514 .gpl_only = true, 515 .ret_type = RET_INTEGER, 516 .arg1_type = ARG_PTR_TO_BTF_ID, 517 .arg1_btf_id = &btf_seq_file_ids[0], 518 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 519 .arg3_type = ARG_CONST_SIZE, 520 .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 521 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 522 }; 523 524 BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len) 525 { 526 return seq_write(m, data, len) ? -EOVERFLOW : 0; 527 } 528 529 static const struct bpf_func_proto bpf_seq_write_proto = { 530 .func = bpf_seq_write, 531 .gpl_only = true, 532 .ret_type = RET_INTEGER, 533 .arg1_type = ARG_PTR_TO_BTF_ID, 534 .arg1_btf_id = &btf_seq_file_ids[0], 535 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 536 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 537 }; 538 539 BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr, 540 u32, btf_ptr_size, u64, flags) 541 { 542 const struct btf *btf; 543 s32 btf_id; 544 int ret; 545 546 ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); 547 if (ret) 548 return ret; 549 550 return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags); 551 } 552 553 static const struct bpf_func_proto bpf_seq_printf_btf_proto = { 554 .func = bpf_seq_printf_btf, 555 .gpl_only = true, 556 .ret_type = RET_INTEGER, 557 .arg1_type = ARG_PTR_TO_BTF_ID, 558 .arg1_btf_id = &btf_seq_file_ids[0], 559 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 560 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 561 .arg4_type = ARG_ANYTHING, 562 }; 563 564 static __always_inline int 565 get_map_perf_counter(struct bpf_map *map, u64 flags, 566 u64 *value, u64 *enabled, u64 *running) 567 { 568 struct bpf_array *array = container_of(map, struct bpf_array, map); 569 unsigned int cpu = smp_processor_id(); 570 u64 index = flags & BPF_F_INDEX_MASK; 571 struct bpf_event_entry *ee; 572 573 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 574 return -EINVAL; 575 if (index == BPF_F_CURRENT_CPU) 576 index = cpu; 577 if (unlikely(index >= array->map.max_entries)) 578 return -E2BIG; 579 580 ee = READ_ONCE(array->ptrs[index]); 581 if (!ee) 582 return -ENOENT; 583 584 return perf_event_read_local(ee->event, value, enabled, running); 585 } 586 587 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) 588 { 589 u64 value = 0; 590 int err; 591 592 err = get_map_perf_counter(map, flags, &value, NULL, NULL); 593 /* 594 * this api is ugly since we miss [-22..-2] range of valid 595 * counter values, but that's uapi 596 */ 597 if (err) 598 return err; 599 return value; 600 } 601 602 const struct bpf_func_proto bpf_perf_event_read_proto = { 603 .func = bpf_perf_event_read, 604 .gpl_only = true, 605 .ret_type = RET_INTEGER, 606 .arg1_type = ARG_CONST_MAP_PTR, 607 .arg2_type = ARG_ANYTHING, 608 }; 609 610 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags, 611 struct bpf_perf_event_value *, buf, u32, size) 612 { 613 int err = -EINVAL; 614 615 if (unlikely(size != sizeof(struct bpf_perf_event_value))) 616 goto clear; 617 err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled, 618 &buf->running); 619 if (unlikely(err)) 620 goto clear; 621 return 0; 622 clear: 623 memset(buf, 0, size); 624 return err; 625 } 626 627 static const struct bpf_func_proto bpf_perf_event_read_value_proto = { 628 .func = bpf_perf_event_read_value, 629 .gpl_only = true, 630 .ret_type = RET_INTEGER, 631 .arg1_type = ARG_CONST_MAP_PTR, 632 .arg2_type = ARG_ANYTHING, 633 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 634 .arg4_type = ARG_CONST_SIZE, 635 }; 636 637 const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void) 638 { 639 return &bpf_perf_event_read_value_proto; 640 } 641 642 static __always_inline u64 643 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, 644 u64 flags, struct perf_raw_record *raw, 645 struct perf_sample_data *sd) 646 { 647 struct bpf_array *array = container_of(map, struct bpf_array, map); 648 unsigned int cpu = smp_processor_id(); 649 u64 index = flags & BPF_F_INDEX_MASK; 650 struct bpf_event_entry *ee; 651 struct perf_event *event; 652 653 if (index == BPF_F_CURRENT_CPU) 654 index = cpu; 655 if (unlikely(index >= array->map.max_entries)) 656 return -E2BIG; 657 658 ee = READ_ONCE(array->ptrs[index]); 659 if (!ee) 660 return -ENOENT; 661 662 event = ee->event; 663 if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || 664 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) 665 return -EINVAL; 666 667 if (unlikely(event->oncpu != cpu)) 668 return -EOPNOTSUPP; 669 670 perf_sample_save_raw_data(sd, event, raw); 671 672 return perf_event_output(event, sd, regs); 673 } 674 675 /* 676 * Support executing tracepoints in normal, irq, and nmi context that each call 677 * bpf_perf_event_output 678 */ 679 struct bpf_trace_sample_data { 680 struct perf_sample_data sds[3]; 681 }; 682 683 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); 684 static DEFINE_PER_CPU(int, bpf_trace_nest_level); 685 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, 686 u64, flags, void *, data, u64, size) 687 { 688 struct bpf_trace_sample_data *sds; 689 struct perf_raw_record raw = { 690 .frag = { 691 .size = size, 692 .data = data, 693 }, 694 }; 695 struct perf_sample_data *sd; 696 int nest_level, err; 697 698 preempt_disable(); 699 sds = this_cpu_ptr(&bpf_trace_sds); 700 nest_level = this_cpu_inc_return(bpf_trace_nest_level); 701 702 if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { 703 err = -EBUSY; 704 goto out; 705 } 706 707 sd = &sds->sds[nest_level - 1]; 708 709 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { 710 err = -EINVAL; 711 goto out; 712 } 713 714 perf_sample_data_init(sd, 0, 0); 715 716 err = __bpf_perf_event_output(regs, map, flags, &raw, sd); 717 out: 718 this_cpu_dec(bpf_trace_nest_level); 719 preempt_enable(); 720 return err; 721 } 722 723 static const struct bpf_func_proto bpf_perf_event_output_proto = { 724 .func = bpf_perf_event_output, 725 .gpl_only = true, 726 .ret_type = RET_INTEGER, 727 .arg1_type = ARG_PTR_TO_CTX, 728 .arg2_type = ARG_CONST_MAP_PTR, 729 .arg3_type = ARG_ANYTHING, 730 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 731 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 732 }; 733 734 static DEFINE_PER_CPU(int, bpf_event_output_nest_level); 735 struct bpf_nested_pt_regs { 736 struct pt_regs regs[3]; 737 }; 738 static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs); 739 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); 740 741 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 742 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) 743 { 744 struct perf_raw_frag frag = { 745 .copy = ctx_copy, 746 .size = ctx_size, 747 .data = ctx, 748 }; 749 struct perf_raw_record raw = { 750 .frag = { 751 { 752 .next = ctx_size ? &frag : NULL, 753 }, 754 .size = meta_size, 755 .data = meta, 756 }, 757 }; 758 struct perf_sample_data *sd; 759 struct pt_regs *regs; 760 int nest_level; 761 u64 ret; 762 763 preempt_disable(); 764 nest_level = this_cpu_inc_return(bpf_event_output_nest_level); 765 766 if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { 767 ret = -EBUSY; 768 goto out; 769 } 770 sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]); 771 regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]); 772 773 perf_fetch_caller_regs(regs); 774 perf_sample_data_init(sd, 0, 0); 775 776 ret = __bpf_perf_event_output(regs, map, flags, &raw, sd); 777 out: 778 this_cpu_dec(bpf_event_output_nest_level); 779 preempt_enable(); 780 return ret; 781 } 782 783 BPF_CALL_0(bpf_get_current_task) 784 { 785 return (long) current; 786 } 787 788 const struct bpf_func_proto bpf_get_current_task_proto = { 789 .func = bpf_get_current_task, 790 .gpl_only = true, 791 .ret_type = RET_INTEGER, 792 }; 793 794 BPF_CALL_0(bpf_get_current_task_btf) 795 { 796 return (unsigned long) current; 797 } 798 799 const struct bpf_func_proto bpf_get_current_task_btf_proto = { 800 .func = bpf_get_current_task_btf, 801 .gpl_only = true, 802 .ret_type = RET_PTR_TO_BTF_ID_TRUSTED, 803 .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 804 }; 805 806 BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) 807 { 808 return (unsigned long) task_pt_regs(task); 809 } 810 811 BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs) 812 813 const struct bpf_func_proto bpf_task_pt_regs_proto = { 814 .func = bpf_task_pt_regs, 815 .gpl_only = true, 816 .arg1_type = ARG_PTR_TO_BTF_ID, 817 .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 818 .ret_type = RET_PTR_TO_BTF_ID, 819 .ret_btf_id = &bpf_task_pt_regs_ids[0], 820 }; 821 822 struct send_signal_irq_work { 823 struct irq_work irq_work; 824 struct task_struct *task; 825 u32 sig; 826 enum pid_type type; 827 bool has_siginfo; 828 struct kernel_siginfo info; 829 }; 830 831 static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); 832 833 static void do_bpf_send_signal(struct irq_work *entry) 834 { 835 struct send_signal_irq_work *work; 836 struct kernel_siginfo *siginfo; 837 838 work = container_of(entry, struct send_signal_irq_work, irq_work); 839 siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV; 840 841 group_send_sig_info(work->sig, siginfo, work->task, work->type); 842 put_task_struct(work->task); 843 } 844 845 static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value) 846 { 847 struct send_signal_irq_work *work = NULL; 848 struct kernel_siginfo info; 849 struct kernel_siginfo *siginfo; 850 851 if (!task) { 852 task = current; 853 siginfo = SEND_SIG_PRIV; 854 } else { 855 clear_siginfo(&info); 856 info.si_signo = sig; 857 info.si_errno = 0; 858 info.si_code = SI_KERNEL; 859 info.si_pid = 0; 860 info.si_uid = 0; 861 info.si_value.sival_ptr = (void __user __force *)(unsigned long)value; 862 siginfo = &info; 863 } 864 865 /* Similar to bpf_probe_write_user, task needs to be 866 * in a sound condition and kernel memory access be 867 * permitted in order to send signal to the current 868 * task. 869 */ 870 if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING))) 871 return -EPERM; 872 if (unlikely(!nmi_uaccess_okay())) 873 return -EPERM; 874 /* Task should not be pid=1 to avoid kernel panic. */ 875 if (unlikely(is_global_init(task))) 876 return -EPERM; 877 878 if (preempt_count() != 0 || irqs_disabled()) { 879 /* Do an early check on signal validity. Otherwise, 880 * the error is lost in deferred irq_work. 881 */ 882 if (unlikely(!valid_signal(sig))) 883 return -EINVAL; 884 885 work = this_cpu_ptr(&send_signal_work); 886 if (irq_work_is_busy(&work->irq_work)) 887 return -EBUSY; 888 889 /* Add the current task, which is the target of sending signal, 890 * to the irq_work. The current task may change when queued 891 * irq works get executed. 892 */ 893 work->task = get_task_struct(task); 894 work->has_siginfo = siginfo == &info; 895 if (work->has_siginfo) 896 copy_siginfo(&work->info, &info); 897 work->sig = sig; 898 work->type = type; 899 irq_work_queue(&work->irq_work); 900 return 0; 901 } 902 903 return group_send_sig_info(sig, siginfo, task, type); 904 } 905 906 BPF_CALL_1(bpf_send_signal, u32, sig) 907 { 908 return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0); 909 } 910 911 const struct bpf_func_proto bpf_send_signal_proto = { 912 .func = bpf_send_signal, 913 .gpl_only = false, 914 .ret_type = RET_INTEGER, 915 .arg1_type = ARG_ANYTHING, 916 }; 917 918 BPF_CALL_1(bpf_send_signal_thread, u32, sig) 919 { 920 return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0); 921 } 922 923 const struct bpf_func_proto bpf_send_signal_thread_proto = { 924 .func = bpf_send_signal_thread, 925 .gpl_only = false, 926 .ret_type = RET_INTEGER, 927 .arg1_type = ARG_ANYTHING, 928 }; 929 930 BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz) 931 { 932 struct path copy; 933 long len; 934 char *p; 935 936 if (!sz) 937 return 0; 938 939 /* 940 * The path pointer is verified as trusted and safe to use, 941 * but let's double check it's valid anyway to workaround 942 * potentially broken verifier. 943 */ 944 len = copy_from_kernel_nofault(©, path, sizeof(*path)); 945 if (len < 0) 946 return len; 947 948 p = d_path(©, buf, sz); 949 if (IS_ERR(p)) { 950 len = PTR_ERR(p); 951 } else { 952 len = buf + sz - p; 953 memmove(buf, p, len); 954 } 955 956 return len; 957 } 958 959 BTF_SET_START(btf_allowlist_d_path) 960 #ifdef CONFIG_SECURITY 961 BTF_ID(func, security_file_permission) 962 BTF_ID(func, security_inode_getattr) 963 BTF_ID(func, security_file_open) 964 #endif 965 #ifdef CONFIG_SECURITY_PATH 966 BTF_ID(func, security_path_truncate) 967 #endif 968 BTF_ID(func, vfs_truncate) 969 BTF_ID(func, vfs_fallocate) 970 BTF_ID(func, dentry_open) 971 BTF_ID(func, vfs_getattr) 972 BTF_ID(func, filp_close) 973 BTF_SET_END(btf_allowlist_d_path) 974 975 static bool bpf_d_path_allowed(const struct bpf_prog *prog) 976 { 977 if (prog->type == BPF_PROG_TYPE_TRACING && 978 prog->expected_attach_type == BPF_TRACE_ITER) 979 return true; 980 981 if (prog->type == BPF_PROG_TYPE_LSM) 982 return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); 983 984 return btf_id_set_contains(&btf_allowlist_d_path, 985 prog->aux->attach_btf_id); 986 } 987 988 BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path) 989 990 static const struct bpf_func_proto bpf_d_path_proto = { 991 .func = bpf_d_path, 992 .gpl_only = false, 993 .ret_type = RET_INTEGER, 994 .arg1_type = ARG_PTR_TO_BTF_ID, 995 .arg1_btf_id = &bpf_d_path_btf_ids[0], 996 .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, 997 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 998 .allowed = bpf_d_path_allowed, 999 }; 1000 1001 #define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \ 1002 BTF_F_PTR_RAW | BTF_F_ZERO) 1003 1004 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, 1005 u64 flags, const struct btf **btf, 1006 s32 *btf_id) 1007 { 1008 const struct btf_type *t; 1009 1010 if (unlikely(flags & ~(BTF_F_ALL))) 1011 return -EINVAL; 1012 1013 if (btf_ptr_size != sizeof(struct btf_ptr)) 1014 return -EINVAL; 1015 1016 *btf = bpf_get_btf_vmlinux(); 1017 1018 if (IS_ERR_OR_NULL(*btf)) 1019 return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL; 1020 1021 if (ptr->type_id > 0) 1022 *btf_id = ptr->type_id; 1023 else 1024 return -EINVAL; 1025 1026 if (*btf_id > 0) 1027 t = btf_type_by_id(*btf, *btf_id); 1028 if (*btf_id <= 0 || !t) 1029 return -ENOENT; 1030 1031 return 0; 1032 } 1033 1034 BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr, 1035 u32, btf_ptr_size, u64, flags) 1036 { 1037 const struct btf *btf; 1038 s32 btf_id; 1039 int ret; 1040 1041 ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); 1042 if (ret) 1043 return ret; 1044 1045 return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size, 1046 flags); 1047 } 1048 1049 const struct bpf_func_proto bpf_snprintf_btf_proto = { 1050 .func = bpf_snprintf_btf, 1051 .gpl_only = false, 1052 .ret_type = RET_INTEGER, 1053 .arg1_type = ARG_PTR_TO_MEM | MEM_WRITE, 1054 .arg2_type = ARG_CONST_SIZE, 1055 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1056 .arg4_type = ARG_CONST_SIZE, 1057 .arg5_type = ARG_ANYTHING, 1058 }; 1059 1060 BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) 1061 { 1062 /* This helper call is inlined by verifier. */ 1063 return ((u64 *)ctx)[-2]; 1064 } 1065 1066 static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { 1067 .func = bpf_get_func_ip_tracing, 1068 .gpl_only = true, 1069 .ret_type = RET_INTEGER, 1070 .arg1_type = ARG_PTR_TO_CTX, 1071 }; 1072 1073 static inline unsigned long get_entry_ip(unsigned long fentry_ip) 1074 { 1075 #ifdef CONFIG_X86_KERNEL_IBT 1076 if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE))) 1077 fentry_ip -= ENDBR_INSN_SIZE; 1078 #endif 1079 return fentry_ip; 1080 } 1081 1082 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) 1083 { 1084 struct bpf_trace_run_ctx *run_ctx __maybe_unused; 1085 struct kprobe *kp; 1086 1087 #ifdef CONFIG_UPROBES 1088 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1089 if (run_ctx->is_uprobe) 1090 return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr; 1091 #endif 1092 1093 kp = kprobe_running(); 1094 1095 if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY)) 1096 return 0; 1097 1098 return get_entry_ip((uintptr_t)kp->addr); 1099 } 1100 1101 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { 1102 .func = bpf_get_func_ip_kprobe, 1103 .gpl_only = true, 1104 .ret_type = RET_INTEGER, 1105 .arg1_type = ARG_PTR_TO_CTX, 1106 }; 1107 1108 BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs) 1109 { 1110 return bpf_kprobe_multi_entry_ip(current->bpf_ctx); 1111 } 1112 1113 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = { 1114 .func = bpf_get_func_ip_kprobe_multi, 1115 .gpl_only = false, 1116 .ret_type = RET_INTEGER, 1117 .arg1_type = ARG_PTR_TO_CTX, 1118 }; 1119 1120 BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs) 1121 { 1122 return bpf_kprobe_multi_cookie(current->bpf_ctx); 1123 } 1124 1125 static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = { 1126 .func = bpf_get_attach_cookie_kprobe_multi, 1127 .gpl_only = false, 1128 .ret_type = RET_INTEGER, 1129 .arg1_type = ARG_PTR_TO_CTX, 1130 }; 1131 1132 BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs) 1133 { 1134 return bpf_uprobe_multi_entry_ip(current->bpf_ctx); 1135 } 1136 1137 static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = { 1138 .func = bpf_get_func_ip_uprobe_multi, 1139 .gpl_only = false, 1140 .ret_type = RET_INTEGER, 1141 .arg1_type = ARG_PTR_TO_CTX, 1142 }; 1143 1144 BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs) 1145 { 1146 return bpf_uprobe_multi_cookie(current->bpf_ctx); 1147 } 1148 1149 static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = { 1150 .func = bpf_get_attach_cookie_uprobe_multi, 1151 .gpl_only = false, 1152 .ret_type = RET_INTEGER, 1153 .arg1_type = ARG_PTR_TO_CTX, 1154 }; 1155 1156 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx) 1157 { 1158 struct bpf_trace_run_ctx *run_ctx; 1159 1160 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1161 return run_ctx->bpf_cookie; 1162 } 1163 1164 static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = { 1165 .func = bpf_get_attach_cookie_trace, 1166 .gpl_only = false, 1167 .ret_type = RET_INTEGER, 1168 .arg1_type = ARG_PTR_TO_CTX, 1169 }; 1170 1171 BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx) 1172 { 1173 return ctx->event->bpf_cookie; 1174 } 1175 1176 static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { 1177 .func = bpf_get_attach_cookie_pe, 1178 .gpl_only = false, 1179 .ret_type = RET_INTEGER, 1180 .arg1_type = ARG_PTR_TO_CTX, 1181 }; 1182 1183 BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx) 1184 { 1185 struct bpf_trace_run_ctx *run_ctx; 1186 1187 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1188 return run_ctx->bpf_cookie; 1189 } 1190 1191 static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { 1192 .func = bpf_get_attach_cookie_tracing, 1193 .gpl_only = false, 1194 .ret_type = RET_INTEGER, 1195 .arg1_type = ARG_PTR_TO_CTX, 1196 }; 1197 1198 BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) 1199 { 1200 static const u32 br_entry_size = sizeof(struct perf_branch_entry); 1201 u32 entry_cnt = size / br_entry_size; 1202 1203 entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt); 1204 1205 if (unlikely(flags)) 1206 return -EINVAL; 1207 1208 if (!entry_cnt) 1209 return -ENOENT; 1210 1211 return entry_cnt * br_entry_size; 1212 } 1213 1214 const struct bpf_func_proto bpf_get_branch_snapshot_proto = { 1215 .func = bpf_get_branch_snapshot, 1216 .gpl_only = true, 1217 .ret_type = RET_INTEGER, 1218 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 1219 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1220 }; 1221 1222 BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) 1223 { 1224 /* This helper call is inlined by verifier. */ 1225 u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; 1226 1227 if ((u64) n >= nr_args) 1228 return -EINVAL; 1229 *value = ((u64 *)ctx)[n]; 1230 return 0; 1231 } 1232 1233 static const struct bpf_func_proto bpf_get_func_arg_proto = { 1234 .func = get_func_arg, 1235 .ret_type = RET_INTEGER, 1236 .arg1_type = ARG_PTR_TO_CTX, 1237 .arg2_type = ARG_ANYTHING, 1238 .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, 1239 .arg3_size = sizeof(u64), 1240 }; 1241 1242 BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) 1243 { 1244 /* This helper call is inlined by verifier. */ 1245 u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; 1246 1247 *value = ((u64 *)ctx)[nr_args]; 1248 return 0; 1249 } 1250 1251 static const struct bpf_func_proto bpf_get_func_ret_proto = { 1252 .func = get_func_ret, 1253 .ret_type = RET_INTEGER, 1254 .arg1_type = ARG_PTR_TO_CTX, 1255 .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, 1256 .arg2_size = sizeof(u64), 1257 }; 1258 1259 BPF_CALL_1(get_func_arg_cnt, void *, ctx) 1260 { 1261 /* This helper call is inlined by verifier. */ 1262 return ((u64 *)ctx)[-1] & 0xFF; 1263 } 1264 1265 static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { 1266 .func = get_func_arg_cnt, 1267 .ret_type = RET_INTEGER, 1268 .arg1_type = ARG_PTR_TO_CTX, 1269 }; 1270 1271 static const struct bpf_func_proto * 1272 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1273 { 1274 const struct bpf_func_proto *func_proto; 1275 1276 switch (func_id) { 1277 case BPF_FUNC_get_smp_processor_id: 1278 return &bpf_get_smp_processor_id_proto; 1279 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 1280 case BPF_FUNC_probe_read: 1281 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1282 NULL : &bpf_probe_read_compat_proto; 1283 case BPF_FUNC_probe_read_str: 1284 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1285 NULL : &bpf_probe_read_compat_str_proto; 1286 #endif 1287 case BPF_FUNC_get_func_ip: 1288 return &bpf_get_func_ip_proto_tracing; 1289 default: 1290 break; 1291 } 1292 1293 func_proto = bpf_base_func_proto(func_id, prog); 1294 if (func_proto) 1295 return func_proto; 1296 1297 if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN)) 1298 return NULL; 1299 1300 switch (func_id) { 1301 case BPF_FUNC_probe_write_user: 1302 return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? 1303 NULL : &bpf_probe_write_user_proto; 1304 default: 1305 return NULL; 1306 } 1307 } 1308 1309 static bool is_kprobe_multi(const struct bpf_prog *prog) 1310 { 1311 return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI || 1312 prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; 1313 } 1314 1315 static inline bool is_kprobe_session(const struct bpf_prog *prog) 1316 { 1317 return prog->type == BPF_PROG_TYPE_KPROBE && 1318 prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; 1319 } 1320 1321 static inline bool is_uprobe_multi(const struct bpf_prog *prog) 1322 { 1323 return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI || 1324 prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; 1325 } 1326 1327 static inline bool is_uprobe_session(const struct bpf_prog *prog) 1328 { 1329 return prog->type == BPF_PROG_TYPE_KPROBE && 1330 prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; 1331 } 1332 1333 static inline bool is_trace_fsession(const struct bpf_prog *prog) 1334 { 1335 return prog->type == BPF_PROG_TYPE_TRACING && 1336 prog->expected_attach_type == BPF_TRACE_FSESSION; 1337 } 1338 1339 static const struct bpf_func_proto * 1340 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1341 { 1342 switch (func_id) { 1343 case BPF_FUNC_perf_event_output: 1344 return &bpf_perf_event_output_proto; 1345 case BPF_FUNC_get_stackid: 1346 return &bpf_get_stackid_proto; 1347 case BPF_FUNC_get_stack: 1348 return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto; 1349 #ifdef CONFIG_BPF_KPROBE_OVERRIDE 1350 case BPF_FUNC_override_return: 1351 return &bpf_override_return_proto; 1352 #endif 1353 case BPF_FUNC_get_func_ip: 1354 if (is_kprobe_multi(prog)) 1355 return &bpf_get_func_ip_proto_kprobe_multi; 1356 if (is_uprobe_multi(prog)) 1357 return &bpf_get_func_ip_proto_uprobe_multi; 1358 return &bpf_get_func_ip_proto_kprobe; 1359 case BPF_FUNC_get_attach_cookie: 1360 if (is_kprobe_multi(prog)) 1361 return &bpf_get_attach_cookie_proto_kmulti; 1362 if (is_uprobe_multi(prog)) 1363 return &bpf_get_attach_cookie_proto_umulti; 1364 return &bpf_get_attach_cookie_proto_trace; 1365 default: 1366 return bpf_tracing_func_proto(func_id, prog); 1367 } 1368 } 1369 1370 /* bpf+kprobe programs can access fields of 'struct pt_regs' */ 1371 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1372 const struct bpf_prog *prog, 1373 struct bpf_insn_access_aux *info) 1374 { 1375 if (off < 0 || off >= sizeof(struct pt_regs)) 1376 return false; 1377 if (off % size != 0) 1378 return false; 1379 /* 1380 * Assertion for 32 bit to make sure last 8 byte access 1381 * (BPF_DW) to the last 4 byte member is disallowed. 1382 */ 1383 if (off + size > sizeof(struct pt_regs)) 1384 return false; 1385 1386 if (type == BPF_WRITE) 1387 prog->aux->kprobe_write_ctx = true; 1388 1389 return true; 1390 } 1391 1392 const struct bpf_verifier_ops kprobe_verifier_ops = { 1393 .get_func_proto = kprobe_prog_func_proto, 1394 .is_valid_access = kprobe_prog_is_valid_access, 1395 }; 1396 1397 const struct bpf_prog_ops kprobe_prog_ops = { 1398 }; 1399 1400 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, 1401 u64, flags, void *, data, u64, size) 1402 { 1403 struct pt_regs *regs = *(struct pt_regs **)tp_buff; 1404 1405 /* 1406 * r1 points to perf tracepoint buffer where first 8 bytes are hidden 1407 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it 1408 * from there and call the same bpf_perf_event_output() helper inline. 1409 */ 1410 return ____bpf_perf_event_output(regs, map, flags, data, size); 1411 } 1412 1413 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { 1414 .func = bpf_perf_event_output_tp, 1415 .gpl_only = true, 1416 .ret_type = RET_INTEGER, 1417 .arg1_type = ARG_PTR_TO_CTX, 1418 .arg2_type = ARG_CONST_MAP_PTR, 1419 .arg3_type = ARG_ANYTHING, 1420 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1421 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 1422 }; 1423 1424 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, 1425 u64, flags) 1426 { 1427 struct pt_regs *regs = *(struct pt_regs **)tp_buff; 1428 1429 /* 1430 * Same comment as in bpf_perf_event_output_tp(), only that this time 1431 * the other helper's function body cannot be inlined due to being 1432 * external, thus we need to call raw helper function. 1433 */ 1434 return bpf_get_stackid((unsigned long) regs, (unsigned long) map, 1435 flags, 0, 0); 1436 } 1437 1438 static const struct bpf_func_proto bpf_get_stackid_proto_tp = { 1439 .func = bpf_get_stackid_tp, 1440 .gpl_only = true, 1441 .ret_type = RET_INTEGER, 1442 .arg1_type = ARG_PTR_TO_CTX, 1443 .arg2_type = ARG_CONST_MAP_PTR, 1444 .arg3_type = ARG_ANYTHING, 1445 }; 1446 1447 BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size, 1448 u64, flags) 1449 { 1450 struct pt_regs *regs = *(struct pt_regs **)tp_buff; 1451 1452 return bpf_get_stack((unsigned long) regs, (unsigned long) buf, 1453 (unsigned long) size, flags, 0); 1454 } 1455 1456 static const struct bpf_func_proto bpf_get_stack_proto_tp = { 1457 .func = bpf_get_stack_tp, 1458 .gpl_only = true, 1459 .ret_type = RET_INTEGER, 1460 .arg1_type = ARG_PTR_TO_CTX, 1461 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1462 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1463 .arg4_type = ARG_ANYTHING, 1464 }; 1465 1466 static const struct bpf_func_proto * 1467 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1468 { 1469 switch (func_id) { 1470 case BPF_FUNC_perf_event_output: 1471 return &bpf_perf_event_output_proto_tp; 1472 case BPF_FUNC_get_stackid: 1473 return &bpf_get_stackid_proto_tp; 1474 case BPF_FUNC_get_stack: 1475 return &bpf_get_stack_proto_tp; 1476 case BPF_FUNC_get_attach_cookie: 1477 return &bpf_get_attach_cookie_proto_trace; 1478 default: 1479 return bpf_tracing_func_proto(func_id, prog); 1480 } 1481 } 1482 1483 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1484 const struct bpf_prog *prog, 1485 struct bpf_insn_access_aux *info) 1486 { 1487 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) 1488 return false; 1489 if (type != BPF_READ) 1490 return false; 1491 if (off % size != 0) 1492 return false; 1493 1494 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); 1495 return true; 1496 } 1497 1498 const struct bpf_verifier_ops tracepoint_verifier_ops = { 1499 .get_func_proto = tp_prog_func_proto, 1500 .is_valid_access = tp_prog_is_valid_access, 1501 }; 1502 1503 const struct bpf_prog_ops tracepoint_prog_ops = { 1504 }; 1505 1506 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx, 1507 struct bpf_perf_event_value *, buf, u32, size) 1508 { 1509 int err = -EINVAL; 1510 1511 if (unlikely(size != sizeof(struct bpf_perf_event_value))) 1512 goto clear; 1513 err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled, 1514 &buf->running); 1515 if (unlikely(err)) 1516 goto clear; 1517 return 0; 1518 clear: 1519 memset(buf, 0, size); 1520 return err; 1521 } 1522 1523 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { 1524 .func = bpf_perf_prog_read_value, 1525 .gpl_only = true, 1526 .ret_type = RET_INTEGER, 1527 .arg1_type = ARG_PTR_TO_CTX, 1528 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1529 .arg3_type = ARG_CONST_SIZE, 1530 }; 1531 1532 BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, 1533 void *, buf, u32, size, u64, flags) 1534 { 1535 static const u32 br_entry_size = sizeof(struct perf_branch_entry); 1536 struct perf_branch_stack *br_stack = ctx->data->br_stack; 1537 u32 to_copy; 1538 1539 if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE)) 1540 return -EINVAL; 1541 1542 if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK))) 1543 return -ENOENT; 1544 1545 if (unlikely(!br_stack)) 1546 return -ENOENT; 1547 1548 if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE) 1549 return br_stack->nr * br_entry_size; 1550 1551 if (!buf || (size % br_entry_size != 0)) 1552 return -EINVAL; 1553 1554 to_copy = min_t(u32, br_stack->nr * br_entry_size, size); 1555 memcpy(buf, br_stack->entries, to_copy); 1556 1557 return to_copy; 1558 } 1559 1560 static const struct bpf_func_proto bpf_read_branch_records_proto = { 1561 .func = bpf_read_branch_records, 1562 .gpl_only = true, 1563 .ret_type = RET_INTEGER, 1564 .arg1_type = ARG_PTR_TO_CTX, 1565 .arg2_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE, 1566 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1567 .arg4_type = ARG_ANYTHING, 1568 }; 1569 1570 static const struct bpf_func_proto * 1571 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1572 { 1573 switch (func_id) { 1574 case BPF_FUNC_perf_event_output: 1575 return &bpf_perf_event_output_proto_tp; 1576 case BPF_FUNC_get_stackid: 1577 return &bpf_get_stackid_proto_pe; 1578 case BPF_FUNC_get_stack: 1579 return &bpf_get_stack_proto_pe; 1580 case BPF_FUNC_perf_prog_read_value: 1581 return &bpf_perf_prog_read_value_proto; 1582 case BPF_FUNC_read_branch_records: 1583 return &bpf_read_branch_records_proto; 1584 case BPF_FUNC_get_attach_cookie: 1585 return &bpf_get_attach_cookie_proto_pe; 1586 default: 1587 return bpf_tracing_func_proto(func_id, prog); 1588 } 1589 } 1590 1591 /* 1592 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp 1593 * to avoid potential recursive reuse issue when/if tracepoints are added 1594 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack. 1595 * 1596 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage 1597 * in normal, irq, and nmi context. 1598 */ 1599 struct bpf_raw_tp_regs { 1600 struct pt_regs regs[3]; 1601 }; 1602 static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); 1603 static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); 1604 static struct pt_regs *get_bpf_raw_tp_regs(void) 1605 { 1606 struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); 1607 int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); 1608 1609 if (nest_level > ARRAY_SIZE(tp_regs->regs)) { 1610 this_cpu_dec(bpf_raw_tp_nest_level); 1611 return ERR_PTR(-EBUSY); 1612 } 1613 1614 return &tp_regs->regs[nest_level - 1]; 1615 } 1616 1617 static void put_bpf_raw_tp_regs(void) 1618 { 1619 this_cpu_dec(bpf_raw_tp_nest_level); 1620 } 1621 1622 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, 1623 struct bpf_map *, map, u64, flags, void *, data, u64, size) 1624 { 1625 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1626 int ret; 1627 1628 if (IS_ERR(regs)) 1629 return PTR_ERR(regs); 1630 1631 perf_fetch_caller_regs(regs); 1632 ret = ____bpf_perf_event_output(regs, map, flags, data, size); 1633 1634 put_bpf_raw_tp_regs(); 1635 return ret; 1636 } 1637 1638 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { 1639 .func = bpf_perf_event_output_raw_tp, 1640 .gpl_only = true, 1641 .ret_type = RET_INTEGER, 1642 .arg1_type = ARG_PTR_TO_CTX, 1643 .arg2_type = ARG_CONST_MAP_PTR, 1644 .arg3_type = ARG_ANYTHING, 1645 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1646 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 1647 }; 1648 1649 extern const struct bpf_func_proto bpf_skb_output_proto; 1650 extern const struct bpf_func_proto bpf_xdp_output_proto; 1651 extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; 1652 1653 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, 1654 struct bpf_map *, map, u64, flags) 1655 { 1656 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1657 int ret; 1658 1659 if (IS_ERR(regs)) 1660 return PTR_ERR(regs); 1661 1662 perf_fetch_caller_regs(regs); 1663 /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ 1664 ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, 1665 flags, 0, 0); 1666 put_bpf_raw_tp_regs(); 1667 return ret; 1668 } 1669 1670 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { 1671 .func = bpf_get_stackid_raw_tp, 1672 .gpl_only = true, 1673 .ret_type = RET_INTEGER, 1674 .arg1_type = ARG_PTR_TO_CTX, 1675 .arg2_type = ARG_CONST_MAP_PTR, 1676 .arg3_type = ARG_ANYTHING, 1677 }; 1678 1679 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, 1680 void *, buf, u32, size, u64, flags) 1681 { 1682 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1683 int ret; 1684 1685 if (IS_ERR(regs)) 1686 return PTR_ERR(regs); 1687 1688 perf_fetch_caller_regs(regs); 1689 ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, 1690 (unsigned long) size, flags, 0); 1691 put_bpf_raw_tp_regs(); 1692 return ret; 1693 } 1694 1695 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { 1696 .func = bpf_get_stack_raw_tp, 1697 .gpl_only = true, 1698 .ret_type = RET_INTEGER, 1699 .arg1_type = ARG_PTR_TO_CTX, 1700 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1701 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1702 .arg4_type = ARG_ANYTHING, 1703 }; 1704 1705 static const struct bpf_func_proto * 1706 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1707 { 1708 switch (func_id) { 1709 case BPF_FUNC_perf_event_output: 1710 return &bpf_perf_event_output_proto_raw_tp; 1711 case BPF_FUNC_get_stackid: 1712 return &bpf_get_stackid_proto_raw_tp; 1713 case BPF_FUNC_get_stack: 1714 return &bpf_get_stack_proto_raw_tp; 1715 case BPF_FUNC_get_attach_cookie: 1716 return &bpf_get_attach_cookie_proto_tracing; 1717 default: 1718 return bpf_tracing_func_proto(func_id, prog); 1719 } 1720 } 1721 1722 const struct bpf_func_proto * 1723 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1724 { 1725 const struct bpf_func_proto *fn; 1726 1727 switch (func_id) { 1728 #ifdef CONFIG_NET 1729 case BPF_FUNC_skb_output: 1730 return &bpf_skb_output_proto; 1731 case BPF_FUNC_xdp_output: 1732 return &bpf_xdp_output_proto; 1733 case BPF_FUNC_skc_to_tcp6_sock: 1734 return &bpf_skc_to_tcp6_sock_proto; 1735 case BPF_FUNC_skc_to_tcp_sock: 1736 return &bpf_skc_to_tcp_sock_proto; 1737 case BPF_FUNC_skc_to_tcp_timewait_sock: 1738 return &bpf_skc_to_tcp_timewait_sock_proto; 1739 case BPF_FUNC_skc_to_tcp_request_sock: 1740 return &bpf_skc_to_tcp_request_sock_proto; 1741 case BPF_FUNC_skc_to_udp6_sock: 1742 return &bpf_skc_to_udp6_sock_proto; 1743 case BPF_FUNC_skc_to_unix_sock: 1744 return &bpf_skc_to_unix_sock_proto; 1745 case BPF_FUNC_skc_to_mptcp_sock: 1746 return &bpf_skc_to_mptcp_sock_proto; 1747 case BPF_FUNC_sk_storage_get: 1748 return &bpf_sk_storage_get_tracing_proto; 1749 case BPF_FUNC_sk_storage_delete: 1750 return &bpf_sk_storage_delete_tracing_proto; 1751 case BPF_FUNC_sock_from_file: 1752 return &bpf_sock_from_file_proto; 1753 case BPF_FUNC_get_socket_cookie: 1754 return &bpf_get_socket_ptr_cookie_proto; 1755 case BPF_FUNC_xdp_get_buff_len: 1756 return &bpf_xdp_get_buff_len_trace_proto; 1757 #endif 1758 case BPF_FUNC_seq_printf: 1759 return prog->expected_attach_type == BPF_TRACE_ITER ? 1760 &bpf_seq_printf_proto : 1761 NULL; 1762 case BPF_FUNC_seq_write: 1763 return prog->expected_attach_type == BPF_TRACE_ITER ? 1764 &bpf_seq_write_proto : 1765 NULL; 1766 case BPF_FUNC_seq_printf_btf: 1767 return prog->expected_attach_type == BPF_TRACE_ITER ? 1768 &bpf_seq_printf_btf_proto : 1769 NULL; 1770 case BPF_FUNC_d_path: 1771 return &bpf_d_path_proto; 1772 case BPF_FUNC_get_func_arg: 1773 if (bpf_prog_has_trampoline(prog) || 1774 prog->expected_attach_type == BPF_TRACE_RAW_TP) 1775 return &bpf_get_func_arg_proto; 1776 return NULL; 1777 case BPF_FUNC_get_func_ret: 1778 return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; 1779 case BPF_FUNC_get_func_arg_cnt: 1780 if (bpf_prog_has_trampoline(prog) || 1781 prog->expected_attach_type == BPF_TRACE_RAW_TP) 1782 return &bpf_get_func_arg_cnt_proto; 1783 return NULL; 1784 case BPF_FUNC_get_attach_cookie: 1785 if (prog->type == BPF_PROG_TYPE_TRACING && 1786 prog->expected_attach_type == BPF_TRACE_RAW_TP) 1787 return &bpf_get_attach_cookie_proto_tracing; 1788 return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL; 1789 default: 1790 fn = raw_tp_prog_func_proto(func_id, prog); 1791 if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) 1792 fn = bpf_iter_get_func_proto(func_id, prog); 1793 return fn; 1794 } 1795 } 1796 1797 static bool raw_tp_prog_is_valid_access(int off, int size, 1798 enum bpf_access_type type, 1799 const struct bpf_prog *prog, 1800 struct bpf_insn_access_aux *info) 1801 { 1802 return bpf_tracing_ctx_access(off, size, type); 1803 } 1804 1805 static bool tracing_prog_is_valid_access(int off, int size, 1806 enum bpf_access_type type, 1807 const struct bpf_prog *prog, 1808 struct bpf_insn_access_aux *info) 1809 { 1810 return bpf_tracing_btf_ctx_access(off, size, type, prog, info); 1811 } 1812 1813 int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, 1814 const union bpf_attr *kattr, 1815 union bpf_attr __user *uattr) 1816 { 1817 return -ENOTSUPP; 1818 } 1819 1820 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { 1821 .get_func_proto = raw_tp_prog_func_proto, 1822 .is_valid_access = raw_tp_prog_is_valid_access, 1823 }; 1824 1825 const struct bpf_prog_ops raw_tracepoint_prog_ops = { 1826 #ifdef CONFIG_NET 1827 .test_run = bpf_prog_test_run_raw_tp, 1828 #endif 1829 }; 1830 1831 const struct bpf_verifier_ops tracing_verifier_ops = { 1832 .get_func_proto = tracing_prog_func_proto, 1833 .is_valid_access = tracing_prog_is_valid_access, 1834 }; 1835 1836 const struct bpf_prog_ops tracing_prog_ops = { 1837 .test_run = bpf_prog_test_run_tracing, 1838 }; 1839 1840 static bool raw_tp_writable_prog_is_valid_access(int off, int size, 1841 enum bpf_access_type type, 1842 const struct bpf_prog *prog, 1843 struct bpf_insn_access_aux *info) 1844 { 1845 if (off == 0) { 1846 if (size != sizeof(u64) || type != BPF_READ) 1847 return false; 1848 info->reg_type = PTR_TO_TP_BUFFER; 1849 } 1850 return raw_tp_prog_is_valid_access(off, size, type, prog, info); 1851 } 1852 1853 const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { 1854 .get_func_proto = raw_tp_prog_func_proto, 1855 .is_valid_access = raw_tp_writable_prog_is_valid_access, 1856 }; 1857 1858 const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { 1859 }; 1860 1861 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1862 const struct bpf_prog *prog, 1863 struct bpf_insn_access_aux *info) 1864 { 1865 const int size_u64 = sizeof(u64); 1866 1867 if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) 1868 return false; 1869 if (type != BPF_READ) 1870 return false; 1871 if (off % size != 0) { 1872 if (sizeof(unsigned long) != 4) 1873 return false; 1874 if (size != 8) 1875 return false; 1876 if (off % size != 4) 1877 return false; 1878 } 1879 1880 switch (off) { 1881 case bpf_ctx_range(struct bpf_perf_event_data, sample_period): 1882 bpf_ctx_record_field_size(info, size_u64); 1883 if (!bpf_ctx_narrow_access_ok(off, size, size_u64)) 1884 return false; 1885 break; 1886 case bpf_ctx_range(struct bpf_perf_event_data, addr): 1887 bpf_ctx_record_field_size(info, size_u64); 1888 if (!bpf_ctx_narrow_access_ok(off, size, size_u64)) 1889 return false; 1890 break; 1891 default: 1892 if (size != sizeof(long)) 1893 return false; 1894 } 1895 1896 return true; 1897 } 1898 1899 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, 1900 const struct bpf_insn *si, 1901 struct bpf_insn *insn_buf, 1902 struct bpf_prog *prog, u32 *target_size) 1903 { 1904 struct bpf_insn *insn = insn_buf; 1905 1906 switch (si->off) { 1907 case offsetof(struct bpf_perf_event_data, sample_period): 1908 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 1909 data), si->dst_reg, si->src_reg, 1910 offsetof(struct bpf_perf_event_data_kern, data)); 1911 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 1912 bpf_target_off(struct perf_sample_data, period, 8, 1913 target_size)); 1914 break; 1915 case offsetof(struct bpf_perf_event_data, addr): 1916 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 1917 data), si->dst_reg, si->src_reg, 1918 offsetof(struct bpf_perf_event_data_kern, data)); 1919 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 1920 bpf_target_off(struct perf_sample_data, addr, 8, 1921 target_size)); 1922 break; 1923 default: 1924 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 1925 regs), si->dst_reg, si->src_reg, 1926 offsetof(struct bpf_perf_event_data_kern, regs)); 1927 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg, 1928 si->off); 1929 break; 1930 } 1931 1932 return insn - insn_buf; 1933 } 1934 1935 const struct bpf_verifier_ops perf_event_verifier_ops = { 1936 .get_func_proto = pe_prog_func_proto, 1937 .is_valid_access = pe_prog_is_valid_access, 1938 .convert_ctx_access = pe_prog_convert_ctx_access, 1939 }; 1940 1941 const struct bpf_prog_ops perf_event_prog_ops = { 1942 }; 1943 1944 static DEFINE_MUTEX(bpf_event_mutex); 1945 1946 #define BPF_TRACE_MAX_PROGS 64 1947 1948 int perf_event_attach_bpf_prog(struct perf_event *event, 1949 struct bpf_prog *prog, 1950 u64 bpf_cookie) 1951 { 1952 struct bpf_prog_array *old_array; 1953 struct bpf_prog_array *new_array; 1954 int ret = -EEXIST; 1955 1956 /* 1957 * Kprobe override only works if they are on the function entry, 1958 * and only if they are on the opt-in list. 1959 */ 1960 if (prog->kprobe_override && 1961 (!trace_kprobe_on_func_entry(event->tp_event) || 1962 !trace_kprobe_error_injectable(event->tp_event))) 1963 return -EINVAL; 1964 1965 mutex_lock(&bpf_event_mutex); 1966 1967 if (event->prog) 1968 goto unlock; 1969 1970 old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); 1971 if (old_array && 1972 bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { 1973 ret = -E2BIG; 1974 goto unlock; 1975 } 1976 1977 ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array); 1978 if (ret < 0) 1979 goto unlock; 1980 1981 /* set the new array to event->tp_event and set event->prog */ 1982 event->prog = prog; 1983 event->bpf_cookie = bpf_cookie; 1984 rcu_assign_pointer(event->tp_event->prog_array, new_array); 1985 bpf_prog_array_free_sleepable(old_array); 1986 1987 unlock: 1988 mutex_unlock(&bpf_event_mutex); 1989 return ret; 1990 } 1991 1992 void perf_event_detach_bpf_prog(struct perf_event *event) 1993 { 1994 struct bpf_prog_array *old_array; 1995 struct bpf_prog_array *new_array; 1996 struct bpf_prog *prog = NULL; 1997 int ret; 1998 1999 mutex_lock(&bpf_event_mutex); 2000 2001 if (!event->prog) 2002 goto unlock; 2003 2004 old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); 2005 if (!old_array) 2006 goto put; 2007 2008 ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); 2009 if (ret < 0) { 2010 bpf_prog_array_delete_safe(old_array, event->prog); 2011 } else { 2012 rcu_assign_pointer(event->tp_event->prog_array, new_array); 2013 bpf_prog_array_free_sleepable(old_array); 2014 } 2015 2016 put: 2017 prog = event->prog; 2018 event->prog = NULL; 2019 2020 unlock: 2021 mutex_unlock(&bpf_event_mutex); 2022 2023 if (prog) { 2024 /* 2025 * It could be that the bpf_prog is not sleepable (and will be freed 2026 * via normal RCU), but is called from a point that supports sleepable 2027 * programs and uses tasks-trace-RCU. 2028 */ 2029 synchronize_rcu_tasks_trace(); 2030 2031 bpf_prog_put(prog); 2032 } 2033 } 2034 2035 int perf_event_query_prog_array(struct perf_event *event, void __user *info) 2036 { 2037 struct perf_event_query_bpf __user *uquery = info; 2038 struct perf_event_query_bpf query = {}; 2039 struct bpf_prog_array *progs; 2040 u32 *ids, prog_cnt, ids_len; 2041 int ret; 2042 2043 if (!perfmon_capable()) 2044 return -EPERM; 2045 if (event->attr.type != PERF_TYPE_TRACEPOINT) 2046 return -EINVAL; 2047 if (copy_from_user(&query, uquery, sizeof(query))) 2048 return -EFAULT; 2049 2050 ids_len = query.ids_len; 2051 if (ids_len > BPF_TRACE_MAX_PROGS) 2052 return -E2BIG; 2053 ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN); 2054 if (!ids) 2055 return -ENOMEM; 2056 /* 2057 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which 2058 * is required when user only wants to check for uquery->prog_cnt. 2059 * There is no need to check for it since the case is handled 2060 * gracefully in bpf_prog_array_copy_info. 2061 */ 2062 2063 mutex_lock(&bpf_event_mutex); 2064 progs = bpf_event_rcu_dereference(event->tp_event->prog_array); 2065 ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt); 2066 mutex_unlock(&bpf_event_mutex); 2067 2068 if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || 2069 copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) 2070 ret = -EFAULT; 2071 2072 kfree(ids); 2073 return ret; 2074 } 2075 2076 extern struct bpf_raw_event_map __start__bpf_raw_tp[]; 2077 extern struct bpf_raw_event_map __stop__bpf_raw_tp[]; 2078 2079 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name) 2080 { 2081 struct bpf_raw_event_map *btp = __start__bpf_raw_tp; 2082 2083 for (; btp < __stop__bpf_raw_tp; btp++) { 2084 if (!strcmp(btp->tp->name, name)) 2085 return btp; 2086 } 2087 2088 return bpf_get_raw_tracepoint_module(name); 2089 } 2090 2091 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) 2092 { 2093 struct module *mod; 2094 2095 guard(rcu)(); 2096 mod = __module_address((unsigned long)btp); 2097 module_put(mod); 2098 } 2099 2100 static __always_inline 2101 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) 2102 { 2103 struct srcu_ctr __percpu *scp = NULL; 2104 struct bpf_prog *prog = link->link.prog; 2105 bool sleepable = prog->sleepable; 2106 struct bpf_run_ctx *old_run_ctx; 2107 struct bpf_trace_run_ctx run_ctx; 2108 2109 if (sleepable) { 2110 scp = rcu_read_lock_tasks_trace(); 2111 migrate_disable(); 2112 } else { 2113 rcu_read_lock_dont_migrate(); 2114 } 2115 2116 if (unlikely(!bpf_prog_get_recursion_context(prog))) { 2117 bpf_prog_inc_misses_counter(prog); 2118 goto out; 2119 } 2120 2121 run_ctx.bpf_cookie = link->cookie; 2122 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 2123 2124 (void)bpf_prog_run(prog, args); 2125 2126 bpf_reset_run_ctx(old_run_ctx); 2127 out: 2128 bpf_prog_put_recursion_context(prog); 2129 2130 if (sleepable) { 2131 migrate_enable(); 2132 rcu_read_unlock_tasks_trace(scp); 2133 } else { 2134 rcu_read_unlock_migrate(); 2135 } 2136 } 2137 2138 #define UNPACK(...) __VA_ARGS__ 2139 #define REPEAT_1(FN, DL, X, ...) FN(X) 2140 #define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__) 2141 #define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__) 2142 #define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__) 2143 #define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__) 2144 #define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__) 2145 #define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__) 2146 #define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__) 2147 #define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__) 2148 #define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__) 2149 #define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__) 2150 #define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__) 2151 #define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__) 2152 2153 #define SARG(X) u64 arg##X 2154 #define COPY(X) args[X] = arg##X 2155 2156 #define __DL_COM (,) 2157 #define __DL_SEM (;) 2158 2159 #define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 2160 2161 #define BPF_TRACE_DEFN_x(x) \ 2162 void bpf_trace_run##x(struct bpf_raw_tp_link *link, \ 2163 REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \ 2164 { \ 2165 u64 args[x]; \ 2166 REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \ 2167 __bpf_trace_run(link, args); \ 2168 } \ 2169 EXPORT_SYMBOL_GPL(bpf_trace_run##x) 2170 BPF_TRACE_DEFN_x(1); 2171 BPF_TRACE_DEFN_x(2); 2172 BPF_TRACE_DEFN_x(3); 2173 BPF_TRACE_DEFN_x(4); 2174 BPF_TRACE_DEFN_x(5); 2175 BPF_TRACE_DEFN_x(6); 2176 BPF_TRACE_DEFN_x(7); 2177 BPF_TRACE_DEFN_x(8); 2178 BPF_TRACE_DEFN_x(9); 2179 BPF_TRACE_DEFN_x(10); 2180 BPF_TRACE_DEFN_x(11); 2181 BPF_TRACE_DEFN_x(12); 2182 2183 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) 2184 { 2185 struct tracepoint *tp = btp->tp; 2186 struct bpf_prog *prog = link->link.prog; 2187 2188 /* 2189 * check that program doesn't access arguments beyond what's 2190 * available in this tracepoint 2191 */ 2192 if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) 2193 return -EINVAL; 2194 2195 if (prog->aux->max_tp_access > btp->writable_size) 2196 return -EINVAL; 2197 2198 return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link); 2199 } 2200 2201 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) 2202 { 2203 return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link); 2204 } 2205 2206 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, 2207 u32 *fd_type, const char **buf, 2208 u64 *probe_offset, u64 *probe_addr, 2209 unsigned long *missed) 2210 { 2211 bool is_tracepoint, is_syscall_tp; 2212 struct bpf_prog *prog; 2213 int flags, err = 0; 2214 2215 prog = event->prog; 2216 if (!prog) 2217 return -ENOENT; 2218 2219 /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */ 2220 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) 2221 return -EOPNOTSUPP; 2222 2223 *prog_id = prog->aux->id; 2224 flags = event->tp_event->flags; 2225 is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT; 2226 is_syscall_tp = is_syscall_trace_event(event->tp_event); 2227 2228 if (is_tracepoint || is_syscall_tp) { 2229 *buf = is_tracepoint ? event->tp_event->tp->name 2230 : event->tp_event->name; 2231 /* We allow NULL pointer for tracepoint */ 2232 if (fd_type) 2233 *fd_type = BPF_FD_TYPE_TRACEPOINT; 2234 if (probe_offset) 2235 *probe_offset = 0x0; 2236 if (probe_addr) 2237 *probe_addr = 0x0; 2238 } else { 2239 /* kprobe/uprobe */ 2240 err = -EOPNOTSUPP; 2241 #ifdef CONFIG_KPROBE_EVENTS 2242 if (flags & TRACE_EVENT_FL_KPROBE) 2243 err = bpf_get_kprobe_info(event, fd_type, buf, 2244 probe_offset, probe_addr, missed, 2245 event->attr.type == PERF_TYPE_TRACEPOINT); 2246 #endif 2247 #ifdef CONFIG_UPROBE_EVENTS 2248 if (flags & TRACE_EVENT_FL_UPROBE) 2249 err = bpf_get_uprobe_info(event, fd_type, buf, 2250 probe_offset, probe_addr, 2251 event->attr.type == PERF_TYPE_TRACEPOINT); 2252 #endif 2253 } 2254 2255 return err; 2256 } 2257 2258 static int __init send_signal_irq_work_init(void) 2259 { 2260 int cpu; 2261 struct send_signal_irq_work *work; 2262 2263 for_each_possible_cpu(cpu) { 2264 work = per_cpu_ptr(&send_signal_work, cpu); 2265 init_irq_work(&work->irq_work, do_bpf_send_signal); 2266 } 2267 return 0; 2268 } 2269 2270 subsys_initcall(send_signal_irq_work_init); 2271 2272 #ifdef CONFIG_MODULES 2273 static int bpf_event_notify(struct notifier_block *nb, unsigned long op, 2274 void *module) 2275 { 2276 struct bpf_trace_module *btm, *tmp; 2277 struct module *mod = module; 2278 int ret = 0; 2279 2280 if (mod->num_bpf_raw_events == 0 || 2281 (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) 2282 goto out; 2283 2284 mutex_lock(&bpf_module_mutex); 2285 2286 switch (op) { 2287 case MODULE_STATE_COMING: 2288 btm = kzalloc_obj(*btm); 2289 if (btm) { 2290 btm->module = module; 2291 list_add(&btm->list, &bpf_trace_modules); 2292 } else { 2293 ret = -ENOMEM; 2294 } 2295 break; 2296 case MODULE_STATE_GOING: 2297 list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) { 2298 if (btm->module == module) { 2299 list_del(&btm->list); 2300 kfree(btm); 2301 break; 2302 } 2303 } 2304 break; 2305 } 2306 2307 mutex_unlock(&bpf_module_mutex); 2308 2309 out: 2310 return notifier_from_errno(ret); 2311 } 2312 2313 static struct notifier_block bpf_module_nb = { 2314 .notifier_call = bpf_event_notify, 2315 }; 2316 2317 static int __init bpf_event_init(void) 2318 { 2319 register_module_notifier(&bpf_module_nb); 2320 return 0; 2321 } 2322 2323 fs_initcall(bpf_event_init); 2324 #endif /* CONFIG_MODULES */ 2325 2326 struct bpf_session_run_ctx { 2327 struct bpf_run_ctx run_ctx; 2328 bool is_return; 2329 void *data; 2330 }; 2331 2332 #ifdef CONFIG_FPROBE 2333 struct bpf_kprobe_multi_link { 2334 struct bpf_link link; 2335 struct fprobe fp; 2336 unsigned long *addrs; 2337 u64 *cookies; 2338 u32 cnt; 2339 u32 mods_cnt; 2340 struct module **mods; 2341 }; 2342 2343 struct bpf_kprobe_multi_run_ctx { 2344 struct bpf_session_run_ctx session_ctx; 2345 struct bpf_kprobe_multi_link *link; 2346 unsigned long entry_ip; 2347 }; 2348 2349 struct user_syms { 2350 const char **syms; 2351 char *buf; 2352 }; 2353 2354 #ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS 2355 static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs); 2356 #define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs) 2357 #else 2358 #define bpf_kprobe_multi_pt_regs_ptr() (NULL) 2359 #endif 2360 2361 static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip) 2362 { 2363 unsigned long ip = ftrace_get_symaddr(fentry_ip); 2364 2365 return ip ? : fentry_ip; 2366 } 2367 2368 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) 2369 { 2370 unsigned long __user usymbol; 2371 const char **syms = NULL; 2372 char *buf = NULL, *p; 2373 int err = -ENOMEM; 2374 unsigned int i; 2375 2376 syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL); 2377 if (!syms) 2378 goto error; 2379 2380 buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL); 2381 if (!buf) 2382 goto error; 2383 2384 for (p = buf, i = 0; i < cnt; i++) { 2385 if (__get_user(usymbol, usyms + i)) { 2386 err = -EFAULT; 2387 goto error; 2388 } 2389 err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN); 2390 if (err == KSYM_NAME_LEN) 2391 err = -E2BIG; 2392 if (err < 0) 2393 goto error; 2394 syms[i] = p; 2395 p += err + 1; 2396 } 2397 2398 us->syms = syms; 2399 us->buf = buf; 2400 return 0; 2401 2402 error: 2403 if (err) { 2404 kvfree(syms); 2405 kvfree(buf); 2406 } 2407 return err; 2408 } 2409 2410 static void kprobe_multi_put_modules(struct module **mods, u32 cnt) 2411 { 2412 u32 i; 2413 2414 for (i = 0; i < cnt; i++) 2415 module_put(mods[i]); 2416 } 2417 2418 static void free_user_syms(struct user_syms *us) 2419 { 2420 kvfree(us->syms); 2421 kvfree(us->buf); 2422 } 2423 2424 static void bpf_kprobe_multi_link_release(struct bpf_link *link) 2425 { 2426 struct bpf_kprobe_multi_link *kmulti_link; 2427 2428 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2429 /* Don't wait for RCU GP here. */ 2430 unregister_fprobe_async(&kmulti_link->fp); 2431 kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); 2432 } 2433 2434 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) 2435 { 2436 struct bpf_kprobe_multi_link *kmulti_link; 2437 2438 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2439 kvfree(kmulti_link->addrs); 2440 kvfree(kmulti_link->cookies); 2441 kfree(kmulti_link->mods); 2442 kfree(kmulti_link); 2443 } 2444 2445 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, 2446 struct bpf_link_info *info) 2447 { 2448 u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies); 2449 u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs); 2450 struct bpf_kprobe_multi_link *kmulti_link; 2451 u32 ucount = info->kprobe_multi.count; 2452 int err = 0, i; 2453 2454 if (!uaddrs ^ !ucount) 2455 return -EINVAL; 2456 if (ucookies && !ucount) 2457 return -EINVAL; 2458 2459 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2460 info->kprobe_multi.count = kmulti_link->cnt; 2461 info->kprobe_multi.flags = kmulti_link->link.flags; 2462 info->kprobe_multi.missed = kmulti_link->fp.nmissed; 2463 2464 if (!uaddrs) 2465 return 0; 2466 if (ucount < kmulti_link->cnt) 2467 err = -ENOSPC; 2468 else 2469 ucount = kmulti_link->cnt; 2470 2471 if (ucookies) { 2472 if (kmulti_link->cookies) { 2473 if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64))) 2474 return -EFAULT; 2475 } else { 2476 for (i = 0; i < ucount; i++) { 2477 if (put_user(0, ucookies + i)) 2478 return -EFAULT; 2479 } 2480 } 2481 } 2482 2483 if (kallsyms_show_value(current_cred())) { 2484 if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64))) 2485 return -EFAULT; 2486 } else { 2487 for (i = 0; i < ucount; i++) { 2488 if (put_user(0, uaddrs + i)) 2489 return -EFAULT; 2490 } 2491 } 2492 return err; 2493 } 2494 2495 #ifdef CONFIG_PROC_FS 2496 static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, 2497 struct seq_file *seq) 2498 { 2499 struct bpf_kprobe_multi_link *kmulti_link; 2500 bool has_cookies; 2501 2502 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2503 has_cookies = !!kmulti_link->cookies; 2504 2505 seq_printf(seq, 2506 "kprobe_cnt:\t%u\n" 2507 "missed:\t%lu\n", 2508 kmulti_link->cnt, 2509 kmulti_link->fp.nmissed); 2510 2511 seq_printf(seq, "%s\t %s\n", "cookie", "func"); 2512 for (int i = 0; i < kmulti_link->cnt; i++) { 2513 seq_printf(seq, 2514 "%llu\t %pS\n", 2515 has_cookies ? kmulti_link->cookies[i] : 0, 2516 (void *)kmulti_link->addrs[i]); 2517 } 2518 } 2519 #endif 2520 2521 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { 2522 .release = bpf_kprobe_multi_link_release, 2523 .dealloc_deferred = bpf_kprobe_multi_link_dealloc, 2524 .fill_link_info = bpf_kprobe_multi_link_fill_link_info, 2525 #ifdef CONFIG_PROC_FS 2526 .show_fdinfo = bpf_kprobe_multi_show_fdinfo, 2527 #endif 2528 }; 2529 2530 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv) 2531 { 2532 const struct bpf_kprobe_multi_link *link = priv; 2533 unsigned long *addr_a = a, *addr_b = b; 2534 u64 *cookie_a, *cookie_b; 2535 2536 cookie_a = link->cookies + (addr_a - link->addrs); 2537 cookie_b = link->cookies + (addr_b - link->addrs); 2538 2539 /* swap addr_a/addr_b and cookie_a/cookie_b values */ 2540 swap(*addr_a, *addr_b); 2541 swap(*cookie_a, *cookie_b); 2542 } 2543 2544 static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b) 2545 { 2546 const unsigned long *addr_a = a, *addr_b = b; 2547 2548 if (*addr_a == *addr_b) 2549 return 0; 2550 return *addr_a < *addr_b ? -1 : 1; 2551 } 2552 2553 static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv) 2554 { 2555 return bpf_kprobe_multi_addrs_cmp(a, b); 2556 } 2557 2558 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) 2559 { 2560 struct bpf_kprobe_multi_run_ctx *run_ctx; 2561 struct bpf_kprobe_multi_link *link; 2562 u64 *cookie, entry_ip; 2563 unsigned long *addr; 2564 2565 if (WARN_ON_ONCE(!ctx)) 2566 return 0; 2567 run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, 2568 session_ctx.run_ctx); 2569 link = run_ctx->link; 2570 if (!link->cookies) 2571 return 0; 2572 entry_ip = run_ctx->entry_ip; 2573 addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip), 2574 bpf_kprobe_multi_addrs_cmp); 2575 if (!addr) 2576 return 0; 2577 cookie = link->cookies + (addr - link->addrs); 2578 return *cookie; 2579 } 2580 2581 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 2582 { 2583 struct bpf_kprobe_multi_run_ctx *run_ctx; 2584 2585 run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, 2586 session_ctx.run_ctx); 2587 return run_ctx->entry_ip; 2588 } 2589 2590 static __always_inline int 2591 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, 2592 unsigned long entry_ip, struct ftrace_regs *fregs, 2593 bool is_return, void *data) 2594 { 2595 struct bpf_kprobe_multi_run_ctx run_ctx = { 2596 .session_ctx = { 2597 .is_return = is_return, 2598 .data = data, 2599 }, 2600 .link = link, 2601 .entry_ip = entry_ip, 2602 }; 2603 struct bpf_run_ctx *old_run_ctx; 2604 struct pt_regs *regs; 2605 int err; 2606 2607 /* 2608 * graph tracer framework ensures we won't migrate, so there is no need 2609 * to use migrate_disable for bpf_prog_run again. The check here just for 2610 * __this_cpu_inc_return. 2611 */ 2612 cant_sleep(); 2613 2614 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 2615 bpf_prog_inc_misses_counter(link->link.prog); 2616 err = 1; 2617 goto out; 2618 } 2619 2620 rcu_read_lock(); 2621 regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr()); 2622 old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); 2623 err = bpf_prog_run(link->link.prog, regs); 2624 bpf_reset_run_ctx(old_run_ctx); 2625 ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr()); 2626 rcu_read_unlock(); 2627 2628 out: 2629 __this_cpu_dec(bpf_prog_active); 2630 return err; 2631 } 2632 2633 static int 2634 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, 2635 unsigned long ret_ip, struct ftrace_regs *fregs, 2636 void *data) 2637 { 2638 struct bpf_kprobe_multi_link *link; 2639 int err; 2640 2641 link = container_of(fp, struct bpf_kprobe_multi_link, fp); 2642 err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), 2643 fregs, false, data); 2644 return is_kprobe_session(link->link.prog) ? err : 0; 2645 } 2646 2647 static void 2648 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, 2649 unsigned long ret_ip, struct ftrace_regs *fregs, 2650 void *data) 2651 { 2652 struct bpf_kprobe_multi_link *link; 2653 2654 link = container_of(fp, struct bpf_kprobe_multi_link, fp); 2655 kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), 2656 fregs, true, data); 2657 } 2658 2659 static int symbols_cmp_r(const void *a, const void *b, const void *priv) 2660 { 2661 const char **str_a = (const char **) a; 2662 const char **str_b = (const char **) b; 2663 2664 return strcmp(*str_a, *str_b); 2665 } 2666 2667 struct multi_symbols_sort { 2668 const char **funcs; 2669 u64 *cookies; 2670 }; 2671 2672 static void symbols_swap_r(void *a, void *b, int size, const void *priv) 2673 { 2674 const struct multi_symbols_sort *data = priv; 2675 const char **name_a = a, **name_b = b; 2676 2677 swap(*name_a, *name_b); 2678 2679 /* If defined, swap also related cookies. */ 2680 if (data->cookies) { 2681 u64 *cookie_a, *cookie_b; 2682 2683 cookie_a = data->cookies + (name_a - data->funcs); 2684 cookie_b = data->cookies + (name_b - data->funcs); 2685 swap(*cookie_a, *cookie_b); 2686 } 2687 } 2688 2689 struct modules_array { 2690 struct module **mods; 2691 int mods_cnt; 2692 int mods_cap; 2693 }; 2694 2695 static int add_module(struct modules_array *arr, struct module *mod) 2696 { 2697 struct module **mods; 2698 2699 if (arr->mods_cnt == arr->mods_cap) { 2700 arr->mods_cap = max(16, arr->mods_cap * 3 / 2); 2701 mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); 2702 if (!mods) 2703 return -ENOMEM; 2704 arr->mods = mods; 2705 } 2706 2707 arr->mods[arr->mods_cnt] = mod; 2708 arr->mods_cnt++; 2709 return 0; 2710 } 2711 2712 static bool has_module(struct modules_array *arr, struct module *mod) 2713 { 2714 int i; 2715 2716 for (i = arr->mods_cnt - 1; i >= 0; i--) { 2717 if (arr->mods[i] == mod) 2718 return true; 2719 } 2720 return false; 2721 } 2722 2723 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) 2724 { 2725 struct modules_array arr = {}; 2726 u32 i, err = 0; 2727 2728 for (i = 0; i < addrs_cnt; i++) { 2729 bool skip_add = false; 2730 struct module *mod; 2731 2732 scoped_guard(rcu) { 2733 mod = __module_address(addrs[i]); 2734 /* Either no module or it's already stored */ 2735 if (!mod || has_module(&arr, mod)) { 2736 skip_add = true; 2737 break; /* scoped_guard */ 2738 } 2739 if (!try_module_get(mod)) 2740 err = -EINVAL; 2741 } 2742 if (skip_add) 2743 continue; 2744 if (err) 2745 break; 2746 err = add_module(&arr, mod); 2747 if (err) { 2748 module_put(mod); 2749 break; 2750 } 2751 } 2752 2753 /* We return either err < 0 in case of error, ... */ 2754 if (err) { 2755 kprobe_multi_put_modules(arr.mods, arr.mods_cnt); 2756 kfree(arr.mods); 2757 return err; 2758 } 2759 2760 /* or number of modules found if everything is ok. */ 2761 *mods = arr.mods; 2762 return arr.mods_cnt; 2763 } 2764 2765 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) 2766 { 2767 u32 i; 2768 2769 for (i = 0; i < cnt; i++) { 2770 if (!within_error_injection_list(addrs[i])) 2771 return -EINVAL; 2772 } 2773 return 0; 2774 } 2775 2776 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2777 { 2778 struct bpf_kprobe_multi_link *link = NULL; 2779 struct bpf_link_primer link_primer; 2780 void __user *ucookies; 2781 unsigned long *addrs; 2782 u32 flags, cnt, size; 2783 void __user *uaddrs; 2784 u64 *cookies = NULL; 2785 void __user *usyms; 2786 int err; 2787 2788 /* no support for 32bit archs yet */ 2789 if (sizeof(u64) != sizeof(void *)) 2790 return -EOPNOTSUPP; 2791 2792 if (attr->link_create.flags) 2793 return -EINVAL; 2794 2795 if (!is_kprobe_multi(prog)) 2796 return -EINVAL; 2797 2798 /* kprobe_multi is not allowed to be sleepable. */ 2799 if (prog->sleepable) 2800 return -EINVAL; 2801 2802 /* Writing to context is not allowed for kprobes. */ 2803 if (prog->aux->kprobe_write_ctx) 2804 return -EINVAL; 2805 2806 flags = attr->link_create.kprobe_multi.flags; 2807 if (flags & ~BPF_F_KPROBE_MULTI_RETURN) 2808 return -EINVAL; 2809 2810 uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs); 2811 usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms); 2812 if (!!uaddrs == !!usyms) 2813 return -EINVAL; 2814 2815 cnt = attr->link_create.kprobe_multi.cnt; 2816 if (!cnt) 2817 return -EINVAL; 2818 if (cnt > MAX_KPROBE_MULTI_CNT) 2819 return -E2BIG; 2820 2821 size = cnt * sizeof(*addrs); 2822 addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); 2823 if (!addrs) 2824 return -ENOMEM; 2825 2826 ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); 2827 if (ucookies) { 2828 cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); 2829 if (!cookies) { 2830 err = -ENOMEM; 2831 goto error; 2832 } 2833 if (copy_from_user(cookies, ucookies, size)) { 2834 err = -EFAULT; 2835 goto error; 2836 } 2837 } 2838 2839 if (uaddrs) { 2840 if (copy_from_user(addrs, uaddrs, size)) { 2841 err = -EFAULT; 2842 goto error; 2843 } 2844 } else { 2845 struct multi_symbols_sort data = { 2846 .cookies = cookies, 2847 }; 2848 struct user_syms us; 2849 2850 err = copy_user_syms(&us, usyms, cnt); 2851 if (err) 2852 goto error; 2853 2854 if (cookies) 2855 data.funcs = us.syms; 2856 2857 sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, 2858 symbols_swap_r, &data); 2859 2860 err = ftrace_lookup_symbols(us.syms, cnt, addrs); 2861 free_user_syms(&us); 2862 if (err) 2863 goto error; 2864 } 2865 2866 if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { 2867 err = -EINVAL; 2868 goto error; 2869 } 2870 2871 link = kzalloc_obj(*link); 2872 if (!link) { 2873 err = -ENOMEM; 2874 goto error; 2875 } 2876 2877 bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI, 2878 &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type); 2879 2880 err = bpf_link_prime(&link->link, &link_primer); 2881 if (err) 2882 goto error; 2883 2884 if (!(flags & BPF_F_KPROBE_MULTI_RETURN)) 2885 link->fp.entry_handler = kprobe_multi_link_handler; 2886 if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog)) 2887 link->fp.exit_handler = kprobe_multi_link_exit_handler; 2888 if (is_kprobe_session(prog)) 2889 link->fp.entry_data_size = sizeof(u64); 2890 2891 link->addrs = addrs; 2892 link->cookies = cookies; 2893 link->cnt = cnt; 2894 link->link.flags = flags; 2895 2896 if (cookies) { 2897 /* 2898 * Sorting addresses will trigger sorting cookies as well 2899 * (check bpf_kprobe_multi_cookie_swap). This way we can 2900 * find cookie based on the address in bpf_get_attach_cookie 2901 * helper. 2902 */ 2903 sort_r(addrs, cnt, sizeof(*addrs), 2904 bpf_kprobe_multi_cookie_cmp, 2905 bpf_kprobe_multi_cookie_swap, 2906 link); 2907 } 2908 2909 err = get_modules_for_addrs(&link->mods, addrs, cnt); 2910 if (err < 0) { 2911 bpf_link_cleanup(&link_primer); 2912 return err; 2913 } 2914 link->mods_cnt = err; 2915 2916 err = register_fprobe_ips(&link->fp, addrs, cnt); 2917 if (err) { 2918 kprobe_multi_put_modules(link->mods, link->mods_cnt); 2919 bpf_link_cleanup(&link_primer); 2920 return err; 2921 } 2922 2923 return bpf_link_settle(&link_primer); 2924 2925 error: 2926 kfree(link); 2927 kvfree(addrs); 2928 kvfree(cookies); 2929 return err; 2930 } 2931 #else /* !CONFIG_FPROBE */ 2932 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2933 { 2934 return -EOPNOTSUPP; 2935 } 2936 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) 2937 { 2938 return 0; 2939 } 2940 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 2941 { 2942 return 0; 2943 } 2944 #endif 2945 2946 #ifdef CONFIG_UPROBES 2947 struct bpf_uprobe_multi_link; 2948 2949 struct bpf_uprobe { 2950 struct bpf_uprobe_multi_link *link; 2951 loff_t offset; 2952 unsigned long ref_ctr_offset; 2953 u64 cookie; 2954 struct uprobe *uprobe; 2955 struct uprobe_consumer consumer; 2956 bool session; 2957 }; 2958 2959 struct bpf_uprobe_multi_link { 2960 struct path path; 2961 struct bpf_link link; 2962 u32 cnt; 2963 struct bpf_uprobe *uprobes; 2964 struct task_struct *task; 2965 }; 2966 2967 struct bpf_uprobe_multi_run_ctx { 2968 struct bpf_session_run_ctx session_ctx; 2969 unsigned long entry_ip; 2970 struct bpf_uprobe *uprobe; 2971 }; 2972 2973 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt) 2974 { 2975 u32 i; 2976 2977 for (i = 0; i < cnt; i++) 2978 uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer); 2979 2980 if (cnt) 2981 uprobe_unregister_sync(); 2982 } 2983 2984 static void bpf_uprobe_multi_link_release(struct bpf_link *link) 2985 { 2986 struct bpf_uprobe_multi_link *umulti_link; 2987 2988 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 2989 bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt); 2990 if (umulti_link->task) 2991 put_task_struct(umulti_link->task); 2992 path_put(&umulti_link->path); 2993 } 2994 2995 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) 2996 { 2997 struct bpf_uprobe_multi_link *umulti_link; 2998 2999 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3000 kvfree(umulti_link->uprobes); 3001 kfree(umulti_link); 3002 } 3003 3004 static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, 3005 struct bpf_link_info *info) 3006 { 3007 u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets); 3008 u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies); 3009 u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets); 3010 u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path); 3011 u32 upath_size = info->uprobe_multi.path_size; 3012 struct bpf_uprobe_multi_link *umulti_link; 3013 u32 ucount = info->uprobe_multi.count; 3014 int err = 0, i; 3015 char *p, *buf; 3016 long left = 0; 3017 3018 if (!upath ^ !upath_size) 3019 return -EINVAL; 3020 3021 if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount) 3022 return -EINVAL; 3023 3024 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3025 info->uprobe_multi.count = umulti_link->cnt; 3026 info->uprobe_multi.flags = umulti_link->link.flags; 3027 info->uprobe_multi.pid = umulti_link->task ? 3028 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; 3029 3030 upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX; 3031 buf = kmalloc(upath_size, GFP_KERNEL); 3032 if (!buf) 3033 return -ENOMEM; 3034 p = d_path(&umulti_link->path, buf, upath_size); 3035 if (IS_ERR(p)) { 3036 kfree(buf); 3037 return PTR_ERR(p); 3038 } 3039 upath_size = buf + upath_size - p; 3040 3041 if (upath) 3042 left = copy_to_user(upath, p, upath_size); 3043 kfree(buf); 3044 if (left) 3045 return -EFAULT; 3046 info->uprobe_multi.path_size = upath_size; 3047 3048 if (!uoffsets && !ucookies && !uref_ctr_offsets) 3049 return 0; 3050 3051 if (ucount < umulti_link->cnt) 3052 err = -ENOSPC; 3053 else 3054 ucount = umulti_link->cnt; 3055 3056 for (i = 0; i < ucount; i++) { 3057 if (uoffsets && 3058 put_user(umulti_link->uprobes[i].offset, uoffsets + i)) 3059 return -EFAULT; 3060 if (uref_ctr_offsets && 3061 put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) 3062 return -EFAULT; 3063 if (ucookies && 3064 put_user(umulti_link->uprobes[i].cookie, ucookies + i)) 3065 return -EFAULT; 3066 } 3067 3068 return err; 3069 } 3070 3071 #ifdef CONFIG_PROC_FS 3072 static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link, 3073 struct seq_file *seq) 3074 { 3075 struct bpf_uprobe_multi_link *umulti_link; 3076 char *p, *buf; 3077 pid_t pid; 3078 3079 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3080 3081 buf = kmalloc(PATH_MAX, GFP_KERNEL); 3082 if (!buf) 3083 return; 3084 3085 p = d_path(&umulti_link->path, buf, PATH_MAX); 3086 if (IS_ERR(p)) { 3087 kfree(buf); 3088 return; 3089 } 3090 3091 pid = umulti_link->task ? 3092 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; 3093 seq_printf(seq, 3094 "uprobe_cnt:\t%u\n" 3095 "pid:\t%u\n" 3096 "path:\t%s\n", 3097 umulti_link->cnt, pid, p); 3098 3099 seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset"); 3100 for (int i = 0; i < umulti_link->cnt; i++) { 3101 seq_printf(seq, 3102 "%llu\t %#llx\t %#lx\n", 3103 umulti_link->uprobes[i].cookie, 3104 umulti_link->uprobes[i].offset, 3105 umulti_link->uprobes[i].ref_ctr_offset); 3106 } 3107 3108 kfree(buf); 3109 } 3110 #endif 3111 3112 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { 3113 .release = bpf_uprobe_multi_link_release, 3114 .dealloc_deferred = bpf_uprobe_multi_link_dealloc, 3115 .fill_link_info = bpf_uprobe_multi_link_fill_link_info, 3116 #ifdef CONFIG_PROC_FS 3117 .show_fdinfo = bpf_uprobe_multi_show_fdinfo, 3118 #endif 3119 }; 3120 3121 static int uprobe_prog_run(struct bpf_uprobe *uprobe, 3122 unsigned long entry_ip, 3123 struct pt_regs *regs, 3124 bool is_return, void *data) 3125 { 3126 struct bpf_uprobe_multi_link *link = uprobe->link; 3127 struct bpf_uprobe_multi_run_ctx run_ctx = { 3128 .session_ctx = { 3129 .is_return = is_return, 3130 .data = data, 3131 }, 3132 .entry_ip = entry_ip, 3133 .uprobe = uprobe, 3134 }; 3135 struct bpf_prog *prog = link->link.prog; 3136 bool sleepable = prog->sleepable; 3137 struct bpf_run_ctx *old_run_ctx; 3138 int err; 3139 3140 if (link->task && !same_thread_group(current, link->task)) 3141 return 0; 3142 3143 if (sleepable) 3144 rcu_read_lock_trace(); 3145 else 3146 rcu_read_lock(); 3147 3148 migrate_disable(); 3149 3150 old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); 3151 err = bpf_prog_run(link->link.prog, regs); 3152 bpf_reset_run_ctx(old_run_ctx); 3153 3154 migrate_enable(); 3155 3156 if (sleepable) 3157 rcu_read_unlock_trace(); 3158 else 3159 rcu_read_unlock(); 3160 return err; 3161 } 3162 3163 static bool 3164 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm) 3165 { 3166 struct bpf_uprobe *uprobe; 3167 3168 uprobe = container_of(con, struct bpf_uprobe, consumer); 3169 return uprobe->link->task->mm == mm; 3170 } 3171 3172 static int 3173 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, 3174 __u64 *data) 3175 { 3176 struct bpf_uprobe *uprobe; 3177 int ret; 3178 3179 uprobe = container_of(con, struct bpf_uprobe, consumer); 3180 ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data); 3181 if (uprobe->session) 3182 return ret ? UPROBE_HANDLER_IGNORE : 0; 3183 return 0; 3184 } 3185 3186 static int 3187 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, 3188 __u64 *data) 3189 { 3190 struct bpf_uprobe *uprobe; 3191 3192 uprobe = container_of(con, struct bpf_uprobe, consumer); 3193 uprobe_prog_run(uprobe, func, regs, true, data); 3194 return 0; 3195 } 3196 3197 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3198 { 3199 struct bpf_uprobe_multi_run_ctx *run_ctx; 3200 3201 run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, 3202 session_ctx.run_ctx); 3203 return run_ctx->entry_ip; 3204 } 3205 3206 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3207 { 3208 struct bpf_uprobe_multi_run_ctx *run_ctx; 3209 3210 run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, 3211 session_ctx.run_ctx); 3212 return run_ctx->uprobe->cookie; 3213 } 3214 3215 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3216 { 3217 struct bpf_uprobe_multi_link *link = NULL; 3218 unsigned long __user *uref_ctr_offsets; 3219 struct bpf_link_primer link_primer; 3220 struct bpf_uprobe *uprobes = NULL; 3221 struct task_struct *task = NULL; 3222 unsigned long __user *uoffsets; 3223 u64 __user *ucookies; 3224 void __user *upath; 3225 u32 flags, cnt, i; 3226 struct path path; 3227 char *name; 3228 pid_t pid; 3229 int err; 3230 3231 /* no support for 32bit archs yet */ 3232 if (sizeof(u64) != sizeof(void *)) 3233 return -EOPNOTSUPP; 3234 3235 if (attr->link_create.flags) 3236 return -EINVAL; 3237 3238 if (!is_uprobe_multi(prog)) 3239 return -EINVAL; 3240 3241 flags = attr->link_create.uprobe_multi.flags; 3242 if (flags & ~BPF_F_UPROBE_MULTI_RETURN) 3243 return -EINVAL; 3244 3245 /* 3246 * path, offsets and cnt are mandatory, 3247 * ref_ctr_offsets and cookies are optional 3248 */ 3249 upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); 3250 uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); 3251 cnt = attr->link_create.uprobe_multi.cnt; 3252 pid = attr->link_create.uprobe_multi.pid; 3253 3254 if (!upath || !uoffsets || !cnt || pid < 0) 3255 return -EINVAL; 3256 if (cnt > MAX_UPROBE_MULTI_CNT) 3257 return -E2BIG; 3258 3259 uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); 3260 ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); 3261 3262 name = strndup_user(upath, PATH_MAX); 3263 if (IS_ERR(name)) { 3264 err = PTR_ERR(name); 3265 return err; 3266 } 3267 3268 err = kern_path(name, LOOKUP_FOLLOW, &path); 3269 kfree(name); 3270 if (err) 3271 return err; 3272 3273 if (!d_is_reg(path.dentry)) { 3274 err = -EBADF; 3275 goto error_path_put; 3276 } 3277 3278 if (pid) { 3279 rcu_read_lock(); 3280 task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); 3281 rcu_read_unlock(); 3282 if (!task) { 3283 err = -ESRCH; 3284 goto error_path_put; 3285 } 3286 } 3287 3288 err = -ENOMEM; 3289 3290 link = kzalloc_obj(*link); 3291 uprobes = kvzalloc_objs(*uprobes, cnt); 3292 3293 if (!uprobes || !link) 3294 goto error_free; 3295 3296 for (i = 0; i < cnt; i++) { 3297 if (__get_user(uprobes[i].offset, uoffsets + i)) { 3298 err = -EFAULT; 3299 goto error_free; 3300 } 3301 if (uprobes[i].offset < 0) { 3302 err = -EINVAL; 3303 goto error_free; 3304 } 3305 if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) { 3306 err = -EFAULT; 3307 goto error_free; 3308 } 3309 if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { 3310 err = -EFAULT; 3311 goto error_free; 3312 } 3313 3314 uprobes[i].link = link; 3315 3316 if (!(flags & BPF_F_UPROBE_MULTI_RETURN)) 3317 uprobes[i].consumer.handler = uprobe_multi_link_handler; 3318 if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog)) 3319 uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; 3320 if (is_uprobe_session(prog)) 3321 uprobes[i].session = true; 3322 if (pid) 3323 uprobes[i].consumer.filter = uprobe_multi_link_filter; 3324 } 3325 3326 link->cnt = cnt; 3327 link->uprobes = uprobes; 3328 link->path = path; 3329 link->task = task; 3330 link->link.flags = flags; 3331 3332 bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, 3333 &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type); 3334 3335 for (i = 0; i < cnt; i++) { 3336 uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), 3337 uprobes[i].offset, 3338 uprobes[i].ref_ctr_offset, 3339 &uprobes[i].consumer); 3340 if (IS_ERR(uprobes[i].uprobe)) { 3341 err = PTR_ERR(uprobes[i].uprobe); 3342 link->cnt = i; 3343 goto error_unregister; 3344 } 3345 } 3346 3347 err = bpf_link_prime(&link->link, &link_primer); 3348 if (err) 3349 goto error_unregister; 3350 3351 return bpf_link_settle(&link_primer); 3352 3353 error_unregister: 3354 bpf_uprobe_unregister(uprobes, link->cnt); 3355 3356 error_free: 3357 kvfree(uprobes); 3358 kfree(link); 3359 if (task) 3360 put_task_struct(task); 3361 error_path_put: 3362 path_put(&path); 3363 return err; 3364 } 3365 #else /* !CONFIG_UPROBES */ 3366 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3367 { 3368 return -EOPNOTSUPP; 3369 } 3370 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3371 { 3372 return 0; 3373 } 3374 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3375 { 3376 return 0; 3377 } 3378 #endif /* CONFIG_UPROBES */ 3379 3380 __bpf_kfunc_start_defs(); 3381 3382 __bpf_kfunc bool bpf_session_is_return(void *ctx) 3383 { 3384 struct bpf_session_run_ctx *session_ctx; 3385 3386 session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); 3387 return session_ctx->is_return; 3388 } 3389 3390 __bpf_kfunc __u64 *bpf_session_cookie(void *ctx) 3391 { 3392 struct bpf_session_run_ctx *session_ctx; 3393 3394 session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); 3395 return session_ctx->data; 3396 } 3397 3398 __bpf_kfunc_end_defs(); 3399 3400 BTF_KFUNCS_START(session_kfunc_set_ids) 3401 BTF_ID_FLAGS(func, bpf_session_is_return) 3402 BTF_ID_FLAGS(func, bpf_session_cookie) 3403 BTF_KFUNCS_END(session_kfunc_set_ids) 3404 3405 static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id) 3406 { 3407 if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id)) 3408 return 0; 3409 3410 if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog)) 3411 return -EACCES; 3412 3413 return 0; 3414 } 3415 3416 static const struct btf_kfunc_id_set bpf_session_kfunc_set = { 3417 .owner = THIS_MODULE, 3418 .set = &session_kfunc_set_ids, 3419 .filter = bpf_session_filter, 3420 }; 3421 3422 static int __init bpf_trace_kfuncs_init(void) 3423 { 3424 int err = 0; 3425 3426 err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set); 3427 err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set); 3428 3429 return err; 3430 } 3431 3432 late_initcall(bpf_trace_kfuncs_init); 3433 3434 typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk); 3435 3436 /* 3437 * The __always_inline is to make sure the compiler doesn't 3438 * generate indirect calls into callbacks, which is expensive, 3439 * on some kernel configurations. This allows compiler to put 3440 * direct calls into all the specific callback implementations 3441 * (copy_user_data_sleepable, copy_user_data_nofault, and so on) 3442 */ 3443 static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size, 3444 const void *unsafe_src, 3445 copy_fn_t str_copy_fn, 3446 struct task_struct *tsk) 3447 { 3448 const struct bpf_dynptr_kern *dst; 3449 u64 chunk_sz, off; 3450 void *dst_slice; 3451 int cnt, err; 3452 char buf[256]; 3453 3454 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); 3455 if (likely(dst_slice)) 3456 return str_copy_fn(dst_slice, unsafe_src, size, tsk); 3457 3458 dst = (struct bpf_dynptr_kern *)dptr; 3459 if (bpf_dynptr_check_off_len(dst, doff, size)) 3460 return -E2BIG; 3461 3462 for (off = 0; off < size; off += chunk_sz - 1) { 3463 chunk_sz = min_t(u64, sizeof(buf), size - off); 3464 /* Expect str_copy_fn to return count of copied bytes, including 3465 * zero terminator. Next iteration increment off by chunk_sz - 1 to 3466 * overwrite NUL. 3467 */ 3468 cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3469 if (cnt < 0) 3470 return cnt; 3471 err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0); 3472 if (err) 3473 return err; 3474 if (cnt < chunk_sz || chunk_sz == 1) /* we are done */ 3475 return off + cnt; 3476 } 3477 return off; 3478 } 3479 3480 static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, 3481 u64 size, const void *unsafe_src, 3482 copy_fn_t copy_fn, struct task_struct *tsk) 3483 { 3484 const struct bpf_dynptr_kern *dst; 3485 void *dst_slice; 3486 char buf[256]; 3487 u64 off, chunk_sz; 3488 int err; 3489 3490 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); 3491 if (likely(dst_slice)) 3492 return copy_fn(dst_slice, unsafe_src, size, tsk); 3493 3494 dst = (struct bpf_dynptr_kern *)dptr; 3495 if (bpf_dynptr_check_off_len(dst, doff, size)) 3496 return -E2BIG; 3497 3498 for (off = 0; off < size; off += chunk_sz) { 3499 chunk_sz = min_t(u64, sizeof(buf), size - off); 3500 err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3501 if (err) 3502 return err; 3503 err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0); 3504 if (err) 3505 return err; 3506 } 3507 return 0; 3508 } 3509 3510 static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src, 3511 u32 size, struct task_struct *tsk) 3512 { 3513 return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size); 3514 } 3515 3516 static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src, 3517 u32 size, struct task_struct *tsk) 3518 { 3519 int ret; 3520 3521 if (!tsk) { /* Read from the current task */ 3522 ret = copy_from_user(dst, (const void __user *)unsafe_src, size); 3523 if (ret) 3524 return -EFAULT; 3525 return 0; 3526 } 3527 3528 ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0); 3529 if (ret != size) 3530 return -EFAULT; 3531 return 0; 3532 } 3533 3534 static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src, 3535 u32 size, struct task_struct *tsk) 3536 { 3537 return copy_from_kernel_nofault(dst, unsafe_src, size); 3538 } 3539 3540 static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src, 3541 u32 size, struct task_struct *tsk) 3542 { 3543 return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size); 3544 } 3545 3546 static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src, 3547 u32 size, struct task_struct *tsk) 3548 { 3549 int ret; 3550 3551 if (unlikely(size == 0)) 3552 return 0; 3553 3554 if (tsk) { 3555 ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0); 3556 } else { 3557 ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1); 3558 /* strncpy_from_user does not guarantee NUL termination */ 3559 if (ret >= 0) 3560 ((char *)dst)[ret] = '\0'; 3561 } 3562 3563 if (ret < 0) 3564 return ret; 3565 return ret + 1; 3566 } 3567 3568 static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src, 3569 u32 size, struct task_struct *tsk) 3570 { 3571 return strncpy_from_kernel_nofault(dst, unsafe_src, size); 3572 } 3573 3574 __bpf_kfunc_start_defs(); 3575 3576 __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, 3577 u64 value) 3578 { 3579 if (type != PIDTYPE_PID && type != PIDTYPE_TGID) 3580 return -EINVAL; 3581 3582 return bpf_send_signal_common(sig, type, task, value); 3583 } 3584 3585 __bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off, 3586 u64 size, const void __user *unsafe_ptr__ign) 3587 { 3588 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3589 copy_user_data_nofault, NULL); 3590 } 3591 3592 __bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off, 3593 u64 size, const void *unsafe_ptr__ign) 3594 { 3595 return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, 3596 copy_kernel_data_nofault, NULL); 3597 } 3598 3599 __bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3600 u64 size, const void __user *unsafe_ptr__ign) 3601 { 3602 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3603 copy_user_str_nofault, NULL); 3604 } 3605 3606 __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3607 u64 size, const void *unsafe_ptr__ign) 3608 { 3609 return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, 3610 copy_kernel_str_nofault, NULL); 3611 } 3612 3613 __bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off, 3614 u64 size, const void __user *unsafe_ptr__ign) 3615 { 3616 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3617 copy_user_data_sleepable, NULL); 3618 } 3619 3620 __bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3621 u64 size, const void __user *unsafe_ptr__ign) 3622 { 3623 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3624 copy_user_str_sleepable, NULL); 3625 } 3626 3627 __bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off, 3628 u64 size, const void __user *unsafe_ptr__ign, 3629 struct task_struct *tsk) 3630 { 3631 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3632 copy_user_data_sleepable, tsk); 3633 } 3634 3635 __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3636 u64 size, const void __user *unsafe_ptr__ign, 3637 struct task_struct *tsk) 3638 { 3639 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3640 copy_user_str_sleepable, tsk); 3641 } 3642 3643 __bpf_kfunc_end_defs(); 3644