1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016 Facebook 4 */ 5 #include <linux/kernel.h> 6 #include <linux/types.h> 7 #include <linux/slab.h> 8 #include <linux/bpf.h> 9 #include <linux/bpf_verifier.h> 10 #include <linux/bpf_perf_event.h> 11 #include <linux/btf.h> 12 #include <linux/filter.h> 13 #include <linux/uaccess.h> 14 #include <linux/ctype.h> 15 #include <linux/kprobes.h> 16 #include <linux/spinlock.h> 17 #include <linux/syscalls.h> 18 #include <linux/error-injection.h> 19 #include <linux/btf_ids.h> 20 #include <linux/bpf_lsm.h> 21 #include <linux/fprobe.h> 22 #include <linux/bsearch.h> 23 #include <linux/sort.h> 24 #include <linux/key.h> 25 #include <linux/namei.h> 26 #include <linux/file.h> 27 28 #include <net/bpf_sk_storage.h> 29 30 #include <uapi/linux/bpf.h> 31 #include <uapi/linux/btf.h> 32 33 #include <asm/tlb.h> 34 35 #include "trace_probe.h" 36 #include "trace.h" 37 38 #define CREATE_TRACE_POINTS 39 #include "bpf_trace.h" 40 41 #define bpf_event_rcu_dereference(p) \ 42 rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) 43 44 #define MAX_UPROBE_MULTI_CNT (1U << 20) 45 #define MAX_KPROBE_MULTI_CNT (1U << 20) 46 #define MAX_TRACING_MULTI_CNT (1U << 20) 47 48 #ifdef CONFIG_MODULES 49 struct bpf_trace_module { 50 struct module *module; 51 struct list_head list; 52 }; 53 54 static LIST_HEAD(bpf_trace_modules); 55 static DEFINE_MUTEX(bpf_module_mutex); 56 57 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) 58 { 59 struct bpf_raw_event_map *btp, *ret = NULL; 60 struct bpf_trace_module *btm; 61 unsigned int i; 62 63 mutex_lock(&bpf_module_mutex); 64 list_for_each_entry(btm, &bpf_trace_modules, list) { 65 for (i = 0; i < btm->module->num_bpf_raw_events; ++i) { 66 btp = &btm->module->bpf_raw_events[i]; 67 if (!strcmp(btp->tp->name, name)) { 68 if (try_module_get(btm->module)) 69 ret = btp; 70 goto out; 71 } 72 } 73 } 74 out: 75 mutex_unlock(&bpf_module_mutex); 76 return ret; 77 } 78 #else 79 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) 80 { 81 return NULL; 82 } 83 #endif /* CONFIG_MODULES */ 84 85 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 86 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 87 88 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, 89 u64 flags, const struct btf **btf, 90 s32 *btf_id); 91 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx); 92 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 93 94 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx); 95 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 96 97 /** 98 * trace_call_bpf - invoke BPF program 99 * @call: tracepoint event 100 * @ctx: opaque context pointer 101 * 102 * kprobe handlers execute BPF programs via this helper. 103 * Can be used from static tracepoints in the future. 104 * 105 * Return: BPF programs always return an integer which is interpreted by 106 * kprobe handler as: 107 * 0 - return from kprobe (event is filtered out) 108 * 1 - store kprobe event into ring buffer 109 * Other values are reserved and currently alias to 1 110 */ 111 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 112 { 113 unsigned int ret; 114 115 cant_sleep(); 116 117 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 118 /* 119 * since some bpf program is already running on this cpu, 120 * don't call into another bpf program (same or different) 121 * and don't send kprobe event into ring-buffer, 122 * so return zero here 123 */ 124 rcu_read_lock(); 125 bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); 126 rcu_read_unlock(); 127 ret = 0; 128 goto out; 129 } 130 131 /* 132 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock 133 * to all call sites, we did a bpf_prog_array_valid() there to check 134 * whether call->prog_array is empty or not, which is 135 * a heuristic to speed up execution. 136 * 137 * If bpf_prog_array_valid() fetched prog_array was 138 * non-NULL, we go into trace_call_bpf() and do the actual 139 * proper rcu_dereference() under RCU lock. 140 * If it turns out that prog_array is NULL then, we bail out. 141 * For the opposite, if the bpf_prog_array_valid() fetched pointer 142 * was NULL, you'll skip the prog_array with the risk of missing 143 * out of events when it was updated in between this and the 144 * rcu_dereference() which is accepted risk. 145 */ 146 rcu_read_lock(); 147 ret = bpf_prog_run_array(rcu_dereference(call->prog_array), 148 ctx, bpf_prog_run); 149 rcu_read_unlock(); 150 151 out: 152 __this_cpu_dec(bpf_prog_active); 153 154 return ret; 155 } 156 157 /** 158 * trace_call_bpf_faultable - invoke BPF program in faultable context 159 * @call: tracepoint event 160 * @ctx: opaque context pointer 161 * 162 * Variant of trace_call_bpf() for faultable tracepoints (syscall 163 * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace 164 * for lifetime protection and bpf_prog_run_array_sleepable() for per-program 165 * RCU flavor selection, following the uprobe pattern. 166 * 167 * Per-program recursion protection is provided by 168 * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not 169 * needed because syscall tracepoints cannot self-recurse. 170 * 171 * Must be called from a faultable/preemptible context. 172 */ 173 unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx) 174 { 175 struct bpf_prog_array *prog_array; 176 177 might_fault(); 178 guard(rcu_tasks_trace)(); 179 180 prog_array = rcu_dereference_check(call->prog_array, 181 rcu_read_lock_trace_held()); 182 return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run); 183 } 184 185 #ifdef CONFIG_BPF_KPROBE_OVERRIDE 186 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) 187 { 188 regs_set_return_value(regs, rc); 189 override_function_with_return(regs); 190 return 0; 191 } 192 193 static const struct bpf_func_proto bpf_override_return_proto = { 194 .func = bpf_override_return, 195 .gpl_only = true, 196 .ret_type = RET_INTEGER, 197 .arg1_type = ARG_PTR_TO_CTX, 198 .arg2_type = ARG_ANYTHING, 199 }; 200 #endif 201 202 static __always_inline int 203 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) 204 { 205 int ret; 206 207 ret = copy_from_user_nofault(dst, unsafe_ptr, size); 208 if (unlikely(ret < 0)) 209 memset(dst, 0, size); 210 return ret; 211 } 212 213 BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size, 214 const void __user *, unsafe_ptr) 215 { 216 return bpf_probe_read_user_common(dst, size, unsafe_ptr); 217 } 218 219 const struct bpf_func_proto bpf_probe_read_user_proto = { 220 .func = bpf_probe_read_user, 221 .gpl_only = true, 222 .ret_type = RET_INTEGER, 223 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 224 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 225 .arg3_type = ARG_ANYTHING, 226 }; 227 228 static __always_inline int 229 bpf_probe_read_user_str_common(void *dst, u32 size, 230 const void __user *unsafe_ptr) 231 { 232 int ret; 233 234 /* 235 * NB: We rely on strncpy_from_user() not copying junk past the NUL 236 * terminator into `dst`. 237 * 238 * strncpy_from_user() does long-sized strides in the fast path. If the 239 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`, 240 * then there could be junk after the NUL in `dst`. If user takes `dst` 241 * and keys a hash map with it, then semantically identical strings can 242 * occupy multiple entries in the map. 243 */ 244 ret = strncpy_from_user_nofault(dst, unsafe_ptr, size); 245 if (unlikely(ret < 0)) 246 memset(dst, 0, size); 247 return ret; 248 } 249 250 BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size, 251 const void __user *, unsafe_ptr) 252 { 253 return bpf_probe_read_user_str_common(dst, size, unsafe_ptr); 254 } 255 256 const struct bpf_func_proto bpf_probe_read_user_str_proto = { 257 .func = bpf_probe_read_user_str, 258 .gpl_only = true, 259 .ret_type = RET_INTEGER, 260 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 261 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 262 .arg3_type = ARG_ANYTHING, 263 }; 264 265 BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size, 266 const void *, unsafe_ptr) 267 { 268 return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); 269 } 270 271 const struct bpf_func_proto bpf_probe_read_kernel_proto = { 272 .func = bpf_probe_read_kernel, 273 .gpl_only = true, 274 .ret_type = RET_INTEGER, 275 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 276 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 277 .arg3_type = ARG_ANYTHING, 278 }; 279 280 static __always_inline int 281 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr) 282 { 283 int ret; 284 285 /* 286 * The strncpy_from_kernel_nofault() call will likely not fill the 287 * entire buffer, but that's okay in this circumstance as we're probing 288 * arbitrary memory anyway similar to bpf_probe_read_*() and might 289 * as well probe the stack. Thus, memory is explicitly cleared 290 * only in error case, so that improper users ignoring return 291 * code altogether don't copy garbage; otherwise length of string 292 * is returned that can be used for bpf_perf_event_output() et al. 293 */ 294 ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size); 295 if (unlikely(ret < 0)) 296 memset(dst, 0, size); 297 return ret; 298 } 299 300 BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size, 301 const void *, unsafe_ptr) 302 { 303 return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); 304 } 305 306 const struct bpf_func_proto bpf_probe_read_kernel_str_proto = { 307 .func = bpf_probe_read_kernel_str, 308 .gpl_only = true, 309 .ret_type = RET_INTEGER, 310 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 311 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 312 .arg3_type = ARG_ANYTHING, 313 }; 314 315 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 316 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size, 317 const void *, unsafe_ptr) 318 { 319 if ((unsigned long)unsafe_ptr < TASK_SIZE) { 320 return bpf_probe_read_user_common(dst, size, 321 (__force void __user *)unsafe_ptr); 322 } 323 return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); 324 } 325 326 static const struct bpf_func_proto bpf_probe_read_compat_proto = { 327 .func = bpf_probe_read_compat, 328 .gpl_only = true, 329 .ret_type = RET_INTEGER, 330 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 331 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 332 .arg3_type = ARG_ANYTHING, 333 }; 334 335 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size, 336 const void *, unsafe_ptr) 337 { 338 if ((unsigned long)unsafe_ptr < TASK_SIZE) { 339 return bpf_probe_read_user_str_common(dst, size, 340 (__force void __user *)unsafe_ptr); 341 } 342 return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); 343 } 344 345 static const struct bpf_func_proto bpf_probe_read_compat_str_proto = { 346 .func = bpf_probe_read_compat_str, 347 .gpl_only = true, 348 .ret_type = RET_INTEGER, 349 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 350 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 351 .arg3_type = ARG_ANYTHING, 352 }; 353 #endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ 354 355 BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, 356 u32, size) 357 { 358 /* 359 * Ensure we're in user context which is safe for the helper to 360 * run. This helper has no business in a kthread. 361 * 362 * access_ok() should prevent writing to non-user memory, but in 363 * some situations (nommu, temporary switch, etc) access_ok() does 364 * not provide enough validation, hence the check on KERNEL_DS. 365 * 366 * nmi_uaccess_okay() ensures the probe is not run in an interim 367 * state, when the task or mm are switched. This is specifically 368 * required to prevent the use of temporary mm. 369 */ 370 371 if (unlikely(in_interrupt() || 372 current->flags & (PF_KTHREAD | PF_EXITING))) 373 return -EPERM; 374 if (unlikely(!nmi_uaccess_okay())) 375 return -EPERM; 376 377 return copy_to_user_nofault(unsafe_ptr, src, size); 378 } 379 380 static const struct bpf_func_proto bpf_probe_write_user_proto = { 381 .func = bpf_probe_write_user, 382 .gpl_only = true, 383 .ret_type = RET_INTEGER, 384 .arg1_type = ARG_ANYTHING, 385 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 386 .arg3_type = ARG_CONST_SIZE, 387 }; 388 389 #define MAX_TRACE_PRINTK_VARARGS 3 390 #define BPF_TRACE_PRINTK_SIZE 1024 391 392 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, 393 u64, arg2, u64, arg3) 394 { 395 u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; 396 struct bpf_bprintf_data data = { 397 .get_bin_args = true, 398 .get_buf = true, 399 }; 400 int ret; 401 402 ret = bpf_bprintf_prepare(fmt, fmt_size, args, 403 MAX_TRACE_PRINTK_VARARGS, &data); 404 if (ret < 0) 405 return ret; 406 407 ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); 408 409 trace_bpf_trace_printk(data.buf); 410 411 bpf_bprintf_cleanup(&data); 412 413 return ret; 414 } 415 416 static const struct bpf_func_proto bpf_trace_printk_proto = { 417 .func = bpf_trace_printk, 418 .gpl_only = true, 419 .ret_type = RET_INTEGER, 420 .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 421 .arg2_type = ARG_CONST_SIZE, 422 }; 423 424 static void __set_printk_clr_event(struct work_struct *work) 425 { 426 /* 427 * This program might be calling bpf_trace_printk, 428 * so enable the associated bpf_trace/bpf_trace_printk event. 429 * Repeat this each time as it is possible a user has 430 * disabled bpf_trace_printk events. By loading a program 431 * calling bpf_trace_printk() however the user has expressed 432 * the intent to see such events. 433 */ 434 if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) 435 pr_warn_ratelimited("could not enable bpf_trace_printk events"); 436 } 437 static DECLARE_WORK(set_printk_work, __set_printk_clr_event); 438 439 const struct bpf_func_proto *bpf_get_trace_printk_proto(void) 440 { 441 schedule_work(&set_printk_work); 442 return &bpf_trace_printk_proto; 443 } 444 445 BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, 446 u32, data_len) 447 { 448 struct bpf_bprintf_data data = { 449 .get_bin_args = true, 450 .get_buf = true, 451 }; 452 int ret, num_args; 453 454 if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || 455 (data_len && !args)) 456 return -EINVAL; 457 num_args = data_len / 8; 458 459 ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); 460 if (ret < 0) 461 return ret; 462 463 ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); 464 465 trace_bpf_trace_printk(data.buf); 466 467 bpf_bprintf_cleanup(&data); 468 469 return ret; 470 } 471 472 static const struct bpf_func_proto bpf_trace_vprintk_proto = { 473 .func = bpf_trace_vprintk, 474 .gpl_only = true, 475 .ret_type = RET_INTEGER, 476 .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 477 .arg2_type = ARG_CONST_SIZE, 478 .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 479 .arg4_type = ARG_CONST_SIZE_OR_ZERO, 480 }; 481 482 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) 483 { 484 schedule_work(&set_printk_work); 485 return &bpf_trace_vprintk_proto; 486 } 487 488 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, 489 const void *, args, u32, data_len) 490 { 491 struct bpf_bprintf_data data = { 492 .get_bin_args = true, 493 }; 494 int err, num_args; 495 496 if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || 497 (data_len && !args)) 498 return -EINVAL; 499 num_args = data_len / 8; 500 501 err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); 502 if (err < 0) 503 return err; 504 505 seq_bprintf(m, fmt, data.bin_args); 506 507 bpf_bprintf_cleanup(&data); 508 509 return seq_has_overflowed(m) ? -EOVERFLOW : 0; 510 } 511 512 BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file) 513 514 static const struct bpf_func_proto bpf_seq_printf_proto = { 515 .func = bpf_seq_printf, 516 .gpl_only = true, 517 .ret_type = RET_INTEGER, 518 .arg1_type = ARG_PTR_TO_BTF_ID, 519 .arg1_btf_id = &btf_seq_file_ids[0], 520 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 521 .arg3_type = ARG_CONST_SIZE, 522 .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 523 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 524 }; 525 526 BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len) 527 { 528 return seq_write(m, data, len) ? -EOVERFLOW : 0; 529 } 530 531 static const struct bpf_func_proto bpf_seq_write_proto = { 532 .func = bpf_seq_write, 533 .gpl_only = true, 534 .ret_type = RET_INTEGER, 535 .arg1_type = ARG_PTR_TO_BTF_ID, 536 .arg1_btf_id = &btf_seq_file_ids[0], 537 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 538 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 539 }; 540 541 BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr, 542 u32, btf_ptr_size, u64, flags) 543 { 544 const struct btf *btf; 545 s32 btf_id; 546 int ret; 547 548 ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); 549 if (ret) 550 return ret; 551 552 return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags); 553 } 554 555 static const struct bpf_func_proto bpf_seq_printf_btf_proto = { 556 .func = bpf_seq_printf_btf, 557 .gpl_only = true, 558 .ret_type = RET_INTEGER, 559 .arg1_type = ARG_PTR_TO_BTF_ID, 560 .arg1_btf_id = &btf_seq_file_ids[0], 561 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 562 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 563 .arg4_type = ARG_ANYTHING, 564 }; 565 566 static __always_inline int 567 get_map_perf_counter(struct bpf_map *map, u64 flags, 568 u64 *value, u64 *enabled, u64 *running) 569 { 570 struct bpf_array *array = container_of(map, struct bpf_array, map); 571 unsigned int cpu = smp_processor_id(); 572 u64 index = flags & BPF_F_INDEX_MASK; 573 struct bpf_event_entry *ee; 574 575 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 576 return -EINVAL; 577 if (index == BPF_F_CURRENT_CPU) 578 index = cpu; 579 if (unlikely(index >= array->map.max_entries)) 580 return -E2BIG; 581 582 ee = READ_ONCE(array->ptrs[index]); 583 if (!ee) 584 return -ENOENT; 585 586 return perf_event_read_local(ee->event, value, enabled, running); 587 } 588 589 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) 590 { 591 u64 value = 0; 592 int err; 593 594 err = get_map_perf_counter(map, flags, &value, NULL, NULL); 595 /* 596 * this api is ugly since we miss [-22..-2] range of valid 597 * counter values, but that's uapi 598 */ 599 if (err) 600 return err; 601 return value; 602 } 603 604 const struct bpf_func_proto bpf_perf_event_read_proto = { 605 .func = bpf_perf_event_read, 606 .gpl_only = true, 607 .ret_type = RET_INTEGER, 608 .arg1_type = ARG_CONST_MAP_PTR, 609 .arg2_type = ARG_ANYTHING, 610 }; 611 612 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags, 613 struct bpf_perf_event_value *, buf, u32, size) 614 { 615 int err = -EINVAL; 616 617 if (unlikely(size != sizeof(struct bpf_perf_event_value))) 618 goto clear; 619 err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled, 620 &buf->running); 621 if (unlikely(err)) 622 goto clear; 623 return 0; 624 clear: 625 memset(buf, 0, size); 626 return err; 627 } 628 629 static const struct bpf_func_proto bpf_perf_event_read_value_proto = { 630 .func = bpf_perf_event_read_value, 631 .gpl_only = true, 632 .ret_type = RET_INTEGER, 633 .arg1_type = ARG_CONST_MAP_PTR, 634 .arg2_type = ARG_ANYTHING, 635 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 636 .arg4_type = ARG_CONST_SIZE, 637 }; 638 639 const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void) 640 { 641 return &bpf_perf_event_read_value_proto; 642 } 643 644 static __always_inline u64 645 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, 646 u64 flags, struct perf_raw_record *raw, 647 struct perf_sample_data *sd) 648 { 649 struct bpf_array *array = container_of(map, struct bpf_array, map); 650 unsigned int cpu = smp_processor_id(); 651 u64 index = flags & BPF_F_INDEX_MASK; 652 struct bpf_event_entry *ee; 653 struct perf_event *event; 654 655 if (index == BPF_F_CURRENT_CPU) 656 index = cpu; 657 if (unlikely(index >= array->map.max_entries)) 658 return -E2BIG; 659 660 ee = READ_ONCE(array->ptrs[index]); 661 if (!ee) 662 return -ENOENT; 663 664 event = ee->event; 665 if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || 666 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) 667 return -EINVAL; 668 669 if (unlikely(event->oncpu != cpu)) 670 return -EOPNOTSUPP; 671 672 perf_sample_save_raw_data(sd, event, raw); 673 674 return perf_event_output(event, sd, regs); 675 } 676 677 /* 678 * Support executing tracepoints in normal, irq, and nmi context that each call 679 * bpf_perf_event_output 680 */ 681 struct bpf_trace_sample_data { 682 struct perf_sample_data sds[3]; 683 }; 684 685 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); 686 static DEFINE_PER_CPU(int, bpf_trace_nest_level); 687 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, 688 u64, flags, void *, data, u64, size) 689 { 690 struct bpf_trace_sample_data *sds; 691 struct perf_raw_record raw = { 692 .frag = { 693 .size = size, 694 .data = data, 695 }, 696 }; 697 struct perf_sample_data *sd; 698 int nest_level, err; 699 700 preempt_disable(); 701 sds = this_cpu_ptr(&bpf_trace_sds); 702 nest_level = this_cpu_inc_return(bpf_trace_nest_level); 703 704 if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { 705 err = -EBUSY; 706 goto out; 707 } 708 709 sd = &sds->sds[nest_level - 1]; 710 711 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { 712 err = -EINVAL; 713 goto out; 714 } 715 716 perf_sample_data_init(sd, 0, 0); 717 718 err = __bpf_perf_event_output(regs, map, flags, &raw, sd); 719 out: 720 this_cpu_dec(bpf_trace_nest_level); 721 preempt_enable(); 722 return err; 723 } 724 725 static const struct bpf_func_proto bpf_perf_event_output_proto = { 726 .func = bpf_perf_event_output, 727 .gpl_only = true, 728 .ret_type = RET_INTEGER, 729 .arg1_type = ARG_PTR_TO_CTX, 730 .arg2_type = ARG_CONST_MAP_PTR, 731 .arg3_type = ARG_ANYTHING, 732 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 733 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 734 }; 735 736 static DEFINE_PER_CPU(int, bpf_event_output_nest_level); 737 struct bpf_nested_pt_regs { 738 struct pt_regs regs[3]; 739 }; 740 static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs); 741 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); 742 743 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 744 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) 745 { 746 struct perf_raw_frag frag = { 747 .copy = ctx_copy, 748 .size = ctx_size, 749 .data = ctx, 750 }; 751 struct perf_raw_record raw = { 752 .frag = { 753 { 754 .next = ctx_size ? &frag : NULL, 755 }, 756 .size = meta_size, 757 .data = meta, 758 }, 759 }; 760 struct perf_sample_data *sd; 761 struct pt_regs *regs; 762 int nest_level; 763 u64 ret; 764 765 preempt_disable(); 766 nest_level = this_cpu_inc_return(bpf_event_output_nest_level); 767 768 if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { 769 ret = -EBUSY; 770 goto out; 771 } 772 sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]); 773 regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]); 774 775 perf_fetch_caller_regs(regs); 776 perf_sample_data_init(sd, 0, 0); 777 778 ret = __bpf_perf_event_output(regs, map, flags, &raw, sd); 779 out: 780 this_cpu_dec(bpf_event_output_nest_level); 781 preempt_enable(); 782 return ret; 783 } 784 785 BPF_CALL_0(bpf_get_current_task) 786 { 787 return (long) current; 788 } 789 790 const struct bpf_func_proto bpf_get_current_task_proto = { 791 .func = bpf_get_current_task, 792 .gpl_only = true, 793 .ret_type = RET_INTEGER, 794 }; 795 796 BPF_CALL_0(bpf_get_current_task_btf) 797 { 798 return (unsigned long) current; 799 } 800 801 const struct bpf_func_proto bpf_get_current_task_btf_proto = { 802 .func = bpf_get_current_task_btf, 803 .gpl_only = true, 804 .ret_type = RET_PTR_TO_BTF_ID_TRUSTED, 805 .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 806 }; 807 808 BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) 809 { 810 return (unsigned long) task_pt_regs(task); 811 } 812 813 BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs) 814 815 const struct bpf_func_proto bpf_task_pt_regs_proto = { 816 .func = bpf_task_pt_regs, 817 .gpl_only = true, 818 .arg1_type = ARG_PTR_TO_BTF_ID, 819 .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 820 .ret_type = RET_PTR_TO_BTF_ID, 821 .ret_btf_id = &bpf_task_pt_regs_ids[0], 822 }; 823 824 struct send_signal_irq_work { 825 struct irq_work irq_work; 826 struct task_struct *task; 827 u32 sig; 828 enum pid_type type; 829 bool has_siginfo; 830 struct kernel_siginfo info; 831 }; 832 833 static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); 834 835 static void do_bpf_send_signal(struct irq_work *entry) 836 { 837 struct send_signal_irq_work *work; 838 struct kernel_siginfo *siginfo; 839 840 work = container_of(entry, struct send_signal_irq_work, irq_work); 841 siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV; 842 843 group_send_sig_info(work->sig, siginfo, work->task, work->type); 844 put_task_struct(work->task); 845 } 846 847 static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value) 848 { 849 struct send_signal_irq_work *work = NULL; 850 struct kernel_siginfo info; 851 struct kernel_siginfo *siginfo; 852 853 if (!task) { 854 task = current; 855 siginfo = SEND_SIG_PRIV; 856 } else { 857 clear_siginfo(&info); 858 info.si_signo = sig; 859 info.si_errno = 0; 860 info.si_code = SI_KERNEL; 861 info.si_pid = 0; 862 info.si_uid = 0; 863 info.si_value.sival_ptr = (void __user __force *)(unsigned long)value; 864 siginfo = &info; 865 } 866 867 /* Similar to bpf_probe_write_user, task needs to be 868 * in a sound condition and kernel memory access be 869 * permitted in order to send signal to the current 870 * task. 871 */ 872 if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING))) 873 return -EPERM; 874 if (unlikely(!nmi_uaccess_okay())) 875 return -EPERM; 876 /* Task should not be pid=1 to avoid kernel panic. */ 877 if (unlikely(is_global_init(task))) 878 return -EPERM; 879 880 if (preempt_count() != 0 || irqs_disabled()) { 881 /* Do an early check on signal validity. Otherwise, 882 * the error is lost in deferred irq_work. 883 */ 884 if (unlikely(!valid_signal(sig))) 885 return -EINVAL; 886 887 work = this_cpu_ptr(&send_signal_work); 888 if (irq_work_is_busy(&work->irq_work)) 889 return -EBUSY; 890 891 /* Add the current task, which is the target of sending signal, 892 * to the irq_work. The current task may change when queued 893 * irq works get executed. 894 */ 895 work->task = get_task_struct(task); 896 work->has_siginfo = siginfo == &info; 897 if (work->has_siginfo) 898 copy_siginfo(&work->info, &info); 899 work->sig = sig; 900 work->type = type; 901 irq_work_queue(&work->irq_work); 902 return 0; 903 } 904 905 return group_send_sig_info(sig, siginfo, task, type); 906 } 907 908 BPF_CALL_1(bpf_send_signal, u32, sig) 909 { 910 return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0); 911 } 912 913 const struct bpf_func_proto bpf_send_signal_proto = { 914 .func = bpf_send_signal, 915 .gpl_only = false, 916 .ret_type = RET_INTEGER, 917 .arg1_type = ARG_ANYTHING, 918 }; 919 920 BPF_CALL_1(bpf_send_signal_thread, u32, sig) 921 { 922 return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0); 923 } 924 925 const struct bpf_func_proto bpf_send_signal_thread_proto = { 926 .func = bpf_send_signal_thread, 927 .gpl_only = false, 928 .ret_type = RET_INTEGER, 929 .arg1_type = ARG_ANYTHING, 930 }; 931 932 BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz) 933 { 934 struct path copy; 935 long len; 936 char *p; 937 938 if (!sz) 939 return 0; 940 941 /* 942 * The path pointer is verified as trusted and safe to use, 943 * but let's double check it's valid anyway to workaround 944 * potentially broken verifier. 945 */ 946 len = copy_from_kernel_nofault(©, path, sizeof(*path)); 947 if (len < 0) 948 return len; 949 950 p = d_path(©, buf, sz); 951 if (IS_ERR(p)) { 952 len = PTR_ERR(p); 953 } else { 954 len = buf + sz - p; 955 memmove(buf, p, len); 956 } 957 958 return len; 959 } 960 961 BTF_SET_START(btf_allowlist_d_path) 962 #ifdef CONFIG_SECURITY 963 BTF_ID(func, security_file_permission) 964 BTF_ID(func, security_inode_getattr) 965 BTF_ID(func, security_file_open) 966 #endif 967 #ifdef CONFIG_SECURITY_PATH 968 BTF_ID(func, security_path_truncate) 969 #endif 970 BTF_ID(func, vfs_truncate) 971 BTF_ID(func, vfs_fallocate) 972 BTF_ID(func, dentry_open) 973 BTF_ID(func, vfs_getattr) 974 BTF_ID(func, filp_close) 975 BTF_SET_END(btf_allowlist_d_path) 976 977 static bool bpf_d_path_allowed(const struct bpf_prog *prog) 978 { 979 if (prog->type == BPF_PROG_TYPE_TRACING && 980 prog->expected_attach_type == BPF_TRACE_ITER) 981 return true; 982 983 if (prog->type == BPF_PROG_TYPE_LSM) 984 return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); 985 986 return btf_id_set_contains(&btf_allowlist_d_path, 987 prog->aux->attach_btf_id); 988 } 989 990 BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path) 991 992 static const struct bpf_func_proto bpf_d_path_proto = { 993 .func = bpf_d_path, 994 .gpl_only = false, 995 .ret_type = RET_INTEGER, 996 .arg1_type = ARG_PTR_TO_BTF_ID, 997 .arg1_btf_id = &bpf_d_path_btf_ids[0], 998 .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, 999 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1000 .allowed = bpf_d_path_allowed, 1001 }; 1002 1003 #define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \ 1004 BTF_F_PTR_RAW | BTF_F_ZERO) 1005 1006 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, 1007 u64 flags, const struct btf **btf, 1008 s32 *btf_id) 1009 { 1010 const struct btf_type *t; 1011 1012 if (unlikely(flags & ~(BTF_F_ALL))) 1013 return -EINVAL; 1014 1015 if (btf_ptr_size != sizeof(struct btf_ptr)) 1016 return -EINVAL; 1017 1018 *btf = bpf_get_btf_vmlinux(); 1019 1020 if (IS_ERR_OR_NULL(*btf)) 1021 return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL; 1022 1023 if (ptr->type_id > 0) 1024 *btf_id = ptr->type_id; 1025 else 1026 return -EINVAL; 1027 1028 if (*btf_id > 0) 1029 t = btf_type_by_id(*btf, *btf_id); 1030 if (*btf_id <= 0 || !t) 1031 return -ENOENT; 1032 1033 return 0; 1034 } 1035 1036 BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr, 1037 u32, btf_ptr_size, u64, flags) 1038 { 1039 const struct btf *btf; 1040 s32 btf_id; 1041 int ret; 1042 1043 ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); 1044 if (ret) 1045 return ret; 1046 1047 return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size, 1048 flags); 1049 } 1050 1051 const struct bpf_func_proto bpf_snprintf_btf_proto = { 1052 .func = bpf_snprintf_btf, 1053 .gpl_only = false, 1054 .ret_type = RET_INTEGER, 1055 .arg1_type = ARG_PTR_TO_MEM | MEM_WRITE, 1056 .arg2_type = ARG_CONST_SIZE, 1057 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1058 .arg4_type = ARG_CONST_SIZE, 1059 .arg5_type = ARG_ANYTHING, 1060 }; 1061 1062 BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) 1063 { 1064 /* This helper call is inlined by verifier. */ 1065 return ((u64 *)ctx)[-2]; 1066 } 1067 1068 static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { 1069 .func = bpf_get_func_ip_tracing, 1070 .gpl_only = true, 1071 .ret_type = RET_INTEGER, 1072 .arg1_type = ARG_PTR_TO_CTX, 1073 }; 1074 1075 static inline unsigned long get_entry_ip(unsigned long fentry_ip) 1076 { 1077 #ifdef CONFIG_X86_KERNEL_IBT 1078 if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE))) 1079 fentry_ip -= ENDBR_INSN_SIZE; 1080 #endif 1081 return fentry_ip; 1082 } 1083 1084 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) 1085 { 1086 struct bpf_trace_run_ctx *run_ctx __maybe_unused; 1087 struct kprobe *kp; 1088 1089 #ifdef CONFIG_UPROBES 1090 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1091 if (run_ctx->is_uprobe) 1092 return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr; 1093 #endif 1094 1095 kp = kprobe_running(); 1096 1097 if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY)) 1098 return 0; 1099 1100 return get_entry_ip((uintptr_t)kp->addr); 1101 } 1102 1103 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { 1104 .func = bpf_get_func_ip_kprobe, 1105 .gpl_only = true, 1106 .ret_type = RET_INTEGER, 1107 .arg1_type = ARG_PTR_TO_CTX, 1108 }; 1109 1110 BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs) 1111 { 1112 return bpf_kprobe_multi_entry_ip(current->bpf_ctx); 1113 } 1114 1115 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = { 1116 .func = bpf_get_func_ip_kprobe_multi, 1117 .gpl_only = false, 1118 .ret_type = RET_INTEGER, 1119 .arg1_type = ARG_PTR_TO_CTX, 1120 }; 1121 1122 BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs) 1123 { 1124 return bpf_kprobe_multi_cookie(current->bpf_ctx); 1125 } 1126 1127 static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = { 1128 .func = bpf_get_attach_cookie_kprobe_multi, 1129 .gpl_only = false, 1130 .ret_type = RET_INTEGER, 1131 .arg1_type = ARG_PTR_TO_CTX, 1132 }; 1133 1134 BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs) 1135 { 1136 return bpf_uprobe_multi_entry_ip(current->bpf_ctx); 1137 } 1138 1139 static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = { 1140 .func = bpf_get_func_ip_uprobe_multi, 1141 .gpl_only = false, 1142 .ret_type = RET_INTEGER, 1143 .arg1_type = ARG_PTR_TO_CTX, 1144 }; 1145 1146 BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs) 1147 { 1148 return bpf_uprobe_multi_cookie(current->bpf_ctx); 1149 } 1150 1151 static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = { 1152 .func = bpf_get_attach_cookie_uprobe_multi, 1153 .gpl_only = false, 1154 .ret_type = RET_INTEGER, 1155 .arg1_type = ARG_PTR_TO_CTX, 1156 }; 1157 1158 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx) 1159 { 1160 struct bpf_trace_run_ctx *run_ctx; 1161 1162 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1163 return run_ctx->bpf_cookie; 1164 } 1165 1166 static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = { 1167 .func = bpf_get_attach_cookie_trace, 1168 .gpl_only = false, 1169 .ret_type = RET_INTEGER, 1170 .arg1_type = ARG_PTR_TO_CTX, 1171 }; 1172 1173 BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx) 1174 { 1175 return ctx->event->bpf_cookie; 1176 } 1177 1178 static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { 1179 .func = bpf_get_attach_cookie_pe, 1180 .gpl_only = false, 1181 .ret_type = RET_INTEGER, 1182 .arg1_type = ARG_PTR_TO_CTX, 1183 }; 1184 1185 BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx) 1186 { 1187 struct bpf_trace_run_ctx *run_ctx; 1188 1189 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1190 return run_ctx->bpf_cookie; 1191 } 1192 1193 static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { 1194 .func = bpf_get_attach_cookie_tracing, 1195 .gpl_only = false, 1196 .ret_type = RET_INTEGER, 1197 .arg1_type = ARG_PTR_TO_CTX, 1198 }; 1199 1200 BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) 1201 { 1202 static const u32 br_entry_size = sizeof(struct perf_branch_entry); 1203 u32 entry_cnt = size / br_entry_size; 1204 1205 entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt); 1206 1207 if (unlikely(flags)) 1208 return -EINVAL; 1209 1210 if (!entry_cnt) 1211 return -ENOENT; 1212 1213 return entry_cnt * br_entry_size; 1214 } 1215 1216 const struct bpf_func_proto bpf_get_branch_snapshot_proto = { 1217 .func = bpf_get_branch_snapshot, 1218 .gpl_only = true, 1219 .ret_type = RET_INTEGER, 1220 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 1221 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1222 }; 1223 1224 BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) 1225 { 1226 /* This helper call is inlined by verifier. */ 1227 u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; 1228 1229 if ((u64) n >= nr_args) 1230 return -EINVAL; 1231 *value = ((u64 *)ctx)[n]; 1232 return 0; 1233 } 1234 1235 static const struct bpf_func_proto bpf_get_func_arg_proto = { 1236 .func = get_func_arg, 1237 .ret_type = RET_INTEGER, 1238 .arg1_type = ARG_PTR_TO_CTX, 1239 .arg2_type = ARG_ANYTHING, 1240 .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, 1241 .arg3_size = sizeof(u64), 1242 }; 1243 1244 BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) 1245 { 1246 /* This helper call is inlined by verifier. */ 1247 u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; 1248 1249 *value = ((u64 *)ctx)[nr_args]; 1250 return 0; 1251 } 1252 1253 static const struct bpf_func_proto bpf_get_func_ret_proto = { 1254 .func = get_func_ret, 1255 .ret_type = RET_INTEGER, 1256 .arg1_type = ARG_PTR_TO_CTX, 1257 .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, 1258 .arg2_size = sizeof(u64), 1259 }; 1260 1261 BPF_CALL_1(get_func_arg_cnt, void *, ctx) 1262 { 1263 /* This helper call is inlined by verifier. */ 1264 return ((u64 *)ctx)[-1] & 0xFF; 1265 } 1266 1267 static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { 1268 .func = get_func_arg_cnt, 1269 .ret_type = RET_INTEGER, 1270 .arg1_type = ARG_PTR_TO_CTX, 1271 }; 1272 1273 static const struct bpf_func_proto * 1274 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1275 { 1276 const struct bpf_func_proto *func_proto; 1277 1278 switch (func_id) { 1279 case BPF_FUNC_get_smp_processor_id: 1280 return &bpf_get_smp_processor_id_proto; 1281 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 1282 case BPF_FUNC_probe_read: 1283 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1284 NULL : &bpf_probe_read_compat_proto; 1285 case BPF_FUNC_probe_read_str: 1286 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1287 NULL : &bpf_probe_read_compat_str_proto; 1288 #endif 1289 case BPF_FUNC_get_func_ip: 1290 return &bpf_get_func_ip_proto_tracing; 1291 default: 1292 break; 1293 } 1294 1295 func_proto = bpf_base_func_proto(func_id, prog); 1296 if (func_proto) 1297 return func_proto; 1298 1299 if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN)) 1300 return NULL; 1301 1302 switch (func_id) { 1303 case BPF_FUNC_probe_write_user: 1304 return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? 1305 NULL : &bpf_probe_write_user_proto; 1306 default: 1307 return NULL; 1308 } 1309 } 1310 1311 static bool is_kprobe_multi(const struct bpf_prog *prog) 1312 { 1313 return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI || 1314 prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; 1315 } 1316 1317 static inline bool is_kprobe_session(const struct bpf_prog *prog) 1318 { 1319 return prog->type == BPF_PROG_TYPE_KPROBE && 1320 prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; 1321 } 1322 1323 static inline bool is_uprobe_multi(const struct bpf_prog *prog) 1324 { 1325 return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI || 1326 prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; 1327 } 1328 1329 static inline bool is_uprobe_session(const struct bpf_prog *prog) 1330 { 1331 return prog->type == BPF_PROG_TYPE_KPROBE && 1332 prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; 1333 } 1334 1335 static inline bool is_trace_fsession(const struct bpf_prog *prog) 1336 { 1337 return prog->type == BPF_PROG_TYPE_TRACING && 1338 (prog->expected_attach_type == BPF_TRACE_FSESSION || 1339 prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI); 1340 } 1341 1342 static const struct bpf_func_proto * 1343 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1344 { 1345 switch (func_id) { 1346 case BPF_FUNC_perf_event_output: 1347 return &bpf_perf_event_output_proto; 1348 case BPF_FUNC_get_stackid: 1349 return &bpf_get_stackid_proto; 1350 case BPF_FUNC_get_stack: 1351 return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto; 1352 #ifdef CONFIG_BPF_KPROBE_OVERRIDE 1353 case BPF_FUNC_override_return: 1354 return &bpf_override_return_proto; 1355 #endif 1356 case BPF_FUNC_get_func_ip: 1357 if (is_kprobe_multi(prog)) 1358 return &bpf_get_func_ip_proto_kprobe_multi; 1359 if (is_uprobe_multi(prog)) 1360 return &bpf_get_func_ip_proto_uprobe_multi; 1361 return &bpf_get_func_ip_proto_kprobe; 1362 case BPF_FUNC_get_attach_cookie: 1363 if (is_kprobe_multi(prog)) 1364 return &bpf_get_attach_cookie_proto_kmulti; 1365 if (is_uprobe_multi(prog)) 1366 return &bpf_get_attach_cookie_proto_umulti; 1367 return &bpf_get_attach_cookie_proto_trace; 1368 default: 1369 return bpf_tracing_func_proto(func_id, prog); 1370 } 1371 } 1372 1373 /* bpf+kprobe programs can access fields of 'struct pt_regs' */ 1374 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1375 const struct bpf_prog *prog, 1376 struct bpf_insn_access_aux *info) 1377 { 1378 if (off < 0 || off >= sizeof(struct pt_regs)) 1379 return false; 1380 if (off % size != 0) 1381 return false; 1382 /* 1383 * Assertion for 32 bit to make sure last 8 byte access 1384 * (BPF_DW) to the last 4 byte member is disallowed. 1385 */ 1386 if (off + size > sizeof(struct pt_regs)) 1387 return false; 1388 1389 if (type == BPF_WRITE) 1390 prog->aux->kprobe_write_ctx = true; 1391 1392 return true; 1393 } 1394 1395 const struct bpf_verifier_ops kprobe_verifier_ops = { 1396 .get_func_proto = kprobe_prog_func_proto, 1397 .is_valid_access = kprobe_prog_is_valid_access, 1398 }; 1399 1400 const struct bpf_prog_ops kprobe_prog_ops = { 1401 }; 1402 1403 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, 1404 u64, flags, void *, data, u64, size) 1405 { 1406 struct pt_regs *regs = *(struct pt_regs **)tp_buff; 1407 1408 /* 1409 * r1 points to perf tracepoint buffer where first 8 bytes are hidden 1410 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it 1411 * from there and call the same bpf_perf_event_output() helper inline. 1412 */ 1413 return ____bpf_perf_event_output(regs, map, flags, data, size); 1414 } 1415 1416 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { 1417 .func = bpf_perf_event_output_tp, 1418 .gpl_only = true, 1419 .ret_type = RET_INTEGER, 1420 .arg1_type = ARG_PTR_TO_CTX, 1421 .arg2_type = ARG_CONST_MAP_PTR, 1422 .arg3_type = ARG_ANYTHING, 1423 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1424 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 1425 }; 1426 1427 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, 1428 u64, flags) 1429 { 1430 struct pt_regs *regs = *(struct pt_regs **)tp_buff; 1431 1432 /* 1433 * Same comment as in bpf_perf_event_output_tp(), only that this time 1434 * the other helper's function body cannot be inlined due to being 1435 * external, thus we need to call raw helper function. 1436 */ 1437 return bpf_get_stackid((unsigned long) regs, (unsigned long) map, 1438 flags, 0, 0); 1439 } 1440 1441 static const struct bpf_func_proto bpf_get_stackid_proto_tp = { 1442 .func = bpf_get_stackid_tp, 1443 .gpl_only = true, 1444 .ret_type = RET_INTEGER, 1445 .arg1_type = ARG_PTR_TO_CTX, 1446 .arg2_type = ARG_CONST_MAP_PTR, 1447 .arg3_type = ARG_ANYTHING, 1448 }; 1449 1450 BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size, 1451 u64, flags) 1452 { 1453 struct pt_regs *regs = *(struct pt_regs **)tp_buff; 1454 1455 return bpf_get_stack((unsigned long) regs, (unsigned long) buf, 1456 (unsigned long) size, flags, 0); 1457 } 1458 1459 static const struct bpf_func_proto bpf_get_stack_proto_tp = { 1460 .func = bpf_get_stack_tp, 1461 .gpl_only = true, 1462 .ret_type = RET_INTEGER, 1463 .arg1_type = ARG_PTR_TO_CTX, 1464 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1465 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1466 .arg4_type = ARG_ANYTHING, 1467 }; 1468 1469 static const struct bpf_func_proto * 1470 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1471 { 1472 switch (func_id) { 1473 case BPF_FUNC_perf_event_output: 1474 return &bpf_perf_event_output_proto_tp; 1475 case BPF_FUNC_get_stackid: 1476 return &bpf_get_stackid_proto_tp; 1477 case BPF_FUNC_get_stack: 1478 return &bpf_get_stack_proto_tp; 1479 case BPF_FUNC_get_attach_cookie: 1480 return &bpf_get_attach_cookie_proto_trace; 1481 default: 1482 return bpf_tracing_func_proto(func_id, prog); 1483 } 1484 } 1485 1486 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1487 const struct bpf_prog *prog, 1488 struct bpf_insn_access_aux *info) 1489 { 1490 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) 1491 return false; 1492 if (type != BPF_READ) 1493 return false; 1494 if (off % size != 0) 1495 return false; 1496 1497 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); 1498 return true; 1499 } 1500 1501 const struct bpf_verifier_ops tracepoint_verifier_ops = { 1502 .get_func_proto = tp_prog_func_proto, 1503 .is_valid_access = tp_prog_is_valid_access, 1504 }; 1505 1506 const struct bpf_prog_ops tracepoint_prog_ops = { 1507 }; 1508 1509 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx, 1510 struct bpf_perf_event_value *, buf, u32, size) 1511 { 1512 int err = -EINVAL; 1513 1514 if (unlikely(size != sizeof(struct bpf_perf_event_value))) 1515 goto clear; 1516 err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled, 1517 &buf->running); 1518 if (unlikely(err)) 1519 goto clear; 1520 return 0; 1521 clear: 1522 memset(buf, 0, size); 1523 return err; 1524 } 1525 1526 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { 1527 .func = bpf_perf_prog_read_value, 1528 .gpl_only = true, 1529 .ret_type = RET_INTEGER, 1530 .arg1_type = ARG_PTR_TO_CTX, 1531 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1532 .arg3_type = ARG_CONST_SIZE, 1533 }; 1534 1535 BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, 1536 void *, buf, u32, size, u64, flags) 1537 { 1538 static const u32 br_entry_size = sizeof(struct perf_branch_entry); 1539 struct perf_branch_stack *br_stack = ctx->data->br_stack; 1540 u32 to_copy; 1541 1542 if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE)) 1543 return -EINVAL; 1544 1545 if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK))) 1546 return -ENOENT; 1547 1548 if (unlikely(!br_stack)) 1549 return -ENOENT; 1550 1551 if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE) 1552 return br_stack->nr * br_entry_size; 1553 1554 if (!buf || (size % br_entry_size != 0)) 1555 return -EINVAL; 1556 1557 to_copy = min_t(u32, br_stack->nr * br_entry_size, size); 1558 memcpy(buf, br_stack->entries, to_copy); 1559 1560 return to_copy; 1561 } 1562 1563 static const struct bpf_func_proto bpf_read_branch_records_proto = { 1564 .func = bpf_read_branch_records, 1565 .gpl_only = true, 1566 .ret_type = RET_INTEGER, 1567 .arg1_type = ARG_PTR_TO_CTX, 1568 .arg2_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE, 1569 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1570 .arg4_type = ARG_ANYTHING, 1571 }; 1572 1573 static const struct bpf_func_proto * 1574 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1575 { 1576 switch (func_id) { 1577 case BPF_FUNC_perf_event_output: 1578 return &bpf_perf_event_output_proto_tp; 1579 case BPF_FUNC_get_stackid: 1580 return &bpf_get_stackid_proto_pe; 1581 case BPF_FUNC_get_stack: 1582 return &bpf_get_stack_proto_pe; 1583 case BPF_FUNC_perf_prog_read_value: 1584 return &bpf_perf_prog_read_value_proto; 1585 case BPF_FUNC_read_branch_records: 1586 return &bpf_read_branch_records_proto; 1587 case BPF_FUNC_get_attach_cookie: 1588 return &bpf_get_attach_cookie_proto_pe; 1589 default: 1590 return bpf_tracing_func_proto(func_id, prog); 1591 } 1592 } 1593 1594 /* 1595 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp 1596 * to avoid potential recursive reuse issue when/if tracepoints are added 1597 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack. 1598 * 1599 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage 1600 * in normal, irq, and nmi context. 1601 */ 1602 struct bpf_raw_tp_regs { 1603 struct pt_regs regs[3]; 1604 }; 1605 static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); 1606 static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); 1607 static struct pt_regs *get_bpf_raw_tp_regs(void) 1608 { 1609 struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); 1610 int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); 1611 1612 if (nest_level > ARRAY_SIZE(tp_regs->regs)) { 1613 this_cpu_dec(bpf_raw_tp_nest_level); 1614 return ERR_PTR(-EBUSY); 1615 } 1616 1617 return &tp_regs->regs[nest_level - 1]; 1618 } 1619 1620 static void put_bpf_raw_tp_regs(void) 1621 { 1622 this_cpu_dec(bpf_raw_tp_nest_level); 1623 } 1624 1625 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, 1626 struct bpf_map *, map, u64, flags, void *, data, u64, size) 1627 { 1628 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1629 int ret; 1630 1631 if (IS_ERR(regs)) 1632 return PTR_ERR(regs); 1633 1634 perf_fetch_caller_regs(regs); 1635 ret = ____bpf_perf_event_output(regs, map, flags, data, size); 1636 1637 put_bpf_raw_tp_regs(); 1638 return ret; 1639 } 1640 1641 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { 1642 .func = bpf_perf_event_output_raw_tp, 1643 .gpl_only = true, 1644 .ret_type = RET_INTEGER, 1645 .arg1_type = ARG_PTR_TO_CTX, 1646 .arg2_type = ARG_CONST_MAP_PTR, 1647 .arg3_type = ARG_ANYTHING, 1648 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1649 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 1650 }; 1651 1652 extern const struct bpf_func_proto bpf_skb_output_proto; 1653 extern const struct bpf_func_proto bpf_xdp_output_proto; 1654 extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; 1655 1656 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, 1657 struct bpf_map *, map, u64, flags) 1658 { 1659 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1660 int ret; 1661 1662 if (IS_ERR(regs)) 1663 return PTR_ERR(regs); 1664 1665 perf_fetch_caller_regs(regs); 1666 /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ 1667 ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, 1668 flags, 0, 0); 1669 put_bpf_raw_tp_regs(); 1670 return ret; 1671 } 1672 1673 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { 1674 .func = bpf_get_stackid_raw_tp, 1675 .gpl_only = true, 1676 .ret_type = RET_INTEGER, 1677 .arg1_type = ARG_PTR_TO_CTX, 1678 .arg2_type = ARG_CONST_MAP_PTR, 1679 .arg3_type = ARG_ANYTHING, 1680 }; 1681 1682 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, 1683 void *, buf, u32, size, u64, flags) 1684 { 1685 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1686 int ret; 1687 1688 if (IS_ERR(regs)) 1689 return PTR_ERR(regs); 1690 1691 perf_fetch_caller_regs(regs); 1692 ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, 1693 (unsigned long) size, flags, 0); 1694 put_bpf_raw_tp_regs(); 1695 return ret; 1696 } 1697 1698 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { 1699 .func = bpf_get_stack_raw_tp, 1700 .gpl_only = true, 1701 .ret_type = RET_INTEGER, 1702 .arg1_type = ARG_PTR_TO_CTX, 1703 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1704 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 1705 .arg4_type = ARG_ANYTHING, 1706 }; 1707 1708 static const struct bpf_func_proto * 1709 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1710 { 1711 switch (func_id) { 1712 case BPF_FUNC_perf_event_output: 1713 return &bpf_perf_event_output_proto_raw_tp; 1714 case BPF_FUNC_get_stackid: 1715 return &bpf_get_stackid_proto_raw_tp; 1716 case BPF_FUNC_get_stack: 1717 return &bpf_get_stack_proto_raw_tp; 1718 case BPF_FUNC_get_attach_cookie: 1719 return &bpf_get_attach_cookie_proto_tracing; 1720 default: 1721 return bpf_tracing_func_proto(func_id, prog); 1722 } 1723 } 1724 1725 const struct bpf_func_proto * 1726 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1727 { 1728 const struct bpf_func_proto *fn; 1729 1730 switch (func_id) { 1731 #ifdef CONFIG_NET 1732 case BPF_FUNC_skb_output: 1733 return &bpf_skb_output_proto; 1734 case BPF_FUNC_xdp_output: 1735 return &bpf_xdp_output_proto; 1736 case BPF_FUNC_skc_to_tcp6_sock: 1737 return &bpf_skc_to_tcp6_sock_proto; 1738 case BPF_FUNC_skc_to_tcp_sock: 1739 return &bpf_skc_to_tcp_sock_proto; 1740 case BPF_FUNC_skc_to_tcp_timewait_sock: 1741 return &bpf_skc_to_tcp_timewait_sock_proto; 1742 case BPF_FUNC_skc_to_tcp_request_sock: 1743 return &bpf_skc_to_tcp_request_sock_proto; 1744 case BPF_FUNC_skc_to_udp6_sock: 1745 return &bpf_skc_to_udp6_sock_proto; 1746 case BPF_FUNC_skc_to_unix_sock: 1747 return &bpf_skc_to_unix_sock_proto; 1748 case BPF_FUNC_skc_to_mptcp_sock: 1749 return &bpf_skc_to_mptcp_sock_proto; 1750 case BPF_FUNC_sk_storage_get: 1751 return &bpf_sk_storage_get_tracing_proto; 1752 case BPF_FUNC_sk_storage_delete: 1753 return &bpf_sk_storage_delete_tracing_proto; 1754 case BPF_FUNC_sock_from_file: 1755 return &bpf_sock_from_file_proto; 1756 case BPF_FUNC_get_socket_cookie: 1757 return &bpf_get_socket_ptr_cookie_proto; 1758 case BPF_FUNC_xdp_get_buff_len: 1759 return &bpf_xdp_get_buff_len_trace_proto; 1760 #endif 1761 case BPF_FUNC_seq_printf: 1762 return prog->expected_attach_type == BPF_TRACE_ITER ? 1763 &bpf_seq_printf_proto : 1764 NULL; 1765 case BPF_FUNC_seq_write: 1766 return prog->expected_attach_type == BPF_TRACE_ITER ? 1767 &bpf_seq_write_proto : 1768 NULL; 1769 case BPF_FUNC_seq_printf_btf: 1770 return prog->expected_attach_type == BPF_TRACE_ITER ? 1771 &bpf_seq_printf_btf_proto : 1772 NULL; 1773 case BPF_FUNC_d_path: 1774 return &bpf_d_path_proto; 1775 case BPF_FUNC_get_func_arg: 1776 if (bpf_prog_has_trampoline(prog) || 1777 prog->expected_attach_type == BPF_TRACE_RAW_TP) 1778 return &bpf_get_func_arg_proto; 1779 return NULL; 1780 case BPF_FUNC_get_func_ret: 1781 return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; 1782 case BPF_FUNC_get_func_arg_cnt: 1783 if (bpf_prog_has_trampoline(prog) || 1784 prog->expected_attach_type == BPF_TRACE_RAW_TP) 1785 return &bpf_get_func_arg_cnt_proto; 1786 return NULL; 1787 case BPF_FUNC_get_attach_cookie: 1788 if (prog->type == BPF_PROG_TYPE_TRACING && 1789 prog->expected_attach_type == BPF_TRACE_RAW_TP) 1790 return &bpf_get_attach_cookie_proto_tracing; 1791 return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL; 1792 default: 1793 fn = raw_tp_prog_func_proto(func_id, prog); 1794 if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) 1795 fn = bpf_iter_get_func_proto(func_id, prog); 1796 return fn; 1797 } 1798 } 1799 1800 static bool raw_tp_prog_is_valid_access(int off, int size, 1801 enum bpf_access_type type, 1802 const struct bpf_prog *prog, 1803 struct bpf_insn_access_aux *info) 1804 { 1805 return bpf_tracing_ctx_access(off, size, type); 1806 } 1807 1808 static bool tracing_prog_is_valid_access(int off, int size, 1809 enum bpf_access_type type, 1810 const struct bpf_prog *prog, 1811 struct bpf_insn_access_aux *info) 1812 { 1813 return bpf_tracing_btf_ctx_access(off, size, type, prog, info); 1814 } 1815 1816 int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, 1817 const union bpf_attr *kattr, 1818 union bpf_attr __user *uattr) 1819 { 1820 return -ENOTSUPP; 1821 } 1822 1823 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { 1824 .get_func_proto = raw_tp_prog_func_proto, 1825 .is_valid_access = raw_tp_prog_is_valid_access, 1826 }; 1827 1828 const struct bpf_prog_ops raw_tracepoint_prog_ops = { 1829 #ifdef CONFIG_NET 1830 .test_run = bpf_prog_test_run_raw_tp, 1831 #endif 1832 }; 1833 1834 const struct bpf_verifier_ops tracing_verifier_ops = { 1835 .get_func_proto = tracing_prog_func_proto, 1836 .is_valid_access = tracing_prog_is_valid_access, 1837 }; 1838 1839 const struct bpf_prog_ops tracing_prog_ops = { 1840 .test_run = bpf_prog_test_run_tracing, 1841 }; 1842 1843 static bool raw_tp_writable_prog_is_valid_access(int off, int size, 1844 enum bpf_access_type type, 1845 const struct bpf_prog *prog, 1846 struct bpf_insn_access_aux *info) 1847 { 1848 if (off == 0) { 1849 if (size != sizeof(u64) || type != BPF_READ) 1850 return false; 1851 info->reg_type = PTR_TO_TP_BUFFER; 1852 } 1853 return raw_tp_prog_is_valid_access(off, size, type, prog, info); 1854 } 1855 1856 const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { 1857 .get_func_proto = raw_tp_prog_func_proto, 1858 .is_valid_access = raw_tp_writable_prog_is_valid_access, 1859 }; 1860 1861 const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { 1862 }; 1863 1864 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1865 const struct bpf_prog *prog, 1866 struct bpf_insn_access_aux *info) 1867 { 1868 const int size_u64 = sizeof(u64); 1869 1870 if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) 1871 return false; 1872 if (type != BPF_READ) 1873 return false; 1874 if (off % size != 0) { 1875 if (sizeof(unsigned long) != 4) 1876 return false; 1877 if (size != 8) 1878 return false; 1879 if (off % size != 4) 1880 return false; 1881 } 1882 1883 switch (off) { 1884 case bpf_ctx_range(struct bpf_perf_event_data, sample_period): 1885 bpf_ctx_record_field_size(info, size_u64); 1886 if (!bpf_ctx_narrow_access_ok(off, size, size_u64)) 1887 return false; 1888 break; 1889 case bpf_ctx_range(struct bpf_perf_event_data, addr): 1890 bpf_ctx_record_field_size(info, size_u64); 1891 if (!bpf_ctx_narrow_access_ok(off, size, size_u64)) 1892 return false; 1893 break; 1894 default: 1895 if (size != sizeof(long)) 1896 return false; 1897 } 1898 1899 return true; 1900 } 1901 1902 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, 1903 const struct bpf_insn *si, 1904 struct bpf_insn *insn_buf, 1905 struct bpf_prog *prog, u32 *target_size) 1906 { 1907 struct bpf_insn *insn = insn_buf; 1908 1909 switch (si->off) { 1910 case offsetof(struct bpf_perf_event_data, sample_period): 1911 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 1912 data), si->dst_reg, si->src_reg, 1913 offsetof(struct bpf_perf_event_data_kern, data)); 1914 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 1915 bpf_target_off(struct perf_sample_data, period, 8, 1916 target_size)); 1917 break; 1918 case offsetof(struct bpf_perf_event_data, addr): 1919 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 1920 data), si->dst_reg, si->src_reg, 1921 offsetof(struct bpf_perf_event_data_kern, data)); 1922 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 1923 bpf_target_off(struct perf_sample_data, addr, 8, 1924 target_size)); 1925 break; 1926 default: 1927 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 1928 regs), si->dst_reg, si->src_reg, 1929 offsetof(struct bpf_perf_event_data_kern, regs)); 1930 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg, 1931 si->off); 1932 break; 1933 } 1934 1935 return insn - insn_buf; 1936 } 1937 1938 const struct bpf_verifier_ops perf_event_verifier_ops = { 1939 .get_func_proto = pe_prog_func_proto, 1940 .is_valid_access = pe_prog_is_valid_access, 1941 .convert_ctx_access = pe_prog_convert_ctx_access, 1942 }; 1943 1944 const struct bpf_prog_ops perf_event_prog_ops = { 1945 }; 1946 1947 static DEFINE_MUTEX(bpf_event_mutex); 1948 1949 #define BPF_TRACE_MAX_PROGS 64 1950 1951 int perf_event_attach_bpf_prog(struct perf_event *event, 1952 struct bpf_prog *prog, 1953 u64 bpf_cookie) 1954 { 1955 struct bpf_prog_array *old_array; 1956 struct bpf_prog_array *new_array; 1957 int ret = -EEXIST; 1958 1959 /* 1960 * Kprobe override only works if they are on the function entry, 1961 * and only if they are on the opt-in list. 1962 */ 1963 if (prog->kprobe_override && 1964 (!trace_kprobe_on_func_entry(event->tp_event) || 1965 !trace_kprobe_error_injectable(event->tp_event))) 1966 return -EINVAL; 1967 1968 mutex_lock(&bpf_event_mutex); 1969 1970 if (event->prog) 1971 goto unlock; 1972 1973 old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); 1974 if (old_array && 1975 bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { 1976 ret = -E2BIG; 1977 goto unlock; 1978 } 1979 1980 ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array); 1981 if (ret < 0) 1982 goto unlock; 1983 1984 /* set the new array to event->tp_event and set event->prog */ 1985 event->prog = prog; 1986 event->bpf_cookie = bpf_cookie; 1987 rcu_assign_pointer(event->tp_event->prog_array, new_array); 1988 bpf_prog_array_free_sleepable(old_array); 1989 1990 unlock: 1991 mutex_unlock(&bpf_event_mutex); 1992 return ret; 1993 } 1994 1995 void perf_event_detach_bpf_prog(struct perf_event *event) 1996 { 1997 struct bpf_prog_array *old_array; 1998 struct bpf_prog_array *new_array; 1999 struct bpf_prog *prog = NULL; 2000 int ret; 2001 2002 mutex_lock(&bpf_event_mutex); 2003 2004 if (!event->prog) 2005 goto unlock; 2006 2007 old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); 2008 if (!old_array) 2009 goto put; 2010 2011 ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); 2012 if (ret < 0) { 2013 bpf_prog_array_delete_safe(old_array, event->prog); 2014 } else { 2015 rcu_assign_pointer(event->tp_event->prog_array, new_array); 2016 bpf_prog_array_free_sleepable(old_array); 2017 } 2018 2019 put: 2020 prog = event->prog; 2021 event->prog = NULL; 2022 2023 unlock: 2024 mutex_unlock(&bpf_event_mutex); 2025 2026 if (prog) { 2027 /* 2028 * It could be that the bpf_prog is not sleepable (and will be freed 2029 * via normal RCU), but is called from a point that supports sleepable 2030 * programs and uses tasks-trace-RCU. 2031 */ 2032 synchronize_rcu_tasks_trace(); 2033 2034 bpf_prog_put(prog); 2035 } 2036 } 2037 2038 int perf_event_query_prog_array(struct perf_event *event, void __user *info) 2039 { 2040 struct perf_event_query_bpf __user *uquery = info; 2041 struct perf_event_query_bpf query = {}; 2042 struct bpf_prog_array *progs; 2043 u32 *ids, prog_cnt, ids_len; 2044 int ret; 2045 2046 if (!perfmon_capable()) 2047 return -EPERM; 2048 if (event->attr.type != PERF_TYPE_TRACEPOINT) 2049 return -EINVAL; 2050 if (copy_from_user(&query, uquery, sizeof(query))) 2051 return -EFAULT; 2052 2053 ids_len = query.ids_len; 2054 if (ids_len > BPF_TRACE_MAX_PROGS) 2055 return -E2BIG; 2056 ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN); 2057 if (!ids) 2058 return -ENOMEM; 2059 /* 2060 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which 2061 * is required when user only wants to check for uquery->prog_cnt. 2062 * There is no need to check for it since the case is handled 2063 * gracefully in bpf_prog_array_copy_info. 2064 */ 2065 2066 mutex_lock(&bpf_event_mutex); 2067 progs = bpf_event_rcu_dereference(event->tp_event->prog_array); 2068 ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt); 2069 mutex_unlock(&bpf_event_mutex); 2070 2071 if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || 2072 copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) 2073 ret = -EFAULT; 2074 2075 kfree(ids); 2076 return ret; 2077 } 2078 2079 extern struct bpf_raw_event_map __start__bpf_raw_tp[]; 2080 extern struct bpf_raw_event_map __stop__bpf_raw_tp[]; 2081 2082 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name) 2083 { 2084 struct bpf_raw_event_map *btp = __start__bpf_raw_tp; 2085 2086 for (; btp < __stop__bpf_raw_tp; btp++) { 2087 if (!strcmp(btp->tp->name, name)) 2088 return btp; 2089 } 2090 2091 return bpf_get_raw_tracepoint_module(name); 2092 } 2093 2094 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) 2095 { 2096 struct module *mod; 2097 2098 guard(rcu)(); 2099 mod = __module_address((unsigned long)btp); 2100 module_put(mod); 2101 } 2102 2103 static __always_inline 2104 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) 2105 { 2106 struct srcu_ctr __percpu *scp = NULL; 2107 struct bpf_prog *prog = link->link.prog; 2108 bool sleepable = prog->sleepable; 2109 struct bpf_run_ctx *old_run_ctx; 2110 struct bpf_trace_run_ctx run_ctx; 2111 2112 if (sleepable) { 2113 scp = rcu_read_lock_tasks_trace(); 2114 migrate_disable(); 2115 } else { 2116 rcu_read_lock_dont_migrate(); 2117 } 2118 2119 if (unlikely(!bpf_prog_get_recursion_context(prog))) { 2120 bpf_prog_inc_misses_counter(prog); 2121 goto out; 2122 } 2123 2124 run_ctx.bpf_cookie = link->cookie; 2125 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 2126 2127 (void)bpf_prog_run(prog, args); 2128 2129 bpf_reset_run_ctx(old_run_ctx); 2130 out: 2131 bpf_prog_put_recursion_context(prog); 2132 2133 if (sleepable) { 2134 migrate_enable(); 2135 rcu_read_unlock_tasks_trace(scp); 2136 } else { 2137 rcu_read_unlock_migrate(); 2138 } 2139 } 2140 2141 #define UNPACK(...) __VA_ARGS__ 2142 #define REPEAT_1(FN, DL, X, ...) FN(X) 2143 #define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__) 2144 #define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__) 2145 #define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__) 2146 #define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__) 2147 #define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__) 2148 #define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__) 2149 #define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__) 2150 #define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__) 2151 #define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__) 2152 #define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__) 2153 #define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__) 2154 #define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__) 2155 2156 #define SARG(X) u64 arg##X 2157 #define COPY(X) args[X] = arg##X 2158 2159 #define __DL_COM (,) 2160 #define __DL_SEM (;) 2161 2162 #define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 2163 2164 #define BPF_TRACE_DEFN_x(x) \ 2165 void bpf_trace_run##x(struct bpf_raw_tp_link *link, \ 2166 REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \ 2167 { \ 2168 u64 args[x]; \ 2169 REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \ 2170 __bpf_trace_run(link, args); \ 2171 } \ 2172 EXPORT_SYMBOL_GPL(bpf_trace_run##x) 2173 BPF_TRACE_DEFN_x(1); 2174 BPF_TRACE_DEFN_x(2); 2175 BPF_TRACE_DEFN_x(3); 2176 BPF_TRACE_DEFN_x(4); 2177 BPF_TRACE_DEFN_x(5); 2178 BPF_TRACE_DEFN_x(6); 2179 BPF_TRACE_DEFN_x(7); 2180 BPF_TRACE_DEFN_x(8); 2181 BPF_TRACE_DEFN_x(9); 2182 BPF_TRACE_DEFN_x(10); 2183 BPF_TRACE_DEFN_x(11); 2184 BPF_TRACE_DEFN_x(12); 2185 2186 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) 2187 { 2188 struct tracepoint *tp = btp->tp; 2189 struct bpf_prog *prog = link->link.prog; 2190 2191 /* 2192 * check that program doesn't access arguments beyond what's 2193 * available in this tracepoint 2194 */ 2195 if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) 2196 return -EINVAL; 2197 2198 if (prog->aux->max_tp_access > btp->writable_size) 2199 return -EINVAL; 2200 2201 return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link); 2202 } 2203 2204 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) 2205 { 2206 return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link); 2207 } 2208 2209 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, 2210 u32 *fd_type, const char **buf, 2211 u64 *probe_offset, u64 *probe_addr, 2212 unsigned long *missed) 2213 { 2214 bool is_tracepoint, is_syscall_tp; 2215 struct bpf_prog *prog; 2216 int flags, err = 0; 2217 2218 prog = event->prog; 2219 if (!prog) 2220 return -ENOENT; 2221 2222 /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */ 2223 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) 2224 return -EOPNOTSUPP; 2225 2226 *prog_id = prog->aux->id; 2227 flags = event->tp_event->flags; 2228 is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT; 2229 is_syscall_tp = is_syscall_trace_event(event->tp_event); 2230 2231 if (is_tracepoint || is_syscall_tp) { 2232 *buf = is_tracepoint ? event->tp_event->tp->name 2233 : event->tp_event->name; 2234 /* We allow NULL pointer for tracepoint */ 2235 if (fd_type) 2236 *fd_type = BPF_FD_TYPE_TRACEPOINT; 2237 if (probe_offset) 2238 *probe_offset = 0x0; 2239 if (probe_addr) 2240 *probe_addr = 0x0; 2241 } else { 2242 /* kprobe/uprobe */ 2243 err = -EOPNOTSUPP; 2244 #ifdef CONFIG_KPROBE_EVENTS 2245 if (flags & TRACE_EVENT_FL_KPROBE) 2246 err = bpf_get_kprobe_info(event, fd_type, buf, 2247 probe_offset, probe_addr, missed, 2248 event->attr.type == PERF_TYPE_TRACEPOINT); 2249 #endif 2250 #ifdef CONFIG_UPROBE_EVENTS 2251 if (flags & TRACE_EVENT_FL_UPROBE) 2252 err = bpf_get_uprobe_info(event, fd_type, buf, 2253 probe_offset, probe_addr, 2254 event->attr.type == PERF_TYPE_TRACEPOINT); 2255 #endif 2256 } 2257 2258 return err; 2259 } 2260 2261 static int __init send_signal_irq_work_init(void) 2262 { 2263 int cpu; 2264 struct send_signal_irq_work *work; 2265 2266 for_each_possible_cpu(cpu) { 2267 work = per_cpu_ptr(&send_signal_work, cpu); 2268 init_irq_work(&work->irq_work, do_bpf_send_signal); 2269 } 2270 return 0; 2271 } 2272 2273 subsys_initcall(send_signal_irq_work_init); 2274 2275 #ifdef CONFIG_MODULES 2276 static int bpf_event_notify(struct notifier_block *nb, unsigned long op, 2277 void *module) 2278 { 2279 struct bpf_trace_module *btm, *tmp; 2280 struct module *mod = module; 2281 int ret = 0; 2282 2283 if (mod->num_bpf_raw_events == 0 || 2284 (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) 2285 goto out; 2286 2287 mutex_lock(&bpf_module_mutex); 2288 2289 switch (op) { 2290 case MODULE_STATE_COMING: 2291 btm = kzalloc_obj(*btm); 2292 if (btm) { 2293 btm->module = module; 2294 list_add(&btm->list, &bpf_trace_modules); 2295 } else { 2296 ret = -ENOMEM; 2297 } 2298 break; 2299 case MODULE_STATE_GOING: 2300 list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) { 2301 if (btm->module == module) { 2302 list_del(&btm->list); 2303 kfree(btm); 2304 break; 2305 } 2306 } 2307 break; 2308 } 2309 2310 mutex_unlock(&bpf_module_mutex); 2311 2312 out: 2313 return notifier_from_errno(ret); 2314 } 2315 2316 static struct notifier_block bpf_module_nb = { 2317 .notifier_call = bpf_event_notify, 2318 }; 2319 2320 static int __init bpf_event_init(void) 2321 { 2322 register_module_notifier(&bpf_module_nb); 2323 return 0; 2324 } 2325 2326 fs_initcall(bpf_event_init); 2327 #endif /* CONFIG_MODULES */ 2328 2329 struct bpf_session_run_ctx { 2330 struct bpf_run_ctx run_ctx; 2331 bool is_return; 2332 void *data; 2333 }; 2334 2335 #ifdef CONFIG_FPROBE 2336 struct bpf_kprobe_multi_link { 2337 struct bpf_link link; 2338 struct fprobe fp; 2339 unsigned long *addrs; 2340 u64 *cookies; 2341 u32 cnt; 2342 u32 mods_cnt; 2343 struct module **mods; 2344 }; 2345 2346 struct bpf_kprobe_multi_run_ctx { 2347 struct bpf_session_run_ctx session_ctx; 2348 struct bpf_kprobe_multi_link *link; 2349 unsigned long entry_ip; 2350 }; 2351 2352 struct user_syms { 2353 const char **syms; 2354 char *buf; 2355 }; 2356 2357 #ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS 2358 static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs); 2359 #define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs) 2360 #else 2361 #define bpf_kprobe_multi_pt_regs_ptr() (NULL) 2362 #endif 2363 2364 static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip) 2365 { 2366 unsigned long ip = ftrace_get_symaddr(fentry_ip); 2367 2368 return ip ? : fentry_ip; 2369 } 2370 2371 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) 2372 { 2373 unsigned long __user usymbol; 2374 const char **syms = NULL; 2375 char *buf = NULL, *p; 2376 int err = -ENOMEM; 2377 unsigned int i; 2378 2379 syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL); 2380 if (!syms) 2381 goto error; 2382 2383 buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL); 2384 if (!buf) 2385 goto error; 2386 2387 for (p = buf, i = 0; i < cnt; i++) { 2388 if (__get_user(usymbol, usyms + i)) { 2389 err = -EFAULT; 2390 goto error; 2391 } 2392 err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN); 2393 if (err == KSYM_NAME_LEN) 2394 err = -E2BIG; 2395 if (err < 0) 2396 goto error; 2397 syms[i] = p; 2398 p += err + 1; 2399 } 2400 2401 us->syms = syms; 2402 us->buf = buf; 2403 return 0; 2404 2405 error: 2406 if (err) { 2407 kvfree(syms); 2408 kvfree(buf); 2409 } 2410 return err; 2411 } 2412 2413 static void kprobe_multi_put_modules(struct module **mods, u32 cnt) 2414 { 2415 u32 i; 2416 2417 for (i = 0; i < cnt; i++) 2418 module_put(mods[i]); 2419 } 2420 2421 static void free_user_syms(struct user_syms *us) 2422 { 2423 kvfree(us->syms); 2424 kvfree(us->buf); 2425 } 2426 2427 static void bpf_kprobe_multi_link_release(struct bpf_link *link) 2428 { 2429 struct bpf_kprobe_multi_link *kmulti_link; 2430 2431 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2432 /* Don't wait for RCU GP here. */ 2433 unregister_fprobe_async(&kmulti_link->fp); 2434 kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); 2435 } 2436 2437 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) 2438 { 2439 struct bpf_kprobe_multi_link *kmulti_link; 2440 2441 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2442 kvfree(kmulti_link->addrs); 2443 kvfree(kmulti_link->cookies); 2444 kfree(kmulti_link->mods); 2445 kfree(kmulti_link); 2446 } 2447 2448 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, 2449 struct bpf_link_info *info) 2450 { 2451 u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies); 2452 u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs); 2453 struct bpf_kprobe_multi_link *kmulti_link; 2454 u32 ucount = info->kprobe_multi.count; 2455 int err = 0, i; 2456 2457 if (!uaddrs ^ !ucount) 2458 return -EINVAL; 2459 if (ucookies && !ucount) 2460 return -EINVAL; 2461 2462 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2463 info->kprobe_multi.count = kmulti_link->cnt; 2464 info->kprobe_multi.flags = kmulti_link->link.flags; 2465 info->kprobe_multi.missed = kmulti_link->fp.nmissed; 2466 2467 if (!uaddrs) 2468 return 0; 2469 if (ucount < kmulti_link->cnt) 2470 err = -ENOSPC; 2471 else 2472 ucount = kmulti_link->cnt; 2473 2474 if (ucookies) { 2475 if (kmulti_link->cookies) { 2476 if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64))) 2477 return -EFAULT; 2478 } else { 2479 for (i = 0; i < ucount; i++) { 2480 if (put_user(0, ucookies + i)) 2481 return -EFAULT; 2482 } 2483 } 2484 } 2485 2486 if (kallsyms_show_value(current_cred())) { 2487 if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64))) 2488 return -EFAULT; 2489 } else { 2490 for (i = 0; i < ucount; i++) { 2491 if (put_user(0, uaddrs + i)) 2492 return -EFAULT; 2493 } 2494 } 2495 return err; 2496 } 2497 2498 #ifdef CONFIG_PROC_FS 2499 static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, 2500 struct seq_file *seq) 2501 { 2502 struct bpf_kprobe_multi_link *kmulti_link; 2503 bool has_cookies; 2504 2505 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2506 has_cookies = !!kmulti_link->cookies; 2507 2508 seq_printf(seq, 2509 "kprobe_cnt:\t%u\n" 2510 "missed:\t%lu\n", 2511 kmulti_link->cnt, 2512 kmulti_link->fp.nmissed); 2513 2514 seq_printf(seq, "%s\t %s\n", "cookie", "func"); 2515 for (int i = 0; i < kmulti_link->cnt; i++) { 2516 seq_printf(seq, 2517 "%llu\t %pS\n", 2518 has_cookies ? kmulti_link->cookies[i] : 0, 2519 (void *)kmulti_link->addrs[i]); 2520 } 2521 } 2522 #endif 2523 2524 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { 2525 .release = bpf_kprobe_multi_link_release, 2526 .dealloc_deferred = bpf_kprobe_multi_link_dealloc, 2527 .fill_link_info = bpf_kprobe_multi_link_fill_link_info, 2528 #ifdef CONFIG_PROC_FS 2529 .show_fdinfo = bpf_kprobe_multi_show_fdinfo, 2530 #endif 2531 }; 2532 2533 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv) 2534 { 2535 const struct bpf_kprobe_multi_link *link = priv; 2536 unsigned long *addr_a = a, *addr_b = b; 2537 u64 *cookie_a, *cookie_b; 2538 2539 cookie_a = link->cookies + (addr_a - link->addrs); 2540 cookie_b = link->cookies + (addr_b - link->addrs); 2541 2542 /* swap addr_a/addr_b and cookie_a/cookie_b values */ 2543 swap(*addr_a, *addr_b); 2544 swap(*cookie_a, *cookie_b); 2545 } 2546 2547 static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b) 2548 { 2549 const unsigned long *addr_a = a, *addr_b = b; 2550 2551 if (*addr_a == *addr_b) 2552 return 0; 2553 return *addr_a < *addr_b ? -1 : 1; 2554 } 2555 2556 static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv) 2557 { 2558 return bpf_kprobe_multi_addrs_cmp(a, b); 2559 } 2560 2561 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) 2562 { 2563 struct bpf_kprobe_multi_run_ctx *run_ctx; 2564 struct bpf_kprobe_multi_link *link; 2565 u64 *cookie, entry_ip; 2566 unsigned long *addr; 2567 2568 if (WARN_ON_ONCE(!ctx)) 2569 return 0; 2570 run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, 2571 session_ctx.run_ctx); 2572 link = run_ctx->link; 2573 if (!link->cookies) 2574 return 0; 2575 entry_ip = run_ctx->entry_ip; 2576 addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip), 2577 bpf_kprobe_multi_addrs_cmp); 2578 if (!addr) 2579 return 0; 2580 cookie = link->cookies + (addr - link->addrs); 2581 return *cookie; 2582 } 2583 2584 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 2585 { 2586 struct bpf_kprobe_multi_run_ctx *run_ctx; 2587 2588 run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, 2589 session_ctx.run_ctx); 2590 return run_ctx->entry_ip; 2591 } 2592 2593 static __always_inline int 2594 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, 2595 unsigned long entry_ip, struct ftrace_regs *fregs, 2596 bool is_return, void *data) 2597 { 2598 struct bpf_kprobe_multi_run_ctx run_ctx = { 2599 .session_ctx = { 2600 .is_return = is_return, 2601 .data = data, 2602 }, 2603 .link = link, 2604 .entry_ip = entry_ip, 2605 }; 2606 struct bpf_run_ctx *old_run_ctx; 2607 struct pt_regs *regs; 2608 int err; 2609 2610 /* 2611 * graph tracer framework ensures we won't migrate, so there is no need 2612 * to use migrate_disable for bpf_prog_run again. The check here just for 2613 * __this_cpu_inc_return. 2614 */ 2615 cant_sleep(); 2616 2617 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 2618 bpf_prog_inc_misses_counter(link->link.prog); 2619 err = 1; 2620 goto out; 2621 } 2622 2623 rcu_read_lock(); 2624 regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr()); 2625 old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); 2626 err = bpf_prog_run(link->link.prog, regs); 2627 bpf_reset_run_ctx(old_run_ctx); 2628 ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr()); 2629 rcu_read_unlock(); 2630 2631 out: 2632 __this_cpu_dec(bpf_prog_active); 2633 return err; 2634 } 2635 2636 static int 2637 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, 2638 unsigned long ret_ip, struct ftrace_regs *fregs, 2639 void *data) 2640 { 2641 struct bpf_kprobe_multi_link *link; 2642 int err; 2643 2644 link = container_of(fp, struct bpf_kprobe_multi_link, fp); 2645 err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), 2646 fregs, false, data); 2647 return is_kprobe_session(link->link.prog) ? err : 0; 2648 } 2649 2650 static void 2651 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, 2652 unsigned long ret_ip, struct ftrace_regs *fregs, 2653 void *data) 2654 { 2655 struct bpf_kprobe_multi_link *link; 2656 2657 link = container_of(fp, struct bpf_kprobe_multi_link, fp); 2658 kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), 2659 fregs, true, data); 2660 } 2661 2662 static int symbols_cmp_r(const void *a, const void *b, const void *priv) 2663 { 2664 const char **str_a = (const char **) a; 2665 const char **str_b = (const char **) b; 2666 2667 return strcmp(*str_a, *str_b); 2668 } 2669 2670 struct multi_symbols_sort { 2671 const char **funcs; 2672 u64 *cookies; 2673 }; 2674 2675 static void symbols_swap_r(void *a, void *b, int size, const void *priv) 2676 { 2677 const struct multi_symbols_sort *data = priv; 2678 const char **name_a = a, **name_b = b; 2679 2680 swap(*name_a, *name_b); 2681 2682 /* If defined, swap also related cookies. */ 2683 if (data->cookies) { 2684 u64 *cookie_a, *cookie_b; 2685 2686 cookie_a = data->cookies + (name_a - data->funcs); 2687 cookie_b = data->cookies + (name_b - data->funcs); 2688 swap(*cookie_a, *cookie_b); 2689 } 2690 } 2691 2692 struct modules_array { 2693 struct module **mods; 2694 int mods_cnt; 2695 int mods_cap; 2696 }; 2697 2698 static int add_module(struct modules_array *arr, struct module *mod) 2699 { 2700 struct module **mods; 2701 2702 if (arr->mods_cnt == arr->mods_cap) { 2703 arr->mods_cap = max(16, arr->mods_cap * 3 / 2); 2704 mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); 2705 if (!mods) 2706 return -ENOMEM; 2707 arr->mods = mods; 2708 } 2709 2710 arr->mods[arr->mods_cnt] = mod; 2711 arr->mods_cnt++; 2712 return 0; 2713 } 2714 2715 static bool has_module(struct modules_array *arr, struct module *mod) 2716 { 2717 int i; 2718 2719 for (i = arr->mods_cnt - 1; i >= 0; i--) { 2720 if (arr->mods[i] == mod) 2721 return true; 2722 } 2723 return false; 2724 } 2725 2726 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) 2727 { 2728 struct modules_array arr = {}; 2729 u32 i, err = 0; 2730 2731 for (i = 0; i < addrs_cnt; i++) { 2732 bool skip_add = false; 2733 struct module *mod; 2734 2735 scoped_guard(rcu) { 2736 mod = __module_address(addrs[i]); 2737 /* Either no module or it's already stored */ 2738 if (!mod || has_module(&arr, mod)) { 2739 skip_add = true; 2740 break; /* scoped_guard */ 2741 } 2742 if (!try_module_get(mod)) 2743 err = -EINVAL; 2744 } 2745 if (skip_add) 2746 continue; 2747 if (err) 2748 break; 2749 err = add_module(&arr, mod); 2750 if (err) { 2751 module_put(mod); 2752 break; 2753 } 2754 } 2755 2756 /* We return either err < 0 in case of error, ... */ 2757 if (err) { 2758 kprobe_multi_put_modules(arr.mods, arr.mods_cnt); 2759 kfree(arr.mods); 2760 return err; 2761 } 2762 2763 /* or number of modules found if everything is ok. */ 2764 *mods = arr.mods; 2765 return arr.mods_cnt; 2766 } 2767 2768 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) 2769 { 2770 u32 i; 2771 2772 for (i = 0; i < cnt; i++) { 2773 if (!within_error_injection_list(addrs[i])) 2774 return -EINVAL; 2775 } 2776 return 0; 2777 } 2778 2779 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2780 { 2781 struct bpf_kprobe_multi_link *link = NULL; 2782 struct bpf_link_primer link_primer; 2783 void __user *ucookies; 2784 unsigned long *addrs; 2785 u32 flags, cnt, size; 2786 void __user *uaddrs; 2787 u64 *cookies = NULL; 2788 void __user *usyms; 2789 int err; 2790 2791 /* no support for 32bit archs yet */ 2792 if (sizeof(u64) != sizeof(void *)) 2793 return -EOPNOTSUPP; 2794 2795 if (attr->link_create.flags) 2796 return -EINVAL; 2797 2798 if (!is_kprobe_multi(prog)) 2799 return -EINVAL; 2800 2801 /* kprobe_multi is not allowed to be sleepable. */ 2802 if (prog->sleepable) 2803 return -EINVAL; 2804 2805 /* Writing to context is not allowed for kprobes. */ 2806 if (prog->aux->kprobe_write_ctx) 2807 return -EINVAL; 2808 2809 flags = attr->link_create.kprobe_multi.flags; 2810 if (flags & ~BPF_F_KPROBE_MULTI_RETURN) 2811 return -EINVAL; 2812 2813 uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs); 2814 usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms); 2815 if (!!uaddrs == !!usyms) 2816 return -EINVAL; 2817 2818 cnt = attr->link_create.kprobe_multi.cnt; 2819 if (!cnt) 2820 return -EINVAL; 2821 if (cnt > MAX_KPROBE_MULTI_CNT) 2822 return -E2BIG; 2823 2824 size = cnt * sizeof(*addrs); 2825 addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); 2826 if (!addrs) 2827 return -ENOMEM; 2828 2829 ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); 2830 if (ucookies) { 2831 cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); 2832 if (!cookies) { 2833 err = -ENOMEM; 2834 goto error; 2835 } 2836 if (copy_from_user(cookies, ucookies, size)) { 2837 err = -EFAULT; 2838 goto error; 2839 } 2840 } 2841 2842 if (uaddrs) { 2843 if (copy_from_user(addrs, uaddrs, size)) { 2844 err = -EFAULT; 2845 goto error; 2846 } 2847 } else { 2848 struct multi_symbols_sort data = { 2849 .cookies = cookies, 2850 }; 2851 struct user_syms us; 2852 2853 err = copy_user_syms(&us, usyms, cnt); 2854 if (err) 2855 goto error; 2856 2857 if (cookies) 2858 data.funcs = us.syms; 2859 2860 sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, 2861 symbols_swap_r, &data); 2862 2863 err = ftrace_lookup_symbols(us.syms, cnt, addrs); 2864 free_user_syms(&us); 2865 if (err) 2866 goto error; 2867 } 2868 2869 if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { 2870 err = -EINVAL; 2871 goto error; 2872 } 2873 2874 link = kzalloc_obj(*link); 2875 if (!link) { 2876 err = -ENOMEM; 2877 goto error; 2878 } 2879 2880 bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI, 2881 &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type); 2882 2883 err = bpf_link_prime(&link->link, &link_primer); 2884 if (err) 2885 goto error; 2886 2887 if (!(flags & BPF_F_KPROBE_MULTI_RETURN)) 2888 link->fp.entry_handler = kprobe_multi_link_handler; 2889 if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog)) 2890 link->fp.exit_handler = kprobe_multi_link_exit_handler; 2891 if (is_kprobe_session(prog)) 2892 link->fp.entry_data_size = sizeof(u64); 2893 2894 link->addrs = addrs; 2895 link->cookies = cookies; 2896 link->cnt = cnt; 2897 link->link.flags = flags; 2898 2899 if (cookies) { 2900 /* 2901 * Sorting addresses will trigger sorting cookies as well 2902 * (check bpf_kprobe_multi_cookie_swap). This way we can 2903 * find cookie based on the address in bpf_get_attach_cookie 2904 * helper. 2905 */ 2906 sort_r(addrs, cnt, sizeof(*addrs), 2907 bpf_kprobe_multi_cookie_cmp, 2908 bpf_kprobe_multi_cookie_swap, 2909 link); 2910 } 2911 2912 err = get_modules_for_addrs(&link->mods, addrs, cnt); 2913 if (err < 0) { 2914 bpf_link_cleanup(&link_primer); 2915 return err; 2916 } 2917 link->mods_cnt = err; 2918 2919 err = register_fprobe_ips(&link->fp, addrs, cnt); 2920 if (err) { 2921 kprobe_multi_put_modules(link->mods, link->mods_cnt); 2922 bpf_link_cleanup(&link_primer); 2923 return err; 2924 } 2925 2926 return bpf_link_settle(&link_primer); 2927 2928 error: 2929 kfree(link); 2930 kvfree(addrs); 2931 kvfree(cookies); 2932 return err; 2933 } 2934 #else /* !CONFIG_FPROBE */ 2935 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2936 { 2937 return -EOPNOTSUPP; 2938 } 2939 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) 2940 { 2941 return 0; 2942 } 2943 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 2944 { 2945 return 0; 2946 } 2947 #endif 2948 2949 #ifdef CONFIG_UPROBES 2950 struct bpf_uprobe_multi_link; 2951 2952 struct bpf_uprobe { 2953 struct bpf_uprobe_multi_link *link; 2954 loff_t offset; 2955 unsigned long ref_ctr_offset; 2956 u64 cookie; 2957 struct uprobe *uprobe; 2958 struct uprobe_consumer consumer; 2959 bool session; 2960 }; 2961 2962 struct bpf_uprobe_multi_link { 2963 struct path path; 2964 struct bpf_link link; 2965 u32 cnt; 2966 struct bpf_uprobe *uprobes; 2967 struct task_struct *task; 2968 }; 2969 2970 struct bpf_uprobe_multi_run_ctx { 2971 struct bpf_session_run_ctx session_ctx; 2972 unsigned long entry_ip; 2973 struct bpf_uprobe *uprobe; 2974 }; 2975 2976 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt) 2977 { 2978 u32 i; 2979 2980 for (i = 0; i < cnt; i++) 2981 uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer); 2982 2983 if (cnt) 2984 uprobe_unregister_sync(); 2985 } 2986 2987 static void bpf_uprobe_multi_link_release(struct bpf_link *link) 2988 { 2989 struct bpf_uprobe_multi_link *umulti_link; 2990 2991 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 2992 bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt); 2993 if (umulti_link->task) 2994 put_task_struct(umulti_link->task); 2995 path_put(&umulti_link->path); 2996 } 2997 2998 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) 2999 { 3000 struct bpf_uprobe_multi_link *umulti_link; 3001 3002 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3003 kvfree(umulti_link->uprobes); 3004 kfree(umulti_link); 3005 } 3006 3007 static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, 3008 struct bpf_link_info *info) 3009 { 3010 u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets); 3011 u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies); 3012 u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets); 3013 u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path); 3014 u32 upath_size = info->uprobe_multi.path_size; 3015 struct bpf_uprobe_multi_link *umulti_link; 3016 u32 ucount = info->uprobe_multi.count; 3017 int err = 0, i; 3018 char *p, *buf; 3019 long left = 0; 3020 3021 if (!upath ^ !upath_size) 3022 return -EINVAL; 3023 3024 if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount) 3025 return -EINVAL; 3026 3027 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3028 info->uprobe_multi.count = umulti_link->cnt; 3029 info->uprobe_multi.flags = umulti_link->link.flags; 3030 info->uprobe_multi.pid = umulti_link->task ? 3031 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; 3032 3033 upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX; 3034 buf = kmalloc(upath_size, GFP_KERNEL); 3035 if (!buf) 3036 return -ENOMEM; 3037 p = d_path(&umulti_link->path, buf, upath_size); 3038 if (IS_ERR(p)) { 3039 kfree(buf); 3040 return PTR_ERR(p); 3041 } 3042 upath_size = buf + upath_size - p; 3043 3044 if (upath) 3045 left = copy_to_user(upath, p, upath_size); 3046 kfree(buf); 3047 if (left) 3048 return -EFAULT; 3049 info->uprobe_multi.path_size = upath_size; 3050 3051 if (!uoffsets && !ucookies && !uref_ctr_offsets) 3052 return 0; 3053 3054 if (ucount < umulti_link->cnt) 3055 err = -ENOSPC; 3056 else 3057 ucount = umulti_link->cnt; 3058 3059 for (i = 0; i < ucount; i++) { 3060 if (uoffsets && 3061 put_user(umulti_link->uprobes[i].offset, uoffsets + i)) 3062 return -EFAULT; 3063 if (uref_ctr_offsets && 3064 put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) 3065 return -EFAULT; 3066 if (ucookies && 3067 put_user(umulti_link->uprobes[i].cookie, ucookies + i)) 3068 return -EFAULT; 3069 } 3070 3071 return err; 3072 } 3073 3074 #ifdef CONFIG_PROC_FS 3075 static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link, 3076 struct seq_file *seq) 3077 { 3078 struct bpf_uprobe_multi_link *umulti_link; 3079 char *p, *buf; 3080 pid_t pid; 3081 3082 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3083 3084 buf = kmalloc(PATH_MAX, GFP_KERNEL); 3085 if (!buf) 3086 return; 3087 3088 p = d_path(&umulti_link->path, buf, PATH_MAX); 3089 if (IS_ERR(p)) { 3090 kfree(buf); 3091 return; 3092 } 3093 3094 pid = umulti_link->task ? 3095 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; 3096 seq_printf(seq, 3097 "uprobe_cnt:\t%u\n" 3098 "pid:\t%u\n" 3099 "path:\t%s\n", 3100 umulti_link->cnt, pid, p); 3101 3102 seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset"); 3103 for (int i = 0; i < umulti_link->cnt; i++) { 3104 seq_printf(seq, 3105 "%llu\t %#llx\t %#lx\n", 3106 umulti_link->uprobes[i].cookie, 3107 umulti_link->uprobes[i].offset, 3108 umulti_link->uprobes[i].ref_ctr_offset); 3109 } 3110 3111 kfree(buf); 3112 } 3113 #endif 3114 3115 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { 3116 .release = bpf_uprobe_multi_link_release, 3117 .dealloc_deferred = bpf_uprobe_multi_link_dealloc, 3118 .fill_link_info = bpf_uprobe_multi_link_fill_link_info, 3119 #ifdef CONFIG_PROC_FS 3120 .show_fdinfo = bpf_uprobe_multi_show_fdinfo, 3121 #endif 3122 }; 3123 3124 static int uprobe_prog_run(struct bpf_uprobe *uprobe, 3125 unsigned long entry_ip, 3126 struct pt_regs *regs, 3127 bool is_return, void *data) 3128 { 3129 struct bpf_uprobe_multi_link *link = uprobe->link; 3130 struct bpf_uprobe_multi_run_ctx run_ctx = { 3131 .session_ctx = { 3132 .is_return = is_return, 3133 .data = data, 3134 }, 3135 .entry_ip = entry_ip, 3136 .uprobe = uprobe, 3137 }; 3138 struct bpf_prog *prog = link->link.prog; 3139 bool sleepable = prog->sleepable; 3140 struct bpf_run_ctx *old_run_ctx; 3141 int err; 3142 3143 if (link->task && !same_thread_group(current, link->task)) 3144 return 0; 3145 3146 if (sleepable) 3147 rcu_read_lock_trace(); 3148 else 3149 rcu_read_lock(); 3150 3151 migrate_disable(); 3152 3153 old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); 3154 err = bpf_prog_run(link->link.prog, regs); 3155 bpf_reset_run_ctx(old_run_ctx); 3156 3157 migrate_enable(); 3158 3159 if (sleepable) 3160 rcu_read_unlock_trace(); 3161 else 3162 rcu_read_unlock(); 3163 return err; 3164 } 3165 3166 static bool 3167 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm) 3168 { 3169 struct bpf_uprobe *uprobe; 3170 3171 uprobe = container_of(con, struct bpf_uprobe, consumer); 3172 return uprobe->link->task->mm == mm; 3173 } 3174 3175 static int 3176 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, 3177 __u64 *data) 3178 { 3179 struct bpf_uprobe *uprobe; 3180 int ret; 3181 3182 uprobe = container_of(con, struct bpf_uprobe, consumer); 3183 ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data); 3184 if (uprobe->session) 3185 return ret ? UPROBE_HANDLER_IGNORE : 0; 3186 return 0; 3187 } 3188 3189 static int 3190 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, 3191 __u64 *data) 3192 { 3193 struct bpf_uprobe *uprobe; 3194 3195 uprobe = container_of(con, struct bpf_uprobe, consumer); 3196 uprobe_prog_run(uprobe, func, regs, true, data); 3197 return 0; 3198 } 3199 3200 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3201 { 3202 struct bpf_uprobe_multi_run_ctx *run_ctx; 3203 3204 run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, 3205 session_ctx.run_ctx); 3206 return run_ctx->entry_ip; 3207 } 3208 3209 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3210 { 3211 struct bpf_uprobe_multi_run_ctx *run_ctx; 3212 3213 run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, 3214 session_ctx.run_ctx); 3215 return run_ctx->uprobe->cookie; 3216 } 3217 3218 static int bpf_uprobe_multi_get_path(const union bpf_attr *attr, struct path *path) 3219 { 3220 void __user *upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); 3221 u32 path_fd = attr->link_create.uprobe_multi.path_fd; 3222 u32 flags = attr->link_create.uprobe_multi.flags; 3223 3224 if (flags & BPF_F_UPROBE_MULTI_PATH_FD) { 3225 /* 3226 * When BPF_F_UPROBE_MULTI_PATH_FD is set, the executable is 3227 * identified by path_fd, upath must be NULL. 3228 */ 3229 if (upath) 3230 return -EINVAL; 3231 3232 CLASS(fd, f)(path_fd); 3233 if (fd_empty(f)) 3234 return -EBADF; 3235 *path = fd_file(f)->f_path; 3236 path_get(path); 3237 return 0; 3238 } 3239 3240 /* 3241 * When BPF_F_UPROBE_MULTI_PATH_FD is not set, the path is resolved 3242 * relative to the cwd (AT_FDCWD) or absolute using the upath string. 3243 */ 3244 if (!upath || path_fd) 3245 return -EINVAL; 3246 3247 return user_path_at(AT_FDCWD, upath, LOOKUP_FOLLOW, path); 3248 } 3249 3250 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3251 { 3252 struct bpf_uprobe_multi_link *link = NULL; 3253 unsigned long __user *uref_ctr_offsets; 3254 struct bpf_link_primer link_primer; 3255 struct bpf_uprobe *uprobes = NULL; 3256 struct task_struct *task = NULL; 3257 unsigned long __user *uoffsets; 3258 u64 __user *ucookies; 3259 unsigned long size; 3260 u32 flags, cnt, i; 3261 struct path path; 3262 pid_t pid; 3263 int err; 3264 3265 /* no support for 32bit archs yet */ 3266 if (sizeof(u64) != sizeof(void *)) 3267 return -EOPNOTSUPP; 3268 3269 if (attr->link_create.flags) 3270 return -EINVAL; 3271 3272 if (!is_uprobe_multi(prog)) 3273 return -EINVAL; 3274 3275 flags = attr->link_create.uprobe_multi.flags; 3276 if (flags & ~(BPF_F_UPROBE_MULTI_RETURN | BPF_F_UPROBE_MULTI_PATH_FD)) 3277 return -EINVAL; 3278 3279 /* 3280 * offsets and cnt are mandatory, 3281 * ref_ctr_offsets and cookies are optional 3282 */ 3283 uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); 3284 cnt = attr->link_create.uprobe_multi.cnt; 3285 pid = attr->link_create.uprobe_multi.pid; 3286 3287 if (!uoffsets || !cnt || pid < 0) 3288 return -EINVAL; 3289 if (cnt > MAX_UPROBE_MULTI_CNT) 3290 return -E2BIG; 3291 3292 uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); 3293 ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); 3294 3295 /* 3296 * All uoffsets/uref_ctr_offsets/ucookies arrays have the same value 3297 * size, we need to check their address range is safe for __get_user 3298 * calls. 3299 */ 3300 size = sizeof(*uoffsets) * cnt; 3301 if (!access_ok(uoffsets, size) || !access_ok(uref_ctr_offsets, size) || 3302 !access_ok(ucookies, size)) 3303 return -EFAULT; 3304 3305 err = bpf_uprobe_multi_get_path(attr, &path); 3306 if (err) 3307 return err; 3308 3309 if (!d_is_reg(path.dentry)) { 3310 err = -EBADF; 3311 goto error_path_put; 3312 } 3313 3314 if (pid) { 3315 rcu_read_lock(); 3316 task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); 3317 rcu_read_unlock(); 3318 if (!task) { 3319 err = -ESRCH; 3320 goto error_path_put; 3321 } 3322 } 3323 3324 err = -ENOMEM; 3325 3326 link = kzalloc_obj(*link); 3327 uprobes = kvzalloc_objs(*uprobes, cnt); 3328 3329 if (!uprobes || !link) 3330 goto error_free; 3331 3332 for (i = 0; i < cnt; i++) { 3333 if (__get_user(uprobes[i].offset, uoffsets + i)) { 3334 err = -EFAULT; 3335 goto error_free; 3336 } 3337 if (uprobes[i].offset < 0) { 3338 err = -EINVAL; 3339 goto error_free; 3340 } 3341 if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) { 3342 err = -EFAULT; 3343 goto error_free; 3344 } 3345 if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { 3346 err = -EFAULT; 3347 goto error_free; 3348 } 3349 3350 uprobes[i].link = link; 3351 3352 if (!(flags & BPF_F_UPROBE_MULTI_RETURN)) 3353 uprobes[i].consumer.handler = uprobe_multi_link_handler; 3354 if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog)) 3355 uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; 3356 if (is_uprobe_session(prog)) 3357 uprobes[i].session = true; 3358 if (pid) 3359 uprobes[i].consumer.filter = uprobe_multi_link_filter; 3360 } 3361 3362 link->cnt = cnt; 3363 link->uprobes = uprobes; 3364 link->path = path; 3365 link->task = task; 3366 link->link.flags = flags; 3367 3368 bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, 3369 &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type); 3370 3371 for (i = 0; i < cnt; i++) { 3372 uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), 3373 uprobes[i].offset, 3374 uprobes[i].ref_ctr_offset, 3375 &uprobes[i].consumer); 3376 if (IS_ERR(uprobes[i].uprobe)) { 3377 err = PTR_ERR(uprobes[i].uprobe); 3378 link->cnt = i; 3379 goto error_unregister; 3380 } 3381 } 3382 3383 err = bpf_link_prime(&link->link, &link_primer); 3384 if (err) 3385 goto error_unregister; 3386 3387 return bpf_link_settle(&link_primer); 3388 3389 error_unregister: 3390 bpf_uprobe_unregister(uprobes, link->cnt); 3391 3392 error_free: 3393 kvfree(uprobes); 3394 kfree(link); 3395 if (task) 3396 put_task_struct(task); 3397 error_path_put: 3398 path_put(&path); 3399 return err; 3400 } 3401 #else /* !CONFIG_UPROBES */ 3402 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3403 { 3404 return -EOPNOTSUPP; 3405 } 3406 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3407 { 3408 return 0; 3409 } 3410 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3411 { 3412 return 0; 3413 } 3414 #endif /* CONFIG_UPROBES */ 3415 3416 __bpf_kfunc_start_defs(); 3417 3418 __bpf_kfunc bool bpf_session_is_return(void *ctx) 3419 { 3420 struct bpf_session_run_ctx *session_ctx; 3421 3422 session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); 3423 return session_ctx->is_return; 3424 } 3425 3426 __bpf_kfunc __u64 *bpf_session_cookie(void *ctx) 3427 { 3428 struct bpf_session_run_ctx *session_ctx; 3429 3430 session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); 3431 return session_ctx->data; 3432 } 3433 3434 __bpf_kfunc_end_defs(); 3435 3436 BTF_KFUNCS_START(session_kfunc_set_ids) 3437 BTF_ID_FLAGS(func, bpf_session_is_return) 3438 BTF_ID_FLAGS(func, bpf_session_cookie) 3439 BTF_KFUNCS_END(session_kfunc_set_ids) 3440 3441 static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id) 3442 { 3443 if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id)) 3444 return 0; 3445 3446 if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog)) 3447 return -EACCES; 3448 3449 return 0; 3450 } 3451 3452 static const struct btf_kfunc_id_set bpf_session_kfunc_set = { 3453 .owner = THIS_MODULE, 3454 .set = &session_kfunc_set_ids, 3455 .filter = bpf_session_filter, 3456 }; 3457 3458 static int __init bpf_trace_kfuncs_init(void) 3459 { 3460 int err = 0; 3461 3462 err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set); 3463 err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set); 3464 3465 return err; 3466 } 3467 3468 late_initcall(bpf_trace_kfuncs_init); 3469 3470 typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk); 3471 3472 /* 3473 * The __always_inline is to make sure the compiler doesn't 3474 * generate indirect calls into callbacks, which is expensive, 3475 * on some kernel configurations. This allows compiler to put 3476 * direct calls into all the specific callback implementations 3477 * (copy_user_data_sleepable, copy_user_data_nofault, and so on) 3478 */ 3479 static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size, 3480 const void *unsafe_src, 3481 copy_fn_t str_copy_fn, 3482 struct task_struct *tsk) 3483 { 3484 const struct bpf_dynptr_kern *dst; 3485 u64 chunk_sz, off; 3486 void *dst_slice; 3487 int cnt, err; 3488 char buf[256]; 3489 3490 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); 3491 if (likely(dst_slice)) 3492 return str_copy_fn(dst_slice, unsafe_src, size, tsk); 3493 3494 dst = (struct bpf_dynptr_kern *)dptr; 3495 if (bpf_dynptr_check_off_len(dst, doff, size)) 3496 return -E2BIG; 3497 3498 for (off = 0; off < size; off += chunk_sz - 1) { 3499 chunk_sz = min_t(u64, sizeof(buf), size - off); 3500 /* Expect str_copy_fn to return count of copied bytes, including 3501 * zero terminator. Next iteration increment off by chunk_sz - 1 to 3502 * overwrite NUL. 3503 */ 3504 cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3505 if (cnt < 0) 3506 return cnt; 3507 err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0); 3508 if (err) 3509 return err; 3510 if (cnt < chunk_sz || chunk_sz == 1) /* we are done */ 3511 return off + cnt; 3512 } 3513 return off; 3514 } 3515 3516 static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, 3517 u64 size, const void *unsafe_src, 3518 copy_fn_t copy_fn, struct task_struct *tsk) 3519 { 3520 const struct bpf_dynptr_kern *dst; 3521 void *dst_slice; 3522 char buf[256]; 3523 u64 off, chunk_sz; 3524 int err; 3525 3526 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); 3527 if (likely(dst_slice)) 3528 return copy_fn(dst_slice, unsafe_src, size, tsk); 3529 3530 dst = (struct bpf_dynptr_kern *)dptr; 3531 if (bpf_dynptr_check_off_len(dst, doff, size)) 3532 return -E2BIG; 3533 3534 for (off = 0; off < size; off += chunk_sz) { 3535 chunk_sz = min_t(u64, sizeof(buf), size - off); 3536 err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3537 if (err) 3538 return err; 3539 err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0); 3540 if (err) 3541 return err; 3542 } 3543 return 0; 3544 } 3545 3546 static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src, 3547 u32 size, struct task_struct *tsk) 3548 { 3549 return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size); 3550 } 3551 3552 static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src, 3553 u32 size, struct task_struct *tsk) 3554 { 3555 int ret; 3556 3557 if (!tsk) { /* Read from the current task */ 3558 ret = copy_from_user(dst, (const void __user *)unsafe_src, size); 3559 if (ret) 3560 return -EFAULT; 3561 return 0; 3562 } 3563 3564 ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0); 3565 if (ret != size) 3566 return -EFAULT; 3567 return 0; 3568 } 3569 3570 static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src, 3571 u32 size, struct task_struct *tsk) 3572 { 3573 return copy_from_kernel_nofault(dst, unsafe_src, size); 3574 } 3575 3576 static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src, 3577 u32 size, struct task_struct *tsk) 3578 { 3579 return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size); 3580 } 3581 3582 static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src, 3583 u32 size, struct task_struct *tsk) 3584 { 3585 int ret; 3586 3587 if (unlikely(size == 0)) 3588 return 0; 3589 3590 if (tsk) { 3591 ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0); 3592 } else { 3593 ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1); 3594 /* strncpy_from_user does not guarantee NUL termination */ 3595 if (ret >= 0) 3596 ((char *)dst)[ret] = '\0'; 3597 } 3598 3599 if (ret < 0) 3600 return ret; 3601 return ret + 1; 3602 } 3603 3604 static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src, 3605 u32 size, struct task_struct *tsk) 3606 { 3607 return strncpy_from_kernel_nofault(dst, unsafe_src, size); 3608 } 3609 3610 __bpf_kfunc_start_defs(); 3611 3612 __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, 3613 u64 value) 3614 { 3615 if (type != PIDTYPE_PID && type != PIDTYPE_TGID) 3616 return -EINVAL; 3617 3618 return bpf_send_signal_common(sig, type, task, value); 3619 } 3620 3621 __bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off, 3622 u64 size, const void __user *unsafe_ptr__ign) 3623 { 3624 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3625 copy_user_data_nofault, NULL); 3626 } 3627 3628 __bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off, 3629 u64 size, const void *unsafe_ptr__ign) 3630 { 3631 return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, 3632 copy_kernel_data_nofault, NULL); 3633 } 3634 3635 __bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3636 u64 size, const void __user *unsafe_ptr__ign) 3637 { 3638 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3639 copy_user_str_nofault, NULL); 3640 } 3641 3642 __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3643 u64 size, const void *unsafe_ptr__ign) 3644 { 3645 return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, 3646 copy_kernel_str_nofault, NULL); 3647 } 3648 3649 __bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off, 3650 u64 size, const void __user *unsafe_ptr__ign) 3651 { 3652 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3653 copy_user_data_sleepable, NULL); 3654 } 3655 3656 __bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3657 u64 size, const void __user *unsafe_ptr__ign) 3658 { 3659 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3660 copy_user_str_sleepable, NULL); 3661 } 3662 3663 __bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off, 3664 u64 size, const void __user *unsafe_ptr__ign, 3665 struct task_struct *tsk) 3666 { 3667 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3668 copy_user_data_sleepable, tsk); 3669 } 3670 3671 __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3672 u64 size, const void __user *unsafe_ptr__ign, 3673 struct task_struct *tsk) 3674 { 3675 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3676 copy_user_str_sleepable, tsk); 3677 } 3678 3679 __bpf_kfunc_end_defs(); 3680 3681 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \ 3682 defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) 3683 3684 static void bpf_tracing_multi_link_release(struct bpf_link *link) 3685 { 3686 struct bpf_tracing_multi_link *tr_link = 3687 container_of(link, struct bpf_tracing_multi_link, link); 3688 3689 WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link)); 3690 } 3691 3692 static void bpf_tracing_multi_link_dealloc(struct bpf_link *link) 3693 { 3694 struct bpf_tracing_multi_link *tr_link = 3695 container_of(link, struct bpf_tracing_multi_link, link); 3696 3697 kvfree(tr_link->fexits); 3698 kvfree(tr_link->cookies); 3699 kvfree(tr_link); 3700 } 3701 3702 #ifdef CONFIG_PROC_FS 3703 static void bpf_tracing_multi_show_fdinfo(const struct bpf_link *link, 3704 struct seq_file *seq) 3705 { 3706 struct bpf_tracing_multi_link *tr_link = 3707 container_of(link, struct bpf_tracing_multi_link, link); 3708 bool has_cookies = !!tr_link->cookies; 3709 3710 seq_printf(seq, "attach_type:\t%u\n", tr_link->link.attach_type); 3711 seq_printf(seq, "cnt:\t%u\n", tr_link->nodes_cnt); 3712 3713 seq_printf(seq, "%s\t %s\t %s\t %s\n", "obj-id", "btf-id", "cookie", "func"); 3714 for (int i = 0; i < tr_link->nodes_cnt; i++) { 3715 struct bpf_tracing_multi_node *mnode = &tr_link->nodes[i]; 3716 u32 btf_id, obj_id; 3717 3718 bpf_trampoline_unpack_key(mnode->trampoline->key, &obj_id, &btf_id); 3719 seq_printf(seq, "%u\t %u\t %llu\t %pS\n", 3720 obj_id, btf_id, 3721 has_cookies ? tr_link->cookies[i] : 0, 3722 (void *) mnode->trampoline->ip); 3723 3724 cond_resched(); 3725 } 3726 } 3727 #endif 3728 3729 static const struct bpf_link_ops bpf_tracing_multi_link_lops = { 3730 .release = bpf_tracing_multi_link_release, 3731 .dealloc_deferred = bpf_tracing_multi_link_dealloc, 3732 #ifdef CONFIG_PROC_FS 3733 .show_fdinfo = bpf_tracing_multi_show_fdinfo, 3734 #endif 3735 }; 3736 3737 static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused) 3738 { 3739 u32 a = *(u32 *) pa; 3740 u32 b = *(u32 *) pb; 3741 3742 return (a > b) - (a < b); 3743 } 3744 3745 static void ids_swap_r(void *a, void *b, int size __maybe_unused, 3746 const void *priv __maybe_unused) 3747 { 3748 u64 *cookie_a, *cookie_b, *cookies; 3749 u32 *id_a = a, *id_b = b, *ids; 3750 void **data = (void **) priv; 3751 3752 ids = data[0]; 3753 cookies = data[1]; 3754 3755 if (cookies) { 3756 cookie_a = cookies + (id_a - ids); 3757 cookie_b = cookies + (id_b - ids); 3758 swap(*cookie_a, *cookie_b); 3759 } 3760 swap(*id_a, *id_b); 3761 } 3762 3763 static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt) 3764 { 3765 void *data[2] = { ids, cookies }; 3766 int err = 0; 3767 3768 /* 3769 * Sort ids array (together with cookies array if defined) 3770 * and check it for duplicates. The ids and cookies arrays 3771 * are left sorted. 3772 */ 3773 sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data); 3774 3775 for (int i = 1; i < cnt; i++) { 3776 if (ids[i] == ids[i - 1]) { 3777 err = -EINVAL; 3778 break; 3779 } 3780 } 3781 return err; 3782 } 3783 3784 int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) 3785 { 3786 struct bpf_tracing_multi_link *link = NULL; 3787 struct bpf_tramp_node *fexits = NULL; 3788 struct bpf_link_primer link_primer; 3789 u32 cnt, *ids = NULL; 3790 u64 __user *ucookies; 3791 u64 *cookies = NULL; 3792 u32 __user *uids; 3793 int err; 3794 3795 uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids); 3796 cnt = attr->link_create.tracing_multi.cnt; 3797 3798 if (!cnt || !uids) 3799 return -EINVAL; 3800 if (cnt > MAX_TRACING_MULTI_CNT) 3801 return -E2BIG; 3802 if (attr->link_create.flags || attr->link_create.target_fd) 3803 return -EINVAL; 3804 3805 ids = kvmalloc_objs(*ids, cnt); 3806 if (!ids) 3807 return -ENOMEM; 3808 3809 if (copy_from_user(ids, uids, cnt * sizeof(*ids))) { 3810 err = -EFAULT; 3811 goto error; 3812 } 3813 3814 ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies); 3815 if (ucookies) { 3816 cookies = kvmalloc_objs(*cookies, cnt); 3817 if (!cookies) { 3818 err = -ENOMEM; 3819 goto error; 3820 } 3821 if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) { 3822 err = -EFAULT; 3823 goto error; 3824 } 3825 } 3826 3827 err = check_dup_ids(ids, cookies, cnt); 3828 if (err) 3829 goto error; 3830 3831 if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) { 3832 fexits = kvmalloc_objs(*fexits, cnt); 3833 if (!fexits) { 3834 err = -ENOMEM; 3835 goto error; 3836 } 3837 } 3838 3839 link = kvzalloc_flex(*link, nodes, cnt); 3840 if (!link) { 3841 err = -ENOMEM; 3842 goto error; 3843 } 3844 3845 bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI, 3846 &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type); 3847 3848 err = bpf_link_prime(&link->link, &link_primer); 3849 if (err) 3850 goto error; 3851 3852 link->nodes_cnt = cnt; 3853 link->cookies = cookies; 3854 link->fexits = fexits; 3855 3856 err = bpf_trampoline_multi_attach(prog, ids, link); 3857 kvfree(ids); 3858 if (err) { 3859 bpf_link_cleanup(&link_primer); 3860 return err; 3861 } 3862 return bpf_link_settle(&link_primer); 3863 3864 error: 3865 kvfree(fexits); 3866 kvfree(cookies); 3867 kvfree(ids); 3868 kvfree(link); 3869 return err; 3870 } 3871 3872 #else 3873 3874 int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) 3875 { 3876 return -EOPNOTSUPP; 3877 } 3878 3879 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */ 3880