1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016 Facebook 4 */ 5 #include <linux/kernel.h> 6 #include <linux/types.h> 7 #include <linux/slab.h> 8 #include <linux/bpf.h> 9 #include <linux/bpf_verifier.h> 10 #include <linux/bpf_perf_event.h> 11 #include <linux/btf.h> 12 #include <linux/filter.h> 13 #include <linux/uaccess.h> 14 #include <linux/ctype.h> 15 #include <linux/kprobes.h> 16 #include <linux/spinlock.h> 17 #include <linux/syscalls.h> 18 #include <linux/error-injection.h> 19 #include <linux/btf_ids.h> 20 #include <linux/bpf_lsm.h> 21 #include <linux/fprobe.h> 22 #include <linux/bsearch.h> 23 #include <linux/sort.h> 24 #include <linux/key.h> 25 #include <linux/namei.h> 26 27 #include <net/bpf_sk_storage.h> 28 29 #include <uapi/linux/bpf.h> 30 #include <uapi/linux/btf.h> 31 32 #include <asm/tlb.h> 33 34 #include "trace_probe.h" 35 #include "trace.h" 36 37 #define CREATE_TRACE_POINTS 38 #include "bpf_trace.h" 39 40 #define bpf_event_rcu_dereference(p) \ 41 rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) 42 43 #define MAX_UPROBE_MULTI_CNT (1U << 20) 44 #define MAX_KPROBE_MULTI_CNT (1U << 20) 45 #define MAX_TRACING_MULTI_CNT (1U << 20) 46 47 #ifdef CONFIG_MODULES 48 struct bpf_trace_module { 49 struct module *module; 50 struct list_head list; 51 }; 52 53 static LIST_HEAD(bpf_trace_modules); 54 static DEFINE_MUTEX(bpf_module_mutex); 55 56 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) 57 { 58 struct bpf_raw_event_map *btp, *ret = NULL; 59 struct bpf_trace_module *btm; 60 unsigned int i; 61 62 mutex_lock(&bpf_module_mutex); 63 list_for_each_entry(btm, &bpf_trace_modules, list) { 64 for (i = 0; i < btm->module->num_bpf_raw_events; ++i) { 65 btp = &btm->module->bpf_raw_events[i]; 66 if (!strcmp(btp->tp->name, name)) { 67 if (try_module_get(btm->module)) 68 ret = btp; 69 goto out; 70 } 71 } 72 } 73 out: 74 mutex_unlock(&bpf_module_mutex); 75 return 
ret; 76 } 77 #else 78 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) 79 { 80 return NULL; 81 } 82 #endif /* CONFIG_MODULES */ 83 84 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 85 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 86 87 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, 88 u64 flags, const struct btf **btf, 89 s32 *btf_id); 90 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx); 91 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 92 93 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx); 94 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 95 96 /** 97 * trace_call_bpf - invoke BPF program 98 * @call: tracepoint event 99 * @ctx: opaque context pointer 100 * 101 * kprobe handlers execute BPF programs via this helper. 102 * Can be used from static tracepoints in the future. 103 * 104 * Return: BPF programs always return an integer which is interpreted by 105 * kprobe handler as: 106 * 0 - return from kprobe (event is filtered out) 107 * 1 - store kprobe event into ring buffer 108 * Other values are reserved and currently alias to 1 109 */ 110 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 111 { 112 unsigned int ret; 113 114 cant_sleep(); 115 116 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 117 /* 118 * since some bpf program is already running on this cpu, 119 * don't call into another bpf program (same or different) 120 * and don't send kprobe event into ring-buffer, 121 * so return zero here 122 */ 123 rcu_read_lock(); 124 bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); 125 rcu_read_unlock(); 126 ret = 0; 127 goto out; 128 } 129 130 /* 131 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock 132 * to all call sites, we did a bpf_prog_array_valid() there to check 133 * whether call->prog_array is empty or not, which is 134 * a heuristic to speed up execution. 
135 * 136 * If bpf_prog_array_valid() fetched prog_array was 137 * non-NULL, we go into trace_call_bpf() and do the actual 138 * proper rcu_dereference() under RCU lock. 139 * If it turns out that prog_array is NULL then, we bail out. 140 * For the opposite, if the bpf_prog_array_valid() fetched pointer 141 * was NULL, you'll skip the prog_array with the risk of missing 142 * out of events when it was updated in between this and the 143 * rcu_dereference() which is accepted risk. 144 */ 145 rcu_read_lock(); 146 ret = bpf_prog_run_array(rcu_dereference(call->prog_array), 147 ctx, bpf_prog_run); 148 rcu_read_unlock(); 149 150 out: 151 __this_cpu_dec(bpf_prog_active); 152 153 return ret; 154 } 155 156 /** 157 * trace_call_bpf_faultable - invoke BPF program in faultable context 158 * @call: tracepoint event 159 * @ctx: opaque context pointer 160 * 161 * Variant of trace_call_bpf() for faultable tracepoints (syscall 162 * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace 163 * for lifetime protection and bpf_prog_run_array_sleepable() for per-program 164 * RCU flavor selection, following the uprobe pattern. 165 * 166 * Per-program recursion protection is provided by 167 * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not 168 * needed because syscall tracepoints cannot self-recurse. 169 * 170 * Must be called from a faultable/preemptible context. 
171 */ 172 unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx) 173 { 174 struct bpf_prog_array *prog_array; 175 176 might_fault(); 177 guard(rcu_tasks_trace)(); 178 179 prog_array = rcu_dereference_check(call->prog_array, 180 rcu_read_lock_trace_held()); 181 return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run); 182 } 183 184 #ifdef CONFIG_BPF_KPROBE_OVERRIDE 185 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) 186 { 187 regs_set_return_value(regs, rc); 188 override_function_with_return(regs); 189 return 0; 190 } 191 192 static const struct bpf_func_proto bpf_override_return_proto = { 193 .func = bpf_override_return, 194 .gpl_only = true, 195 .ret_type = RET_INTEGER, 196 .arg1_type = ARG_PTR_TO_CTX, 197 .arg2_type = ARG_ANYTHING, 198 }; 199 #endif 200 201 static __always_inline int 202 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) 203 { 204 int ret; 205 206 ret = copy_from_user_nofault(dst, unsafe_ptr, size); 207 if (unlikely(ret < 0)) 208 memset(dst, 0, size); 209 return ret; 210 } 211 212 BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size, 213 const void __user *, unsafe_ptr) 214 { 215 return bpf_probe_read_user_common(dst, size, unsafe_ptr); 216 } 217 218 const struct bpf_func_proto bpf_probe_read_user_proto = { 219 .func = bpf_probe_read_user, 220 .gpl_only = true, 221 .ret_type = RET_INTEGER, 222 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 223 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 224 .arg3_type = ARG_ANYTHING, 225 }; 226 227 static __always_inline int 228 bpf_probe_read_user_str_common(void *dst, u32 size, 229 const void __user *unsafe_ptr) 230 { 231 int ret; 232 233 /* 234 * NB: We rely on strncpy_from_user() not copying junk past the NUL 235 * terminator into `dst`. 236 * 237 * strncpy_from_user() does long-sized strides in the fast path. 
If the 238 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`, 239 * then there could be junk after the NUL in `dst`. If user takes `dst` 240 * and keys a hash map with it, then semantically identical strings can 241 * occupy multiple entries in the map. 242 */ 243 ret = strncpy_from_user_nofault(dst, unsafe_ptr, size); 244 if (unlikely(ret < 0)) 245 memset(dst, 0, size); 246 return ret; 247 } 248 249 BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size, 250 const void __user *, unsafe_ptr) 251 { 252 return bpf_probe_read_user_str_common(dst, size, unsafe_ptr); 253 } 254 255 const struct bpf_func_proto bpf_probe_read_user_str_proto = { 256 .func = bpf_probe_read_user_str, 257 .gpl_only = true, 258 .ret_type = RET_INTEGER, 259 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 260 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 261 .arg3_type = ARG_ANYTHING, 262 }; 263 264 BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size, 265 const void *, unsafe_ptr) 266 { 267 return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); 268 } 269 270 const struct bpf_func_proto bpf_probe_read_kernel_proto = { 271 .func = bpf_probe_read_kernel, 272 .gpl_only = true, 273 .ret_type = RET_INTEGER, 274 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 275 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 276 .arg3_type = ARG_ANYTHING, 277 }; 278 279 static __always_inline int 280 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr) 281 { 282 int ret; 283 284 /* 285 * The strncpy_from_kernel_nofault() call will likely not fill the 286 * entire buffer, but that's okay in this circumstance as we're probing 287 * arbitrary memory anyway similar to bpf_probe_read_*() and might 288 * as well probe the stack. Thus, memory is explicitly cleared 289 * only in error case, so that improper users ignoring return 290 * code altogether don't copy garbage; otherwise length of string 291 * is returned that can be used for bpf_perf_event_output() et al. 
292 */ 293 ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size); 294 if (unlikely(ret < 0)) 295 memset(dst, 0, size); 296 return ret; 297 } 298 299 BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size, 300 const void *, unsafe_ptr) 301 { 302 return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); 303 } 304 305 const struct bpf_func_proto bpf_probe_read_kernel_str_proto = { 306 .func = bpf_probe_read_kernel_str, 307 .gpl_only = true, 308 .ret_type = RET_INTEGER, 309 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 310 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 311 .arg3_type = ARG_ANYTHING, 312 }; 313 314 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 315 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size, 316 const void *, unsafe_ptr) 317 { 318 if ((unsigned long)unsafe_ptr < TASK_SIZE) { 319 return bpf_probe_read_user_common(dst, size, 320 (__force void __user *)unsafe_ptr); 321 } 322 return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); 323 } 324 325 static const struct bpf_func_proto bpf_probe_read_compat_proto = { 326 .func = bpf_probe_read_compat, 327 .gpl_only = true, 328 .ret_type = RET_INTEGER, 329 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 330 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 331 .arg3_type = ARG_ANYTHING, 332 }; 333 334 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size, 335 const void *, unsafe_ptr) 336 { 337 if ((unsigned long)unsafe_ptr < TASK_SIZE) { 338 return bpf_probe_read_user_str_common(dst, size, 339 (__force void __user *)unsafe_ptr); 340 } 341 return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); 342 } 343 344 static const struct bpf_func_proto bpf_probe_read_compat_str_proto = { 345 .func = bpf_probe_read_compat_str, 346 .gpl_only = true, 347 .ret_type = RET_INTEGER, 348 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 349 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 350 .arg3_type = ARG_ANYTHING, 351 }; 352 #endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ 353 354 BPF_CALL_3(bpf_probe_write_user, void __user *, 
unsafe_ptr, const void *, src, 355 u32, size) 356 { 357 /* 358 * Ensure we're in user context which is safe for the helper to 359 * run. This helper has no business in a kthread. 360 * 361 * access_ok() should prevent writing to non-user memory, but in 362 * some situations (nommu, temporary switch, etc) access_ok() does 363 * not provide enough validation, hence the check on KERNEL_DS. 364 * 365 * nmi_uaccess_okay() ensures the probe is not run in an interim 366 * state, when the task or mm are switched. This is specifically 367 * required to prevent the use of temporary mm. 368 */ 369 370 if (unlikely(in_interrupt() || 371 current->flags & (PF_KTHREAD | PF_EXITING))) 372 return -EPERM; 373 if (unlikely(!nmi_uaccess_okay())) 374 return -EPERM; 375 376 return copy_to_user_nofault(unsafe_ptr, src, size); 377 } 378 379 static const struct bpf_func_proto bpf_probe_write_user_proto = { 380 .func = bpf_probe_write_user, 381 .gpl_only = true, 382 .ret_type = RET_INTEGER, 383 .arg1_type = ARG_ANYTHING, 384 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 385 .arg3_type = ARG_CONST_SIZE, 386 }; 387 388 #define MAX_TRACE_PRINTK_VARARGS 3 389 #define BPF_TRACE_PRINTK_SIZE 1024 390 391 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, 392 u64, arg2, u64, arg3) 393 { 394 u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; 395 struct bpf_bprintf_data data = { 396 .get_bin_args = true, 397 .get_buf = true, 398 }; 399 int ret; 400 401 ret = bpf_bprintf_prepare(fmt, fmt_size, args, 402 MAX_TRACE_PRINTK_VARARGS, &data); 403 if (ret < 0) 404 return ret; 405 406 ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); 407 408 trace_bpf_trace_printk(data.buf); 409 410 bpf_bprintf_cleanup(&data); 411 412 return ret; 413 } 414 415 static const struct bpf_func_proto bpf_trace_printk_proto = { 416 .func = bpf_trace_printk, 417 .gpl_only = true, 418 .ret_type = RET_INTEGER, 419 .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 420 .arg2_type = ARG_CONST_SIZE, 421 }; 422 
423 static void __set_printk_clr_event(struct work_struct *work) 424 { 425 /* 426 * This program might be calling bpf_trace_printk, 427 * so enable the associated bpf_trace/bpf_trace_printk event. 428 * Repeat this each time as it is possible a user has 429 * disabled bpf_trace_printk events. By loading a program 430 * calling bpf_trace_printk() however the user has expressed 431 * the intent to see such events. 432 */ 433 if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) 434 pr_warn_ratelimited("could not enable bpf_trace_printk events"); 435 } 436 static DECLARE_WORK(set_printk_work, __set_printk_clr_event); 437 438 const struct bpf_func_proto *bpf_get_trace_printk_proto(void) 439 { 440 schedule_work(&set_printk_work); 441 return &bpf_trace_printk_proto; 442 } 443 444 BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, 445 u32, data_len) 446 { 447 struct bpf_bprintf_data data = { 448 .get_bin_args = true, 449 .get_buf = true, 450 }; 451 int ret, num_args; 452 453 if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || 454 (data_len && !args)) 455 return -EINVAL; 456 num_args = data_len / 8; 457 458 ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); 459 if (ret < 0) 460 return ret; 461 462 ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); 463 464 trace_bpf_trace_printk(data.buf); 465 466 bpf_bprintf_cleanup(&data); 467 468 return ret; 469 } 470 471 static const struct bpf_func_proto bpf_trace_vprintk_proto = { 472 .func = bpf_trace_vprintk, 473 .gpl_only = true, 474 .ret_type = RET_INTEGER, 475 .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 476 .arg2_type = ARG_CONST_SIZE, 477 .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 478 .arg4_type = ARG_CONST_SIZE_OR_ZERO, 479 }; 480 481 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) 482 { 483 schedule_work(&set_printk_work); 484 return &bpf_trace_vprintk_proto; 485 } 486 487 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, 
fmt, u32, fmt_size, 488 const void *, args, u32, data_len) 489 { 490 struct bpf_bprintf_data data = { 491 .get_bin_args = true, 492 }; 493 int err, num_args; 494 495 if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || 496 (data_len && !args)) 497 return -EINVAL; 498 num_args = data_len / 8; 499 500 err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); 501 if (err < 0) 502 return err; 503 504 seq_bprintf(m, fmt, data.bin_args); 505 506 bpf_bprintf_cleanup(&data); 507 508 return seq_has_overflowed(m) ? -EOVERFLOW : 0; 509 } 510 511 BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file) 512 513 static const struct bpf_func_proto bpf_seq_printf_proto = { 514 .func = bpf_seq_printf, 515 .gpl_only = true, 516 .ret_type = RET_INTEGER, 517 .arg1_type = ARG_PTR_TO_BTF_ID, 518 .arg1_btf_id = &btf_seq_file_ids[0], 519 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 520 .arg3_type = ARG_CONST_SIZE, 521 .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 522 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 523 }; 524 525 BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len) 526 { 527 return seq_write(m, data, len) ? 
-EOVERFLOW : 0; 528 } 529 530 static const struct bpf_func_proto bpf_seq_write_proto = { 531 .func = bpf_seq_write, 532 .gpl_only = true, 533 .ret_type = RET_INTEGER, 534 .arg1_type = ARG_PTR_TO_BTF_ID, 535 .arg1_btf_id = &btf_seq_file_ids[0], 536 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 537 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 538 }; 539 540 BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr, 541 u32, btf_ptr_size, u64, flags) 542 { 543 const struct btf *btf; 544 s32 btf_id; 545 int ret; 546 547 ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); 548 if (ret) 549 return ret; 550 551 return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags); 552 } 553 554 static const struct bpf_func_proto bpf_seq_printf_btf_proto = { 555 .func = bpf_seq_printf_btf, 556 .gpl_only = true, 557 .ret_type = RET_INTEGER, 558 .arg1_type = ARG_PTR_TO_BTF_ID, 559 .arg1_btf_id = &btf_seq_file_ids[0], 560 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 561 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 562 .arg4_type = ARG_ANYTHING, 563 }; 564 565 static __always_inline int 566 get_map_perf_counter(struct bpf_map *map, u64 flags, 567 u64 *value, u64 *enabled, u64 *running) 568 { 569 struct bpf_array *array = container_of(map, struct bpf_array, map); 570 unsigned int cpu = smp_processor_id(); 571 u64 index = flags & BPF_F_INDEX_MASK; 572 struct bpf_event_entry *ee; 573 574 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 575 return -EINVAL; 576 if (index == BPF_F_CURRENT_CPU) 577 index = cpu; 578 if (unlikely(index >= array->map.max_entries)) 579 return -E2BIG; 580 581 ee = READ_ONCE(array->ptrs[index]); 582 if (!ee) 583 return -ENOENT; 584 585 return perf_event_read_local(ee->event, value, enabled, running); 586 } 587 588 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) 589 { 590 u64 value = 0; 591 int err; 592 593 err = get_map_perf_counter(map, flags, &value, NULL, NULL); 594 /* 595 * this api is ugly since we miss [-22..-2] range of valid 596 
* counter values, but that's uapi 597 */ 598 if (err) 599 return err; 600 return value; 601 } 602 603 const struct bpf_func_proto bpf_perf_event_read_proto = { 604 .func = bpf_perf_event_read, 605 .gpl_only = true, 606 .ret_type = RET_INTEGER, 607 .arg1_type = ARG_CONST_MAP_PTR, 608 .arg2_type = ARG_ANYTHING, 609 }; 610 611 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags, 612 struct bpf_perf_event_value *, buf, u32, size) 613 { 614 int err = -EINVAL; 615 616 if (unlikely(size != sizeof(struct bpf_perf_event_value))) 617 goto clear; 618 err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled, 619 &buf->running); 620 if (unlikely(err)) 621 goto clear; 622 return 0; 623 clear: 624 memset(buf, 0, size); 625 return err; 626 } 627 628 static const struct bpf_func_proto bpf_perf_event_read_value_proto = { 629 .func = bpf_perf_event_read_value, 630 .gpl_only = true, 631 .ret_type = RET_INTEGER, 632 .arg1_type = ARG_CONST_MAP_PTR, 633 .arg2_type = ARG_ANYTHING, 634 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 635 .arg4_type = ARG_CONST_SIZE, 636 }; 637 638 const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void) 639 { 640 return &bpf_perf_event_read_value_proto; 641 } 642 643 static __always_inline u64 644 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, 645 u64 flags, struct perf_raw_record *raw, 646 struct perf_sample_data *sd) 647 { 648 struct bpf_array *array = container_of(map, struct bpf_array, map); 649 unsigned int cpu = smp_processor_id(); 650 u64 index = flags & BPF_F_INDEX_MASK; 651 struct bpf_event_entry *ee; 652 struct perf_event *event; 653 654 if (index == BPF_F_CURRENT_CPU) 655 index = cpu; 656 if (unlikely(index >= array->map.max_entries)) 657 return -E2BIG; 658 659 ee = READ_ONCE(array->ptrs[index]); 660 if (!ee) 661 return -ENOENT; 662 663 event = ee->event; 664 if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || 665 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) 666 return -EINVAL; 667 668 
if (unlikely(event->oncpu != cpu)) 669 return -EOPNOTSUPP; 670 671 perf_sample_save_raw_data(sd, event, raw); 672 673 return perf_event_output(event, sd, regs); 674 } 675 676 /* 677 * Support executing tracepoints in normal, irq, and nmi context that each call 678 * bpf_perf_event_output 679 */ 680 struct bpf_trace_sample_data { 681 struct perf_sample_data sds[3]; 682 }; 683 684 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); 685 static DEFINE_PER_CPU(int, bpf_trace_nest_level); 686 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, 687 u64, flags, void *, data, u64, size) 688 { 689 struct bpf_trace_sample_data *sds; 690 struct perf_raw_record raw = { 691 .frag = { 692 .size = size, 693 .data = data, 694 }, 695 }; 696 struct perf_sample_data *sd; 697 int nest_level, err; 698 699 preempt_disable(); 700 sds = this_cpu_ptr(&bpf_trace_sds); 701 nest_level = this_cpu_inc_return(bpf_trace_nest_level); 702 703 if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { 704 err = -EBUSY; 705 goto out; 706 } 707 708 sd = &sds->sds[nest_level - 1]; 709 710 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { 711 err = -EINVAL; 712 goto out; 713 } 714 715 perf_sample_data_init(sd, 0, 0); 716 717 err = __bpf_perf_event_output(regs, map, flags, &raw, sd); 718 out: 719 this_cpu_dec(bpf_trace_nest_level); 720 preempt_enable(); 721 return err; 722 } 723 724 static const struct bpf_func_proto bpf_perf_event_output_proto = { 725 .func = bpf_perf_event_output, 726 .gpl_only = true, 727 .ret_type = RET_INTEGER, 728 .arg1_type = ARG_PTR_TO_CTX, 729 .arg2_type = ARG_CONST_MAP_PTR, 730 .arg3_type = ARG_ANYTHING, 731 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 732 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 733 }; 734 735 static DEFINE_PER_CPU(int, bpf_event_output_nest_level); 736 struct bpf_nested_pt_regs { 737 struct pt_regs regs[3]; 738 }; 739 static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs); 740 static DEFINE_PER_CPU(struct 
bpf_trace_sample_data, bpf_misc_sds); 741 742 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 743 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) 744 { 745 struct perf_raw_frag frag = { 746 .copy = ctx_copy, 747 .size = ctx_size, 748 .data = ctx, 749 }; 750 struct perf_raw_record raw = { 751 .frag = { 752 { 753 .next = ctx_size ? &frag : NULL, 754 }, 755 .size = meta_size, 756 .data = meta, 757 }, 758 }; 759 struct perf_sample_data *sd; 760 struct pt_regs *regs; 761 int nest_level; 762 u64 ret; 763 764 preempt_disable(); 765 nest_level = this_cpu_inc_return(bpf_event_output_nest_level); 766 767 if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { 768 ret = -EBUSY; 769 goto out; 770 } 771 sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]); 772 regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]); 773 774 perf_fetch_caller_regs(regs); 775 perf_sample_data_init(sd, 0, 0); 776 777 ret = __bpf_perf_event_output(regs, map, flags, &raw, sd); 778 out: 779 this_cpu_dec(bpf_event_output_nest_level); 780 preempt_enable(); 781 return ret; 782 } 783 784 BPF_CALL_0(bpf_get_current_task) 785 { 786 return (long) current; 787 } 788 789 const struct bpf_func_proto bpf_get_current_task_proto = { 790 .func = bpf_get_current_task, 791 .gpl_only = true, 792 .ret_type = RET_INTEGER, 793 }; 794 795 BPF_CALL_0(bpf_get_current_task_btf) 796 { 797 return (unsigned long) current; 798 } 799 800 const struct bpf_func_proto bpf_get_current_task_btf_proto = { 801 .func = bpf_get_current_task_btf, 802 .gpl_only = true, 803 .ret_type = RET_PTR_TO_BTF_ID_TRUSTED, 804 .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 805 }; 806 807 BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) 808 { 809 return (unsigned long) task_pt_regs(task); 810 } 811 812 BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs) 813 814 const struct bpf_func_proto bpf_task_pt_regs_proto = { 815 .func = bpf_task_pt_regs, 816 .gpl_only = true, 817 .arg1_type = 
ARG_PTR_TO_BTF_ID, 818 .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 819 .ret_type = RET_PTR_TO_BTF_ID, 820 .ret_btf_id = &bpf_task_pt_regs_ids[0], 821 }; 822 823 struct send_signal_irq_work { 824 struct irq_work irq_work; 825 struct task_struct *task; 826 u32 sig; 827 enum pid_type type; 828 bool has_siginfo; 829 struct kernel_siginfo info; 830 }; 831 832 static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); 833 834 static void do_bpf_send_signal(struct irq_work *entry) 835 { 836 struct send_signal_irq_work *work; 837 struct kernel_siginfo *siginfo; 838 839 work = container_of(entry, struct send_signal_irq_work, irq_work); 840 siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV; 841 842 group_send_sig_info(work->sig, siginfo, work->task, work->type); 843 put_task_struct(work->task); 844 } 845 846 static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value) 847 { 848 struct send_signal_irq_work *work = NULL; 849 struct kernel_siginfo info; 850 struct kernel_siginfo *siginfo; 851 852 if (!task) { 853 task = current; 854 siginfo = SEND_SIG_PRIV; 855 } else { 856 clear_siginfo(&info); 857 info.si_signo = sig; 858 info.si_errno = 0; 859 info.si_code = SI_KERNEL; 860 info.si_pid = 0; 861 info.si_uid = 0; 862 info.si_value.sival_ptr = (void __user __force *)(unsigned long)value; 863 siginfo = &info; 864 } 865 866 /* Similar to bpf_probe_write_user, task needs to be 867 * in a sound condition and kernel memory access be 868 * permitted in order to send signal to the current 869 * task. 870 */ 871 if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING))) 872 return -EPERM; 873 if (unlikely(!nmi_uaccess_okay())) 874 return -EPERM; 875 /* Task should not be pid=1 to avoid kernel panic. */ 876 if (unlikely(is_global_init(task))) 877 return -EPERM; 878 879 if (preempt_count() != 0 || irqs_disabled()) { 880 /* Do an early check on signal validity. Otherwise, 881 * the error is lost in deferred irq_work. 
882 */ 883 if (unlikely(!valid_signal(sig))) 884 return -EINVAL; 885 886 work = this_cpu_ptr(&send_signal_work); 887 if (irq_work_is_busy(&work->irq_work)) 888 return -EBUSY; 889 890 /* Add the current task, which is the target of sending signal, 891 * to the irq_work. The current task may change when queued 892 * irq works get executed. 893 */ 894 work->task = get_task_struct(task); 895 work->has_siginfo = siginfo == &info; 896 if (work->has_siginfo) 897 copy_siginfo(&work->info, &info); 898 work->sig = sig; 899 work->type = type; 900 irq_work_queue(&work->irq_work); 901 return 0; 902 } 903 904 return group_send_sig_info(sig, siginfo, task, type); 905 } 906 907 BPF_CALL_1(bpf_send_signal, u32, sig) 908 { 909 return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0); 910 } 911 912 const struct bpf_func_proto bpf_send_signal_proto = { 913 .func = bpf_send_signal, 914 .gpl_only = false, 915 .ret_type = RET_INTEGER, 916 .arg1_type = ARG_ANYTHING, 917 }; 918 919 BPF_CALL_1(bpf_send_signal_thread, u32, sig) 920 { 921 return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0); 922 } 923 924 const struct bpf_func_proto bpf_send_signal_thread_proto = { 925 .func = bpf_send_signal_thread, 926 .gpl_only = false, 927 .ret_type = RET_INTEGER, 928 .arg1_type = ARG_ANYTHING, 929 }; 930 931 BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz) 932 { 933 struct path copy; 934 long len; 935 char *p; 936 937 if (!sz) 938 return 0; 939 940 /* 941 * The path pointer is verified as trusted and safe to use, 942 * but let's double check it's valid anyway to workaround 943 * potentially broken verifier. 
944 */ 945 len = copy_from_kernel_nofault(©, path, sizeof(*path)); 946 if (len < 0) 947 return len; 948 949 p = d_path(©, buf, sz); 950 if (IS_ERR(p)) { 951 len = PTR_ERR(p); 952 } else { 953 len = buf + sz - p; 954 memmove(buf, p, len); 955 } 956 957 return len; 958 } 959 960 BTF_SET_START(btf_allowlist_d_path) 961 #ifdef CONFIG_SECURITY 962 BTF_ID(func, security_file_permission) 963 BTF_ID(func, security_inode_getattr) 964 BTF_ID(func, security_file_open) 965 #endif 966 #ifdef CONFIG_SECURITY_PATH 967 BTF_ID(func, security_path_truncate) 968 #endif 969 BTF_ID(func, vfs_truncate) 970 BTF_ID(func, vfs_fallocate) 971 BTF_ID(func, dentry_open) 972 BTF_ID(func, vfs_getattr) 973 BTF_ID(func, filp_close) 974 BTF_SET_END(btf_allowlist_d_path) 975 976 static bool bpf_d_path_allowed(const struct bpf_prog *prog) 977 { 978 if (prog->type == BPF_PROG_TYPE_TRACING && 979 prog->expected_attach_type == BPF_TRACE_ITER) 980 return true; 981 982 if (prog->type == BPF_PROG_TYPE_LSM) 983 return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); 984 985 return btf_id_set_contains(&btf_allowlist_d_path, 986 prog->aux->attach_btf_id); 987 } 988 989 BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path) 990 991 static const struct bpf_func_proto bpf_d_path_proto = { 992 .func = bpf_d_path, 993 .gpl_only = false, 994 .ret_type = RET_INTEGER, 995 .arg1_type = ARG_PTR_TO_BTF_ID, 996 .arg1_btf_id = &bpf_d_path_btf_ids[0], 997 .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, 998 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 999 .allowed = bpf_d_path_allowed, 1000 }; 1001 1002 #define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \ 1003 BTF_F_PTR_RAW | BTF_F_ZERO) 1004 1005 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, 1006 u64 flags, const struct btf **btf, 1007 s32 *btf_id) 1008 { 1009 const struct btf_type *t; 1010 1011 if (unlikely(flags & ~(BTF_F_ALL))) 1012 return -EINVAL; 1013 1014 if (btf_ptr_size != sizeof(struct btf_ptr)) 1015 return -EINVAL; 1016 1017 *btf = 
bpf_get_btf_vmlinux(); 1018 1019 if (IS_ERR_OR_NULL(*btf)) 1020 return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL; 1021 1022 if (ptr->type_id > 0) 1023 *btf_id = ptr->type_id; 1024 else 1025 return -EINVAL; 1026 1027 if (*btf_id > 0) 1028 t = btf_type_by_id(*btf, *btf_id); 1029 if (*btf_id <= 0 || !t) 1030 return -ENOENT; 1031 1032 return 0; 1033 } 1034 1035 BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr, 1036 u32, btf_ptr_size, u64, flags) 1037 { 1038 const struct btf *btf; 1039 s32 btf_id; 1040 int ret; 1041 1042 ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); 1043 if (ret) 1044 return ret; 1045 1046 return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size, 1047 flags); 1048 } 1049 1050 const struct bpf_func_proto bpf_snprintf_btf_proto = { 1051 .func = bpf_snprintf_btf, 1052 .gpl_only = false, 1053 .ret_type = RET_INTEGER, 1054 .arg1_type = ARG_PTR_TO_MEM | MEM_WRITE, 1055 .arg2_type = ARG_CONST_SIZE, 1056 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1057 .arg4_type = ARG_CONST_SIZE, 1058 .arg5_type = ARG_ANYTHING, 1059 }; 1060 1061 BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) 1062 { 1063 /* This helper call is inlined by verifier. 
*/ 1064 return ((u64 *)ctx)[-2]; 1065 } 1066 1067 static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { 1068 .func = bpf_get_func_ip_tracing, 1069 .gpl_only = true, 1070 .ret_type = RET_INTEGER, 1071 .arg1_type = ARG_PTR_TO_CTX, 1072 }; 1073 1074 static inline unsigned long get_entry_ip(unsigned long fentry_ip) 1075 { 1076 #ifdef CONFIG_X86_KERNEL_IBT 1077 if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE))) 1078 fentry_ip -= ENDBR_INSN_SIZE; 1079 #endif 1080 return fentry_ip; 1081 } 1082 1083 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) 1084 { 1085 struct bpf_trace_run_ctx *run_ctx __maybe_unused; 1086 struct kprobe *kp; 1087 1088 #ifdef CONFIG_UPROBES 1089 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1090 if (run_ctx->is_uprobe) 1091 return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr; 1092 #endif 1093 1094 kp = kprobe_running(); 1095 1096 if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY)) 1097 return 0; 1098 1099 return get_entry_ip((uintptr_t)kp->addr); 1100 } 1101 1102 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { 1103 .func = bpf_get_func_ip_kprobe, 1104 .gpl_only = true, 1105 .ret_type = RET_INTEGER, 1106 .arg1_type = ARG_PTR_TO_CTX, 1107 }; 1108 1109 BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs) 1110 { 1111 return bpf_kprobe_multi_entry_ip(current->bpf_ctx); 1112 } 1113 1114 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = { 1115 .func = bpf_get_func_ip_kprobe_multi, 1116 .gpl_only = false, 1117 .ret_type = RET_INTEGER, 1118 .arg1_type = ARG_PTR_TO_CTX, 1119 }; 1120 1121 BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs) 1122 { 1123 return bpf_kprobe_multi_cookie(current->bpf_ctx); 1124 } 1125 1126 static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = { 1127 .func = bpf_get_attach_cookie_kprobe_multi, 1128 .gpl_only = false, 1129 .ret_type = RET_INTEGER, 1130 .arg1_type = 
ARG_PTR_TO_CTX, 1131 }; 1132 1133 BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs) 1134 { 1135 return bpf_uprobe_multi_entry_ip(current->bpf_ctx); 1136 } 1137 1138 static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = { 1139 .func = bpf_get_func_ip_uprobe_multi, 1140 .gpl_only = false, 1141 .ret_type = RET_INTEGER, 1142 .arg1_type = ARG_PTR_TO_CTX, 1143 }; 1144 1145 BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs) 1146 { 1147 return bpf_uprobe_multi_cookie(current->bpf_ctx); 1148 } 1149 1150 static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = { 1151 .func = bpf_get_attach_cookie_uprobe_multi, 1152 .gpl_only = false, 1153 .ret_type = RET_INTEGER, 1154 .arg1_type = ARG_PTR_TO_CTX, 1155 }; 1156 1157 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx) 1158 { 1159 struct bpf_trace_run_ctx *run_ctx; 1160 1161 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1162 return run_ctx->bpf_cookie; 1163 } 1164 1165 static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = { 1166 .func = bpf_get_attach_cookie_trace, 1167 .gpl_only = false, 1168 .ret_type = RET_INTEGER, 1169 .arg1_type = ARG_PTR_TO_CTX, 1170 }; 1171 1172 BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx) 1173 { 1174 return ctx->event->bpf_cookie; 1175 } 1176 1177 static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { 1178 .func = bpf_get_attach_cookie_pe, 1179 .gpl_only = false, 1180 .ret_type = RET_INTEGER, 1181 .arg1_type = ARG_PTR_TO_CTX, 1182 }; 1183 1184 BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx) 1185 { 1186 struct bpf_trace_run_ctx *run_ctx; 1187 1188 run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); 1189 return run_ctx->bpf_cookie; 1190 } 1191 1192 static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { 1193 .func = bpf_get_attach_cookie_tracing, 1194 .gpl_only = false, 1195 .ret_type = 
RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/*
 * bpf_get_branch_snapshot - copy a branch-stack snapshot into @buf.
 *
 * @size is rounded down to a whole number of struct perf_branch_entry.
 * Returns the number of bytes written, -EINVAL if @flags is non-zero, or
 * -ENOENT if the snapshot produced no entries.
 *
 * NOTE(review): the snapshot is taken before @flags is validated, so @buf
 * may be written even when -EINVAL is returned.  Presumably this keeps the
 * number of branches executed before the snapshot minimal; confirm before
 * reordering.
 */
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	u32 entry_cnt = size / br_entry_size;

	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);

	if (unlikely(flags))
		return -EINVAL;

	if (!entry_cnt)
		return -ENOENT;

	return entry_cnt * br_entry_size;
}

const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
	.func		= bpf_get_branch_snapshot,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
};

/*
 * get_func_arg - store the @n-th u64 of @ctx into @value.
 *
 * The argument count is the low byte of the u64 slot preceding @ctx.
 * Returns -EINVAL when @n is not below that count, 0 otherwise.
 */
BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;

	if ((u64) n >= nr_args)
		return -EINVAL;
	*value = ((u64 *)ctx)[n];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_arg_proto = {
	.func		= get_func_arg,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg3_size	= sizeof(u64),
};

/*
 * get_func_ret - store the u64 that follows the last argument slot of
 * @ctx (index == argument count) into @value.  Always returns 0.
 */
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;

	*value = ((u64 *)ctx)[nr_args];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_ret_proto = {
	.func		= get_func_ret,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg2_size	= sizeof(u64),
};

/* Returns the argument count: low byte of the u64 slot preceding @ctx. */
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
	/* This helper call is inlined by verifier.
*/
	return ((u64 *)ctx)[-1] & 0xFF;
}

static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
	.func		= get_func_arg_cnt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/*
 * bpf_tracing_func_proto - helper lookup shared by the tracing program
 * types in this file.
 *
 * Resolution order:
 *   1. helpers handled directly in the first switch;
 *   2. bpf_base_func_proto();
 *   3. helpers that additionally require CAP_SYS_ADMIN on the program's
 *      token (second switch).
 *
 * Returns NULL when the helper is unknown or not permitted.
 */
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	switch (func_id) {
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	/* Compat probe_read helpers are refused under lockdown. */
	case BPF_FUNC_probe_read:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_proto;
	case BPF_FUNC_probe_read_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_str_proto;
#endif
	case BPF_FUNC_get_func_ip:
		return &bpf_get_func_ip_proto_tracing;
	default:
		break;
	}

	func_proto = bpf_base_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	/* Everything below needs CAP_SYS_ADMIN. */
	if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
		return NULL;

	switch (func_id) {
	case BPF_FUNC_probe_write_user:
		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
1304 NULL : &bpf_probe_write_user_proto; 1305 default: 1306 return NULL; 1307 } 1308 } 1309 1310 static bool is_kprobe_multi(const struct bpf_prog *prog) 1311 { 1312 return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI || 1313 prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; 1314 } 1315 1316 static inline bool is_kprobe_session(const struct bpf_prog *prog) 1317 { 1318 return prog->type == BPF_PROG_TYPE_KPROBE && 1319 prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; 1320 } 1321 1322 static inline bool is_uprobe_multi(const struct bpf_prog *prog) 1323 { 1324 return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI || 1325 prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; 1326 } 1327 1328 static inline bool is_uprobe_session(const struct bpf_prog *prog) 1329 { 1330 return prog->type == BPF_PROG_TYPE_KPROBE && 1331 prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; 1332 } 1333 1334 static inline bool is_trace_fsession(const struct bpf_prog *prog) 1335 { 1336 return prog->type == BPF_PROG_TYPE_TRACING && 1337 (prog->expected_attach_type == BPF_TRACE_FSESSION || 1338 prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI); 1339 } 1340 1341 static const struct bpf_func_proto * 1342 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1343 { 1344 switch (func_id) { 1345 case BPF_FUNC_perf_event_output: 1346 return &bpf_perf_event_output_proto; 1347 case BPF_FUNC_get_stackid: 1348 return &bpf_get_stackid_proto; 1349 case BPF_FUNC_get_stack: 1350 return prog->sleepable ? 
&bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	/* The multi-attach variants take precedence over the plain kprobe one. */
	case BPF_FUNC_get_func_ip:
		if (is_kprobe_multi(prog))
			return &bpf_get_func_ip_proto_kprobe_multi;
		if (is_uprobe_multi(prog))
			return &bpf_get_func_ip_proto_uprobe_multi;
		return &bpf_get_func_ip_proto_kprobe;
	case BPF_FUNC_get_attach_cookie:
		if (is_kprobe_multi(prog))
			return &bpf_get_attach_cookie_proto_kmulti;
		if (is_uprobe_multi(prog))
			return &bpf_get_attach_cookie_proto_umulti;
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

/*
 * bpf+kprobe programs can access fields of 'struct pt_regs'
 *
 * An access is accepted when it lies entirely inside struct pt_regs and
 * @off is a multiple of @size.  A write access additionally sets
 * prog->aux->kprobe_write_ctx as a side effect of validation.
 */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	if (type == BPF_WRITE)
		prog->aux->kprobe_write_ctx = true;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

/* No program-level callbacks are provided for kprobe programs. */
const struct bpf_prog_ops kprobe_prog_ops = {
};

/* perf_event_output for tracepoint programs; see the comment in the body. */
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'.
Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

/*
 * get_stackid for tracepoint programs: the pt_regs pointer is read from
 * the start of @tp_buff and passed on to bpf_get_stackid().
 */
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * get_stack for tracepoint programs: same pt_regs retrieval as above,
 * forwarded to the raw bpf_get_stack() helper.
 */
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct
bpf_prog *prog) 1470 { 1471 switch (func_id) { 1472 case BPF_FUNC_perf_event_output: 1473 return &bpf_perf_event_output_proto_tp; 1474 case BPF_FUNC_get_stackid: 1475 return &bpf_get_stackid_proto_tp; 1476 case BPF_FUNC_get_stack: 1477 return &bpf_get_stack_proto_tp; 1478 case BPF_FUNC_get_attach_cookie: 1479 return &bpf_get_attach_cookie_proto_trace; 1480 default: 1481 return bpf_tracing_func_proto(func_id, prog); 1482 } 1483 } 1484 1485 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, 1486 const struct bpf_prog *prog, 1487 struct bpf_insn_access_aux *info) 1488 { 1489 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) 1490 return false; 1491 if (type != BPF_READ) 1492 return false; 1493 if (off % size != 0) 1494 return false; 1495 1496 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); 1497 return true; 1498 } 1499 1500 const struct bpf_verifier_ops tracepoint_verifier_ops = { 1501 .get_func_proto = tp_prog_func_proto, 1502 .is_valid_access = tp_prog_is_valid_access, 1503 }; 1504 1505 const struct bpf_prog_ops tracepoint_prog_ops = { 1506 }; 1507 1508 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx, 1509 struct bpf_perf_event_value *, buf, u32, size) 1510 { 1511 int err = -EINVAL; 1512 1513 if (unlikely(size != sizeof(struct bpf_perf_event_value))) 1514 goto clear; 1515 err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled, 1516 &buf->running); 1517 if (unlikely(err)) 1518 goto clear; 1519 return 0; 1520 clear: 1521 memset(buf, 0, size); 1522 return err; 1523 } 1524 1525 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { 1526 .func = bpf_perf_prog_read_value, 1527 .gpl_only = true, 1528 .ret_type = RET_INTEGER, 1529 .arg1_type = ARG_PTR_TO_CTX, 1530 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 1531 .arg3_type = ARG_CONST_SIZE, 1532 }; 1533 1534 BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, 1535 void *, buf, u32, size, u64, flags) 1536 { 1537 
static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	struct perf_branch_stack *br_stack = ctx->data->br_stack;
	u32 to_copy;

	/* BPF_F_GET_BRANCH_RECORDS_SIZE is the only flag accepted. */
	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
		return -EINVAL;

	/* The sample must carry a branch stack ... */
	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
		return -ENOENT;

	/* ... and the pointer to it must be set. */
	if (unlikely(!br_stack))
		return -ENOENT;

	/* Size query: report the bytes needed without touching buf. */
	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
		return br_stack->nr * br_entry_size;

	/* A copy needs a buffer holding a whole number of entries. */
	if (!buf || (size % br_entry_size != 0))
		return -EINVAL;

	/* Copy as many bytes as are available and fit; return that count. */
	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
	memcpy(buf, br_stack->entries, to_copy);

	return to_copy;
}

static const struct bpf_func_proto bpf_read_branch_records_proto = {
	.func           = bpf_read_branch_records,
	.gpl_only       = true,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type      = ARG_ANYTHING,
};

/*
 * Helper lookup for perf_event programs: perf-specific protos first,
 * bpf_tracing_func_proto() for everything else.
 */
static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_pe;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_pe;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto;
	case BPF_FUNC_read_branch_records:
		return &bpf_read_branch_records_proto;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_pe;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
1597 * 1598 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage 1599 * in normal, irq, and nmi context. 1600 */ 1601 struct bpf_raw_tp_regs { 1602 struct pt_regs regs[3]; 1603 }; 1604 static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); 1605 static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); 1606 static struct pt_regs *get_bpf_raw_tp_regs(void) 1607 { 1608 struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); 1609 int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); 1610 1611 if (nest_level > ARRAY_SIZE(tp_regs->regs)) { 1612 this_cpu_dec(bpf_raw_tp_nest_level); 1613 return ERR_PTR(-EBUSY); 1614 } 1615 1616 return &tp_regs->regs[nest_level - 1]; 1617 } 1618 1619 static void put_bpf_raw_tp_regs(void) 1620 { 1621 this_cpu_dec(bpf_raw_tp_nest_level); 1622 } 1623 1624 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, 1625 struct bpf_map *, map, u64, flags, void *, data, u64, size) 1626 { 1627 struct pt_regs *regs = get_bpf_raw_tp_regs(); 1628 int ret; 1629 1630 if (IS_ERR(regs)) 1631 return PTR_ERR(regs); 1632 1633 perf_fetch_caller_regs(regs); 1634 ret = ____bpf_perf_event_output(regs, map, flags, data, size); 1635 1636 put_bpf_raw_tp_regs(); 1637 return ret; 1638 } 1639 1640 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { 1641 .func = bpf_perf_event_output_raw_tp, 1642 .gpl_only = true, 1643 .ret_type = RET_INTEGER, 1644 .arg1_type = ARG_PTR_TO_CTX, 1645 .arg2_type = ARG_CONST_MAP_PTR, 1646 .arg3_type = ARG_ANYTHING, 1647 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1648 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 1649 }; 1650 1651 extern const struct bpf_func_proto bpf_skb_output_proto; 1652 extern const struct bpf_func_proto bpf_xdp_output_proto; 1653 extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; 1654 1655 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, 1656 struct bpf_map *, map, u64, flags) 1657 { 1658 struct 
pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	/* No free scratch slot: propagate the error encoded in the pointer. */
	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			      flags, 0, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * get_stack for raw tracepoint programs: capture the caller's registers
 * into a per-CPU scratch slot, call bpf_get_stack() with them, then
 * release the slot.
 */
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			    (unsigned long) size, flags, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
	.func		= bpf_get_stack_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

/*
 * Helper lookup for raw tracepoint programs: raw_tp-specific protos
 * first, bpf_tracing_func_proto() for everything else.
 */
static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_raw_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_raw_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_raw_tp;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_tracing;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

const
struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	/*
	 * Helper lookup for BPF_PROG_TYPE_TRACING programs.  Helpers listed
	 * here are resolved directly; several are only handed out for
	 * particular attach types.
	 */
	const struct bpf_func_proto *fn;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_skb_output:
		return &bpf_skb_output_proto;
	case BPF_FUNC_xdp_output:
		return &bpf_xdp_output_proto;
	case BPF_FUNC_skc_to_tcp6_sock:
		return &bpf_skc_to_tcp6_sock_proto;
	case BPF_FUNC_skc_to_tcp_sock:
		return &bpf_skc_to_tcp_sock_proto;
	case BPF_FUNC_skc_to_tcp_timewait_sock:
		return &bpf_skc_to_tcp_timewait_sock_proto;
	case BPF_FUNC_skc_to_tcp_request_sock:
		return &bpf_skc_to_tcp_request_sock_proto;
	case BPF_FUNC_skc_to_udp6_sock:
		return &bpf_skc_to_udp6_sock_proto;
	case BPF_FUNC_skc_to_unix_sock:
		return &bpf_skc_to_unix_sock_proto;
	case BPF_FUNC_skc_to_mptcp_sock:
		return &bpf_skc_to_mptcp_sock_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_tracing_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_tracing_proto;
	case BPF_FUNC_sock_from_file:
		return &bpf_sock_from_file_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_ptr_cookie_proto;
	case BPF_FUNC_xdp_get_buff_len:
		return &bpf_xdp_get_buff_len_trace_proto;
#endif
	/* The seq_* helpers are only available to iterator programs. */
	case BPF_FUNC_seq_printf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_proto :
		       NULL;
	case BPF_FUNC_seq_write:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_write_proto :
		       NULL;
	case BPF_FUNC_seq_printf_btf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
&bpf_seq_printf_btf_proto :
		       NULL;
	case BPF_FUNC_d_path:
		return &bpf_d_path_proto;
	/*
	 * Argument helpers: offered to programs that have a trampoline and,
	 * for arg/arg_cnt, also to the RAW_TP attach type.
	 */
	case BPF_FUNC_get_func_arg:
		if (bpf_prog_has_trampoline(prog) ||
		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
			return &bpf_get_func_arg_proto;
		return NULL;
	case BPF_FUNC_get_func_ret:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
	case BPF_FUNC_get_func_arg_cnt:
		if (bpf_prog_has_trampoline(prog) ||
		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
			return &bpf_get_func_arg_cnt_proto;
		return NULL;
	case BPF_FUNC_get_attach_cookie:
		if (prog->type == BPF_PROG_TYPE_TRACING &&
		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
			return &bpf_get_attach_cookie_proto_tracing;
		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
	default:
		/* Fall back to the raw_tp set, then to iterator helpers. */
		fn = raw_tp_prog_func_proto(func_id, prog);
		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
			fn = bpf_iter_get_func_proto(func_id, prog);
		return fn;
	}
}

/* Raw tracepoint ctx access: delegated to bpf_tracing_ctx_access(). */
static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}

/* Tracing ctx access: delegated to bpf_tracing_btf_ctx_access(). */
static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}

/* Weak default: test_run is reported as unsupported unless overridden. */
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}

const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_prog_test_run_raw_tp,
#endif
};

const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto  = tracing_prog_func_proto,
	.is_valid_access = tracing_prog_is_valid_access,
};

const struct bpf_prog_ops tracing_prog_ops = {
	.test_run = bpf_prog_test_run_tracing,
};

/*
 * Writable raw tracepoints: offset 0 must be read as a full u64 and is
 * typed PTR_TO_TP_BUFFER; the regular raw_tp check is applied to every
 * access, including that one.
 */
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
						 enum bpf_access_type type,
						 const struct bpf_prog *prog,
						 struct bpf_insn_access_aux *info)
{
	if (off == 0) {
		if (size != sizeof(u64) || type != BPF_READ)
			return false;
		info->reg_type = PTR_TO_TP_BUFFER;
	}
	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}

const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

/* No program-level callbacks for writable raw tracepoint programs. */
const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};

/*
 * Context access check for perf_event programs: read-only, inside
 * struct bpf_perf_event_data.  sample_period and addr are u64 fields that
 * may be read narrowly; every other offset must be read as a long.
 */
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const int size_u64 = sizeof(u64);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	/*
	 * A misaligned access is only tolerated where unsigned long is
	 * 4 bytes wide, for an 8-byte access at a 4-byte-aligned offset.
	 */
	if (off % size != 0) {
		if (sizeof(unsigned long) != 4)
			return false;
		if (size != 8)
			return false;
		if (off % size != 4)
			return false;
	}

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	case bpf_ctx_range(struct bpf_perf_event_data, addr):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	default:
if (size != sizeof(long))
			return false;
	}

	return true;
}

/*
 * pe_prog_convert_ctx_access - rewrite a perf_event ctx load.
 *
 * Every case emits two loads into @insn_buf: first a pointer out of
 * struct bpf_perf_event_data_kern (data or regs), then the requested
 * value through that pointer.  sample_period and addr map to the 8-byte
 * period and addr fields of struct perf_sample_data; any other offset is
 * read as a long at si->off through the regs pointer.
 *
 * Returns the number of instructions written.
 */
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	case offsetof(struct bpf_perf_event_data, addr):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, addr, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

/* No program-level callbacks are provided for perf_event programs. */
const struct bpf_prog_ops perf_event_prog_ops = {
};

/* Held around the attach, detach and query of event prog arrays below. */
static DEFINE_MUTEX(bpf_event_mutex);

/* Upper bound on programs per event prog_array and on query.ids_len. */
#define BPF_TRACE_MAX_PROGS 64

/*
 * perf_event_attach_bpf_prog - add @prog, with @bpf_cookie, to the prog
 * array of @event's trace event and record it in @event.
 *
 * Returns -EINVAL for an override program on an unsuitable kprobe,
 * -EEXIST if @event already has a program, -E2BIG if the array already
 * holds BPF_TRACE_MAX_PROGS entries, or the error from
 * bpf_prog_array_copy().
 */
int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog,
			       u64 bpf_cookie)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array
*new_array;
	/* Result if the event already has a program attached. */
	int ret = -EEXIST;

	/*
	 * Kprobe override only works if they are on the function entry,
	 * and only if they are on the opt-in list.
	 */
	if (prog->kprobe_override &&
	    (!trace_kprobe_on_func_entry(event->tp_event) ||
	     !trace_kprobe_error_injectable(event->tp_event)))
		return -EINVAL;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	/* Build a new array that additionally contains prog. */
	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	event->bpf_cookie = bpf_cookie;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free_sleepable(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

/*
 * perf_event_detach_bpf_prog - remove @event's program from the prog
 * array of its trace event and drop the reference held on it.
 *
 * If building the reduced array fails, the program is instead removed
 * from the existing array with bpf_prog_array_delete_safe().  The final
 * bpf_prog_put() happens outside the mutex, after waiting for a
 * tasks-trace RCU grace period.
 */
void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	struct bpf_prog *prog = NULL;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (!old_array)
		goto put;

	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free_sleepable(old_array);
	}

put:
	/* Remember the program so it can be released after unlocking. */
	prog = event->prog;
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);

	if (prog) {
		/*
		 * It could be that the bpf_prog is not sleepable (and will be freed
* via normal RCU), but is called from a point that supports sleepable
		 * programs and uses tasks-trace-RCU.
		 */
		synchronize_rcu_tasks_trace();

		bpf_prog_put(prog);
	}
}

/*
 * perf_event_query_prog_array - report the programs attached to the
 * trace event of @event to user space.
 *
 * @info points to a user struct perf_event_query_bpf; its ids_len gives
 * the capacity of the ids[] array.  The program count is written back to
 * prog_cnt and up to ids_len ids to ids[].
 *
 * Returns -EPERM without perfmon capability, -EINVAL for events that are
 * not PERF_TYPE_TRACEPOINT, -E2BIG when ids_len exceeds
 * BPF_TRACE_MAX_PROGS, -ENOMEM, -EFAULT on a failed user copy, otherwise
 * the result of bpf_prog_array_copy_info().
 */
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	struct bpf_prog_array *progs;
	/*
	 * NOTE(review): prog_cnt has no initialiser and is copied to user
	 * space unconditionally further down; this relies on
	 * bpf_prog_array_copy_info() always storing to it.  Confirm.
	 */
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!perfmon_capable())
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
2063 */ 2064 2065 mutex_lock(&bpf_event_mutex); 2066 progs = bpf_event_rcu_dereference(event->tp_event->prog_array); 2067 ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt); 2068 mutex_unlock(&bpf_event_mutex); 2069 2070 if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || 2071 copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) 2072 ret = -EFAULT; 2073 2074 kfree(ids); 2075 return ret; 2076 } 2077 2078 extern struct bpf_raw_event_map __start__bpf_raw_tp[]; 2079 extern struct bpf_raw_event_map __stop__bpf_raw_tp[]; 2080 2081 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name) 2082 { 2083 struct bpf_raw_event_map *btp = __start__bpf_raw_tp; 2084 2085 for (; btp < __stop__bpf_raw_tp; btp++) { 2086 if (!strcmp(btp->tp->name, name)) 2087 return btp; 2088 } 2089 2090 return bpf_get_raw_tracepoint_module(name); 2091 } 2092 2093 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) 2094 { 2095 struct module *mod; 2096 2097 guard(rcu)(); 2098 mod = __module_address((unsigned long)btp); 2099 module_put(mod); 2100 } 2101 2102 static __always_inline 2103 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) 2104 { 2105 struct srcu_ctr __percpu *scp = NULL; 2106 struct bpf_prog *prog = link->link.prog; 2107 bool sleepable = prog->sleepable; 2108 struct bpf_run_ctx *old_run_ctx; 2109 struct bpf_trace_run_ctx run_ctx; 2110 2111 if (sleepable) { 2112 scp = rcu_read_lock_tasks_trace(); 2113 migrate_disable(); 2114 } else { 2115 rcu_read_lock_dont_migrate(); 2116 } 2117 2118 if (unlikely(!bpf_prog_get_recursion_context(prog))) { 2119 bpf_prog_inc_misses_counter(prog); 2120 goto out; 2121 } 2122 2123 run_ctx.bpf_cookie = link->cookie; 2124 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 2125 2126 (void)bpf_prog_run(prog, args); 2127 2128 bpf_reset_run_ctx(old_run_ctx); 2129 out: 2130 bpf_prog_put_recursion_context(prog); 2131 2132 if (sleepable) { 2133 migrate_enable(); 2134 rcu_read_unlock_tasks_trace(scp); 2135 } else { 
rcu_read_unlock_migrate();
	}
}

/*
 * REPEAT(N, FN, DL, list...) expands to FN applied to the first N elements
 * of the list, separated by the parenthesised delimiter DL (UNPACK strips
 * the parentheses).  Supported for N = 1..12.
 */
#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

/* SARG(n) declares parameter "u64 argn"; COPY(n) stores it in args[n]. */
#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

/* Delimiters for REPEAT: comma for parameter lists, semicolon for statements. */
#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

/*
 * BPF_TRACE_DEFN_x(x) defines and exports bpf_trace_run<x>(), which takes
 * a link plus x u64 arguments, gathers the arguments into an on-stack
 * array and hands it to __bpf_trace_run().
 */
#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_raw_tp_link *link,		\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(link, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);

/*
 * bpf_probe_register - attach @link's program to the tracepoint of @btp.
 *
 * Returns -EINVAL when the program's recorded context or tracepoint
 * buffer accesses exceed what @btp provides, otherwise the result of
 * tracepoint_probe_register_may_exist().
 */
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
	struct tracepoint *tp = btp->tp;
	struct bpf_prog *prog = link->link.prog;

	/*
	 * check that program doesn't access arguments beyond what's
	 * available in this tracepoint
	 */
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	/* Likewise for accesses to the writable tracepoint buffer. */
	if (prog->aux->max_tp_access > btp->writable_size)
		return -EINVAL;

	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
}

/* Undo bpf_probe_register(); returns tracepoint_probe_unregister()'s result. */
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
}

/*
 * bpf_get_perf_event_info - describe the program attached to @event.
 *
 * Fills *prog_id and *buf, plus *fd_type, *probe_offset and *probe_addr
 * where the body below writes them.  Returns -ENOENT when no program is
 * attached and -EOPNOTSUPP for unsupported program or event kinds.
 */
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr,
			    unsigned long *missed)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting
BPF_PROG_TYPE_PERF_EVENT yet */ 2222 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) 2223 return -EOPNOTSUPP; 2224 2225 *prog_id = prog->aux->id; 2226 flags = event->tp_event->flags; 2227 is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT; 2228 is_syscall_tp = is_syscall_trace_event(event->tp_event); 2229 2230 if (is_tracepoint || is_syscall_tp) { 2231 *buf = is_tracepoint ? event->tp_event->tp->name 2232 : event->tp_event->name; 2233 /* We allow NULL pointer for tracepoint */ 2234 if (fd_type) 2235 *fd_type = BPF_FD_TYPE_TRACEPOINT; 2236 if (probe_offset) 2237 *probe_offset = 0x0; 2238 if (probe_addr) 2239 *probe_addr = 0x0; 2240 } else { 2241 /* kprobe/uprobe */ 2242 err = -EOPNOTSUPP; 2243 #ifdef CONFIG_KPROBE_EVENTS 2244 if (flags & TRACE_EVENT_FL_KPROBE) 2245 err = bpf_get_kprobe_info(event, fd_type, buf, 2246 probe_offset, probe_addr, missed, 2247 event->attr.type == PERF_TYPE_TRACEPOINT); 2248 #endif 2249 #ifdef CONFIG_UPROBE_EVENTS 2250 if (flags & TRACE_EVENT_FL_UPROBE) 2251 err = bpf_get_uprobe_info(event, fd_type, buf, 2252 probe_offset, probe_addr, 2253 event->attr.type == PERF_TYPE_TRACEPOINT); 2254 #endif 2255 } 2256 2257 return err; 2258 } 2259 2260 static int __init send_signal_irq_work_init(void) 2261 { 2262 int cpu; 2263 struct send_signal_irq_work *work; 2264 2265 for_each_possible_cpu(cpu) { 2266 work = per_cpu_ptr(&send_signal_work, cpu); 2267 init_irq_work(&work->irq_work, do_bpf_send_signal); 2268 } 2269 return 0; 2270 } 2271 2272 subsys_initcall(send_signal_irq_work_init); 2273 2274 #ifdef CONFIG_MODULES 2275 static int bpf_event_notify(struct notifier_block *nb, unsigned long op, 2276 void *module) 2277 { 2278 struct bpf_trace_module *btm, *tmp; 2279 struct module *mod = module; 2280 int ret = 0; 2281 2282 if (mod->num_bpf_raw_events == 0 || 2283 (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) 2284 goto out; 2285 2286 mutex_lock(&bpf_module_mutex); 2287 2288 switch (op) { 2289 case MODULE_STATE_COMING: 2290 btm = kzalloc_obj(*btm); 2291 
if (btm) { 2292 btm->module = module; 2293 list_add(&btm->list, &bpf_trace_modules); 2294 } else { 2295 ret = -ENOMEM; 2296 } 2297 break; 2298 case MODULE_STATE_GOING: 2299 list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) { 2300 if (btm->module == module) { 2301 list_del(&btm->list); 2302 kfree(btm); 2303 break; 2304 } 2305 } 2306 break; 2307 } 2308 2309 mutex_unlock(&bpf_module_mutex); 2310 2311 out: 2312 return notifier_from_errno(ret); 2313 } 2314 2315 static struct notifier_block bpf_module_nb = { 2316 .notifier_call = bpf_event_notify, 2317 }; 2318 2319 static int __init bpf_event_init(void) 2320 { 2321 register_module_notifier(&bpf_module_nb); 2322 return 0; 2323 } 2324 2325 fs_initcall(bpf_event_init); 2326 #endif /* CONFIG_MODULES */ 2327 2328 struct bpf_session_run_ctx { 2329 struct bpf_run_ctx run_ctx; 2330 bool is_return; 2331 void *data; 2332 }; 2333 2334 #ifdef CONFIG_FPROBE 2335 struct bpf_kprobe_multi_link { 2336 struct bpf_link link; 2337 struct fprobe fp; 2338 unsigned long *addrs; 2339 u64 *cookies; 2340 u32 cnt; 2341 u32 mods_cnt; 2342 struct module **mods; 2343 }; 2344 2345 struct bpf_kprobe_multi_run_ctx { 2346 struct bpf_session_run_ctx session_ctx; 2347 struct bpf_kprobe_multi_link *link; 2348 unsigned long entry_ip; 2349 }; 2350 2351 struct user_syms { 2352 const char **syms; 2353 char *buf; 2354 }; 2355 2356 #ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS 2357 static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs); 2358 #define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs) 2359 #else 2360 #define bpf_kprobe_multi_pt_regs_ptr() (NULL) 2361 #endif 2362 2363 static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip) 2364 { 2365 unsigned long ip = ftrace_get_symaddr(fentry_ip); 2366 2367 return ip ? 
: fentry_ip; 2368 } 2369 2370 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) 2371 { 2372 unsigned long __user usymbol; 2373 const char **syms = NULL; 2374 char *buf = NULL, *p; 2375 int err = -ENOMEM; 2376 unsigned int i; 2377 2378 syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL); 2379 if (!syms) 2380 goto error; 2381 2382 buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL); 2383 if (!buf) 2384 goto error; 2385 2386 for (p = buf, i = 0; i < cnt; i++) { 2387 if (__get_user(usymbol, usyms + i)) { 2388 err = -EFAULT; 2389 goto error; 2390 } 2391 err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN); 2392 if (err == KSYM_NAME_LEN) 2393 err = -E2BIG; 2394 if (err < 0) 2395 goto error; 2396 syms[i] = p; 2397 p += err + 1; 2398 } 2399 2400 us->syms = syms; 2401 us->buf = buf; 2402 return 0; 2403 2404 error: 2405 if (err) { 2406 kvfree(syms); 2407 kvfree(buf); 2408 } 2409 return err; 2410 } 2411 2412 static void kprobe_multi_put_modules(struct module **mods, u32 cnt) 2413 { 2414 u32 i; 2415 2416 for (i = 0; i < cnt; i++) 2417 module_put(mods[i]); 2418 } 2419 2420 static void free_user_syms(struct user_syms *us) 2421 { 2422 kvfree(us->syms); 2423 kvfree(us->buf); 2424 } 2425 2426 static void bpf_kprobe_multi_link_release(struct bpf_link *link) 2427 { 2428 struct bpf_kprobe_multi_link *kmulti_link; 2429 2430 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2431 /* Don't wait for RCU GP here. 
*/ 2432 unregister_fprobe_async(&kmulti_link->fp); 2433 kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); 2434 } 2435 2436 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) 2437 { 2438 struct bpf_kprobe_multi_link *kmulti_link; 2439 2440 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2441 kvfree(kmulti_link->addrs); 2442 kvfree(kmulti_link->cookies); 2443 kfree(kmulti_link->mods); 2444 kfree(kmulti_link); 2445 } 2446 2447 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, 2448 struct bpf_link_info *info) 2449 { 2450 u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies); 2451 u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs); 2452 struct bpf_kprobe_multi_link *kmulti_link; 2453 u32 ucount = info->kprobe_multi.count; 2454 int err = 0, i; 2455 2456 if (!uaddrs ^ !ucount) 2457 return -EINVAL; 2458 if (ucookies && !ucount) 2459 return -EINVAL; 2460 2461 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2462 info->kprobe_multi.count = kmulti_link->cnt; 2463 info->kprobe_multi.flags = kmulti_link->link.flags; 2464 info->kprobe_multi.missed = kmulti_link->fp.nmissed; 2465 2466 if (!uaddrs) 2467 return 0; 2468 if (ucount < kmulti_link->cnt) 2469 err = -ENOSPC; 2470 else 2471 ucount = kmulti_link->cnt; 2472 2473 if (ucookies) { 2474 if (kmulti_link->cookies) { 2475 if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64))) 2476 return -EFAULT; 2477 } else { 2478 for (i = 0; i < ucount; i++) { 2479 if (put_user(0, ucookies + i)) 2480 return -EFAULT; 2481 } 2482 } 2483 } 2484 2485 if (kallsyms_show_value(current_cred())) { 2486 if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64))) 2487 return -EFAULT; 2488 } else { 2489 for (i = 0; i < ucount; i++) { 2490 if (put_user(0, uaddrs + i)) 2491 return -EFAULT; 2492 } 2493 } 2494 return err; 2495 } 2496 2497 #ifdef CONFIG_PROC_FS 2498 static void bpf_kprobe_multi_show_fdinfo(const 
struct bpf_link *link, 2499 struct seq_file *seq) 2500 { 2501 struct bpf_kprobe_multi_link *kmulti_link; 2502 bool has_cookies; 2503 2504 kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); 2505 has_cookies = !!kmulti_link->cookies; 2506 2507 seq_printf(seq, 2508 "kprobe_cnt:\t%u\n" 2509 "missed:\t%lu\n", 2510 kmulti_link->cnt, 2511 kmulti_link->fp.nmissed); 2512 2513 seq_printf(seq, "%s\t %s\n", "cookie", "func"); 2514 for (int i = 0; i < kmulti_link->cnt; i++) { 2515 seq_printf(seq, 2516 "%llu\t %pS\n", 2517 has_cookies ? kmulti_link->cookies[i] : 0, 2518 (void *)kmulti_link->addrs[i]); 2519 } 2520 } 2521 #endif 2522 2523 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { 2524 .release = bpf_kprobe_multi_link_release, 2525 .dealloc_deferred = bpf_kprobe_multi_link_dealloc, 2526 .fill_link_info = bpf_kprobe_multi_link_fill_link_info, 2527 #ifdef CONFIG_PROC_FS 2528 .show_fdinfo = bpf_kprobe_multi_show_fdinfo, 2529 #endif 2530 }; 2531 2532 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv) 2533 { 2534 const struct bpf_kprobe_multi_link *link = priv; 2535 unsigned long *addr_a = a, *addr_b = b; 2536 u64 *cookie_a, *cookie_b; 2537 2538 cookie_a = link->cookies + (addr_a - link->addrs); 2539 cookie_b = link->cookies + (addr_b - link->addrs); 2540 2541 /* swap addr_a/addr_b and cookie_a/cookie_b values */ 2542 swap(*addr_a, *addr_b); 2543 swap(*cookie_a, *cookie_b); 2544 } 2545 2546 static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b) 2547 { 2548 const unsigned long *addr_a = a, *addr_b = b; 2549 2550 if (*addr_a == *addr_b) 2551 return 0; 2552 return *addr_a < *addr_b ? 
-1 : 1; 2553 } 2554 2555 static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv) 2556 { 2557 return bpf_kprobe_multi_addrs_cmp(a, b); 2558 } 2559 2560 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) 2561 { 2562 struct bpf_kprobe_multi_run_ctx *run_ctx; 2563 struct bpf_kprobe_multi_link *link; 2564 u64 *cookie, entry_ip; 2565 unsigned long *addr; 2566 2567 if (WARN_ON_ONCE(!ctx)) 2568 return 0; 2569 run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, 2570 session_ctx.run_ctx); 2571 link = run_ctx->link; 2572 if (!link->cookies) 2573 return 0; 2574 entry_ip = run_ctx->entry_ip; 2575 addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip), 2576 bpf_kprobe_multi_addrs_cmp); 2577 if (!addr) 2578 return 0; 2579 cookie = link->cookies + (addr - link->addrs); 2580 return *cookie; 2581 } 2582 2583 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 2584 { 2585 struct bpf_kprobe_multi_run_ctx *run_ctx; 2586 2587 run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, 2588 session_ctx.run_ctx); 2589 return run_ctx->entry_ip; 2590 } 2591 2592 static __always_inline int 2593 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, 2594 unsigned long entry_ip, struct ftrace_regs *fregs, 2595 bool is_return, void *data) 2596 { 2597 struct bpf_kprobe_multi_run_ctx run_ctx = { 2598 .session_ctx = { 2599 .is_return = is_return, 2600 .data = data, 2601 }, 2602 .link = link, 2603 .entry_ip = entry_ip, 2604 }; 2605 struct bpf_run_ctx *old_run_ctx; 2606 struct pt_regs *regs; 2607 int err; 2608 2609 /* 2610 * graph tracer framework ensures we won't migrate, so there is no need 2611 * to use migrate_disable for bpf_prog_run again. The check here just for 2612 * __this_cpu_inc_return. 
2613 */ 2614 cant_sleep(); 2615 2616 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 2617 bpf_prog_inc_misses_counter(link->link.prog); 2618 err = 1; 2619 goto out; 2620 } 2621 2622 rcu_read_lock(); 2623 regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr()); 2624 old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); 2625 err = bpf_prog_run(link->link.prog, regs); 2626 bpf_reset_run_ctx(old_run_ctx); 2627 ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr()); 2628 rcu_read_unlock(); 2629 2630 out: 2631 __this_cpu_dec(bpf_prog_active); 2632 return err; 2633 } 2634 2635 static int 2636 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, 2637 unsigned long ret_ip, struct ftrace_regs *fregs, 2638 void *data) 2639 { 2640 struct bpf_kprobe_multi_link *link; 2641 int err; 2642 2643 link = container_of(fp, struct bpf_kprobe_multi_link, fp); 2644 err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), 2645 fregs, false, data); 2646 return is_kprobe_session(link->link.prog) ? 
err : 0; 2647 } 2648 2649 static void 2650 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, 2651 unsigned long ret_ip, struct ftrace_regs *fregs, 2652 void *data) 2653 { 2654 struct bpf_kprobe_multi_link *link; 2655 2656 link = container_of(fp, struct bpf_kprobe_multi_link, fp); 2657 kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), 2658 fregs, true, data); 2659 } 2660 2661 static int symbols_cmp_r(const void *a, const void *b, const void *priv) 2662 { 2663 const char **str_a = (const char **) a; 2664 const char **str_b = (const char **) b; 2665 2666 return strcmp(*str_a, *str_b); 2667 } 2668 2669 struct multi_symbols_sort { 2670 const char **funcs; 2671 u64 *cookies; 2672 }; 2673 2674 static void symbols_swap_r(void *a, void *b, int size, const void *priv) 2675 { 2676 const struct multi_symbols_sort *data = priv; 2677 const char **name_a = a, **name_b = b; 2678 2679 swap(*name_a, *name_b); 2680 2681 /* If defined, swap also related cookies. */ 2682 if (data->cookies) { 2683 u64 *cookie_a, *cookie_b; 2684 2685 cookie_a = data->cookies + (name_a - data->funcs); 2686 cookie_b = data->cookies + (name_b - data->funcs); 2687 swap(*cookie_a, *cookie_b); 2688 } 2689 } 2690 2691 struct modules_array { 2692 struct module **mods; 2693 int mods_cnt; 2694 int mods_cap; 2695 }; 2696 2697 static int add_module(struct modules_array *arr, struct module *mod) 2698 { 2699 struct module **mods; 2700 2701 if (arr->mods_cnt == arr->mods_cap) { 2702 arr->mods_cap = max(16, arr->mods_cap * 3 / 2); 2703 mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); 2704 if (!mods) 2705 return -ENOMEM; 2706 arr->mods = mods; 2707 } 2708 2709 arr->mods[arr->mods_cnt] = mod; 2710 arr->mods_cnt++; 2711 return 0; 2712 } 2713 2714 static bool has_module(struct modules_array *arr, struct module *mod) 2715 { 2716 int i; 2717 2718 for (i = arr->mods_cnt - 1; i >= 0; i--) { 2719 if (arr->mods[i] == mod) 2720 return true; 2721 } 2722 return 
false; 2723 } 2724 2725 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) 2726 { 2727 struct modules_array arr = {}; 2728 u32 i, err = 0; 2729 2730 for (i = 0; i < addrs_cnt; i++) { 2731 bool skip_add = false; 2732 struct module *mod; 2733 2734 scoped_guard(rcu) { 2735 mod = __module_address(addrs[i]); 2736 /* Either no module or it's already stored */ 2737 if (!mod || has_module(&arr, mod)) { 2738 skip_add = true; 2739 break; /* scoped_guard */ 2740 } 2741 if (!try_module_get(mod)) 2742 err = -EINVAL; 2743 } 2744 if (skip_add) 2745 continue; 2746 if (err) 2747 break; 2748 err = add_module(&arr, mod); 2749 if (err) { 2750 module_put(mod); 2751 break; 2752 } 2753 } 2754 2755 /* We return either err < 0 in case of error, ... */ 2756 if (err) { 2757 kprobe_multi_put_modules(arr.mods, arr.mods_cnt); 2758 kfree(arr.mods); 2759 return err; 2760 } 2761 2762 /* or number of modules found if everything is ok. */ 2763 *mods = arr.mods; 2764 return arr.mods_cnt; 2765 } 2766 2767 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) 2768 { 2769 u32 i; 2770 2771 for (i = 0; i < cnt; i++) { 2772 if (!within_error_injection_list(addrs[i])) 2773 return -EINVAL; 2774 } 2775 return 0; 2776 } 2777 2778 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2779 { 2780 struct bpf_kprobe_multi_link *link = NULL; 2781 struct bpf_link_primer link_primer; 2782 void __user *ucookies; 2783 unsigned long *addrs; 2784 u32 flags, cnt, size; 2785 void __user *uaddrs; 2786 u64 *cookies = NULL; 2787 void __user *usyms; 2788 int err; 2789 2790 /* no support for 32bit archs yet */ 2791 if (sizeof(u64) != sizeof(void *)) 2792 return -EOPNOTSUPP; 2793 2794 if (attr->link_create.flags) 2795 return -EINVAL; 2796 2797 if (!is_kprobe_multi(prog)) 2798 return -EINVAL; 2799 2800 /* kprobe_multi is not allowed to be sleepable. 
*/ 2801 if (prog->sleepable) 2802 return -EINVAL; 2803 2804 /* Writing to context is not allowed for kprobes. */ 2805 if (prog->aux->kprobe_write_ctx) 2806 return -EINVAL; 2807 2808 flags = attr->link_create.kprobe_multi.flags; 2809 if (flags & ~BPF_F_KPROBE_MULTI_RETURN) 2810 return -EINVAL; 2811 2812 uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs); 2813 usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms); 2814 if (!!uaddrs == !!usyms) 2815 return -EINVAL; 2816 2817 cnt = attr->link_create.kprobe_multi.cnt; 2818 if (!cnt) 2819 return -EINVAL; 2820 if (cnt > MAX_KPROBE_MULTI_CNT) 2821 return -E2BIG; 2822 2823 size = cnt * sizeof(*addrs); 2824 addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); 2825 if (!addrs) 2826 return -ENOMEM; 2827 2828 ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); 2829 if (ucookies) { 2830 cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); 2831 if (!cookies) { 2832 err = -ENOMEM; 2833 goto error; 2834 } 2835 if (copy_from_user(cookies, ucookies, size)) { 2836 err = -EFAULT; 2837 goto error; 2838 } 2839 } 2840 2841 if (uaddrs) { 2842 if (copy_from_user(addrs, uaddrs, size)) { 2843 err = -EFAULT; 2844 goto error; 2845 } 2846 } else { 2847 struct multi_symbols_sort data = { 2848 .cookies = cookies, 2849 }; 2850 struct user_syms us; 2851 2852 err = copy_user_syms(&us, usyms, cnt); 2853 if (err) 2854 goto error; 2855 2856 if (cookies) 2857 data.funcs = us.syms; 2858 2859 sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, 2860 symbols_swap_r, &data); 2861 2862 err = ftrace_lookup_symbols(us.syms, cnt, addrs); 2863 free_user_syms(&us); 2864 if (err) 2865 goto error; 2866 } 2867 2868 if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { 2869 err = -EINVAL; 2870 goto error; 2871 } 2872 2873 link = kzalloc_obj(*link); 2874 if (!link) { 2875 err = -ENOMEM; 2876 goto error; 2877 } 2878 2879 bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI, 2880 
&bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type); 2881 2882 err = bpf_link_prime(&link->link, &link_primer); 2883 if (err) 2884 goto error; 2885 2886 if (!(flags & BPF_F_KPROBE_MULTI_RETURN)) 2887 link->fp.entry_handler = kprobe_multi_link_handler; 2888 if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog)) 2889 link->fp.exit_handler = kprobe_multi_link_exit_handler; 2890 if (is_kprobe_session(prog)) 2891 link->fp.entry_data_size = sizeof(u64); 2892 2893 link->addrs = addrs; 2894 link->cookies = cookies; 2895 link->cnt = cnt; 2896 link->link.flags = flags; 2897 2898 if (cookies) { 2899 /* 2900 * Sorting addresses will trigger sorting cookies as well 2901 * (check bpf_kprobe_multi_cookie_swap). This way we can 2902 * find cookie based on the address in bpf_get_attach_cookie 2903 * helper. 2904 */ 2905 sort_r(addrs, cnt, sizeof(*addrs), 2906 bpf_kprobe_multi_cookie_cmp, 2907 bpf_kprobe_multi_cookie_swap, 2908 link); 2909 } 2910 2911 err = get_modules_for_addrs(&link->mods, addrs, cnt); 2912 if (err < 0) { 2913 bpf_link_cleanup(&link_primer); 2914 return err; 2915 } 2916 link->mods_cnt = err; 2917 2918 err = register_fprobe_ips(&link->fp, addrs, cnt); 2919 if (err) { 2920 kprobe_multi_put_modules(link->mods, link->mods_cnt); 2921 bpf_link_cleanup(&link_primer); 2922 return err; 2923 } 2924 2925 return bpf_link_settle(&link_primer); 2926 2927 error: 2928 kfree(link); 2929 kvfree(addrs); 2930 kvfree(cookies); 2931 return err; 2932 } 2933 #else /* !CONFIG_FPROBE */ 2934 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2935 { 2936 return -EOPNOTSUPP; 2937 } 2938 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) 2939 { 2940 return 0; 2941 } 2942 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 2943 { 2944 return 0; 2945 } 2946 #endif 2947 2948 #ifdef CONFIG_UPROBES 2949 struct bpf_uprobe_multi_link; 2950 2951 struct bpf_uprobe { 2952 struct bpf_uprobe_multi_link *link; 2953 loff_t 
offset; 2954 unsigned long ref_ctr_offset; 2955 u64 cookie; 2956 struct uprobe *uprobe; 2957 struct uprobe_consumer consumer; 2958 bool session; 2959 }; 2960 2961 struct bpf_uprobe_multi_link { 2962 struct path path; 2963 struct bpf_link link; 2964 u32 cnt; 2965 struct bpf_uprobe *uprobes; 2966 struct task_struct *task; 2967 }; 2968 2969 struct bpf_uprobe_multi_run_ctx { 2970 struct bpf_session_run_ctx session_ctx; 2971 unsigned long entry_ip; 2972 struct bpf_uprobe *uprobe; 2973 }; 2974 2975 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt) 2976 { 2977 u32 i; 2978 2979 for (i = 0; i < cnt; i++) 2980 uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer); 2981 2982 if (cnt) 2983 uprobe_unregister_sync(); 2984 } 2985 2986 static void bpf_uprobe_multi_link_release(struct bpf_link *link) 2987 { 2988 struct bpf_uprobe_multi_link *umulti_link; 2989 2990 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 2991 bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt); 2992 if (umulti_link->task) 2993 put_task_struct(umulti_link->task); 2994 path_put(&umulti_link->path); 2995 } 2996 2997 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) 2998 { 2999 struct bpf_uprobe_multi_link *umulti_link; 3000 3001 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3002 kvfree(umulti_link->uprobes); 3003 kfree(umulti_link); 3004 } 3005 3006 static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, 3007 struct bpf_link_info *info) 3008 { 3009 u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets); 3010 u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies); 3011 u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets); 3012 u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path); 3013 u32 upath_size = info->uprobe_multi.path_size; 3014 struct bpf_uprobe_multi_link *umulti_link; 3015 u32 ucount = info->uprobe_multi.count; 3016 
int err = 0, i; 3017 char *p, *buf; 3018 long left = 0; 3019 3020 if (!upath ^ !upath_size) 3021 return -EINVAL; 3022 3023 if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount) 3024 return -EINVAL; 3025 3026 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3027 info->uprobe_multi.count = umulti_link->cnt; 3028 info->uprobe_multi.flags = umulti_link->link.flags; 3029 info->uprobe_multi.pid = umulti_link->task ? 3030 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; 3031 3032 upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX; 3033 buf = kmalloc(upath_size, GFP_KERNEL); 3034 if (!buf) 3035 return -ENOMEM; 3036 p = d_path(&umulti_link->path, buf, upath_size); 3037 if (IS_ERR(p)) { 3038 kfree(buf); 3039 return PTR_ERR(p); 3040 } 3041 upath_size = buf + upath_size - p; 3042 3043 if (upath) 3044 left = copy_to_user(upath, p, upath_size); 3045 kfree(buf); 3046 if (left) 3047 return -EFAULT; 3048 info->uprobe_multi.path_size = upath_size; 3049 3050 if (!uoffsets && !ucookies && !uref_ctr_offsets) 3051 return 0; 3052 3053 if (ucount < umulti_link->cnt) 3054 err = -ENOSPC; 3055 else 3056 ucount = umulti_link->cnt; 3057 3058 for (i = 0; i < ucount; i++) { 3059 if (uoffsets && 3060 put_user(umulti_link->uprobes[i].offset, uoffsets + i)) 3061 return -EFAULT; 3062 if (uref_ctr_offsets && 3063 put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) 3064 return -EFAULT; 3065 if (ucookies && 3066 put_user(umulti_link->uprobes[i].cookie, ucookies + i)) 3067 return -EFAULT; 3068 } 3069 3070 return err; 3071 } 3072 3073 #ifdef CONFIG_PROC_FS 3074 static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link, 3075 struct seq_file *seq) 3076 { 3077 struct bpf_uprobe_multi_link *umulti_link; 3078 char *p, *buf; 3079 pid_t pid; 3080 3081 umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3082 3083 buf = kmalloc(PATH_MAX, GFP_KERNEL); 3084 if (!buf) 3085 return; 3086 3087 p = 
d_path(&umulti_link->path, buf, PATH_MAX); 3088 if (IS_ERR(p)) { 3089 kfree(buf); 3090 return; 3091 } 3092 3093 pid = umulti_link->task ? 3094 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; 3095 seq_printf(seq, 3096 "uprobe_cnt:\t%u\n" 3097 "pid:\t%u\n" 3098 "path:\t%s\n", 3099 umulti_link->cnt, pid, p); 3100 3101 seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset"); 3102 for (int i = 0; i < umulti_link->cnt; i++) { 3103 seq_printf(seq, 3104 "%llu\t %#llx\t %#lx\n", 3105 umulti_link->uprobes[i].cookie, 3106 umulti_link->uprobes[i].offset, 3107 umulti_link->uprobes[i].ref_ctr_offset); 3108 } 3109 3110 kfree(buf); 3111 } 3112 #endif 3113 3114 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { 3115 .release = bpf_uprobe_multi_link_release, 3116 .dealloc_deferred = bpf_uprobe_multi_link_dealloc, 3117 .fill_link_info = bpf_uprobe_multi_link_fill_link_info, 3118 #ifdef CONFIG_PROC_FS 3119 .show_fdinfo = bpf_uprobe_multi_show_fdinfo, 3120 #endif 3121 }; 3122 3123 static int uprobe_prog_run(struct bpf_uprobe *uprobe, 3124 unsigned long entry_ip, 3125 struct pt_regs *regs, 3126 bool is_return, void *data) 3127 { 3128 struct bpf_uprobe_multi_link *link = uprobe->link; 3129 struct bpf_uprobe_multi_run_ctx run_ctx = { 3130 .session_ctx = { 3131 .is_return = is_return, 3132 .data = data, 3133 }, 3134 .entry_ip = entry_ip, 3135 .uprobe = uprobe, 3136 }; 3137 struct bpf_prog *prog = link->link.prog; 3138 bool sleepable = prog->sleepable; 3139 struct bpf_run_ctx *old_run_ctx; 3140 int err; 3141 3142 if (link->task && !same_thread_group(current, link->task)) 3143 return 0; 3144 3145 if (sleepable) 3146 rcu_read_lock_trace(); 3147 else 3148 rcu_read_lock(); 3149 3150 migrate_disable(); 3151 3152 old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); 3153 err = bpf_prog_run(link->link.prog, regs); 3154 bpf_reset_run_ctx(old_run_ctx); 3155 3156 migrate_enable(); 3157 3158 if (sleepable) 3159 rcu_read_unlock_trace(); 3160 else 
3161 rcu_read_unlock(); 3162 return err; 3163 } 3164 3165 static bool 3166 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm) 3167 { 3168 struct bpf_uprobe *uprobe; 3169 3170 uprobe = container_of(con, struct bpf_uprobe, consumer); 3171 return uprobe->link->task->mm == mm; 3172 } 3173 3174 static int 3175 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, 3176 __u64 *data) 3177 { 3178 struct bpf_uprobe *uprobe; 3179 int ret; 3180 3181 uprobe = container_of(con, struct bpf_uprobe, consumer); 3182 ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data); 3183 if (uprobe->session) 3184 return ret ? UPROBE_HANDLER_IGNORE : 0; 3185 return 0; 3186 } 3187 3188 static int 3189 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, 3190 __u64 *data) 3191 { 3192 struct bpf_uprobe *uprobe; 3193 3194 uprobe = container_of(con, struct bpf_uprobe, consumer); 3195 uprobe_prog_run(uprobe, func, regs, true, data); 3196 return 0; 3197 } 3198 3199 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3200 { 3201 struct bpf_uprobe_multi_run_ctx *run_ctx; 3202 3203 run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, 3204 session_ctx.run_ctx); 3205 return run_ctx->entry_ip; 3206 } 3207 3208 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3209 { 3210 struct bpf_uprobe_multi_run_ctx *run_ctx; 3211 3212 run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, 3213 session_ctx.run_ctx); 3214 return run_ctx->uprobe->cookie; 3215 } 3216 3217 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3218 { 3219 struct bpf_uprobe_multi_link *link = NULL; 3220 unsigned long __user *uref_ctr_offsets; 3221 struct bpf_link_primer link_primer; 3222 struct bpf_uprobe *uprobes = NULL; 3223 struct task_struct *task = NULL; 3224 unsigned long __user *uoffsets; 3225 u64 __user *ucookies; 3226 void __user 
*upath; 3227 u32 flags, cnt, i; 3228 struct path path; 3229 char *name; 3230 pid_t pid; 3231 int err; 3232 3233 /* no support for 32bit archs yet */ 3234 if (sizeof(u64) != sizeof(void *)) 3235 return -EOPNOTSUPP; 3236 3237 if (attr->link_create.flags) 3238 return -EINVAL; 3239 3240 if (!is_uprobe_multi(prog)) 3241 return -EINVAL; 3242 3243 flags = attr->link_create.uprobe_multi.flags; 3244 if (flags & ~BPF_F_UPROBE_MULTI_RETURN) 3245 return -EINVAL; 3246 3247 /* 3248 * path, offsets and cnt are mandatory, 3249 * ref_ctr_offsets and cookies are optional 3250 */ 3251 upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); 3252 uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); 3253 cnt = attr->link_create.uprobe_multi.cnt; 3254 pid = attr->link_create.uprobe_multi.pid; 3255 3256 if (!upath || !uoffsets || !cnt || pid < 0) 3257 return -EINVAL; 3258 if (cnt > MAX_UPROBE_MULTI_CNT) 3259 return -E2BIG; 3260 3261 uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); 3262 ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); 3263 3264 name = strndup_user(upath, PATH_MAX); 3265 if (IS_ERR(name)) { 3266 err = PTR_ERR(name); 3267 return err; 3268 } 3269 3270 err = kern_path(name, LOOKUP_FOLLOW, &path); 3271 kfree(name); 3272 if (err) 3273 return err; 3274 3275 if (!d_is_reg(path.dentry)) { 3276 err = -EBADF; 3277 goto error_path_put; 3278 } 3279 3280 if (pid) { 3281 rcu_read_lock(); 3282 task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); 3283 rcu_read_unlock(); 3284 if (!task) { 3285 err = -ESRCH; 3286 goto error_path_put; 3287 } 3288 } 3289 3290 err = -ENOMEM; 3291 3292 link = kzalloc_obj(*link); 3293 uprobes = kvzalloc_objs(*uprobes, cnt); 3294 3295 if (!uprobes || !link) 3296 goto error_free; 3297 3298 for (i = 0; i < cnt; i++) { 3299 if (__get_user(uprobes[i].offset, uoffsets + i)) { 3300 err = -EFAULT; 3301 goto error_free; 3302 } 3303 if (uprobes[i].offset < 0) { 3304 err = -EINVAL; 3305 goto 
error_free; 3306 } 3307 if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) { 3308 err = -EFAULT; 3309 goto error_free; 3310 } 3311 if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { 3312 err = -EFAULT; 3313 goto error_free; 3314 } 3315 3316 uprobes[i].link = link; 3317 3318 if (!(flags & BPF_F_UPROBE_MULTI_RETURN)) 3319 uprobes[i].consumer.handler = uprobe_multi_link_handler; 3320 if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog)) 3321 uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; 3322 if (is_uprobe_session(prog)) 3323 uprobes[i].session = true; 3324 if (pid) 3325 uprobes[i].consumer.filter = uprobe_multi_link_filter; 3326 } 3327 3328 link->cnt = cnt; 3329 link->uprobes = uprobes; 3330 link->path = path; 3331 link->task = task; 3332 link->link.flags = flags; 3333 3334 bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, 3335 &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type); 3336 3337 for (i = 0; i < cnt; i++) { 3338 uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), 3339 uprobes[i].offset, 3340 uprobes[i].ref_ctr_offset, 3341 &uprobes[i].consumer); 3342 if (IS_ERR(uprobes[i].uprobe)) { 3343 err = PTR_ERR(uprobes[i].uprobe); 3344 link->cnt = i; 3345 goto error_unregister; 3346 } 3347 } 3348 3349 err = bpf_link_prime(&link->link, &link_primer); 3350 if (err) 3351 goto error_unregister; 3352 3353 return bpf_link_settle(&link_primer); 3354 3355 error_unregister: 3356 bpf_uprobe_unregister(uprobes, link->cnt); 3357 3358 error_free: 3359 kvfree(uprobes); 3360 kfree(link); 3361 if (task) 3362 put_task_struct(task); 3363 error_path_put: 3364 path_put(&path); 3365 return err; 3366 } 3367 #else /* !CONFIG_UPROBES */ 3368 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3369 { 3370 return -EOPNOTSUPP; 3371 } 3372 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3373 { 3374 return 0; 3375 } 3376 static u64 
bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3377 { 3378 return 0; 3379 } 3380 #endif /* CONFIG_UPROBES */ 3381 3382 __bpf_kfunc_start_defs(); 3383 3384 __bpf_kfunc bool bpf_session_is_return(void *ctx) 3385 { 3386 struct bpf_session_run_ctx *session_ctx; 3387 3388 session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); 3389 return session_ctx->is_return; 3390 } 3391 3392 __bpf_kfunc __u64 *bpf_session_cookie(void *ctx) 3393 { 3394 struct bpf_session_run_ctx *session_ctx; 3395 3396 session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); 3397 return session_ctx->data; 3398 } 3399 3400 __bpf_kfunc_end_defs(); 3401 3402 BTF_KFUNCS_START(session_kfunc_set_ids) 3403 BTF_ID_FLAGS(func, bpf_session_is_return) 3404 BTF_ID_FLAGS(func, bpf_session_cookie) 3405 BTF_KFUNCS_END(session_kfunc_set_ids) 3406 3407 static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id) 3408 { 3409 if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id)) 3410 return 0; 3411 3412 if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog)) 3413 return -EACCES; 3414 3415 return 0; 3416 } 3417 3418 static const struct btf_kfunc_id_set bpf_session_kfunc_set = { 3419 .owner = THIS_MODULE, 3420 .set = &session_kfunc_set_ids, 3421 .filter = bpf_session_filter, 3422 }; 3423 3424 static int __init bpf_trace_kfuncs_init(void) 3425 { 3426 int err = 0; 3427 3428 err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set); 3429 err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set); 3430 3431 return err; 3432 } 3433 3434 late_initcall(bpf_trace_kfuncs_init); 3435 3436 typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk); 3437 3438 /* 3439 * The __always_inline is to make sure the compiler doesn't 3440 * generate indirect calls into callbacks, which is expensive, 3441 * on some kernel configurations. 
This allows compiler to put 3442 * direct calls into all the specific callback implementations 3443 * (copy_user_data_sleepable, copy_user_data_nofault, and so on) 3444 */ 3445 static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size, 3446 const void *unsafe_src, 3447 copy_fn_t str_copy_fn, 3448 struct task_struct *tsk) 3449 { 3450 const struct bpf_dynptr_kern *dst; 3451 u64 chunk_sz, off; 3452 void *dst_slice; 3453 int cnt, err; 3454 char buf[256]; 3455 3456 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); 3457 if (likely(dst_slice)) 3458 return str_copy_fn(dst_slice, unsafe_src, size, tsk); 3459 3460 dst = (struct bpf_dynptr_kern *)dptr; 3461 if (bpf_dynptr_check_off_len(dst, doff, size)) 3462 return -E2BIG; 3463 3464 for (off = 0; off < size; off += chunk_sz - 1) { 3465 chunk_sz = min_t(u64, sizeof(buf), size - off); 3466 /* Expect str_copy_fn to return count of copied bytes, including 3467 * zero terminator. Next iteration increment off by chunk_sz - 1 to 3468 * overwrite NUL. 
3469 */ 3470 cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3471 if (cnt < 0) 3472 return cnt; 3473 err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0); 3474 if (err) 3475 return err; 3476 if (cnt < chunk_sz || chunk_sz == 1) /* we are done */ 3477 return off + cnt; 3478 } 3479 return off; 3480 } 3481 3482 static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, 3483 u64 size, const void *unsafe_src, 3484 copy_fn_t copy_fn, struct task_struct *tsk) 3485 { 3486 const struct bpf_dynptr_kern *dst; 3487 void *dst_slice; 3488 char buf[256]; 3489 u64 off, chunk_sz; 3490 int err; 3491 3492 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); 3493 if (likely(dst_slice)) 3494 return copy_fn(dst_slice, unsafe_src, size, tsk); 3495 3496 dst = (struct bpf_dynptr_kern *)dptr; 3497 if (bpf_dynptr_check_off_len(dst, doff, size)) 3498 return -E2BIG; 3499 3500 for (off = 0; off < size; off += chunk_sz) { 3501 chunk_sz = min_t(u64, sizeof(buf), size - off); 3502 err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3503 if (err) 3504 return err; 3505 err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0); 3506 if (err) 3507 return err; 3508 } 3509 return 0; 3510 } 3511 3512 static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src, 3513 u32 size, struct task_struct *tsk) 3514 { 3515 return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size); 3516 } 3517 3518 static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src, 3519 u32 size, struct task_struct *tsk) 3520 { 3521 int ret; 3522 3523 if (!tsk) { /* Read from the current task */ 3524 ret = copy_from_user(dst, (const void __user *)unsafe_src, size); 3525 if (ret) 3526 return -EFAULT; 3527 return 0; 3528 } 3529 3530 ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0); 3531 if (ret != size) 3532 return -EFAULT; 3533 return 0; 3534 } 3535 3536 static __always_inline int copy_kernel_data_nofault(void 
*dst, const void *unsafe_src, 3537 u32 size, struct task_struct *tsk) 3538 { 3539 return copy_from_kernel_nofault(dst, unsafe_src, size); 3540 } 3541 3542 static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src, 3543 u32 size, struct task_struct *tsk) 3544 { 3545 return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size); 3546 } 3547 3548 static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src, 3549 u32 size, struct task_struct *tsk) 3550 { 3551 int ret; 3552 3553 if (unlikely(size == 0)) 3554 return 0; 3555 3556 if (tsk) { 3557 ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0); 3558 } else { 3559 ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1); 3560 /* strncpy_from_user does not guarantee NUL termination */ 3561 if (ret >= 0) 3562 ((char *)dst)[ret] = '\0'; 3563 } 3564 3565 if (ret < 0) 3566 return ret; 3567 return ret + 1; 3568 } 3569 3570 static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src, 3571 u32 size, struct task_struct *tsk) 3572 { 3573 return strncpy_from_kernel_nofault(dst, unsafe_src, size); 3574 } 3575 3576 __bpf_kfunc_start_defs(); 3577 3578 __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, 3579 u64 value) 3580 { 3581 if (type != PIDTYPE_PID && type != PIDTYPE_TGID) 3582 return -EINVAL; 3583 3584 return bpf_send_signal_common(sig, type, task, value); 3585 } 3586 3587 __bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off, 3588 u64 size, const void __user *unsafe_ptr__ign) 3589 { 3590 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3591 copy_user_data_nofault, NULL); 3592 } 3593 3594 __bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off, 3595 u64 size, const void *unsafe_ptr__ign) 3596 { 3597 return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, 3598 
copy_kernel_data_nofault, NULL); 3599 } 3600 3601 __bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3602 u64 size, const void __user *unsafe_ptr__ign) 3603 { 3604 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3605 copy_user_str_nofault, NULL); 3606 } 3607 3608 __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3609 u64 size, const void *unsafe_ptr__ign) 3610 { 3611 return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, 3612 copy_kernel_str_nofault, NULL); 3613 } 3614 3615 __bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off, 3616 u64 size, const void __user *unsafe_ptr__ign) 3617 { 3618 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3619 copy_user_data_sleepable, NULL); 3620 } 3621 3622 __bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3623 u64 size, const void __user *unsafe_ptr__ign) 3624 { 3625 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3626 copy_user_str_sleepable, NULL); 3627 } 3628 3629 __bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off, 3630 u64 size, const void __user *unsafe_ptr__ign, 3631 struct task_struct *tsk) 3632 { 3633 return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3634 copy_user_data_sleepable, tsk); 3635 } 3636 3637 __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off, 3638 u64 size, const void __user *unsafe_ptr__ign, 3639 struct task_struct *tsk) 3640 { 3641 return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, 3642 copy_user_str_sleepable, tsk); 3643 } 3644 3645 __bpf_kfunc_end_defs(); 3646 3647 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \ 3648 defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) 3649 3650 static void 
bpf_tracing_multi_link_release(struct bpf_link *link) 3651 { 3652 struct bpf_tracing_multi_link *tr_link = 3653 container_of(link, struct bpf_tracing_multi_link, link); 3654 3655 WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link)); 3656 } 3657 3658 static void bpf_tracing_multi_link_dealloc(struct bpf_link *link) 3659 { 3660 struct bpf_tracing_multi_link *tr_link = 3661 container_of(link, struct bpf_tracing_multi_link, link); 3662 3663 kvfree(tr_link->fexits); 3664 kvfree(tr_link->cookies); 3665 kvfree(tr_link); 3666 } 3667 3668 #ifdef CONFIG_PROC_FS 3669 static void bpf_tracing_multi_show_fdinfo(const struct bpf_link *link, 3670 struct seq_file *seq) 3671 { 3672 struct bpf_tracing_multi_link *tr_link = 3673 container_of(link, struct bpf_tracing_multi_link, link); 3674 bool has_cookies = !!tr_link->cookies; 3675 3676 seq_printf(seq, "attach_type:\t%u\n", tr_link->link.attach_type); 3677 seq_printf(seq, "cnt:\t%u\n", tr_link->nodes_cnt); 3678 3679 seq_printf(seq, "%s\t %s\t %s\t %s\n", "obj-id", "btf-id", "cookie", "func"); 3680 for (int i = 0; i < tr_link->nodes_cnt; i++) { 3681 struct bpf_tracing_multi_node *mnode = &tr_link->nodes[i]; 3682 u32 btf_id, obj_id; 3683 3684 bpf_trampoline_unpack_key(mnode->trampoline->key, &obj_id, &btf_id); 3685 seq_printf(seq, "%u\t %u\t %llu\t %pS\n", 3686 obj_id, btf_id, 3687 has_cookies ? 
tr_link->cookies[i] : 0, 3688 (void *) mnode->trampoline->ip); 3689 3690 cond_resched(); 3691 } 3692 } 3693 #endif 3694 3695 static const struct bpf_link_ops bpf_tracing_multi_link_lops = { 3696 .release = bpf_tracing_multi_link_release, 3697 .dealloc_deferred = bpf_tracing_multi_link_dealloc, 3698 #ifdef CONFIG_PROC_FS 3699 .show_fdinfo = bpf_tracing_multi_show_fdinfo, 3700 #endif 3701 }; 3702 3703 static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused) 3704 { 3705 u32 a = *(u32 *) pa; 3706 u32 b = *(u32 *) pb; 3707 3708 return (a > b) - (a < b); 3709 } 3710 3711 static void ids_swap_r(void *a, void *b, int size __maybe_unused, 3712 const void *priv __maybe_unused) 3713 { 3714 u64 *cookie_a, *cookie_b, *cookies; 3715 u32 *id_a = a, *id_b = b, *ids; 3716 void **data = (void **) priv; 3717 3718 ids = data[0]; 3719 cookies = data[1]; 3720 3721 if (cookies) { 3722 cookie_a = cookies + (id_a - ids); 3723 cookie_b = cookies + (id_b - ids); 3724 swap(*cookie_a, *cookie_b); 3725 } 3726 swap(*id_a, *id_b); 3727 } 3728 3729 static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt) 3730 { 3731 void *data[2] = { ids, cookies }; 3732 int err = 0; 3733 3734 /* 3735 * Sort ids array (together with cookies array if defined) 3736 * and check it for duplicates. The ids and cookies arrays 3737 * are left sorted. 
3738 */ 3739 sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data); 3740 3741 for (int i = 1; i < cnt; i++) { 3742 if (ids[i] == ids[i - 1]) { 3743 err = -EINVAL; 3744 break; 3745 } 3746 } 3747 return err; 3748 } 3749 3750 int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) 3751 { 3752 struct bpf_tracing_multi_link *link = NULL; 3753 struct bpf_tramp_node *fexits = NULL; 3754 struct bpf_link_primer link_primer; 3755 u32 cnt, *ids = NULL; 3756 u64 __user *ucookies; 3757 u64 *cookies = NULL; 3758 u32 __user *uids; 3759 int err; 3760 3761 uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids); 3762 cnt = attr->link_create.tracing_multi.cnt; 3763 3764 if (!cnt || !uids) 3765 return -EINVAL; 3766 if (cnt > MAX_TRACING_MULTI_CNT) 3767 return -E2BIG; 3768 if (attr->link_create.flags || attr->link_create.target_fd) 3769 return -EINVAL; 3770 3771 ids = kvmalloc_objs(*ids, cnt); 3772 if (!ids) 3773 return -ENOMEM; 3774 3775 if (copy_from_user(ids, uids, cnt * sizeof(*ids))) { 3776 err = -EFAULT; 3777 goto error; 3778 } 3779 3780 ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies); 3781 if (ucookies) { 3782 cookies = kvmalloc_objs(*cookies, cnt); 3783 if (!cookies) { 3784 err = -ENOMEM; 3785 goto error; 3786 } 3787 if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) { 3788 err = -EFAULT; 3789 goto error; 3790 } 3791 } 3792 3793 err = check_dup_ids(ids, cookies, cnt); 3794 if (err) 3795 goto error; 3796 3797 if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) { 3798 fexits = kvmalloc_objs(*fexits, cnt); 3799 if (!fexits) { 3800 err = -ENOMEM; 3801 goto error; 3802 } 3803 } 3804 3805 link = kvzalloc_flex(*link, nodes, cnt); 3806 if (!link) { 3807 err = -ENOMEM; 3808 goto error; 3809 } 3810 3811 bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI, 3812 &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type); 3813 3814 err = bpf_link_prime(&link->link, &link_primer); 3815 if (err) 
3816 goto error; 3817 3818 link->nodes_cnt = cnt; 3819 link->cookies = cookies; 3820 link->fexits = fexits; 3821 3822 err = bpf_trampoline_multi_attach(prog, ids, link); 3823 kvfree(ids); 3824 if (err) { 3825 bpf_link_cleanup(&link_primer); 3826 return err; 3827 } 3828 return bpf_link_settle(&link_primer); 3829 3830 error: 3831 kvfree(fexits); 3832 kvfree(cookies); 3833 kvfree(ids); 3834 kvfree(link); 3835 return err; 3836 } 3837 3838 #else 3839 3840 int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) 3841 { 3842 return -EOPNOTSUPP; 3843 } 3844 3845 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */ 3846