// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif

#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses nonliteral format string. It may break
 * compilation if user enables corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b) __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);

static const char * const attach_type_name[] = {
	[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
	[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
	[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
	[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
	[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
	[BPF_CGROUP_DEVICE] = "cgroup_device",
	[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
	[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
	[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
	[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
	[BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
	[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
	[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
	[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
	[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
	[BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
	[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
	[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
	[BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
	[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
	[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
	[BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
	[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
	[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
	[BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
103 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", 104 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", 105 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", 106 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", 107 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", 108 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", 109 [BPF_LIRC_MODE2] = "lirc_mode2", 110 [BPF_FLOW_DISSECTOR] = "flow_dissector", 111 [BPF_TRACE_RAW_TP] = "trace_raw_tp", 112 [BPF_TRACE_FENTRY] = "trace_fentry", 113 [BPF_TRACE_FEXIT] = "trace_fexit", 114 [BPF_MODIFY_RETURN] = "modify_return", 115 [BPF_LSM_MAC] = "lsm_mac", 116 [BPF_LSM_CGROUP] = "lsm_cgroup", 117 [BPF_SK_LOOKUP] = "sk_lookup", 118 [BPF_TRACE_ITER] = "trace_iter", 119 [BPF_XDP_DEVMAP] = "xdp_devmap", 120 [BPF_XDP_CPUMAP] = "xdp_cpumap", 121 [BPF_XDP] = "xdp", 122 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", 123 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", 124 [BPF_PERF_EVENT] = "perf_event", 125 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", 126 [BPF_STRUCT_OPS] = "struct_ops", 127 [BPF_NETFILTER] = "netfilter", 128 [BPF_TCX_INGRESS] = "tcx_ingress", 129 [BPF_TCX_EGRESS] = "tcx_egress", 130 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", 131 [BPF_NETKIT_PRIMARY] = "netkit_primary", 132 [BPF_NETKIT_PEER] = "netkit_peer", 133 }; 134 135 static const char * const link_type_name[] = { 136 [BPF_LINK_TYPE_UNSPEC] = "unspec", 137 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", 138 [BPF_LINK_TYPE_TRACING] = "tracing", 139 [BPF_LINK_TYPE_CGROUP] = "cgroup", 140 [BPF_LINK_TYPE_ITER] = "iter", 141 [BPF_LINK_TYPE_NETNS] = "netns", 142 [BPF_LINK_TYPE_XDP] = "xdp", 143 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", 144 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", 145 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", 146 [BPF_LINK_TYPE_NETFILTER] = "netfilter", 147 [BPF_LINK_TYPE_TCX] = "tcx", 148 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", 149 [BPF_LINK_TYPE_NETKIT] = "netkit", 150 }; 151 152 static const char * const map_type_name[] = { 153 [BPF_MAP_TYPE_UNSPEC] = "unspec", 154 [BPF_MAP_TYPE_HASH] = "hash", 155 [BPF_MAP_TYPE_ARRAY] = "array", 156 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", 157 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", 158 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", 159 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", 160 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", 161 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", 162 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", 163 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", 164 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", 165 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", 166 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", 167 [BPF_MAP_TYPE_DEVMAP] = "devmap", 168 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", 169 [BPF_MAP_TYPE_SOCKMAP] = "sockmap", 170 [BPF_MAP_TYPE_CPUMAP] = "cpumap", 171 [BPF_MAP_TYPE_XSKMAP] = "xskmap", 172 [BPF_MAP_TYPE_SOCKHASH] = "sockhash", 173 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", 174 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", 175 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", 176 [BPF_MAP_TYPE_QUEUE] = "queue", 177 [BPF_MAP_TYPE_STACK] = "stack", 178 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", 179 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", 180 [BPF_MAP_TYPE_RINGBUF] = "ringbuf", 181 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", 182 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", 183 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", 184 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", 185 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", 186 
};

static const char * const prog_type_name[] = {
	[BPF_PROG_TYPE_UNSPEC] = "unspec",
	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
	[BPF_PROG_TYPE_KPROBE] = "kprobe",
	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
	[BPF_PROG_TYPE_XDP] = "xdp",
	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
	[BPF_PROG_TYPE_TRACING] = "tracing",
	[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_PROG_TYPE_EXT] = "ext",
	[BPF_PROG_TYPE_LSM] = "lsm",
	[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
	[BPF_PROG_TYPE_SYSCALL] = "syscall",
	[BPF_PROG_TYPE_NETFILTER] = "netfilter",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
		     va_list args)
{
	if (level == LIBBPF_DEBUG)
		return 0;

	return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
	libbpf_print_fn_t old_print_fn;

	old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

	return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
	va_list args;
	int old_errno;
	libbpf_print_fn_t print_fn;

	print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
	if (!print_fn)
		return;

	old_errno = errno;

	va_start(args, format);
	/* use the atomically fetched callback to avoid racing with a
	 * concurrent libbpf_set_print()
	 */
	print_fn(level, format, args);
	va_end(args);

	errno = old_errno;
}
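/* Usage sketch (application side, not part of this file): a custom print
 * callback can be installed through the public libbpf_set_print() API shown
 * above; 'my_print' is a made-up name used only for illustration:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *fmt, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, fmt, args);
 *	}
 *
 *	libbpf_set_print(my_print);	// returns the previously set callback
 */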
static void pr_perm_msg(int err)
{
	struct rlimit limit;
	char buf[100];

	if (err != -EPERM || geteuid() != 0)
		return;

	err = getrlimit(RLIMIT_MEMLOCK, &limit);
	if (err)
		return;

	if (limit.rlim_cur == RLIM_INFINITY)
		return;

	if (limit.rlim_cur < 1024)
		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
	else if (limit.rlim_cur < 1024*1024)
		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
	else
		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
		buf);
}

#define STRERR_BUFSIZE 128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({			\
	int ___err = 0;			\
	if ((fd) >= 0)			\
		___err = close((fd));	\
	fd = -1;			\
	___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
	return 0;
}

__u32 libbpf_major_version(void)
{
	return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
	return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
	return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}

enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_LD64,
	RELO_EXTERN_CALL,
	RELO_SUBPROG_ADDR,
	RELO_CORE,
};

struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	union {
		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
		struct {
			int map_idx;
			int sym_off;
			int ext_idx;
		};
	};
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object
	 */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program supports non-linear XDP buffer */
	SEC_XDP_FRAGS = 16,
	/* Setup proper attach type for usdt probes. */
	SEC_USDT = 32,
};

struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possibly
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instruction
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero.
For a sub-program, this gets reset before each of main BPF 418 * programs are processed and relocated and is used to determined 419 * whether sub-program was already appended to the main program, and 420 * if yes, at which instruction offset. 421 */ 422 size_t sub_insn_off; 423 424 /* instructions that belong to BPF program; insns[0] is located at 425 * sec_insn_off instruction within its ELF section in ELF file, so 426 * when mapping ELF file instruction index to the local instruction, 427 * one needs to subtract sec_insn_off; and vice versa. 428 */ 429 struct bpf_insn *insns; 430 /* actual number of instruction in this BPF program's image; for 431 * entry-point BPF programs this includes the size of main program 432 * itself plus all the used sub-programs, appended at the end 433 */ 434 size_t insns_cnt; 435 436 struct reloc_desc *reloc_desc; 437 int nr_reloc; 438 439 /* BPF verifier log settings */ 440 char *log_buf; 441 size_t log_size; 442 __u32 log_level; 443 444 struct bpf_object *obj; 445 446 int fd; 447 bool autoload; 448 bool autoattach; 449 bool sym_global; 450 bool mark_btf_static; 451 enum bpf_prog_type type; 452 enum bpf_attach_type expected_attach_type; 453 int exception_cb_idx; 454 455 int prog_ifindex; 456 __u32 attach_btf_obj_fd; 457 __u32 attach_btf_id; 458 __u32 attach_prog_fd; 459 460 void *func_info; 461 __u32 func_info_rec_size; 462 __u32 func_info_cnt; 463 464 void *line_info; 465 __u32 line_info_rec_size; 466 __u32 line_info_cnt; 467 __u32 prog_flags; 468 }; 469 470 struct bpf_struct_ops { 471 const char *tname; 472 const struct btf_type *type; 473 struct bpf_program **progs; 474 __u32 *kern_func_off; 475 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ 476 void *data; 477 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in 478 * btf_vmlinux's format. 479 * struct bpf_struct_ops_tcp_congestion_ops { 480 * [... some other kernel fields ...] 481 * struct tcp_congestion_ops data; 482 * } 483 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) 484 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" 485 * from "data". 486 */ 487 void *kern_vdata; 488 __u32 type_id; 489 }; 490 491 #define DATA_SEC ".data" 492 #define BSS_SEC ".bss" 493 #define RODATA_SEC ".rodata" 494 #define KCONFIG_SEC ".kconfig" 495 #define KSYMS_SEC ".ksyms" 496 #define STRUCT_OPS_SEC ".struct_ops" 497 #define STRUCT_OPS_LINK_SEC ".struct_ops.link" 498 499 enum libbpf_map_type { 500 LIBBPF_MAP_UNSPEC, 501 LIBBPF_MAP_DATA, 502 LIBBPF_MAP_BSS, 503 LIBBPF_MAP_RODATA, 504 LIBBPF_MAP_KCONFIG, 505 }; 506 507 struct bpf_map_def { 508 unsigned int type; 509 unsigned int key_size; 510 unsigned int value_size; 511 unsigned int max_entries; 512 unsigned int map_flags; 513 }; 514 515 struct bpf_map { 516 struct bpf_object *obj; 517 char *name; 518 /* real_name is defined for special internal maps (.rodata*, 519 * .data*, .bss, .kconfig) and preserves their original ELF section 520 * name. This is important to be able to find corresponding BTF 521 * DATASEC information. 
522 */ 523 char *real_name; 524 int fd; 525 int sec_idx; 526 size_t sec_offset; 527 int map_ifindex; 528 int inner_map_fd; 529 struct bpf_map_def def; 530 __u32 numa_node; 531 __u32 btf_var_idx; 532 __u32 btf_key_type_id; 533 __u32 btf_value_type_id; 534 __u32 btf_vmlinux_value_type_id; 535 enum libbpf_map_type libbpf_type; 536 void *mmaped; 537 struct bpf_struct_ops *st_ops; 538 struct bpf_map *inner_map; 539 void **init_slots; 540 int init_slots_sz; 541 char *pin_path; 542 bool pinned; 543 bool reused; 544 bool autocreate; 545 __u64 map_extra; 546 }; 547 548 enum extern_type { 549 EXT_UNKNOWN, 550 EXT_KCFG, 551 EXT_KSYM, 552 }; 553 554 enum kcfg_type { 555 KCFG_UNKNOWN, 556 KCFG_CHAR, 557 KCFG_BOOL, 558 KCFG_INT, 559 KCFG_TRISTATE, 560 KCFG_CHAR_ARR, 561 }; 562 563 struct extern_desc { 564 enum extern_type type; 565 int sym_idx; 566 int btf_id; 567 int sec_btf_id; 568 const char *name; 569 char *essent_name; 570 bool is_set; 571 bool is_weak; 572 union { 573 struct { 574 enum kcfg_type type; 575 int sz; 576 int align; 577 int data_off; 578 bool is_signed; 579 } kcfg; 580 struct { 581 unsigned long long addr; 582 583 /* target btf_id of the corresponding kernel var. */ 584 int kernel_btf_obj_fd; 585 int kernel_btf_id; 586 587 /* local btf_id of the ksym extern's type. */ 588 __u32 type_id; 589 /* BTF fd index to be patched in for insn->off, this is 590 * 0 for vmlinux BTF, index in obj->fd_array for module 591 * BTF 592 */ 593 __s16 btf_fd_idx; 594 } ksym; 595 }; 596 }; 597 598 struct module_btf { 599 struct btf *btf; 600 char *name; 601 __u32 id; 602 int fd; 603 int fd_array_idx; 604 }; 605 606 enum sec_type { 607 SEC_UNUSED = 0, 608 SEC_RELO, 609 SEC_BSS, 610 SEC_DATA, 611 SEC_RODATA, 612 }; 613 614 struct elf_sec_desc { 615 enum sec_type sec_type; 616 Elf64_Shdr *shdr; 617 Elf_Data *data; 618 }; 619 620 struct elf_state { 621 int fd; 622 const void *obj_buf; 623 size_t obj_buf_sz; 624 Elf *elf; 625 Elf64_Ehdr *ehdr; 626 Elf_Data *symbols; 627 Elf_Data *st_ops_data; 628 Elf_Data *st_ops_link_data; 629 size_t shstrndx; /* section index for section name strings */ 630 size_t strtabidx; 631 struct elf_sec_desc *secs; 632 size_t sec_cnt; 633 int btf_maps_shndx; 634 __u32 btf_maps_sec_btf_id; 635 int text_shndx; 636 int symbols_shndx; 637 int st_ops_shndx; 638 int st_ops_link_shndx; 639 }; 640 641 struct usdt_manager; 642 643 struct bpf_object { 644 char name[BPF_OBJ_NAME_LEN]; 645 char license[64]; 646 __u32 kern_version; 647 648 struct bpf_program *programs; 649 size_t nr_programs; 650 struct bpf_map *maps; 651 size_t nr_maps; 652 size_t maps_cap; 653 654 char *kconfig; 655 struct extern_desc *externs; 656 int nr_extern; 657 int kconfig_map_idx; 658 659 bool loaded; 660 bool has_subcalls; 661 bool has_rodata; 662 663 struct bpf_gen *gen_loader; 664 665 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ 666 struct elf_state efile; 667 668 struct btf *btf; 669 struct btf_ext *btf_ext; 670 671 /* Parse and load BTF vmlinux if any of the programs in the object need 672 * it at load time. 673 */ 674 struct btf *btf_vmlinux; 675 /* Path to the custom BTF to be used for BPF CO-RE relocations as an 676 * override for vmlinux BTF. 
677 */ 678 char *btf_custom_path; 679 /* vmlinux BTF override for CO-RE relocations */ 680 struct btf *btf_vmlinux_override; 681 /* Lazily initialized kernel module BTFs */ 682 struct module_btf *btf_modules; 683 bool btf_modules_loaded; 684 size_t btf_module_cnt; 685 size_t btf_module_cap; 686 687 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ 688 char *log_buf; 689 size_t log_size; 690 __u32 log_level; 691 692 int *fd_array; 693 size_t fd_array_cap; 694 size_t fd_array_cnt; 695 696 struct usdt_manager *usdt_man; 697 698 struct kern_feature_cache *feat_cache; 699 char *token_path; 700 int token_fd; 701 702 char path[]; 703 }; 704 705 static const char *elf_sym_str(const struct bpf_object *obj, size_t off); 706 static const char *elf_sec_str(const struct bpf_object *obj, size_t off); 707 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); 708 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); 709 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); 710 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); 711 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); 712 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); 713 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); 714 715 void bpf_program__unload(struct bpf_program *prog) 716 { 717 if (!prog) 718 return; 719 720 zclose(prog->fd); 721 722 zfree(&prog->func_info); 723 zfree(&prog->line_info); 724 } 725 726 static void bpf_program__exit(struct bpf_program *prog) 727 { 728 if (!prog) 729 return; 730 731 bpf_program__unload(prog); 732 zfree(&prog->name); 733 zfree(&prog->sec_name); 734 zfree(&prog->insns); 735 zfree(&prog->reloc_desc); 736 737 prog->nr_reloc = 0; 738 prog->insns_cnt = 0; 739 prog->sec_idx = -1; 740 } 741 742 static bool insn_is_subprog_call(const struct bpf_insn *insn) 743 { 744 return BPF_CLASS(insn->code) == BPF_JMP && 745 BPF_OP(insn->code) == BPF_CALL && 746 BPF_SRC(insn->code) == BPF_K && 747 insn->src_reg == BPF_PSEUDO_CALL && 748 insn->dst_reg == 0 && 749 insn->off == 0; 750 } 751 752 static bool is_call_insn(const struct bpf_insn *insn) 753 { 754 return insn->code == (BPF_JMP | BPF_CALL); 755 } 756 757 static bool insn_is_pseudo_func(struct bpf_insn *insn) 758 { 759 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC; 760 } 761 762 static int 763 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, 764 const char *name, size_t sec_idx, const char *sec_name, 765 size_t sec_off, void *insn_data, size_t insn_data_sz) 766 { 767 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { 768 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", 769 sec_name, name, sec_off, insn_data_sz); 770 return -EINVAL; 771 } 772 773 memset(prog, 0, sizeof(*prog)); 774 prog->obj = obj; 775 776 prog->sec_idx = sec_idx; 777 prog->sec_insn_off = sec_off / BPF_INSN_SZ; 778 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; 779 /* insns_cnt can later be increased by appending used subprograms */ 780 prog->insns_cnt = prog->sec_insn_cnt; 781 782 prog->type = BPF_PROG_TYPE_UNSPEC; 783 prog->fd = -1; 784 prog->exception_cb_idx = -1; 785 786 /* libbpf's convention for SEC("?abc...") is that it's just like 787 * SEC("abc...") but the corresponding bpf_program starts out with 788 * autoload set to false. 
789 */ 790 if (sec_name[0] == '?') { 791 prog->autoload = false; 792 /* from now on forget there was ? in section name */ 793 sec_name++; 794 } else { 795 prog->autoload = true; 796 } 797 798 prog->autoattach = true; 799 800 /* inherit object's log_level */ 801 prog->log_level = obj->log_level; 802 803 prog->sec_name = strdup(sec_name); 804 if (!prog->sec_name) 805 goto errout; 806 807 prog->name = strdup(name); 808 if (!prog->name) 809 goto errout; 810 811 prog->insns = malloc(insn_data_sz); 812 if (!prog->insns) 813 goto errout; 814 memcpy(prog->insns, insn_data, insn_data_sz); 815 816 return 0; 817 errout: 818 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); 819 bpf_program__exit(prog); 820 return -ENOMEM; 821 } 822 823 static int 824 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, 825 const char *sec_name, int sec_idx) 826 { 827 Elf_Data *symbols = obj->efile.symbols; 828 struct bpf_program *prog, *progs; 829 void *data = sec_data->d_buf; 830 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; 831 int nr_progs, err, i; 832 const char *name; 833 Elf64_Sym *sym; 834 835 progs = obj->programs; 836 nr_progs = obj->nr_programs; 837 nr_syms = symbols->d_size / sizeof(Elf64_Sym); 838 839 for (i = 0; i < nr_syms; i++) { 840 sym = elf_sym_by_idx(obj, i); 841 842 if (sym->st_shndx != sec_idx) 843 continue; 844 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) 845 continue; 846 847 prog_sz = sym->st_size; 848 sec_off = sym->st_value; 849 850 name = elf_sym_str(obj, sym->st_name); 851 if (!name) { 852 pr_warn("sec '%s': failed to get symbol name for offset %zu\n", 853 sec_name, sec_off); 854 return -LIBBPF_ERRNO__FORMAT; 855 } 856 857 if (sec_off + prog_sz > sec_sz) { 858 pr_warn("sec '%s': program at offset %zu crosses section boundary\n", 859 sec_name, sec_off); 860 return -LIBBPF_ERRNO__FORMAT; 861 } 862 863 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { 864 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); 865 return -ENOTSUP; 866 } 867 868 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", 869 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); 870 871 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs)); 872 if (!progs) { 873 /* 874 * In this case the original obj->programs 875 * is still valid, so don't need special treat for 876 * bpf_close_object(). 
877 */ 878 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n", 879 sec_name, name); 880 return -ENOMEM; 881 } 882 obj->programs = progs; 883 884 prog = &progs[nr_progs]; 885 886 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name, 887 sec_off, data + sec_off, prog_sz); 888 if (err) 889 return err; 890 891 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) 892 prog->sym_global = true; 893 894 /* if function is a global/weak symbol, but has restricted 895 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC 896 * as static to enable more permissive BPF verification mode 897 * with more outside context available to BPF verifier 898 */ 899 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 900 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) 901 prog->mark_btf_static = true; 902 903 nr_progs++; 904 obj->nr_programs = nr_progs; 905 } 906 907 return 0; 908 } 909 910 static const struct btf_member * 911 find_member_by_offset(const struct btf_type *t, __u32 bit_offset) 912 { 913 struct btf_member *m; 914 int i; 915 916 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 917 if (btf_member_bit_offset(t, i) == bit_offset) 918 return m; 919 } 920 921 return NULL; 922 } 923 924 static const struct btf_member * 925 find_member_by_name(const struct btf *btf, const struct btf_type *t, 926 const char *name) 927 { 928 struct btf_member *m; 929 int i; 930 931 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 932 if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) 933 return m; 934 } 935 936 return NULL; 937 } 938 939 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" 940 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 941 const char *name, __u32 kind); 942 943 static int 944 find_struct_ops_kern_types(const struct btf *btf, const char *tname, 945 const struct btf_type **type, __u32 *type_id, 946 const struct btf_type **vtype, __u32 *vtype_id, 947 const struct btf_member **data_member) 948 { 949 const struct btf_type *kern_type, *kern_vtype; 950 const struct btf_member *kern_data_member; 951 __s32 kern_vtype_id, kern_type_id; 952 __u32 i; 953 954 kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); 955 if (kern_type_id < 0) { 956 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", 957 tname); 958 return kern_type_id; 959 } 960 kern_type = btf__type_by_id(btf, kern_type_id); 961 962 /* Find the corresponding "map_value" type that will be used 963 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, 964 * find "struct bpf_struct_ops_tcp_congestion_ops" from the 965 * btf_vmlinux. 966 */ 967 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, 968 tname, BTF_KIND_STRUCT); 969 if (kern_vtype_id < 0) { 970 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", 971 STRUCT_OPS_VALUE_PREFIX, tname); 972 return kern_vtype_id; 973 } 974 kern_vtype = btf__type_by_id(btf, kern_vtype_id); 975 976 /* Find "struct tcp_congestion_ops" from 977 * struct bpf_struct_ops_tcp_congestion_ops { 978 * [ ... 
] 979 * struct tcp_congestion_ops data; 980 * } 981 */ 982 kern_data_member = btf_members(kern_vtype); 983 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { 984 if (kern_data_member->type == kern_type_id) 985 break; 986 } 987 if (i == btf_vlen(kern_vtype)) { 988 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", 989 tname, STRUCT_OPS_VALUE_PREFIX, tname); 990 return -EINVAL; 991 } 992 993 *type = kern_type; 994 *type_id = kern_type_id; 995 *vtype = kern_vtype; 996 *vtype_id = kern_vtype_id; 997 *data_member = kern_data_member; 998 999 return 0; 1000 } 1001 1002 static bool bpf_map__is_struct_ops(const struct bpf_map *map) 1003 { 1004 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; 1005 } 1006 1007 /* Init the map's fields that depend on kern_btf */ 1008 static int bpf_map__init_kern_struct_ops(struct bpf_map *map, 1009 const struct btf *btf, 1010 const struct btf *kern_btf) 1011 { 1012 const struct btf_member *member, *kern_member, *kern_data_member; 1013 const struct btf_type *type, *kern_type, *kern_vtype; 1014 __u32 i, kern_type_id, kern_vtype_id, kern_data_off; 1015 struct bpf_struct_ops *st_ops; 1016 void *data, *kern_data; 1017 const char *tname; 1018 int err; 1019 1020 st_ops = map->st_ops; 1021 type = st_ops->type; 1022 tname = st_ops->tname; 1023 err = find_struct_ops_kern_types(kern_btf, tname, 1024 &kern_type, &kern_type_id, 1025 &kern_vtype, &kern_vtype_id, 1026 &kern_data_member); 1027 if (err) 1028 return err; 1029 1030 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", 1031 map->name, st_ops->type_id, kern_type_id, kern_vtype_id); 1032 1033 map->def.value_size = kern_vtype->size; 1034 map->btf_vmlinux_value_type_id = kern_vtype_id; 1035 1036 st_ops->kern_vdata = calloc(1, kern_vtype->size); 1037 if (!st_ops->kern_vdata) 1038 return -ENOMEM; 1039 1040 data = st_ops->data; 1041 kern_data_off = kern_data_member->offset / 8; 1042 kern_data = st_ops->kern_vdata + kern_data_off; 1043 1044 member = btf_members(type); 1045 for (i = 0; i < btf_vlen(type); i++, member++) { 1046 const struct btf_type *mtype, *kern_mtype; 1047 __u32 mtype_id, kern_mtype_id; 1048 void *mdata, *kern_mdata; 1049 __s64 msize, kern_msize; 1050 __u32 moff, kern_moff; 1051 __u32 kern_member_idx; 1052 const char *mname; 1053 1054 mname = btf__name_by_offset(btf, member->name_off); 1055 kern_member = find_member_by_name(kern_btf, kern_type, mname); 1056 if (!kern_member) { 1057 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", 1058 map->name, mname); 1059 return -ENOTSUP; 1060 } 1061 1062 kern_member_idx = kern_member - btf_members(kern_type); 1063 if (btf_member_bitfield_size(type, i) || 1064 btf_member_bitfield_size(kern_type, kern_member_idx)) { 1065 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", 1066 map->name, mname); 1067 return -ENOTSUP; 1068 } 1069 1070 moff = member->offset / 8; 1071 kern_moff = kern_member->offset / 8; 1072 1073 mdata = data + moff; 1074 kern_mdata = kern_data + kern_moff; 1075 1076 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); 1077 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, 1078 &kern_mtype_id); 1079 if (BTF_INFO_KIND(mtype->info) != 1080 BTF_INFO_KIND(kern_mtype->info)) { 1081 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", 1082 map->name, mname, BTF_INFO_KIND(mtype->info), 1083 BTF_INFO_KIND(kern_mtype->info)); 1084 return -ENOTSUP; 1085 } 1086 1087 if (btf_is_ptr(mtype)) { 1088 struct bpf_program 
*prog; 1089 1090 prog = st_ops->progs[i]; 1091 if (!prog) 1092 continue; 1093 1094 kern_mtype = skip_mods_and_typedefs(kern_btf, 1095 kern_mtype->type, 1096 &kern_mtype_id); 1097 1098 /* mtype->type must be a func_proto which was 1099 * guaranteed in bpf_object__collect_st_ops_relos(), 1100 * so only check kern_mtype for func_proto here. 1101 */ 1102 if (!btf_is_func_proto(kern_mtype)) { 1103 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", 1104 map->name, mname); 1105 return -ENOTSUP; 1106 } 1107 1108 prog->attach_btf_id = kern_type_id; 1109 prog->expected_attach_type = kern_member_idx; 1110 1111 st_ops->kern_func_off[i] = kern_data_off + kern_moff; 1112 1113 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", 1114 map->name, mname, prog->name, moff, 1115 kern_moff); 1116 1117 continue; 1118 } 1119 1120 msize = btf__resolve_size(btf, mtype_id); 1121 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); 1122 if (msize < 0 || kern_msize < 0 || msize != kern_msize) { 1123 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", 1124 map->name, mname, (ssize_t)msize, 1125 (ssize_t)kern_msize); 1126 return -ENOTSUP; 1127 } 1128 1129 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", 1130 map->name, mname, (unsigned int)msize, 1131 moff, kern_moff); 1132 memcpy(kern_mdata, mdata, msize); 1133 } 1134 1135 return 0; 1136 } 1137 1138 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) 1139 { 1140 struct bpf_map *map; 1141 size_t i; 1142 int err; 1143 1144 for (i = 0; i < obj->nr_maps; i++) { 1145 map = &obj->maps[i]; 1146 1147 if (!bpf_map__is_struct_ops(map)) 1148 continue; 1149 1150 err = bpf_map__init_kern_struct_ops(map, obj->btf, 1151 obj->btf_vmlinux); 1152 if (err) 1153 return err; 1154 } 1155 1156 return 0; 1157 } 1158 1159 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, 1160 int shndx, Elf_Data *data, __u32 map_flags) 1161 { 1162 const struct btf_type *type, *datasec; 1163 const struct btf_var_secinfo *vsi; 1164 struct bpf_struct_ops *st_ops; 1165 const char *tname, *var_name; 1166 __s32 type_id, datasec_id; 1167 const struct btf *btf; 1168 struct bpf_map *map; 1169 __u32 i; 1170 1171 if (shndx == -1) 1172 return 0; 1173 1174 btf = obj->btf; 1175 datasec_id = btf__find_by_name_kind(btf, sec_name, 1176 BTF_KIND_DATASEC); 1177 if (datasec_id < 0) { 1178 pr_warn("struct_ops init: DATASEC %s not found\n", 1179 sec_name); 1180 return -EINVAL; 1181 } 1182 1183 datasec = btf__type_by_id(btf, datasec_id); 1184 vsi = btf_var_secinfos(datasec); 1185 for (i = 0; i < btf_vlen(datasec); i++, vsi++) { 1186 type = btf__type_by_id(obj->btf, vsi->type); 1187 var_name = btf__name_by_offset(obj->btf, type->name_off); 1188 1189 type_id = btf__resolve_type(obj->btf, vsi->type); 1190 if (type_id < 0) { 1191 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", 1192 vsi->type, sec_name); 1193 return -EINVAL; 1194 } 1195 1196 type = btf__type_by_id(obj->btf, type_id); 1197 tname = btf__name_by_offset(obj->btf, type->name_off); 1198 if (!tname[0]) { 1199 pr_warn("struct_ops init: anonymous type is not supported\n"); 1200 return -ENOTSUP; 1201 } 1202 if (!btf_is_struct(type)) { 1203 pr_warn("struct_ops init: %s is not a struct\n", tname); 1204 return -EINVAL; 1205 } 1206 1207 map = bpf_object__add_map(obj); 1208 if (IS_ERR(map)) 1209 return PTR_ERR(map); 1210 1211 map->sec_idx = shndx; 1212 
		map->sec_offset = vsi->offset;
		map->name = strdup(var_name);
		if (!map->name)
			return -ENOMEM;

		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
		map->def.key_size = sizeof(int);
		map->def.value_size = type->size;
		map->def.max_entries = 1;
		map->def.map_flags = map_flags;

		map->st_ops = calloc(1, sizeof(*map->st_ops));
		if (!map->st_ops)
			return -ENOMEM;
		st_ops = map->st_ops;
		st_ops->data = malloc(type->size);
		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
		st_ops->kern_func_off = malloc(btf_vlen(type) *
					       sizeof(*st_ops->kern_func_off));
		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
			return -ENOMEM;

		if (vsi->offset + type->size > data->d_size) {
			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
				var_name, sec_name);
			return -EINVAL;
		}

		memcpy(st_ops->data,
		       data->d_buf + vsi->offset,
		       type->size);
		st_ops->tname = tname;
		st_ops->type = type;
		st_ops->type_id = type_id;

		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
			 tname, type_id, var_name, vsi->offset);
	}

	return 0;
}

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
	int err;

	err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
				   obj->efile.st_ops_data, 0);
	err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
					  obj->efile.st_ops_link_shndx,
					  obj->efile.st_ops_link_data,
					  BPF_F_LINK);
	return err;
}

static struct bpf_object *bpf_object__new(const char *path,
					  const void *obj_buf,
					  size_t obj_buf_sz,
					  const char *obj_name)
{
	struct bpf_object *obj;
	char *end;

	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
	if (!obj) {
		pr_warn("alloc memory failed for %s\n", path);
		return ERR_PTR(-ENOMEM);
	}

	strcpy(obj->path, path);
	if (obj_name) {
		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
	} else {
		/* Using basename() GNU version which doesn't modify arg. */
		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
		end = strchr(obj->name, '.');
		if (end)
			*end = 0;
	}

	obj->efile.fd = -1;
	/*
	 * Caller of this function should also call
	 * bpf_object__elf_finish() after data collection to return
	 * obj_buf to user. If not, we should duplicate the buffer to
	 * avoid user freeing them before elf finish.
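	 * For instance, a caller that opens an object from memory, e.g. with
	 * bpf_object__open_mem(buf, buf_sz, &opts), is expected to keep 'buf'
	 * alive at least until ELF parsing is finished ('buf', 'buf_sz' and
	 * 'opts' are illustrative names, not fields of this struct).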
1298 */ 1299 obj->efile.obj_buf = obj_buf; 1300 obj->efile.obj_buf_sz = obj_buf_sz; 1301 obj->efile.btf_maps_shndx = -1; 1302 obj->efile.st_ops_shndx = -1; 1303 obj->efile.st_ops_link_shndx = -1; 1304 obj->kconfig_map_idx = -1; 1305 1306 obj->kern_version = get_kernel_version(); 1307 obj->loaded = false; 1308 1309 return obj; 1310 } 1311 1312 static void bpf_object__elf_finish(struct bpf_object *obj) 1313 { 1314 if (!obj->efile.elf) 1315 return; 1316 1317 elf_end(obj->efile.elf); 1318 obj->efile.elf = NULL; 1319 obj->efile.symbols = NULL; 1320 obj->efile.st_ops_data = NULL; 1321 obj->efile.st_ops_link_data = NULL; 1322 1323 zfree(&obj->efile.secs); 1324 obj->efile.sec_cnt = 0; 1325 zclose(obj->efile.fd); 1326 obj->efile.obj_buf = NULL; 1327 obj->efile.obj_buf_sz = 0; 1328 } 1329 1330 static int bpf_object__elf_init(struct bpf_object *obj) 1331 { 1332 Elf64_Ehdr *ehdr; 1333 int err = 0; 1334 Elf *elf; 1335 1336 if (obj->efile.elf) { 1337 pr_warn("elf: init internal error\n"); 1338 return -LIBBPF_ERRNO__LIBELF; 1339 } 1340 1341 if (obj->efile.obj_buf_sz > 0) { 1342 /* obj_buf should have been validated by bpf_object__open_mem(). */ 1343 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); 1344 } else { 1345 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); 1346 if (obj->efile.fd < 0) { 1347 char errmsg[STRERR_BUFSIZE], *cp; 1348 1349 err = -errno; 1350 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 1351 pr_warn("elf: failed to open %s: %s\n", obj->path, cp); 1352 return err; 1353 } 1354 1355 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); 1356 } 1357 1358 if (!elf) { 1359 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); 1360 err = -LIBBPF_ERRNO__LIBELF; 1361 goto errout; 1362 } 1363 1364 obj->efile.elf = elf; 1365 1366 if (elf_kind(elf) != ELF_K_ELF) { 1367 err = -LIBBPF_ERRNO__FORMAT; 1368 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); 1369 goto errout; 1370 } 1371 1372 if (gelf_getclass(elf) != ELFCLASS64) { 1373 err = -LIBBPF_ERRNO__FORMAT; 1374 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); 1375 goto errout; 1376 } 1377 1378 obj->efile.ehdr = ehdr = elf64_getehdr(elf); 1379 if (!obj->efile.ehdr) { 1380 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); 1381 err = -LIBBPF_ERRNO__FORMAT; 1382 goto errout; 1383 } 1384 1385 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { 1386 pr_warn("elf: failed to get section names section index for %s: %s\n", 1387 obj->path, elf_errmsg(-1)); 1388 err = -LIBBPF_ERRNO__FORMAT; 1389 goto errout; 1390 } 1391 1392 /* ELF is corrupted/truncated, avoid calling elf_strptr. 
*/ 1393 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { 1394 pr_warn("elf: failed to get section names strings from %s: %s\n", 1395 obj->path, elf_errmsg(-1)); 1396 err = -LIBBPF_ERRNO__FORMAT; 1397 goto errout; 1398 } 1399 1400 /* Old LLVM set e_machine to EM_NONE */ 1401 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { 1402 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); 1403 err = -LIBBPF_ERRNO__FORMAT; 1404 goto errout; 1405 } 1406 1407 return 0; 1408 errout: 1409 bpf_object__elf_finish(obj); 1410 return err; 1411 } 1412 1413 static int bpf_object__check_endianness(struct bpf_object *obj) 1414 { 1415 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 1416 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) 1417 return 0; 1418 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 1419 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) 1420 return 0; 1421 #else 1422 # error "Unrecognized __BYTE_ORDER__" 1423 #endif 1424 pr_warn("elf: endianness mismatch in %s.\n", obj->path); 1425 return -LIBBPF_ERRNO__ENDIAN; 1426 } 1427 1428 static int 1429 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) 1430 { 1431 if (!data) { 1432 pr_warn("invalid license section in %s\n", obj->path); 1433 return -LIBBPF_ERRNO__FORMAT; 1434 } 1435 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't 1436 * go over allowed ELF data section buffer 1437 */ 1438 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); 1439 pr_debug("license of %s is %s\n", obj->path, obj->license); 1440 return 0; 1441 } 1442 1443 static int 1444 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) 1445 { 1446 __u32 kver; 1447 1448 if (!data || size != sizeof(kver)) { 1449 pr_warn("invalid kver section in %s\n", obj->path); 1450 return -LIBBPF_ERRNO__FORMAT; 1451 } 1452 memcpy(&kver, data, sizeof(kver)); 1453 obj->kern_version = kver; 1454 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); 1455 return 0; 1456 } 1457 1458 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) 1459 { 1460 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || 1461 type == BPF_MAP_TYPE_HASH_OF_MAPS) 1462 return true; 1463 return false; 1464 } 1465 1466 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) 1467 { 1468 Elf_Data *data; 1469 Elf_Scn *scn; 1470 1471 if (!name) 1472 return -EINVAL; 1473 1474 scn = elf_sec_by_name(obj, name); 1475 data = elf_sec_data(obj, scn); 1476 if (data) { 1477 *size = data->d_size; 1478 return 0; /* found it */ 1479 } 1480 1481 return -ENOENT; 1482 } 1483 1484 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) 1485 { 1486 Elf_Data *symbols = obj->efile.symbols; 1487 const char *sname; 1488 size_t si; 1489 1490 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { 1491 Elf64_Sym *sym = elf_sym_by_idx(obj, si); 1492 1493 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) 1494 continue; 1495 1496 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && 1497 ELF64_ST_BIND(sym->st_info) != STB_WEAK) 1498 continue; 1499 1500 sname = elf_sym_str(obj, sym->st_name); 1501 if (!sname) { 1502 pr_warn("failed to get sym name string for var %s\n", name); 1503 return ERR_PTR(-EIO); 1504 } 1505 if (strcmp(name, sname) == 0) 1506 return sym; 1507 } 1508 1509 return ERR_PTR(-ENOENT); 1510 } 1511 1512 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) 1513 { 1514 struct bpf_map *map; 1515 int err; 1516 1517 err = 
libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
				sizeof(*obj->maps), obj->nr_maps + 1);
	if (err)
		return ERR_PTR(err);

	map = &obj->maps[obj->nr_maps++];
	map->obj = obj;
	map->fd = -1;
	map->inner_map_fd = -1;
	map->autocreate = true;

	return map;
}

static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);
	size_t map_sz;

	map_sz = (size_t)roundup(value_sz, 8) * max_entries;
	map_sz = roundup(map_sz, page_sz);
	return map_sz;
}

static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
	void *mmaped;

	if (!map->mmaped)
		return -EINVAL;

	if (old_sz == new_sz)
		return 0;

	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (mmaped == MAP_FAILED)
		return -errno;

	memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
	munmap(map->mmaped, old_sz);
	map->mmaped = mmaped;
	return 0;
}

static char *internal_map_name(struct bpf_object *obj, const char *real_name)
{
	char map_name[BPF_OBJ_NAME_LEN], *p;
	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));

	/* This is one of the more confusing parts of libbpf for various
	 * reasons, some of which are historical. The original idea for naming
	 * internal names was to include as much of BPF object name prefix as
	 * possible, so that it can be distinguished from similar internal
	 * maps of a different BPF object.
	 * As an example, let's say we have bpf_object named 'my_object_name'
	 * and internal map corresponding to '.rodata' ELF section. The final
	 * map name advertised to user and to the kernel will be
	 * 'my_objec.rodata', taking first 8 characters of object name and
	 * entire 7 characters of '.rodata'.
	 * Somewhat confusingly, if internal map ELF section name is shorter
	 * than 7 characters, e.g., '.bss', we still reserve 7 characters
	 * for the suffix, even though we only have 4 actual characters, and
	 * resulting map will be called 'my_objec.bss', not even using all 15
	 * characters allowed by the kernel. Oh well, at least the truncated
	 * object name is somewhat consistent in this case. But if the map
	 * name is '.kconfig', we'll still have entirety of '.kconfig' added
	 * (8 chars) and thus will be left with only first 7 characters of the
	 * object name ('my_obje'). Happy guessing, user, that the final map
	 * name will be "my_obje.kconfig".
	 * Now, with libbpf starting to support arbitrarily named .rodata.*
	 * and .data.* data sections, it's possible that ELF section name is
	 * longer than allowed 15 chars, so we now need to be careful to take
	 * only up to 15 first characters of ELF name, taking no BPF object
	 * name characters at all. So '.rodata.abracadabra' will result in
	 * '.rodata.abracad' kernel and user-visible name.
	 * We need to keep this convoluted logic intact for .data, .bss and
	 * .rodata maps, but for new custom .data.custom and .rodata.custom
	 * maps we use their ELF names as is, not prepending bpf_object name
	 * in front. We still need to truncate them to 15 characters for the
	 * kernel. Full name can be recovered for such maps by using DATASEC
	 * BTF type associated with such map's value type, though.
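	 *
	 * To summarize the examples above (BPF object named 'my_object_name'):
	 *   .rodata             -> 'my_objec.rodata'
	 *   .bss                -> 'my_objec.bss'
	 *   .kconfig            -> 'my_obje.kconfig'
	 *   .rodata.abracadabra -> '.rodata.abracad' (custom dot map, no object
	 *                          name prefix)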
1598 */ 1599 if (sfx_len >= BPF_OBJ_NAME_LEN) 1600 sfx_len = BPF_OBJ_NAME_LEN - 1; 1601 1602 /* if there are two or more dots in map name, it's a custom dot map */ 1603 if (strchr(real_name + 1, '.') != NULL) 1604 pfx_len = 0; 1605 else 1606 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); 1607 1608 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, 1609 sfx_len, real_name); 1610 1611 /* sanitise map name to characters allowed by kernel */ 1612 for (p = map_name; *p && p < map_name + sizeof(map_name); p++) 1613 if (!isalnum(*p) && *p != '_' && *p != '.') 1614 *p = '_'; 1615 1616 return strdup(map_name); 1617 } 1618 1619 static int 1620 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); 1621 1622 /* Internal BPF map is mmap()'able only if at least one of corresponding 1623 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL 1624 * variable and it's not marked as __hidden (which turns it into, effectively, 1625 * a STATIC variable). 1626 */ 1627 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) 1628 { 1629 const struct btf_type *t, *vt; 1630 struct btf_var_secinfo *vsi; 1631 int i, n; 1632 1633 if (!map->btf_value_type_id) 1634 return false; 1635 1636 t = btf__type_by_id(obj->btf, map->btf_value_type_id); 1637 if (!btf_is_datasec(t)) 1638 return false; 1639 1640 vsi = btf_var_secinfos(t); 1641 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { 1642 vt = btf__type_by_id(obj->btf, vsi->type); 1643 if (!btf_is_var(vt)) 1644 continue; 1645 1646 if (btf_var(vt)->linkage != BTF_VAR_STATIC) 1647 return true; 1648 } 1649 1650 return false; 1651 } 1652 1653 static int 1654 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, 1655 const char *real_name, int sec_idx, void *data, size_t data_sz) 1656 { 1657 struct bpf_map_def *def; 1658 struct bpf_map *map; 1659 size_t mmap_sz; 1660 int err; 1661 1662 map = bpf_object__add_map(obj); 1663 if (IS_ERR(map)) 1664 return PTR_ERR(map); 1665 1666 map->libbpf_type = type; 1667 map->sec_idx = sec_idx; 1668 map->sec_offset = 0; 1669 map->real_name = strdup(real_name); 1670 map->name = internal_map_name(obj, real_name); 1671 if (!map->real_name || !map->name) { 1672 zfree(&map->real_name); 1673 zfree(&map->name); 1674 return -ENOMEM; 1675 } 1676 1677 def = &map->def; 1678 def->type = BPF_MAP_TYPE_ARRAY; 1679 def->key_size = sizeof(int); 1680 def->value_size = data_sz; 1681 def->max_entries = 1; 1682 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG 1683 ? 
		BPF_F_RDONLY_PROG : 0;

	/* failures are fine because of maps like .rodata.str1.1 */
	(void) map_fill_btf_type_info(obj, map);

	if (map_is_mmapable(obj, map))
		def->map_flags |= BPF_F_MMAPABLE;

	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
		 map->name, map->sec_idx, map->sec_offset, def->map_flags);

	mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (map->mmaped == MAP_FAILED) {
		err = -errno;
		map->mmaped = NULL;
		pr_warn("failed to alloc map '%s' content buffer: %d\n",
			map->name, err);
		zfree(&map->real_name);
		zfree(&map->name);
		return err;
	}

	if (data)
		memcpy(map->mmaped, data, data_sz);

	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
	return 0;
}

static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
	struct elf_sec_desc *sec_desc;
	const char *sec_name;
	int err = 0, sec_idx;

	/*
	 * Populate obj->maps with libbpf internal maps.
	 */
	for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
		sec_desc = &obj->efile.secs[sec_idx];

		/* Skip recognized sections with size 0. */
		if (!sec_desc->data || sec_desc->data->d_size == 0)
			continue;

		switch (sec_desc->sec_type) {
		case SEC_DATA:
			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
							    sec_name, sec_idx,
							    sec_desc->data->d_buf,
							    sec_desc->data->d_size);
			break;
		case SEC_RODATA:
			obj->has_rodata = true;
			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
							    sec_name, sec_idx,
							    sec_desc->data->d_buf,
							    sec_desc->data->d_size);
			break;
		case SEC_BSS:
			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
							    sec_name, sec_idx,
							    NULL,
							    sec_desc->data->d_size);
			break;
		default:
			/* skip */
			break;
		}
		if (err)
			return err;
	}
	return 0;
}


static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
					       const void *name)
{
	int i;

	for (i = 0; i < obj->nr_extern; i++) {
		if (strcmp(obj->externs[i].name, name) == 0)
			return &obj->externs[i];
	}
	return NULL;
}
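/* The kcfg externs handled below come from BPF-side declarations along the
 * lines of (a sketch; __kconfig, provided by bpf_helpers.h, places the extern
 * into the ".kconfig" section):
 *
 *	extern int CONFIG_HZ __kconfig;
 *
 * Tristate, bool, char and char-array configs are handled as well; values are
 * resolved from the system Kconfig (see bpf_object__read_kconfig_file() below)
 * and written into the .kconfig map at the extern's data offset.
 */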
static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
			      char value)
{
	switch (ext->kcfg.type) {
	case KCFG_BOOL:
		if (value == 'm') {
			pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
				ext->name, value);
			return -EINVAL;
		}
		*(bool *)ext_val = value == 'y' ? true : false;
		break;
	case KCFG_TRISTATE:
		if (value == 'y')
			*(enum libbpf_tristate *)ext_val = TRI_YES;
		else if (value == 'm')
			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
		else /* value == 'n' */
			*(enum libbpf_tristate *)ext_val = TRI_NO;
		break;
	case KCFG_CHAR:
		*(char *)ext_val = value;
		break;
	case KCFG_UNKNOWN:
	case KCFG_INT:
	case KCFG_CHAR_ARR:
	default:
		pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
			ext->name, value);
		return -EINVAL;
	}
	ext->is_set = true;
	return 0;
}

static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
			      const char *value)
{
	size_t len;

	if (ext->kcfg.type != KCFG_CHAR_ARR) {
		pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
			ext->name, value);
		return -EINVAL;
	}

	len = strlen(value);
	if (value[len - 1] != '"') {
		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
			ext->name, value);
		return -EINVAL;
	}

	/* strip quotes */
	len -= 2;
	if (len >= ext->kcfg.sz) {
		pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
			ext->name, value, len, ext->kcfg.sz - 1);
		len = ext->kcfg.sz - 1;
	}
	memcpy(ext_val, value + 1, len);
	ext_val[len] = '\0';
	ext->is_set = true;
	return 0;
}

static int parse_u64(const char *value, __u64 *res)
{
	char *value_end;
	int err;

	errno = 0;
	*res = strtoull(value, &value_end, 0);
	if (errno) {
		err = -errno;
		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
		return err;
	}
	if (*value_end) {
		pr_warn("failed to parse '%s' as integer completely\n", value);
		return -EINVAL;
	}
	return 0;
}

static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
	int bit_sz = ext->kcfg.sz * 8;

	if (ext->kcfg.sz == 8)
		return true;

	/* Validate that value stored in u64 fits in integer of `ext->kcfg.sz`
	 * bytes size without any loss of information. If the target integer
	 * is signed, we rely on the following limits of integer type of
	 * Y bits and subsequent transformation:
	 *
	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
	 *            0 <= X + 2^(Y-1) <  2^Y
	 *
	 * For unsigned target integer, check that all the (64 - Y) bits are
	 * zero.
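	 *
	 * Worked example for a 1-byte signed target (Y = 8, valid range
	 * -128..127): v = 0xffffffffffffffff (i.e. -1) gives
	 * v + 128 == 127 (mod 2^64), which is < 256, so it is accepted;
	 * v = 200 gives 200 + 128 == 328 >= 256, so it is rejected.
	 * For a 1-byte unsigned target, v = 300 has non-zero bits above
	 * bit 7 (300 >> 8 == 1), so it is rejected.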
1879 */ 1880 if (ext->kcfg.is_signed) 1881 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); 1882 else 1883 return (v >> bit_sz) == 0; 1884 } 1885 1886 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 1887 __u64 value) 1888 { 1889 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 1890 ext->kcfg.type != KCFG_BOOL) { 1891 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 1892 ext->name, (unsigned long long)value); 1893 return -EINVAL; 1894 } 1895 if (ext->kcfg.type == KCFG_BOOL && value > 1) { 1896 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 1897 ext->name, (unsigned long long)value); 1898 return -EINVAL; 1899 1900 } 1901 if (!is_kcfg_value_in_range(ext, value)) { 1902 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 1903 ext->name, (unsigned long long)value, ext->kcfg.sz); 1904 return -ERANGE; 1905 } 1906 switch (ext->kcfg.sz) { 1907 case 1: 1908 *(__u8 *)ext_val = value; 1909 break; 1910 case 2: 1911 *(__u16 *)ext_val = value; 1912 break; 1913 case 4: 1914 *(__u32 *)ext_val = value; 1915 break; 1916 case 8: 1917 *(__u64 *)ext_val = value; 1918 break; 1919 default: 1920 return -EINVAL; 1921 } 1922 ext->is_set = true; 1923 return 0; 1924 } 1925 1926 static int bpf_object__process_kconfig_line(struct bpf_object *obj, 1927 char *buf, void *data) 1928 { 1929 struct extern_desc *ext; 1930 char *sep, *value; 1931 int len, err = 0; 1932 void *ext_val; 1933 __u64 num; 1934 1935 if (!str_has_pfx(buf, "CONFIG_")) 1936 return 0; 1937 1938 sep = strchr(buf, '='); 1939 if (!sep) { 1940 pr_warn("failed to parse '%s': no separator\n", buf); 1941 return -EINVAL; 1942 } 1943 1944 /* Trim ending '\n' */ 1945 len = strlen(buf); 1946 if (buf[len - 1] == '\n') 1947 buf[len - 1] = '\0'; 1948 /* Split on '=' and ensure that a value is present. */ 1949 *sep = '\0'; 1950 if (!sep[1]) { 1951 *sep = '='; 1952 pr_warn("failed to parse '%s': no value\n", buf); 1953 return -EINVAL; 1954 } 1955 1956 ext = find_extern_by_name(obj, buf); 1957 if (!ext || ext->is_set) 1958 return 0; 1959 1960 ext_val = data + ext->kcfg.data_off; 1961 value = sep + 1; 1962 1963 switch (*value) { 1964 case 'y': case 'n': case 'm': 1965 err = set_kcfg_value_tri(ext, ext_val, *value); 1966 break; 1967 case '"': 1968 err = set_kcfg_value_str(ext, ext_val, value); 1969 break; 1970 default: 1971 /* assume integer */ 1972 err = parse_u64(value, &num); 1973 if (err) { 1974 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 1975 return err; 1976 } 1977 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 1978 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 1979 return -EINVAL; 1980 } 1981 err = set_kcfg_value_num(ext, ext_val, num); 1982 break; 1983 } 1984 if (err) 1985 return err; 1986 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 1987 return 0; 1988 } 1989 1990 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) 1991 { 1992 char buf[PATH_MAX]; 1993 struct utsname uts; 1994 int len, err = 0; 1995 gzFile file; 1996 1997 uname(&uts); 1998 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); 1999 if (len < 0) 2000 return -EINVAL; 2001 else if (len >= PATH_MAX) 2002 return -ENAMETOOLONG; 2003 2004 /* gzopen also accepts uncompressed files. 
*/ 2005 file = gzopen(buf, "re"); 2006 if (!file) 2007 file = gzopen("/proc/config.gz", "re"); 2008 2009 if (!file) { 2010 pr_warn("failed to open system Kconfig\n"); 2011 return -ENOENT; 2012 } 2013 2014 while (gzgets(file, buf, sizeof(buf))) { 2015 err = bpf_object__process_kconfig_line(obj, buf, data); 2016 if (err) { 2017 pr_warn("error parsing system Kconfig line '%s': %d\n", 2018 buf, err); 2019 goto out; 2020 } 2021 } 2022 2023 out: 2024 gzclose(file); 2025 return err; 2026 } 2027 2028 static int bpf_object__read_kconfig_mem(struct bpf_object *obj, 2029 const char *config, void *data) 2030 { 2031 char buf[PATH_MAX]; 2032 int err = 0; 2033 FILE *file; 2034 2035 file = fmemopen((void *)config, strlen(config), "r"); 2036 if (!file) { 2037 err = -errno; 2038 pr_warn("failed to open in-memory Kconfig: %d\n", err); 2039 return err; 2040 } 2041 2042 while (fgets(buf, sizeof(buf), file)) { 2043 err = bpf_object__process_kconfig_line(obj, buf, data); 2044 if (err) { 2045 pr_warn("error parsing in-memory Kconfig line '%s': %d\n", 2046 buf, err); 2047 break; 2048 } 2049 } 2050 2051 fclose(file); 2052 return err; 2053 } 2054 2055 static int bpf_object__init_kconfig_map(struct bpf_object *obj) 2056 { 2057 struct extern_desc *last_ext = NULL, *ext; 2058 size_t map_sz; 2059 int i, err; 2060 2061 for (i = 0; i < obj->nr_extern; i++) { 2062 ext = &obj->externs[i]; 2063 if (ext->type == EXT_KCFG) 2064 last_ext = ext; 2065 } 2066 2067 if (!last_ext) 2068 return 0; 2069 2070 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; 2071 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, 2072 ".kconfig", obj->efile.symbols_shndx, 2073 NULL, map_sz); 2074 if (err) 2075 return err; 2076 2077 obj->kconfig_map_idx = obj->nr_maps - 1; 2078 2079 return 0; 2080 } 2081 2082 const struct btf_type * 2083 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) 2084 { 2085 const struct btf_type *t = btf__type_by_id(btf, id); 2086 2087 if (res_id) 2088 *res_id = id; 2089 2090 while (btf_is_mod(t) || btf_is_typedef(t)) { 2091 if (res_id) 2092 *res_id = t->type; 2093 t = btf__type_by_id(btf, t->type); 2094 } 2095 2096 return t; 2097 } 2098 2099 static const struct btf_type * 2100 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 2101 { 2102 const struct btf_type *t; 2103 2104 t = skip_mods_and_typedefs(btf, id, NULL); 2105 if (!btf_is_ptr(t)) 2106 return NULL; 2107 2108 t = skip_mods_and_typedefs(btf, t->type, res_id); 2109 2110 return btf_is_func_proto(t) ? 
t : NULL; 2111 } 2112 2113 static const char *__btf_kind_str(__u16 kind) 2114 { 2115 switch (kind) { 2116 case BTF_KIND_UNKN: return "void"; 2117 case BTF_KIND_INT: return "int"; 2118 case BTF_KIND_PTR: return "ptr"; 2119 case BTF_KIND_ARRAY: return "array"; 2120 case BTF_KIND_STRUCT: return "struct"; 2121 case BTF_KIND_UNION: return "union"; 2122 case BTF_KIND_ENUM: return "enum"; 2123 case BTF_KIND_FWD: return "fwd"; 2124 case BTF_KIND_TYPEDEF: return "typedef"; 2125 case BTF_KIND_VOLATILE: return "volatile"; 2126 case BTF_KIND_CONST: return "const"; 2127 case BTF_KIND_RESTRICT: return "restrict"; 2128 case BTF_KIND_FUNC: return "func"; 2129 case BTF_KIND_FUNC_PROTO: return "func_proto"; 2130 case BTF_KIND_VAR: return "var"; 2131 case BTF_KIND_DATASEC: return "datasec"; 2132 case BTF_KIND_FLOAT: return "float"; 2133 case BTF_KIND_DECL_TAG: return "decl_tag"; 2134 case BTF_KIND_TYPE_TAG: return "type_tag"; 2135 case BTF_KIND_ENUM64: return "enum64"; 2136 default: return "unknown"; 2137 } 2138 } 2139 2140 const char *btf_kind_str(const struct btf_type *t) 2141 { 2142 return __btf_kind_str(btf_kind(t)); 2143 } 2144 2145 /* 2146 * Fetch integer attribute of BTF map definition. Such attributes are 2147 * represented using a pointer to an array, in which dimensionality of array 2148 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; 2149 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF 2150 * type definition, while using only sizeof(void *) space in ELF data section. 2151 */ 2152 static bool get_map_field_int(const char *map_name, const struct btf *btf, 2153 const struct btf_member *m, __u32 *res) 2154 { 2155 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2156 const char *name = btf__name_by_offset(btf, m->name_off); 2157 const struct btf_array *arr_info; 2158 const struct btf_type *arr_t; 2159 2160 if (!btf_is_ptr(t)) { 2161 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", 2162 map_name, name, btf_kind_str(t)); 2163 return false; 2164 } 2165 2166 arr_t = btf__type_by_id(btf, t->type); 2167 if (!arr_t) { 2168 pr_warn("map '%s': attr '%s': type [%u] not found.\n", 2169 map_name, name, t->type); 2170 return false; 2171 } 2172 if (!btf_is_array(arr_t)) { 2173 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", 2174 map_name, name, btf_kind_str(arr_t)); 2175 return false; 2176 } 2177 arr_info = btf_array(arr_t); 2178 *res = arr_info->nelems; 2179 return true; 2180 } 2181 2182 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) 2183 { 2184 int len; 2185 2186 len = snprintf(buf, buf_sz, "%s/%s", path, name); 2187 if (len < 0) 2188 return -EINVAL; 2189 if (len >= buf_sz) 2190 return -ENAMETOOLONG; 2191 2192 return 0; 2193 } 2194 2195 static int build_map_pin_path(struct bpf_map *map, const char *path) 2196 { 2197 char buf[PATH_MAX]; 2198 int err; 2199 2200 if (!path) 2201 path = BPF_FS_DEFAULT_PATH; 2202 2203 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 2204 if (err) 2205 return err; 2206 2207 return bpf_map__set_pin_path(map, buf); 2208 } 2209 2210 /* should match definition in bpf_helpers.h */ 2211 enum libbpf_pin_type { 2212 LIBBPF_PIN_NONE, 2213 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 2214 LIBBPF_PIN_BY_NAME, 2215 }; 2216 2217 int parse_btf_map_def(const char *map_name, struct btf *btf, 2218 const struct btf_type *def_t, bool strict, 2219 struct btf_map_def *map_def, struct btf_map_def *inner_def) 2220 { 2221 const struct btf_type 
*t; 2222 const struct btf_member *m; 2223 bool is_inner = inner_def == NULL; 2224 int vlen, i; 2225 2226 vlen = btf_vlen(def_t); 2227 m = btf_members(def_t); 2228 for (i = 0; i < vlen; i++, m++) { 2229 const char *name = btf__name_by_offset(btf, m->name_off); 2230 2231 if (!name) { 2232 pr_warn("map '%s': invalid field #%d.\n", map_name, i); 2233 return -EINVAL; 2234 } 2235 if (strcmp(name, "type") == 0) { 2236 if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) 2237 return -EINVAL; 2238 map_def->parts |= MAP_DEF_MAP_TYPE; 2239 } else if (strcmp(name, "max_entries") == 0) { 2240 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) 2241 return -EINVAL; 2242 map_def->parts |= MAP_DEF_MAX_ENTRIES; 2243 } else if (strcmp(name, "map_flags") == 0) { 2244 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) 2245 return -EINVAL; 2246 map_def->parts |= MAP_DEF_MAP_FLAGS; 2247 } else if (strcmp(name, "numa_node") == 0) { 2248 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) 2249 return -EINVAL; 2250 map_def->parts |= MAP_DEF_NUMA_NODE; 2251 } else if (strcmp(name, "key_size") == 0) { 2252 __u32 sz; 2253 2254 if (!get_map_field_int(map_name, btf, m, &sz)) 2255 return -EINVAL; 2256 if (map_def->key_size && map_def->key_size != sz) { 2257 pr_warn("map '%s': conflicting key size %u != %u.\n", 2258 map_name, map_def->key_size, sz); 2259 return -EINVAL; 2260 } 2261 map_def->key_size = sz; 2262 map_def->parts |= MAP_DEF_KEY_SIZE; 2263 } else if (strcmp(name, "key") == 0) { 2264 __s64 sz; 2265 2266 t = btf__type_by_id(btf, m->type); 2267 if (!t) { 2268 pr_warn("map '%s': key type [%d] not found.\n", 2269 map_name, m->type); 2270 return -EINVAL; 2271 } 2272 if (!btf_is_ptr(t)) { 2273 pr_warn("map '%s': key spec is not PTR: %s.\n", 2274 map_name, btf_kind_str(t)); 2275 return -EINVAL; 2276 } 2277 sz = btf__resolve_size(btf, t->type); 2278 if (sz < 0) { 2279 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", 2280 map_name, t->type, (ssize_t)sz); 2281 return sz; 2282 } 2283 if (map_def->key_size && map_def->key_size != sz) { 2284 pr_warn("map '%s': conflicting key size %u != %zd.\n", 2285 map_name, map_def->key_size, (ssize_t)sz); 2286 return -EINVAL; 2287 } 2288 map_def->key_size = sz; 2289 map_def->key_type_id = t->type; 2290 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; 2291 } else if (strcmp(name, "value_size") == 0) { 2292 __u32 sz; 2293 2294 if (!get_map_field_int(map_name, btf, m, &sz)) 2295 return -EINVAL; 2296 if (map_def->value_size && map_def->value_size != sz) { 2297 pr_warn("map '%s': conflicting value size %u != %u.\n", 2298 map_name, map_def->value_size, sz); 2299 return -EINVAL; 2300 } 2301 map_def->value_size = sz; 2302 map_def->parts |= MAP_DEF_VALUE_SIZE; 2303 } else if (strcmp(name, "value") == 0) { 2304 __s64 sz; 2305 2306 t = btf__type_by_id(btf, m->type); 2307 if (!t) { 2308 pr_warn("map '%s': value type [%d] not found.\n", 2309 map_name, m->type); 2310 return -EINVAL; 2311 } 2312 if (!btf_is_ptr(t)) { 2313 pr_warn("map '%s': value spec is not PTR: %s.\n", 2314 map_name, btf_kind_str(t)); 2315 return -EINVAL; 2316 } 2317 sz = btf__resolve_size(btf, t->type); 2318 if (sz < 0) { 2319 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", 2320 map_name, t->type, (ssize_t)sz); 2321 return sz; 2322 } 2323 if (map_def->value_size && map_def->value_size != sz) { 2324 pr_warn("map '%s': conflicting value size %u != %zd.\n", 2325 map_name, map_def->value_size, (ssize_t)sz); 2326 return -EINVAL; 2327 } 2328 
map_def->value_size = sz; 2329 map_def->value_type_id = t->type; 2330 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; 2331 } 2332 else if (strcmp(name, "values") == 0) { 2333 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); 2334 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; 2335 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value"; 2336 char inner_map_name[128]; 2337 int err; 2338 2339 if (is_inner) { 2340 pr_warn("map '%s': multi-level inner maps not supported.\n", 2341 map_name); 2342 return -ENOTSUP; 2343 } 2344 if (i != vlen - 1) { 2345 pr_warn("map '%s': '%s' member should be last.\n", 2346 map_name, name); 2347 return -EINVAL; 2348 } 2349 if (!is_map_in_map && !is_prog_array) { 2350 pr_warn("map '%s': should be map-in-map or prog-array.\n", 2351 map_name); 2352 return -ENOTSUP; 2353 } 2354 if (map_def->value_size && map_def->value_size != 4) { 2355 pr_warn("map '%s': conflicting value size %u != 4.\n", 2356 map_name, map_def->value_size); 2357 return -EINVAL; 2358 } 2359 map_def->value_size = 4; 2360 t = btf__type_by_id(btf, m->type); 2361 if (!t) { 2362 pr_warn("map '%s': %s type [%d] not found.\n", 2363 map_name, desc, m->type); 2364 return -EINVAL; 2365 } 2366 if (!btf_is_array(t) || btf_array(t)->nelems) { 2367 pr_warn("map '%s': %s spec is not a zero-sized array.\n", 2368 map_name, desc); 2369 return -EINVAL; 2370 } 2371 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); 2372 if (!btf_is_ptr(t)) { 2373 pr_warn("map '%s': %s def is of unexpected kind %s.\n", 2374 map_name, desc, btf_kind_str(t)); 2375 return -EINVAL; 2376 } 2377 t = skip_mods_and_typedefs(btf, t->type, NULL); 2378 if (is_prog_array) { 2379 if (!btf_is_func_proto(t)) { 2380 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", 2381 map_name, btf_kind_str(t)); 2382 return -EINVAL; 2383 } 2384 continue; 2385 } 2386 if (!btf_is_struct(t)) { 2387 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", 2388 map_name, btf_kind_str(t)); 2389 return -EINVAL; 2390 } 2391 2392 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); 2393 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); 2394 if (err) 2395 return err; 2396 2397 map_def->parts |= MAP_DEF_INNER_MAP; 2398 } else if (strcmp(name, "pinning") == 0) { 2399 __u32 val; 2400 2401 if (is_inner) { 2402 pr_warn("map '%s': inner def can't be pinned.\n", map_name); 2403 return -EINVAL; 2404 } 2405 if (!get_map_field_int(map_name, btf, m, &val)) 2406 return -EINVAL; 2407 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { 2408 pr_warn("map '%s': invalid pinning value %u.\n", 2409 map_name, val); 2410 return -EINVAL; 2411 } 2412 map_def->pinning = val; 2413 map_def->parts |= MAP_DEF_PINNING; 2414 } else if (strcmp(name, "map_extra") == 0) { 2415 __u32 map_extra; 2416 2417 if (!get_map_field_int(map_name, btf, m, &map_extra)) 2418 return -EINVAL; 2419 map_def->map_extra = map_extra; 2420 map_def->parts |= MAP_DEF_MAP_EXTRA; 2421 } else { 2422 if (strict) { 2423 pr_warn("map '%s': unknown field '%s'.\n", map_name, name); 2424 return -ENOTSUP; 2425 } 2426 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name); 2427 } 2428 } 2429 2430 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) { 2431 pr_warn("map '%s': map type isn't specified.\n", map_name); 2432 return -EINVAL; 2433 } 2434 2435 return 0; 2436 } 2437 2438 static size_t adjust_ringbuf_sz(size_t sz) 2439 { 2440 __u32 page_sz = sysconf(_SC_PAGE_SIZE); 2441 
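	/* Illustrative example (added, assuming a 4096-byte page size): a
	 * requested size of 5000 is rounded up by the loop below to 8192
	 * (page_sz * 2), while an exact power-of-2 multiple such as 16384
	 * passes through unchanged.
	 */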
__u32 mul; 2442 2443 /* if user forgot to set any size, make sure they see error */ 2444 if (sz == 0) 2445 return 0; 2446 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be 2447 * a power-of-2 multiple of kernel's page size. If user diligently 2448 * satisfied these conditions, pass the size through. 2449 */ 2450 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) 2451 return sz; 2452 2453 /* Otherwise find closest (page_sz * power_of_2) product bigger than 2454 * user-set size to satisfy both user size request and kernel 2455 * requirements and substitute correct max_entries for map creation. 2456 */ 2457 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { 2458 if (mul * page_sz > sz) 2459 return mul * page_sz; 2460 } 2461 2462 /* if it's impossible to satisfy the conditions (i.e., user size is 2463 * very close to UINT_MAX but is not a power-of-2 multiple of 2464 * page_size) then just return original size and let kernel reject it 2465 */ 2466 return sz; 2467 } 2468 2469 static bool map_is_ringbuf(const struct bpf_map *map) 2470 { 2471 return map->def.type == BPF_MAP_TYPE_RINGBUF || 2472 map->def.type == BPF_MAP_TYPE_USER_RINGBUF; 2473 } 2474 2475 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) 2476 { 2477 map->def.type = def->map_type; 2478 map->def.key_size = def->key_size; 2479 map->def.value_size = def->value_size; 2480 map->def.max_entries = def->max_entries; 2481 map->def.map_flags = def->map_flags; 2482 map->map_extra = def->map_extra; 2483 2484 map->numa_node = def->numa_node; 2485 map->btf_key_type_id = def->key_type_id; 2486 map->btf_value_type_id = def->value_type_id; 2487 2488 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 2489 if (map_is_ringbuf(map)) 2490 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 2491 2492 if (def->parts & MAP_DEF_MAP_TYPE) 2493 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); 2494 2495 if (def->parts & MAP_DEF_KEY_TYPE) 2496 pr_debug("map '%s': found key [%u], sz = %u.\n", 2497 map->name, def->key_type_id, def->key_size); 2498 else if (def->parts & MAP_DEF_KEY_SIZE) 2499 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size); 2500 2501 if (def->parts & MAP_DEF_VALUE_TYPE) 2502 pr_debug("map '%s': found value [%u], sz = %u.\n", 2503 map->name, def->value_type_id, def->value_size); 2504 else if (def->parts & MAP_DEF_VALUE_SIZE) 2505 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size); 2506 2507 if (def->parts & MAP_DEF_MAX_ENTRIES) 2508 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); 2509 if (def->parts & MAP_DEF_MAP_FLAGS) 2510 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); 2511 if (def->parts & MAP_DEF_MAP_EXTRA) 2512 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, 2513 (unsigned long long)def->map_extra); 2514 if (def->parts & MAP_DEF_PINNING) 2515 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); 2516 if (def->parts & MAP_DEF_NUMA_NODE) 2517 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); 2518 2519 if (def->parts & MAP_DEF_INNER_MAP) 2520 pr_debug("map '%s': found inner map definition.\n", map->name); 2521 } 2522 2523 static const char *btf_var_linkage_str(__u32 linkage) 2524 { 2525 switch (linkage) { 2526 case BTF_VAR_STATIC: return "static"; 2527 case BTF_VAR_GLOBAL_ALLOCATED: return "global"; 2528 case BTF_VAR_GLOBAL_EXTERN: return "extern"; 2529 default: return "unknown"; 2530 }
2531 } 2532 2533 static int bpf_object__init_user_btf_map(struct bpf_object *obj, 2534 const struct btf_type *sec, 2535 int var_idx, int sec_idx, 2536 const Elf_Data *data, bool strict, 2537 const char *pin_root_path) 2538 { 2539 struct btf_map_def map_def = {}, inner_def = {}; 2540 const struct btf_type *var, *def; 2541 const struct btf_var_secinfo *vi; 2542 const struct btf_var *var_extra; 2543 const char *map_name; 2544 struct bpf_map *map; 2545 int err; 2546 2547 vi = btf_var_secinfos(sec) + var_idx; 2548 var = btf__type_by_id(obj->btf, vi->type); 2549 var_extra = btf_var(var); 2550 map_name = btf__name_by_offset(obj->btf, var->name_off); 2551 2552 if (map_name == NULL || map_name[0] == '\0') { 2553 pr_warn("map #%d: empty name.\n", var_idx); 2554 return -EINVAL; 2555 } 2556 if ((__u64)vi->offset + vi->size > data->d_size) { 2557 pr_warn("map '%s' BTF data is corrupted.\n", map_name); 2558 return -EINVAL; 2559 } 2560 if (!btf_is_var(var)) { 2561 pr_warn("map '%s': unexpected var kind %s.\n", 2562 map_name, btf_kind_str(var)); 2563 return -EINVAL; 2564 } 2565 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { 2566 pr_warn("map '%s': unsupported map linkage %s.\n", 2567 map_name, btf_var_linkage_str(var_extra->linkage)); 2568 return -EOPNOTSUPP; 2569 } 2570 2571 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 2572 if (!btf_is_struct(def)) { 2573 pr_warn("map '%s': unexpected def kind %s.\n", 2574 map_name, btf_kind_str(var)); 2575 return -EINVAL; 2576 } 2577 if (def->size > vi->size) { 2578 pr_warn("map '%s': invalid def size.\n", map_name); 2579 return -EINVAL; 2580 } 2581 2582 map = bpf_object__add_map(obj); 2583 if (IS_ERR(map)) 2584 return PTR_ERR(map); 2585 map->name = strdup(map_name); 2586 if (!map->name) { 2587 pr_warn("map '%s': failed to alloc map name.\n", map_name); 2588 return -ENOMEM; 2589 } 2590 map->libbpf_type = LIBBPF_MAP_UNSPEC; 2591 map->def.type = BPF_MAP_TYPE_UNSPEC; 2592 map->sec_idx = sec_idx; 2593 map->sec_offset = vi->offset; 2594 map->btf_var_idx = var_idx; 2595 pr_debug("map '%s': at sec_idx %d, offset %zu.\n", 2596 map_name, map->sec_idx, map->sec_offset); 2597 2598 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); 2599 if (err) 2600 return err; 2601 2602 fill_map_from_def(map, &map_def); 2603 2604 if (map_def.pinning == LIBBPF_PIN_BY_NAME) { 2605 err = build_map_pin_path(map, pin_root_path); 2606 if (err) { 2607 pr_warn("map '%s': couldn't build pin path.\n", map->name); 2608 return err; 2609 } 2610 } 2611 2612 if (map_def.parts & MAP_DEF_INNER_MAP) { 2613 map->inner_map = calloc(1, sizeof(*map->inner_map)); 2614 if (!map->inner_map) 2615 return -ENOMEM; 2616 map->inner_map->fd = -1; 2617 map->inner_map->sec_idx = sec_idx; 2618 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); 2619 if (!map->inner_map->name) 2620 return -ENOMEM; 2621 sprintf(map->inner_map->name, "%s.inner", map_name); 2622 2623 fill_map_from_def(map->inner_map, &inner_def); 2624 } 2625 2626 err = map_fill_btf_type_info(obj, map); 2627 if (err) 2628 return err; 2629 2630 return 0; 2631 } 2632 2633 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, 2634 const char *pin_root_path) 2635 { 2636 const struct btf_type *sec = NULL; 2637 int nr_types, i, vlen, err; 2638 const struct btf_type *t; 2639 const char *name; 2640 Elf_Data *data; 2641 Elf_Scn *scn; 2642 2643 if (obj->efile.btf_maps_shndx < 0) 2644 return 0; 2645 2646 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); 2647 data = elf_sec_data(obj, 
scn); 2648 if (!scn || !data) { 2649 pr_warn("elf: failed to get %s map definitions for %s\n", 2650 MAPS_ELF_SEC, obj->path); 2651 return -EINVAL; 2652 } 2653 2654 nr_types = btf__type_cnt(obj->btf); 2655 for (i = 1; i < nr_types; i++) { 2656 t = btf__type_by_id(obj->btf, i); 2657 if (!btf_is_datasec(t)) 2658 continue; 2659 name = btf__name_by_offset(obj->btf, t->name_off); 2660 if (strcmp(name, MAPS_ELF_SEC) == 0) { 2661 sec = t; 2662 obj->efile.btf_maps_sec_btf_id = i; 2663 break; 2664 } 2665 } 2666 2667 if (!sec) { 2668 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); 2669 return -ENOENT; 2670 } 2671 2672 vlen = btf_vlen(sec); 2673 for (i = 0; i < vlen; i++) { 2674 err = bpf_object__init_user_btf_map(obj, sec, i, 2675 obj->efile.btf_maps_shndx, 2676 data, strict, 2677 pin_root_path); 2678 if (err) 2679 return err; 2680 } 2681 2682 return 0; 2683 } 2684 2685 static int bpf_object__init_maps(struct bpf_object *obj, 2686 const struct bpf_object_open_opts *opts) 2687 { 2688 const char *pin_root_path; 2689 bool strict; 2690 int err = 0; 2691 2692 strict = !OPTS_GET(opts, relaxed_maps, false); 2693 pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 2694 2695 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 2696 err = err ?: bpf_object__init_global_data_maps(obj); 2697 err = err ?: bpf_object__init_kconfig_map(obj); 2698 err = err ?: bpf_object_init_struct_ops(obj); 2699 2700 return err; 2701 } 2702 2703 static bool section_have_execinstr(struct bpf_object *obj, int idx) 2704 { 2705 Elf64_Shdr *sh; 2706 2707 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); 2708 if (!sh) 2709 return false; 2710 2711 return sh->sh_flags & SHF_EXECINSTR; 2712 } 2713 2714 static bool btf_needs_sanitization(struct bpf_object *obj) 2715 { 2716 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 2717 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 2718 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 2719 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 2720 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 2721 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 2722 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 2723 2724 return !has_func || !has_datasec || !has_func_global || !has_float || 2725 !has_decl_tag || !has_type_tag || !has_enum64; 2726 } 2727 2728 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) 2729 { 2730 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 2731 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 2732 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 2733 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 2734 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 2735 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 2736 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 2737 int enum64_placeholder_id = 0; 2738 struct btf_type *t; 2739 int i, j, vlen; 2740 2741 for (i = 1; i < btf__type_cnt(btf); i++) { 2742 t = (struct btf_type *)btf__type_by_id(btf, i); 2743 2744 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { 2745 /* replace VAR/DECL_TAG with INT */ 2746 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); 2747 /* 2748 * using size = 1 is the safest choice, 4 will be too 2749 * big and cause kernel BTF validation failure if 2750 * original variable took less than 4 bytes 2751 */ 2752 t->size = 1; 2753 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); 2754 } else if (!has_datasec && btf_is_datasec(t)) { 
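			/* Added note: the kernel does not accept '.' in STRUCT names, so
			 * when this DATASEC is downgraded to a STRUCT for old kernels,
			 * dots in its name (e.g. ".rodata") are turned into underscores
			 * below.
			 */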
2755 /* replace DATASEC with STRUCT */ 2756 const struct btf_var_secinfo *v = btf_var_secinfos(t); 2757 struct btf_member *m = btf_members(t); 2758 struct btf_type *vt; 2759 char *name; 2760 2761 name = (char *)btf__name_by_offset(btf, t->name_off); 2762 while (*name) { 2763 if (*name == '.') 2764 *name = '_'; 2765 name++; 2766 } 2767 2768 vlen = btf_vlen(t); 2769 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); 2770 for (j = 0; j < vlen; j++, v++, m++) { 2771 /* order of field assignments is important */ 2772 m->offset = v->offset * 8; 2773 m->type = v->type; 2774 /* preserve variable name as member name */ 2775 vt = (void *)btf__type_by_id(btf, v->type); 2776 m->name_off = vt->name_off; 2777 } 2778 } else if (!has_func && btf_is_func_proto(t)) { 2779 /* replace FUNC_PROTO with ENUM */ 2780 vlen = btf_vlen(t); 2781 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); 2782 t->size = sizeof(__u32); /* kernel enforced */ 2783 } else if (!has_func && btf_is_func(t)) { 2784 /* replace FUNC with TYPEDEF */ 2785 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 2786 } else if (!has_func_global && btf_is_func(t)) { 2787 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 2788 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 2789 } else if (!has_float && btf_is_float(t)) { 2790 /* replace FLOAT with an equally-sized empty STRUCT; 2791 * since C compilers do not accept e.g. "float" as a 2792 * valid struct name, make it anonymous 2793 */ 2794 t->name_off = 0; 2795 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); 2796 } else if (!has_type_tag && btf_is_type_tag(t)) { 2797 /* replace TYPE_TAG with a CONST */ 2798 t->name_off = 0; 2799 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); 2800 } else if (!has_enum64 && btf_is_enum(t)) { 2801 /* clear the kflag */ 2802 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); 2803 } else if (!has_enum64 && btf_is_enum64(t)) { 2804 /* replace ENUM64 with a union */ 2805 struct btf_member *m; 2806 2807 if (enum64_placeholder_id == 0) { 2808 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); 2809 if (enum64_placeholder_id < 0) 2810 return enum64_placeholder_id; 2811 2812 t = (struct btf_type *)btf__type_by_id(btf, i); 2813 } 2814 2815 m = btf_members(t); 2816 vlen = btf_vlen(t); 2817 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); 2818 for (j = 0; j < vlen; j++, m++) { 2819 m->type = enum64_placeholder_id; 2820 m->offset = 0; 2821 } 2822 } 2823 } 2824 2825 return 0; 2826 } 2827 2828 static bool libbpf_needs_btf(const struct bpf_object *obj) 2829 { 2830 return obj->efile.btf_maps_shndx >= 0 || 2831 obj->efile.st_ops_shndx >= 0 || 2832 obj->efile.st_ops_link_shndx >= 0 || 2833 obj->nr_extern > 0; 2834 } 2835 2836 static bool kernel_needs_btf(const struct bpf_object *obj) 2837 { 2838 return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0; 2839 } 2840 2841 static int bpf_object__init_btf(struct bpf_object *obj, 2842 Elf_Data *btf_data, 2843 Elf_Data *btf_ext_data) 2844 { 2845 int err = -ENOENT; 2846 2847 if (btf_data) { 2848 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 2849 err = libbpf_get_error(obj->btf); 2850 if (err) { 2851 obj->btf = NULL; 2852 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); 2853 goto out; 2854 } 2855 /* enforce 8-byte pointers for BPF-targeted BTFs */ 2856 btf__set_pointer_size(obj->btf, 8); 2857 } 2858 if (btf_ext_data) { 2859 struct btf_ext_info *ext_segs[3]; 2860 int seg_num, sec_num; 2861 2862 if (!obj->btf) { 2863 pr_debug("Ignore ELF section %s because its depending ELF section %s is 
not found.\n", 2864 BTF_EXT_ELF_SEC, BTF_ELF_SEC); 2865 goto out; 2866 } 2867 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); 2868 err = libbpf_get_error(obj->btf_ext); 2869 if (err) { 2870 pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", 2871 BTF_EXT_ELF_SEC, err); 2872 obj->btf_ext = NULL; 2873 goto out; 2874 } 2875 2876 /* setup .BTF.ext to ELF section mapping */ 2877 ext_segs[0] = &obj->btf_ext->func_info; 2878 ext_segs[1] = &obj->btf_ext->line_info; 2879 ext_segs[2] = &obj->btf_ext->core_relo_info; 2880 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 2881 struct btf_ext_info *seg = ext_segs[seg_num]; 2882 const struct btf_ext_info_sec *sec; 2883 const char *sec_name; 2884 Elf_Scn *scn; 2885 2886 if (seg->sec_cnt == 0) 2887 continue; 2888 2889 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 2890 if (!seg->sec_idxs) { 2891 err = -ENOMEM; 2892 goto out; 2893 } 2894 2895 sec_num = 0; 2896 for_each_btf_ext_sec(seg, sec) { 2897 /* preventively increment index to avoid doing 2898 * this before every continue below 2899 */ 2900 sec_num++; 2901 2902 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 2903 if (str_is_empty(sec_name)) 2904 continue; 2905 scn = elf_sec_by_name(obj, sec_name); 2906 if (!scn) 2907 continue; 2908 2909 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 2910 } 2911 } 2912 } 2913 out: 2914 if (err && libbpf_needs_btf(obj)) { 2915 pr_warn("BTF is required, but is missing or corrupted.\n"); 2916 return err; 2917 } 2918 return 0; 2919 } 2920 2921 static int compare_vsi_off(const void *_a, const void *_b) 2922 { 2923 const struct btf_var_secinfo *a = _a; 2924 const struct btf_var_secinfo *b = _b; 2925 2926 return a->offset - b->offset; 2927 } 2928 2929 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, 2930 struct btf_type *t) 2931 { 2932 __u32 size = 0, i, vars = btf_vlen(t); 2933 const char *sec_name = btf__name_by_offset(btf, t->name_off); 2934 struct btf_var_secinfo *vsi; 2935 bool fixup_offsets = false; 2936 int err; 2937 2938 if (!sec_name) { 2939 pr_debug("No name found in string section for DATASEC kind.\n"); 2940 return -ENOENT; 2941 } 2942 2943 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and 2944 * variable offsets set at the previous step. Further, not every 2945 * extern BTF VAR has corresponding ELF symbol preserved, so we skip 2946 * all fixups altogether for such sections and go straight to sorting 2947 * VARs within their DATASEC. 2948 */ 2949 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) 2950 goto sort_vars; 2951 2952 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to 2953 * fix this up. But BPF static linker already fixes this up and fills 2954 * all the sizes and offsets during static linking. So this step has 2955 * to be optional. But the STV_HIDDEN handling is non-optional for any 2956 * non-extern DATASEC, so the variable fixup loop below handles both 2957 * functions at the same time, paying the cost of BTF VAR <-> ELF 2958 * symbol matching just once. 
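 *
 * (Added illustration: for a global 'int foo' in .data, the compiler may
 * leave the DATASEC size and foo's offset as zeroes; the fixups below fill
 * the size in from the ELF section and the offset from foo's ELF symbol
 * value.)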
2959 */ 2960 if (t->size == 0) { 2961 err = find_elf_sec_sz(obj, sec_name, &size); 2962 if (err || !size) { 2963 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", 2964 sec_name, size, err); 2965 return -ENOENT; 2966 } 2967 2968 t->size = size; 2969 fixup_offsets = true; 2970 } 2971 2972 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { 2973 const struct btf_type *t_var; 2974 struct btf_var *var; 2975 const char *var_name; 2976 Elf64_Sym *sym; 2977 2978 t_var = btf__type_by_id(btf, vsi->type); 2979 if (!t_var || !btf_is_var(t_var)) { 2980 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); 2981 return -EINVAL; 2982 } 2983 2984 var = btf_var(t_var); 2985 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) 2986 continue; 2987 2988 var_name = btf__name_by_offset(btf, t_var->name_off); 2989 if (!var_name) { 2990 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", 2991 sec_name, i); 2992 return -ENOENT; 2993 } 2994 2995 sym = find_elf_var_sym(obj, var_name); 2996 if (IS_ERR(sym)) { 2997 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", 2998 sec_name, var_name); 2999 return -ENOENT; 3000 } 3001 3002 if (fixup_offsets) 3003 vsi->offset = sym->st_value; 3004 3005 /* if variable is a global/weak symbol, but has restricted 3006 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR 3007 * as static. This follows similar logic for functions (BPF 3008 * subprogs) and influences libbpf's further decisions about 3009 * whether to make global data BPF array maps as 3010 * BPF_F_MMAPABLE. 3011 */ 3012 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 3013 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) 3014 var->linkage = BTF_VAR_STATIC; 3015 } 3016 3017 sort_vars: 3018 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 3019 return 0; 3020 } 3021 3022 static int bpf_object_fixup_btf(struct bpf_object *obj) 3023 { 3024 int i, n, err = 0; 3025 3026 if (!obj->btf) 3027 return 0; 3028 3029 n = btf__type_cnt(obj->btf); 3030 for (i = 1; i < n; i++) { 3031 struct btf_type *t = btf_type_by_id(obj->btf, i); 3032 3033 /* Loader needs to fix up some of the things compiler 3034 * couldn't get its hands on while emitting BTF. This 3035 * is section size and global variable offset. We use 3036 * the info from the ELF itself for this purpose. 
3037 */ 3038 if (btf_is_datasec(t)) { 3039 err = btf_fixup_datasec(obj, obj->btf, t); 3040 if (err) 3041 return err; 3042 } 3043 } 3044 3045 return 0; 3046 } 3047 3048 static bool prog_needs_vmlinux_btf(struct bpf_program *prog) 3049 { 3050 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || 3051 prog->type == BPF_PROG_TYPE_LSM) 3052 return true; 3053 3054 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 3055 * also need vmlinux BTF 3056 */ 3057 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 3058 return true; 3059 3060 return false; 3061 } 3062 3063 static bool map_needs_vmlinux_btf(struct bpf_map *map) 3064 { 3065 return bpf_map__is_struct_ops(map); 3066 } 3067 3068 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) 3069 { 3070 struct bpf_program *prog; 3071 struct bpf_map *map; 3072 int i; 3073 3074 /* CO-RE relocations need kernel BTF, only when btf_custom_path 3075 * is not specified 3076 */ 3077 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 3078 return true; 3079 3080 /* Support for typed ksyms needs kernel BTF */ 3081 for (i = 0; i < obj->nr_extern; i++) { 3082 const struct extern_desc *ext; 3083 3084 ext = &obj->externs[i]; 3085 if (ext->type == EXT_KSYM && ext->ksym.type_id) 3086 return true; 3087 } 3088 3089 bpf_object__for_each_program(prog, obj) { 3090 if (!prog->autoload) 3091 continue; 3092 if (prog_needs_vmlinux_btf(prog)) 3093 return true; 3094 } 3095 3096 bpf_object__for_each_map(map, obj) { 3097 if (map_needs_vmlinux_btf(map)) 3098 return true; 3099 } 3100 3101 return false; 3102 } 3103 3104 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) 3105 { 3106 int err; 3107 3108 /* btf_vmlinux could be loaded earlier */ 3109 if (obj->btf_vmlinux || obj->gen_loader) 3110 return 0; 3111 3112 if (!force && !obj_needs_vmlinux_btf(obj)) 3113 return 0; 3114 3115 obj->btf_vmlinux = btf__load_vmlinux_btf(); 3116 err = libbpf_get_error(obj->btf_vmlinux); 3117 if (err) { 3118 pr_warn("Error loading vmlinux BTF: %d\n", err); 3119 obj->btf_vmlinux = NULL; 3120 return err; 3121 } 3122 return 0; 3123 } 3124 3125 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) 3126 { 3127 struct btf *kern_btf = obj->btf; 3128 bool btf_mandatory, sanitize; 3129 int i, err = 0; 3130 3131 if (!obj->btf) 3132 return 0; 3133 3134 if (!kernel_supports(obj, FEAT_BTF)) { 3135 if (kernel_needs_btf(obj)) { 3136 err = -EOPNOTSUPP; 3137 goto report; 3138 } 3139 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); 3140 return 0; 3141 } 3142 3143 /* Even though some subprogs are global/weak, user might prefer more 3144 * permissive BPF verification process that BPF verifier performs for 3145 * static functions, taking into account more context from the caller 3146 * functions. In such case, they need to mark such subprogs with 3147 * __attribute__((visibility("hidden"))) and libbpf will adjust 3148 * corresponding FUNC BTF type to be marked as static and trigger more 3149 * involved BPF verification process. 
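 *
 * (Added illustration: declaring a subprog in BPF C with the __hidden
 * macro from bpf_helpers.h, i.e. __attribute__((visibility("hidden"))),
 * sets prog->mark_btf_static, and the loop below then flips the matching
 * FUNC's linkage from BTF_FUNC_GLOBAL to BTF_FUNC_STATIC.)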
3150 */ 3151 for (i = 0; i < obj->nr_programs; i++) { 3152 struct bpf_program *prog = &obj->programs[i]; 3153 struct btf_type *t; 3154 const char *name; 3155 int j, n; 3156 3157 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) 3158 continue; 3159 3160 n = btf__type_cnt(obj->btf); 3161 for (j = 1; j < n; j++) { 3162 t = btf_type_by_id(obj->btf, j); 3163 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) 3164 continue; 3165 3166 name = btf__str_by_offset(obj->btf, t->name_off); 3167 if (strcmp(name, prog->name) != 0) 3168 continue; 3169 3170 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); 3171 break; 3172 } 3173 } 3174 3175 if (!kernel_supports(obj, FEAT_BTF_DECL_TAG)) 3176 goto skip_exception_cb; 3177 for (i = 0; i < obj->nr_programs; i++) { 3178 struct bpf_program *prog = &obj->programs[i]; 3179 int j, k, n; 3180 3181 if (prog_is_subprog(obj, prog)) 3182 continue; 3183 n = btf__type_cnt(obj->btf); 3184 for (j = 1; j < n; j++) { 3185 const char *str = "exception_callback:", *name; 3186 size_t len = strlen(str); 3187 struct btf_type *t; 3188 3189 t = btf_type_by_id(obj->btf, j); 3190 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) 3191 continue; 3192 3193 name = btf__str_by_offset(obj->btf, t->name_off); 3194 if (strncmp(name, str, len)) 3195 continue; 3196 3197 t = btf_type_by_id(obj->btf, t->type); 3198 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 3199 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", 3200 prog->name); 3201 return -EINVAL; 3202 } 3203 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off))) 3204 continue; 3205 /* Multiple callbacks are specified for the same prog, 3206 * the verifier will eventually return an error for this 3207 * case, hence simply skip appending a subprog. 3208 */ 3209 if (prog->exception_cb_idx >= 0) { 3210 prog->exception_cb_idx = -1; 3211 break; 3212 } 3213 3214 name += len; 3215 if (str_is_empty(name)) { 3216 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", 3217 prog->name); 3218 return -EINVAL; 3219 } 3220 3221 for (k = 0; k < obj->nr_programs; k++) { 3222 struct bpf_program *subprog = &obj->programs[k]; 3223 3224 if (!prog_is_subprog(obj, subprog)) 3225 continue; 3226 if (strcmp(name, subprog->name)) 3227 continue; 3228 /* Enforce non-hidden, as from verifier point of 3229 * view it expects global functions, whereas the 3230 * mark_btf_static fixes up linkage as static. 
3231 */ 3232 if (!subprog->sym_global || subprog->mark_btf_static) { 3233 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", 3234 prog->name, subprog->name); 3235 return -EINVAL; 3236 } 3237 /* Let's see if we already saw a static exception callback with the same name */ 3238 if (prog->exception_cb_idx >= 0) { 3239 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", 3240 prog->name, subprog->name); 3241 return -EINVAL; 3242 } 3243 prog->exception_cb_idx = k; 3244 break; 3245 } 3246 3247 if (prog->exception_cb_idx >= 0) 3248 continue; 3249 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); 3250 return -ENOENT; 3251 } 3252 } 3253 skip_exception_cb: 3254 3255 sanitize = btf_needs_sanitization(obj); 3256 if (sanitize) { 3257 const void *raw_data; 3258 __u32 sz; 3259 3260 /* clone BTF to sanitize a copy and leave the original intact */ 3261 raw_data = btf__raw_data(obj->btf, &sz); 3262 kern_btf = btf__new(raw_data, sz); 3263 err = libbpf_get_error(kern_btf); 3264 if (err) 3265 return err; 3266 3267 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3268 btf__set_pointer_size(obj->btf, 8); 3269 err = bpf_object__sanitize_btf(obj, kern_btf); 3270 if (err) 3271 return err; 3272 } 3273 3274 if (obj->gen_loader) { 3275 __u32 raw_size = 0; 3276 const void *raw_data = btf__raw_data(kern_btf, &raw_size); 3277 3278 if (!raw_data) 3279 return -ENOMEM; 3280 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); 3281 /* Pretend to have valid FD to pass various fd >= 0 checks. 3282 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 3283 */ 3284 btf__set_fd(kern_btf, 0); 3285 } else { 3286 /* currently BPF_BTF_LOAD only supports log_level 1 */ 3287 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, 3288 obj->log_level ? 1 : 0, obj->token_fd); 3289 } 3290 if (sanitize) { 3291 if (!err) { 3292 /* move fd to libbpf's BTF */ 3293 btf__set_fd(obj->btf, btf__fd(kern_btf)); 3294 btf__set_fd(kern_btf, -1); 3295 } 3296 btf__free(kern_btf); 3297 } 3298 report: 3299 if (err) { 3300 btf_mandatory = kernel_needs_btf(obj); 3301 pr_warn("Error loading .BTF into kernel: %d. %s\n", err, 3302 btf_mandatory ? "BTF is mandatory, can't proceed." 
3303 : "BTF is optional, ignoring."); 3304 if (!btf_mandatory) 3305 err = 0; 3306 } 3307 return err; 3308 } 3309 3310 static const char *elf_sym_str(const struct bpf_object *obj, size_t off) 3311 { 3312 const char *name; 3313 3314 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); 3315 if (!name) { 3316 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3317 off, obj->path, elf_errmsg(-1)); 3318 return NULL; 3319 } 3320 3321 return name; 3322 } 3323 3324 static const char *elf_sec_str(const struct bpf_object *obj, size_t off) 3325 { 3326 const char *name; 3327 3328 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); 3329 if (!name) { 3330 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3331 off, obj->path, elf_errmsg(-1)); 3332 return NULL; 3333 } 3334 3335 return name; 3336 } 3337 3338 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) 3339 { 3340 Elf_Scn *scn; 3341 3342 scn = elf_getscn(obj->efile.elf, idx); 3343 if (!scn) { 3344 pr_warn("elf: failed to get section(%zu) from %s: %s\n", 3345 idx, obj->path, elf_errmsg(-1)); 3346 return NULL; 3347 } 3348 return scn; 3349 } 3350 3351 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) 3352 { 3353 Elf_Scn *scn = NULL; 3354 Elf *elf = obj->efile.elf; 3355 const char *sec_name; 3356 3357 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3358 sec_name = elf_sec_name(obj, scn); 3359 if (!sec_name) 3360 return NULL; 3361 3362 if (strcmp(sec_name, name) != 0) 3363 continue; 3364 3365 return scn; 3366 } 3367 return NULL; 3368 } 3369 3370 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) 3371 { 3372 Elf64_Shdr *shdr; 3373 3374 if (!scn) 3375 return NULL; 3376 3377 shdr = elf64_getshdr(scn); 3378 if (!shdr) { 3379 pr_warn("elf: failed to get section(%zu) header from %s: %s\n", 3380 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3381 return NULL; 3382 } 3383 3384 return shdr; 3385 } 3386 3387 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) 3388 { 3389 const char *name; 3390 Elf64_Shdr *sh; 3391 3392 if (!scn) 3393 return NULL; 3394 3395 sh = elf_sec_hdr(obj, scn); 3396 if (!sh) 3397 return NULL; 3398 3399 name = elf_sec_str(obj, sh->sh_name); 3400 if (!name) { 3401 pr_warn("elf: failed to get section(%zu) name from %s: %s\n", 3402 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3403 return NULL; 3404 } 3405 3406 return name; 3407 } 3408 3409 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) 3410 { 3411 Elf_Data *data; 3412 3413 if (!scn) 3414 return NULL; 3415 3416 data = elf_getdata(scn, 0); 3417 if (!data) { 3418 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", 3419 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>", 3420 obj->path, elf_errmsg(-1)); 3421 return NULL; 3422 } 3423 3424 return data; 3425 } 3426 3427 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) 3428 { 3429 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) 3430 return NULL; 3431 3432 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; 3433 } 3434 3435 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) 3436 { 3437 if (idx >= data->d_size / sizeof(Elf64_Rel)) 3438 return NULL; 3439 3440 return (Elf64_Rel *)data->d_buf + idx; 3441 } 3442 3443 static bool is_sec_name_dwarf(const char *name) 3444 { 3445 /* approximation, but the actual list is too long */ 3446 return str_has_pfx(name, ".debug_"); 3447 } 3448 3449 static bool 
ignore_elf_section(Elf64_Shdr *hdr, const char *name) 3450 { 3451 /* no special handling of .strtab */ 3452 if (hdr->sh_type == SHT_STRTAB) 3453 return true; 3454 3455 /* ignore .llvm_addrsig section as well */ 3456 if (hdr->sh_type == SHT_LLVM_ADDRSIG) 3457 return true; 3458 3459 /* no subprograms will lead to an empty .text section, ignore it */ 3460 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && 3461 strcmp(name, ".text") == 0) 3462 return true; 3463 3464 /* DWARF sections */ 3465 if (is_sec_name_dwarf(name)) 3466 return true; 3467 3468 if (str_has_pfx(name, ".rel")) { 3469 name += sizeof(".rel") - 1; 3470 /* DWARF section relocations */ 3471 if (is_sec_name_dwarf(name)) 3472 return true; 3473 3474 /* .BTF and .BTF.ext don't need relocations */ 3475 if (strcmp(name, BTF_ELF_SEC) == 0 || 3476 strcmp(name, BTF_EXT_ELF_SEC) == 0) 3477 return true; 3478 } 3479 3480 return false; 3481 } 3482 3483 static int cmp_progs(const void *_a, const void *_b) 3484 { 3485 const struct bpf_program *a = _a; 3486 const struct bpf_program *b = _b; 3487 3488 if (a->sec_idx != b->sec_idx) 3489 return a->sec_idx < b->sec_idx ? -1 : 1; 3490 3491 /* sec_insn_off can't be the same within the section */ 3492 return a->sec_insn_off < b->sec_insn_off ? -1 : 1; 3493 } 3494 3495 static int bpf_object__elf_collect(struct bpf_object *obj) 3496 { 3497 struct elf_sec_desc *sec_desc; 3498 Elf *elf = obj->efile.elf; 3499 Elf_Data *btf_ext_data = NULL; 3500 Elf_Data *btf_data = NULL; 3501 int idx = 0, err = 0; 3502 const char *name; 3503 Elf_Data *data; 3504 Elf_Scn *scn; 3505 Elf64_Shdr *sh; 3506 3507 /* ELF section indices are 0-based, but sec #0 is special "invalid" 3508 * section. Since section count retrieved by elf_getshdrnum() does 3509 * include sec #0, it is already the necessary size of an array to keep 3510 * all the sections. 
3511 */ 3512 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { 3513 pr_warn("elf: failed to get the number of sections for %s: %s\n", 3514 obj->path, elf_errmsg(-1)); 3515 return -LIBBPF_ERRNO__FORMAT; 3516 } 3517 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); 3518 if (!obj->efile.secs) 3519 return -ENOMEM; 3520 3521 /* a bunch of ELF parsing functionality depends on processing symbols, 3522 * so do the first pass and find the symbol table 3523 */ 3524 scn = NULL; 3525 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3526 sh = elf_sec_hdr(obj, scn); 3527 if (!sh) 3528 return -LIBBPF_ERRNO__FORMAT; 3529 3530 if (sh->sh_type == SHT_SYMTAB) { 3531 if (obj->efile.symbols) { 3532 pr_warn("elf: multiple symbol tables in %s\n", obj->path); 3533 return -LIBBPF_ERRNO__FORMAT; 3534 } 3535 3536 data = elf_sec_data(obj, scn); 3537 if (!data) 3538 return -LIBBPF_ERRNO__FORMAT; 3539 3540 idx = elf_ndxscn(scn); 3541 3542 obj->efile.symbols = data; 3543 obj->efile.symbols_shndx = idx; 3544 obj->efile.strtabidx = sh->sh_link; 3545 } 3546 } 3547 3548 if (!obj->efile.symbols) { 3549 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", 3550 obj->path); 3551 return -ENOENT; 3552 } 3553 3554 scn = NULL; 3555 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3556 idx = elf_ndxscn(scn); 3557 sec_desc = &obj->efile.secs[idx]; 3558 3559 sh = elf_sec_hdr(obj, scn); 3560 if (!sh) 3561 return -LIBBPF_ERRNO__FORMAT; 3562 3563 name = elf_sec_str(obj, sh->sh_name); 3564 if (!name) 3565 return -LIBBPF_ERRNO__FORMAT; 3566 3567 if (ignore_elf_section(sh, name)) 3568 continue; 3569 3570 data = elf_sec_data(obj, scn); 3571 if (!data) 3572 return -LIBBPF_ERRNO__FORMAT; 3573 3574 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 3575 idx, name, (unsigned long)data->d_size, 3576 (int)sh->sh_link, (unsigned long)sh->sh_flags, 3577 (int)sh->sh_type); 3578 3579 if (strcmp(name, "license") == 0) { 3580 err = bpf_object__init_license(obj, data->d_buf, data->d_size); 3581 if (err) 3582 return err; 3583 } else if (strcmp(name, "version") == 0) { 3584 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); 3585 if (err) 3586 return err; 3587 } else if (strcmp(name, "maps") == 0) { 3588 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); 3589 return -ENOTSUP; 3590 } else if (strcmp(name, MAPS_ELF_SEC) == 0) { 3591 obj->efile.btf_maps_shndx = idx; 3592 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 3593 if (sh->sh_type != SHT_PROGBITS) 3594 return -LIBBPF_ERRNO__FORMAT; 3595 btf_data = data; 3596 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 3597 if (sh->sh_type != SHT_PROGBITS) 3598 return -LIBBPF_ERRNO__FORMAT; 3599 btf_ext_data = data; 3600 } else if (sh->sh_type == SHT_SYMTAB) { 3601 /* already processed during the first pass above */ 3602 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { 3603 if (sh->sh_flags & SHF_EXECINSTR) { 3604 if (strcmp(name, ".text") == 0) 3605 obj->efile.text_shndx = idx; 3606 err = bpf_object__add_programs(obj, data, name, idx); 3607 if (err) 3608 return err; 3609 } else if (strcmp(name, DATA_SEC) == 0 || 3610 str_has_pfx(name, DATA_SEC ".")) { 3611 sec_desc->sec_type = SEC_DATA; 3612 sec_desc->shdr = sh; 3613 sec_desc->data = data; 3614 } else if (strcmp(name, RODATA_SEC) == 0 || 3615 str_has_pfx(name, RODATA_SEC ".")) { 3616 sec_desc->sec_type = SEC_RODATA; 3617 sec_desc->shdr = sh; 3618 sec_desc->data = data; 3619 } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { 3620 
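				/* added note: stash the .struct_ops section's data and index;
				 * struct_ops maps are created from it later via
				 * bpf_object_init_struct_ops()
				 */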
obj->efile.st_ops_data = data; 3621 obj->efile.st_ops_shndx = idx; 3622 } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { 3623 obj->efile.st_ops_link_data = data; 3624 obj->efile.st_ops_link_shndx = idx; 3625 } else { 3626 pr_info("elf: skipping unrecognized data section(%d) %s\n", 3627 idx, name); 3628 } 3629 } else if (sh->sh_type == SHT_REL) { 3630 int targ_sec_idx = sh->sh_info; /* points to other section */ 3631 3632 if (sh->sh_entsize != sizeof(Elf64_Rel) || 3633 targ_sec_idx >= obj->efile.sec_cnt) 3634 return -LIBBPF_ERRNO__FORMAT; 3635 3636 /* Only do relo for section with exec instructions */ 3637 if (!section_have_execinstr(obj, targ_sec_idx) && 3638 strcmp(name, ".rel" STRUCT_OPS_SEC) && 3639 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && 3640 strcmp(name, ".rel" MAPS_ELF_SEC)) { 3641 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", 3642 idx, name, targ_sec_idx, 3643 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); 3644 continue; 3645 } 3646 3647 sec_desc->sec_type = SEC_RELO; 3648 sec_desc->shdr = sh; 3649 sec_desc->data = data; 3650 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || 3651 str_has_pfx(name, BSS_SEC "."))) { 3652 sec_desc->sec_type = SEC_BSS; 3653 sec_desc->shdr = sh; 3654 sec_desc->data = data; 3655 } else { 3656 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, 3657 (size_t)sh->sh_size); 3658 } 3659 } 3660 3661 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { 3662 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); 3663 return -LIBBPF_ERRNO__FORMAT; 3664 } 3665 3666 /* sort BPF programs by section name and in-section instruction offset 3667 * for faster search 3668 */ 3669 if (obj->nr_programs) 3670 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); 3671 3672 return bpf_object__init_btf(obj, btf_data, btf_ext_data); 3673 } 3674 3675 static bool sym_is_extern(const Elf64_Sym *sym) 3676 { 3677 int bind = ELF64_ST_BIND(sym->st_info); 3678 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ 3679 return sym->st_shndx == SHN_UNDEF && 3680 (bind == STB_GLOBAL || bind == STB_WEAK) && 3681 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; 3682 } 3683 3684 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) 3685 { 3686 int bind = ELF64_ST_BIND(sym->st_info); 3687 int type = ELF64_ST_TYPE(sym->st_info); 3688 3689 /* in .text section */ 3690 if (sym->st_shndx != text_shndx) 3691 return false; 3692 3693 /* local function */ 3694 if (bind == STB_LOCAL && type == STT_SECTION) 3695 return true; 3696 3697 /* global function */ 3698 return bind == STB_GLOBAL && type == STT_FUNC; 3699 } 3700 3701 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 3702 { 3703 const struct btf_type *t; 3704 const char *tname; 3705 int i, n; 3706 3707 if (!btf) 3708 return -ESRCH; 3709 3710 n = btf__type_cnt(btf); 3711 for (i = 1; i < n; i++) { 3712 t = btf__type_by_id(btf, i); 3713 3714 if (!btf_is_var(t) && !btf_is_func(t)) 3715 continue; 3716 3717 tname = btf__name_by_offset(btf, t->name_off); 3718 if (strcmp(tname, ext_name)) 3719 continue; 3720 3721 if (btf_is_var(t) && 3722 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 3723 return -EINVAL; 3724 3725 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 3726 return -EINVAL; 3727 3728 return i; 3729 } 3730 3731 return -ENOENT; 3732 } 3733 3734 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { 3735 const struct btf_var_secinfo *vs; 3736 const 
struct btf_type *t; 3737 int i, j, n; 3738 3739 if (!btf) 3740 return -ESRCH; 3741 3742 n = btf__type_cnt(btf); 3743 for (i = 1; i < n; i++) { 3744 t = btf__type_by_id(btf, i); 3745 3746 if (!btf_is_datasec(t)) 3747 continue; 3748 3749 vs = btf_var_secinfos(t); 3750 for (j = 0; j < btf_vlen(t); j++, vs++) { 3751 if (vs->type == ext_btf_id) 3752 return i; 3753 } 3754 } 3755 3756 return -ENOENT; 3757 } 3758 3759 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, 3760 bool *is_signed) 3761 { 3762 const struct btf_type *t; 3763 const char *name; 3764 3765 t = skip_mods_and_typedefs(btf, id, NULL); 3766 name = btf__name_by_offset(btf, t->name_off); 3767 3768 if (is_signed) 3769 *is_signed = false; 3770 switch (btf_kind(t)) { 3771 case BTF_KIND_INT: { 3772 int enc = btf_int_encoding(t); 3773 3774 if (enc & BTF_INT_BOOL) 3775 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; 3776 if (is_signed) 3777 *is_signed = enc & BTF_INT_SIGNED; 3778 if (t->size == 1) 3779 return KCFG_CHAR; 3780 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) 3781 return KCFG_UNKNOWN; 3782 return KCFG_INT; 3783 } 3784 case BTF_KIND_ENUM: 3785 if (t->size != 4) 3786 return KCFG_UNKNOWN; 3787 if (strcmp(name, "libbpf_tristate")) 3788 return KCFG_UNKNOWN; 3789 return KCFG_TRISTATE; 3790 case BTF_KIND_ENUM64: 3791 if (strcmp(name, "libbpf_tristate")) 3792 return KCFG_UNKNOWN; 3793 return KCFG_TRISTATE; 3794 case BTF_KIND_ARRAY: 3795 if (btf_array(t)->nelems == 0) 3796 return KCFG_UNKNOWN; 3797 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) 3798 return KCFG_UNKNOWN; 3799 return KCFG_CHAR_ARR; 3800 default: 3801 return KCFG_UNKNOWN; 3802 } 3803 } 3804 3805 static int cmp_externs(const void *_a, const void *_b) 3806 { 3807 const struct extern_desc *a = _a; 3808 const struct extern_desc *b = _b; 3809 3810 if (a->type != b->type) 3811 return a->type < b->type ? -1 : 1; 3812 3813 if (a->type == EXT_KCFG) { 3814 /* descending order by alignment requirements */ 3815 if (a->kcfg.align != b->kcfg.align) 3816 return a->kcfg.align > b->kcfg.align ? -1 : 1; 3817 /* ascending order by size, within same alignment class */ 3818 if (a->kcfg.sz != b->kcfg.sz) 3819 return a->kcfg.sz < b->kcfg.sz ? -1 : 1; 3820 } 3821 3822 /* resolve ties by name */ 3823 return strcmp(a->name, b->name); 3824 } 3825 3826 static int find_int_btf_id(const struct btf *btf) 3827 { 3828 const struct btf_type *t; 3829 int i, n; 3830 3831 n = btf__type_cnt(btf); 3832 for (i = 1; i < n; i++) { 3833 t = btf__type_by_id(btf, i); 3834 3835 if (btf_is_int(t) && btf_int_bits(t) == 32) 3836 return i; 3837 } 3838 3839 return 0; 3840 } 3841 3842 static int add_dummy_ksym_var(struct btf *btf) 3843 { 3844 int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 3845 const struct btf_var_secinfo *vs; 3846 const struct btf_type *sec; 3847 3848 if (!btf) 3849 return 0; 3850 3851 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 3852 BTF_KIND_DATASEC); 3853 if (sec_btf_id < 0) 3854 return 0; 3855 3856 sec = btf__type_by_id(btf, sec_btf_id); 3857 vs = btf_var_secinfos(sec); 3858 for (i = 0; i < btf_vlen(sec); i++, vs++) { 3859 const struct btf_type *vt; 3860 3861 vt = btf__type_by_id(btf, vs->type); 3862 if (btf_is_func(vt)) 3863 break; 3864 } 3865 3866 /* No func in ksyms sec. No need to add dummy var. 
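 * (Added note: the dummy VAR matters because extern kfuncs show up as FUNC
 * entries inside the .ksyms DATASEC; those entries are later replaced with
 * this VAR so the datasec passes kernel BTF validation.)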
*/ 3867 if (i == btf_vlen(sec)) 3868 return 0; 3869 3870 int_btf_id = find_int_btf_id(btf); 3871 dummy_var_btf_id = btf__add_var(btf, 3872 "dummy_ksym", 3873 BTF_VAR_GLOBAL_ALLOCATED, 3874 int_btf_id); 3875 if (dummy_var_btf_id < 0) 3876 pr_warn("cannot create a dummy_ksym var\n"); 3877 3878 return dummy_var_btf_id; 3879 } 3880 3881 static int bpf_object__collect_externs(struct bpf_object *obj) 3882 { 3883 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 3884 const struct btf_type *t; 3885 struct extern_desc *ext; 3886 int i, n, off, dummy_var_btf_id; 3887 const char *ext_name, *sec_name; 3888 size_t ext_essent_len; 3889 Elf_Scn *scn; 3890 Elf64_Shdr *sh; 3891 3892 if (!obj->efile.symbols) 3893 return 0; 3894 3895 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 3896 sh = elf_sec_hdr(obj, scn); 3897 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) 3898 return -LIBBPF_ERRNO__FORMAT; 3899 3900 dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 3901 if (dummy_var_btf_id < 0) 3902 return dummy_var_btf_id; 3903 3904 n = sh->sh_size / sh->sh_entsize; 3905 pr_debug("looking for externs among %d symbols...\n", n); 3906 3907 for (i = 0; i < n; i++) { 3908 Elf64_Sym *sym = elf_sym_by_idx(obj, i); 3909 3910 if (!sym) 3911 return -LIBBPF_ERRNO__FORMAT; 3912 if (!sym_is_extern(sym)) 3913 continue; 3914 ext_name = elf_sym_str(obj, sym->st_name); 3915 if (!ext_name || !ext_name[0]) 3916 continue; 3917 3918 ext = obj->externs; 3919 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); 3920 if (!ext) 3921 return -ENOMEM; 3922 obj->externs = ext; 3923 ext = &ext[obj->nr_extern]; 3924 memset(ext, 0, sizeof(*ext)); 3925 obj->nr_extern++; 3926 3927 ext->btf_id = find_extern_btf_id(obj->btf, ext_name); 3928 if (ext->btf_id <= 0) { 3929 pr_warn("failed to find BTF for extern '%s': %d\n", 3930 ext_name, ext->btf_id); 3931 return ext->btf_id; 3932 } 3933 t = btf__type_by_id(obj->btf, ext->btf_id); 3934 ext->name = btf__name_by_offset(obj->btf, t->name_off); 3935 ext->sym_idx = i; 3936 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 3937 3938 ext_essent_len = bpf_core_essential_name_len(ext->name); 3939 ext->essent_name = NULL; 3940 if (ext_essent_len != strlen(ext->name)) { 3941 ext->essent_name = strndup(ext->name, ext_essent_len); 3942 if (!ext->essent_name) 3943 return -ENOMEM; 3944 } 3945 3946 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 3947 if (ext->sec_btf_id <= 0) { 3948 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", 3949 ext_name, ext->btf_id, ext->sec_btf_id); 3950 return ext->sec_btf_id; 3951 } 3952 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); 3953 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 3954 3955 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 3956 if (btf_is_func(t)) { 3957 pr_warn("extern function %s is unsupported under %s section\n", 3958 ext->name, KCONFIG_SEC); 3959 return -ENOTSUP; 3960 } 3961 kcfg_sec = sec; 3962 ext->type = EXT_KCFG; 3963 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); 3964 if (ext->kcfg.sz <= 0) { 3965 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", 3966 ext_name, ext->kcfg.sz); 3967 return ext->kcfg.sz; 3968 } 3969 ext->kcfg.align = btf__align_of(obj->btf, t->type); 3970 if (ext->kcfg.align <= 0) { 3971 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", 3972 ext_name, ext->kcfg.align); 3973 return -EINVAL; 3974 } 3975 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 3976 &ext->kcfg.is_signed); 3977 if (ext->kcfg.type == KCFG_UNKNOWN) { 3978 
pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 3979 return -ENOTSUP; 3980 } 3981 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 3982 ksym_sec = sec; 3983 ext->type = EXT_KSYM; 3984 skip_mods_and_typedefs(obj->btf, t->type, 3985 &ext->ksym.type_id); 3986 } else { 3987 pr_warn("unrecognized extern section '%s'\n", sec_name); 3988 return -ENOTSUP; 3989 } 3990 } 3991 pr_debug("collected %d externs total\n", obj->nr_extern); 3992 3993 if (!obj->nr_extern) 3994 return 0; 3995 3996 /* sort externs by type, for kcfg ones also by (align, size, name) */ 3997 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); 3998 3999 /* for .ksyms section, we need to turn all externs into allocated 4000 * variables in BTF to pass kernel verification; we do this by 4001 * pretending that each extern is a 8-byte variable 4002 */ 4003 if (ksym_sec) { 4004 /* find existing 4-byte integer type in BTF to use for fake 4005 * extern variables in DATASEC 4006 */ 4007 int int_btf_id = find_int_btf_id(obj->btf); 4008 /* For extern function, a dummy_var added earlier 4009 * will be used to replace the vs->type and 4010 * its name string will be used to refill 4011 * the missing param's name. 4012 */ 4013 const struct btf_type *dummy_var; 4014 4015 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 4016 for (i = 0; i < obj->nr_extern; i++) { 4017 ext = &obj->externs[i]; 4018 if (ext->type != EXT_KSYM) 4019 continue; 4020 pr_debug("extern (ksym) #%d: symbol %d, name %s\n", 4021 i, ext->sym_idx, ext->name); 4022 } 4023 4024 sec = ksym_sec; 4025 n = btf_vlen(sec); 4026 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { 4027 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4028 struct btf_type *vt; 4029 4030 vt = (void *)btf__type_by_id(obj->btf, vs->type); 4031 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 4032 ext = find_extern_by_name(obj, ext_name); 4033 if (!ext) { 4034 pr_warn("failed to find extern definition for BTF %s '%s'\n", 4035 btf_kind_str(vt), ext_name); 4036 return -ESRCH; 4037 } 4038 if (btf_is_func(vt)) { 4039 const struct btf_type *func_proto; 4040 struct btf_param *param; 4041 int j; 4042 4043 func_proto = btf__type_by_id(obj->btf, 4044 vt->type); 4045 param = btf_params(func_proto); 4046 /* Reuse the dummy_var string if the 4047 * func proto does not have param name. 
4048 */ 4049 for (j = 0; j < btf_vlen(func_proto); j++) 4050 if (param[j].type && !param[j].name_off) 4051 param[j].name_off = 4052 dummy_var->name_off; 4053 vs->type = dummy_var_btf_id; 4054 vt->info &= ~0xffff; 4055 vt->info |= BTF_FUNC_GLOBAL; 4056 } else { 4057 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4058 vt->type = int_btf_id; 4059 } 4060 vs->offset = off; 4061 vs->size = sizeof(int); 4062 } 4063 sec->size = off; 4064 } 4065 4066 if (kcfg_sec) { 4067 sec = kcfg_sec; 4068 /* for kcfg externs calculate their offsets within a .kconfig map */ 4069 off = 0; 4070 for (i = 0; i < obj->nr_extern; i++) { 4071 ext = &obj->externs[i]; 4072 if (ext->type != EXT_KCFG) 4073 continue; 4074 4075 ext->kcfg.data_off = roundup(off, ext->kcfg.align); 4076 off = ext->kcfg.data_off + ext->kcfg.sz; 4077 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", 4078 i, ext->sym_idx, ext->kcfg.data_off, ext->name); 4079 } 4080 sec->size = off; 4081 n = btf_vlen(sec); 4082 for (i = 0; i < n; i++) { 4083 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4084 4085 t = btf__type_by_id(obj->btf, vs->type); 4086 ext_name = btf__name_by_offset(obj->btf, t->name_off); 4087 ext = find_extern_by_name(obj, ext_name); 4088 if (!ext) { 4089 pr_warn("failed to find extern definition for BTF var '%s'\n", 4090 ext_name); 4091 return -ESRCH; 4092 } 4093 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4094 vs->offset = ext->kcfg.data_off; 4095 } 4096 } 4097 return 0; 4098 } 4099 4100 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) 4101 { 4102 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; 4103 } 4104 4105 struct bpf_program * 4106 bpf_object__find_program_by_name(const struct bpf_object *obj, 4107 const char *name) 4108 { 4109 struct bpf_program *prog; 4110 4111 bpf_object__for_each_program(prog, obj) { 4112 if (prog_is_subprog(obj, prog)) 4113 continue; 4114 if (!strcmp(prog->name, name)) 4115 return prog; 4116 } 4117 return errno = ENOENT, NULL; 4118 } 4119 4120 static bool bpf_object__shndx_is_data(const struct bpf_object *obj, 4121 int shndx) 4122 { 4123 switch (obj->efile.secs[shndx].sec_type) { 4124 case SEC_BSS: 4125 case SEC_DATA: 4126 case SEC_RODATA: 4127 return true; 4128 default: 4129 return false; 4130 } 4131 } 4132 4133 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 4134 int shndx) 4135 { 4136 return shndx == obj->efile.btf_maps_shndx; 4137 } 4138 4139 static enum libbpf_map_type 4140 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) 4141 { 4142 if (shndx == obj->efile.symbols_shndx) 4143 return LIBBPF_MAP_KCONFIG; 4144 4145 switch (obj->efile.secs[shndx].sec_type) { 4146 case SEC_BSS: 4147 return LIBBPF_MAP_BSS; 4148 case SEC_DATA: 4149 return LIBBPF_MAP_DATA; 4150 case SEC_RODATA: 4151 return LIBBPF_MAP_RODATA; 4152 default: 4153 return LIBBPF_MAP_UNSPEC; 4154 } 4155 } 4156 4157 static int bpf_program__record_reloc(struct bpf_program *prog, 4158 struct reloc_desc *reloc_desc, 4159 __u32 insn_idx, const char *sym_name, 4160 const Elf64_Sym *sym, const Elf64_Rel *rel) 4161 { 4162 struct bpf_insn *insn = &prog->insns[insn_idx]; 4163 size_t map_idx, nr_maps = prog->obj->nr_maps; 4164 struct bpf_object *obj = prog->obj; 4165 __u32 shdr_idx = sym->st_shndx; 4166 enum libbpf_map_type type; 4167 const char *sym_sec_name; 4168 struct bpf_map *map; 4169 4170 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 4171 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 
4172 prog->name, sym_name, insn_idx, insn->code); 4173 return -LIBBPF_ERRNO__RELOC; 4174 } 4175 4176 if (sym_is_extern(sym)) { 4177 int sym_idx = ELF64_R_SYM(rel->r_info); 4178 int i, n = obj->nr_extern; 4179 struct extern_desc *ext; 4180 4181 for (i = 0; i < n; i++) { 4182 ext = &obj->externs[i]; 4183 if (ext->sym_idx == sym_idx) 4184 break; 4185 } 4186 if (i >= n) { 4187 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", 4188 prog->name, sym_name, sym_idx); 4189 return -LIBBPF_ERRNO__RELOC; 4190 } 4191 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 4192 prog->name, i, ext->name, ext->sym_idx, insn_idx); 4193 if (insn->code == (BPF_JMP | BPF_CALL)) 4194 reloc_desc->type = RELO_EXTERN_CALL; 4195 else 4196 reloc_desc->type = RELO_EXTERN_LD64; 4197 reloc_desc->insn_idx = insn_idx; 4198 reloc_desc->ext_idx = i; 4199 return 0; 4200 } 4201 4202 /* sub-program call relocation */ 4203 if (is_call_insn(insn)) { 4204 if (insn->src_reg != BPF_PSEUDO_CALL) { 4205 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 4206 return -LIBBPF_ERRNO__RELOC; 4207 } 4208 /* text_shndx can be 0, if no default "main" program exists */ 4209 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 4210 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4211 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 4212 prog->name, sym_name, sym_sec_name); 4213 return -LIBBPF_ERRNO__RELOC; 4214 } 4215 if (sym->st_value % BPF_INSN_SZ) { 4216 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 4217 prog->name, sym_name, (size_t)sym->st_value); 4218 return -LIBBPF_ERRNO__RELOC; 4219 } 4220 reloc_desc->type = RELO_CALL; 4221 reloc_desc->insn_idx = insn_idx; 4222 reloc_desc->sym_off = sym->st_value; 4223 return 0; 4224 } 4225 4226 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { 4227 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", 4228 prog->name, sym_name, shdr_idx); 4229 return -LIBBPF_ERRNO__RELOC; 4230 } 4231 4232 /* loading subprog addresses */ 4233 if (sym_is_subprog(sym, obj->efile.text_shndx)) { 4234 /* global_func: sym->st_value = offset in the section, insn->imm = 0. 4235 * local_func: sym->st_value = 0, insn->imm = offset in the section. 
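 * Either way, sym->st_value + insn->imm is the subprog's byte offset within
 * the section (see RELO_SUBPROG_ADDR handling in bpf_object__reloc_code()),
 * so both parts are required to be a multiple of BPF_INSN_SZ.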
4236 */ 4237 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { 4238 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", 4239 prog->name, sym_name, (size_t)sym->st_value, insn->imm); 4240 return -LIBBPF_ERRNO__RELOC; 4241 } 4242 4243 reloc_desc->type = RELO_SUBPROG_ADDR; 4244 reloc_desc->insn_idx = insn_idx; 4245 reloc_desc->sym_off = sym->st_value; 4246 return 0; 4247 } 4248 4249 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); 4250 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4251 4252 /* generic map reference relocation */ 4253 if (type == LIBBPF_MAP_UNSPEC) { 4254 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { 4255 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", 4256 prog->name, sym_name, sym_sec_name); 4257 return -LIBBPF_ERRNO__RELOC; 4258 } 4259 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4260 map = &obj->maps[map_idx]; 4261 if (map->libbpf_type != type || 4262 map->sec_idx != sym->st_shndx || 4263 map->sec_offset != sym->st_value) 4264 continue; 4265 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", 4266 prog->name, map_idx, map->name, map->sec_idx, 4267 map->sec_offset, insn_idx); 4268 break; 4269 } 4270 if (map_idx >= nr_maps) { 4271 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", 4272 prog->name, sym_sec_name, (size_t)sym->st_value); 4273 return -LIBBPF_ERRNO__RELOC; 4274 } 4275 reloc_desc->type = RELO_LD64; 4276 reloc_desc->insn_idx = insn_idx; 4277 reloc_desc->map_idx = map_idx; 4278 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ 4279 return 0; 4280 } 4281 4282 /* global data map relocation */ 4283 if (!bpf_object__shndx_is_data(obj, shdr_idx)) { 4284 pr_warn("prog '%s': bad data relo against section '%s'\n", 4285 prog->name, sym_sec_name); 4286 return -LIBBPF_ERRNO__RELOC; 4287 } 4288 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4289 map = &obj->maps[map_idx]; 4290 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) 4291 continue; 4292 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", 4293 prog->name, map_idx, map->name, map->sec_idx, 4294 map->sec_offset, insn_idx); 4295 break; 4296 } 4297 if (map_idx >= nr_maps) { 4298 pr_warn("prog '%s': data relo failed to find map for section '%s'\n", 4299 prog->name, sym_sec_name); 4300 return -LIBBPF_ERRNO__RELOC; 4301 } 4302 4303 reloc_desc->type = RELO_DATA; 4304 reloc_desc->insn_idx = insn_idx; 4305 reloc_desc->map_idx = map_idx; 4306 reloc_desc->sym_off = sym->st_value; 4307 return 0; 4308 } 4309 4310 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) 4311 { 4312 return insn_idx >= prog->sec_insn_off && 4313 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; 4314 } 4315 4316 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, 4317 size_t sec_idx, size_t insn_idx) 4318 { 4319 int l = 0, r = obj->nr_programs - 1, m; 4320 struct bpf_program *prog; 4321 4322 if (!obj->nr_programs) 4323 return NULL; 4324 4325 while (l < r) { 4326 m = l + (r - l + 1) / 2; 4327 prog = &obj->programs[m]; 4328 4329 if (prog->sec_idx < sec_idx || 4330 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) 4331 l = m; 4332 else 4333 r = m - 1; 4334 } 4335 /* matching program could be at index l, but it still might be the 4336 * wrong one, so we need to double check conditions for the last time 4337 */ 4338 prog = &obj->programs[l]; 4339 if (prog->sec_idx == sec_idx && 
prog_contains_insn(prog, insn_idx)) 4340 return prog; 4341 return NULL; 4342 } 4343 4344 static int 4345 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) 4346 { 4347 const char *relo_sec_name, *sec_name; 4348 size_t sec_idx = shdr->sh_info, sym_idx; 4349 struct bpf_program *prog; 4350 struct reloc_desc *relos; 4351 int err, i, nrels; 4352 const char *sym_name; 4353 __u32 insn_idx; 4354 Elf_Scn *scn; 4355 Elf_Data *scn_data; 4356 Elf64_Sym *sym; 4357 Elf64_Rel *rel; 4358 4359 if (sec_idx >= obj->efile.sec_cnt) 4360 return -EINVAL; 4361 4362 scn = elf_sec_by_idx(obj, sec_idx); 4363 scn_data = elf_sec_data(obj, scn); 4364 4365 relo_sec_name = elf_sec_str(obj, shdr->sh_name); 4366 sec_name = elf_sec_name(obj, scn); 4367 if (!relo_sec_name || !sec_name) 4368 return -EINVAL; 4369 4370 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", 4371 relo_sec_name, sec_idx, sec_name); 4372 nrels = shdr->sh_size / shdr->sh_entsize; 4373 4374 for (i = 0; i < nrels; i++) { 4375 rel = elf_rel_by_idx(data, i); 4376 if (!rel) { 4377 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); 4378 return -LIBBPF_ERRNO__FORMAT; 4379 } 4380 4381 sym_idx = ELF64_R_SYM(rel->r_info); 4382 sym = elf_sym_by_idx(obj, sym_idx); 4383 if (!sym) { 4384 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", 4385 relo_sec_name, sym_idx, i); 4386 return -LIBBPF_ERRNO__FORMAT; 4387 } 4388 4389 if (sym->st_shndx >= obj->efile.sec_cnt) { 4390 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", 4391 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); 4392 return -LIBBPF_ERRNO__FORMAT; 4393 } 4394 4395 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { 4396 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", 4397 relo_sec_name, (size_t)rel->r_offset, i); 4398 return -LIBBPF_ERRNO__FORMAT; 4399 } 4400 4401 insn_idx = rel->r_offset / BPF_INSN_SZ; 4402 /* relocations against static functions are recorded as 4403 * relocations against the section that contains a function; 4404 * in such case, symbol will be STT_SECTION and sym.st_name 4405 * will point to empty string (0), so fetch section name 4406 * instead 4407 */ 4408 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) 4409 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); 4410 else 4411 sym_name = elf_sym_str(obj, sym->st_name); 4412 sym_name = sym_name ?: "<?"; 4413 4414 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", 4415 relo_sec_name, i, insn_idx, sym_name); 4416 4417 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 4418 if (!prog) { 4419 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n", 4420 relo_sec_name, i, sec_name, insn_idx); 4421 continue; 4422 } 4423 4424 relos = libbpf_reallocarray(prog->reloc_desc, 4425 prog->nr_reloc + 1, sizeof(*relos)); 4426 if (!relos) 4427 return -ENOMEM; 4428 prog->reloc_desc = relos; 4429 4430 /* adjust insn_idx to local BPF program frame of reference */ 4431 insn_idx -= prog->sec_insn_off; 4432 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], 4433 insn_idx, sym_name, sym, rel); 4434 if (err) 4435 return err; 4436 4437 prog->nr_reloc++; 4438 } 4439 return 0; 4440 } 4441 4442 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) 4443 { 4444 int id; 4445 4446 if (!obj->btf) 4447 return -ENOENT; 4448 4449 /* if it's BTF-defined map, we don't need to search 
for type IDs. 4450 * For struct_ops map, it does not need btf_key_type_id and 4451 * btf_value_type_id. 4452 */ 4453 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) 4454 return 0; 4455 4456 /* 4457 * LLVM annotates global data differently in BTF, that is, 4458 * only as '.data', '.bss' or '.rodata'. 4459 */ 4460 if (!bpf_map__is_internal(map)) 4461 return -ENOENT; 4462 4463 id = btf__find_by_name(obj->btf, map->real_name); 4464 if (id < 0) 4465 return id; 4466 4467 map->btf_key_type_id = 0; 4468 map->btf_value_type_id = id; 4469 return 0; 4470 } 4471 4472 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) 4473 { 4474 char file[PATH_MAX], buff[4096]; 4475 FILE *fp; 4476 __u32 val; 4477 int err; 4478 4479 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4480 memset(info, 0, sizeof(*info)); 4481 4482 fp = fopen(file, "re"); 4483 if (!fp) { 4484 err = -errno; 4485 pr_warn("failed to open %s: %d. No procfs support?\n", file, 4486 err); 4487 return err; 4488 } 4489 4490 while (fgets(buff, sizeof(buff), fp)) { 4491 if (sscanf(buff, "map_type:\t%u", &val) == 1) 4492 info->type = val; 4493 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 4494 info->key_size = val; 4495 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 4496 info->value_size = val; 4497 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 4498 info->max_entries = val; 4499 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 4500 info->map_flags = val; 4501 } 4502 4503 fclose(fp); 4504 4505 return 0; 4506 } 4507 4508 bool bpf_map__autocreate(const struct bpf_map *map) 4509 { 4510 return map->autocreate; 4511 } 4512 4513 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4514 { 4515 if (map->obj->loaded) 4516 return libbpf_err(-EBUSY); 4517 4518 map->autocreate = autocreate; 4519 return 0; 4520 } 4521 4522 int bpf_map__reuse_fd(struct bpf_map *map, int fd) 4523 { 4524 struct bpf_map_info info; 4525 __u32 len = sizeof(info), name_len; 4526 int new_fd, err; 4527 char *new_name; 4528 4529 memset(&info, 0, len); 4530 err = bpf_map_get_info_by_fd(fd, &info, &len); 4531 if (err && errno == EINVAL) 4532 err = bpf_get_map_info_from_fdinfo(fd, &info); 4533 if (err) 4534 return libbpf_err(err); 4535 4536 name_len = strlen(info.name); 4537 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) 4538 new_name = strdup(map->name); 4539 else 4540 new_name = strdup(info.name); 4541 4542 if (!new_name) 4543 return libbpf_err(-errno); 4544 4545 /* 4546 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4547 * This is similar to what we do in ensure_good_fd(), but without 4548 * closing original FD. 
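 * (Asking for an FD of at least 3 avoids handing back descriptors that
 * could be confused with stdin/stdout/stderr.)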
4549 */ 4550 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4551 if (new_fd < 0) { 4552 err = -errno; 4553 goto err_free_new_name; 4554 } 4555 4556 err = zclose(map->fd); 4557 if (err) { 4558 err = -errno; 4559 goto err_close_new_fd; 4560 } 4561 free(map->name); 4562 4563 map->fd = new_fd; 4564 map->name = new_name; 4565 map->def.type = info.type; 4566 map->def.key_size = info.key_size; 4567 map->def.value_size = info.value_size; 4568 map->def.max_entries = info.max_entries; 4569 map->def.map_flags = info.map_flags; 4570 map->btf_key_type_id = info.btf_key_type_id; 4571 map->btf_value_type_id = info.btf_value_type_id; 4572 map->reused = true; 4573 map->map_extra = info.map_extra; 4574 4575 return 0; 4576 4577 err_close_new_fd: 4578 close(new_fd); 4579 err_free_new_name: 4580 free(new_name); 4581 return libbpf_err(err); 4582 } 4583 4584 __u32 bpf_map__max_entries(const struct bpf_map *map) 4585 { 4586 return map->def.max_entries; 4587 } 4588 4589 struct bpf_map *bpf_map__inner_map(struct bpf_map *map) 4590 { 4591 if (!bpf_map_type__is_map_in_map(map->def.type)) 4592 return errno = EINVAL, NULL; 4593 4594 return map->inner_map; 4595 } 4596 4597 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 4598 { 4599 if (map->obj->loaded) 4600 return libbpf_err(-EBUSY); 4601 4602 map->def.max_entries = max_entries; 4603 4604 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 4605 if (map_is_ringbuf(map)) 4606 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 4607 4608 return 0; 4609 } 4610 4611 static int bpf_object_prepare_token(struct bpf_object *obj) 4612 { 4613 const char *bpffs_path; 4614 int bpffs_fd = -1, token_fd, err; 4615 bool mandatory; 4616 enum libbpf_print_level level; 4617 4618 /* token is already set up */ 4619 if (obj->token_fd > 0) 4620 return 0; 4621 /* token is explicitly prevented */ 4622 if (obj->token_fd < 0) { 4623 pr_debug("object '%s': token is prevented, skipping...\n", obj->name); 4624 /* reset to zero to avoid extra checks during map_create and prog_load steps */ 4625 obj->token_fd = 0; 4626 return 0; 4627 } 4628 4629 mandatory = obj->token_path != NULL; 4630 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; 4631 4632 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; 4633 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); 4634 if (bpffs_fd < 0) { 4635 err = -errno; 4636 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", 4637 obj->name, err, bpffs_path, 4638 mandatory ? "" : ", skipping optional step..."); 4639 return mandatory ? err : 0; 4640 } 4641 4642 token_fd = bpf_token_create(bpffs_fd, 0); 4643 close(bpffs_fd); 4644 if (token_fd < 0) { 4645 if (!mandatory && token_fd == -ENOENT) { 4646 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", 4647 obj->name, bpffs_path); 4648 return 0; 4649 } 4650 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", 4651 obj->name, token_fd, bpffs_path, 4652 mandatory ? "" : ", skipping optional step..."); 4653 return mandatory ? 
token_fd : 0; 4654 } 4655 4656 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); 4657 if (!obj->feat_cache) { 4658 close(token_fd); 4659 return -ENOMEM; 4660 } 4661 4662 obj->token_fd = token_fd; 4663 obj->feat_cache->token_fd = token_fd; 4664 4665 return 0; 4666 } 4667 4668 static int 4669 bpf_object__probe_loading(struct bpf_object *obj) 4670 { 4671 char *cp, errmsg[STRERR_BUFSIZE]; 4672 struct bpf_insn insns[] = { 4673 BPF_MOV64_IMM(BPF_REG_0, 0), 4674 BPF_EXIT_INSN(), 4675 }; 4676 int ret, insn_cnt = ARRAY_SIZE(insns); 4677 LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); 4678 4679 if (obj->gen_loader) 4680 return 0; 4681 4682 ret = bump_rlimit_memlock(); 4683 if (ret) 4684 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); 4685 4686 /* make sure basic loading works */ 4687 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); 4688 if (ret < 0) 4689 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); 4690 if (ret < 0) { 4691 ret = errno; 4692 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); 4693 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " 4694 "program. Make sure your kernel supports BPF " 4695 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " 4696 "set to big enough value.\n", __func__, cp, ret); 4697 return -ret; 4698 } 4699 close(ret); 4700 4701 return 0; 4702 } 4703 4704 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) 4705 { 4706 if (obj && obj->gen_loader) 4707 /* To generate loader program assume the latest kernel 4708 * to avoid doing extra prog_load, map_create syscalls. 4709 */ 4710 return true; 4711 4712 if (obj->token_fd) 4713 return feat_supported(obj->feat_cache, feat_id); 4714 4715 return feat_supported(NULL, feat_id); 4716 } 4717 4718 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) 4719 { 4720 struct bpf_map_info map_info; 4721 char msg[STRERR_BUFSIZE]; 4722 __u32 map_info_len = sizeof(map_info); 4723 int err; 4724 4725 memset(&map_info, 0, map_info_len); 4726 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); 4727 if (err && errno == EINVAL) 4728 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); 4729 if (err) { 4730 pr_warn("failed to get map info for map FD %d: %s\n", map_fd, 4731 libbpf_strerror_r(errno, msg, sizeof(msg))); 4732 return false; 4733 } 4734 4735 return (map_info.type == map->def.type && 4736 map_info.key_size == map->def.key_size && 4737 map_info.value_size == map->def.value_size && 4738 map_info.max_entries == map->def.max_entries && 4739 map_info.map_flags == map->def.map_flags && 4740 map_info.map_extra == map->map_extra); 4741 } 4742 4743 static int 4744 bpf_object__reuse_map(struct bpf_map *map) 4745 { 4746 char *cp, errmsg[STRERR_BUFSIZE]; 4747 int err, pin_fd; 4748 4749 pin_fd = bpf_obj_get(map->pin_path); 4750 if (pin_fd < 0) { 4751 err = -errno; 4752 if (err == -ENOENT) { 4753 pr_debug("found no pinned map to reuse at '%s'\n", 4754 map->pin_path); 4755 return 0; 4756 } 4757 4758 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 4759 pr_warn("couldn't retrieve pinned map '%s': %s\n", 4760 map->pin_path, cp); 4761 return err; 4762 } 4763 4764 if (!map_is_reuse_compat(map, pin_fd)) { 4765 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", 4766 map->pin_path); 4767 close(pin_fd); 4768 return -EINVAL; 4769 } 4770 4771 err = bpf_map__reuse_fd(map, pin_fd); 4772 close(pin_fd); 4773 if (err) 4774 return 
err; 4775 4776 map->pinned = true; 4777 pr_debug("reused pinned map at '%s'\n", map->pin_path); 4778 4779 return 0; 4780 } 4781 4782 static int 4783 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) 4784 { 4785 enum libbpf_map_type map_type = map->libbpf_type; 4786 char *cp, errmsg[STRERR_BUFSIZE]; 4787 int err, zero = 0; 4788 4789 if (obj->gen_loader) { 4790 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, 4791 map->mmaped, map->def.value_size); 4792 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) 4793 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); 4794 return 0; 4795 } 4796 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); 4797 if (err) { 4798 err = -errno; 4799 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 4800 pr_warn("Error setting initial map(%s) contents: %s\n", 4801 map->name, cp); 4802 return err; 4803 } 4804 4805 /* Freeze .rodata and .kconfig map as read-only from syscall side. */ 4806 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { 4807 err = bpf_map_freeze(map->fd); 4808 if (err) { 4809 err = -errno; 4810 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 4811 pr_warn("Error freezing map(%s) as read-only: %s\n", 4812 map->name, cp); 4813 return err; 4814 } 4815 } 4816 return 0; 4817 } 4818 4819 static void bpf_map__destroy(struct bpf_map *map); 4820 4821 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 4822 { 4823 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 4824 struct bpf_map_def *def = &map->def; 4825 const char *map_name = NULL; 4826 int err = 0; 4827 4828 if (kernel_supports(obj, FEAT_PROG_NAME)) 4829 map_name = map->name; 4830 create_attr.map_ifindex = map->map_ifindex; 4831 create_attr.map_flags = def->map_flags; 4832 create_attr.numa_node = map->numa_node; 4833 create_attr.map_extra = map->map_extra; 4834 create_attr.token_fd = obj->token_fd; 4835 4836 if (bpf_map__is_struct_ops(map)) 4837 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 4838 4839 if (obj->btf && btf__fd(obj->btf) >= 0) { 4840 create_attr.btf_fd = btf__fd(obj->btf); 4841 create_attr.btf_key_type_id = map->btf_key_type_id; 4842 create_attr.btf_value_type_id = map->btf_value_type_id; 4843 } 4844 4845 if (bpf_map_type__is_map_in_map(def->type)) { 4846 if (map->inner_map) { 4847 err = bpf_object__create_map(obj, map->inner_map, true); 4848 if (err) { 4849 pr_warn("map '%s': failed to create inner map: %d\n", 4850 map->name, err); 4851 return err; 4852 } 4853 map->inner_map_fd = bpf_map__fd(map->inner_map); 4854 } 4855 if (map->inner_map_fd >= 0) 4856 create_attr.inner_map_fd = map->inner_map_fd; 4857 } 4858 4859 switch (def->type) { 4860 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 4861 case BPF_MAP_TYPE_CGROUP_ARRAY: 4862 case BPF_MAP_TYPE_STACK_TRACE: 4863 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 4864 case BPF_MAP_TYPE_HASH_OF_MAPS: 4865 case BPF_MAP_TYPE_DEVMAP: 4866 case BPF_MAP_TYPE_DEVMAP_HASH: 4867 case BPF_MAP_TYPE_CPUMAP: 4868 case BPF_MAP_TYPE_XSKMAP: 4869 case BPF_MAP_TYPE_SOCKMAP: 4870 case BPF_MAP_TYPE_SOCKHASH: 4871 case BPF_MAP_TYPE_QUEUE: 4872 case BPF_MAP_TYPE_STACK: 4873 create_attr.btf_fd = 0; 4874 create_attr.btf_key_type_id = 0; 4875 create_attr.btf_value_type_id = 0; 4876 map->btf_key_type_id = 0; 4877 map->btf_value_type_id = 0; 4878 default: 4879 break; 4880 } 4881 4882 if (obj->gen_loader) { 4883 bpf_gen__map_create(obj->gen_loader, def->type, map_name, 4884 def->key_size, def->value_size, def->max_entries, 4885 
&create_attr, is_inner ? -1 : map - obj->maps); 4886 /* Pretend to have valid FD to pass various fd >= 0 checks. 4887 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 4888 */ 4889 map->fd = 0; 4890 } else { 4891 map->fd = bpf_map_create(def->type, map_name, 4892 def->key_size, def->value_size, 4893 def->max_entries, &create_attr); 4894 } 4895 if (map->fd < 0 && (create_attr.btf_key_type_id || 4896 create_attr.btf_value_type_id)) { 4897 char *cp, errmsg[STRERR_BUFSIZE]; 4898 4899 err = -errno; 4900 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 4901 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", 4902 map->name, cp, err); 4903 create_attr.btf_fd = 0; 4904 create_attr.btf_key_type_id = 0; 4905 create_attr.btf_value_type_id = 0; 4906 map->btf_key_type_id = 0; 4907 map->btf_value_type_id = 0; 4908 map->fd = bpf_map_create(def->type, map_name, 4909 def->key_size, def->value_size, 4910 def->max_entries, &create_attr); 4911 } 4912 4913 err = map->fd < 0 ? -errno : 0; 4914 4915 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { 4916 if (obj->gen_loader) 4917 map->inner_map->fd = -1; 4918 bpf_map__destroy(map->inner_map); 4919 zfree(&map->inner_map); 4920 } 4921 4922 return err; 4923 } 4924 4925 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) 4926 { 4927 const struct bpf_map *targ_map; 4928 unsigned int i; 4929 int fd, err = 0; 4930 4931 for (i = 0; i < map->init_slots_sz; i++) { 4932 if (!map->init_slots[i]) 4933 continue; 4934 4935 targ_map = map->init_slots[i]; 4936 fd = bpf_map__fd(targ_map); 4937 4938 if (obj->gen_loader) { 4939 bpf_gen__populate_outer_map(obj->gen_loader, 4940 map - obj->maps, i, 4941 targ_map - obj->maps); 4942 } else { 4943 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 4944 } 4945 if (err) { 4946 err = -errno; 4947 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", 4948 map->name, i, targ_map->name, fd, err); 4949 return err; 4950 } 4951 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", 4952 map->name, i, targ_map->name, fd); 4953 } 4954 4955 zfree(&map->init_slots); 4956 map->init_slots_sz = 0; 4957 4958 return 0; 4959 } 4960 4961 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) 4962 { 4963 const struct bpf_program *targ_prog; 4964 unsigned int i; 4965 int fd, err; 4966 4967 if (obj->gen_loader) 4968 return -ENOTSUP; 4969 4970 for (i = 0; i < map->init_slots_sz; i++) { 4971 if (!map->init_slots[i]) 4972 continue; 4973 4974 targ_prog = map->init_slots[i]; 4975 fd = bpf_program__fd(targ_prog); 4976 4977 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 4978 if (err) { 4979 err = -errno; 4980 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", 4981 map->name, i, targ_prog->name, fd, err); 4982 return err; 4983 } 4984 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", 4985 map->name, i, targ_prog->name, fd); 4986 } 4987 4988 zfree(&map->init_slots); 4989 map->init_slots_sz = 0; 4990 4991 return 0; 4992 } 4993 4994 static int bpf_object_init_prog_arrays(struct bpf_object *obj) 4995 { 4996 struct bpf_map *map; 4997 int i, err; 4998 4999 for (i = 0; i < obj->nr_maps; i++) { 5000 map = &obj->maps[i]; 5001 5002 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) 5003 continue; 5004 5005 err = init_prog_array_slots(obj, map); 5006 if (err < 0) { 5007 zclose(map->fd); 5008 return err; 5009 } 5010 } 5011 return 0; 5012 } 5013 5014 static int 
map_set_def_max_entries(struct bpf_map *map) 5015 { 5016 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { 5017 int nr_cpus; 5018 5019 nr_cpus = libbpf_num_possible_cpus(); 5020 if (nr_cpus < 0) { 5021 pr_warn("map '%s': failed to determine number of system CPUs: %d\n", 5022 map->name, nr_cpus); 5023 return nr_cpus; 5024 } 5025 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); 5026 map->def.max_entries = nr_cpus; 5027 } 5028 5029 return 0; 5030 } 5031 5032 static int 5033 bpf_object__create_maps(struct bpf_object *obj) 5034 { 5035 struct bpf_map *map; 5036 char *cp, errmsg[STRERR_BUFSIZE]; 5037 unsigned int i, j; 5038 int err; 5039 bool retried; 5040 5041 for (i = 0; i < obj->nr_maps; i++) { 5042 map = &obj->maps[i]; 5043 5044 /* To support old kernels, we skip creating global data maps 5045 * (.rodata, .data, .kconfig, etc); later on, during program 5046 * loading, if we detect that at least one of the to-be-loaded 5047 * programs is referencing any global data map, we'll error 5048 * out with program name and relocation index logged. 5049 * This approach allows to accommodate Clang emitting 5050 * unnecessary .rodata.str1.1 sections for string literals, 5051 * but also it allows to have CO-RE applications that use 5052 * global variables in some of BPF programs, but not others. 5053 * If those global variable-using programs are not loaded at 5054 * runtime due to bpf_program__set_autoload(prog, false), 5055 * bpf_object loading will succeed just fine even on old 5056 * kernels. 5057 */ 5058 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) 5059 map->autocreate = false; 5060 5061 if (!map->autocreate) { 5062 pr_debug("map '%s': skipped auto-creating...\n", map->name); 5063 continue; 5064 } 5065 5066 err = map_set_def_max_entries(map); 5067 if (err) 5068 goto err_out; 5069 5070 retried = false; 5071 retry: 5072 if (map->pin_path) { 5073 err = bpf_object__reuse_map(map); 5074 if (err) { 5075 pr_warn("map '%s': error reusing pinned map\n", 5076 map->name); 5077 goto err_out; 5078 } 5079 if (retried && map->fd < 0) { 5080 pr_warn("map '%s': cannot find pinned map\n", 5081 map->name); 5082 err = -ENOENT; 5083 goto err_out; 5084 } 5085 } 5086 5087 if (map->fd >= 0) { 5088 pr_debug("map '%s': skipping creation (preset fd=%d)\n", 5089 map->name, map->fd); 5090 } else { 5091 err = bpf_object__create_map(obj, map, false); 5092 if (err) 5093 goto err_out; 5094 5095 pr_debug("map '%s': created successfully, fd=%d\n", 5096 map->name, map->fd); 5097 5098 if (bpf_map__is_internal(map)) { 5099 err = bpf_object__populate_internal_map(obj, map); 5100 if (err < 0) { 5101 zclose(map->fd); 5102 goto err_out; 5103 } 5104 } 5105 5106 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { 5107 err = init_map_in_map_slots(obj, map); 5108 if (err < 0) { 5109 zclose(map->fd); 5110 goto err_out; 5111 } 5112 } 5113 } 5114 5115 if (map->pin_path && !map->pinned) { 5116 err = bpf_map__pin(map, NULL); 5117 if (err) { 5118 zclose(map->fd); 5119 if (!retried && err == -EEXIST) { 5120 retried = true; 5121 goto retry; 5122 } 5123 pr_warn("map '%s': failed to auto-pin at '%s': %d\n", 5124 map->name, map->pin_path, err); 5125 goto err_out; 5126 } 5127 } 5128 } 5129 5130 return 0; 5131 5132 err_out: 5133 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5134 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); 5135 pr_perm_msg(err); 5136 for (j = 0; j < i; j++) 5137 zclose(obj->maps[j].fd); 5138 return err; 5139 } 5140 5141 
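/* The CO-RE helpers below implement the type "flavor" convention: the part of
 * a type name after the last '___' separator is ignored when matching local
 * types against kernel types. A hypothetical worked example (not from this
 * file):
 *
 *   bpf_core_essential_name_len("task_struct___old") == 11
 *
 * i.e. only the "task_struct" prefix takes part in candidate matching.
 */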
static bool bpf_core_is_flavor_sep(const char *s) 5142 { 5143 /* check X___Y name pattern, where X and Y are not underscores */ 5144 return s[0] != '_' && /* X */ 5145 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ 5146 s[4] != '_'; /* Y */ 5147 } 5148 5149 /* Given 'some_struct_name___with_flavor' return the length of a name prefix 5150 * before last triple underscore. Struct name part after last triple 5151 * underscore is ignored by BPF CO-RE relocation during relocation matching. 5152 */ 5153 size_t bpf_core_essential_name_len(const char *name) 5154 { 5155 size_t n = strlen(name); 5156 int i; 5157 5158 for (i = n - 5; i >= 0; i--) { 5159 if (bpf_core_is_flavor_sep(name + i)) 5160 return i + 1; 5161 } 5162 return n; 5163 } 5164 5165 void bpf_core_free_cands(struct bpf_core_cand_list *cands) 5166 { 5167 if (!cands) 5168 return; 5169 5170 free(cands->cands); 5171 free(cands); 5172 } 5173 5174 int bpf_core_add_cands(struct bpf_core_cand *local_cand, 5175 size_t local_essent_len, 5176 const struct btf *targ_btf, 5177 const char *targ_btf_name, 5178 int targ_start_id, 5179 struct bpf_core_cand_list *cands) 5180 { 5181 struct bpf_core_cand *new_cands, *cand; 5182 const struct btf_type *t, *local_t; 5183 const char *targ_name, *local_name; 5184 size_t targ_essent_len; 5185 int n, i; 5186 5187 local_t = btf__type_by_id(local_cand->btf, local_cand->id); 5188 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); 5189 5190 n = btf__type_cnt(targ_btf); 5191 for (i = targ_start_id; i < n; i++) { 5192 t = btf__type_by_id(targ_btf, i); 5193 if (!btf_kind_core_compat(t, local_t)) 5194 continue; 5195 5196 targ_name = btf__name_by_offset(targ_btf, t->name_off); 5197 if (str_is_empty(targ_name)) 5198 continue; 5199 5200 targ_essent_len = bpf_core_essential_name_len(targ_name); 5201 if (targ_essent_len != local_essent_len) 5202 continue; 5203 5204 if (strncmp(local_name, targ_name, local_essent_len) != 0) 5205 continue; 5206 5207 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", 5208 local_cand->id, btf_kind_str(local_t), 5209 local_name, i, btf_kind_str(t), targ_name, 5210 targ_btf_name); 5211 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, 5212 sizeof(*cands->cands)); 5213 if (!new_cands) 5214 return -ENOMEM; 5215 5216 cand = &new_cands[cands->len]; 5217 cand->btf = targ_btf; 5218 cand->id = i; 5219 5220 cands->cands = new_cands; 5221 cands->len++; 5222 } 5223 return 0; 5224 } 5225 5226 static int load_module_btfs(struct bpf_object *obj) 5227 { 5228 struct bpf_btf_info info; 5229 struct module_btf *mod_btf; 5230 struct btf *btf; 5231 char name[64]; 5232 __u32 id = 0, len; 5233 int err, fd; 5234 5235 if (obj->btf_modules_loaded) 5236 return 0; 5237 5238 if (obj->gen_loader) 5239 return 0; 5240 5241 /* don't do this again, even if we find no module BTFs */ 5242 obj->btf_modules_loaded = true; 5243 5244 /* kernel too old to support module BTFs */ 5245 if (!kernel_supports(obj, FEAT_MODULE_BTF)) 5246 return 0; 5247 5248 while (true) { 5249 err = bpf_btf_get_next_id(id, &id); 5250 if (err && errno == ENOENT) 5251 return 0; 5252 if (err && errno == EPERM) { 5253 pr_debug("skipping module BTFs loading, missing privileges\n"); 5254 return 0; 5255 } 5256 if (err) { 5257 err = -errno; 5258 pr_warn("failed to iterate BTF objects: %d\n", err); 5259 return err; 5260 } 5261 5262 fd = bpf_btf_get_fd_by_id(id); 5263 if (fd < 0) { 5264 if (errno == ENOENT) 5265 continue; /* expected race: BTF was unloaded */ 5266 err = -errno; 5267 pr_warn("failed to 
get BTF object #%d FD: %d\n", id, err); 5268 return err; 5269 } 5270 5271 len = sizeof(info); 5272 memset(&info, 0, sizeof(info)); 5273 info.name = ptr_to_u64(name); 5274 info.name_len = sizeof(name); 5275 5276 err = bpf_btf_get_info_by_fd(fd, &info, &len); 5277 if (err) { 5278 err = -errno; 5279 pr_warn("failed to get BTF object #%d info: %d\n", id, err); 5280 goto err_out; 5281 } 5282 5283 /* ignore non-module BTFs */ 5284 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { 5285 close(fd); 5286 continue; 5287 } 5288 5289 btf = btf_get_from_fd(fd, obj->btf_vmlinux); 5290 err = libbpf_get_error(btf); 5291 if (err) { 5292 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", 5293 name, id, err); 5294 goto err_out; 5295 } 5296 5297 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, 5298 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); 5299 if (err) 5300 goto err_out; 5301 5302 mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; 5303 5304 mod_btf->btf = btf; 5305 mod_btf->id = id; 5306 mod_btf->fd = fd; 5307 mod_btf->name = strdup(name); 5308 if (!mod_btf->name) { 5309 err = -ENOMEM; 5310 goto err_out; 5311 } 5312 continue; 5313 5314 err_out: 5315 close(fd); 5316 return err; 5317 } 5318 5319 return 0; 5320 } 5321 5322 static struct bpf_core_cand_list * 5323 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) 5324 { 5325 struct bpf_core_cand local_cand = {}; 5326 struct bpf_core_cand_list *cands; 5327 const struct btf *main_btf; 5328 const struct btf_type *local_t; 5329 const char *local_name; 5330 size_t local_essent_len; 5331 int err, i; 5332 5333 local_cand.btf = local_btf; 5334 local_cand.id = local_type_id; 5335 local_t = btf__type_by_id(local_btf, local_type_id); 5336 if (!local_t) 5337 return ERR_PTR(-EINVAL); 5338 5339 local_name = btf__name_by_offset(local_btf, local_t->name_off); 5340 if (str_is_empty(local_name)) 5341 return ERR_PTR(-EINVAL); 5342 local_essent_len = bpf_core_essential_name_len(local_name); 5343 5344 cands = calloc(1, sizeof(*cands)); 5345 if (!cands) 5346 return ERR_PTR(-ENOMEM); 5347 5348 /* Attempt to find target candidates in vmlinux BTF first */ 5349 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux; 5350 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands); 5351 if (err) 5352 goto err_out; 5353 5354 /* if vmlinux BTF has any candidate, don't go for module BTFs */ 5355 if (cands->len) 5356 return cands; 5357 5358 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */ 5359 if (obj->btf_vmlinux_override) 5360 return cands; 5361 5362 /* now look through module BTFs, trying to still find candidates */ 5363 err = load_module_btfs(obj); 5364 if (err) 5365 goto err_out; 5366 5367 for (i = 0; i < obj->btf_module_cnt; i++) { 5368 err = bpf_core_add_cands(&local_cand, local_essent_len, 5369 obj->btf_modules[i].btf, 5370 obj->btf_modules[i].name, 5371 btf__type_cnt(obj->btf_vmlinux), 5372 cands); 5373 if (err) 5374 goto err_out; 5375 } 5376 5377 return cands; 5378 err_out: 5379 bpf_core_free_cands(cands); 5380 return ERR_PTR(err); 5381 } 5382 5383 /* Check local and target types for compatibility. This check is used for 5384 * type-based CO-RE relocations and follows slightly different rules than 5385 * field-based relocations. This function assumes that root types were already 5386 * checked for name match. Beyond that initial root-level name check, names 5387 * are completely ignored.
Compatibility rules are as follows: 5388 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but 5389 * kind should match for local and target types (i.e., STRUCT is not 5390 * compatible with UNION); 5391 * - for ENUMs, the size is ignored; 5392 * - for INT, size and signedness are ignored; 5393 * - for ARRAY, dimensionality is ignored, element types are checked for 5394 * compatibility recursively; 5395 * - CONST/VOLATILE/RESTRICT modifiers are ignored; 5396 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible; 5397 * - FUNC_PROTOs are compatible if they have compatible signatures: same 5398 * number of input args and compatible return and argument types. 5399 * These rules are not set in stone and probably will be adjusted as we get 5400 * more experience with using BPF CO-RE relocations. 5401 */ 5402 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 5403 const struct btf *targ_btf, __u32 targ_id) 5404 { 5405 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); 5406 } 5407 5408 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, 5409 const struct btf *targ_btf, __u32 targ_id) 5410 { 5411 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); 5412 } 5413 5414 static size_t bpf_core_hash_fn(const long key, void *ctx) 5415 { 5416 return key; 5417 } 5418 5419 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) 5420 { 5421 return k1 == k2; 5422 } 5423 5424 static int record_relo_core(struct bpf_program *prog, 5425 const struct bpf_core_relo *core_relo, int insn_idx) 5426 { 5427 struct reloc_desc *relos, *relo; 5428 5429 relos = libbpf_reallocarray(prog->reloc_desc, 5430 prog->nr_reloc + 1, sizeof(*relos)); 5431 if (!relos) 5432 return -ENOMEM; 5433 relo = &relos[prog->nr_reloc]; 5434 relo->type = RELO_CORE; 5435 relo->insn_idx = insn_idx; 5436 relo->core_relo = core_relo; 5437 prog->reloc_desc = relos; 5438 prog->nr_reloc++; 5439 return 0; 5440 } 5441 5442 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) 5443 { 5444 struct reloc_desc *relo; 5445 int i; 5446 5447 for (i = 0; i < prog->nr_reloc; i++) { 5448 relo = &prog->reloc_desc[i]; 5449 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) 5450 continue; 5451 5452 return relo->core_relo; 5453 } 5454 5455 return NULL; 5456 } 5457 5458 static int bpf_core_resolve_relo(struct bpf_program *prog, 5459 const struct bpf_core_relo *relo, 5460 int relo_idx, 5461 const struct btf *local_btf, 5462 struct hashmap *cand_cache, 5463 struct bpf_core_relo_res *targ_res) 5464 { 5465 struct bpf_core_spec specs_scratch[3] = {}; 5466 struct bpf_core_cand_list *cands = NULL; 5467 const char *prog_name = prog->name; 5468 const struct btf_type *local_type; 5469 const char *local_name; 5470 __u32 local_id = relo->type_id; 5471 int err; 5472 5473 local_type = btf__type_by_id(local_btf, local_id); 5474 if (!local_type) 5475 return -EINVAL; 5476 5477 local_name = btf__name_by_offset(local_btf, local_type->name_off); 5478 if (!local_name) 5479 return -EINVAL; 5480 5481 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && 5482 !hashmap__find(cand_cache, local_id, &cands)) { 5483 cands = bpf_core_find_cands(prog->obj, local_btf, local_id); 5484 if (IS_ERR(cands)) { 5485 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", 5486 prog_name, relo_idx, local_id, btf_kind_str(local_type), 5487 local_name, PTR_ERR(cands)); 5488 return PTR_ERR(cands); 5489 }
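/* Cache the freshly built candidate list under the local root type ID so
 * that later relocations against the same type can reuse it; cached entries
 * are freed in bpf_object__relocate_core() once all relocations have been
 * processed.
 */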
5490 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); 5491 if (err) { 5492 bpf_core_free_cands(cands); 5493 return err; 5494 } 5495 } 5496 5497 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, 5498 targ_res); 5499 } 5500 5501 static int 5502 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) 5503 { 5504 const struct btf_ext_info_sec *sec; 5505 struct bpf_core_relo_res targ_res; 5506 const struct bpf_core_relo *rec; 5507 const struct btf_ext_info *seg; 5508 struct hashmap_entry *entry; 5509 struct hashmap *cand_cache = NULL; 5510 struct bpf_program *prog; 5511 struct bpf_insn *insn; 5512 const char *sec_name; 5513 int i, err = 0, insn_idx, sec_idx, sec_num; 5514 5515 if (obj->btf_ext->core_relo_info.len == 0) 5516 return 0; 5517 5518 if (targ_btf_path) { 5519 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); 5520 err = libbpf_get_error(obj->btf_vmlinux_override); 5521 if (err) { 5522 pr_warn("failed to parse target BTF: %d\n", err); 5523 return err; 5524 } 5525 } 5526 5527 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); 5528 if (IS_ERR(cand_cache)) { 5529 err = PTR_ERR(cand_cache); 5530 goto out; 5531 } 5532 5533 seg = &obj->btf_ext->core_relo_info; 5534 sec_num = 0; 5535 for_each_btf_ext_sec(seg, sec) { 5536 sec_idx = seg->sec_idxs[sec_num]; 5537 sec_num++; 5538 5539 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5540 if (str_is_empty(sec_name)) { 5541 err = -EINVAL; 5542 goto out; 5543 } 5544 5545 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 5546 5547 for_each_btf_ext_rec(seg, sec, i, rec) { 5548 if (rec->insn_off % BPF_INSN_SZ) 5549 return -EINVAL; 5550 insn_idx = rec->insn_off / BPF_INSN_SZ; 5551 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 5552 if (!prog) { 5553 /* When __weak subprog is "overridden" by another instance 5554 * of the subprog from a different object file, linker still 5555 * appends all the .BTF.ext info that used to belong to that 5556 * eliminated subprogram. 5557 * This is similar to what x86-64 linker does for relocations. 5558 * So just ignore such relocations just like we ignore 5559 * subprog instructions when discovering subprograms. 
5560 */ 5561 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", 5562 sec_name, i, insn_idx); 5563 continue; 5564 } 5565 /* no need to apply CO-RE relocation if the program is 5566 * not going to be loaded 5567 */ 5568 if (!prog->autoload) 5569 continue; 5570 5571 /* adjust insn_idx from section frame of reference to the local 5572 * program's frame of reference; (sub-)program code is not yet 5573 * relocated, so it's enough to just subtract in-section offset 5574 */ 5575 insn_idx = insn_idx - prog->sec_insn_off; 5576 if (insn_idx >= prog->insns_cnt) 5577 return -EINVAL; 5578 insn = &prog->insns[insn_idx]; 5579 5580 err = record_relo_core(prog, rec, insn_idx); 5581 if (err) { 5582 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", 5583 prog->name, i, err); 5584 goto out; 5585 } 5586 5587 if (prog->obj->gen_loader) 5588 continue; 5589 5590 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 5591 if (err) { 5592 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", 5593 prog->name, i, err); 5594 goto out; 5595 } 5596 5597 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); 5598 if (err) { 5599 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", 5600 prog->name, i, insn_idx, err); 5601 goto out; 5602 } 5603 } 5604 } 5605 5606 out: 5607 /* obj->btf_vmlinux and module BTFs are freed after object load */ 5608 btf__free(obj->btf_vmlinux_override); 5609 obj->btf_vmlinux_override = NULL; 5610 5611 if (!IS_ERR_OR_NULL(cand_cache)) { 5612 hashmap__for_each_entry(cand_cache, entry, i) { 5613 bpf_core_free_cands(entry->pvalue); 5614 } 5615 hashmap__free(cand_cache); 5616 } 5617 return err; 5618 } 5619 5620 /* base map load ldimm64 special constant, used also for log fixup logic */ 5621 #define POISON_LDIMM64_MAP_BASE 2001000000 5622 #define POISON_LDIMM64_MAP_PFX "200100" 5623 5624 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, 5625 int insn_idx, struct bpf_insn *insn, 5626 int map_idx, const struct bpf_map *map) 5627 { 5628 int i; 5629 5630 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", 5631 prog->name, relo_idx, insn_idx, map_idx, map->name); 5632 5633 /* we turn single ldimm64 into two identical invalid calls */ 5634 for (i = 0; i < 2; i++) { 5635 insn->code = BPF_JMP | BPF_CALL; 5636 insn->dst_reg = 0; 5637 insn->src_reg = 0; 5638 insn->off = 0; 5639 /* if this instruction is reachable (not a dead code), 5640 * verifier will complain with something like: 5641 * invalid func unknown#2001000123 5642 * where lower 123 is map index into obj->maps[] array 5643 */ 5644 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; 5645 5646 insn++; 5647 } 5648 } 5649 5650 /* unresolved kfunc call special constant, used also for log fixup logic */ 5651 #define POISON_CALL_KFUNC_BASE 2002000000 5652 #define POISON_CALL_KFUNC_PFX "2002" 5653 5654 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, 5655 int insn_idx, struct bpf_insn *insn, 5656 int ext_idx, const struct extern_desc *ext) 5657 { 5658 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", 5659 prog->name, relo_idx, insn_idx, ext->name); 5660 5661 /* we turn kfunc call into invalid helper call with identifiable constant */ 5662 insn->code = BPF_JMP | BPF_CALL; 5663 insn->dst_reg = 0; 5664 insn->src_reg = 0; 5665 insn->off = 0; 5666 /* if this instruction is reachable (not a dead code), 5667 * verifier will complain with something like: 5668 
* invalid func unknown#2001000123 5669 * where lower 123 is extern index into obj->externs[] array 5670 */ 5671 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; 5672 } 5673 5674 /* Relocate data references within program code: 5675 * - map references; 5676 * - global variable references; 5677 * - extern references. 5678 */ 5679 static int 5680 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) 5681 { 5682 int i; 5683 5684 for (i = 0; i < prog->nr_reloc; i++) { 5685 struct reloc_desc *relo = &prog->reloc_desc[i]; 5686 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 5687 const struct bpf_map *map; 5688 struct extern_desc *ext; 5689 5690 switch (relo->type) { 5691 case RELO_LD64: 5692 map = &obj->maps[relo->map_idx]; 5693 if (obj->gen_loader) { 5694 insn[0].src_reg = BPF_PSEUDO_MAP_IDX; 5695 insn[0].imm = relo->map_idx; 5696 } else if (map->autocreate) { 5697 insn[0].src_reg = BPF_PSEUDO_MAP_FD; 5698 insn[0].imm = map->fd; 5699 } else { 5700 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 5701 relo->map_idx, map); 5702 } 5703 break; 5704 case RELO_DATA: 5705 map = &obj->maps[relo->map_idx]; 5706 insn[1].imm = insn[0].imm + relo->sym_off; 5707 if (obj->gen_loader) { 5708 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 5709 insn[0].imm = relo->map_idx; 5710 } else if (map->autocreate) { 5711 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 5712 insn[0].imm = map->fd; 5713 } else { 5714 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 5715 relo->map_idx, map); 5716 } 5717 break; 5718 case RELO_EXTERN_LD64: 5719 ext = &obj->externs[relo->ext_idx]; 5720 if (ext->type == EXT_KCFG) { 5721 if (obj->gen_loader) { 5722 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 5723 insn[0].imm = obj->kconfig_map_idx; 5724 } else { 5725 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 5726 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; 5727 } 5728 insn[1].imm = ext->kcfg.data_off; 5729 } else /* EXT_KSYM */ { 5730 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ 5731 insn[0].src_reg = BPF_PSEUDO_BTF_ID; 5732 insn[0].imm = ext->ksym.kernel_btf_id; 5733 insn[1].imm = ext->ksym.kernel_btf_obj_fd; 5734 } else { /* typeless ksyms or unresolved typed ksyms */ 5735 insn[0].imm = (__u32)ext->ksym.addr; 5736 insn[1].imm = ext->ksym.addr >> 32; 5737 } 5738 } 5739 break; 5740 case RELO_EXTERN_CALL: 5741 ext = &obj->externs[relo->ext_idx]; 5742 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 5743 if (ext->is_set) { 5744 insn[0].imm = ext->ksym.kernel_btf_id; 5745 insn[0].off = ext->ksym.btf_fd_idx; 5746 } else { /* unresolved weak kfunc call */ 5747 poison_kfunc_call(prog, i, relo->insn_idx, insn, 5748 relo->ext_idx, ext); 5749 } 5750 break; 5751 case RELO_SUBPROG_ADDR: 5752 if (insn[0].src_reg != BPF_PSEUDO_FUNC) { 5753 pr_warn("prog '%s': relo #%d: bad insn\n", 5754 prog->name, i); 5755 return -EINVAL; 5756 } 5757 /* handled already */ 5758 break; 5759 case RELO_CALL: 5760 /* handled already */ 5761 break; 5762 case RELO_CORE: 5763 /* will be handled by bpf_program_record_relos() */ 5764 break; 5765 default: 5766 pr_warn("prog '%s': relo #%d: bad relo type %d\n", 5767 prog->name, i, relo->type); 5768 return -EINVAL; 5769 } 5770 } 5771 5772 return 0; 5773 } 5774 5775 static int adjust_prog_btf_ext_info(const struct bpf_object *obj, 5776 const struct bpf_program *prog, 5777 const struct btf_ext_info *ext_info, 5778 void **prog_info, __u32 *prog_rec_cnt, 5779 __u32 *prog_rec_sz) 5780 { 5781 void *copy_start = NULL, *copy_end = NULL; 5782 void *rec, *rec_end, *new_prog_info; 5783 const struct btf_ext_info_sec *sec; 
5784 size_t old_sz, new_sz; 5785 int i, sec_num, sec_idx, off_adj; 5786 5787 sec_num = 0; 5788 for_each_btf_ext_sec(ext_info, sec) { 5789 sec_idx = ext_info->sec_idxs[sec_num]; 5790 sec_num++; 5791 if (prog->sec_idx != sec_idx) 5792 continue; 5793 5794 for_each_btf_ext_rec(ext_info, sec, i, rec) { 5795 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; 5796 5797 if (insn_off < prog->sec_insn_off) 5798 continue; 5799 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) 5800 break; 5801 5802 if (!copy_start) 5803 copy_start = rec; 5804 copy_end = rec + ext_info->rec_size; 5805 } 5806 5807 if (!copy_start) 5808 return -ENOENT; 5809 5810 /* append func/line info of a given (sub-)program to the main 5811 * program func/line info 5812 */ 5813 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; 5814 new_sz = old_sz + (copy_end - copy_start); 5815 new_prog_info = realloc(*prog_info, new_sz); 5816 if (!new_prog_info) 5817 return -ENOMEM; 5818 *prog_info = new_prog_info; 5819 *prog_rec_cnt = new_sz / ext_info->rec_size; 5820 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); 5821 5822 /* Kernel instruction offsets are in units of 8-byte 5823 * instructions, while .BTF.ext instruction offsets generated 5824 * by Clang are in units of bytes. So convert Clang offsets 5825 * into kernel offsets and adjust offset according to program 5826 * relocated position. 5827 */ 5828 off_adj = prog->sub_insn_off - prog->sec_insn_off; 5829 rec = new_prog_info + old_sz; 5830 rec_end = new_prog_info + new_sz; 5831 for (; rec < rec_end; rec += ext_info->rec_size) { 5832 __u32 *insn_off = rec; 5833 5834 *insn_off = *insn_off / BPF_INSN_SZ + off_adj; 5835 } 5836 *prog_rec_sz = ext_info->rec_size; 5837 return 0; 5838 } 5839 5840 return -ENOENT; 5841 } 5842 5843 static int 5844 reloc_prog_func_and_line_info(const struct bpf_object *obj, 5845 struct bpf_program *main_prog, 5846 const struct bpf_program *prog) 5847 { 5848 int err; 5849 5850 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't 5851 * support func/line info 5852 */ 5853 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) 5854 return 0; 5855 5856 /* only attempt func info relocation if main program's func_info 5857 * relocation was successful 5858 */ 5859 if (main_prog != prog && !main_prog->func_info) 5860 goto line_info; 5861 5862 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, 5863 &main_prog->func_info, 5864 &main_prog->func_info_cnt, 5865 &main_prog->func_info_rec_size); 5866 if (err) { 5867 if (err != -ENOENT) { 5868 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", 5869 prog->name, err); 5870 return err; 5871 } 5872 if (main_prog->func_info) { 5873 /* 5874 * Some info has already been found but has problem 5875 * in the last btf_ext reloc. Must have to error out. 5876 */ 5877 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); 5878 return err; 5879 } 5880 /* Have problem loading the very first info. Ignore the rest.
*/ 5881 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", 5882 prog->name); 5883 } 5884 5885 line_info: 5886 /* don't relocate line info if main program's relocation failed */ 5887 if (main_prog != prog && !main_prog->line_info) 5888 return 0; 5889 5890 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, 5891 &main_prog->line_info, 5892 &main_prog->line_info_cnt, 5893 &main_prog->line_info_rec_size); 5894 if (err) { 5895 if (err != -ENOENT) { 5896 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", 5897 prog->name, err); 5898 return err; 5899 } 5900 if (main_prog->line_info) { 5901 /* 5902 * Some info has already been found but has problem 5903 * in the last btf_ext reloc. Must have to error out. 5904 */ 5905 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); 5906 return err; 5907 } 5908 /* Have problem loading the very first info. Ignore the rest. */ 5909 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", 5910 prog->name); 5911 } 5912 return 0; 5913 } 5914 5915 static int cmp_relo_by_insn_idx(const void *key, const void *elem) 5916 { 5917 size_t insn_idx = *(const size_t *)key; 5918 const struct reloc_desc *relo = elem; 5919 5920 if (insn_idx == relo->insn_idx) 5921 return 0; 5922 return insn_idx < relo->insn_idx ? -1 : 1; 5923 } 5924 5925 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) 5926 { 5927 if (!prog->nr_reloc) 5928 return NULL; 5929 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, 5930 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); 5931 } 5932 5933 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) 5934 { 5935 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; 5936 struct reloc_desc *relos; 5937 int i; 5938 5939 if (main_prog == subprog) 5940 return 0; 5941 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); 5942 /* if new count is zero, reallocarray can return a valid NULL result; 5943 * in this case the previous pointer will be freed, so we *have to* 5944 * reassign old pointer to the new value (even if it's NULL) 5945 */ 5946 if (!relos && new_cnt) 5947 return -ENOMEM; 5948 if (subprog->nr_reloc) 5949 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, 5950 sizeof(*relos) * subprog->nr_reloc); 5951 5952 for (i = main_prog->nr_reloc; i < new_cnt; i++) 5953 relos[i].insn_idx += subprog->sub_insn_off; 5954 /* After insn_idx adjustment the 'relos' array is still sorted 5955 * by insn_idx and doesn't break bsearch. 
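 * For example (illustrative numbers): if the main prog already has relos at
 * insn_idx {3, 17} and the subprog, appended at sub_insn_off = 40, has relos
 * at insn_idx {1, 5}, the appended copies become {41, 45} and the combined
 * array {3, 17, 41, 45} stays sorted, because all pre-existing relos refer
 * to instructions before the append point.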
5956 */ 5957 main_prog->reloc_desc = relos; 5958 main_prog->nr_reloc = new_cnt; 5959 return 0; 5960 } 5961 5962 static int 5963 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, 5964 struct bpf_program *subprog) 5965 { 5966 struct bpf_insn *insns; 5967 size_t new_cnt; 5968 int err; 5969 5970 subprog->sub_insn_off = main_prog->insns_cnt; 5971 5972 new_cnt = main_prog->insns_cnt + subprog->insns_cnt; 5973 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); 5974 if (!insns) { 5975 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); 5976 return -ENOMEM; 5977 } 5978 main_prog->insns = insns; 5979 main_prog->insns_cnt = new_cnt; 5980 5981 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, 5982 subprog->insns_cnt * sizeof(*insns)); 5983 5984 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", 5985 main_prog->name, subprog->insns_cnt, subprog->name); 5986 5987 /* The subprog insns are now appended. Append its relos too. */ 5988 err = append_subprog_relos(main_prog, subprog); 5989 if (err) 5990 return err; 5991 return 0; 5992 } 5993 5994 static int 5995 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, 5996 struct bpf_program *prog) 5997 { 5998 size_t sub_insn_idx, insn_idx; 5999 struct bpf_program *subprog; 6000 struct reloc_desc *relo; 6001 struct bpf_insn *insn; 6002 int err; 6003 6004 err = reloc_prog_func_and_line_info(obj, main_prog, prog); 6005 if (err) 6006 return err; 6007 6008 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { 6009 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6010 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) 6011 continue; 6012 6013 relo = find_prog_insn_relo(prog, insn_idx); 6014 if (relo && relo->type == RELO_EXTERN_CALL) 6015 /* kfunc relocations will be handled later 6016 * in bpf_object__relocate_data() 6017 */ 6018 continue; 6019 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { 6020 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", 6021 prog->name, insn_idx, relo->type); 6022 return -LIBBPF_ERRNO__RELOC; 6023 } 6024 if (relo) { 6025 /* sub-program instruction index is a combination of 6026 * an offset of a symbol pointed to by relocation and 6027 * call instruction's imm field; for global functions, 6028 * call always has imm = -1, but for static functions 6029 * relocation is against STT_SECTION and insn->imm 6030 * points to a start of a static function 6031 * 6032 * for subprog addr relocation, the relo->sym_off + insn->imm is 6033 * the byte offset in the corresponding section. 6034 */ 6035 if (relo->type == RELO_CALL) 6036 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; 6037 else 6038 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; 6039 } else if (insn_is_pseudo_func(insn)) { 6040 /* 6041 * RELO_SUBPROG_ADDR relo is always emitted even if both 6042 * functions are in the same section, so it shouldn't reach here. 
*/ 6044 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", 6045 prog->name, insn_idx); 6046 return -LIBBPF_ERRNO__RELOC; 6047 } else { 6048 /* if subprogram call is to a static function within 6049 * the same ELF section, there won't be any relocation 6050 * emitted, but it also means there is no additional 6051 * offset necessary, insns->imm is relative to 6052 * instruction's original position within the section 6053 */ 6054 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1; 6055 } 6056 6057 /* we enforce that sub-programs should be in .text section */ 6058 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx); 6059 if (!subprog) { 6060 pr_warn("prog '%s': no .text section found yet sub-program call exists\n", 6061 prog->name); 6062 return -LIBBPF_ERRNO__RELOC; 6063 } 6064 6065 /* if it's the first call instruction calling into this 6066 * subprogram (meaning this subprog hasn't been processed 6067 * yet) within the context of current main program: 6068 * - append it at the end of the main program's instruction block; 6069 * - process it recursively, while the current program is put on hold; 6070 * - if that subprogram calls some other not yet processed 6071 * subprogram, the same thing will happen recursively until 6072 * there are no more unprocessed subprograms left to append 6073 * and relocate. 6074 */ 6075 if (subprog->sub_insn_off == 0) { 6076 err = bpf_object__append_subprog_code(obj, main_prog, subprog); 6077 if (err) 6078 return err; 6079 err = bpf_object__reloc_code(obj, main_prog, subprog); 6080 if (err) 6081 return err; 6082 } 6083 6084 /* main_prog->insns memory could have been re-allocated, so 6085 * calculate pointer again 6086 */ 6087 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6088 /* calculate correct instruction position within current main 6089 * prog; each main prog can have a different set of 6090 * subprograms appended (potentially in different order as 6091 * well), so position of any subprog can be different for 6092 * different main programs 6093 */ 6094 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; 6095 6096 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", 6097 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); 6098 } 6099 6100 return 0; 6101 } 6102 6103 /* 6104 * Relocate sub-program calls. 6105 * 6106 * The algorithm operates as follows. Each entry-point BPF program (referred to as 6107 * a main prog) is processed separately. Each subprog (a non-entry function 6108 * that can be called from either entry progs or other subprogs) gets its 6109 * sub_insn_off reset to zero. This serves as an indicator that the subprogram 6110 * hasn't yet been appended and relocated within the current main prog. Once it's 6111 * relocated, sub_insn_off will point at the position within the current main prog 6112 * where the given subprog was appended. This will further be used to relocate all 6113 * the call instructions jumping into this subprog. 6114 * 6115 * We start with the main program and process all call instructions. If the call 6116 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off 6117 * is zero), subprog instructions are appended at the end of the main program's 6118 * instruction array. Then the main program is "put on hold" while we recursively 6119 * process the newly appended subprogram.
If that subprogram calls into another 6120 * subprogram that hasn't been appended, the new subprogram is likewise appended to 6121 * the *main* prog's instructions (subprog's instructions are always left 6122 * untouched, as they need to be in unmodified state for subsequent main progs 6123 * and subprog instructions are always sent only as part of a main prog) and 6124 * the process continues recursively. Once all the subprogs called from a main 6125 * prog or any of its subprogs are appended (and relocated), all their 6126 * positions within the finalized instructions array are known, so it's easy to 6127 * rewrite call instructions with correct relative offsets, corresponding to 6128 * the desired target subprog. 6129 * 6130 * It's important to realize that some subprogs might not be called from a given 6131 * main prog or any of its called/used subprogs. Those will keep their 6132 * subprog->sub_insn_off as zero at all times and won't be appended to the current 6133 * main prog and won't be relocated within the context of the current main prog. 6134 * They might still be used from other main progs later. 6135 * 6136 * Visually this process can be shown as below. Suppose we have two main 6137 * programs, mainA and mainB, and the BPF object contains three subprogs: subA, 6138 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and 6139 * subC both call subB: 6140 * 6141 * +--------+ +-------+ 6142 * | v v | 6143 * +--+---+ +--+-+-+ +---+--+ 6144 * | subA | | subB | | subC | 6145 * +--+---+ +------+ +---+--+ 6146 * ^ ^ 6147 * | | 6148 * +---+-------+ +------+----+ 6149 * | mainA | | mainB | 6150 * +-----------+ +-----------+ 6151 * 6152 * We'll start relocating mainA, find subA, append it, and start 6153 * processing subA recursively: 6154 * 6155 * +-----------+------+ 6156 * | mainA | subA | 6157 * +-----------+------+ 6158 * 6159 * At this point we notice that subB is used from subA, so we append it and 6160 * relocate it (there are no further subcalls from subB): 6161 * 6162 * +-----------+------+------+ 6163 * | mainA | subA | subB | 6164 * +-----------+------+------+ 6165 * 6166 * At this point, we relocate subA calls, then go one level up and finish with 6167 * relocating mainA calls. mainA is done. 6168 * 6169 * For mainB the process is similar but results in a different order. We start with 6170 * mainB and skip subA and subB, as mainB never calls them (at least 6171 * directly), but we see subC is needed, so we append it and start processing it: 6172 * 6173 * +-----------+------+ 6174 * | mainB | subC | 6175 * +-----------+------+ 6176 * Now we see subC needs subB, so we go back to it, append it and relocate it: 6177 * 6178 * +-----------+------+------+ 6179 * | mainB | subC | subB | 6180 * +-----------+------+------+ 6181 * 6182 * At this point we unwind recursion, relocate calls in subC, then in mainB.
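 *
 * As a rough worked example of the final imm rewrite (sizes are made up): if
 * mainA is 20 instructions long and subA is appended at sub_insn_off = 20,
 * then a call to subA at instruction #5 of mainA gets
 * imm = 20 - (0 + 5) - 1 = 14, i.e. the call target is pc + imm + 1 = 20.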
6183 */ 6184 static int 6185 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) 6186 { 6187 struct bpf_program *subprog; 6188 int i, err; 6189 6190 /* mark all subprogs as not relocated (yet) within the context of 6191 * current main program 6192 */ 6193 for (i = 0; i < obj->nr_programs; i++) { 6194 subprog = &obj->programs[i]; 6195 if (!prog_is_subprog(obj, subprog)) 6196 continue; 6197 6198 subprog->sub_insn_off = 0; 6199 } 6200 6201 err = bpf_object__reloc_code(obj, prog, prog); 6202 if (err) 6203 return err; 6204 6205 return 0; 6206 } 6207 6208 static void 6209 bpf_object__free_relocs(struct bpf_object *obj) 6210 { 6211 struct bpf_program *prog; 6212 int i; 6213 6214 /* free up relocation descriptors */ 6215 for (i = 0; i < obj->nr_programs; i++) { 6216 prog = &obj->programs[i]; 6217 zfree(&prog->reloc_desc); 6218 prog->nr_reloc = 0; 6219 } 6220 } 6221 6222 static int cmp_relocs(const void *_a, const void *_b) 6223 { 6224 const struct reloc_desc *a = _a; 6225 const struct reloc_desc *b = _b; 6226 6227 if (a->insn_idx != b->insn_idx) 6228 return a->insn_idx < b->insn_idx ? -1 : 1; 6229 6230 /* no two relocations should have the same insn_idx, but ... */ 6231 if (a->type != b->type) 6232 return a->type < b->type ? -1 : 1; 6233 6234 return 0; 6235 } 6236 6237 static void bpf_object__sort_relos(struct bpf_object *obj) 6238 { 6239 int i; 6240 6241 for (i = 0; i < obj->nr_programs; i++) { 6242 struct bpf_program *p = &obj->programs[i]; 6243 6244 if (!p->nr_reloc) 6245 continue; 6246 6247 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); 6248 } 6249 } 6250 6251 static int 6252 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) 6253 { 6254 struct bpf_program *prog; 6255 size_t i, j; 6256 int err; 6257 6258 if (obj->btf_ext) { 6259 err = bpf_object__relocate_core(obj, targ_btf_path); 6260 if (err) { 6261 pr_warn("failed to perform CO-RE relocations: %d\n", 6262 err); 6263 return err; 6264 } 6265 bpf_object__sort_relos(obj); 6266 } 6267 6268 /* Before relocating calls pre-process relocations and mark 6269 * few ld_imm64 instructions that points to subprogs. 6270 * Otherwise bpf_object__reloc_code() later would have to consider 6271 * all ld_imm64 insns as relocation candidates. That would 6272 * reduce relocation speed, since amount of find_prog_insn_relo() 6273 * would increase and most of them will fail to find a relo. 6274 */ 6275 for (i = 0; i < obj->nr_programs; i++) { 6276 prog = &obj->programs[i]; 6277 for (j = 0; j < prog->nr_reloc; j++) { 6278 struct reloc_desc *relo = &prog->reloc_desc[j]; 6279 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 6280 6281 /* mark the insn, so it's recognized by insn_is_pseudo_func() */ 6282 if (relo->type == RELO_SUBPROG_ADDR) 6283 insn[0].src_reg = BPF_PSEUDO_FUNC; 6284 } 6285 } 6286 6287 /* relocate subprogram calls and append used subprograms to main 6288 * programs; each copy of subprogram code needs to be relocated 6289 * differently for each main program, because its code location might 6290 * have changed. 6291 * Append subprog relos to main programs to allow data relos to be 6292 * processed after text is completely relocated. 
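 *
 * Roughly, the overall order in this function is: 1) CO-RE relocations (if
 * .BTF.ext is present), 2) marking RELO_SUBPROG_ADDR ld_imm64 instructions
 * as BPF_PSEUDO_FUNC, 3) per-main-prog call relocation and subprog
 * appending, 4) data/extern relocation via bpf_object__relocate_data().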
6293 */ 6294 for (i = 0; i < obj->nr_programs; i++) { 6295 prog = &obj->programs[i]; 6296 /* sub-program's sub-calls are relocated within the context of 6297 * its main program only 6298 */ 6299 if (prog_is_subprog(obj, prog)) 6300 continue; 6301 if (!prog->autoload) 6302 continue; 6303 6304 err = bpf_object__relocate_calls(obj, prog); 6305 if (err) { 6306 pr_warn("prog '%s': failed to relocate calls: %d\n", 6307 prog->name, err); 6308 return err; 6309 } 6310 6311 /* Now, also append exception callback if it has not been done already. */ 6312 if (prog->exception_cb_idx >= 0) { 6313 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; 6314 6315 /* Calling exception callback directly is disallowed, which the 6316 * verifier will reject later. In case it was processed already, 6317 * we can skip this step, otherwise for all other valid cases we 6318 * have to append exception callback now. 6319 */ 6320 if (subprog->sub_insn_off == 0) { 6321 err = bpf_object__append_subprog_code(obj, prog, subprog); 6322 if (err) 6323 return err; 6324 err = bpf_object__reloc_code(obj, prog, subprog); 6325 if (err) 6326 return err; 6327 } 6328 } 6329 } 6330 /* Process data relos for main programs */ 6331 for (i = 0; i < obj->nr_programs; i++) { 6332 prog = &obj->programs[i]; 6333 if (prog_is_subprog(obj, prog)) 6334 continue; 6335 if (!prog->autoload) 6336 continue; 6337 err = bpf_object__relocate_data(obj, prog); 6338 if (err) { 6339 pr_warn("prog '%s': failed to relocate data references: %d\n", 6340 prog->name, err); 6341 return err; 6342 } 6343 } 6344 6345 return 0; 6346 } 6347 6348 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 6349 Elf64_Shdr *shdr, Elf_Data *data); 6350 6351 static int bpf_object__collect_map_relos(struct bpf_object *obj, 6352 Elf64_Shdr *shdr, Elf_Data *data) 6353 { 6354 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); 6355 int i, j, nrels, new_sz; 6356 const struct btf_var_secinfo *vi = NULL; 6357 const struct btf_type *sec, *var, *def; 6358 struct bpf_map *map = NULL, *targ_map = NULL; 6359 struct bpf_program *targ_prog = NULL; 6360 bool is_prog_array, is_map_in_map; 6361 const struct btf_member *member; 6362 const char *name, *mname, *type; 6363 unsigned int moff; 6364 Elf64_Sym *sym; 6365 Elf64_Rel *rel; 6366 void *tmp; 6367 6368 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) 6369 return -EINVAL; 6370 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); 6371 if (!sec) 6372 return -EINVAL; 6373 6374 nrels = shdr->sh_size / shdr->sh_entsize; 6375 for (i = 0; i < nrels; i++) { 6376 rel = elf_rel_by_idx(data, i); 6377 if (!rel) { 6378 pr_warn(".maps relo #%d: failed to get ELF relo\n", i); 6379 return -LIBBPF_ERRNO__FORMAT; 6380 } 6381 6382 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 6383 if (!sym) { 6384 pr_warn(".maps relo #%d: symbol %zx not found\n", 6385 i, (size_t)ELF64_R_SYM(rel->r_info)); 6386 return -LIBBPF_ERRNO__FORMAT; 6387 } 6388 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 6389 6390 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", 6391 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, 6392 (size_t)rel->r_offset, sym->st_name, name); 6393 6394 for (j = 0; j < obj->nr_maps; j++) { 6395 map = &obj->maps[j]; 6396 if (map->sec_idx != obj->efile.btf_maps_shndx) 6397 continue; 6398 6399 vi = btf_var_secinfos(sec) + map->btf_var_idx; 6400 if (vi->offset <= rel->r_offset && 6401 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) 6402 break; 6403 } 6404 if (j == 
obj->nr_maps) { 6405 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", 6406 i, name, (size_t)rel->r_offset); 6407 return -EINVAL; 6408 } 6409 6410 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); 6411 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; 6412 type = is_map_in_map ? "map" : "prog"; 6413 if (is_map_in_map) { 6414 if (sym->st_shndx != obj->efile.btf_maps_shndx) { 6415 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", 6416 i, name); 6417 return -LIBBPF_ERRNO__RELOC; 6418 } 6419 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && 6420 map->def.key_size != sizeof(int)) { 6421 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", 6422 i, map->name, sizeof(int)); 6423 return -EINVAL; 6424 } 6425 targ_map = bpf_object__find_map_by_name(obj, name); 6426 if (!targ_map) { 6427 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", 6428 i, name); 6429 return -ESRCH; 6430 } 6431 } else if (is_prog_array) { 6432 targ_prog = bpf_object__find_program_by_name(obj, name); 6433 if (!targ_prog) { 6434 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", 6435 i, name); 6436 return -ESRCH; 6437 } 6438 if (targ_prog->sec_idx != sym->st_shndx || 6439 targ_prog->sec_insn_off * 8 != sym->st_value || 6440 prog_is_subprog(obj, targ_prog)) { 6441 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", 6442 i, name); 6443 return -LIBBPF_ERRNO__RELOC; 6444 } 6445 } else { 6446 return -EINVAL; 6447 } 6448 6449 var = btf__type_by_id(obj->btf, vi->type); 6450 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 6451 if (btf_vlen(def) == 0) 6452 return -EINVAL; 6453 member = btf_members(def) + btf_vlen(def) - 1; 6454 mname = btf__name_by_offset(obj->btf, member->name_off); 6455 if (strcmp(mname, "values")) 6456 return -EINVAL; 6457 6458 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; 6459 if (rel->r_offset - vi->offset < moff) 6460 return -EINVAL; 6461 6462 moff = rel->r_offset - vi->offset - moff; 6463 /* here we use BPF pointer size, which is always 64 bit, as we 6464 * are parsing ELF that was built for BPF target 6465 */ 6466 if (moff % bpf_ptr_sz) 6467 return -EINVAL; 6468 moff /= bpf_ptr_sz; 6469 if (moff >= map->init_slots_sz) { 6470 new_sz = moff + 1; 6471 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); 6472 if (!tmp) 6473 return -ENOMEM; 6474 map->init_slots = tmp; 6475 memset(map->init_slots + map->init_slots_sz, 0, 6476 (new_sz - map->init_slots_sz) * host_ptr_sz); 6477 map->init_slots_sz = new_sz; 6478 } 6479 map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; 6480 6481 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", 6482 i, map->name, moff, type, name); 6483 } 6484 6485 return 0; 6486 } 6487 6488 static int bpf_object__collect_relos(struct bpf_object *obj) 6489 { 6490 int i, err; 6491 6492 for (i = 0; i < obj->efile.sec_cnt; i++) { 6493 struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; 6494 Elf64_Shdr *shdr; 6495 Elf_Data *data; 6496 int idx; 6497 6498 if (sec_desc->sec_type != SEC_RELO) 6499 continue; 6500 6501 shdr = sec_desc->shdr; 6502 data = sec_desc->data; 6503 idx = shdr->sh_info; 6504 6505 if (shdr->sh_type != SHT_REL) { 6506 pr_warn("internal error at %d\n", __LINE__); 6507 return -LIBBPF_ERRNO__INTERNAL; 6508 } 6509 6510 if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx) 6511 err = bpf_object__collect_st_ops_relos(obj, shdr, data); 6512 else if (idx == obj->efile.btf_maps_shndx) 6513 err = bpf_object__collect_map_relos(obj, shdr, data); 6514 else 6515 err = bpf_object__collect_prog_relos(obj, shdr, data); 6516 if (err) 6517 return err; 6518 } 6519 6520 bpf_object__sort_relos(obj); 6521 return 0; 6522 } 6523 6524 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) 6525 { 6526 if (BPF_CLASS(insn->code) == BPF_JMP && 6527 BPF_OP(insn->code) == BPF_CALL && 6528 BPF_SRC(insn->code) == BPF_K && 6529 insn->src_reg == 0 && 6530 insn->dst_reg == 0) { 6531 *func_id = insn->imm; 6532 return true; 6533 } 6534 return false; 6535 } 6536 6537 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) 6538 { 6539 struct bpf_insn *insn = prog->insns; 6540 enum bpf_func_id func_id; 6541 int i; 6542 6543 if (obj->gen_loader) 6544 return 0; 6545 6546 for (i = 0; i < prog->insns_cnt; i++, insn++) { 6547 if (!insn_is_helper_call(insn, &func_id)) 6548 continue; 6549 6550 /* on kernels that don't yet support 6551 * bpf_probe_read_{kernel,user}[_str] helpers, fall back 6552 * to bpf_probe_read() which works well for old kernels 6553 */ 6554 switch (func_id) { 6555 case BPF_FUNC_probe_read_kernel: 6556 case BPF_FUNC_probe_read_user: 6557 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 6558 insn->imm = BPF_FUNC_probe_read; 6559 break; 6560 case BPF_FUNC_probe_read_kernel_str: 6561 case BPF_FUNC_probe_read_user_str: 6562 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 6563 insn->imm = BPF_FUNC_probe_read_str; 6564 break; 6565 default: 6566 break; 6567 } 6568 } 6569 return 0; 6570 } 6571 6572 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 6573 int *btf_obj_fd, int *btf_type_id); 6574 6575 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ 6576 static int libbpf_prepare_prog_load(struct bpf_program *prog, 6577 struct bpf_prog_load_opts *opts, long cookie) 6578 { 6579 enum sec_def_flags def = cookie; 6580 6581 /* old kernels might not support specifying expected_attach_type */ 6582 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) 6583 opts->expected_attach_type = 0; 6584 6585 if (def & SEC_SLEEPABLE) 6586 opts->prog_flags |= BPF_F_SLEEPABLE; 6587 6588 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 6589 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 6590 6591 /* special check for usdt to use uprobe_multi link */ 6592 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) 6593 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; 6594 6595 if ((def & SEC_ATTACH_BTF) && 
!prog->attach_btf_id) { 6596 int btf_obj_fd = 0, btf_type_id = 0, err; 6597 const char *attach_name; 6598 6599 attach_name = strchr(prog->sec_name, '/'); 6600 if (!attach_name) { 6601 /* if BPF program is annotated with just SEC("fentry") 6602 * (or similar) without declaratively specifying 6603 * a target, then it is expected that the target will be 6604 * specified with bpf_program__set_attach_target() at 6605 * runtime, before the BPF object load step. If not, then 6606 * there is nothing to load into the kernel, as the BPF 6607 * verifier won't be able to validate BPF program 6608 * correctness anyway. 6609 */ 6610 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", 6611 prog->name); 6612 return -EINVAL; 6613 } 6614 attach_name++; /* skip over / */ 6615 6616 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); 6617 if (err) 6618 return err; 6619 6620 /* cache resolved BTF FD and BTF type ID in the prog */ 6621 prog->attach_btf_obj_fd = btf_obj_fd; 6622 prog->attach_btf_id = btf_type_id; 6623 6624 /* but by now libbpf common logic is not utilizing 6625 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore, because 6626 * this callback is called after opts were populated by 6627 * libbpf, so this callback has to update opts explicitly here 6628 */ 6629 opts->attach_btf_obj_fd = btf_obj_fd; 6630 opts->attach_btf_id = btf_type_id; 6631 } 6632 return 0; 6633 } 6634 6635 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); 6636 6637 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, 6638 struct bpf_insn *insns, int insns_cnt, 6639 const char *license, __u32 kern_version, int *prog_fd) 6640 { 6641 LIBBPF_OPTS(bpf_prog_load_opts, load_attr); 6642 const char *prog_name = NULL; 6643 char *cp, errmsg[STRERR_BUFSIZE]; 6644 size_t log_buf_size = 0; 6645 char *log_buf = NULL, *tmp; 6646 int btf_fd, ret, err; 6647 bool own_log_buf = true; 6648 __u32 log_level = prog->log_level; 6649 6650 if (prog->type == BPF_PROG_TYPE_UNSPEC) { 6651 /* 6652 * The program type must be set. Most likely we couldn't find a proper 6653 * section definition at load time, and thus we didn't infer the type.
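 * If section name guessing failed, the caller can still set the type
 * explicitly before load, e.g. bpf_program__set_type(prog,
 * BPF_PROG_TYPE_KPROBE); the specific type here is only an illustration.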
6654 */ 6655 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", 6656 prog->name, prog->sec_name); 6657 return -EINVAL; 6658 } 6659 6660 if (!insns || !insns_cnt) 6661 return -EINVAL; 6662 6663 if (kernel_supports(obj, FEAT_PROG_NAME)) 6664 prog_name = prog->name; 6665 load_attr.attach_prog_fd = prog->attach_prog_fd; 6666 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; 6667 load_attr.attach_btf_id = prog->attach_btf_id; 6668 load_attr.kern_version = kern_version; 6669 load_attr.prog_ifindex = prog->prog_ifindex; 6670 load_attr.token_fd = obj->token_fd; 6671 6672 /* specify func_info/line_info only if kernel supports them */ 6673 btf_fd = bpf_object__btf_fd(obj); 6674 if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { 6675 load_attr.prog_btf_fd = btf_fd; 6676 load_attr.func_info = prog->func_info; 6677 load_attr.func_info_rec_size = prog->func_info_rec_size; 6678 load_attr.func_info_cnt = prog->func_info_cnt; 6679 load_attr.line_info = prog->line_info; 6680 load_attr.line_info_rec_size = prog->line_info_rec_size; 6681 load_attr.line_info_cnt = prog->line_info_cnt; 6682 } 6683 load_attr.log_level = log_level; 6684 load_attr.prog_flags = prog->prog_flags; 6685 load_attr.fd_array = obj->fd_array; 6686 6687 /* adjust load_attr if sec_def provides custom preload callback */ 6688 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { 6689 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); 6690 if (err < 0) { 6691 pr_warn("prog '%s': failed to prepare load attributes: %d\n", 6692 prog->name, err); 6693 return err; 6694 } 6695 insns = prog->insns; 6696 insns_cnt = prog->insns_cnt; 6697 } 6698 6699 /* allow prog_prepare_load_fn to change expected_attach_type */ 6700 load_attr.expected_attach_type = prog->expected_attach_type; 6701 6702 if (obj->gen_loader) { 6703 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, 6704 license, insns, insns_cnt, &load_attr, 6705 prog - obj->programs); 6706 *prog_fd = -1; 6707 return 0; 6708 } 6709 6710 retry_load: 6711 /* if log_level is zero, we don't request logs initially even if 6712 * custom log_buf is specified; if the program load fails, then we'll 6713 * bump log_level to 1 and use either custom log_buf or we'll allocate 6714 * our own and retry the load to get details on what failed 6715 */ 6716 if (log_level) { 6717 if (prog->log_buf) { 6718 log_buf = prog->log_buf; 6719 log_buf_size = prog->log_size; 6720 own_log_buf = false; 6721 } else if (obj->log_buf) { 6722 log_buf = obj->log_buf; 6723 log_buf_size = obj->log_size; 6724 own_log_buf = false; 6725 } else { 6726 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); 6727 tmp = realloc(log_buf, log_buf_size); 6728 if (!tmp) { 6729 ret = -ENOMEM; 6730 goto out; 6731 } 6732 log_buf = tmp; 6733 log_buf[0] = '\0'; 6734 own_log_buf = true; 6735 } 6736 } 6737 6738 load_attr.log_buf = log_buf; 6739 load_attr.log_size = log_buf_size; 6740 load_attr.log_level = log_level; 6741 6742 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); 6743 if (ret >= 0) { 6744 if (log_level && own_log_buf) { 6745 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 6746 prog->name, log_buf); 6747 } 6748 6749 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { 6750 struct bpf_map *map; 6751 int i; 6752 6753 for (i = 0; i < obj->nr_maps; i++) { 6754 map = &prog->obj->maps[i]; 6755 if (map->libbpf_type != LIBBPF_MAP_RODATA) 6756 continue; 6757 6758 if (bpf_prog_bind_map(ret, 
bpf_map__fd(map), NULL)) { 6759 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 6760 pr_warn("prog '%s': failed to bind map '%s': %s\n", 6761 prog->name, map->real_name, cp); 6762 /* Don't fail hard if can't bind rodata. */ 6763 } 6764 } 6765 } 6766 6767 *prog_fd = ret; 6768 ret = 0; 6769 goto out; 6770 } 6771 6772 if (log_level == 0) { 6773 log_level = 1; 6774 goto retry_load; 6775 } 6776 /* On ENOSPC, increase log buffer size and retry, unless custom 6777 * log_buf is specified. 6778 * Be careful to not overflow u32, though. Kernel's log buf size limit 6779 * isn't part of UAPI so it can always be bumped to full 4GB. So don't 6780 * multiply by 2 unless we are sure we'll fit within 32 bits. 6781 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 6782 */ 6783 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) 6784 goto retry_load; 6785 6786 ret = -errno; 6787 6788 /* post-process verifier log to improve error descriptions */ 6789 fixup_verifier_log(prog, log_buf, log_buf_size); 6790 6791 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 6792 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); 6793 pr_perm_msg(ret); 6794 6795 if (own_log_buf && log_buf && log_buf[0] != '\0') { 6796 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 6797 prog->name, log_buf); 6798 } 6799 6800 out: 6801 if (own_log_buf) 6802 free(log_buf); 6803 return ret; 6804 } 6805 6806 static char *find_prev_line(char *buf, char *cur) 6807 { 6808 char *p; 6809 6810 if (cur == buf) /* end of a log buf */ 6811 return NULL; 6812 6813 p = cur - 1; 6814 while (p - 1 >= buf && *(p - 1) != '\n') 6815 p--; 6816 6817 return p; 6818 } 6819 6820 static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 6821 char *orig, size_t orig_sz, const char *patch) 6822 { 6823 /* size of the remaining log content to the right from the to-be-replaced part */ 6824 size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 6825 size_t patch_sz = strlen(patch); 6826 6827 if (patch_sz != orig_sz) { 6828 /* If patch line(s) are longer than original piece of verifier log, 6829 * shift log contents by (patch_sz - orig_sz) bytes to the right 6830 * starting from after to-be-replaced part of the log. 6831 * 6832 * If patch line(s) are shorter than original piece of verifier log, 6833 * shift log contents by (orig_sz - patch_sz) bytes to the left 6834 * starting from after to-be-replaced part of the log 6835 * 6836 * We need to be careful about not overflowing available 6837 * buf_sz capacity. If that's the case, we'll truncate the end 6838 * of the original log, as necessary. 
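 *
 * Illustrative example (sizes are made up): with buf_sz = 100, log_sz = 60,
 * orig at offset 20, orig_sz = 10 and patch_sz = 25, rem_sz = 60 - 30 = 30,
 * no truncation is needed (25 - 10 <= 100 - 60), the 30 trailing bytes are
 * shifted from offset 30 to offset 45, and the 25-byte patch is then copied
 * over offset 20.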
6839 */ 6840 if (patch_sz > orig_sz) { 6841 if (orig + patch_sz >= buf + buf_sz) { 6842 /* patch is big enough to cover remaining space completely */ 6843 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 6844 rem_sz = 0; 6845 } else if (patch_sz - orig_sz > buf_sz - log_sz) { 6846 /* patch causes part of remaining log to be truncated */ 6847 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 6848 } 6849 } 6850 /* shift remaining log to the right by calculated amount */ 6851 memmove(orig + patch_sz, orig + orig_sz, rem_sz); 6852 } 6853 6854 memcpy(orig, patch, patch_sz); 6855 } 6856 6857 static void fixup_log_failed_core_relo(struct bpf_program *prog, 6858 char *buf, size_t buf_sz, size_t log_sz, 6859 char *line1, char *line2, char *line3) 6860 { 6861 /* Expected log for failed and not properly guarded CO-RE relocation: 6862 * line1 -> 123: (85) call unknown#195896080 6863 * line2 -> invalid func unknown#195896080 6864 * line3 -> <anything else or end of buffer> 6865 * 6866 * "123" is the index of the instruction that was poisoned. We extract 6867 * instruction index to find corresponding CO-RE relocation and 6868 * replace this part of the log with more relevant information about 6869 * failed CO-RE relocation. 6870 */ 6871 const struct bpf_core_relo *relo; 6872 struct bpf_core_spec spec; 6873 char patch[512], spec_buf[256]; 6874 int insn_idx, err, spec_len; 6875 6876 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 6877 return; 6878 6879 relo = find_relo_core(prog, insn_idx); 6880 if (!relo) 6881 return; 6882 6883 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 6884 if (err) 6885 return; 6886 6887 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 6888 snprintf(patch, sizeof(patch), 6889 "%d: <invalid CO-RE relocation>\n" 6890 "failed to resolve CO-RE relocation %s%s\n", 6891 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); 6892 6893 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 6894 } 6895 6896 static void fixup_log_missing_map_load(struct bpf_program *prog, 6897 char *buf, size_t buf_sz, size_t log_sz, 6898 char *line1, char *line2, char *line3) 6899 { 6900 /* Expected log for failed and not properly guarded map reference: 6901 * line1 -> 123: (85) call unknown#2001000345 6902 * line2 -> invalid func unknown#2001000345 6903 * line3 -> <anything else or end of buffer> 6904 * 6905 * "123" is the index of the instruction that was poisoned. 6906 * "345" in "2001000345" is a map index in obj->maps to fetch map name. 
6907 */ 6908 struct bpf_object *obj = prog->obj; 6909 const struct bpf_map *map; 6910 int insn_idx, map_idx; 6911 char patch[128]; 6912 6913 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) 6914 return; 6915 6916 map_idx -= POISON_LDIMM64_MAP_BASE; 6917 if (map_idx < 0 || map_idx >= obj->nr_maps) 6918 return; 6919 map = &obj->maps[map_idx]; 6920 6921 snprintf(patch, sizeof(patch), 6922 "%d: <invalid BPF map reference>\n" 6923 "BPF map '%s' is referenced but wasn't created\n", 6924 insn_idx, map->name); 6925 6926 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 6927 } 6928 6929 static void fixup_log_missing_kfunc_call(struct bpf_program *prog, 6930 char *buf, size_t buf_sz, size_t log_sz, 6931 char *line1, char *line2, char *line3) 6932 { 6933 /* Expected log for failed and not properly guarded kfunc call: 6934 * line1 -> 123: (85) call unknown#2002000345 6935 * line2 -> invalid func unknown#2002000345 6936 * line3 -> <anything else or end of buffer> 6937 * 6938 * "123" is the index of the instruction that was poisoned. 6939 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. 6940 */ 6941 struct bpf_object *obj = prog->obj; 6942 const struct extern_desc *ext; 6943 int insn_idx, ext_idx; 6944 char patch[128]; 6945 6946 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) 6947 return; 6948 6949 ext_idx -= POISON_CALL_KFUNC_BASE; 6950 if (ext_idx < 0 || ext_idx >= obj->nr_extern) 6951 return; 6952 ext = &obj->externs[ext_idx]; 6953 6954 snprintf(patch, sizeof(patch), 6955 "%d: <invalid kfunc call>\n" 6956 "kfunc '%s' is referenced but wasn't resolved\n", 6957 insn_idx, ext->name); 6958 6959 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 6960 } 6961 6962 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 6963 { 6964 /* look for familiar error patterns in last N lines of the log */ 6965 const size_t max_last_line_cnt = 10; 6966 char *prev_line, *cur_line, *next_line; 6967 size_t log_sz; 6968 int i; 6969 6970 if (!buf) 6971 return; 6972 6973 log_sz = strlen(buf) + 1; 6974 next_line = buf + log_sz - 1; 6975 6976 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 6977 cur_line = find_prev_line(buf, next_line); 6978 if (!cur_line) 6979 return; 6980 6981 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 6982 prev_line = find_prev_line(buf, cur_line); 6983 if (!prev_line) 6984 continue; 6985 6986 /* failed CO-RE relocation case */ 6987 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 6988 prev_line, cur_line, next_line); 6989 return; 6990 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { 6991 prev_line = find_prev_line(buf, cur_line); 6992 if (!prev_line) 6993 continue; 6994 6995 /* reference to uncreated BPF map */ 6996 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, 6997 prev_line, cur_line, next_line); 6998 return; 6999 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { 7000 prev_line = find_prev_line(buf, cur_line); 7001 if (!prev_line) 7002 continue; 7003 7004 /* reference to unresolved kfunc */ 7005 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, 7006 prev_line, cur_line, next_line); 7007 return; 7008 } 7009 } 7010 } 7011 7012 static int bpf_program_record_relos(struct bpf_program *prog) 7013 { 7014 struct bpf_object *obj = prog->obj; 7015 int i; 7016 7017 for (i = 0; i < prog->nr_reloc; i++) { 7018 struct reloc_desc *relo = &prog->reloc_desc[i]; 7019 
struct extern_desc *ext = &obj->externs[relo->ext_idx]; 7020 int kind; 7021 7022 switch (relo->type) { 7023 case RELO_EXTERN_LD64: 7024 if (ext->type != EXT_KSYM) 7025 continue; 7026 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 7027 BTF_KIND_VAR : BTF_KIND_FUNC; 7028 bpf_gen__record_extern(obj->gen_loader, ext->name, 7029 ext->is_weak, !ext->ksym.type_id, 7030 true, kind, relo->insn_idx); 7031 break; 7032 case RELO_EXTERN_CALL: 7033 bpf_gen__record_extern(obj->gen_loader, ext->name, 7034 ext->is_weak, false, false, BTF_KIND_FUNC, 7035 relo->insn_idx); 7036 break; 7037 case RELO_CORE: { 7038 struct bpf_core_relo cr = { 7039 .insn_off = relo->insn_idx * 8, 7040 .type_id = relo->core_relo->type_id, 7041 .access_str_off = relo->core_relo->access_str_off, 7042 .kind = relo->core_relo->kind, 7043 }; 7044 7045 bpf_gen__record_relo_core(obj->gen_loader, &cr); 7046 break; 7047 } 7048 default: 7049 continue; 7050 } 7051 } 7052 return 0; 7053 } 7054 7055 static int 7056 bpf_object__load_progs(struct bpf_object *obj, int log_level) 7057 { 7058 struct bpf_program *prog; 7059 size_t i; 7060 int err; 7061 7062 for (i = 0; i < obj->nr_programs; i++) { 7063 prog = &obj->programs[i]; 7064 err = bpf_object__sanitize_prog(obj, prog); 7065 if (err) 7066 return err; 7067 } 7068 7069 for (i = 0; i < obj->nr_programs; i++) { 7070 prog = &obj->programs[i]; 7071 if (prog_is_subprog(obj, prog)) 7072 continue; 7073 if (!prog->autoload) { 7074 pr_debug("prog '%s': skipped loading\n", prog->name); 7075 continue; 7076 } 7077 prog->log_level |= log_level; 7078 7079 if (obj->gen_loader) 7080 bpf_program_record_relos(prog); 7081 7082 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, 7083 obj->license, obj->kern_version, &prog->fd); 7084 if (err) { 7085 pr_warn("prog '%s': failed to load: %d\n", prog->name, err); 7086 return err; 7087 } 7088 } 7089 7090 bpf_object__free_relocs(obj); 7091 return 0; 7092 } 7093 7094 static const struct bpf_sec_def *find_sec_def(const char *sec_name); 7095 7096 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) 7097 { 7098 struct bpf_program *prog; 7099 int err; 7100 7101 bpf_object__for_each_program(prog, obj) { 7102 prog->sec_def = find_sec_def(prog->sec_name); 7103 if (!prog->sec_def) { 7104 /* couldn't guess, but user might manually specify */ 7105 pr_debug("prog '%s': unrecognized ELF section name '%s'\n", 7106 prog->name, prog->sec_name); 7107 continue; 7108 } 7109 7110 prog->type = prog->sec_def->prog_type; 7111 prog->expected_attach_type = prog->sec_def->expected_attach_type; 7112 7113 /* sec_def can have custom callback which should be called 7114 * after bpf_program is initialized to adjust its properties 7115 */ 7116 if (prog->sec_def->prog_setup_fn) { 7117 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); 7118 if (err < 0) { 7119 pr_warn("prog '%s': failed to initialize: %d\n", 7120 prog->name, err); 7121 return err; 7122 } 7123 } 7124 } 7125 7126 return 0; 7127 } 7128 7129 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, 7130 const struct bpf_object_open_opts *opts) 7131 { 7132 const char *obj_name, *kconfig, *btf_tmp_path, *token_path; 7133 struct bpf_object *obj; 7134 char tmp_name[64]; 7135 int err, token_fd; 7136 char *log_buf; 7137 size_t log_size; 7138 __u32 log_level; 7139 7140 if (elf_version(EV_CURRENT) == EV_NONE) { 7141 pr_warn("failed to init libelf for %s\n", 7142 path ? 
: "(mem buf)"); 7143 return ERR_PTR(-LIBBPF_ERRNO__LIBELF); 7144 } 7145 7146 if (!OPTS_VALID(opts, bpf_object_open_opts)) 7147 return ERR_PTR(-EINVAL); 7148 7149 obj_name = OPTS_GET(opts, object_name, NULL); 7150 if (obj_buf) { 7151 if (!obj_name) { 7152 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", 7153 (unsigned long)obj_buf, 7154 (unsigned long)obj_buf_sz); 7155 obj_name = tmp_name; 7156 } 7157 path = obj_name; 7158 pr_debug("loading object '%s' from buffer\n", obj_name); 7159 } 7160 7161 log_buf = OPTS_GET(opts, kernel_log_buf, NULL); 7162 log_size = OPTS_GET(opts, kernel_log_size, 0); 7163 log_level = OPTS_GET(opts, kernel_log_level, 0); 7164 if (log_size > UINT_MAX) 7165 return ERR_PTR(-EINVAL); 7166 if (log_size && !log_buf) 7167 return ERR_PTR(-EINVAL); 7168 7169 token_path = OPTS_GET(opts, bpf_token_path, NULL); 7170 token_fd = OPTS_GET(opts, bpf_token_fd, -1); 7171 /* non-empty token path can't be combined with invalid token FD */ 7172 if (token_path && token_path[0] != '\0' && token_fd < 0) 7173 return ERR_PTR(-EINVAL); 7174 /* empty token path can't be combined with valid token FD */ 7175 if (token_path && token_path[0] == '\0' && token_fd > 0) 7176 return ERR_PTR(-EINVAL); 7177 /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, 7178 * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as 7179 * bpf_token_path option 7180 */ 7181 if (token_fd == 0 && !token_path) 7182 token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); 7183 /* empty token_path is equivalent to invalid token_fd */ 7184 if (token_path && token_path[0] == '\0') { 7185 token_path = NULL; 7186 token_fd = -1; 7187 } 7188 if (token_path && strlen(token_path) >= PATH_MAX) 7189 return ERR_PTR(-ENAMETOOLONG); 7190 7191 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); 7192 if (IS_ERR(obj)) 7193 return obj; 7194 7195 obj->log_buf = log_buf; 7196 obj->log_size = log_size; 7197 obj->log_level = log_level; 7198 7199 obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); 7200 if (token_fd > 0 && obj->token_fd < 0) { 7201 err = -errno; 7202 goto out; 7203 } 7204 if (token_path) { 7205 obj->token_path = strdup(token_path); 7206 if (!obj->token_path) { 7207 err = -ENOMEM; 7208 goto out; 7209 } 7210 } 7211 7212 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 7213 if (btf_tmp_path) { 7214 if (strlen(btf_tmp_path) >= PATH_MAX) { 7215 err = -ENAMETOOLONG; 7216 goto out; 7217 } 7218 obj->btf_custom_path = strdup(btf_tmp_path); 7219 if (!obj->btf_custom_path) { 7220 err = -ENOMEM; 7221 goto out; 7222 } 7223 } 7224 7225 kconfig = OPTS_GET(opts, kconfig, NULL); 7226 if (kconfig) { 7227 obj->kconfig = strdup(kconfig); 7228 if (!obj->kconfig) { 7229 err = -ENOMEM; 7230 goto out; 7231 } 7232 } 7233 7234 err = bpf_object__elf_init(obj); 7235 err = err ? : bpf_object__check_endianness(obj); 7236 err = err ? : bpf_object__elf_collect(obj); 7237 err = err ? : bpf_object__collect_externs(obj); 7238 err = err ? : bpf_object_fixup_btf(obj); 7239 err = err ? : bpf_object__init_maps(obj, opts); 7240 err = err ? : bpf_object_init_progs(obj, opts); 7241 err = err ? 
: bpf_object__collect_relos(obj); 7242 if (err) 7243 goto out; 7244 7245 bpf_object__elf_finish(obj); 7246 7247 return obj; 7248 out: 7249 bpf_object__close(obj); 7250 return ERR_PTR(err); 7251 } 7252 7253 struct bpf_object * 7254 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 7255 { 7256 if (!path) 7257 return libbpf_err_ptr(-EINVAL); 7258 7259 pr_debug("loading %s\n", path); 7260 7261 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); 7262 } 7263 7264 struct bpf_object *bpf_object__open(const char *path) 7265 { 7266 return bpf_object__open_file(path, NULL); 7267 } 7268 7269 struct bpf_object * 7270 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 7271 const struct bpf_object_open_opts *opts) 7272 { 7273 if (!obj_buf || obj_buf_sz == 0) 7274 return libbpf_err_ptr(-EINVAL); 7275 7276 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); 7277 } 7278 7279 static int bpf_object_unload(struct bpf_object *obj) 7280 { 7281 size_t i; 7282 7283 if (!obj) 7284 return libbpf_err(-EINVAL); 7285 7286 for (i = 0; i < obj->nr_maps; i++) { 7287 zclose(obj->maps[i].fd); 7288 if (obj->maps[i].st_ops) 7289 zfree(&obj->maps[i].st_ops->kern_vdata); 7290 } 7291 7292 for (i = 0; i < obj->nr_programs; i++) 7293 bpf_program__unload(&obj->programs[i]); 7294 7295 return 0; 7296 } 7297 7298 static int bpf_object__sanitize_maps(struct bpf_object *obj) 7299 { 7300 struct bpf_map *m; 7301 7302 bpf_object__for_each_map(m, obj) { 7303 if (!bpf_map__is_internal(m)) 7304 continue; 7305 if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) 7306 m->def.map_flags &= ~BPF_F_MMAPABLE; 7307 } 7308 7309 return 0; 7310 } 7311 7312 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) 7313 { 7314 char sym_type, sym_name[500]; 7315 unsigned long long sym_addr; 7316 int ret, err = 0; 7317 FILE *f; 7318 7319 f = fopen("/proc/kallsyms", "re"); 7320 if (!f) { 7321 err = -errno; 7322 pr_warn("failed to open /proc/kallsyms: %d\n", err); 7323 return err; 7324 } 7325 7326 while (true) { 7327 ret = fscanf(f, "%llx %c %499s%*[^\n]\n", 7328 &sym_addr, &sym_type, sym_name); 7329 if (ret == EOF && feof(f)) 7330 break; 7331 if (ret != 3) { 7332 pr_warn("failed to read kallsyms entry: %d\n", ret); 7333 err = -EINVAL; 7334 break; 7335 } 7336 7337 err = cb(sym_addr, sym_type, sym_name, ctx); 7338 if (err) 7339 break; 7340 } 7341 7342 fclose(f); 7343 return err; 7344 } 7345 7346 static int kallsyms_cb(unsigned long long sym_addr, char sym_type, 7347 const char *sym_name, void *ctx) 7348 { 7349 struct bpf_object *obj = ctx; 7350 const struct btf_type *t; 7351 struct extern_desc *ext; 7352 7353 ext = find_extern_by_name(obj, sym_name); 7354 if (!ext || ext->type != EXT_KSYM) 7355 return 0; 7356 7357 t = btf__type_by_id(obj->btf, ext->btf_id); 7358 if (!btf_is_var(t)) 7359 return 0; 7360 7361 if (ext->is_set && ext->ksym.addr != sym_addr) { 7362 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 7363 sym_name, ext->ksym.addr, sym_addr); 7364 return -EINVAL; 7365 } 7366 if (!ext->is_set) { 7367 ext->is_set = true; 7368 ext->ksym.addr = sym_addr; 7369 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 7370 } 7371 return 0; 7372 } 7373 7374 static int bpf_object__read_kallsyms_file(struct bpf_object *obj) 7375 { 7376 return libbpf_kallsyms_parse(kallsyms_cb, obj); 7377 } 7378 7379 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 7380 __u16 kind, struct btf **res_btf, 7381 struct module_btf **res_mod_btf) 7382 { 7383 struct 
module_btf *mod_btf; 7384 struct btf *btf; 7385 int i, id, err; 7386 7387 btf = obj->btf_vmlinux; 7388 mod_btf = NULL; 7389 id = btf__find_by_name_kind(btf, ksym_name, kind); 7390 7391 if (id == -ENOENT) { 7392 err = load_module_btfs(obj); 7393 if (err) 7394 return err; 7395 7396 for (i = 0; i < obj->btf_module_cnt; i++) { 7397 /* we assume module_btf's BTF FD is always >0 */ 7398 mod_btf = &obj->btf_modules[i]; 7399 btf = mod_btf->btf; 7400 id = btf__find_by_name_kind_own(btf, ksym_name, kind); 7401 if (id != -ENOENT) 7402 break; 7403 } 7404 } 7405 if (id <= 0) 7406 return -ESRCH; 7407 7408 *res_btf = btf; 7409 *res_mod_btf = mod_btf; 7410 return id; 7411 } 7412 7413 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 7414 struct extern_desc *ext) 7415 { 7416 const struct btf_type *targ_var, *targ_type; 7417 __u32 targ_type_id, local_type_id; 7418 struct module_btf *mod_btf = NULL; 7419 const char *targ_var_name; 7420 struct btf *btf = NULL; 7421 int id, err; 7422 7423 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); 7424 if (id < 0) { 7425 if (id == -ESRCH && ext->is_weak) 7426 return 0; 7427 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", 7428 ext->name); 7429 return id; 7430 } 7431 7432 /* find local type_id */ 7433 local_type_id = ext->ksym.type_id; 7434 7435 /* find target type_id */ 7436 targ_var = btf__type_by_id(btf, id); 7437 targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 7438 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 7439 7440 err = bpf_core_types_are_compat(obj->btf, local_type_id, 7441 btf, targ_type_id); 7442 if (err <= 0) { 7443 const struct btf_type *local_type; 7444 const char *targ_name, *local_name; 7445 7446 local_type = btf__type_by_id(obj->btf, local_type_id); 7447 local_name = btf__name_by_offset(obj->btf, local_type->name_off); 7448 targ_name = btf__name_by_offset(btf, targ_type->name_off); 7449 7450 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 7451 ext->name, local_type_id, 7452 btf_kind_str(local_type), local_name, targ_type_id, 7453 btf_kind_str(targ_type), targ_name); 7454 return -EINVAL; 7455 } 7456 7457 ext->is_set = true; 7458 ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; 7459 ext->ksym.kernel_btf_id = id; 7460 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 7461 ext->name, id, btf_kind_str(targ_var), targ_var_name); 7462 7463 return 0; 7464 } 7465 7466 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 7467 struct extern_desc *ext) 7468 { 7469 int local_func_proto_id, kfunc_proto_id, kfunc_id; 7470 struct module_btf *mod_btf = NULL; 7471 const struct btf_type *kern_func; 7472 struct btf *kern_btf = NULL; 7473 int ret; 7474 7475 local_func_proto_id = ext->ksym.type_id; 7476 7477 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 7478 &mod_btf); 7479 if (kfunc_id < 0) { 7480 if (kfunc_id == -ESRCH && ext->is_weak) 7481 return 0; 7482 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", 7483 ext->name); 7484 return kfunc_id; 7485 } 7486 7487 kern_func = btf__type_by_id(kern_btf, kfunc_id); 7488 kfunc_proto_id = kern_func->type; 7489 7490 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 7491 kern_btf, kfunc_proto_id); 7492 if (ret <= 0) { 7493 if (ext->is_weak) 7494 return 0; 7495 7496 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 7497 ext->name, local_func_proto_id, 7498 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); 7499 return -EINVAL; 7500 } 7501 7502 /* set index for module BTF fd in fd_array, if unset */ 7503 if (mod_btf && !mod_btf->fd_array_idx) { 7504 /* insn->off is s16 */ 7505 if (obj->fd_array_cnt == INT16_MAX) { 7506 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", 7507 ext->name, mod_btf->fd_array_idx); 7508 return -E2BIG; 7509 } 7510 /* Cannot use index 0 for module BTF fd */ 7511 if (!obj->fd_array_cnt) 7512 obj->fd_array_cnt = 1; 7513 7514 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), 7515 obj->fd_array_cnt + 1); 7516 if (ret) 7517 return ret; 7518 mod_btf->fd_array_idx = obj->fd_array_cnt; 7519 /* we assume module BTF FD is always >0 */ 7520 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; 7521 } 7522 7523 ext->is_set = true; 7524 ext->ksym.kernel_btf_id = kfunc_id; 7525 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; 7526 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() 7527 * populates FD into ld_imm64 insn when it's used to point to kfunc. 7528 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. 7529 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. 7530 */ 7531 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; 7532 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", 7533 ext->name, mod_btf ? 
mod_btf->name : "vmlinux", kfunc_id); 7534 7535 return 0; 7536 } 7537 7538 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) 7539 { 7540 const struct btf_type *t; 7541 struct extern_desc *ext; 7542 int i, err; 7543 7544 for (i = 0; i < obj->nr_extern; i++) { 7545 ext = &obj->externs[i]; 7546 if (ext->type != EXT_KSYM || !ext->ksym.type_id) 7547 continue; 7548 7549 if (obj->gen_loader) { 7550 ext->is_set = true; 7551 ext->ksym.kernel_btf_obj_fd = 0; 7552 ext->ksym.kernel_btf_id = 0; 7553 continue; 7554 } 7555 t = btf__type_by_id(obj->btf, ext->btf_id); 7556 if (btf_is_var(t)) 7557 err = bpf_object__resolve_ksym_var_btf_id(obj, ext); 7558 else 7559 err = bpf_object__resolve_ksym_func_btf_id(obj, ext); 7560 if (err) 7561 return err; 7562 } 7563 return 0; 7564 } 7565 7566 static int bpf_object__resolve_externs(struct bpf_object *obj, 7567 const char *extra_kconfig) 7568 { 7569 bool need_config = false, need_kallsyms = false; 7570 bool need_vmlinux_btf = false; 7571 struct extern_desc *ext; 7572 void *kcfg_data = NULL; 7573 int err, i; 7574 7575 if (obj->nr_extern == 0) 7576 return 0; 7577 7578 if (obj->kconfig_map_idx >= 0) 7579 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; 7580 7581 for (i = 0; i < obj->nr_extern; i++) { 7582 ext = &obj->externs[i]; 7583 7584 if (ext->type == EXT_KSYM) { 7585 if (ext->ksym.type_id) 7586 need_vmlinux_btf = true; 7587 else 7588 need_kallsyms = true; 7589 continue; 7590 } else if (ext->type == EXT_KCFG) { 7591 void *ext_ptr = kcfg_data + ext->kcfg.data_off; 7592 __u64 value = 0; 7593 7594 /* Kconfig externs need actual /proc/config.gz */ 7595 if (str_has_pfx(ext->name, "CONFIG_")) { 7596 need_config = true; 7597 continue; 7598 } 7599 7600 /* Virtual kcfg externs are customly handled by libbpf */ 7601 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 7602 value = get_kernel_version(); 7603 if (!value) { 7604 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); 7605 return -EINVAL; 7606 } 7607 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { 7608 value = kernel_supports(obj, FEAT_BPF_COOKIE); 7609 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { 7610 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); 7611 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { 7612 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed 7613 * __kconfig externs, where LINUX_ ones are virtual and filled out 7614 * customly by libbpf (their values don't come from Kconfig). 7615 * If LINUX_xxx variable is not recognized by libbpf, but is marked 7616 * __weak, it defaults to zero value, just like for CONFIG_xxx 7617 * externs. 
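 * On the BPF program side such externs are typically declared along the
 * lines of (illustration only):
 *   extern int LINUX_KERNEL_VERSION __kconfig;
 *   extern bool CONFIG_BPF_SYSCALL __kconfig __weak;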
7618 */ 7619 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 7620 return -EINVAL; 7621 } 7622 7623 err = set_kcfg_value_num(ext, ext_ptr, value); 7624 if (err) 7625 return err; 7626 pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 7627 ext->name, (long long)value); 7628 } else { 7629 pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 7630 return -EINVAL; 7631 } 7632 } 7633 if (need_config && extra_kconfig) { 7634 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); 7635 if (err) 7636 return -EINVAL; 7637 need_config = false; 7638 for (i = 0; i < obj->nr_extern; i++) { 7639 ext = &obj->externs[i]; 7640 if (ext->type == EXT_KCFG && !ext->is_set) { 7641 need_config = true; 7642 break; 7643 } 7644 } 7645 } 7646 if (need_config) { 7647 err = bpf_object__read_kconfig_file(obj, kcfg_data); 7648 if (err) 7649 return -EINVAL; 7650 } 7651 if (need_kallsyms) { 7652 err = bpf_object__read_kallsyms_file(obj); 7653 if (err) 7654 return -EINVAL; 7655 } 7656 if (need_vmlinux_btf) { 7657 err = bpf_object__resolve_ksyms_btf_id(obj); 7658 if (err) 7659 return -EINVAL; 7660 } 7661 for (i = 0; i < obj->nr_extern; i++) { 7662 ext = &obj->externs[i]; 7663 7664 if (!ext->is_set && !ext->is_weak) { 7665 pr_warn("extern '%s' (strong): not resolved\n", ext->name); 7666 return -ESRCH; 7667 } else if (!ext->is_set) { 7668 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 7669 ext->name); 7670 } 7671 } 7672 7673 return 0; 7674 } 7675 7676 static void bpf_map_prepare_vdata(const struct bpf_map *map) 7677 { 7678 struct bpf_struct_ops *st_ops; 7679 __u32 i; 7680 7681 st_ops = map->st_ops; 7682 for (i = 0; i < btf_vlen(st_ops->type); i++) { 7683 struct bpf_program *prog = st_ops->progs[i]; 7684 void *kern_data; 7685 int prog_fd; 7686 7687 if (!prog) 7688 continue; 7689 7690 prog_fd = bpf_program__fd(prog); 7691 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 7692 *(unsigned long *)kern_data = prog_fd; 7693 } 7694 } 7695 7696 static int bpf_object_prepare_struct_ops(struct bpf_object *obj) 7697 { 7698 int i; 7699 7700 for (i = 0; i < obj->nr_maps; i++) 7701 if (bpf_map__is_struct_ops(&obj->maps[i])) 7702 bpf_map_prepare_vdata(&obj->maps[i]); 7703 7704 return 0; 7705 } 7706 7707 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 7708 { 7709 int err, i; 7710 7711 if (!obj) 7712 return libbpf_err(-EINVAL); 7713 7714 if (obj->loaded) { 7715 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 7716 return libbpf_err(-EINVAL); 7717 } 7718 7719 if (obj->gen_loader) 7720 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); 7721 7722 err = bpf_object_prepare_token(obj); 7723 err = err ? : bpf_object__probe_loading(obj); 7724 err = err ? : bpf_object__load_vmlinux_btf(obj, false); 7725 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 7726 err = err ? : bpf_object__sanitize_and_load_btf(obj); 7727 err = err ? : bpf_object__sanitize_maps(obj); 7728 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 7729 err = err ? : bpf_object__create_maps(obj); 7730 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 7731 err = err ? : bpf_object__load_progs(obj, extra_log_level); 7732 err = err ? : bpf_object_init_prog_arrays(obj); 7733 err = err ? 
: bpf_object_prepare_struct_ops(obj); 7734 7735 if (obj->gen_loader) { 7736 /* reset FDs */ 7737 if (obj->btf) 7738 btf__set_fd(obj->btf, -1); 7739 for (i = 0; i < obj->nr_maps; i++) 7740 obj->maps[i].fd = -1; 7741 if (!err) 7742 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 7743 } 7744 7745 /* clean up fd_array */ 7746 zfree(&obj->fd_array); 7747 7748 /* clean up module BTFs */ 7749 for (i = 0; i < obj->btf_module_cnt; i++) { 7750 close(obj->btf_modules[i].fd); 7751 btf__free(obj->btf_modules[i].btf); 7752 free(obj->btf_modules[i].name); 7753 } 7754 free(obj->btf_modules); 7755 7756 /* clean up vmlinux BTF */ 7757 btf__free(obj->btf_vmlinux); 7758 obj->btf_vmlinux = NULL; 7759 7760 obj->loaded = true; /* doesn't matter if successfully or not */ 7761 7762 if (err) 7763 goto out; 7764 7765 return 0; 7766 out: 7767 /* unpin any maps that were auto-pinned during load */ 7768 for (i = 0; i < obj->nr_maps; i++) 7769 if (obj->maps[i].pinned && !obj->maps[i].reused) 7770 bpf_map__unpin(&obj->maps[i], NULL); 7771 7772 bpf_object_unload(obj); 7773 pr_warn("failed to load object '%s'\n", obj->path); 7774 return libbpf_err(err); 7775 } 7776 7777 int bpf_object__load(struct bpf_object *obj) 7778 { 7779 return bpf_object_load(obj, 0, NULL); 7780 } 7781 7782 static int make_parent_dir(const char *path) 7783 { 7784 char *cp, errmsg[STRERR_BUFSIZE]; 7785 char *dname, *dir; 7786 int err = 0; 7787 7788 dname = strdup(path); 7789 if (dname == NULL) 7790 return -ENOMEM; 7791 7792 dir = dirname(dname); 7793 if (mkdir(dir, 0700) && errno != EEXIST) 7794 err = -errno; 7795 7796 free(dname); 7797 if (err) { 7798 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 7799 pr_warn("failed to mkdir %s: %s\n", path, cp); 7800 } 7801 return err; 7802 } 7803 7804 static int check_path(const char *path) 7805 { 7806 char *cp, errmsg[STRERR_BUFSIZE]; 7807 struct statfs st_fs; 7808 char *dname, *dir; 7809 int err = 0; 7810 7811 if (path == NULL) 7812 return -EINVAL; 7813 7814 dname = strdup(path); 7815 if (dname == NULL) 7816 return -ENOMEM; 7817 7818 dir = dirname(dname); 7819 if (statfs(dir, &st_fs)) { 7820 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7821 pr_warn("failed to statfs %s: %s\n", dir, cp); 7822 err = -errno; 7823 } 7824 free(dname); 7825 7826 if (!err && st_fs.f_type != BPF_FS_MAGIC) { 7827 pr_warn("specified path %s is not on BPF FS\n", path); 7828 err = -EINVAL; 7829 } 7830 7831 return err; 7832 } 7833 7834 int bpf_program__pin(struct bpf_program *prog, const char *path) 7835 { 7836 char *cp, errmsg[STRERR_BUFSIZE]; 7837 int err; 7838 7839 if (prog->fd < 0) { 7840 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); 7841 return libbpf_err(-EINVAL); 7842 } 7843 7844 err = make_parent_dir(path); 7845 if (err) 7846 return libbpf_err(err); 7847 7848 err = check_path(path); 7849 if (err) 7850 return libbpf_err(err); 7851 7852 if (bpf_obj_pin(prog->fd, path)) { 7853 err = -errno; 7854 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 7855 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); 7856 return libbpf_err(err); 7857 } 7858 7859 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); 7860 return 0; 7861 } 7862 7863 int bpf_program__unpin(struct bpf_program *prog, const char *path) 7864 { 7865 int err; 7866 7867 if (prog->fd < 0) { 7868 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); 7869 return libbpf_err(-EINVAL); 7870 } 7871 7872 err = check_path(path); 7873 if (err) 7874 return libbpf_err(err); 
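/* Note: unpinning only removes the pinned file from BPF FS; the program
 * itself stays loaded for as long as other FDs or links still reference it.
 */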
7875 7876 err = unlink(path); 7877 if (err) 7878 return libbpf_err(-errno); 7879 7880 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); 7881 return 0; 7882 } 7883 7884 int bpf_map__pin(struct bpf_map *map, const char *path) 7885 { 7886 char *cp, errmsg[STRERR_BUFSIZE]; 7887 int err; 7888 7889 if (map == NULL) { 7890 pr_warn("invalid map pointer\n"); 7891 return libbpf_err(-EINVAL); 7892 } 7893 7894 if (map->pin_path) { 7895 if (path && strcmp(path, map->pin_path)) { 7896 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 7897 bpf_map__name(map), map->pin_path, path); 7898 return libbpf_err(-EINVAL); 7899 } else if (map->pinned) { 7900 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", 7901 bpf_map__name(map), map->pin_path); 7902 return 0; 7903 } 7904 } else { 7905 if (!path) { 7906 pr_warn("missing a path to pin map '%s' at\n", 7907 bpf_map__name(map)); 7908 return libbpf_err(-EINVAL); 7909 } else if (map->pinned) { 7910 pr_warn("map '%s' already pinned\n", bpf_map__name(map)); 7911 return libbpf_err(-EEXIST); 7912 } 7913 7914 map->pin_path = strdup(path); 7915 if (!map->pin_path) { 7916 err = -errno; 7917 goto out_err; 7918 } 7919 } 7920 7921 err = make_parent_dir(map->pin_path); 7922 if (err) 7923 return libbpf_err(err); 7924 7925 err = check_path(map->pin_path); 7926 if (err) 7927 return libbpf_err(err); 7928 7929 if (bpf_obj_pin(map->fd, map->pin_path)) { 7930 err = -errno; 7931 goto out_err; 7932 } 7933 7934 map->pinned = true; 7935 pr_debug("pinned map '%s'\n", map->pin_path); 7936 7937 return 0; 7938 7939 out_err: 7940 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 7941 pr_warn("failed to pin map: %s\n", cp); 7942 return libbpf_err(err); 7943 } 7944 7945 int bpf_map__unpin(struct bpf_map *map, const char *path) 7946 { 7947 int err; 7948 7949 if (map == NULL) { 7950 pr_warn("invalid map pointer\n"); 7951 return libbpf_err(-EINVAL); 7952 } 7953 7954 if (map->pin_path) { 7955 if (path && strcmp(path, map->pin_path)) { 7956 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 7957 bpf_map__name(map), map->pin_path, path); 7958 return libbpf_err(-EINVAL); 7959 } 7960 path = map->pin_path; 7961 } else if (!path) { 7962 pr_warn("no path to unpin map '%s' from\n", 7963 bpf_map__name(map)); 7964 return libbpf_err(-EINVAL); 7965 } 7966 7967 err = check_path(path); 7968 if (err) 7969 return libbpf_err(err); 7970 7971 err = unlink(path); 7972 if (err != 0) 7973 return libbpf_err(-errno); 7974 7975 map->pinned = false; 7976 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); 7977 7978 return 0; 7979 } 7980 7981 int bpf_map__set_pin_path(struct bpf_map *map, const char *path) 7982 { 7983 char *new = NULL; 7984 7985 if (path) { 7986 new = strdup(path); 7987 if (!new) 7988 return libbpf_err(-errno); 7989 } 7990 7991 free(map->pin_path); 7992 map->pin_path = new; 7993 return 0; 7994 } 7995 7996 __alias(bpf_map__pin_path) 7997 const char *bpf_map__get_pin_path(const struct bpf_map *map); 7998 7999 const char *bpf_map__pin_path(const struct bpf_map *map) 8000 { 8001 return map->pin_path; 8002 } 8003 8004 bool bpf_map__is_pinned(const struct bpf_map *map) 8005 { 8006 return map->pinned; 8007 } 8008 8009 static void sanitize_pin_path(char *s) 8010 { 8011 /* bpffs disallows periods in path names */ 8012 while (*s) { 8013 if (*s == '.') 8014 *s = '_'; 8015 s++; 8016 } 8017 } 8018 8019 int bpf_object__pin_maps(struct bpf_object *obj, const char *path) 8020 { 8021 struct bpf_map *map; 8022 int err; 8023 8024 if (!obj) 8025 
return libbpf_err(-ENOENT); 8026 8027 if (!obj->loaded) { 8028 pr_warn("object not yet loaded; load it first\n"); 8029 return libbpf_err(-ENOENT); 8030 } 8031 8032 bpf_object__for_each_map(map, obj) { 8033 char *pin_path = NULL; 8034 char buf[PATH_MAX]; 8035 8036 if (!map->autocreate) 8037 continue; 8038 8039 if (path) { 8040 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8041 if (err) 8042 goto err_unpin_maps; 8043 sanitize_pin_path(buf); 8044 pin_path = buf; 8045 } else if (!map->pin_path) { 8046 continue; 8047 } 8048 8049 err = bpf_map__pin(map, pin_path); 8050 if (err) 8051 goto err_unpin_maps; 8052 } 8053 8054 return 0; 8055 8056 err_unpin_maps: 8057 while ((map = bpf_object__prev_map(obj, map))) { 8058 if (!map->pin_path) 8059 continue; 8060 8061 bpf_map__unpin(map, NULL); 8062 } 8063 8064 return libbpf_err(err); 8065 } 8066 8067 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) 8068 { 8069 struct bpf_map *map; 8070 int err; 8071 8072 if (!obj) 8073 return libbpf_err(-ENOENT); 8074 8075 bpf_object__for_each_map(map, obj) { 8076 char *pin_path = NULL; 8077 char buf[PATH_MAX]; 8078 8079 if (path) { 8080 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8081 if (err) 8082 return libbpf_err(err); 8083 sanitize_pin_path(buf); 8084 pin_path = buf; 8085 } else if (!map->pin_path) { 8086 continue; 8087 } 8088 8089 err = bpf_map__unpin(map, pin_path); 8090 if (err) 8091 return libbpf_err(err); 8092 } 8093 8094 return 0; 8095 } 8096 8097 int bpf_object__pin_programs(struct bpf_object *obj, const char *path) 8098 { 8099 struct bpf_program *prog; 8100 char buf[PATH_MAX]; 8101 int err; 8102 8103 if (!obj) 8104 return libbpf_err(-ENOENT); 8105 8106 if (!obj->loaded) { 8107 pr_warn("object not yet loaded; load it first\n"); 8108 return libbpf_err(-ENOENT); 8109 } 8110 8111 bpf_object__for_each_program(prog, obj) { 8112 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8113 if (err) 8114 goto err_unpin_programs; 8115 8116 err = bpf_program__pin(prog, buf); 8117 if (err) 8118 goto err_unpin_programs; 8119 } 8120 8121 return 0; 8122 8123 err_unpin_programs: 8124 while ((prog = bpf_object__prev_program(obj, prog))) { 8125 if (pathname_concat(buf, sizeof(buf), path, prog->name)) 8126 continue; 8127 8128 bpf_program__unpin(prog, buf); 8129 } 8130 8131 return libbpf_err(err); 8132 } 8133 8134 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) 8135 { 8136 struct bpf_program *prog; 8137 int err; 8138 8139 if (!obj) 8140 return libbpf_err(-ENOENT); 8141 8142 bpf_object__for_each_program(prog, obj) { 8143 char buf[PATH_MAX]; 8144 8145 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8146 if (err) 8147 return libbpf_err(err); 8148 8149 err = bpf_program__unpin(prog, buf); 8150 if (err) 8151 return libbpf_err(err); 8152 } 8153 8154 return 0; 8155 } 8156 8157 int bpf_object__pin(struct bpf_object *obj, const char *path) 8158 { 8159 int err; 8160 8161 err = bpf_object__pin_maps(obj, path); 8162 if (err) 8163 return libbpf_err(err); 8164 8165 err = bpf_object__pin_programs(obj, path); 8166 if (err) { 8167 bpf_object__unpin_maps(obj, path); 8168 return libbpf_err(err); 8169 } 8170 8171 return 0; 8172 } 8173 8174 int bpf_object__unpin(struct bpf_object *obj, const char *path) 8175 { 8176 int err; 8177 8178 err = bpf_object__unpin_programs(obj, path); 8179 if (err) 8180 return libbpf_err(err); 8181 8182 err = bpf_object__unpin_maps(obj, path); 8183 if (err) 8184 return libbpf_err(err); 8185 8186 return 0; 8187 } 8188 8189 
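/* A minimal usage sketch for the pin/unpin APIs above (illustrative only,
 * not part of libbpf): it assumes a BPF FS mounted at /sys/fs/bpf and uses a
 * hypothetical pin directory "/sys/fs/bpf/myobj" for a loaded object 'obj':
 *
 *	err = bpf_object__pin(obj, "/sys/fs/bpf/myobj");
 *	if (err)
 *		return err;	// on failure, maps pinned by this call are unpinned again
 *	...
 *	bpf_object__unpin(obj, "/sys/fs/bpf/myobj");
 */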
static void bpf_map__destroy(struct bpf_map *map) 8190 { 8191 if (map->inner_map) { 8192 bpf_map__destroy(map->inner_map); 8193 zfree(&map->inner_map); 8194 } 8195 8196 zfree(&map->init_slots); 8197 map->init_slots_sz = 0; 8198 8199 if (map->mmaped) { 8200 size_t mmap_sz; 8201 8202 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 8203 munmap(map->mmaped, mmap_sz); 8204 map->mmaped = NULL; 8205 } 8206 8207 if (map->st_ops) { 8208 zfree(&map->st_ops->data); 8209 zfree(&map->st_ops->progs); 8210 zfree(&map->st_ops->kern_func_off); 8211 zfree(&map->st_ops); 8212 } 8213 8214 zfree(&map->name); 8215 zfree(&map->real_name); 8216 zfree(&map->pin_path); 8217 8218 if (map->fd >= 0) 8219 zclose(map->fd); 8220 } 8221 8222 void bpf_object__close(struct bpf_object *obj) 8223 { 8224 size_t i; 8225 8226 if (IS_ERR_OR_NULL(obj)) 8227 return; 8228 8229 usdt_manager_free(obj->usdt_man); 8230 obj->usdt_man = NULL; 8231 8232 bpf_gen__free(obj->gen_loader); 8233 bpf_object__elf_finish(obj); 8234 bpf_object_unload(obj); 8235 btf__free(obj->btf); 8236 btf__free(obj->btf_vmlinux); 8237 btf_ext__free(obj->btf_ext); 8238 8239 for (i = 0; i < obj->nr_maps; i++) 8240 bpf_map__destroy(&obj->maps[i]); 8241 8242 zfree(&obj->btf_custom_path); 8243 zfree(&obj->kconfig); 8244 8245 for (i = 0; i < obj->nr_extern; i++) 8246 zfree(&obj->externs[i].essent_name); 8247 8248 zfree(&obj->externs); 8249 obj->nr_extern = 0; 8250 8251 zfree(&obj->maps); 8252 obj->nr_maps = 0; 8253 8254 if (obj->programs && obj->nr_programs) { 8255 for (i = 0; i < obj->nr_programs; i++) 8256 bpf_program__exit(&obj->programs[i]); 8257 } 8258 zfree(&obj->programs); 8259 8260 zfree(&obj->feat_cache); 8261 zfree(&obj->token_path); 8262 if (obj->token_fd > 0) 8263 close(obj->token_fd); 8264 8265 free(obj); 8266 } 8267 8268 const char *bpf_object__name(const struct bpf_object *obj) 8269 { 8270 return obj ? obj->name : libbpf_err_ptr(-EINVAL); 8271 } 8272 8273 unsigned int bpf_object__kversion(const struct bpf_object *obj) 8274 { 8275 return obj ? obj->kern_version : 0; 8276 } 8277 8278 struct btf *bpf_object__btf(const struct bpf_object *obj) 8279 { 8280 return obj ? obj->btf : NULL; 8281 } 8282 8283 int bpf_object__btf_fd(const struct bpf_object *obj) 8284 { 8285 return obj->btf ? btf__fd(obj->btf) : -1; 8286 } 8287 8288 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 8289 { 8290 if (obj->loaded) 8291 return libbpf_err(-EINVAL); 8292 8293 obj->kern_version = kern_version; 8294 8295 return 0; 8296 } 8297 8298 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) 8299 { 8300 struct bpf_gen *gen; 8301 8302 if (!opts) 8303 return -EFAULT; 8304 if (!OPTS_VALID(opts, gen_loader_opts)) 8305 return -EINVAL; 8306 gen = calloc(sizeof(*gen), 1); 8307 if (!gen) 8308 return -ENOMEM; 8309 gen->opts = opts; 8310 obj->gen_loader = gen; 8311 return 0; 8312 } 8313 8314 static struct bpf_program * 8315 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, 8316 bool forward) 8317 { 8318 size_t nr_programs = obj->nr_programs; 8319 ssize_t idx; 8320 8321 if (!nr_programs) 8322 return NULL; 8323 8324 if (!p) 8325 /* Iter from the beginning */ 8326 return forward ? &obj->programs[0] : 8327 &obj->programs[nr_programs - 1]; 8328 8329 if (p->obj != obj) { 8330 pr_warn("error: program handler doesn't match object\n"); 8331 return errno = EINVAL, NULL; 8332 } 8333 8334 idx = (p - obj->programs) + (forward ? 
1 : -1); 8335 if (idx >= obj->nr_programs || idx < 0) 8336 return NULL; 8337 return &obj->programs[idx]; 8338 } 8339 8340 struct bpf_program * 8341 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 8342 { 8343 struct bpf_program *prog = prev; 8344 8345 do { 8346 prog = __bpf_program__iter(prog, obj, true); 8347 } while (prog && prog_is_subprog(obj, prog)); 8348 8349 return prog; 8350 } 8351 8352 struct bpf_program * 8353 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) 8354 { 8355 struct bpf_program *prog = next; 8356 8357 do { 8358 prog = __bpf_program__iter(prog, obj, false); 8359 } while (prog && prog_is_subprog(obj, prog)); 8360 8361 return prog; 8362 } 8363 8364 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 8365 { 8366 prog->prog_ifindex = ifindex; 8367 } 8368 8369 const char *bpf_program__name(const struct bpf_program *prog) 8370 { 8371 return prog->name; 8372 } 8373 8374 const char *bpf_program__section_name(const struct bpf_program *prog) 8375 { 8376 return prog->sec_name; 8377 } 8378 8379 bool bpf_program__autoload(const struct bpf_program *prog) 8380 { 8381 return prog->autoload; 8382 } 8383 8384 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 8385 { 8386 if (prog->obj->loaded) 8387 return libbpf_err(-EINVAL); 8388 8389 prog->autoload = autoload; 8390 return 0; 8391 } 8392 8393 bool bpf_program__autoattach(const struct bpf_program *prog) 8394 { 8395 return prog->autoattach; 8396 } 8397 8398 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) 8399 { 8400 prog->autoattach = autoattach; 8401 } 8402 8403 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) 8404 { 8405 return prog->insns; 8406 } 8407 8408 size_t bpf_program__insn_cnt(const struct bpf_program *prog) 8409 { 8410 return prog->insns_cnt; 8411 } 8412 8413 int bpf_program__set_insns(struct bpf_program *prog, 8414 struct bpf_insn *new_insns, size_t new_insn_cnt) 8415 { 8416 struct bpf_insn *insns; 8417 8418 if (prog->obj->loaded) 8419 return -EBUSY; 8420 8421 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 8422 /* NULL is a valid return from reallocarray if the new count is zero */ 8423 if (!insns && new_insn_cnt) { 8424 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 8425 return -ENOMEM; 8426 } 8427 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 8428 8429 prog->insns = insns; 8430 prog->insns_cnt = new_insn_cnt; 8431 return 0; 8432 } 8433 8434 int bpf_program__fd(const struct bpf_program *prog) 8435 { 8436 if (!prog) 8437 return libbpf_err(-EINVAL); 8438 8439 if (prog->fd < 0) 8440 return libbpf_err(-ENOENT); 8441 8442 return prog->fd; 8443 } 8444 8445 __alias(bpf_program__type) 8446 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 8447 8448 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) 8449 { 8450 return prog->type; 8451 } 8452 8453 static size_t custom_sec_def_cnt; 8454 static struct bpf_sec_def *custom_sec_defs; 8455 static struct bpf_sec_def custom_fallback_def; 8456 static bool has_custom_fallback_def; 8457 static int last_custom_sec_def_handler_id; 8458 8459 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 8460 { 8461 if (prog->obj->loaded) 8462 return libbpf_err(-EBUSY); 8463 8464 /* if type is not changed, do nothing */ 8465 if (prog->type == type) 8466 return 0; 8467 8468 prog->type = type; 8469 8470 /* If a program type was changed, we 
need to reset associated SEC() 8471 * handler, as it will be invalid now. The only exception is a generic 8472 * fallback handler, which by definition is program type-agnostic and 8473 * is a catch-all custom handler, optionally set by the application, 8474 * so should be able to handle any type of BPF program. 8475 */ 8476 if (prog->sec_def != &custom_fallback_def) 8477 prog->sec_def = NULL; 8478 return 0; 8479 } 8480 8481 __alias(bpf_program__expected_attach_type) 8482 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); 8483 8484 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) 8485 { 8486 return prog->expected_attach_type; 8487 } 8488 8489 int bpf_program__set_expected_attach_type(struct bpf_program *prog, 8490 enum bpf_attach_type type) 8491 { 8492 if (prog->obj->loaded) 8493 return libbpf_err(-EBUSY); 8494 8495 prog->expected_attach_type = type; 8496 return 0; 8497 } 8498 8499 __u32 bpf_program__flags(const struct bpf_program *prog) 8500 { 8501 return prog->prog_flags; 8502 } 8503 8504 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) 8505 { 8506 if (prog->obj->loaded) 8507 return libbpf_err(-EBUSY); 8508 8509 prog->prog_flags = flags; 8510 return 0; 8511 } 8512 8513 __u32 bpf_program__log_level(const struct bpf_program *prog) 8514 { 8515 return prog->log_level; 8516 } 8517 8518 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) 8519 { 8520 if (prog->obj->loaded) 8521 return libbpf_err(-EBUSY); 8522 8523 prog->log_level = log_level; 8524 return 0; 8525 } 8526 8527 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) 8528 { 8529 *log_size = prog->log_size; 8530 return prog->log_buf; 8531 } 8532 8533 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) 8534 { 8535 if (log_size && !log_buf) 8536 return -EINVAL; 8537 if (prog->log_size > UINT_MAX) 8538 return -EINVAL; 8539 if (prog->obj->loaded) 8540 return -EBUSY; 8541 8542 prog->log_buf = log_buf; 8543 prog->log_size = log_size; 8544 return 0; 8545 } 8546 8547 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ 8548 .sec = (char *)sec_pfx, \ 8549 .prog_type = BPF_PROG_TYPE_##ptype, \ 8550 .expected_attach_type = atype, \ 8551 .cookie = (long)(flags), \ 8552 .prog_prepare_load_fn = libbpf_prepare_prog_load, \ 8553 __VA_ARGS__ \ 8554 } 8555 8556 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8557 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8558 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8559 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8560 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8561 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8562 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8563 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8564 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8565 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8566 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8567 8568 static const struct bpf_sec_def section_defs[] = { 8569 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), 8570 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), 8571 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), 8572 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 8573 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 8574 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8575 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 8576 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 8577 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8578 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8579 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8580 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8581 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8582 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 8583 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 8584 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8585 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8586 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 8587 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 8588 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 8589 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 8590 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), 8591 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 8592 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 8593 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 8594 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 8595 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), 8596 SEC_DEF("netkit/peer", SCHED_CLS, 
BPF_NETKIT_PEER, SEC_NONE), 8597 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), 8598 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), 8599 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 8600 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 8601 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 8602 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 8603 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), 8604 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), 8605 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), 8606 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), 8607 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 8608 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 8609 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 8610 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), 8611 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), 8612 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 8613 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), 8614 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 8615 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), 8616 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 8617 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 8618 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), 8619 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 8620 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 8621 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 8622 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), 8623 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), 8624 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), 8625 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), 8626 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), 8627 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), 8628 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), 8629 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), 8630 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), 8631 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), 8632 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), 8633 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), 8634 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), 8635 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), 8636 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), 8637 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), 8638 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), 8639 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), 8640 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), 8641 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), 8642 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), 8643 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, 
SEC_ATTACHABLE), 8644 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), 8645 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), 8646 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), 8647 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), 8648 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), 8649 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), 8650 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), 8651 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), 8652 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), 8653 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), 8654 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), 8655 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), 8656 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), 8657 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), 8658 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), 8659 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), 8660 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), 8661 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), 8662 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), 8663 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), 8664 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), 8665 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), 8666 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), 8667 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), 8668 }; 8669 8670 int libbpf_register_prog_handler(const char *sec, 8671 enum bpf_prog_type prog_type, 8672 enum bpf_attach_type exp_attach_type, 8673 const struct libbpf_prog_handler_opts *opts) 8674 { 8675 struct bpf_sec_def *sec_def; 8676 8677 if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) 8678 return libbpf_err(-EINVAL); 8679 8680 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ 8681 return libbpf_err(-E2BIG); 8682 8683 if (sec) { 8684 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, 8685 sizeof(*sec_def)); 8686 if (!sec_def) 8687 return libbpf_err(-ENOMEM); 8688 8689 custom_sec_defs = sec_def; 8690 sec_def = &custom_sec_defs[custom_sec_def_cnt]; 8691 } else { 8692 if (has_custom_fallback_def) 8693 return libbpf_err(-EBUSY); 8694 8695 sec_def = &custom_fallback_def; 8696 } 8697 8698 sec_def->sec = sec ? 
strdup(sec) : NULL; 8699 if (sec && !sec_def->sec) 8700 return libbpf_err(-ENOMEM); 8701 8702 sec_def->prog_type = prog_type; 8703 sec_def->expected_attach_type = exp_attach_type; 8704 sec_def->cookie = OPTS_GET(opts, cookie, 0); 8705 8706 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); 8707 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); 8708 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); 8709 8710 sec_def->handler_id = ++last_custom_sec_def_handler_id; 8711 8712 if (sec) 8713 custom_sec_def_cnt++; 8714 else 8715 has_custom_fallback_def = true; 8716 8717 return sec_def->handler_id; 8718 } 8719 8720 int libbpf_unregister_prog_handler(int handler_id) 8721 { 8722 struct bpf_sec_def *sec_defs; 8723 int i; 8724 8725 if (handler_id <= 0) 8726 return libbpf_err(-EINVAL); 8727 8728 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { 8729 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); 8730 has_custom_fallback_def = false; 8731 return 0; 8732 } 8733 8734 for (i = 0; i < custom_sec_def_cnt; i++) { 8735 if (custom_sec_defs[i].handler_id == handler_id) 8736 break; 8737 } 8738 8739 if (i == custom_sec_def_cnt) 8740 return libbpf_err(-ENOENT); 8741 8742 free(custom_sec_defs[i].sec); 8743 for (i = i + 1; i < custom_sec_def_cnt; i++) 8744 custom_sec_defs[i - 1] = custom_sec_defs[i]; 8745 custom_sec_def_cnt--; 8746 8747 /* try to shrink the array, but it's ok if we couldn't */ 8748 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); 8749 /* if new count is zero, reallocarray can return a valid NULL result; 8750 * in this case the previous pointer will be freed, so we *have to* 8751 * reassign old pointer to the new value (even if it's NULL) 8752 */ 8753 if (sec_defs || custom_sec_def_cnt == 0) 8754 custom_sec_defs = sec_defs; 8755 8756 return 0; 8757 } 8758 8759 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) 8760 { 8761 size_t len = strlen(sec_def->sec); 8762 8763 /* "type/" always has to have proper SEC("type/extras") form */ 8764 if (sec_def->sec[len - 1] == '/') { 8765 if (str_has_pfx(sec_name, sec_def->sec)) 8766 return true; 8767 return false; 8768 } 8769 8770 /* "type+" means it can be either exact SEC("type") or 8771 * well-formed SEC("type/extras") with proper '/' separator 8772 */ 8773 if (sec_def->sec[len - 1] == '+') { 8774 len--; 8775 /* not even a prefix */ 8776 if (strncmp(sec_name, sec_def->sec, len) != 0) 8777 return false; 8778 /* exact match or has '/' separator */ 8779 if (sec_name[len] == '\0' || sec_name[len] == '/') 8780 return true; 8781 return false; 8782 } 8783 8784 return strcmp(sec_name, sec_def->sec) == 0; 8785 } 8786 8787 static const struct bpf_sec_def *find_sec_def(const char *sec_name) 8788 { 8789 const struct bpf_sec_def *sec_def; 8790 int i, n; 8791 8792 n = custom_sec_def_cnt; 8793 for (i = 0; i < n; i++) { 8794 sec_def = &custom_sec_defs[i]; 8795 if (sec_def_matches(sec_def, sec_name)) 8796 return sec_def; 8797 } 8798 8799 n = ARRAY_SIZE(section_defs); 8800 for (i = 0; i < n; i++) { 8801 sec_def = &section_defs[i]; 8802 if (sec_def_matches(sec_def, sec_name)) 8803 return sec_def; 8804 } 8805 8806 if (has_custom_fallback_def) 8807 return &custom_fallback_def; 8808 8809 return NULL; 8810 } 8811 8812 #define MAX_TYPE_NAME_SIZE 32 8813 8814 static char *libbpf_get_type_names(bool attach_type) 8815 { 8816 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; 8817 char *buf; 8818 8819 buf = malloc(len);
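/* buf is sized for the worst case, on the assumption that each section name
 * plus separator fits within MAX_TYPE_NAME_SIZE; the loop below re-checks the
 * remaining space and bails out instead of overflowing.
 */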
8820 if (!buf) 8821 return NULL; 8822 8823 buf[0] = '\0'; 8824 /* Forge string buf with all available names */ 8825 for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 8826 const struct bpf_sec_def *sec_def = &section_defs[i]; 8827 8828 if (attach_type) { 8829 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 8830 continue; 8831 8832 if (!(sec_def->cookie & SEC_ATTACHABLE)) 8833 continue; 8834 } 8835 8836 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { 8837 free(buf); 8838 return NULL; 8839 } 8840 strcat(buf, " "); 8841 strcat(buf, section_defs[i].sec); 8842 } 8843 8844 return buf; 8845 } 8846 8847 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 8848 enum bpf_attach_type *expected_attach_type) 8849 { 8850 const struct bpf_sec_def *sec_def; 8851 char *type_names; 8852 8853 if (!name) 8854 return libbpf_err(-EINVAL); 8855 8856 sec_def = find_sec_def(name); 8857 if (sec_def) { 8858 *prog_type = sec_def->prog_type; 8859 *expected_attach_type = sec_def->expected_attach_type; 8860 return 0; 8861 } 8862 8863 pr_debug("failed to guess program type from ELF section '%s'\n", name); 8864 type_names = libbpf_get_type_names(false); 8865 if (type_names != NULL) { 8866 pr_debug("supported section(type) names are:%s\n", type_names); 8867 free(type_names); 8868 } 8869 8870 return libbpf_err(-ESRCH); 8871 } 8872 8873 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) 8874 { 8875 if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) 8876 return NULL; 8877 8878 return attach_type_name[t]; 8879 } 8880 8881 const char *libbpf_bpf_link_type_str(enum bpf_link_type t) 8882 { 8883 if (t < 0 || t >= ARRAY_SIZE(link_type_name)) 8884 return NULL; 8885 8886 return link_type_name[t]; 8887 } 8888 8889 const char *libbpf_bpf_map_type_str(enum bpf_map_type t) 8890 { 8891 if (t < 0 || t >= ARRAY_SIZE(map_type_name)) 8892 return NULL; 8893 8894 return map_type_name[t]; 8895 } 8896 8897 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) 8898 { 8899 if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) 8900 return NULL; 8901 8902 return prog_type_name[t]; 8903 } 8904 8905 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, 8906 int sec_idx, 8907 size_t offset) 8908 { 8909 struct bpf_map *map; 8910 size_t i; 8911 8912 for (i = 0; i < obj->nr_maps; i++) { 8913 map = &obj->maps[i]; 8914 if (!bpf_map__is_struct_ops(map)) 8915 continue; 8916 if (map->sec_idx == sec_idx && 8917 map->sec_offset <= offset && 8918 offset - map->sec_offset < map->def.value_size) 8919 return map; 8920 } 8921 8922 return NULL; 8923 } 8924 8925 /* Collect the reloc from ELF and populate the st_ops->progs[] */ 8926 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 8927 Elf64_Shdr *shdr, Elf_Data *data) 8928 { 8929 const struct btf_member *member; 8930 struct bpf_struct_ops *st_ops; 8931 struct bpf_program *prog; 8932 unsigned int shdr_idx; 8933 const struct btf *btf; 8934 struct bpf_map *map; 8935 unsigned int moff, insn_idx; 8936 const char *name; 8937 __u32 member_idx; 8938 Elf64_Sym *sym; 8939 Elf64_Rel *rel; 8940 int i, nrels; 8941 8942 btf = obj->btf; 8943 nrels = shdr->sh_size / shdr->sh_entsize; 8944 for (i = 0; i < nrels; i++) { 8945 rel = elf_rel_by_idx(data, i); 8946 if (!rel) { 8947 pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 8948 return -LIBBPF_ERRNO__FORMAT; 8949 } 8950 8951 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 8952 if (!sym) { 8953 pr_warn("struct_ops reloc: symbol %zx not found\n", 8954
(size_t)ELF64_R_SYM(rel->r_info)); 8955 return -LIBBPF_ERRNO__FORMAT; 8956 } 8957 8958 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 8959 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); 8960 if (!map) { 8961 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", 8962 (size_t)rel->r_offset); 8963 return -EINVAL; 8964 } 8965 8966 moff = rel->r_offset - map->sec_offset; 8967 shdr_idx = sym->st_shndx; 8968 st_ops = map->st_ops; 8969 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 8970 map->name, 8971 (long long)(rel->r_info >> 32), 8972 (long long)sym->st_value, 8973 shdr_idx, (size_t)rel->r_offset, 8974 map->sec_offset, sym->st_name, name); 8975 8976 if (shdr_idx >= SHN_LORESERVE) { 8977 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", 8978 map->name, (size_t)rel->r_offset, shdr_idx); 8979 return -LIBBPF_ERRNO__RELOC; 8980 } 8981 if (sym->st_value % BPF_INSN_SZ) { 8982 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", 8983 map->name, (unsigned long long)sym->st_value); 8984 return -LIBBPF_ERRNO__FORMAT; 8985 } 8986 insn_idx = sym->st_value / BPF_INSN_SZ; 8987 8988 member = find_member_by_offset(st_ops->type, moff * 8); 8989 if (!member) { 8990 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 8991 map->name, moff); 8992 return -EINVAL; 8993 } 8994 member_idx = member - btf_members(st_ops->type); 8995 name = btf__name_by_offset(btf, member->name_off); 8996 8997 if (!resolve_func_ptr(btf, member->type, NULL)) { 8998 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 8999 map->name, name); 9000 return -EINVAL; 9001 } 9002 9003 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); 9004 if (!prog) { 9005 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 9006 map->name, shdr_idx, name); 9007 return -EINVAL; 9008 } 9009 9010 /* prevent the use of BPF prog with invalid type */ 9011 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 9012 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", 9013 map->name, prog->name); 9014 return -EINVAL; 9015 } 9016 9017 /* if we haven't yet processed this BPF program, record proper 9018 * attach_btf_id and member_idx 9019 */ 9020 if (!prog->attach_btf_id) { 9021 prog->attach_btf_id = st_ops->type_id; 9022 prog->expected_attach_type = member_idx; 9023 } 9024 9025 /* struct_ops BPF prog can be re-used between multiple 9026 * .struct_ops & .struct_ops.link as long as it's the 9027 * same struct_ops struct definition and the same 9028 * function pointer field 9029 */ 9030 if (prog->attach_btf_id != st_ops->type_id || 9031 prog->expected_attach_type != member_idx) { 9032 pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", 9033 map->name, prog->name, prog->sec_name, prog->type, 9034 prog->attach_btf_id, prog->expected_attach_type, name); 9035 return -EINVAL; 9036 } 9037 9038 st_ops->progs[member_idx] = prog; 9039 } 9040 9041 return 0; 9042 } 9043 9044 #define BTF_TRACE_PREFIX "btf_trace_" 9045 #define BTF_LSM_PREFIX "bpf_lsm_" 9046 #define BTF_ITER_PREFIX "bpf_iter_" 9047 #define BTF_MAX_NAME_SIZE 128 9048 9049 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, 9050 const char **prefix, int *kind) 9051 { 9052 switch (attach_type) { 9053 case BPF_TRACE_RAW_TP: 9054 *prefix = BTF_TRACE_PREFIX; 9055 *kind = 
BTF_KIND_TYPEDEF; 9056 break; 9057 case BPF_LSM_MAC: 9058 case BPF_LSM_CGROUP: 9059 *prefix = BTF_LSM_PREFIX; 9060 *kind = BTF_KIND_FUNC; 9061 break; 9062 case BPF_TRACE_ITER: 9063 *prefix = BTF_ITER_PREFIX; 9064 *kind = BTF_KIND_FUNC; 9065 break; 9066 default: 9067 *prefix = ""; 9068 *kind = BTF_KIND_FUNC; 9069 } 9070 } 9071 9072 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 9073 const char *name, __u32 kind) 9074 { 9075 char btf_type_name[BTF_MAX_NAME_SIZE]; 9076 int ret; 9077 9078 ret = snprintf(btf_type_name, sizeof(btf_type_name), 9079 "%s%s", prefix, name); 9080 /* snprintf returns the number of characters written excluding the 9081 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 9082 * indicates truncation. 9083 */ 9084 if (ret < 0 || ret >= sizeof(btf_type_name)) 9085 return -ENAMETOOLONG; 9086 return btf__find_by_name_kind(btf, btf_type_name, kind); 9087 } 9088 9089 static inline int find_attach_btf_id(struct btf *btf, const char *name, 9090 enum bpf_attach_type attach_type) 9091 { 9092 const char *prefix; 9093 int kind; 9094 9095 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); 9096 return find_btf_by_prefix_kind(btf, prefix, name, kind); 9097 } 9098 9099 int libbpf_find_vmlinux_btf_id(const char *name, 9100 enum bpf_attach_type attach_type) 9101 { 9102 struct btf *btf; 9103 int err; 9104 9105 btf = btf__load_vmlinux_btf(); 9106 err = libbpf_get_error(btf); 9107 if (err) { 9108 pr_warn("vmlinux BTF is not found\n"); 9109 return libbpf_err(err); 9110 } 9111 9112 err = find_attach_btf_id(btf, name, attach_type); 9113 if (err <= 0) 9114 pr_warn("%s is not found in vmlinux BTF\n", name); 9115 9116 btf__free(btf); 9117 return libbpf_err(err); 9118 } 9119 9120 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) 9121 { 9122 struct bpf_prog_info info; 9123 __u32 info_len = sizeof(info); 9124 struct btf *btf; 9125 int err; 9126 9127 memset(&info, 0, info_len); 9128 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); 9129 if (err) { 9130 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", 9131 attach_prog_fd, err); 9132 return err; 9133 } 9134 9135 err = -EINVAL; 9136 if (!info.btf_id) { 9137 pr_warn("The target program doesn't have BTF\n"); 9138 goto out; 9139 } 9140 btf = btf__load_from_kernel_by_id(info.btf_id); 9141 err = libbpf_get_error(btf); 9142 if (err) { 9143 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); 9144 goto out; 9145 } 9146 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 9147 btf__free(btf); 9148 if (err <= 0) { 9149 pr_warn("%s is not found in prog's BTF\n", name); 9150 goto out; 9151 } 9152 out: 9153 return err; 9154 } 9155 9156 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, 9157 enum bpf_attach_type attach_type, 9158 int *btf_obj_fd, int *btf_type_id) 9159 { 9160 int ret, i; 9161 9162 ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); 9163 if (ret > 0) { 9164 *btf_obj_fd = 0; /* vmlinux BTF */ 9165 *btf_type_id = ret; 9166 return 0; 9167 } 9168 if (ret != -ENOENT) 9169 return ret; 9170 9171 ret = load_module_btfs(obj); 9172 if (ret) 9173 return ret; 9174 9175 for (i = 0; i < obj->btf_module_cnt; i++) { 9176 const struct module_btf *mod = &obj->btf_modules[i]; 9177 9178 ret = find_attach_btf_id(mod->btf, attach_name, attach_type); 9179 if (ret > 0) { 9180 *btf_obj_fd = mod->fd; 9181 *btf_type_id = ret; 9182 return 0; 9183 } 9184 if (ret == -ENOENT) 9185 continue; 9186 9187 return ret; 9188 } 
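/* attach target was found neither in vmlinux BTF nor in any module BTF */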
9189 9190 return -ESRCH; 9191 } 9192 9193 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 9194 int *btf_obj_fd, int *btf_type_id) 9195 { 9196 enum bpf_attach_type attach_type = prog->expected_attach_type; 9197 __u32 attach_prog_fd = prog->attach_prog_fd; 9198 int err = 0; 9199 9200 /* BPF program's BTF ID */ 9201 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { 9202 if (!attach_prog_fd) { 9203 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 9204 return -EINVAL; 9205 } 9206 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); 9207 if (err < 0) { 9208 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", 9209 prog->name, attach_prog_fd, attach_name, err); 9210 return err; 9211 } 9212 *btf_obj_fd = 0; 9213 *btf_type_id = err; 9214 return 0; 9215 } 9216 9217 /* kernel/module BTF ID */ 9218 if (prog->obj->gen_loader) { 9219 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); 9220 *btf_obj_fd = 0; 9221 *btf_type_id = 1; 9222 } else { 9223 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); 9224 } 9225 if (err) { 9226 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", 9227 prog->name, attach_name, err); 9228 return err; 9229 } 9230 return 0; 9231 } 9232 9233 int libbpf_attach_type_by_name(const char *name, 9234 enum bpf_attach_type *attach_type) 9235 { 9236 char *type_names; 9237 const struct bpf_sec_def *sec_def; 9238 9239 if (!name) 9240 return libbpf_err(-EINVAL); 9241 9242 sec_def = find_sec_def(name); 9243 if (!sec_def) { 9244 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 9245 type_names = libbpf_get_type_names(true); 9246 if (type_names != NULL) { 9247 pr_debug("attachable section(type) names are:%s\n", type_names); 9248 free(type_names); 9249 } 9250 9251 return libbpf_err(-EINVAL); 9252 } 9253 9254 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9255 return libbpf_err(-EINVAL); 9256 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9257 return libbpf_err(-EINVAL); 9258 9259 *attach_type = sec_def->expected_attach_type; 9260 return 0; 9261 } 9262 9263 int bpf_map__fd(const struct bpf_map *map) 9264 { 9265 return map ? map->fd : libbpf_err(-EINVAL); 9266 } 9267 9268 static bool map_uses_real_name(const struct bpf_map *map) 9269 { 9270 /* Since libbpf started to support custom .data.* and .rodata.* maps, 9271 * their user-visible name differs from kernel-visible name. Users see 9272 * such map's corresponding ELF section name as a map name. 9273 * This check distinguishes .data/.rodata from .data.* and .rodata.* 9274 * maps to know which name has to be returned to the user. 
9275 */ 9276 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) 9277 return true; 9278 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) 9279 return true; 9280 return false; 9281 } 9282 9283 const char *bpf_map__name(const struct bpf_map *map) 9284 { 9285 if (!map) 9286 return NULL; 9287 9288 if (map_uses_real_name(map)) 9289 return map->real_name; 9290 9291 return map->name; 9292 } 9293 9294 enum bpf_map_type bpf_map__type(const struct bpf_map *map) 9295 { 9296 return map->def.type; 9297 } 9298 9299 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) 9300 { 9301 if (map->fd >= 0) 9302 return libbpf_err(-EBUSY); 9303 map->def.type = type; 9304 return 0; 9305 } 9306 9307 __u32 bpf_map__map_flags(const struct bpf_map *map) 9308 { 9309 return map->def.map_flags; 9310 } 9311 9312 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) 9313 { 9314 if (map->fd >= 0) 9315 return libbpf_err(-EBUSY); 9316 map->def.map_flags = flags; 9317 return 0; 9318 } 9319 9320 __u64 bpf_map__map_extra(const struct bpf_map *map) 9321 { 9322 return map->map_extra; 9323 } 9324 9325 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) 9326 { 9327 if (map->fd >= 0) 9328 return libbpf_err(-EBUSY); 9329 map->map_extra = map_extra; 9330 return 0; 9331 } 9332 9333 __u32 bpf_map__numa_node(const struct bpf_map *map) 9334 { 9335 return map->numa_node; 9336 } 9337 9338 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) 9339 { 9340 if (map->fd >= 0) 9341 return libbpf_err(-EBUSY); 9342 map->numa_node = numa_node; 9343 return 0; 9344 } 9345 9346 __u32 bpf_map__key_size(const struct bpf_map *map) 9347 { 9348 return map->def.key_size; 9349 } 9350 9351 int bpf_map__set_key_size(struct bpf_map *map, __u32 size) 9352 { 9353 if (map->fd >= 0) 9354 return libbpf_err(-EBUSY); 9355 map->def.key_size = size; 9356 return 0; 9357 } 9358 9359 __u32 bpf_map__value_size(const struct bpf_map *map) 9360 { 9361 return map->def.value_size; 9362 } 9363 9364 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 9365 { 9366 struct btf *btf; 9367 struct btf_type *datasec_type, *var_type; 9368 struct btf_var_secinfo *var; 9369 const struct btf_type *array_type; 9370 const struct btf_array *array; 9371 int vlen, element_sz, new_array_id; 9372 __u32 nr_elements; 9373 9374 /* check btf existence */ 9375 btf = bpf_object__btf(map->obj); 9376 if (!btf) 9377 return -ENOENT; 9378 9379 /* verify map is datasec */ 9380 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 9381 if (!btf_is_datasec(datasec_type)) { 9382 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 9383 bpf_map__name(map)); 9384 return -EINVAL; 9385 } 9386 9387 /* verify datasec has at least one var */ 9388 vlen = btf_vlen(datasec_type); 9389 if (vlen == 0) { 9390 pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 9391 bpf_map__name(map)); 9392 return -EINVAL; 9393 } 9394 9395 /* verify last var in the datasec is an array */ 9396 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9397 var_type = btf_type_by_id(btf, var->type); 9398 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 9399 if (!btf_is_array(array_type)) { 9400 pr_warn("map '%s': cannot be resized, last var must be an array\n", 9401 bpf_map__name(map)); 9402 return -EINVAL; 9403 } 9404 9405 /* verify request size aligns with array */ 9406 array = btf_array(array_type); 9407 element_sz = btf__resolve_size(btf, array->type); 9408 if 
(element_sz <= 0 || (size - var->offset) % element_sz != 0) { 9409 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 9410 bpf_map__name(map), element_sz, size); 9411 return -EINVAL; 9412 } 9413 9414 /* create a new array based on the existing array, but with new length */ 9415 nr_elements = (size - var->offset) / element_sz; 9416 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 9417 if (new_array_id < 0) 9418 return new_array_id; 9419 9420 /* adding a new btf type invalidates existing pointers to btf objects, 9421 * so refresh pointers before proceeding 9422 */ 9423 datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 9424 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9425 var_type = btf_type_by_id(btf, var->type); 9426 9427 /* finally update btf info */ 9428 datasec_type->size = size; 9429 var->size = size - var->offset; 9430 var_type->type = new_array_id; 9431 9432 return 0; 9433 } 9434 9435 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 9436 { 9437 if (map->fd >= 0) 9438 return libbpf_err(-EBUSY); 9439 9440 if (map->mmaped) { 9441 int err; 9442 size_t mmap_old_sz, mmap_new_sz; 9443 9444 mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 9445 mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); 9446 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 9447 if (err) { 9448 pr_warn("map '%s': failed to resize memory-mapped region: %d\n", 9449 bpf_map__name(map), err); 9450 return err; 9451 } 9452 err = map_btf_datasec_resize(map, size); 9453 if (err && err != -ENOENT) { 9454 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", 9455 bpf_map__name(map), err); 9456 map->btf_value_type_id = 0; 9457 map->btf_key_type_id = 0; 9458 } 9459 } 9460 9461 map->def.value_size = size; 9462 return 0; 9463 } 9464 9465 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) 9466 { 9467 return map ? map->btf_key_type_id : 0; 9468 } 9469 9470 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) 9471 { 9472 return map ? 
map->btf_value_type_id : 0; 9473 } 9474 9475 int bpf_map__set_initial_value(struct bpf_map *map, 9476 const void *data, size_t size) 9477 { 9478 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || 9479 size != map->def.value_size || map->fd >= 0) 9480 return libbpf_err(-EINVAL); 9481 9482 memcpy(map->mmaped, data, size); 9483 return 0; 9484 } 9485 9486 void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) 9487 { 9488 if (!map->mmaped) 9489 return NULL; 9490 *psize = map->def.value_size; 9491 return map->mmaped; 9492 } 9493 9494 bool bpf_map__is_internal(const struct bpf_map *map) 9495 { 9496 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 9497 } 9498 9499 __u32 bpf_map__ifindex(const struct bpf_map *map) 9500 { 9501 return map->map_ifindex; 9502 } 9503 9504 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) 9505 { 9506 if (map->fd >= 0) 9507 return libbpf_err(-EBUSY); 9508 map->map_ifindex = ifindex; 9509 return 0; 9510 } 9511 9512 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) 9513 { 9514 if (!bpf_map_type__is_map_in_map(map->def.type)) { 9515 pr_warn("error: unsupported map type\n"); 9516 return libbpf_err(-EINVAL); 9517 } 9518 if (map->inner_map_fd != -1) { 9519 pr_warn("error: inner_map_fd already specified\n"); 9520 return libbpf_err(-EINVAL); 9521 } 9522 if (map->inner_map) { 9523 bpf_map__destroy(map->inner_map); 9524 zfree(&map->inner_map); 9525 } 9526 map->inner_map_fd = fd; 9527 return 0; 9528 } 9529 9530 static struct bpf_map * 9531 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) 9532 { 9533 ssize_t idx; 9534 struct bpf_map *s, *e; 9535 9536 if (!obj || !obj->maps) 9537 return errno = EINVAL, NULL; 9538 9539 s = obj->maps; 9540 e = obj->maps + obj->nr_maps; 9541 9542 if ((m < s) || (m >= e)) { 9543 pr_warn("error in %s: map handler doesn't belong to object\n", 9544 __func__); 9545 return errno = EINVAL, NULL; 9546 } 9547 9548 idx = (m - obj->maps) + i; 9549 if (idx >= obj->nr_maps || idx < 0) 9550 return NULL; 9551 return &obj->maps[idx]; 9552 } 9553 9554 struct bpf_map * 9555 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 9556 { 9557 if (prev == NULL) 9558 return obj->maps; 9559 9560 return __bpf_map__iter(prev, obj, 1); 9561 } 9562 9563 struct bpf_map * 9564 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) 9565 { 9566 if (next == NULL) { 9567 if (!obj->nr_maps) 9568 return NULL; 9569 return obj->maps + obj->nr_maps - 1; 9570 } 9571 9572 return __bpf_map__iter(next, obj, -1); 9573 } 9574 9575 struct bpf_map * 9576 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) 9577 { 9578 struct bpf_map *pos; 9579 9580 bpf_object__for_each_map(pos, obj) { 9581 /* if it's a special internal map name (which always starts 9582 * with dot) then check if that special name matches the 9583 * real map name (ELF section name) 9584 */ 9585 if (name[0] == '.') { 9586 if (pos->real_name && strcmp(pos->real_name, name) == 0) 9587 return pos; 9588 continue; 9589 } 9590 /* otherwise map name has to be an exact match */ 9591 if (map_uses_real_name(pos)) { 9592 if (strcmp(pos->real_name, name) == 0) 9593 return pos; 9594 continue; 9595 } 9596 if (strcmp(pos->name, name) == 0) 9597 return pos; 9598 } 9599 return errno = ENOENT, NULL; 9600 } 9601 9602 int 9603 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) 9604 { 9605 return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 9606 } 9607 9608 static int 
validate_map_op(const struct bpf_map *map, size_t key_sz, 9609 size_t value_sz, bool check_value_sz) 9610 { 9611 if (map->fd <= 0) 9612 return -ENOENT; 9613 9614 if (map->def.key_size != key_sz) { 9615 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", 9616 map->name, key_sz, map->def.key_size); 9617 return -EINVAL; 9618 } 9619 9620 if (!check_value_sz) 9621 return 0; 9622 9623 switch (map->def.type) { 9624 case BPF_MAP_TYPE_PERCPU_ARRAY: 9625 case BPF_MAP_TYPE_PERCPU_HASH: 9626 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 9627 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { 9628 int num_cpu = libbpf_num_possible_cpus(); 9629 size_t elem_sz = roundup(map->def.value_size, 8); 9630 9631 if (value_sz != num_cpu * elem_sz) { 9632 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", 9633 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); 9634 return -EINVAL; 9635 } 9636 break; 9637 } 9638 default: 9639 if (map->def.value_size != value_sz) { 9640 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", 9641 map->name, value_sz, map->def.value_size); 9642 return -EINVAL; 9643 } 9644 break; 9645 } 9646 return 0; 9647 } 9648 9649 int bpf_map__lookup_elem(const struct bpf_map *map, 9650 const void *key, size_t key_sz, 9651 void *value, size_t value_sz, __u64 flags) 9652 { 9653 int err; 9654 9655 err = validate_map_op(map, key_sz, value_sz, true); 9656 if (err) 9657 return libbpf_err(err); 9658 9659 return bpf_map_lookup_elem_flags(map->fd, key, value, flags); 9660 } 9661 9662 int bpf_map__update_elem(const struct bpf_map *map, 9663 const void *key, size_t key_sz, 9664 const void *value, size_t value_sz, __u64 flags) 9665 { 9666 int err; 9667 9668 err = validate_map_op(map, key_sz, value_sz, true); 9669 if (err) 9670 return libbpf_err(err); 9671 9672 return bpf_map_update_elem(map->fd, key, value, flags); 9673 } 9674 9675 int bpf_map__delete_elem(const struct bpf_map *map, 9676 const void *key, size_t key_sz, __u64 flags) 9677 { 9678 int err; 9679 9680 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 9681 if (err) 9682 return libbpf_err(err); 9683 9684 return bpf_map_delete_elem_flags(map->fd, key, flags); 9685 } 9686 9687 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, 9688 const void *key, size_t key_sz, 9689 void *value, size_t value_sz, __u64 flags) 9690 { 9691 int err; 9692 9693 err = validate_map_op(map, key_sz, value_sz, true); 9694 if (err) 9695 return libbpf_err(err); 9696 9697 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); 9698 } 9699 9700 int bpf_map__get_next_key(const struct bpf_map *map, 9701 const void *cur_key, void *next_key, size_t key_sz) 9702 { 9703 int err; 9704 9705 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 9706 if (err) 9707 return libbpf_err(err); 9708 9709 return bpf_map_get_next_key(map->fd, cur_key, next_key); 9710 } 9711 9712 long libbpf_get_error(const void *ptr) 9713 { 9714 if (!IS_ERR_OR_NULL(ptr)) 9715 return 0; 9716 9717 if (IS_ERR(ptr)) 9718 errno = -PTR_ERR(ptr); 9719 9720 /* If ptr == NULL, then errno should be already set by the failing 9721 * API, because libbpf never returns NULL on success and it now always 9722 * sets errno on error. So no extra errno handling for ptr == NULL 9723 * case. 
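 *
 * Illustrative caller-side sketch of this convention (the "obj" handle and
 * map name are hypothetical):
 *
 *	map = bpf_object__find_map_by_name(obj, ".rodata");
 *	err = libbpf_get_error(map);
 *	if (err)
 *		return err;
 *
 * Here err would be -ENOENT if no such map exists; callers written against
 * the errno convention can equivalently test for NULL and read -errno.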
9724 */ 9725 return -errno; 9726 } 9727 9728 /* Replace link's underlying BPF program with the new one */ 9729 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) 9730 { 9731 int ret; 9732 9733 ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); 9734 return libbpf_err_errno(ret); 9735 } 9736 9737 /* Release "ownership" of underlying BPF resource (typically, BPF program 9738 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected 9739 * link, when destructed through bpf_link__destroy() call won't attempt to 9740 * detach/unregisted that BPF resource. This is useful in situations where, 9741 * say, attached BPF program has to outlive userspace program that attached it 9742 * in the system. Depending on type of BPF program, though, there might be 9743 * additional steps (like pinning BPF program in BPF FS) necessary to ensure 9744 * exit of userspace program doesn't trigger automatic detachment and clean up 9745 * inside the kernel. 9746 */ 9747 void bpf_link__disconnect(struct bpf_link *link) 9748 { 9749 link->disconnected = true; 9750 } 9751 9752 int bpf_link__destroy(struct bpf_link *link) 9753 { 9754 int err = 0; 9755 9756 if (IS_ERR_OR_NULL(link)) 9757 return 0; 9758 9759 if (!link->disconnected && link->detach) 9760 err = link->detach(link); 9761 if (link->pin_path) 9762 free(link->pin_path); 9763 if (link->dealloc) 9764 link->dealloc(link); 9765 else 9766 free(link); 9767 9768 return libbpf_err(err); 9769 } 9770 9771 int bpf_link__fd(const struct bpf_link *link) 9772 { 9773 return link->fd; 9774 } 9775 9776 const char *bpf_link__pin_path(const struct bpf_link *link) 9777 { 9778 return link->pin_path; 9779 } 9780 9781 static int bpf_link__detach_fd(struct bpf_link *link) 9782 { 9783 return libbpf_err_errno(close(link->fd)); 9784 } 9785 9786 struct bpf_link *bpf_link__open(const char *path) 9787 { 9788 struct bpf_link *link; 9789 int fd; 9790 9791 fd = bpf_obj_get(path); 9792 if (fd < 0) { 9793 fd = -errno; 9794 pr_warn("failed to open link at %s: %d\n", path, fd); 9795 return libbpf_err_ptr(fd); 9796 } 9797 9798 link = calloc(1, sizeof(*link)); 9799 if (!link) { 9800 close(fd); 9801 return libbpf_err_ptr(-ENOMEM); 9802 } 9803 link->detach = &bpf_link__detach_fd; 9804 link->fd = fd; 9805 9806 link->pin_path = strdup(path); 9807 if (!link->pin_path) { 9808 bpf_link__destroy(link); 9809 return libbpf_err_ptr(-ENOMEM); 9810 } 9811 9812 return link; 9813 } 9814 9815 int bpf_link__detach(struct bpf_link *link) 9816 { 9817 return bpf_link_detach(link->fd) ? 
-errno : 0; 9818 } 9819 9820 int bpf_link__pin(struct bpf_link *link, const char *path) 9821 { 9822 int err; 9823 9824 if (link->pin_path) 9825 return libbpf_err(-EBUSY); 9826 err = make_parent_dir(path); 9827 if (err) 9828 return libbpf_err(err); 9829 err = check_path(path); 9830 if (err) 9831 return libbpf_err(err); 9832 9833 link->pin_path = strdup(path); 9834 if (!link->pin_path) 9835 return libbpf_err(-ENOMEM); 9836 9837 if (bpf_obj_pin(link->fd, link->pin_path)) { 9838 err = -errno; 9839 zfree(&link->pin_path); 9840 return libbpf_err(err); 9841 } 9842 9843 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 9844 return 0; 9845 } 9846 9847 int bpf_link__unpin(struct bpf_link *link) 9848 { 9849 int err; 9850 9851 if (!link->pin_path) 9852 return libbpf_err(-EINVAL); 9853 9854 err = unlink(link->pin_path); 9855 if (err != 0) 9856 return -errno; 9857 9858 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 9859 zfree(&link->pin_path); 9860 return 0; 9861 } 9862 9863 struct bpf_link_perf { 9864 struct bpf_link link; 9865 int perf_event_fd; 9866 /* legacy kprobe support: keep track of probe identifier and type */ 9867 char *legacy_probe_name; 9868 bool legacy_is_kprobe; 9869 bool legacy_is_retprobe; 9870 }; 9871 9872 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); 9873 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); 9874 9875 static int bpf_link_perf_detach(struct bpf_link *link) 9876 { 9877 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 9878 int err = 0; 9879 9880 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 9881 err = -errno; 9882 9883 if (perf_link->perf_event_fd != link->fd) 9884 close(perf_link->perf_event_fd); 9885 close(link->fd); 9886 9887 /* legacy uprobe/kprobe needs to be removed after perf event fd closure */ 9888 if (perf_link->legacy_probe_name) { 9889 if (perf_link->legacy_is_kprobe) { 9890 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, 9891 perf_link->legacy_is_retprobe); 9892 } else { 9893 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, 9894 perf_link->legacy_is_retprobe); 9895 } 9896 } 9897 9898 return err; 9899 } 9900 9901 static void bpf_link_perf_dealloc(struct bpf_link *link) 9902 { 9903 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 9904 9905 free(perf_link->legacy_probe_name); 9906 free(perf_link); 9907 } 9908 9909 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, 9910 const struct bpf_perf_event_opts *opts) 9911 { 9912 char errmsg[STRERR_BUFSIZE]; 9913 struct bpf_link_perf *link; 9914 int prog_fd, link_fd = -1, err; 9915 bool force_ioctl_attach; 9916 9917 if (!OPTS_VALID(opts, bpf_perf_event_opts)) 9918 return libbpf_err_ptr(-EINVAL); 9919 9920 if (pfd < 0) { 9921 pr_warn("prog '%s': invalid perf event FD %d\n", 9922 prog->name, pfd); 9923 return libbpf_err_ptr(-EINVAL); 9924 } 9925 prog_fd = bpf_program__fd(prog); 9926 if (prog_fd < 0) { 9927 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", 9928 prog->name); 9929 return libbpf_err_ptr(-EINVAL); 9930 } 9931 9932 link = calloc(1, sizeof(*link)); 9933 if (!link) 9934 return libbpf_err_ptr(-ENOMEM); 9935 link->link.detach = &bpf_link_perf_detach; 9936 link->link.dealloc = &bpf_link_perf_dealloc; 9937 link->perf_event_fd = pfd; 9938 9939 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); 9940 if (kernel_supports(prog->obj, 
FEAT_PERF_LINK) && !force_ioctl_attach) { 9941 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, 9942 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); 9943 9944 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); 9945 if (link_fd < 0) { 9946 err = -errno; 9947 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", 9948 prog->name, pfd, 9949 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 9950 goto err_out; 9951 } 9952 link->link.fd = link_fd; 9953 } else { 9954 if (OPTS_GET(opts, bpf_cookie, 0)) { 9955 pr_warn("prog '%s': user context value is not supported\n", prog->name); 9956 err = -EOPNOTSUPP; 9957 goto err_out; 9958 } 9959 9960 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { 9961 err = -errno; 9962 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", 9963 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 9964 if (err == -EPROTO) 9965 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", 9966 prog->name, pfd); 9967 goto err_out; 9968 } 9969 link->link.fd = pfd; 9970 } 9971 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 9972 err = -errno; 9973 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", 9974 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 9975 goto err_out; 9976 } 9977 9978 return &link->link; 9979 err_out: 9980 if (link_fd >= 0) 9981 close(link_fd); 9982 free(link); 9983 return libbpf_err_ptr(err); 9984 } 9985 9986 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) 9987 { 9988 return bpf_program__attach_perf_event_opts(prog, pfd, NULL); 9989 } 9990 9991 /* 9992 * this function is expected to parse integer in the range of [0, 2^31-1] from 9993 * given file using scanf format string fmt. If actual parsed value is 9994 * negative, the result might be indistinguishable from error 9995 */ 9996 static int parse_uint_from_file(const char *file, const char *fmt) 9997 { 9998 char buf[STRERR_BUFSIZE]; 9999 int err, ret; 10000 FILE *f; 10001 10002 f = fopen(file, "re"); 10003 if (!f) { 10004 err = -errno; 10005 pr_debug("failed to open '%s': %s\n", file, 10006 libbpf_strerror_r(err, buf, sizeof(buf))); 10007 return err; 10008 } 10009 err = fscanf(f, fmt, &ret); 10010 if (err != 1) { 10011 err = err == EOF ? 
-EIO : -errno; 10012 pr_debug("failed to parse '%s': %s\n", file, 10013 libbpf_strerror_r(err, buf, sizeof(buf))); 10014 fclose(f); 10015 return err; 10016 } 10017 fclose(f); 10018 return ret; 10019 } 10020 10021 static int determine_kprobe_perf_type(void) 10022 { 10023 const char *file = "/sys/bus/event_source/devices/kprobe/type"; 10024 10025 return parse_uint_from_file(file, "%d\n"); 10026 } 10027 10028 static int determine_uprobe_perf_type(void) 10029 { 10030 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 10031 10032 return parse_uint_from_file(file, "%d\n"); 10033 } 10034 10035 static int determine_kprobe_retprobe_bit(void) 10036 { 10037 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; 10038 10039 return parse_uint_from_file(file, "config:%d\n"); 10040 } 10041 10042 static int determine_uprobe_retprobe_bit(void) 10043 { 10044 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 10045 10046 return parse_uint_from_file(file, "config:%d\n"); 10047 } 10048 10049 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 10050 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 10051 10052 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, 10053 uint64_t offset, int pid, size_t ref_ctr_off) 10054 { 10055 const size_t attr_sz = sizeof(struct perf_event_attr); 10056 struct perf_event_attr attr; 10057 char errmsg[STRERR_BUFSIZE]; 10058 int type, pfd; 10059 10060 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 10061 return -EINVAL; 10062 10063 memset(&attr, 0, attr_sz); 10064 10065 type = uprobe ? determine_uprobe_perf_type() 10066 : determine_kprobe_perf_type(); 10067 if (type < 0) { 10068 pr_warn("failed to determine %s perf type: %s\n", 10069 uprobe ? "uprobe" : "kprobe", 10070 libbpf_strerror_r(type, errmsg, sizeof(errmsg))); 10071 return type; 10072 } 10073 if (retprobe) { 10074 int bit = uprobe ? determine_uprobe_retprobe_bit() 10075 : determine_kprobe_retprobe_bit(); 10076 10077 if (bit < 0) { 10078 pr_warn("failed to determine %s retprobe bit: %s\n", 10079 uprobe ? "uprobe" : "kprobe", 10080 libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); 10081 return bit; 10082 } 10083 attr.config |= 1 << bit; 10084 } 10085 attr.size = attr_sz; 10086 attr.type = type; 10087 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; 10088 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ 10089 attr.config2 = offset; /* kprobe_addr or probe_offset */ 10090 10091 /* pid filter is meaningful only for uprobes */ 10092 pfd = syscall(__NR_perf_event_open, &attr, 10093 pid < 0 ? -1 : pid /* pid */, 10094 pid == -1 ? 0 : -1 /* cpu */, 10095 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10096 return pfd >= 0 ? pfd : -errno; 10097 } 10098 10099 static int append_to_file(const char *file, const char *fmt, ...) 
10100 { 10101 int fd, n, err = 0; 10102 va_list ap; 10103 char buf[1024]; 10104 10105 va_start(ap, fmt); 10106 n = vsnprintf(buf, sizeof(buf), fmt, ap); 10107 va_end(ap); 10108 10109 if (n < 0 || n >= sizeof(buf)) 10110 return -EINVAL; 10111 10112 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); 10113 if (fd < 0) 10114 return -errno; 10115 10116 if (write(fd, buf, n) < 0) 10117 err = -errno; 10118 10119 close(fd); 10120 return err; 10121 } 10122 10123 #define DEBUGFS "/sys/kernel/debug/tracing" 10124 #define TRACEFS "/sys/kernel/tracing" 10125 10126 static bool use_debugfs(void) 10127 { 10128 static int has_debugfs = -1; 10129 10130 if (has_debugfs < 0) 10131 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; 10132 10133 return has_debugfs == 1; 10134 } 10135 10136 static const char *tracefs_path(void) 10137 { 10138 return use_debugfs() ? DEBUGFS : TRACEFS; 10139 } 10140 10141 static const char *tracefs_kprobe_events(void) 10142 { 10143 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; 10144 } 10145 10146 static const char *tracefs_uprobe_events(void) 10147 { 10148 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; 10149 } 10150 10151 static const char *tracefs_available_filter_functions(void) 10152 { 10153 return use_debugfs() ? DEBUGFS"/available_filter_functions" 10154 : TRACEFS"/available_filter_functions"; 10155 } 10156 10157 static const char *tracefs_available_filter_functions_addrs(void) 10158 { 10159 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" 10160 : TRACEFS"/available_filter_functions_addrs"; 10161 } 10162 10163 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, 10164 const char *kfunc_name, size_t offset) 10165 { 10166 static int index = 0; 10167 int i; 10168 10169 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, 10170 __sync_fetch_and_add(&index, 1)); 10171 10172 /* sanitize binary_path in the probe name */ 10173 for (i = 0; buf[i]; i++) { 10174 if (!isalnum(buf[i])) 10175 buf[i] = '_'; 10176 } 10177 } 10178 10179 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, 10180 const char *kfunc_name, size_t offset) 10181 { 10182 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", 10183 retprobe ? 'r' : 'p', 10184 retprobe ? "kretprobes" : "kprobes", 10185 probe_name, kfunc_name, offset); 10186 } 10187 10188 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) 10189 { 10190 return append_to_file(tracefs_kprobe_events(), "-:%s/%s", 10191 retprobe ? "kretprobes" : "kprobes", probe_name); 10192 } 10193 10194 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) 10195 { 10196 char file[256]; 10197 10198 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 10199 tracefs_path(), retprobe ? 
"kretprobes" : "kprobes", probe_name); 10200 10201 return parse_uint_from_file(file, "%d\n"); 10202 } 10203 10204 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, 10205 const char *kfunc_name, size_t offset, int pid) 10206 { 10207 const size_t attr_sz = sizeof(struct perf_event_attr); 10208 struct perf_event_attr attr; 10209 char errmsg[STRERR_BUFSIZE]; 10210 int type, pfd, err; 10211 10212 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); 10213 if (err < 0) { 10214 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", 10215 kfunc_name, offset, 10216 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10217 return err; 10218 } 10219 type = determine_kprobe_perf_type_legacy(probe_name, retprobe); 10220 if (type < 0) { 10221 err = type; 10222 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", 10223 kfunc_name, offset, 10224 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10225 goto err_clean_legacy; 10226 } 10227 10228 memset(&attr, 0, attr_sz); 10229 attr.size = attr_sz; 10230 attr.config = type; 10231 attr.type = PERF_TYPE_TRACEPOINT; 10232 10233 pfd = syscall(__NR_perf_event_open, &attr, 10234 pid < 0 ? -1 : pid, /* pid */ 10235 pid == -1 ? 0 : -1, /* cpu */ 10236 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10237 if (pfd < 0) { 10238 err = -errno; 10239 pr_warn("legacy kprobe perf_event_open() failed: %s\n", 10240 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10241 goto err_clean_legacy; 10242 } 10243 return pfd; 10244 10245 err_clean_legacy: 10246 /* Clear the newly added legacy kprobe_event */ 10247 remove_kprobe_event_legacy(probe_name, retprobe); 10248 return err; 10249 } 10250 10251 static const char *arch_specific_syscall_pfx(void) 10252 { 10253 #if defined(__x86_64__) 10254 return "x64"; 10255 #elif defined(__i386__) 10256 return "ia32"; 10257 #elif defined(__s390x__) 10258 return "s390x"; 10259 #elif defined(__s390__) 10260 return "s390"; 10261 #elif defined(__arm__) 10262 return "arm"; 10263 #elif defined(__aarch64__) 10264 return "arm64"; 10265 #elif defined(__mips__) 10266 return "mips"; 10267 #elif defined(__riscv) 10268 return "riscv"; 10269 #elif defined(__powerpc__) 10270 return "powerpc"; 10271 #elif defined(__powerpc64__) 10272 return "powerpc64"; 10273 #else 10274 return NULL; 10275 #endif 10276 } 10277 10278 int probe_kern_syscall_wrapper(int token_fd) 10279 { 10280 char syscall_name[64]; 10281 const char *ksys_pfx; 10282 10283 ksys_pfx = arch_specific_syscall_pfx(); 10284 if (!ksys_pfx) 10285 return 0; 10286 10287 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 10288 10289 if (determine_kprobe_perf_type() >= 0) { 10290 int pfd; 10291 10292 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 10293 if (pfd >= 0) 10294 close(pfd); 10295 10296 return pfd >= 0 ? 
1 : 0; 10297 } else { /* legacy mode */ 10298 char probe_name[128]; 10299 10300 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 10301 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 10302 return 0; 10303 10304 (void)remove_kprobe_event_legacy(probe_name, false); 10305 return 1; 10306 } 10307 } 10308 10309 struct bpf_link * 10310 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 10311 const char *func_name, 10312 const struct bpf_kprobe_opts *opts) 10313 { 10314 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 10315 enum probe_attach_mode attach_mode; 10316 char errmsg[STRERR_BUFSIZE]; 10317 char *legacy_probe = NULL; 10318 struct bpf_link *link; 10319 size_t offset; 10320 bool retprobe, legacy; 10321 int pfd, err; 10322 10323 if (!OPTS_VALID(opts, bpf_kprobe_opts)) 10324 return libbpf_err_ptr(-EINVAL); 10325 10326 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 10327 retprobe = OPTS_GET(opts, retprobe, false); 10328 offset = OPTS_GET(opts, offset, 0); 10329 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 10330 10331 legacy = determine_kprobe_perf_type() < 0; 10332 switch (attach_mode) { 10333 case PROBE_ATTACH_MODE_LEGACY: 10334 legacy = true; 10335 pe_opts.force_ioctl_attach = true; 10336 break; 10337 case PROBE_ATTACH_MODE_PERF: 10338 if (legacy) 10339 return libbpf_err_ptr(-ENOTSUP); 10340 pe_opts.force_ioctl_attach = true; 10341 break; 10342 case PROBE_ATTACH_MODE_LINK: 10343 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 10344 return libbpf_err_ptr(-ENOTSUP); 10345 break; 10346 case PROBE_ATTACH_MODE_DEFAULT: 10347 break; 10348 default: 10349 return libbpf_err_ptr(-EINVAL); 10350 } 10351 10352 if (!legacy) { 10353 pfd = perf_event_open_probe(false /* uprobe */, retprobe, 10354 func_name, offset, 10355 -1 /* pid */, 0 /* ref_ctr_off */); 10356 } else { 10357 char probe_name[256]; 10358 10359 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), 10360 func_name, offset); 10361 10362 legacy_probe = strdup(probe_name); 10363 if (!legacy_probe) 10364 return libbpf_err_ptr(-ENOMEM); 10365 10366 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, 10367 offset, -1 /* pid */); 10368 } 10369 if (pfd < 0) { 10370 err = -errno; 10371 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", 10372 prog->name, retprobe ? "kretprobe" : "kprobe", 10373 func_name, offset, 10374 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10375 goto err_out; 10376 } 10377 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 10378 err = libbpf_get_error(link); 10379 if (err) { 10380 close(pfd); 10381 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", 10382 prog->name, retprobe ? 
"kretprobe" : "kprobe", 10383 func_name, offset, 10384 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10385 goto err_clean_legacy; 10386 } 10387 if (legacy) { 10388 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10389 10390 perf_link->legacy_probe_name = legacy_probe; 10391 perf_link->legacy_is_kprobe = true; 10392 perf_link->legacy_is_retprobe = retprobe; 10393 } 10394 10395 return link; 10396 10397 err_clean_legacy: 10398 if (legacy) 10399 remove_kprobe_event_legacy(legacy_probe, retprobe); 10400 err_out: 10401 free(legacy_probe); 10402 return libbpf_err_ptr(err); 10403 } 10404 10405 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, 10406 bool retprobe, 10407 const char *func_name) 10408 { 10409 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 10410 .retprobe = retprobe, 10411 ); 10412 10413 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 10414 } 10415 10416 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 10417 const char *syscall_name, 10418 const struct bpf_ksyscall_opts *opts) 10419 { 10420 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 10421 char func_name[128]; 10422 10423 if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 10424 return libbpf_err_ptr(-EINVAL); 10425 10426 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 10427 /* arch_specific_syscall_pfx() should never return NULL here 10428 * because it is guarded by kernel_supports(). However, since 10429 * compiler does not know that we have an explicit conditional 10430 * as well. 10431 */ 10432 snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 10433 arch_specific_syscall_pfx() ? : "", syscall_name); 10434 } else { 10435 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 10436 } 10437 10438 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 10439 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 10440 10441 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 10442 } 10443 10444 /* Adapted from perf/util/string.c */ 10445 bool glob_match(const char *str, const char *pat) 10446 { 10447 while (*str && *pat && *pat != '*') { 10448 if (*pat == '?') { /* Matches any single character */ 10449 str++; 10450 pat++; 10451 continue; 10452 } 10453 if (*str != *pat) 10454 return false; 10455 str++; 10456 pat++; 10457 } 10458 /* Check wild card */ 10459 if (*pat == '*') { 10460 while (*pat == '*') 10461 pat++; 10462 if (!*pat) /* Tail wild card matches all */ 10463 return true; 10464 while (*str) 10465 if (glob_match(str++, pat)) 10466 return true; 10467 } 10468 return !*str && !*pat; 10469 } 10470 10471 struct kprobe_multi_resolve { 10472 const char *pattern; 10473 unsigned long *addrs; 10474 size_t cap; 10475 size_t cnt; 10476 }; 10477 10478 struct avail_kallsyms_data { 10479 char **syms; 10480 size_t cnt; 10481 struct kprobe_multi_resolve *res; 10482 }; 10483 10484 static int avail_func_cmp(const void *a, const void *b) 10485 { 10486 return strcmp(*(const char **)a, *(const char **)b); 10487 } 10488 10489 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, 10490 const char *sym_name, void *ctx) 10491 { 10492 struct avail_kallsyms_data *data = ctx; 10493 struct kprobe_multi_resolve *res = data->res; 10494 int err; 10495 10496 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) 10497 return 0; 10498 10499 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); 10500 if (err) 10501 return err; 10502 10503 
res->addrs[res->cnt++] = (unsigned long)sym_addr; 10504 return 0; 10505 } 10506 10507 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) 10508 { 10509 const char *available_functions_file = tracefs_available_filter_functions(); 10510 struct avail_kallsyms_data data; 10511 char sym_name[500]; 10512 FILE *f; 10513 int err = 0, ret, i; 10514 char **syms = NULL; 10515 size_t cap = 0, cnt = 0; 10516 10517 f = fopen(available_functions_file, "re"); 10518 if (!f) { 10519 err = -errno; 10520 pr_warn("failed to open %s: %d\n", available_functions_file, err); 10521 return err; 10522 } 10523 10524 while (true) { 10525 char *name; 10526 10527 ret = fscanf(f, "%499s%*[^\n]\n", sym_name); 10528 if (ret == EOF && feof(f)) 10529 break; 10530 10531 if (ret != 1) { 10532 pr_warn("failed to parse available_filter_functions entry: %d\n", ret); 10533 err = -EINVAL; 10534 goto cleanup; 10535 } 10536 10537 if (!glob_match(sym_name, res->pattern)) 10538 continue; 10539 10540 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); 10541 if (err) 10542 goto cleanup; 10543 10544 name = strdup(sym_name); 10545 if (!name) { 10546 err = -errno; 10547 goto cleanup; 10548 } 10549 10550 syms[cnt++] = name; 10551 } 10552 10553 /* no entries found, bail out */ 10554 if (cnt == 0) { 10555 err = -ENOENT; 10556 goto cleanup; 10557 } 10558 10559 /* sort available functions */ 10560 qsort(syms, cnt, sizeof(*syms), avail_func_cmp); 10561 10562 data.syms = syms; 10563 data.res = res; 10564 data.cnt = cnt; 10565 libbpf_kallsyms_parse(avail_kallsyms_cb, &data); 10566 10567 if (res->cnt == 0) 10568 err = -ENOENT; 10569 10570 cleanup: 10571 for (i = 0; i < cnt; i++) 10572 free((char *)syms[i]); 10573 free(syms); 10574 10575 fclose(f); 10576 return err; 10577 } 10578 10579 static bool has_available_filter_functions_addrs(void) 10580 { 10581 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; 10582 } 10583 10584 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) 10585 { 10586 const char *available_path = tracefs_available_filter_functions_addrs(); 10587 char sym_name[500]; 10588 FILE *f; 10589 int ret, err = 0; 10590 unsigned long long sym_addr; 10591 10592 f = fopen(available_path, "re"); 10593 if (!f) { 10594 err = -errno; 10595 pr_warn("failed to open %s: %d\n", available_path, err); 10596 return err; 10597 } 10598 10599 while (true) { 10600 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); 10601 if (ret == EOF && feof(f)) 10602 break; 10603 10604 if (ret != 2) { 10605 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", 10606 ret); 10607 err = -EINVAL; 10608 goto cleanup; 10609 } 10610 10611 if (!glob_match(sym_name, res->pattern)) 10612 continue; 10613 10614 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, 10615 sizeof(*res->addrs), res->cnt + 1); 10616 if (err) 10617 goto cleanup; 10618 10619 res->addrs[res->cnt++] = (unsigned long)sym_addr; 10620 } 10621 10622 if (res->cnt == 0) 10623 err = -ENOENT; 10624 10625 cleanup: 10626 fclose(f); 10627 return err; 10628 } 10629 10630 struct bpf_link * 10631 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 10632 const char *pattern, 10633 const struct bpf_kprobe_multi_opts *opts) 10634 { 10635 LIBBPF_OPTS(bpf_link_create_opts, lopts); 10636 struct kprobe_multi_resolve res = { 10637 .pattern = pattern, 10638 }; 10639 struct bpf_link *link = NULL; 10640 char errmsg[STRERR_BUFSIZE]; 10641 const unsigned long *addrs; 10642 int err, link_fd, prog_fd; 
10643 const __u64 *cookies; 10644 const char **syms; 10645 bool retprobe; 10646 size_t cnt; 10647 10648 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) 10649 return libbpf_err_ptr(-EINVAL); 10650 10651 syms = OPTS_GET(opts, syms, false); 10652 addrs = OPTS_GET(opts, addrs, false); 10653 cnt = OPTS_GET(opts, cnt, false); 10654 cookies = OPTS_GET(opts, cookies, false); 10655 10656 if (!pattern && !addrs && !syms) 10657 return libbpf_err_ptr(-EINVAL); 10658 if (pattern && (addrs || syms || cookies || cnt)) 10659 return libbpf_err_ptr(-EINVAL); 10660 if (!pattern && !cnt) 10661 return libbpf_err_ptr(-EINVAL); 10662 if (addrs && syms) 10663 return libbpf_err_ptr(-EINVAL); 10664 10665 if (pattern) { 10666 if (has_available_filter_functions_addrs()) 10667 err = libbpf_available_kprobes_parse(&res); 10668 else 10669 err = libbpf_available_kallsyms_parse(&res); 10670 if (err) 10671 goto error; 10672 addrs = res.addrs; 10673 cnt = res.cnt; 10674 } 10675 10676 retprobe = OPTS_GET(opts, retprobe, false); 10677 10678 lopts.kprobe_multi.syms = syms; 10679 lopts.kprobe_multi.addrs = addrs; 10680 lopts.kprobe_multi.cookies = cookies; 10681 lopts.kprobe_multi.cnt = cnt; 10682 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0; 10683 10684 link = calloc(1, sizeof(*link)); 10685 if (!link) { 10686 err = -ENOMEM; 10687 goto error; 10688 } 10689 link->detach = &bpf_link__detach_fd; 10690 10691 prog_fd = bpf_program__fd(prog); 10692 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts); 10693 if (link_fd < 0) { 10694 err = -errno; 10695 pr_warn("prog '%s': failed to attach: %s\n", 10696 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10697 goto error; 10698 } 10699 link->fd = link_fd; 10700 free(res.addrs); 10701 return link; 10702 10703 error: 10704 free(link); 10705 free(res.addrs); 10706 return libbpf_err_ptr(err); 10707 } 10708 10709 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10710 { 10711 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); 10712 unsigned long offset = 0; 10713 const char *func_name; 10714 char *func; 10715 int n; 10716 10717 *link = NULL; 10718 10719 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ 10720 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) 10721 return 0; 10722 10723 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); 10724 if (opts.retprobe) 10725 func_name = prog->sec_name + sizeof("kretprobe/") - 1; 10726 else 10727 func_name = prog->sec_name + sizeof("kprobe/") - 1; 10728 10729 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); 10730 if (n < 1) { 10731 pr_warn("kprobe name is invalid: %s\n", func_name); 10732 return -EINVAL; 10733 } 10734 if (opts.retprobe && offset != 0) { 10735 free(func); 10736 pr_warn("kretprobes do not support offset specification\n"); 10737 return -EINVAL; 10738 } 10739 10740 opts.offset = offset; 10741 *link = bpf_program__attach_kprobe_opts(prog, func, &opts); 10742 free(func); 10743 return libbpf_get_error(*link); 10744 } 10745 10746 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10747 { 10748 LIBBPF_OPTS(bpf_ksyscall_opts, opts); 10749 const char *syscall_name; 10750 10751 *link = NULL; 10752 10753 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ 10754 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) 10755 return 0; 10756 10757 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); 
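	/* Auto-attachable section names have the form SEC("ksyscall/<name>") or
	 * SEC("kretsyscall/<name>"); e.g. SEC("ksyscall/openat") (an illustrative
	 * name) attaches on entry of the openat syscall. Everything after the '/'
	 * is taken as the syscall name below.
	 */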
10758 if (opts.retprobe) 10759 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; 10760 else 10761 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; 10762 10763 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); 10764 return *link ? 0 : -errno; 10765 } 10766 10767 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10768 { 10769 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); 10770 const char *spec; 10771 char *pattern; 10772 int n; 10773 10774 *link = NULL; 10775 10776 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ 10777 if (strcmp(prog->sec_name, "kprobe.multi") == 0 || 10778 strcmp(prog->sec_name, "kretprobe.multi") == 0) 10779 return 0; 10780 10781 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); 10782 if (opts.retprobe) 10783 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; 10784 else 10785 spec = prog->sec_name + sizeof("kprobe.multi/") - 1; 10786 10787 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 10788 if (n < 1) { 10789 pr_warn("kprobe multi pattern is invalid: %s\n", pattern); 10790 return -EINVAL; 10791 } 10792 10793 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 10794 free(pattern); 10795 return libbpf_get_error(*link); 10796 } 10797 10798 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10799 { 10800 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 10801 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 10802 int n, ret = -EINVAL; 10803 10804 *link = NULL; 10805 10806 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 10807 &probe_type, &binary_path, &func_name); 10808 switch (n) { 10809 case 1: 10810 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 10811 ret = 0; 10812 break; 10813 case 3: 10814 opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; 10815 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 10816 ret = libbpf_get_error(*link); 10817 break; 10818 default: 10819 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 10820 prog->sec_name); 10821 break; 10822 } 10823 free(probe_type); 10824 free(binary_path); 10825 free(func_name); 10826 return ret; 10827 } 10828 10829 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, 10830 const char *binary_path, uint64_t offset) 10831 { 10832 int i; 10833 10834 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); 10835 10836 /* sanitize binary_path in the probe name */ 10837 for (i = 0; buf[i]; i++) { 10838 if (!isalnum(buf[i])) 10839 buf[i] = '_'; 10840 } 10841 } 10842 10843 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, 10844 const char *binary_path, size_t offset) 10845 { 10846 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", 10847 retprobe ? 'r' : 'p', 10848 retprobe ? "uretprobes" : "uprobes", 10849 probe_name, binary_path, offset); 10850 } 10851 10852 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) 10853 { 10854 return append_to_file(tracefs_uprobe_events(), "-:%s/%s", 10855 retprobe ? "uretprobes" : "uprobes", probe_name); 10856 } 10857 10858 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) 10859 { 10860 char file[512]; 10861 10862 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 10863 tracefs_path(), retprobe ? 
"uretprobes" : "uprobes", probe_name); 10864 10865 return parse_uint_from_file(file, "%d\n"); 10866 } 10867 10868 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, 10869 const char *binary_path, size_t offset, int pid) 10870 { 10871 const size_t attr_sz = sizeof(struct perf_event_attr); 10872 struct perf_event_attr attr; 10873 int type, pfd, err; 10874 10875 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); 10876 if (err < 0) { 10877 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", 10878 binary_path, (size_t)offset, err); 10879 return err; 10880 } 10881 type = determine_uprobe_perf_type_legacy(probe_name, retprobe); 10882 if (type < 0) { 10883 err = type; 10884 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", 10885 binary_path, offset, err); 10886 goto err_clean_legacy; 10887 } 10888 10889 memset(&attr, 0, attr_sz); 10890 attr.size = attr_sz; 10891 attr.config = type; 10892 attr.type = PERF_TYPE_TRACEPOINT; 10893 10894 pfd = syscall(__NR_perf_event_open, &attr, 10895 pid < 0 ? -1 : pid, /* pid */ 10896 pid == -1 ? 0 : -1, /* cpu */ 10897 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10898 if (pfd < 0) { 10899 err = -errno; 10900 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); 10901 goto err_clean_legacy; 10902 } 10903 return pfd; 10904 10905 err_clean_legacy: 10906 /* Clear the newly added legacy uprobe_event */ 10907 remove_uprobe_event_legacy(probe_name, retprobe); 10908 return err; 10909 } 10910 10911 /* Find offset of function name in archive specified by path. Currently 10912 * supported are .zip files that do not compress their contents, as used on 10913 * Android in the form of APKs, for example. "file_name" is the name of the ELF 10914 * file inside the archive. "func_name" matches symbol name or name@@LIB for 10915 * library functions. 
10916 * 10917 * An overview of the APK format specifically provided here: 10918 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents 10919 */ 10920 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, 10921 const char *func_name) 10922 { 10923 struct zip_archive *archive; 10924 struct zip_entry entry; 10925 long ret; 10926 Elf *elf; 10927 10928 archive = zip_archive_open(archive_path); 10929 if (IS_ERR(archive)) { 10930 ret = PTR_ERR(archive); 10931 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); 10932 return ret; 10933 } 10934 10935 ret = zip_archive_find_entry(archive, file_name, &entry); 10936 if (ret) { 10937 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, 10938 archive_path, ret); 10939 goto out; 10940 } 10941 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, 10942 (unsigned long)entry.data_offset); 10943 10944 if (entry.compression) { 10945 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, 10946 archive_path); 10947 ret = -LIBBPF_ERRNO__FORMAT; 10948 goto out; 10949 } 10950 10951 elf = elf_memory((void *)entry.data, entry.data_length); 10952 if (!elf) { 10953 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, 10954 elf_errmsg(-1)); 10955 ret = -LIBBPF_ERRNO__LIBELF; 10956 goto out; 10957 } 10958 10959 ret = elf_find_func_offset(elf, file_name, func_name); 10960 if (ret > 0) { 10961 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", 10962 func_name, file_name, archive_path, entry.data_offset, ret, 10963 ret + entry.data_offset); 10964 ret += entry.data_offset; 10965 } 10966 elf_end(elf); 10967 10968 out: 10969 zip_archive_close(archive); 10970 return ret; 10971 } 10972 10973 static const char *arch_specific_lib_paths(void) 10974 { 10975 /* 10976 * Based on https://packages.debian.org/sid/libc6. 10977 * 10978 * Assume that the traced program is built for the same architecture 10979 * as libbpf, which should cover the vast majority of cases. 10980 */ 10981 #if defined(__x86_64__) 10982 return "/lib/x86_64-linux-gnu"; 10983 #elif defined(__i386__) 10984 return "/lib/i386-linux-gnu"; 10985 #elif defined(__s390x__) 10986 return "/lib/s390x-linux-gnu"; 10987 #elif defined(__s390__) 10988 return "/lib/s390-linux-gnu"; 10989 #elif defined(__arm__) && defined(__SOFTFP__) 10990 return "/lib/arm-linux-gnueabi"; 10991 #elif defined(__arm__) && !defined(__SOFTFP__) 10992 return "/lib/arm-linux-gnueabihf"; 10993 #elif defined(__aarch64__) 10994 return "/lib/aarch64-linux-gnu"; 10995 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 10996 return "/lib/mips64el-linux-gnuabi64"; 10997 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 10998 return "/lib/mipsel-linux-gnu"; 10999 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 11000 return "/lib/powerpc64le-linux-gnu"; 11001 #elif defined(__sparc__) && defined(__arch64__) 11002 return "/lib/sparc64-linux-gnu"; 11003 #elif defined(__riscv) && __riscv_xlen == 64 11004 return "/lib/riscv64-linux-gnu"; 11005 #else 11006 return NULL; 11007 #endif 11008 } 11009 11010 /* Get full path to program/shared library. 
*/ 11011 static int resolve_full_path(const char *file, char *result, size_t result_sz) 11012 { 11013 const char *search_paths[3] = {}; 11014 int i, perm; 11015 11016 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { 11017 search_paths[0] = getenv("LD_LIBRARY_PATH"); 11018 search_paths[1] = "/usr/lib64:/usr/lib"; 11019 search_paths[2] = arch_specific_lib_paths(); 11020 perm = R_OK; 11021 } else { 11022 search_paths[0] = getenv("PATH"); 11023 search_paths[1] = "/usr/bin:/usr/sbin"; 11024 perm = R_OK | X_OK; 11025 } 11026 11027 for (i = 0; i < ARRAY_SIZE(search_paths); i++) { 11028 const char *s; 11029 11030 if (!search_paths[i]) 11031 continue; 11032 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { 11033 char *next_path; 11034 int seg_len; 11035 11036 if (s[0] == ':') 11037 s++; 11038 next_path = strchr(s, ':'); 11039 seg_len = next_path ? next_path - s : strlen(s); 11040 if (!seg_len) 11041 continue; 11042 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); 11043 /* ensure it has required permissions */ 11044 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) 11045 continue; 11046 pr_debug("resolved '%s' to '%s'\n", file, result); 11047 return 0; 11048 } 11049 } 11050 return -ENOENT; 11051 } 11052 11053 struct bpf_link * 11054 bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 11055 pid_t pid, 11056 const char *path, 11057 const char *func_pattern, 11058 const struct bpf_uprobe_multi_opts *opts) 11059 { 11060 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 11061 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11062 unsigned long *resolved_offsets = NULL; 11063 int err = 0, link_fd, prog_fd; 11064 struct bpf_link *link = NULL; 11065 char errmsg[STRERR_BUFSIZE]; 11066 char full_path[PATH_MAX]; 11067 const __u64 *cookies; 11068 const char **syms; 11069 size_t cnt; 11070 11071 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 11072 return libbpf_err_ptr(-EINVAL); 11073 11074 syms = OPTS_GET(opts, syms, NULL); 11075 offsets = OPTS_GET(opts, offsets, NULL); 11076 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 11077 cookies = OPTS_GET(opts, cookies, NULL); 11078 cnt = OPTS_GET(opts, cnt, 0); 11079 11080 /* 11081 * User can specify 2 mutually exclusive set of inputs: 11082 * 11083 * 1) use only path/func_pattern/pid arguments 11084 * 11085 * 2) use path/pid with allowed combinations of: 11086 * syms/offsets/ref_ctr_offsets/cookies/cnt 11087 * 11088 * - syms and offsets are mutually exclusive 11089 * - ref_ctr_offsets and cookies are optional 11090 * 11091 * Any other usage results in error. 
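 *
 * A minimal sketch of option 1) (binary path and glob pattern are purely
 * illustrative):
 *
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "/usr/bin/foo",
 *						"func_*", NULL);
 *
 * This resolves every function in /usr/bin/foo whose name matches the
 * "func_*" glob and attaches to all of them through one uprobe_multi link;
 * pid == -1 means no process filter. Passing opts with .retprobe = true
 * creates uretprobes instead.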
11092 */ 11093 11094 if (!path) 11095 return libbpf_err_ptr(-EINVAL); 11096 if (!func_pattern && cnt == 0) 11097 return libbpf_err_ptr(-EINVAL); 11098 11099 if (func_pattern) { 11100 if (syms || offsets || ref_ctr_offsets || cookies || cnt) 11101 return libbpf_err_ptr(-EINVAL); 11102 } else { 11103 if (!!syms == !!offsets) 11104 return libbpf_err_ptr(-EINVAL); 11105 } 11106 11107 if (func_pattern) { 11108 if (!strchr(path, '/')) { 11109 err = resolve_full_path(path, full_path, sizeof(full_path)); 11110 if (err) { 11111 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11112 prog->name, path, err); 11113 return libbpf_err_ptr(err); 11114 } 11115 path = full_path; 11116 } 11117 11118 err = elf_resolve_pattern_offsets(path, func_pattern, 11119 &resolved_offsets, &cnt); 11120 if (err < 0) 11121 return libbpf_err_ptr(err); 11122 offsets = resolved_offsets; 11123 } else if (syms) { 11124 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); 11125 if (err < 0) 11126 return libbpf_err_ptr(err); 11127 offsets = resolved_offsets; 11128 } 11129 11130 lopts.uprobe_multi.path = path; 11131 lopts.uprobe_multi.offsets = offsets; 11132 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 11133 lopts.uprobe_multi.cookies = cookies; 11134 lopts.uprobe_multi.cnt = cnt; 11135 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; 11136 11137 if (pid == 0) 11138 pid = getpid(); 11139 if (pid > 0) 11140 lopts.uprobe_multi.pid = pid; 11141 11142 link = calloc(1, sizeof(*link)); 11143 if (!link) { 11144 err = -ENOMEM; 11145 goto error; 11146 } 11147 link->detach = &bpf_link__detach_fd; 11148 11149 prog_fd = bpf_program__fd(prog); 11150 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); 11151 if (link_fd < 0) { 11152 err = -errno; 11153 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 11154 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11155 goto error; 11156 } 11157 link->fd = link_fd; 11158 free(resolved_offsets); 11159 return link; 11160 11161 error: 11162 free(resolved_offsets); 11163 free(link); 11164 return libbpf_err_ptr(err); 11165 } 11166 11167 LIBBPF_API struct bpf_link * 11168 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, 11169 const char *binary_path, size_t func_offset, 11170 const struct bpf_uprobe_opts *opts) 11171 { 11172 const char *archive_path = NULL, *archive_sep = NULL; 11173 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; 11174 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11175 enum probe_attach_mode attach_mode; 11176 char full_path[PATH_MAX]; 11177 struct bpf_link *link; 11178 size_t ref_ctr_off; 11179 int pfd, err; 11180 bool retprobe, legacy; 11181 const char *func_name; 11182 11183 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 11184 return libbpf_err_ptr(-EINVAL); 11185 11186 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11187 retprobe = OPTS_GET(opts, retprobe, false); 11188 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); 11189 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11190 11191 if (!binary_path) 11192 return libbpf_err_ptr(-EINVAL); 11193 11194 /* Check if "binary_path" refers to an archive. 
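 * The archive form uses a "!/" separator, e.g. (illustrative path)
 * "/data/app/foo.apk!/lib/arm64-v8a/libfoo.so": the part before "!/" is
 * opened as a zip archive (whose members must be stored uncompressed) and
 * the part after it names the ELF member inside that archive.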
*/ 11195 archive_sep = strstr(binary_path, "!/"); 11196 if (archive_sep) { 11197 full_path[0] = '\0'; 11198 libbpf_strlcpy(full_path, binary_path, 11199 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); 11200 archive_path = full_path; 11201 binary_path = archive_sep + 2; 11202 } else if (!strchr(binary_path, '/')) { 11203 err = resolve_full_path(binary_path, full_path, sizeof(full_path)); 11204 if (err) { 11205 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11206 prog->name, binary_path, err); 11207 return libbpf_err_ptr(err); 11208 } 11209 binary_path = full_path; 11210 } 11211 func_name = OPTS_GET(opts, func_name, NULL); 11212 if (func_name) { 11213 long sym_off; 11214 11215 if (archive_path) { 11216 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, 11217 func_name); 11218 binary_path = archive_path; 11219 } else { 11220 sym_off = elf_find_func_offset_from_file(binary_path, func_name); 11221 } 11222 if (sym_off < 0) 11223 return libbpf_err_ptr(sym_off); 11224 func_offset += sym_off; 11225 } 11226 11227 legacy = determine_uprobe_perf_type() < 0; 11228 switch (attach_mode) { 11229 case PROBE_ATTACH_MODE_LEGACY: 11230 legacy = true; 11231 pe_opts.force_ioctl_attach = true; 11232 break; 11233 case PROBE_ATTACH_MODE_PERF: 11234 if (legacy) 11235 return libbpf_err_ptr(-ENOTSUP); 11236 pe_opts.force_ioctl_attach = true; 11237 break; 11238 case PROBE_ATTACH_MODE_LINK: 11239 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11240 return libbpf_err_ptr(-ENOTSUP); 11241 break; 11242 case PROBE_ATTACH_MODE_DEFAULT: 11243 break; 11244 default: 11245 return libbpf_err_ptr(-EINVAL); 11246 } 11247 11248 if (!legacy) { 11249 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, 11250 func_offset, pid, ref_ctr_off); 11251 } else { 11252 char probe_name[PATH_MAX + 64]; 11253 11254 if (ref_ctr_off) 11255 return libbpf_err_ptr(-EINVAL); 11256 11257 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), 11258 binary_path, func_offset); 11259 11260 legacy_probe = strdup(probe_name); 11261 if (!legacy_probe) 11262 return libbpf_err_ptr(-ENOMEM); 11263 11264 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, 11265 binary_path, func_offset, pid); 11266 } 11267 if (pfd < 0) { 11268 err = -errno; 11269 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", 11270 prog->name, retprobe ? "uretprobe" : "uprobe", 11271 binary_path, func_offset, 11272 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11273 goto err_out; 11274 } 11275 11276 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11277 err = libbpf_get_error(link); 11278 if (err) { 11279 close(pfd); 11280 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", 11281 prog->name, retprobe ? 
"uretprobe" : "uprobe", 11282 binary_path, func_offset, 11283 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11284 goto err_clean_legacy; 11285 } 11286 if (legacy) { 11287 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 11288 11289 perf_link->legacy_probe_name = legacy_probe; 11290 perf_link->legacy_is_kprobe = false; 11291 perf_link->legacy_is_retprobe = retprobe; 11292 } 11293 return link; 11294 11295 err_clean_legacy: 11296 if (legacy) 11297 remove_uprobe_event_legacy(legacy_probe, retprobe); 11298 err_out: 11299 free(legacy_probe); 11300 return libbpf_err_ptr(err); 11301 } 11302 11303 /* Format of u[ret]probe section definition supporting auto-attach: 11304 * u[ret]probe/binary:function[+offset] 11305 * 11306 * binary can be an absolute/relative path or a filename; the latter is resolved to a 11307 * full binary path via bpf_program__attach_uprobe_opts. 11308 * 11309 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be 11310 * specified (and auto-attach is not possible) or the above format is specified for 11311 * auto-attach. 11312 */ 11313 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11314 { 11315 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); 11316 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; 11317 int n, c, ret = -EINVAL; 11318 long offset = 0; 11319 11320 *link = NULL; 11321 11322 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 11323 &probe_type, &binary_path, &func_name); 11324 switch (n) { 11325 case 1: 11326 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 11327 ret = 0; 11328 break; 11329 case 2: 11330 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", 11331 prog->name, prog->sec_name); 11332 break; 11333 case 3: 11334 /* check if user specifies `+offset`, if yes, this should be 11335 * the last part of the string, make sure sscanf read to EOL 11336 */ 11337 func_off = strrchr(func_name, '+'); 11338 if (func_off) { 11339 n = sscanf(func_off, "+%li%n", &offset, &c); 11340 if (n == 1 && *(func_off + c) == '\0') 11341 func_off[0] = '\0'; 11342 else 11343 offset = 0; 11344 } 11345 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || 11346 strcmp(probe_type, "uretprobe.s") == 0; 11347 if (opts.retprobe && offset != 0) { 11348 pr_warn("prog '%s': uretprobes do not support offset specification\n", 11349 prog->name); 11350 break; 11351 } 11352 opts.func_name = func_name; 11353 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); 11354 ret = libbpf_get_error(*link); 11355 break; 11356 default: 11357 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 11358 prog->sec_name); 11359 break; 11360 } 11361 free(probe_type); 11362 free(binary_path); 11363 free(func_name); 11364 11365 return ret; 11366 } 11367 11368 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, 11369 bool retprobe, pid_t pid, 11370 const char *binary_path, 11371 size_t func_offset) 11372 { 11373 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); 11374 11375 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); 11376 } 11377 11378 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, 11379 pid_t pid, const char *binary_path, 11380 const char *usdt_provider, const char *usdt_name, 11381 const struct bpf_usdt_opts *opts) 11382 { 11383 char resolved_path[512]; 11384 struct 
bpf_object *obj = prog->obj; 11385 struct bpf_link *link; 11386 __u64 usdt_cookie; 11387 int err; 11388 11389 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 11390 return libbpf_err_ptr(-EINVAL); 11391 11392 if (bpf_program__fd(prog) < 0) { 11393 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", 11394 prog->name); 11395 return libbpf_err_ptr(-EINVAL); 11396 } 11397 11398 if (!binary_path) 11399 return libbpf_err_ptr(-EINVAL); 11400 11401 if (!strchr(binary_path, '/')) { 11402 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); 11403 if (err) { 11404 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11405 prog->name, binary_path, err); 11406 return libbpf_err_ptr(err); 11407 } 11408 binary_path = resolved_path; 11409 } 11410 11411 /* USDT manager is instantiated lazily on first USDT attach. It will 11412 * be destroyed together with BPF object in bpf_object__close(). 11413 */ 11414 if (IS_ERR(obj->usdt_man)) 11415 return libbpf_ptr(obj->usdt_man); 11416 if (!obj->usdt_man) { 11417 obj->usdt_man = usdt_manager_new(obj); 11418 if (IS_ERR(obj->usdt_man)) 11419 return libbpf_ptr(obj->usdt_man); 11420 } 11421 11422 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); 11423 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, 11424 usdt_provider, usdt_name, usdt_cookie); 11425 err = libbpf_get_error(link); 11426 if (err) 11427 return libbpf_err_ptr(err); 11428 return link; 11429 } 11430 11431 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11432 { 11433 char *path = NULL, *provider = NULL, *name = NULL; 11434 const char *sec_name; 11435 int n, err; 11436 11437 sec_name = bpf_program__section_name(prog); 11438 if (strcmp(sec_name, "usdt") == 0) { 11439 /* no auto-attach for just SEC("usdt") */ 11440 *link = NULL; 11441 return 0; 11442 } 11443 11444 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); 11445 if (n != 3) { 11446 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", 11447 sec_name); 11448 err = -EINVAL; 11449 } else { 11450 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, 11451 provider, name, NULL); 11452 err = libbpf_get_error(*link); 11453 } 11454 free(path); 11455 free(provider); 11456 free(name); 11457 return err; 11458 } 11459 11460 static int determine_tracepoint_id(const char *tp_category, 11461 const char *tp_name) 11462 { 11463 char file[PATH_MAX]; 11464 int ret; 11465 11466 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11467 tracefs_path(), tp_category, tp_name); 11468 if (ret < 0) 11469 return -errno; 11470 if (ret >= sizeof(file)) { 11471 pr_debug("tracepoint %s/%s path is too long\n", 11472 tp_category, tp_name); 11473 return -E2BIG; 11474 } 11475 return parse_uint_from_file(file, "%d\n"); 11476 } 11477 11478 static int perf_event_open_tracepoint(const char *tp_category, 11479 const char *tp_name) 11480 { 11481 const size_t attr_sz = sizeof(struct perf_event_attr); 11482 struct perf_event_attr attr; 11483 char errmsg[STRERR_BUFSIZE]; 11484 int tp_id, pfd, err; 11485 11486 tp_id = determine_tracepoint_id(tp_category, tp_name); 11487 if (tp_id < 0) { 11488 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", 11489 tp_category, tp_name, 11490 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); 11491 return tp_id; 11492 } 11493 11494 memset(&attr, 0, attr_sz); 11495 attr.type = PERF_TYPE_TRACEPOINT; 11496 attr.size = attr_sz; 11497 attr.config = tp_id; 11498 11499 pfd = 

static int determine_tracepoint_id(const char *tp_category,
				   const char *tp_name)
{
	char file[PATH_MAX];
	int ret;

	ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
		       tracefs_path(), tp_category, tp_name);
	if (ret < 0)
		return -errno;
	if (ret >= sizeof(file)) {
		pr_debug("tracepoint %s/%s path is too long\n",
			 tp_category, tp_name);
		return -E2BIG;
	}
	return parse_uint_from_file(file, "%d\n");
}

static int perf_event_open_tracepoint(const char *tp_category,
				      const char *tp_name)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_event_attr attr;
	char errmsg[STRERR_BUFSIZE];
	int tp_id, pfd, err;

	tp_id = determine_tracepoint_id(tp_category, tp_name);
	if (tp_id < 0) {
		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
			tp_category, tp_name,
			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
		return tp_id;
	}

	memset(&attr, 0, attr_sz);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = attr_sz;
	attr.config = tp_id;

	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0) {
		err = -errno;
		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
			tp_category, tp_name,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return err;
	}
	return pfd;
}

struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
						     const char *tp_category,
						     const char *tp_name,
						     const struct bpf_tracepoint_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int pfd, err;

	if (!OPTS_VALID(opts, bpf_tracepoint_opts))
		return libbpf_err_ptr(-EINVAL);

	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

	pfd = perf_event_open_tracepoint(tp_category, tp_name);
	if (pfd < 0) {
		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
			prog->name, tp_category, tp_name,
			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
	err = libbpf_get_error(link);
	if (err) {
		close(pfd);
		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
			prog->name, tp_category, tp_name,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(err);
	}
	return link;
}

struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
						const char *tp_category,
						const char *tp_name)
{
	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
}

static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	char *sec_name, *tp_cat, *tp_name;

	*link = NULL;

	/* no auto-attach for SEC("tp") or SEC("tracepoint") */
	if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
		return 0;

	sec_name = strdup(prog->sec_name);
	if (!sec_name)
		return -ENOMEM;

	/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
	if (str_has_pfx(prog->sec_name, "tp/"))
		tp_cat = sec_name + sizeof("tp/") - 1;
	else
		tp_cat = sec_name + sizeof("tracepoint/") - 1;
	tp_name = strchr(tp_cat, '/');
	if (!tp_name) {
		free(sec_name);
		return -EINVAL;
	}
	*tp_name = '\0';
	tp_name++;

	*link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
	free(sec_name);
	return libbpf_get_error(*link);
}
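
/* Usage sketch (illustrative, not part of libbpf itself): a program in
 * SEC("tp/sched/sched_switch") is auto-attached by attach_tp() above; the
 * explicit call below is equivalent and additionally passes a BPF cookie.
 * The "prog" variable is an assumption for the example.
 *
 *	LIBBPF_OPTS(bpf_tracepoint_opts, opts, .bpf_cookie = 42);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint_opts(prog, "sched", "sched_switch", &opts);
 *	if (!link)
 *		fprintf(stderr, "tracepoint attach failed: %d\n", -errno);
 *
 * The category/name pair maps to <tracefs>/events/<category>/<name>, which is
 * exactly what determine_tracepoint_id() reads above.
 */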

struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
						    const char *tp_name)
{
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, pfd;

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
	if (pfd < 0) {
		pfd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
			prog->name, tp_name,
			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	link->fd = pfd;
	return link;
}

static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	static const char *const prefixes[] = {
		"raw_tp",
		"raw_tracepoint",
		"raw_tp.w",
		"raw_tracepoint.w",
	};
	size_t i;
	const char *tp_name = NULL;

	*link = NULL;

	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
		size_t pfx_len;

		if (!str_has_pfx(prog->sec_name, prefixes[i]))
			continue;

		pfx_len = strlen(prefixes[i]);
		/* no auto-attach case of, e.g., SEC("raw_tp") */
		if (prog->sec_name[pfx_len] == '\0')
			return 0;

		if (prog->sec_name[pfx_len] != '/')
			continue;

		tp_name = prog->sec_name + pfx_len + 1;
		break;
	}

	if (!tp_name) {
		pr_warn("prog '%s': invalid section name '%s'\n",
			prog->name, prog->sec_name);
		return -EINVAL;
	}

	*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
	return libbpf_get_error(*link);
}

/* Common logic for all BPF program types that attach to a btf_id */
static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
						   const struct bpf_trace_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, pfd;

	if (!OPTS_VALID(opts, bpf_trace_opts))
		return libbpf_err_ptr(-EINVAL);

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
	link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
	pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
	if (pfd < 0) {
		pfd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach: %s\n",
			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	link->fd = pfd;
	return link;
}

struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
	return bpf_program__attach_btf_id(prog, NULL);
}

struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
						const struct bpf_trace_opts *opts)
{
	return bpf_program__attach_btf_id(prog, opts);
}

struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
	return bpf_program__attach_btf_id(prog, NULL);
}

static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	*link = bpf_program__attach_trace(prog);
	return libbpf_get_error(*link);
}

static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	*link = bpf_program__attach_lsm(prog);
	return libbpf_get_error(*link);
}
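
/* Usage sketch (illustrative, not part of libbpf itself): attaching an
 * fentry-style program via bpf_program__attach_trace_opts() above. Assumes a
 * loaded program defined with SEC("fentry/do_unlinkat"); the target is
 * resolved from BTF at load time, so only the optional cookie is passed here.
 *
 *	LIBBPF_OPTS(bpf_trace_opts, opts, .cookie = 0x100);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_trace_opts(prog, &opts);
 *	if (!link)
 *		fprintf(stderr, "fentry attach failed: %d\n", -errno);
 *
 * SEC("fexit/...") and SEC("lsm/...") programs follow the same pattern via
 * bpf_program__attach_trace() and bpf_program__attach_lsm().
 */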

static struct bpf_link *
bpf_program_attach_fd(const struct bpf_program *prog,
		      int target_fd, const char *target_name,
		      const struct bpf_link_create_opts *opts)
{
	enum bpf_attach_type attach_type;
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, link_fd;

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	attach_type = bpf_program__expected_attach_type(prog);
	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
	if (link_fd < 0) {
		link_fd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to %s: %s\n",
			prog->name, target_name,
			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(link_fd);
	}
	link->fd = link_fd;
	return link;
}

struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
	return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
}

struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
	return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
}

struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
	/* target_fd/target_ifindex use the same field in LINK_CREATE */
	return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
}

struct bpf_link *
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
			const struct bpf_tcx_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
	__u32 relative_id;
	int relative_fd;

	if (!OPTS_VALID(opts, bpf_tcx_opts))
		return libbpf_err_ptr(-EINVAL);

	relative_id = OPTS_GET(opts, relative_id, 0);
	relative_fd = OPTS_GET(opts, relative_fd, 0);

	/* validate we don't have unexpected combinations of non-zero fields */
	if (!ifindex) {
		pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}
	if (relative_fd && relative_id) {
		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
	link_create_opts.tcx.relative_fd = relative_fd;
	link_create_opts.tcx.relative_id = relative_id;
	link_create_opts.flags = OPTS_GET(opts, flags, 0);

	/* target_fd/target_ifindex use the same field in LINK_CREATE */
	return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
}
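
/* Usage sketch (illustrative, not part of libbpf itself): attaching a
 * SEC("tc") program through the tcx link API above. The interface name and
 * program variable are assumptions; if_nametoindex() needs <net/if.h>.
 *
 *	LIBBPF_OPTS(bpf_tcx_opts, opts,
 *		    .flags = BPF_F_BEFORE);	// insert ahead of existing programs
 *	int ifindex = if_nametoindex("eth0");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tcx(prog, ifindex, &opts);
 *	if (!link)
 *		fprintf(stderr, "tcx attach failed: %d\n", -errno);
 *
 * As validated above, ifindex must be non-zero and at most one of
 * relative_fd/relative_id may be set.
 */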

struct bpf_link *
bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
			   const struct bpf_netkit_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
	__u32 relative_id;
	int relative_fd;

	if (!OPTS_VALID(opts, bpf_netkit_opts))
		return libbpf_err_ptr(-EINVAL);

	relative_id = OPTS_GET(opts, relative_id, 0);
	relative_fd = OPTS_GET(opts, relative_fd, 0);

	/* validate we don't have unexpected combinations of non-zero fields */
	if (!ifindex) {
		pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}
	if (relative_fd && relative_id) {
		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
	link_create_opts.netkit.relative_fd = relative_fd;
	link_create_opts.netkit.relative_id = relative_id;
	link_create_opts.flags = OPTS_GET(opts, flags, 0);

	return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
}

struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
					      int target_fd,
					      const char *attach_func_name)
{
	int btf_id;

	if (!!target_fd != !!attach_func_name) {
		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	if (prog->type != BPF_PROG_TYPE_EXT) {
		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	if (target_fd) {
		LIBBPF_OPTS(bpf_link_create_opts, target_opts);

		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
		if (btf_id < 0)
			return libbpf_err_ptr(btf_id);

		target_opts.target_btf_id = btf_id;

		return bpf_program_attach_fd(prog, target_fd, "freplace",
					     &target_opts);
	} else {
		/* no target, so use raw_tracepoint_open for compatibility
		 * with old kernels
		 */
		return bpf_program__attach_trace(prog);
	}
}

struct bpf_link *
bpf_program__attach_iter(const struct bpf_program *prog,
			 const struct bpf_iter_attach_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, link_fd;
	__u32 target_fd = 0;

	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
		return libbpf_err_ptr(-EINVAL);

	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
				  &link_create_opts);
	if (link_fd < 0) {
		link_fd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to iterator: %s\n",
			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(link_fd);
	}
	link->fd = link_fd;
	return link;
}

static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	*link = bpf_program__attach_iter(prog, NULL);
	return libbpf_get_error(*link);
}
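
/* Usage sketch (illustrative, not part of libbpf itself): pairing the
 * iterator attach API above with bpf_iter_create() to read the iterator's
 * text output. Assumes a loaded SEC("iter/task") program in "prog".
 *
 *	struct bpf_link *link;
 *	char buf[256];
 *	ssize_t n;
 *	int iter_fd;
 *
 *	link = bpf_program__attach_iter(prog, NULL);
 *	if (!link)
 *		return -errno;
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	if (iter_fd >= 0) {
 *		while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(iter_fd);
 *	}
 *	bpf_link__destroy(link);
 */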

struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
					       const struct bpf_netfilter_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, lopts);
	struct bpf_link *link;
	int prog_fd, link_fd;

	if (!OPTS_VALID(opts, bpf_netfilter_opts))
		return libbpf_err_ptr(-EINVAL);

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);

	link->detach = &bpf_link__detach_fd;

	lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
	lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
	lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
	lopts.netfilter.flags = OPTS_GET(opts, flags, 0);

	link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
	if (link_fd < 0) {
		char errmsg[STRERR_BUFSIZE];

		link_fd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to netfilter: %s\n",
			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(link_fd);
	}
	link->fd = link_fd;

	return link;
}

struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
{
	struct bpf_link *link = NULL;
	int err;

	if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
		return libbpf_err_ptr(-EOPNOTSUPP);

	err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
	if (err)
		return libbpf_err_ptr(err);

	/* When calling bpf_program__attach() explicitly, auto-attach support
	 * is expected to work, so a NULL returned link is considered an error.
	 * This is different for skeleton's attach, see comment in
	 * bpf_object__attach_skeleton().
	 */
	if (!link)
		return libbpf_err_ptr(-EOPNOTSUPP);

	return link;
}
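
/* Usage sketch (illustrative, not part of libbpf itself):
 * bpf_program__attach() selects the attach method from the program's SEC()
 * definition. A minimal loop attaching every program in an object ("obj" is
 * an assumption for the example):
 *
 *	struct bpf_program *prog;
 *	struct bpf_link *link;
 *
 *	bpf_object__for_each_program(prog, obj) {
 *		link = bpf_program__attach(prog);
 *		if (!link) {
 *			fprintf(stderr, "prog %s: attach failed: %d\n",
 *				bpf_program__name(prog), -errno);
 *			continue;
 *		}
 *		// keep the link alive; destroying it detaches the program
 *	}
 */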

struct bpf_link_struct_ops {
	struct bpf_link link;
	int map_fd;
};

static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
	struct bpf_link_struct_ops *st_link;
	__u32 zero = 0;

	st_link = container_of(link, struct bpf_link_struct_ops, link);

	if (st_link->map_fd < 0)
		/* w/o a real link */
		return bpf_map_delete_elem(link->fd, &zero);

	return close(link->fd);
}

struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
{
	struct bpf_link_struct_ops *link;
	__u32 zero = 0;
	int err, fd;

	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
		return libbpf_err_ptr(-EINVAL);

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-EINVAL);

	/* kern_vdata should be prepared during the loading phase. */
	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
	/* It can be EBUSY if the map has been used to create or
	 * update a link before. We don't allow updating the value of
	 * a struct_ops once it is set. That ensures that the value
	 * never changes. So, it is safe to skip EBUSY.
	 */
	if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
		free(link);
		return libbpf_err_ptr(err);
	}

	link->link.detach = bpf_link__detach_struct_ops;

	if (!(map->def.map_flags & BPF_F_LINK)) {
		/* w/o a real link */
		link->link.fd = map->fd;
		link->map_fd = -1;
		return &link->link;
	}

	fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
	if (fd < 0) {
		free(link);
		return libbpf_err_ptr(fd);
	}

	link->link.fd = fd;
	link->map_fd = map->fd;

	return &link->link;
}

/*
 * Swap the backing struct_ops of a link with a new struct_ops map.
 */
int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
{
	struct bpf_link_struct_ops *st_ops_link;
	__u32 zero = 0;
	int err;

	if (!bpf_map__is_struct_ops(map) || map->fd < 0)
		return -EINVAL;

	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
	/* Ensure the type of a link is correct */
	if (st_ops_link->map_fd < 0)
		return -EINVAL;

	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
	/* It can be EBUSY if the map has been used to create or
	 * update a link before. We don't allow updating the value of
	 * a struct_ops once it is set. That ensures that the value
	 * never changes. So, it is safe to skip EBUSY.
	 */
	if (err && err != -EBUSY)
		return err;

	err = bpf_link_update(link->fd, map->fd, NULL);
	if (err < 0)
		return err;

	st_ops_link->map_fd = map->fd;

	return 0;
}
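
/* Usage sketch (illustrative, not part of libbpf itself): the struct_ops
 * flow implemented by the two functions above. Assumes a skeleton with
 * struct_ops maps "my_ops" and "my_ops_v2" declared with BPF_F_LINK; all
 * names are assumptions for the example.
 *
 *	struct bpf_link *link;
 *	int err;
 *
 *	link = bpf_map__attach_struct_ops(skel->maps.my_ops);
 *	if (!link)
 *		return -errno;
 *
 *	// later, atomically point the kernel at a different struct_ops map;
 *	// this only works for BPF_F_LINK maps attached via a real link:
 *	err = bpf_link__update_map(link, skel->maps.my_ops_v2);
 *	if (err)
 *		fprintf(stderr, "update_map failed: %d\n", err);
 */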

typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
							   void *private_data);

static enum bpf_perf_event_ret
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
		       void **copy_mem, size_t *copy_size,
		       bpf_perf_event_print_t fn, void *private_data)
{
	struct perf_event_mmap_page *header = mmap_mem;
	__u64 data_head = ring_buffer_read_head(header);
	__u64 data_tail = header->data_tail;
	void *base = ((__u8 *)header) + page_size;
	int ret = LIBBPF_PERF_EVENT_CONT;
	struct perf_event_header *ehdr;
	size_t ehdr_size;

	while (data_head != data_tail) {
		ehdr = base + (data_tail & (mmap_size - 1));
		ehdr_size = ehdr->size;

		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
			void *copy_start = ehdr;
			size_t len_first = base + mmap_size - copy_start;
			size_t len_secnd = ehdr_size - len_first;

			if (*copy_size < ehdr_size) {
				free(*copy_mem);
				*copy_mem = malloc(ehdr_size);
				if (!*copy_mem) {
					*copy_size = 0;
					ret = LIBBPF_PERF_EVENT_ERROR;
					break;
				}
				*copy_size = ehdr_size;
			}

			memcpy(*copy_mem, copy_start, len_first);
			memcpy(*copy_mem + len_first, base, len_secnd);
			ehdr = *copy_mem;
		}

		ret = fn(ehdr, private_data);
		data_tail += ehdr_size;
		if (ret != LIBBPF_PERF_EVENT_CONT)
			break;
	}

	ring_buffer_write_tail(header, data_tail);
	return libbpf_err(ret);
}

struct perf_buffer;

struct perf_buffer_params {
	struct perf_event_attr *attr;
	/* if event_cb is specified, it takes precedence */
	perf_buffer_event_fn event_cb;
	/* sample_cb and lost_cb are higher-level common-case callbacks */
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx;
	int cpu_cnt;
	int *cpus;
	int *map_keys;
};

struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* epoll instance FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
				      struct perf_cpu_buf *cpu_buf)
{
	if (!cpu_buf)
		return;
	if (cpu_buf->base &&
	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
	if (cpu_buf->fd >= 0) {
		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
		close(cpu_buf->fd);
	}
	free(cpu_buf->buf);
	free(cpu_buf);
}

void perf_buffer__free(struct perf_buffer *pb)
{
	int i;

	if (IS_ERR_OR_NULL(pb))
		return;
	if (pb->cpu_bufs) {
		for (i = 0; i < pb->cpu_cnt; i++) {
			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];

			if (!cpu_buf)
				continue;

			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
			perf_buffer__free_cpu_buf(pb, cpu_buf);
		}
		free(pb->cpu_bufs);
	}
	if (pb->epoll_fd >= 0)
		close(pb->epoll_fd);
	free(pb->events);
	free(pb);
}

static struct perf_cpu_buf *
perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
			  int cpu, int map_key)
{
	struct perf_cpu_buf *cpu_buf;
	char msg[STRERR_BUFSIZE];
	int err;

	cpu_buf = calloc(1, sizeof(*cpu_buf));
	if (!cpu_buf)
		return ERR_PTR(-ENOMEM);

	cpu_buf->pb = pb;
	cpu_buf->cpu = cpu;
	cpu_buf->map_key = map_key;

	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
			      -1, PERF_FLAG_FD_CLOEXEC);
	if (cpu_buf->fd < 0) {
		err = -errno;
		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
			     PROT_READ | PROT_WRITE, MAP_SHARED,
			     cpu_buf->fd, 0);
	if (cpu_buf->base == MAP_FAILED) {
		cpu_buf->base = NULL;
		err = -errno;
		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
		err = -errno;
		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	return cpu_buf;

error:
	perf_buffer__free_cpu_buf(pb, cpu_buf);
	return (struct perf_cpu_buf *)ERR_PTR(err);
}

static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
					      struct perf_buffer_params *p);

struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
				     perf_buffer_sample_fn sample_cb,
				     perf_buffer_lost_fn lost_cb,
				     void *ctx,
				     const struct perf_buffer_opts *opts)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_buffer_params p = {};
	struct perf_event_attr attr;
	__u32 sample_period;

	if (!OPTS_VALID(opts, perf_buffer_opts))
		return libbpf_err_ptr(-EINVAL);

	sample_period = OPTS_GET(opts, sample_period, 1);
	if (!sample_period)
		sample_period = 1;

	memset(&attr, 0, attr_sz);
	attr.size = attr_sz;
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.sample_period = sample_period;
	attr.wakeup_events = sample_period;

	p.attr = &attr;
	p.sample_cb = sample_cb;
	p.lost_cb = lost_cb;
	p.ctx = ctx;

	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}

struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
					 struct perf_event_attr *attr,
					 perf_buffer_event_fn event_cb, void *ctx,
					 const struct perf_buffer_raw_opts *opts)
{
	struct perf_buffer_params p = {};

	if (!attr)
		return libbpf_err_ptr(-EINVAL);

	if (!OPTS_VALID(opts, perf_buffer_raw_opts))
		return libbpf_err_ptr(-EINVAL);

	p.attr = attr;
	p.event_cb = event_cb;
	p.ctx = ctx;
	p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
	p.cpus = OPTS_GET(opts, cpus, NULL);
	p.map_keys = OPTS_GET(opts, map_keys, NULL);

	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
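
/* Usage sketch (illustrative, not part of libbpf itself): typical
 * perf_buffer setup against a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. The map
 * name ("events"), callbacks and page count (8 pages per CPU) are
 * assumptions for the example.
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// process one record emitted via bpf_perf_event_output()
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		fprintf(stderr, "lost %llu events on CPU %d\n", cnt, cpu);
 *	}
 *
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8,
 *			      handle_sample, handle_lost, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 */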

static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
					      struct perf_buffer_params *p)
{
	const char *online_cpus_file = "/sys/devices/system/cpu/online";
	struct bpf_map_info map;
	char msg[STRERR_BUFSIZE];
	struct perf_buffer *pb;
	bool *online = NULL;
	__u32 map_info_len;
	int err, i, j, n;

	if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
		pr_warn("page count should be power of two, but is %zu\n",
			page_cnt);
		return ERR_PTR(-EINVAL);
	}

	/* best-effort sanity checks */
	memset(&map, 0, sizeof(map));
	map_info_len = sizeof(map);
	err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
	if (err) {
		err = -errno;
		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
		 * -EBADFD, -EFAULT, or -E2BIG on real error
		 */
		if (err != -EINVAL) {
			pr_warn("failed to get map info for map FD %d: %s\n",
				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
			return ERR_PTR(err);
		}
		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
			 map_fd);
	} else {
		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
				map.name);
			return ERR_PTR(-EINVAL);
		}
	}

	pb = calloc(1, sizeof(*pb));
	if (!pb)
		return ERR_PTR(-ENOMEM);

	pb->event_cb = p->event_cb;
	pb->sample_cb = p->sample_cb;
	pb->lost_cb = p->lost_cb;
	pb->ctx = p->ctx;

	pb->page_size = getpagesize();
	pb->mmap_size = pb->page_size * page_cnt;
	pb->map_fd = map_fd;

	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (pb->epoll_fd < 0) {
		err = -errno;
		pr_warn("failed to create epoll instance: %s\n",
			libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	if (p->cpu_cnt > 0) {
		pb->cpu_cnt = p->cpu_cnt;
	} else {
		pb->cpu_cnt = libbpf_num_possible_cpus();
		if (pb->cpu_cnt < 0) {
			err = pb->cpu_cnt;
			goto error;
		}
		if (map.max_entries && map.max_entries < pb->cpu_cnt)
			pb->cpu_cnt = map.max_entries;
	}

	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
	if (!pb->events) {
		err = -ENOMEM;
		pr_warn("failed to allocate events: out of memory\n");
		goto error;
	}
	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
	if (!pb->cpu_bufs) {
		err = -ENOMEM;
		pr_warn("failed to allocate buffers: out of memory\n");
		goto error;
	}

	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
	if (err) {
		pr_warn("failed to get online CPU mask: %d\n", err);
		goto error;
	}

	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf;
		int cpu, map_key;

		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;

		/* in case user didn't explicitly request particular CPUs to
		 * be attached to, skip offline/not present CPUs
		 */
		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
			continue;

		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
		if (IS_ERR(cpu_buf)) {
			err = PTR_ERR(cpu_buf);
			goto error;
		}

		pb->cpu_bufs[j] = cpu_buf;

		err = bpf_map_update_elem(pb->map_fd, &map_key,
					  &cpu_buf->fd, 0);
		if (err) {
			err = -errno;
			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
				cpu, map_key, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}

		pb->events[j].events = EPOLLIN;
		pb->events[j].data.ptr = cpu_buf;
		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
			      &pb->events[j]) < 0) {
			err = -errno;
			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
				cpu, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}
		j++;
	}
	pb->cpu_cnt = j;
	free(online);

	return pb;

error:
	free(online);
	if (pb)
		perf_buffer__free(pb);
	return ERR_PTR(err);
}

struct perf_sample_raw {
	struct perf_event_header header;
	uint32_t size;
	char data[];
};

struct perf_sample_lost {
	struct perf_event_header header;
	uint64_t id;
	uint64_t lost;
	uint64_t sample_id;
};

static enum bpf_perf_event_ret
perf_buffer__process_record(struct perf_event_header *e, void *ctx)
{
	struct perf_cpu_buf *cpu_buf = ctx;
	struct perf_buffer *pb = cpu_buf->pb;
	void *data = e;

	/* user wants full control over parsing perf event */
	if (pb->event_cb)
		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);

	switch (e->type) {
	case PERF_RECORD_SAMPLE: {
		struct perf_sample_raw *s = data;

		if (pb->sample_cb)
			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
		break;
	}
	case PERF_RECORD_LOST: {
		struct perf_sample_lost *s = data;

		if (pb->lost_cb)
			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
		break;
	}
	default:
		pr_warn("unknown perf sample type %d\n", e->type);
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static int perf_buffer__process_records(struct perf_buffer *pb,
					struct perf_cpu_buf *cpu_buf)
{
	enum bpf_perf_event_ret ret;

	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
				     pb->page_size, &cpu_buf->buf,
				     &cpu_buf->buf_size,
				     perf_buffer__process_record, cpu_buf);
	if (ret != LIBBPF_PERF_EVENT_CONT)
		return ret;
	return 0;
}

int perf_buffer__epoll_fd(const struct perf_buffer *pb)
{
	return pb->epoll_fd;
}

int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
	int i, cnt, err;

	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
	if (cnt < 0)
		return -errno;

	for (i = 0; i < cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("error while processing records: %d\n", err);
			return libbpf_err(err);
		}
	}
	return cnt;
}
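
/* Usage sketch (illustrative, not part of libbpf itself): the usual
 * consumption loop built on perf_buffer__poll() above; "pb" is the buffer
 * from the earlier sketch and "exiting" is an assumed stop flag.
 *
 *	int n;
 *
 *	while (!exiting) {
 *		n = perf_buffer__poll(pb, 100);	// timeout in ms
 *		if (n < 0 && n != -EINTR) {
 *			fprintf(stderr, "poll error: %d\n", n);
 *			break;
 *		}
 *	}
 *	perf_buffer__free(pb);
 *
 * perf_buffer__consume() can be used instead to drain all buffers without
 * waiting, e.g. right before exit.
 */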

/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
 * manager.
 */
size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
{
	return pb->cpu_cnt;
}

/*
 * Return perf_event FD of a ring buffer in *buf_idx* slot of
 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
 * select()/poll()/epoll() Linux syscalls.
 */
int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return cpu_buf->fd;
}

int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	*buf = cpu_buf->base;
	*buf_size = pb->mmap_size;
	return 0;
}

/*
 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
 * consume, do nothing and return success.
 * Returns:
 *   - 0 on success;
 *   - <0 on failure.
 */
int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return perf_buffer__process_records(pb, cpu_buf);
}

int perf_buffer__consume(struct perf_buffer *pb)
{
	int i, err;

	for (i = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];

		if (!cpu_buf)
			continue;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
			return libbpf_err(err);
		}
	}
	return 0;
}

int bpf_program__set_attach_target(struct bpf_program *prog,
				   int attach_prog_fd,
				   const char *attach_func_name)
{
	int btf_obj_fd = 0, btf_id = 0, err;

	if (!prog || attach_prog_fd < 0)
		return libbpf_err(-EINVAL);

	if (prog->obj->loaded)
		return libbpf_err(-EINVAL);

	if (attach_prog_fd && !attach_func_name) {
		/* remember attach_prog_fd and let bpf_program__load() find
		 * BTF ID during the program load
		 */
		prog->attach_prog_fd = attach_prog_fd;
		return 0;
	}

	if (attach_prog_fd) {
		btf_id = libbpf_find_prog_btf_id(attach_func_name,
						 attach_prog_fd);
		if (btf_id < 0)
			return libbpf_err(btf_id);
	} else {
		if (!attach_func_name)
			return libbpf_err(-EINVAL);

		/* load btf_vmlinux, if not yet */
		err = bpf_object__load_vmlinux_btf(prog->obj, true);
		if (err)
			return libbpf_err(err);
		err = find_kernel_btf_id(prog->obj, attach_func_name,
					 prog->expected_attach_type,
					 &btf_obj_fd, &btf_id);
		if (err)
			return libbpf_err(err);
	}

	prog->attach_btf_id = btf_id;
	prog->attach_btf_obj_fd = btf_obj_fd;
	prog->attach_prog_fd = attach_prog_fd;
	return 0;
}
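
/* Usage sketch (illustrative, not part of libbpf itself):
 * bpf_program__set_attach_target() must be called after open but before
 * load. Below, an assumed SEC("freplace/...") program "new_handler" is
 * retargeted at a function of an already-loaded program; the skeleton name,
 * program ID and function name are assumptions for the example.
 *
 *	int err, target_fd = bpf_prog_get_fd_by_id(target_prog_id);
 *
 *	err = bpf_program__set_attach_target(skel->progs.new_handler,
 *					     target_fd, "xdp_do_something");
 *	if (err)
 *		return err;
 *	err = my_prog_bpf__load(skel);	// load with the new target resolved
 *
 * Passing attach_prog_fd = 0 with a function name instead resolves the
 * target against vmlinux/module BTF, as the code above shows.
 */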

int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each substring separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		if (start < 0 || start > end) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		tmp = realloc(*mask, end + 1);
		if (!tmp) {
			err = -ENOMEM;
			goto cleanup;
		}
		*mask = tmp;
		memset(tmp + *mask_sz, 0, start - *mask_sz);
		memset(tmp + start, 1, end - start + 1);
		*mask_sz = end + 1;
		s += len;
	}
	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}
	return 0;
cleanup:
	free(*mask);
	*mask = NULL;
	return err;
}

int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
{
	int fd, err = 0, len;
	char buf[128];

	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		err = -errno;
		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
		return err;
	}
	len = read(fd, buf, sizeof(buf));
	close(fd);
	if (len <= 0) {
		err = len ? -errno : -EINVAL;
		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
		return err;
	}
	if (len >= sizeof(buf)) {
		pr_warn("CPU mask is too big in file %s\n", fcpu);
		return -E2BIG;
	}
	buf[len] = '\0';

	return parse_cpu_mask_str(buf, mask, mask_sz);
}

int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int err, n, i, tmp_cpus;
	bool *mask;

	tmp_cpus = READ_ONCE(cpus);
	if (tmp_cpus > 0)
		return tmp_cpus;

	err = parse_cpu_mask_file(fcpu, &mask, &n);
	if (err)
		return libbpf_err(err);

	tmp_cpus = 0;
	for (i = 0; i < n; i++) {
		if (mask[i])
			tmp_cpus++;
	}
	free(mask);

	WRITE_ONCE(cpus, tmp_cpus);
	return tmp_cpus;
}
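
/* Usage sketch (illustrative, not part of libbpf itself):
 * libbpf_num_possible_cpus() gives the element count needed for per-CPU map
 * lookups. The map name and key below are assumptions for the example.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u32 key = 0;
 *	__u64 *vals;
 *	int err;
 *
 *	if (ncpus < 0)
 *		return ncpus;
 *	vals = calloc(ncpus, sizeof(*vals));
 *	err = bpf_map__lookup_elem(skel->maps.percpu_counts, &key, sizeof(key),
 *				   vals, ncpus * sizeof(*vals), 0);
 *
 * parse_cpu_mask_str() above accepts the kernel's list format, e.g. "0-3,5"
 * yields a 6-entry mask with entry 4 cleared.
 */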

static int populate_skeleton_maps(const struct bpf_object *obj,
				  struct bpf_map_skeleton *maps,
				  size_t map_cnt)
{
	int i;

	for (i = 0; i < map_cnt; i++) {
		struct bpf_map **map = maps[i].map;
		const char *name = maps[i].name;
		void **mmaped = maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}
	return 0;
}

static int populate_skeleton_progs(const struct bpf_object *obj,
				   struct bpf_prog_skeleton *progs,
				   size_t prog_cnt)
{
	int i;

	for (i = 0; i < prog_cnt; i++) {
		struct bpf_program **prog = progs[i].prog;
		const char *name = progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}
	return 0;
}

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int err;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overwriting object name for skeletons is discouraged,
	 * as it breaks global data maps, because they contain object name
	 * prefix as their own map name prefix. When skeleton is generated,
	 * bpftool is making an assumption that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	err = libbpf_get_error(obj);
	if (err) {
		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
			s->name, err);
		return libbpf_err(err);
	}

	*s->obj = obj;
	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	return 0;
}
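
/* Usage sketch (illustrative, not part of libbpf itself): the functions
 * above back the bpftool-generated skeleton helpers. A typical lifecycle,
 * assuming a skeleton generated into "my_prog.skel.h" (all names are
 * assumptions for the example):
 *
 *	struct my_prog_bpf *skel;
 *	int err;
 *
 *	skel = my_prog_bpf__open();	// calls bpf_object__open_skeleton()
 *	if (!skel)
 *		return -errno;
 *	// adjust globals, map sizes, autoload flags here, before load
 *	err = my_prog_bpf__load(skel);	// calls bpf_object__load_skeleton()
 *	if (!err)
 *		err = my_prog_bpf__attach(skel);
 *	...
 *	my_prog_bpf__destroy(skel);
 */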

int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
{
	int err, len, var_idx, i;
	const char *var_name;
	const struct bpf_map *map;
	struct btf *btf;
	__u32 map_type_id;
	const struct btf_type *map_type, *var_type;
	const struct bpf_var_skeleton *var_skel;
	struct btf_var_secinfo *var;

	if (!s->obj)
		return libbpf_err(-EINVAL);

	btf = bpf_object__btf(s->obj);
	if (!btf) {
		pr_warn("subskeletons require BTF at runtime (object %s)\n",
			bpf_object__name(s->obj));
		return libbpf_err(-errno);
	}

	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton maps: %d\n", err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton progs: %d\n", err);
		return libbpf_err(err);
	}

	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
		var_skel = &s->vars[var_idx];
		map = *var_skel->map;
		map_type_id = bpf_map__btf_value_type_id(map);
		map_type = btf__type_by_id(btf, map_type_id);

		if (!btf_is_datasec(map_type)) {
			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
				bpf_map__name(map),
				__btf_kind_str(btf_kind(map_type)));
			return libbpf_err(-EINVAL);
		}

		len = btf_vlen(map_type);
		var = btf_var_secinfos(map_type);
		for (i = 0; i < len; i++, var++) {
			var_type = btf__type_by_id(btf, var->type);
			var_name = btf__name_by_offset(btf, var_type->name_off);
			if (strcmp(var_name, var_skel->name) == 0) {
				*var_skel->addr = map->mmaped + var->offset;
				break;
			}
		}
	}
	return 0;
}

void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
{
	if (!s)
		return;
	free(s->maps);
	free(s->progs);
	free(s->vars);
	free(s);
}

int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
		int prot, map_fd = bpf_map__fd(map);
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap anonymous mmap()-ed "map initialization image" as
		 * a BPF map-backed mmap()-ed memory, but preserving the same
		 * memory address. This will cause kernel to change process'
		 * page table to point to a different piece of kernel memory,
		 * but from userspace point of view memory address (and its
		 * contents, being identical at this point) will stay the
		 * same. This mapping will be released by bpf_object__close()
		 * as per normal clean up procedure, so we don't need to worry
		 * about it from skeleton's clean up perspective.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				bpf_map__name(map), err);
			return libbpf_err(err);
		}
	}

	return 0;
}
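
/* Usage sketch (illustrative, not part of libbpf itself): the re-mmap()
 * above is what turns skeleton globals into live views of BPF map memory
 * after load. Assuming a skeleton with a .bss variable "dropped" (names are
 * assumptions for the example):
 *
 *	// before load: plain userspace initialization image
 *	skel->bss->dropped = 0;
 *	err = my_prog_bpf__load(skel);
 *
 *	// after load: the same pointer now reads/writes the kernel-side map
 *	printf("dropped so far: %llu\n", skel->bss->dropped);
 */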

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;

		if (!prog->autoload || !prog->autoattach)
			continue;

		/* auto-attaching not supported for this program */
		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
			continue;

		/* if user already set the link manually, don't attempt auto-attach */
		if (*link)
			continue;

		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
		if (err) {
			pr_warn("prog '%s': failed to auto-attach: %d\n",
				bpf_program__name(prog), err);
			return libbpf_err(err);
		}

		/* It's possible that for some SEC() definitions auto-attach
		 * is supported in some cases (e.g., if definition completely
		 * specifies target information), but is not in other cases.
		 * SEC("uprobe") is one such case. If user specified target
		 * binary and function name, such BPF program can be
		 * auto-attached. But if not, it shouldn't trigger skeleton's
		 * attach to fail. It should just be skipped.
		 * attach_fn signals such case by returning 0 (no error) and
		 * setting link to NULL.
		 */
	}

	return 0;
}

void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (!s)
		return;

	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}