1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 3 /* 4 * Common eBPF ELF object loading operations. 5 * 6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> 7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> 8 * Copyright (C) 2015 Huawei Inc. 9 * Copyright (C) 2017 Nicira, Inc. 10 * Copyright (C) 2019 Isovalent, Inc. 11 */ 12 13 #ifndef _GNU_SOURCE 14 #define _GNU_SOURCE 15 #endif 16 #include <stdlib.h> 17 #include <stdio.h> 18 #include <stdarg.h> 19 #include <libgen.h> 20 #include <inttypes.h> 21 #include <limits.h> 22 #include <string.h> 23 #include <unistd.h> 24 #include <endian.h> 25 #include <fcntl.h> 26 #include <errno.h> 27 #include <ctype.h> 28 #include <asm/unistd.h> 29 #include <linux/err.h> 30 #include <linux/kernel.h> 31 #include <linux/bpf.h> 32 #include <linux/btf.h> 33 #include <linux/filter.h> 34 #include <linux/limits.h> 35 #include <linux/perf_event.h> 36 #include <linux/bpf_perf_event.h> 37 #include <linux/ring_buffer.h> 38 #include <sys/epoll.h> 39 #include <sys/ioctl.h> 40 #include <sys/mman.h> 41 #include <sys/stat.h> 42 #include <sys/types.h> 43 #include <sys/vfs.h> 44 #include <sys/utsname.h> 45 #include <sys/resource.h> 46 #include <libelf.h> 47 #include <gelf.h> 48 #include <zlib.h> 49 50 #include "libbpf.h" 51 #include "bpf.h" 52 #include "btf.h" 53 #include "str_error.h" 54 #include "libbpf_internal.h" 55 #include "hashmap.h" 56 #include "bpf_gen_internal.h" 57 #include "zip.h" 58 59 #ifndef BPF_FS_MAGIC 60 #define BPF_FS_MAGIC 0xcafe4a11 61 #endif 62 63 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" 64 65 #define BPF_INSN_SZ (sizeof(struct bpf_insn)) 66 67 /* vsprintf() in __base_pr() uses nonliteral format string. It may break 68 * compilation if user enables corresponding warning. Disable it explicitly. 
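 *
 * (For illustration: __base_pr() below is just the default logger; an
 * application can install its own callback with libbpf_set_print(). A
 * minimal sketch, with my_pr being a hypothetical name, could be:
 *
 *	static int my_pr(enum libbpf_print_level lvl, const char *fmt, va_list ap)
 *	{
 *		if (lvl == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, fmt, ap);
 *	}
 *	...
 *	libbpf_set_print(my_pr);
 *
 * The previously installed callback is returned, so it can be restored later.)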
69 */ 70 #pragma GCC diagnostic ignored "-Wformat-nonliteral" 71 72 #define __printf(a, b) __attribute__((format(printf, a, b))) 73 74 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); 75 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); 76 static int map_set_def_max_entries(struct bpf_map *map); 77 78 static const char * const attach_type_name[] = { 79 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", 80 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", 81 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", 82 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", 83 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", 84 [BPF_CGROUP_DEVICE] = "cgroup_device", 85 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", 86 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", 87 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", 88 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", 89 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", 90 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", 91 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", 92 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", 93 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", 94 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", 95 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", 96 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", 97 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", 98 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", 99 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", 100 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", 101 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", 102 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", 103 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", 104 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", 105 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", 106 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", 107 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", 108 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", 109 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", 110 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", 111 [BPF_LIRC_MODE2] = "lirc_mode2", 112 [BPF_FLOW_DISSECTOR] = "flow_dissector", 113 [BPF_TRACE_RAW_TP] = "trace_raw_tp", 114 [BPF_TRACE_FENTRY] = "trace_fentry", 115 [BPF_TRACE_FEXIT] = "trace_fexit", 116 [BPF_MODIFY_RETURN] = "modify_return", 117 [BPF_LSM_MAC] = "lsm_mac", 118 [BPF_LSM_CGROUP] = "lsm_cgroup", 119 [BPF_SK_LOOKUP] = "sk_lookup", 120 [BPF_TRACE_ITER] = "trace_iter", 121 [BPF_XDP_DEVMAP] = "xdp_devmap", 122 [BPF_XDP_CPUMAP] = "xdp_cpumap", 123 [BPF_XDP] = "xdp", 124 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", 125 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", 126 [BPF_PERF_EVENT] = "perf_event", 127 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", 128 [BPF_STRUCT_OPS] = "struct_ops", 129 [BPF_NETFILTER] = "netfilter", 130 [BPF_TCX_INGRESS] = "tcx_ingress", 131 [BPF_TCX_EGRESS] = "tcx_egress", 132 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", 133 [BPF_NETKIT_PRIMARY] = "netkit_primary", 134 [BPF_NETKIT_PEER] = "netkit_peer", 135 }; 136 137 static const char * const link_type_name[] = { 138 [BPF_LINK_TYPE_UNSPEC] = "unspec", 139 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", 140 [BPF_LINK_TYPE_TRACING] = "tracing", 141 [BPF_LINK_TYPE_CGROUP] = "cgroup", 142 [BPF_LINK_TYPE_ITER] = "iter", 143 [BPF_LINK_TYPE_NETNS] = "netns", 144 [BPF_LINK_TYPE_XDP] = "xdp", 145 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", 
146 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", 147 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", 148 [BPF_LINK_TYPE_NETFILTER] = "netfilter", 149 [BPF_LINK_TYPE_TCX] = "tcx", 150 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", 151 [BPF_LINK_TYPE_NETKIT] = "netkit", 152 }; 153 154 static const char * const map_type_name[] = { 155 [BPF_MAP_TYPE_UNSPEC] = "unspec", 156 [BPF_MAP_TYPE_HASH] = "hash", 157 [BPF_MAP_TYPE_ARRAY] = "array", 158 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", 159 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", 160 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", 161 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", 162 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", 163 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", 164 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", 165 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", 166 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", 167 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", 168 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", 169 [BPF_MAP_TYPE_DEVMAP] = "devmap", 170 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", 171 [BPF_MAP_TYPE_SOCKMAP] = "sockmap", 172 [BPF_MAP_TYPE_CPUMAP] = "cpumap", 173 [BPF_MAP_TYPE_XSKMAP] = "xskmap", 174 [BPF_MAP_TYPE_SOCKHASH] = "sockhash", 175 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", 176 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", 177 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", 178 [BPF_MAP_TYPE_QUEUE] = "queue", 179 [BPF_MAP_TYPE_STACK] = "stack", 180 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", 181 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", 182 [BPF_MAP_TYPE_RINGBUF] = "ringbuf", 183 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", 184 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", 185 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", 186 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", 187 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", 188 [BPF_MAP_TYPE_ARENA] = "arena", 189 }; 190 191 static const char * const prog_type_name[] = { 192 [BPF_PROG_TYPE_UNSPEC] = "unspec", 193 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", 194 [BPF_PROG_TYPE_KPROBE] = "kprobe", 195 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", 196 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", 197 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", 198 [BPF_PROG_TYPE_XDP] = "xdp", 199 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", 200 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", 201 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", 202 [BPF_PROG_TYPE_LWT_IN] = "lwt_in", 203 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", 204 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", 205 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", 206 [BPF_PROG_TYPE_SK_SKB] = "sk_skb", 207 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", 208 [BPF_PROG_TYPE_SK_MSG] = "sk_msg", 209 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", 210 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", 211 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", 212 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", 213 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", 214 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", 215 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", 216 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", 217 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", 218 [BPF_PROG_TYPE_TRACING] = "tracing", 219 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", 220 [BPF_PROG_TYPE_EXT] = "ext", 221 [BPF_PROG_TYPE_LSM] = "lsm", 222 [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", 223 [BPF_PROG_TYPE_SYSCALL] = "syscall", 224 [BPF_PROG_TYPE_NETFILTER] = "netfilter", 225 }; 226 227 static int __base_pr(enum libbpf_print_level level, 
const char *format, 228 va_list args) 229 { 230 if (level == LIBBPF_DEBUG) 231 return 0; 232 233 return vfprintf(stderr, format, args); 234 } 235 236 static libbpf_print_fn_t __libbpf_pr = __base_pr; 237 238 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) 239 { 240 libbpf_print_fn_t old_print_fn; 241 242 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED); 243 244 return old_print_fn; 245 } 246 247 __printf(2, 3) 248 void libbpf_print(enum libbpf_print_level level, const char *format, ...) 249 { 250 va_list args; 251 int old_errno; 252 libbpf_print_fn_t print_fn; 253 254 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED); 255 if (!print_fn) 256 return; 257 258 old_errno = errno; 259 260 va_start(args, format); 261 __libbpf_pr(level, format, args); 262 va_end(args); 263 264 errno = old_errno; 265 } 266 267 static void pr_perm_msg(int err) 268 { 269 struct rlimit limit; 270 char buf[100]; 271 272 if (err != -EPERM || geteuid() != 0) 273 return; 274 275 err = getrlimit(RLIMIT_MEMLOCK, &limit); 276 if (err) 277 return; 278 279 if (limit.rlim_cur == RLIM_INFINITY) 280 return; 281 282 if (limit.rlim_cur < 1024) 283 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); 284 else if (limit.rlim_cur < 1024*1024) 285 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); 286 else 287 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); 288 289 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", 290 buf); 291 } 292 293 #define STRERR_BUFSIZE 128 294 295 /* Copied from tools/perf/util/util.h */ 296 #ifndef zfree 297 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) 298 #endif 299 300 #ifndef zclose 301 # define zclose(fd) ({ \ 302 int ___err = 0; \ 303 if ((fd) >= 0) \ 304 ___err = close((fd)); \ 305 fd = -1; \ 306 ___err; }) 307 #endif 308 309 static inline __u64 ptr_to_u64(const void *ptr) 310 { 311 return (__u64) (unsigned long) ptr; 312 } 313 314 int libbpf_set_strict_mode(enum libbpf_strict_mode mode) 315 { 316 /* as of v1.0 libbpf_set_strict_mode() is a no-op */ 317 return 0; 318 } 319 320 __u32 libbpf_major_version(void) 321 { 322 return LIBBPF_MAJOR_VERSION; 323 } 324 325 __u32 libbpf_minor_version(void) 326 { 327 return LIBBPF_MINOR_VERSION; 328 } 329 330 const char *libbpf_version_string(void) 331 { 332 #define __S(X) #X 333 #define _S(X) __S(X) 334 return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION); 335 #undef _S 336 #undef __S 337 } 338 339 enum reloc_type { 340 RELO_LD64, 341 RELO_CALL, 342 RELO_DATA, 343 RELO_EXTERN_LD64, 344 RELO_EXTERN_CALL, 345 RELO_SUBPROG_ADDR, 346 RELO_CORE, 347 }; 348 349 struct reloc_desc { 350 enum reloc_type type; 351 int insn_idx; 352 union { 353 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ 354 struct { 355 int map_idx; 356 int sym_off; 357 int ext_idx; 358 }; 359 }; 360 }; 361 362 /* stored as sec_def->cookie for all libbpf-supported SEC()s */ 363 enum sec_def_flags { 364 SEC_NONE = 0, 365 /* expected_attach_type is optional, if kernel doesn't support that */ 366 SEC_EXP_ATTACH_OPT = 1, 367 /* legacy, only used by libbpf_get_type_names() and 368 * libbpf_attach_type_by_name(), not used by libbpf itself at all. 369 * This used to be associated with cgroup (and few other) BPF programs 370 * that were attachable through BPF_PROG_ATTACH command. Pretty 371 * meaningless nowadays, though. 
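 *
 * For illustration, such a caller essentially does (the section name is just
 * an example of an attachable one):
 *
 *	enum bpf_attach_type at;
 *	int err = libbpf_attach_type_by_name("cgroup/sock_create", &at);
 *
 * where a zero return means the section name mapped to some attach type
 * (presumably BPF_CGROUP_INET_SOCK_CREATE here).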
 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object
	 */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program supports non-linear XDP buffers */
	SEC_XDP_FRAGS = 16,
	/* Set up proper attach type for USDT probes. */
	SEC_USDT = 32,
};

struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog should be a better name, but it has already been used in
 * linux/filter.h.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not counting subprogram instructions possibly appended
	 * later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the first instruction
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each main BPF
	 * program is processed and relocated, and is used to determine
	 * whether the sub-program was already appended to the main program,
	 * and if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instructions in this BPF program's image; for
	 * entry-point BPF programs this includes the size of the main
	 * program itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;

	/* BPF verifier log settings */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	struct bpf_object *obj;

	int fd;
	bool autoload;
	bool autoattach;
	bool sym_global;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int exception_cb_idx;

	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;

	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
};

struct bpf_struct_ops {
	const char *tname;
	const struct btf_type *type;
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 * btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
484 * struct tcp_congestion_ops data; 485 * } 486 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) 487 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" 488 * from "data". 489 */ 490 void *kern_vdata; 491 __u32 type_id; 492 }; 493 494 #define DATA_SEC ".data" 495 #define BSS_SEC ".bss" 496 #define RODATA_SEC ".rodata" 497 #define KCONFIG_SEC ".kconfig" 498 #define KSYMS_SEC ".ksyms" 499 #define STRUCT_OPS_SEC ".struct_ops" 500 #define STRUCT_OPS_LINK_SEC ".struct_ops.link" 501 #define ARENA_SEC ".addr_space.1" 502 503 enum libbpf_map_type { 504 LIBBPF_MAP_UNSPEC, 505 LIBBPF_MAP_DATA, 506 LIBBPF_MAP_BSS, 507 LIBBPF_MAP_RODATA, 508 LIBBPF_MAP_KCONFIG, 509 }; 510 511 struct bpf_map_def { 512 unsigned int type; 513 unsigned int key_size; 514 unsigned int value_size; 515 unsigned int max_entries; 516 unsigned int map_flags; 517 }; 518 519 struct bpf_map { 520 struct bpf_object *obj; 521 char *name; 522 /* real_name is defined for special internal maps (.rodata*, 523 * .data*, .bss, .kconfig) and preserves their original ELF section 524 * name. This is important to be able to find corresponding BTF 525 * DATASEC information. 526 */ 527 char *real_name; 528 int fd; 529 int sec_idx; 530 size_t sec_offset; 531 int map_ifindex; 532 int inner_map_fd; 533 struct bpf_map_def def; 534 __u32 numa_node; 535 __u32 btf_var_idx; 536 int mod_btf_fd; 537 __u32 btf_key_type_id; 538 __u32 btf_value_type_id; 539 __u32 btf_vmlinux_value_type_id; 540 enum libbpf_map_type libbpf_type; 541 void *mmaped; 542 struct bpf_struct_ops *st_ops; 543 struct bpf_map *inner_map; 544 void **init_slots; 545 int init_slots_sz; 546 char *pin_path; 547 bool pinned; 548 bool reused; 549 bool autocreate; 550 __u64 map_extra; 551 }; 552 553 enum extern_type { 554 EXT_UNKNOWN, 555 EXT_KCFG, 556 EXT_KSYM, 557 }; 558 559 enum kcfg_type { 560 KCFG_UNKNOWN, 561 KCFG_CHAR, 562 KCFG_BOOL, 563 KCFG_INT, 564 KCFG_TRISTATE, 565 KCFG_CHAR_ARR, 566 }; 567 568 struct extern_desc { 569 enum extern_type type; 570 int sym_idx; 571 int btf_id; 572 int sec_btf_id; 573 const char *name; 574 char *essent_name; 575 bool is_set; 576 bool is_weak; 577 union { 578 struct { 579 enum kcfg_type type; 580 int sz; 581 int align; 582 int data_off; 583 bool is_signed; 584 } kcfg; 585 struct { 586 unsigned long long addr; 587 588 /* target btf_id of the corresponding kernel var. */ 589 int kernel_btf_obj_fd; 590 int kernel_btf_id; 591 592 /* local btf_id of the ksym extern's type. 
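 * (For context, assuming the usual bpf_helpers.h conventions, such a ksym
 * extern is declared on the BPF side roughly as
 * 'extern const struct rq runqueues __ksym;'.)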
*/ 593 __u32 type_id; 594 /* BTF fd index to be patched in for insn->off, this is 595 * 0 for vmlinux BTF, index in obj->fd_array for module 596 * BTF 597 */ 598 __s16 btf_fd_idx; 599 } ksym; 600 }; 601 }; 602 603 struct module_btf { 604 struct btf *btf; 605 char *name; 606 __u32 id; 607 int fd; 608 int fd_array_idx; 609 }; 610 611 enum sec_type { 612 SEC_UNUSED = 0, 613 SEC_RELO, 614 SEC_BSS, 615 SEC_DATA, 616 SEC_RODATA, 617 SEC_ST_OPS, 618 }; 619 620 struct elf_sec_desc { 621 enum sec_type sec_type; 622 Elf64_Shdr *shdr; 623 Elf_Data *data; 624 }; 625 626 struct elf_state { 627 int fd; 628 const void *obj_buf; 629 size_t obj_buf_sz; 630 Elf *elf; 631 Elf64_Ehdr *ehdr; 632 Elf_Data *symbols; 633 Elf_Data *arena_data; 634 size_t shstrndx; /* section index for section name strings */ 635 size_t strtabidx; 636 struct elf_sec_desc *secs; 637 size_t sec_cnt; 638 int btf_maps_shndx; 639 __u32 btf_maps_sec_btf_id; 640 int text_shndx; 641 int symbols_shndx; 642 bool has_st_ops; 643 int arena_data_shndx; 644 }; 645 646 struct usdt_manager; 647 648 struct bpf_object { 649 char name[BPF_OBJ_NAME_LEN]; 650 char license[64]; 651 __u32 kern_version; 652 653 struct bpf_program *programs; 654 size_t nr_programs; 655 struct bpf_map *maps; 656 size_t nr_maps; 657 size_t maps_cap; 658 659 char *kconfig; 660 struct extern_desc *externs; 661 int nr_extern; 662 int kconfig_map_idx; 663 664 bool loaded; 665 bool has_subcalls; 666 bool has_rodata; 667 668 struct bpf_gen *gen_loader; 669 670 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ 671 struct elf_state efile; 672 673 struct btf *btf; 674 struct btf_ext *btf_ext; 675 676 /* Parse and load BTF vmlinux if any of the programs in the object need 677 * it at load time. 678 */ 679 struct btf *btf_vmlinux; 680 /* Path to the custom BTF to be used for BPF CO-RE relocations as an 681 * override for vmlinux BTF. 
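 * This is typically populated from bpf_object_open_opts.btf_custom_path; a
 * sketch of how a caller sets it (paths are placeholders):
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.btf_custom_path = "/boot/custom-vmlinux.btf",
 *	);
 *	obj = bpf_object__open_file("prog.bpf.o", &opts);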
682 */ 683 char *btf_custom_path; 684 /* vmlinux BTF override for CO-RE relocations */ 685 struct btf *btf_vmlinux_override; 686 /* Lazily initialized kernel module BTFs */ 687 struct module_btf *btf_modules; 688 bool btf_modules_loaded; 689 size_t btf_module_cnt; 690 size_t btf_module_cap; 691 692 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ 693 char *log_buf; 694 size_t log_size; 695 __u32 log_level; 696 697 int *fd_array; 698 size_t fd_array_cap; 699 size_t fd_array_cnt; 700 701 struct usdt_manager *usdt_man; 702 703 struct bpf_map *arena_map; 704 void *arena_data; 705 size_t arena_data_sz; 706 707 struct kern_feature_cache *feat_cache; 708 char *token_path; 709 int token_fd; 710 711 char path[]; 712 }; 713 714 static const char *elf_sym_str(const struct bpf_object *obj, size_t off); 715 static const char *elf_sec_str(const struct bpf_object *obj, size_t off); 716 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); 717 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); 718 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); 719 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); 720 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); 721 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); 722 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); 723 724 void bpf_program__unload(struct bpf_program *prog) 725 { 726 if (!prog) 727 return; 728 729 zclose(prog->fd); 730 731 zfree(&prog->func_info); 732 zfree(&prog->line_info); 733 } 734 735 static void bpf_program__exit(struct bpf_program *prog) 736 { 737 if (!prog) 738 return; 739 740 bpf_program__unload(prog); 741 zfree(&prog->name); 742 zfree(&prog->sec_name); 743 zfree(&prog->insns); 744 zfree(&prog->reloc_desc); 745 746 prog->nr_reloc = 0; 747 prog->insns_cnt = 0; 748 prog->sec_idx = -1; 749 } 750 751 static bool insn_is_subprog_call(const struct bpf_insn *insn) 752 { 753 return BPF_CLASS(insn->code) == BPF_JMP && 754 BPF_OP(insn->code) == BPF_CALL && 755 BPF_SRC(insn->code) == BPF_K && 756 insn->src_reg == BPF_PSEUDO_CALL && 757 insn->dst_reg == 0 && 758 insn->off == 0; 759 } 760 761 static bool is_call_insn(const struct bpf_insn *insn) 762 { 763 return insn->code == (BPF_JMP | BPF_CALL); 764 } 765 766 static bool insn_is_pseudo_func(struct bpf_insn *insn) 767 { 768 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC; 769 } 770 771 static int 772 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, 773 const char *name, size_t sec_idx, const char *sec_name, 774 size_t sec_off, void *insn_data, size_t insn_data_sz) 775 { 776 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { 777 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", 778 sec_name, name, sec_off, insn_data_sz); 779 return -EINVAL; 780 } 781 782 memset(prog, 0, sizeof(*prog)); 783 prog->obj = obj; 784 785 prog->sec_idx = sec_idx; 786 prog->sec_insn_off = sec_off / BPF_INSN_SZ; 787 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; 788 /* insns_cnt can later be increased by appending used subprograms */ 789 prog->insns_cnt = prog->sec_insn_cnt; 790 791 prog->type = BPF_PROG_TYPE_UNSPEC; 792 prog->fd = -1; 793 prog->exception_cb_idx = -1; 794 795 /* libbpf's convention for SEC("?abc...") is that it's just like 796 * SEC("abc...") but the corresponding bpf_program starts out with 797 * autoload set to false. 
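 *
 * For illustration (program and section names are made up):
 *
 *	SEC("?kprobe/do_unlinkat")
 *	int handle_unlink(struct pt_regs *ctx) { return 0; }
 *
 * is skipped at load time unless user space flips it back on first, e.g.
 * with bpf_program__set_autoload(prog, true) before bpf_object__load().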
798 */ 799 if (sec_name[0] == '?') { 800 prog->autoload = false; 801 /* from now on forget there was ? in section name */ 802 sec_name++; 803 } else { 804 prog->autoload = true; 805 } 806 807 prog->autoattach = true; 808 809 /* inherit object's log_level */ 810 prog->log_level = obj->log_level; 811 812 prog->sec_name = strdup(sec_name); 813 if (!prog->sec_name) 814 goto errout; 815 816 prog->name = strdup(name); 817 if (!prog->name) 818 goto errout; 819 820 prog->insns = malloc(insn_data_sz); 821 if (!prog->insns) 822 goto errout; 823 memcpy(prog->insns, insn_data, insn_data_sz); 824 825 return 0; 826 errout: 827 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); 828 bpf_program__exit(prog); 829 return -ENOMEM; 830 } 831 832 static int 833 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, 834 const char *sec_name, int sec_idx) 835 { 836 Elf_Data *symbols = obj->efile.symbols; 837 struct bpf_program *prog, *progs; 838 void *data = sec_data->d_buf; 839 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; 840 int nr_progs, err, i; 841 const char *name; 842 Elf64_Sym *sym; 843 844 progs = obj->programs; 845 nr_progs = obj->nr_programs; 846 nr_syms = symbols->d_size / sizeof(Elf64_Sym); 847 848 for (i = 0; i < nr_syms; i++) { 849 sym = elf_sym_by_idx(obj, i); 850 851 if (sym->st_shndx != sec_idx) 852 continue; 853 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) 854 continue; 855 856 prog_sz = sym->st_size; 857 sec_off = sym->st_value; 858 859 name = elf_sym_str(obj, sym->st_name); 860 if (!name) { 861 pr_warn("sec '%s': failed to get symbol name for offset %zu\n", 862 sec_name, sec_off); 863 return -LIBBPF_ERRNO__FORMAT; 864 } 865 866 if (sec_off + prog_sz > sec_sz) { 867 pr_warn("sec '%s': program at offset %zu crosses section boundary\n", 868 sec_name, sec_off); 869 return -LIBBPF_ERRNO__FORMAT; 870 } 871 872 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { 873 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); 874 return -ENOTSUP; 875 } 876 877 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", 878 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); 879 880 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs)); 881 if (!progs) { 882 /* 883 * In this case the original obj->programs 884 * is still valid, so don't need special treat for 885 * bpf_close_object(). 
886 */ 887 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n", 888 sec_name, name); 889 return -ENOMEM; 890 } 891 obj->programs = progs; 892 893 prog = &progs[nr_progs]; 894 895 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name, 896 sec_off, data + sec_off, prog_sz); 897 if (err) 898 return err; 899 900 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) 901 prog->sym_global = true; 902 903 /* if function is a global/weak symbol, but has restricted 904 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC 905 * as static to enable more permissive BPF verification mode 906 * with more outside context available to BPF verifier 907 */ 908 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 909 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) 910 prog->mark_btf_static = true; 911 912 nr_progs++; 913 obj->nr_programs = nr_progs; 914 } 915 916 return 0; 917 } 918 919 static const struct btf_member * 920 find_member_by_offset(const struct btf_type *t, __u32 bit_offset) 921 { 922 struct btf_member *m; 923 int i; 924 925 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 926 if (btf_member_bit_offset(t, i) == bit_offset) 927 return m; 928 } 929 930 return NULL; 931 } 932 933 static const struct btf_member * 934 find_member_by_name(const struct btf *btf, const struct btf_type *t, 935 const char *name) 936 { 937 struct btf_member *m; 938 int i; 939 940 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 941 if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) 942 return m; 943 } 944 945 return NULL; 946 } 947 948 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 949 __u16 kind, struct btf **res_btf, 950 struct module_btf **res_mod_btf); 951 952 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" 953 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 954 const char *name, __u32 kind); 955 956 static int 957 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, 958 struct module_btf **mod_btf, 959 const struct btf_type **type, __u32 *type_id, 960 const struct btf_type **vtype, __u32 *vtype_id, 961 const struct btf_member **data_member) 962 { 963 const struct btf_type *kern_type, *kern_vtype; 964 const struct btf_member *kern_data_member; 965 struct btf *btf; 966 __s32 kern_vtype_id, kern_type_id; 967 char tname[256]; 968 __u32 i; 969 970 snprintf(tname, sizeof(tname), "%.*s", 971 (int)bpf_core_essential_name_len(tname_raw), tname_raw); 972 973 kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, 974 &btf, mod_btf); 975 if (kern_type_id < 0) { 976 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", 977 tname); 978 return kern_type_id; 979 } 980 kern_type = btf__type_by_id(btf, kern_type_id); 981 982 /* Find the corresponding "map_value" type that will be used 983 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, 984 * find "struct bpf_struct_ops_tcp_congestion_ops" from the 985 * btf_vmlinux. 986 */ 987 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, 988 tname, BTF_KIND_STRUCT); 989 if (kern_vtype_id < 0) { 990 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", 991 STRUCT_OPS_VALUE_PREFIX, tname); 992 return kern_vtype_id; 993 } 994 kern_vtype = btf__type_by_id(btf, kern_vtype_id); 995 996 /* Find "struct tcp_congestion_ops" from 997 * struct bpf_struct_ops_tcp_congestion_ops { 998 * [ ... 
] 999 * struct tcp_congestion_ops data; 1000 * } 1001 */ 1002 kern_data_member = btf_members(kern_vtype); 1003 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { 1004 if (kern_data_member->type == kern_type_id) 1005 break; 1006 } 1007 if (i == btf_vlen(kern_vtype)) { 1008 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", 1009 tname, STRUCT_OPS_VALUE_PREFIX, tname); 1010 return -EINVAL; 1011 } 1012 1013 *type = kern_type; 1014 *type_id = kern_type_id; 1015 *vtype = kern_vtype; 1016 *vtype_id = kern_vtype_id; 1017 *data_member = kern_data_member; 1018 1019 return 0; 1020 } 1021 1022 static bool bpf_map__is_struct_ops(const struct bpf_map *map) 1023 { 1024 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; 1025 } 1026 1027 static bool is_valid_st_ops_program(struct bpf_object *obj, 1028 const struct bpf_program *prog) 1029 { 1030 int i; 1031 1032 for (i = 0; i < obj->nr_programs; i++) { 1033 if (&obj->programs[i] == prog) 1034 return prog->type == BPF_PROG_TYPE_STRUCT_OPS; 1035 } 1036 1037 return false; 1038 } 1039 1040 /* For each struct_ops program P, referenced from some struct_ops map M, 1041 * enable P.autoload if there are Ms for which M.autocreate is true, 1042 * disable P.autoload if for all Ms M.autocreate is false. 1043 * Don't change P.autoload for programs that are not referenced from any maps. 1044 */ 1045 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) 1046 { 1047 struct bpf_program *prog, *slot_prog; 1048 struct bpf_map *map; 1049 int i, j, k, vlen; 1050 1051 for (i = 0; i < obj->nr_programs; ++i) { 1052 int should_load = false; 1053 int use_cnt = 0; 1054 1055 prog = &obj->programs[i]; 1056 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) 1057 continue; 1058 1059 for (j = 0; j < obj->nr_maps; ++j) { 1060 map = &obj->maps[j]; 1061 if (!bpf_map__is_struct_ops(map)) 1062 continue; 1063 1064 vlen = btf_vlen(map->st_ops->type); 1065 for (k = 0; k < vlen; ++k) { 1066 slot_prog = map->st_ops->progs[k]; 1067 if (prog != slot_prog) 1068 continue; 1069 1070 use_cnt++; 1071 if (map->autocreate) 1072 should_load = true; 1073 } 1074 } 1075 if (use_cnt) 1076 prog->autoload = should_load; 1077 } 1078 1079 return 0; 1080 } 1081 1082 /* Init the map's fields that depend on kern_btf */ 1083 static int bpf_map__init_kern_struct_ops(struct bpf_map *map) 1084 { 1085 const struct btf_member *member, *kern_member, *kern_data_member; 1086 const struct btf_type *type, *kern_type, *kern_vtype; 1087 __u32 i, kern_type_id, kern_vtype_id, kern_data_off; 1088 struct bpf_object *obj = map->obj; 1089 const struct btf *btf = obj->btf; 1090 struct bpf_struct_ops *st_ops; 1091 const struct btf *kern_btf; 1092 struct module_btf *mod_btf; 1093 void *data, *kern_data; 1094 const char *tname; 1095 int err; 1096 1097 st_ops = map->st_ops; 1098 type = st_ops->type; 1099 tname = st_ops->tname; 1100 err = find_struct_ops_kern_types(obj, tname, &mod_btf, 1101 &kern_type, &kern_type_id, 1102 &kern_vtype, &kern_vtype_id, 1103 &kern_data_member); 1104 if (err) 1105 return err; 1106 1107 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; 1108 1109 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", 1110 map->name, st_ops->type_id, kern_type_id, kern_vtype_id); 1111 1112 map->mod_btf_fd = mod_btf ? 
mod_btf->fd : -1; 1113 map->def.value_size = kern_vtype->size; 1114 map->btf_vmlinux_value_type_id = kern_vtype_id; 1115 1116 st_ops->kern_vdata = calloc(1, kern_vtype->size); 1117 if (!st_ops->kern_vdata) 1118 return -ENOMEM; 1119 1120 data = st_ops->data; 1121 kern_data_off = kern_data_member->offset / 8; 1122 kern_data = st_ops->kern_vdata + kern_data_off; 1123 1124 member = btf_members(type); 1125 for (i = 0; i < btf_vlen(type); i++, member++) { 1126 const struct btf_type *mtype, *kern_mtype; 1127 __u32 mtype_id, kern_mtype_id; 1128 void *mdata, *kern_mdata; 1129 __s64 msize, kern_msize; 1130 __u32 moff, kern_moff; 1131 __u32 kern_member_idx; 1132 const char *mname; 1133 1134 mname = btf__name_by_offset(btf, member->name_off); 1135 moff = member->offset / 8; 1136 mdata = data + moff; 1137 msize = btf__resolve_size(btf, member->type); 1138 if (msize < 0) { 1139 pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n", 1140 map->name, mname); 1141 return msize; 1142 } 1143 1144 kern_member = find_member_by_name(kern_btf, kern_type, mname); 1145 if (!kern_member) { 1146 /* Skip all zeros or null fields if they are not 1147 * presented in the kernel BTF. 1148 */ 1149 if (libbpf_is_mem_zeroed(mdata, msize)) { 1150 pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n", 1151 map->name, mname); 1152 continue; 1153 } 1154 1155 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", 1156 map->name, mname); 1157 return -ENOTSUP; 1158 } 1159 1160 kern_member_idx = kern_member - btf_members(kern_type); 1161 if (btf_member_bitfield_size(type, i) || 1162 btf_member_bitfield_size(kern_type, kern_member_idx)) { 1163 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", 1164 map->name, mname); 1165 return -ENOTSUP; 1166 } 1167 1168 kern_moff = kern_member->offset / 8; 1169 kern_mdata = kern_data + kern_moff; 1170 1171 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); 1172 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, 1173 &kern_mtype_id); 1174 if (BTF_INFO_KIND(mtype->info) != 1175 BTF_INFO_KIND(kern_mtype->info)) { 1176 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", 1177 map->name, mname, BTF_INFO_KIND(mtype->info), 1178 BTF_INFO_KIND(kern_mtype->info)); 1179 return -ENOTSUP; 1180 } 1181 1182 if (btf_is_ptr(mtype)) { 1183 struct bpf_program *prog; 1184 1185 /* Update the value from the shadow type */ 1186 prog = *(void **)mdata; 1187 st_ops->progs[i] = prog; 1188 if (!prog) 1189 continue; 1190 if (!is_valid_st_ops_program(obj, prog)) { 1191 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", 1192 map->name, mname); 1193 return -ENOTSUP; 1194 } 1195 1196 kern_mtype = skip_mods_and_typedefs(kern_btf, 1197 kern_mtype->type, 1198 &kern_mtype_id); 1199 1200 /* mtype->type must be a func_proto which was 1201 * guaranteed in bpf_object__collect_st_ops_relos(), 1202 * so only check kern_mtype for func_proto here. 
1203 */ 1204 if (!btf_is_func_proto(kern_mtype)) { 1205 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", 1206 map->name, mname); 1207 return -ENOTSUP; 1208 } 1209 1210 if (mod_btf) 1211 prog->attach_btf_obj_fd = mod_btf->fd; 1212 1213 /* if we haven't yet processed this BPF program, record proper 1214 * attach_btf_id and member_idx 1215 */ 1216 if (!prog->attach_btf_id) { 1217 prog->attach_btf_id = kern_type_id; 1218 prog->expected_attach_type = kern_member_idx; 1219 } 1220 1221 /* struct_ops BPF prog can be re-used between multiple 1222 * .struct_ops & .struct_ops.link as long as it's the 1223 * same struct_ops struct definition and the same 1224 * function pointer field 1225 */ 1226 if (prog->attach_btf_id != kern_type_id) { 1227 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n", 1228 map->name, mname, prog->name, prog->sec_name, prog->type, 1229 prog->attach_btf_id, kern_type_id); 1230 return -EINVAL; 1231 } 1232 if (prog->expected_attach_type != kern_member_idx) { 1233 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n", 1234 map->name, mname, prog->name, prog->sec_name, prog->type, 1235 prog->expected_attach_type, kern_member_idx); 1236 return -EINVAL; 1237 } 1238 1239 st_ops->kern_func_off[i] = kern_data_off + kern_moff; 1240 1241 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", 1242 map->name, mname, prog->name, moff, 1243 kern_moff); 1244 1245 continue; 1246 } 1247 1248 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); 1249 if (kern_msize < 0 || msize != kern_msize) { 1250 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", 1251 map->name, mname, (ssize_t)msize, 1252 (ssize_t)kern_msize); 1253 return -ENOTSUP; 1254 } 1255 1256 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", 1257 map->name, mname, (unsigned int)msize, 1258 moff, kern_moff); 1259 memcpy(kern_mdata, mdata, msize); 1260 } 1261 1262 return 0; 1263 } 1264 1265 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) 1266 { 1267 struct bpf_map *map; 1268 size_t i; 1269 int err; 1270 1271 for (i = 0; i < obj->nr_maps; i++) { 1272 map = &obj->maps[i]; 1273 1274 if (!bpf_map__is_struct_ops(map)) 1275 continue; 1276 1277 if (!map->autocreate) 1278 continue; 1279 1280 err = bpf_map__init_kern_struct_ops(map); 1281 if (err) 1282 return err; 1283 } 1284 1285 return 0; 1286 } 1287 1288 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, 1289 int shndx, Elf_Data *data) 1290 { 1291 const struct btf_type *type, *datasec; 1292 const struct btf_var_secinfo *vsi; 1293 struct bpf_struct_ops *st_ops; 1294 const char *tname, *var_name; 1295 __s32 type_id, datasec_id; 1296 const struct btf *btf; 1297 struct bpf_map *map; 1298 __u32 i; 1299 1300 if (shndx == -1) 1301 return 0; 1302 1303 btf = obj->btf; 1304 datasec_id = btf__find_by_name_kind(btf, sec_name, 1305 BTF_KIND_DATASEC); 1306 if (datasec_id < 0) { 1307 pr_warn("struct_ops init: DATASEC %s not found\n", 1308 sec_name); 1309 return -EINVAL; 1310 } 1311 1312 datasec = btf__type_by_id(btf, datasec_id); 1313 vsi = btf_var_secinfos(datasec); 1314 for (i = 0; i < btf_vlen(datasec); i++, vsi++) { 1315 type = btf__type_by_id(obj->btf, vsi->type); 1316 var_name = btf__name_by_offset(obj->btf, type->name_off); 1317 1318 
type_id = btf__resolve_type(obj->btf, vsi->type); 1319 if (type_id < 0) { 1320 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", 1321 vsi->type, sec_name); 1322 return -EINVAL; 1323 } 1324 1325 type = btf__type_by_id(obj->btf, type_id); 1326 tname = btf__name_by_offset(obj->btf, type->name_off); 1327 if (!tname[0]) { 1328 pr_warn("struct_ops init: anonymous type is not supported\n"); 1329 return -ENOTSUP; 1330 } 1331 if (!btf_is_struct(type)) { 1332 pr_warn("struct_ops init: %s is not a struct\n", tname); 1333 return -EINVAL; 1334 } 1335 1336 map = bpf_object__add_map(obj); 1337 if (IS_ERR(map)) 1338 return PTR_ERR(map); 1339 1340 map->sec_idx = shndx; 1341 map->sec_offset = vsi->offset; 1342 map->name = strdup(var_name); 1343 if (!map->name) 1344 return -ENOMEM; 1345 map->btf_value_type_id = type_id; 1346 1347 /* Follow same convention as for programs autoload: 1348 * SEC("?.struct_ops") means map is not created by default. 1349 */ 1350 if (sec_name[0] == '?') { 1351 map->autocreate = false; 1352 /* from now on forget there was ? in section name */ 1353 sec_name++; 1354 } 1355 1356 map->def.type = BPF_MAP_TYPE_STRUCT_OPS; 1357 map->def.key_size = sizeof(int); 1358 map->def.value_size = type->size; 1359 map->def.max_entries = 1; 1360 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; 1361 1362 map->st_ops = calloc(1, sizeof(*map->st_ops)); 1363 if (!map->st_ops) 1364 return -ENOMEM; 1365 st_ops = map->st_ops; 1366 st_ops->data = malloc(type->size); 1367 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); 1368 st_ops->kern_func_off = malloc(btf_vlen(type) * 1369 sizeof(*st_ops->kern_func_off)); 1370 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) 1371 return -ENOMEM; 1372 1373 if (vsi->offset + type->size > data->d_size) { 1374 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", 1375 var_name, sec_name); 1376 return -EINVAL; 1377 } 1378 1379 memcpy(st_ops->data, 1380 data->d_buf + vsi->offset, 1381 type->size); 1382 st_ops->tname = tname; 1383 st_ops->type = type; 1384 st_ops->type_id = type_id; 1385 1386 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", 1387 tname, type_id, var_name, vsi->offset); 1388 } 1389 1390 return 0; 1391 } 1392 1393 static int bpf_object_init_struct_ops(struct bpf_object *obj) 1394 { 1395 const char *sec_name; 1396 int sec_idx, err; 1397 1398 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) { 1399 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx]; 1400 1401 if (desc->sec_type != SEC_ST_OPS) 1402 continue; 1403 1404 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1405 if (!sec_name) 1406 return -LIBBPF_ERRNO__FORMAT; 1407 1408 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data); 1409 if (err) 1410 return err; 1411 } 1412 1413 return 0; 1414 } 1415 1416 static struct bpf_object *bpf_object__new(const char *path, 1417 const void *obj_buf, 1418 size_t obj_buf_sz, 1419 const char *obj_name) 1420 { 1421 struct bpf_object *obj; 1422 char *end; 1423 1424 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); 1425 if (!obj) { 1426 pr_warn("alloc memory failed for %s\n", path); 1427 return ERR_PTR(-ENOMEM); 1428 } 1429 1430 strcpy(obj->path, path); 1431 if (obj_name) { 1432 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); 1433 } else { 1434 /* Using basename() GNU version which doesn't modify arg. 
*/ 1435 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); 1436 end = strchr(obj->name, '.'); 1437 if (end) 1438 *end = 0; 1439 } 1440 1441 obj->efile.fd = -1; 1442 /* 1443 * Caller of this function should also call 1444 * bpf_object__elf_finish() after data collection to return 1445 * obj_buf to user. If not, we should duplicate the buffer to 1446 * avoid user freeing them before elf finish. 1447 */ 1448 obj->efile.obj_buf = obj_buf; 1449 obj->efile.obj_buf_sz = obj_buf_sz; 1450 obj->efile.btf_maps_shndx = -1; 1451 obj->kconfig_map_idx = -1; 1452 1453 obj->kern_version = get_kernel_version(); 1454 obj->loaded = false; 1455 1456 return obj; 1457 } 1458 1459 static void bpf_object__elf_finish(struct bpf_object *obj) 1460 { 1461 if (!obj->efile.elf) 1462 return; 1463 1464 elf_end(obj->efile.elf); 1465 obj->efile.elf = NULL; 1466 obj->efile.symbols = NULL; 1467 obj->efile.arena_data = NULL; 1468 1469 zfree(&obj->efile.secs); 1470 obj->efile.sec_cnt = 0; 1471 zclose(obj->efile.fd); 1472 obj->efile.obj_buf = NULL; 1473 obj->efile.obj_buf_sz = 0; 1474 } 1475 1476 static int bpf_object__elf_init(struct bpf_object *obj) 1477 { 1478 Elf64_Ehdr *ehdr; 1479 int err = 0; 1480 Elf *elf; 1481 1482 if (obj->efile.elf) { 1483 pr_warn("elf: init internal error\n"); 1484 return -LIBBPF_ERRNO__LIBELF; 1485 } 1486 1487 if (obj->efile.obj_buf_sz > 0) { 1488 /* obj_buf should have been validated by bpf_object__open_mem(). */ 1489 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); 1490 } else { 1491 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); 1492 if (obj->efile.fd < 0) { 1493 char errmsg[STRERR_BUFSIZE], *cp; 1494 1495 err = -errno; 1496 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 1497 pr_warn("elf: failed to open %s: %s\n", obj->path, cp); 1498 return err; 1499 } 1500 1501 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); 1502 } 1503 1504 if (!elf) { 1505 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); 1506 err = -LIBBPF_ERRNO__LIBELF; 1507 goto errout; 1508 } 1509 1510 obj->efile.elf = elf; 1511 1512 if (elf_kind(elf) != ELF_K_ELF) { 1513 err = -LIBBPF_ERRNO__FORMAT; 1514 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); 1515 goto errout; 1516 } 1517 1518 if (gelf_getclass(elf) != ELFCLASS64) { 1519 err = -LIBBPF_ERRNO__FORMAT; 1520 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); 1521 goto errout; 1522 } 1523 1524 obj->efile.ehdr = ehdr = elf64_getehdr(elf); 1525 if (!obj->efile.ehdr) { 1526 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); 1527 err = -LIBBPF_ERRNO__FORMAT; 1528 goto errout; 1529 } 1530 1531 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { 1532 pr_warn("elf: failed to get section names section index for %s: %s\n", 1533 obj->path, elf_errmsg(-1)); 1534 err = -LIBBPF_ERRNO__FORMAT; 1535 goto errout; 1536 } 1537 1538 /* ELF is corrupted/truncated, avoid calling elf_strptr. 
*/ 1539 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { 1540 pr_warn("elf: failed to get section names strings from %s: %s\n", 1541 obj->path, elf_errmsg(-1)); 1542 err = -LIBBPF_ERRNO__FORMAT; 1543 goto errout; 1544 } 1545 1546 /* Old LLVM set e_machine to EM_NONE */ 1547 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { 1548 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); 1549 err = -LIBBPF_ERRNO__FORMAT; 1550 goto errout; 1551 } 1552 1553 return 0; 1554 errout: 1555 bpf_object__elf_finish(obj); 1556 return err; 1557 } 1558 1559 static int bpf_object__check_endianness(struct bpf_object *obj) 1560 { 1561 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 1562 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) 1563 return 0; 1564 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 1565 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) 1566 return 0; 1567 #else 1568 # error "Unrecognized __BYTE_ORDER__" 1569 #endif 1570 pr_warn("elf: endianness mismatch in %s.\n", obj->path); 1571 return -LIBBPF_ERRNO__ENDIAN; 1572 } 1573 1574 static int 1575 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) 1576 { 1577 if (!data) { 1578 pr_warn("invalid license section in %s\n", obj->path); 1579 return -LIBBPF_ERRNO__FORMAT; 1580 } 1581 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't 1582 * go over allowed ELF data section buffer 1583 */ 1584 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); 1585 pr_debug("license of %s is %s\n", obj->path, obj->license); 1586 return 0; 1587 } 1588 1589 static int 1590 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) 1591 { 1592 __u32 kver; 1593 1594 if (!data || size != sizeof(kver)) { 1595 pr_warn("invalid kver section in %s\n", obj->path); 1596 return -LIBBPF_ERRNO__FORMAT; 1597 } 1598 memcpy(&kver, data, sizeof(kver)); 1599 obj->kern_version = kver; 1600 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); 1601 return 0; 1602 } 1603 1604 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) 1605 { 1606 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || 1607 type == BPF_MAP_TYPE_HASH_OF_MAPS) 1608 return true; 1609 return false; 1610 } 1611 1612 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) 1613 { 1614 Elf_Data *data; 1615 Elf_Scn *scn; 1616 1617 if (!name) 1618 return -EINVAL; 1619 1620 scn = elf_sec_by_name(obj, name); 1621 data = elf_sec_data(obj, scn); 1622 if (data) { 1623 *size = data->d_size; 1624 return 0; /* found it */ 1625 } 1626 1627 return -ENOENT; 1628 } 1629 1630 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) 1631 { 1632 Elf_Data *symbols = obj->efile.symbols; 1633 const char *sname; 1634 size_t si; 1635 1636 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { 1637 Elf64_Sym *sym = elf_sym_by_idx(obj, si); 1638 1639 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) 1640 continue; 1641 1642 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && 1643 ELF64_ST_BIND(sym->st_info) != STB_WEAK) 1644 continue; 1645 1646 sname = elf_sym_str(obj, sym->st_name); 1647 if (!sname) { 1648 pr_warn("failed to get sym name string for var %s\n", name); 1649 return ERR_PTR(-EIO); 1650 } 1651 if (strcmp(name, sname) == 0) 1652 return sym; 1653 } 1654 1655 return ERR_PTR(-ENOENT); 1656 } 1657 1658 /* Some versions of Android don't provide memfd_create() in their libc 1659 * implementation, so avoid complications and just go 
straight to Linux 1660 * syscall. 1661 */ 1662 static int sys_memfd_create(const char *name, unsigned flags) 1663 { 1664 return syscall(__NR_memfd_create, name, flags); 1665 } 1666 1667 #ifndef MFD_CLOEXEC 1668 #define MFD_CLOEXEC 0x0001U 1669 #endif 1670 1671 static int create_placeholder_fd(void) 1672 { 1673 int fd; 1674 1675 fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); 1676 if (fd < 0) 1677 return -errno; 1678 return fd; 1679 } 1680 1681 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) 1682 { 1683 struct bpf_map *map; 1684 int err; 1685 1686 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, 1687 sizeof(*obj->maps), obj->nr_maps + 1); 1688 if (err) 1689 return ERR_PTR(err); 1690 1691 map = &obj->maps[obj->nr_maps++]; 1692 map->obj = obj; 1693 /* Preallocate map FD without actually creating BPF map just yet. 1694 * These map FD "placeholders" will be reused later without changing 1695 * FD value when map is actually created in the kernel. 1696 * 1697 * This is useful to be able to perform BPF program relocations 1698 * without having to create BPF maps before that step. This allows us 1699 * to finalize and load BTF very late in BPF object's loading phase, 1700 * right before BPF maps have to be created and BPF programs have to 1701 * be loaded. By having these map FD placeholders we can perform all 1702 * the sanitizations, relocations, and any other adjustments before we 1703 * start creating actual BPF kernel objects (BTF, maps, progs). 1704 */ 1705 map->fd = create_placeholder_fd(); 1706 if (map->fd < 0) 1707 return ERR_PTR(map->fd); 1708 map->inner_map_fd = -1; 1709 map->autocreate = true; 1710 1711 return map; 1712 } 1713 1714 static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) 1715 { 1716 const long page_sz = sysconf(_SC_PAGE_SIZE); 1717 size_t map_sz; 1718 1719 map_sz = (size_t)roundup(value_sz, 8) * max_entries; 1720 map_sz = roundup(map_sz, page_sz); 1721 return map_sz; 1722 } 1723 1724 static size_t bpf_map_mmap_sz(const struct bpf_map *map) 1725 { 1726 const long page_sz = sysconf(_SC_PAGE_SIZE); 1727 1728 switch (map->def.type) { 1729 case BPF_MAP_TYPE_ARRAY: 1730 return array_map_mmap_sz(map->def.value_size, map->def.max_entries); 1731 case BPF_MAP_TYPE_ARENA: 1732 return page_sz * map->def.max_entries; 1733 default: 1734 return 0; /* not supported */ 1735 } 1736 } 1737 1738 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) 1739 { 1740 void *mmaped; 1741 1742 if (!map->mmaped) 1743 return -EINVAL; 1744 1745 if (old_sz == new_sz) 1746 return 0; 1747 1748 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1749 if (mmaped == MAP_FAILED) 1750 return -errno; 1751 1752 memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); 1753 munmap(map->mmaped, old_sz); 1754 map->mmaped = mmaped; 1755 return 0; 1756 } 1757 1758 static char *internal_map_name(struct bpf_object *obj, const char *real_name) 1759 { 1760 char map_name[BPF_OBJ_NAME_LEN], *p; 1761 int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); 1762 1763 /* This is one of the more confusing parts of libbpf for various 1764 * reasons, some of which are historical. The original idea for naming 1765 * internal names was to include as much of BPF object name prefix as 1766 * possible, so that it can be distinguished from similar internal 1767 * maps of a different BPF object. 
1768 * As an example, let's say we have bpf_object named 'my_object_name' 1769 * and internal map corresponding to '.rodata' ELF section. The final 1770 * map name advertised to user and to the kernel will be 1771 * 'my_objec.rodata', taking first 8 characters of object name and 1772 * entire 7 characters of '.rodata'. 1773 * Somewhat confusingly, if internal map ELF section name is shorter 1774 * than 7 characters, e.g., '.bss', we still reserve 7 characters 1775 * for the suffix, even though we only have 4 actual characters, and 1776 * resulting map will be called 'my_objec.bss', not even using all 15 1777 * characters allowed by the kernel. Oh well, at least the truncated 1778 * object name is somewhat consistent in this case. But if the map 1779 * name is '.kconfig', we'll still have entirety of '.kconfig' added 1780 * (8 chars) and thus will be left with only first 7 characters of the 1781 * object name ('my_obje'). Happy guessing, user, that the final map 1782 * name will be "my_obje.kconfig". 1783 * Now, with libbpf starting to support arbitrarily named .rodata.* 1784 * and .data.* data sections, it's possible that ELF section name is 1785 * longer than allowed 15 chars, so we now need to be careful to take 1786 * only up to 15 first characters of ELF name, taking no BPF object 1787 * name characters at all. So '.rodata.abracadabra' will result in 1788 * '.rodata.abracad' kernel and user-visible name. 1789 * We need to keep this convoluted logic intact for .data, .bss and 1790 * .rodata maps, but for new custom .data.custom and .rodata.custom 1791 * maps we use their ELF names as is, not prepending bpf_object name 1792 * in front. We still need to truncate them to 15 characters for the 1793 * kernel. Full name can be recovered for such maps by using DATASEC 1794 * BTF type associated with such map's value type, though. 1795 */ 1796 if (sfx_len >= BPF_OBJ_NAME_LEN) 1797 sfx_len = BPF_OBJ_NAME_LEN - 1; 1798 1799 /* if there are two or more dots in map name, it's a custom dot map */ 1800 if (strchr(real_name + 1, '.') != NULL) 1801 pfx_len = 0; 1802 else 1803 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); 1804 1805 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, 1806 sfx_len, real_name); 1807 1808 /* sanitise map name to characters allowed by kernel */ 1809 for (p = map_name; *p && p < map_name + sizeof(map_name); p++) 1810 if (!isalnum(*p) && *p != '_' && *p != '.') 1811 *p = '_'; 1812 1813 return strdup(map_name); 1814 } 1815 1816 static int 1817 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); 1818 1819 /* Internal BPF map is mmap()'able only if at least one of corresponding 1820 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL 1821 * variable and it's not marked as __hidden (which turns it into, effectively, 1822 * a STATIC variable). 
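 *
 * For illustration (identifiers are hypothetical): a global declared in BPF
 * code as 'int my_counter = 1;' lands in .data, keeps that map mmap()'able
 * and is reachable from the skeleton as skel->data->my_counter, whereas
 * 'static int my_counter = 1;' or a __hidden global is not exposed.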
1823 */ 1824 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) 1825 { 1826 const struct btf_type *t, *vt; 1827 struct btf_var_secinfo *vsi; 1828 int i, n; 1829 1830 if (!map->btf_value_type_id) 1831 return false; 1832 1833 t = btf__type_by_id(obj->btf, map->btf_value_type_id); 1834 if (!btf_is_datasec(t)) 1835 return false; 1836 1837 vsi = btf_var_secinfos(t); 1838 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { 1839 vt = btf__type_by_id(obj->btf, vsi->type); 1840 if (!btf_is_var(vt)) 1841 continue; 1842 1843 if (btf_var(vt)->linkage != BTF_VAR_STATIC) 1844 return true; 1845 } 1846 1847 return false; 1848 } 1849 1850 static int 1851 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, 1852 const char *real_name, int sec_idx, void *data, size_t data_sz) 1853 { 1854 struct bpf_map_def *def; 1855 struct bpf_map *map; 1856 size_t mmap_sz; 1857 int err; 1858 1859 map = bpf_object__add_map(obj); 1860 if (IS_ERR(map)) 1861 return PTR_ERR(map); 1862 1863 map->libbpf_type = type; 1864 map->sec_idx = sec_idx; 1865 map->sec_offset = 0; 1866 map->real_name = strdup(real_name); 1867 map->name = internal_map_name(obj, real_name); 1868 if (!map->real_name || !map->name) { 1869 zfree(&map->real_name); 1870 zfree(&map->name); 1871 return -ENOMEM; 1872 } 1873 1874 def = &map->def; 1875 def->type = BPF_MAP_TYPE_ARRAY; 1876 def->key_size = sizeof(int); 1877 def->value_size = data_sz; 1878 def->max_entries = 1; 1879 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG 1880 ? BPF_F_RDONLY_PROG : 0; 1881 1882 /* failures are fine because of maps like .rodata.str1.1 */ 1883 (void) map_fill_btf_type_info(obj, map); 1884 1885 if (map_is_mmapable(obj, map)) 1886 def->map_flags |= BPF_F_MMAPABLE; 1887 1888 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 1889 map->name, map->sec_idx, map->sec_offset, def->map_flags); 1890 1891 mmap_sz = bpf_map_mmap_sz(map); 1892 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1893 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1894 if (map->mmaped == MAP_FAILED) { 1895 err = -errno; 1896 map->mmaped = NULL; 1897 pr_warn("failed to alloc map '%s' content buffer: %d\n", 1898 map->name, err); 1899 zfree(&map->real_name); 1900 zfree(&map->name); 1901 return err; 1902 } 1903 1904 if (data) 1905 memcpy(map->mmaped, data, data_sz); 1906 1907 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); 1908 return 0; 1909 } 1910 1911 static int bpf_object__init_global_data_maps(struct bpf_object *obj) 1912 { 1913 struct elf_sec_desc *sec_desc; 1914 const char *sec_name; 1915 int err = 0, sec_idx; 1916 1917 /* 1918 * Populate obj->maps with libbpf internal maps. 1919 */ 1920 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { 1921 sec_desc = &obj->efile.secs[sec_idx]; 1922 1923 /* Skip recognized sections with size 0. 
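 * (e.g. an empty .bss in an object that has no zero-initialized globals)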
*/ 1924 if (!sec_desc->data || sec_desc->data->d_size == 0) 1925 continue; 1926 1927 switch (sec_desc->sec_type) { 1928 case SEC_DATA: 1929 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1930 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, 1931 sec_name, sec_idx, 1932 sec_desc->data->d_buf, 1933 sec_desc->data->d_size); 1934 break; 1935 case SEC_RODATA: 1936 obj->has_rodata = true; 1937 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1938 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, 1939 sec_name, sec_idx, 1940 sec_desc->data->d_buf, 1941 sec_desc->data->d_size); 1942 break; 1943 case SEC_BSS: 1944 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1945 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, 1946 sec_name, sec_idx, 1947 NULL, 1948 sec_desc->data->d_size); 1949 break; 1950 default: 1951 /* skip */ 1952 break; 1953 } 1954 if (err) 1955 return err; 1956 } 1957 return 0; 1958 } 1959 1960 1961 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, 1962 const void *name) 1963 { 1964 int i; 1965 1966 for (i = 0; i < obj->nr_extern; i++) { 1967 if (strcmp(obj->externs[i].name, name) == 0) 1968 return &obj->externs[i]; 1969 } 1970 return NULL; 1971 } 1972 1973 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, 1974 char value) 1975 { 1976 switch (ext->kcfg.type) { 1977 case KCFG_BOOL: 1978 if (value == 'm') { 1979 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", 1980 ext->name, value); 1981 return -EINVAL; 1982 } 1983 *(bool *)ext_val = value == 'y' ? true : false; 1984 break; 1985 case KCFG_TRISTATE: 1986 if (value == 'y') 1987 *(enum libbpf_tristate *)ext_val = TRI_YES; 1988 else if (value == 'm') 1989 *(enum libbpf_tristate *)ext_val = TRI_MODULE; 1990 else /* value == 'n' */ 1991 *(enum libbpf_tristate *)ext_val = TRI_NO; 1992 break; 1993 case KCFG_CHAR: 1994 *(char *)ext_val = value; 1995 break; 1996 case KCFG_UNKNOWN: 1997 case KCFG_INT: 1998 case KCFG_CHAR_ARR: 1999 default: 2000 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", 2001 ext->name, value); 2002 return -EINVAL; 2003 } 2004 ext->is_set = true; 2005 return 0; 2006 } 2007 2008 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, 2009 const char *value) 2010 { 2011 size_t len; 2012 2013 if (ext->kcfg.type != KCFG_CHAR_ARR) { 2014 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", 2015 ext->name, value); 2016 return -EINVAL; 2017 } 2018 2019 len = strlen(value); 2020 if (value[len - 1] != '"') { 2021 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", 2022 ext->name, value); 2023 return -EINVAL; 2024 } 2025 2026 /* strip quotes */ 2027 len -= 2; 2028 if (len >= ext->kcfg.sz) { 2029 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", 2030 ext->name, value, len, ext->kcfg.sz - 1); 2031 len = ext->kcfg.sz - 1; 2032 } 2033 memcpy(ext_val, value + 1, len); 2034 ext_val[len] = '\0'; 2035 ext->is_set = true; 2036 return 0; 2037 } 2038 2039 static int parse_u64(const char *value, __u64 *res) 2040 { 2041 char *value_end; 2042 int err; 2043 2044 errno = 0; 2045 *res = strtoull(value, &value_end, 0); 2046 if (errno) { 2047 err = -errno; 2048 pr_warn("failed to parse '%s' as integer: %d\n", value, err); 2049 return err; 2050 } 2051 if (*value_end) { 2052 pr_warn("failed to parse '%s' as integer completely\n", value); 2053 return -EINVAL; 2054 } 2055 return 0; 2056 } 2057 2058 static bool 
is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) 2059 { 2060 int bit_sz = ext->kcfg.sz * 8; 2061 2062 if (ext->kcfg.sz == 8) 2063 return true; 2064 2065 /* Validate that value stored in u64 fits in integer of `ext->sz` 2066 * bytes size without any loss of information. If the target integer 2067 * is signed, we rely on the following limits of integer type of 2068 * Y bits and subsequent transformation: 2069 * 2070 * -2^(Y-1) <= X <= 2^(Y-1) - 1 2071 * 0 <= X + 2^(Y-1) <= 2^Y - 1 2072 * 0 <= X + 2^(Y-1) < 2^Y 2073 * 2074 * For unsigned target integer, check that all the (64 - Y) bits are 2075 * zero. 2076 */ 2077 if (ext->kcfg.is_signed) 2078 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); 2079 else 2080 return (v >> bit_sz) == 0; 2081 } 2082 2083 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 2084 __u64 value) 2085 { 2086 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 2087 ext->kcfg.type != KCFG_BOOL) { 2088 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 2089 ext->name, (unsigned long long)value); 2090 return -EINVAL; 2091 } 2092 if (ext->kcfg.type == KCFG_BOOL && value > 1) { 2093 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 2094 ext->name, (unsigned long long)value); 2095 return -EINVAL; 2096 2097 } 2098 if (!is_kcfg_value_in_range(ext, value)) { 2099 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 2100 ext->name, (unsigned long long)value, ext->kcfg.sz); 2101 return -ERANGE; 2102 } 2103 switch (ext->kcfg.sz) { 2104 case 1: 2105 *(__u8 *)ext_val = value; 2106 break; 2107 case 2: 2108 *(__u16 *)ext_val = value; 2109 break; 2110 case 4: 2111 *(__u32 *)ext_val = value; 2112 break; 2113 case 8: 2114 *(__u64 *)ext_val = value; 2115 break; 2116 default: 2117 return -EINVAL; 2118 } 2119 ext->is_set = true; 2120 return 0; 2121 } 2122 2123 static int bpf_object__process_kconfig_line(struct bpf_object *obj, 2124 char *buf, void *data) 2125 { 2126 struct extern_desc *ext; 2127 char *sep, *value; 2128 int len, err = 0; 2129 void *ext_val; 2130 __u64 num; 2131 2132 if (!str_has_pfx(buf, "CONFIG_")) 2133 return 0; 2134 2135 sep = strchr(buf, '='); 2136 if (!sep) { 2137 pr_warn("failed to parse '%s': no separator\n", buf); 2138 return -EINVAL; 2139 } 2140 2141 /* Trim ending '\n' */ 2142 len = strlen(buf); 2143 if (buf[len - 1] == '\n') 2144 buf[len - 1] = '\0'; 2145 /* Split on '=' and ensure that a value is present. 
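 * For example (made-up config lines): "CONFIG_BPF_JIT=y" is split into
 * key "CONFIG_BPF_JIT" and value "y", while "CONFIG_FOO=" carries no
 * value after '=' and is rejected below with -EINVAL.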
*/ 2146 *sep = '\0'; 2147 if (!sep[1]) { 2148 *sep = '='; 2149 pr_warn("failed to parse '%s': no value\n", buf); 2150 return -EINVAL; 2151 } 2152 2153 ext = find_extern_by_name(obj, buf); 2154 if (!ext || ext->is_set) 2155 return 0; 2156 2157 ext_val = data + ext->kcfg.data_off; 2158 value = sep + 1; 2159 2160 switch (*value) { 2161 case 'y': case 'n': case 'm': 2162 err = set_kcfg_value_tri(ext, ext_val, *value); 2163 break; 2164 case '"': 2165 err = set_kcfg_value_str(ext, ext_val, value); 2166 break; 2167 default: 2168 /* assume integer */ 2169 err = parse_u64(value, &num); 2170 if (err) { 2171 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 2172 return err; 2173 } 2174 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 2175 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 2176 return -EINVAL; 2177 } 2178 err = set_kcfg_value_num(ext, ext_val, num); 2179 break; 2180 } 2181 if (err) 2182 return err; 2183 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 2184 return 0; 2185 } 2186 2187 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) 2188 { 2189 char buf[PATH_MAX]; 2190 struct utsname uts; 2191 int len, err = 0; 2192 gzFile file; 2193 2194 uname(&uts); 2195 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); 2196 if (len < 0) 2197 return -EINVAL; 2198 else if (len >= PATH_MAX) 2199 return -ENAMETOOLONG; 2200 2201 /* gzopen also accepts uncompressed files. */ 2202 file = gzopen(buf, "re"); 2203 if (!file) 2204 file = gzopen("/proc/config.gz", "re"); 2205 2206 if (!file) { 2207 pr_warn("failed to open system Kconfig\n"); 2208 return -ENOENT; 2209 } 2210 2211 while (gzgets(file, buf, sizeof(buf))) { 2212 err = bpf_object__process_kconfig_line(obj, buf, data); 2213 if (err) { 2214 pr_warn("error parsing system Kconfig line '%s': %d\n", 2215 buf, err); 2216 goto out; 2217 } 2218 } 2219 2220 out: 2221 gzclose(file); 2222 return err; 2223 } 2224 2225 static int bpf_object__read_kconfig_mem(struct bpf_object *obj, 2226 const char *config, void *data) 2227 { 2228 char buf[PATH_MAX]; 2229 int err = 0; 2230 FILE *file; 2231 2232 file = fmemopen((void *)config, strlen(config), "r"); 2233 if (!file) { 2234 err = -errno; 2235 pr_warn("failed to open in-memory Kconfig: %d\n", err); 2236 return err; 2237 } 2238 2239 while (fgets(buf, sizeof(buf), file)) { 2240 err = bpf_object__process_kconfig_line(obj, buf, data); 2241 if (err) { 2242 pr_warn("error parsing in-memory Kconfig line '%s': %d\n", 2243 buf, err); 2244 break; 2245 } 2246 } 2247 2248 fclose(file); 2249 return err; 2250 } 2251 2252 static int bpf_object__init_kconfig_map(struct bpf_object *obj) 2253 { 2254 struct extern_desc *last_ext = NULL, *ext; 2255 size_t map_sz; 2256 int i, err; 2257 2258 for (i = 0; i < obj->nr_extern; i++) { 2259 ext = &obj->externs[i]; 2260 if (ext->type == EXT_KCFG) 2261 last_ext = ext; 2262 } 2263 2264 if (!last_ext) 2265 return 0; 2266 2267 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; 2268 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, 2269 ".kconfig", obj->efile.symbols_shndx, 2270 NULL, map_sz); 2271 if (err) 2272 return err; 2273 2274 obj->kconfig_map_idx = obj->nr_maps - 1; 2275 2276 return 0; 2277 } 2278 2279 const struct btf_type * 2280 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) 2281 { 2282 const struct btf_type *t = btf__type_by_id(btf, id); 2283 2284 if (res_id) 2285 *res_id = id; 2286 2287 while (btf_is_mod(t) || 
btf_is_typedef(t)) { 2288 if (res_id) 2289 *res_id = t->type; 2290 t = btf__type_by_id(btf, t->type); 2291 } 2292 2293 return t; 2294 } 2295 2296 static const struct btf_type * 2297 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 2298 { 2299 const struct btf_type *t; 2300 2301 t = skip_mods_and_typedefs(btf, id, NULL); 2302 if (!btf_is_ptr(t)) 2303 return NULL; 2304 2305 t = skip_mods_and_typedefs(btf, t->type, res_id); 2306 2307 return btf_is_func_proto(t) ? t : NULL; 2308 } 2309 2310 static const char *__btf_kind_str(__u16 kind) 2311 { 2312 switch (kind) { 2313 case BTF_KIND_UNKN: return "void"; 2314 case BTF_KIND_INT: return "int"; 2315 case BTF_KIND_PTR: return "ptr"; 2316 case BTF_KIND_ARRAY: return "array"; 2317 case BTF_KIND_STRUCT: return "struct"; 2318 case BTF_KIND_UNION: return "union"; 2319 case BTF_KIND_ENUM: return "enum"; 2320 case BTF_KIND_FWD: return "fwd"; 2321 case BTF_KIND_TYPEDEF: return "typedef"; 2322 case BTF_KIND_VOLATILE: return "volatile"; 2323 case BTF_KIND_CONST: return "const"; 2324 case BTF_KIND_RESTRICT: return "restrict"; 2325 case BTF_KIND_FUNC: return "func"; 2326 case BTF_KIND_FUNC_PROTO: return "func_proto"; 2327 case BTF_KIND_VAR: return "var"; 2328 case BTF_KIND_DATASEC: return "datasec"; 2329 case BTF_KIND_FLOAT: return "float"; 2330 case BTF_KIND_DECL_TAG: return "decl_tag"; 2331 case BTF_KIND_TYPE_TAG: return "type_tag"; 2332 case BTF_KIND_ENUM64: return "enum64"; 2333 default: return "unknown"; 2334 } 2335 } 2336 2337 const char *btf_kind_str(const struct btf_type *t) 2338 { 2339 return __btf_kind_str(btf_kind(t)); 2340 } 2341 2342 /* 2343 * Fetch integer attribute of BTF map definition. Such attributes are 2344 * represented using a pointer to an array, in which dimensionality of array 2345 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; 2346 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF 2347 * type definition, while using only sizeof(void *) space in ELF data section. 
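 * This matches what the __uint() convenience macro from bpf_helpers.h
 * expands to (roughly "#define __uint(name, val) int (*name)[val]"),
 * so a hypothetical definition like
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY);
 *           __uint(max_entries, 64);
 *   } my_map SEC(".maps");
 *
 * is decoded here purely from the two arrays' dimensions.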
2348 */ 2349 static bool get_map_field_int(const char *map_name, const struct btf *btf, 2350 const struct btf_member *m, __u32 *res) 2351 { 2352 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2353 const char *name = btf__name_by_offset(btf, m->name_off); 2354 const struct btf_array *arr_info; 2355 const struct btf_type *arr_t; 2356 2357 if (!btf_is_ptr(t)) { 2358 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", 2359 map_name, name, btf_kind_str(t)); 2360 return false; 2361 } 2362 2363 arr_t = btf__type_by_id(btf, t->type); 2364 if (!arr_t) { 2365 pr_warn("map '%s': attr '%s': type [%u] not found.\n", 2366 map_name, name, t->type); 2367 return false; 2368 } 2369 if (!btf_is_array(arr_t)) { 2370 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", 2371 map_name, name, btf_kind_str(arr_t)); 2372 return false; 2373 } 2374 arr_info = btf_array(arr_t); 2375 *res = arr_info->nelems; 2376 return true; 2377 } 2378 2379 static bool get_map_field_long(const char *map_name, const struct btf *btf, 2380 const struct btf_member *m, __u64 *res) 2381 { 2382 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2383 const char *name = btf__name_by_offset(btf, m->name_off); 2384 2385 if (btf_is_ptr(t)) { 2386 __u32 res32; 2387 bool ret; 2388 2389 ret = get_map_field_int(map_name, btf, m, &res32); 2390 if (ret) 2391 *res = (__u64)res32; 2392 return ret; 2393 } 2394 2395 if (!btf_is_enum(t) && !btf_is_enum64(t)) { 2396 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", 2397 map_name, name, btf_kind_str(t)); 2398 return false; 2399 } 2400 2401 if (btf_vlen(t) != 1) { 2402 pr_warn("map '%s': attr '%s': invalid __ulong\n", 2403 map_name, name); 2404 return false; 2405 } 2406 2407 if (btf_is_enum(t)) { 2408 const struct btf_enum *e = btf_enum(t); 2409 2410 *res = e->val; 2411 } else { 2412 const struct btf_enum64 *e = btf_enum64(t); 2413 2414 *res = btf_enum64_value(e); 2415 } 2416 return true; 2417 } 2418 2419 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) 2420 { 2421 int len; 2422 2423 len = snprintf(buf, buf_sz, "%s/%s", path, name); 2424 if (len < 0) 2425 return -EINVAL; 2426 if (len >= buf_sz) 2427 return -ENAMETOOLONG; 2428 2429 return 0; 2430 } 2431 2432 static int build_map_pin_path(struct bpf_map *map, const char *path) 2433 { 2434 char buf[PATH_MAX]; 2435 int err; 2436 2437 if (!path) 2438 path = BPF_FS_DEFAULT_PATH; 2439 2440 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 2441 if (err) 2442 return err; 2443 2444 return bpf_map__set_pin_path(map, buf); 2445 } 2446 2447 /* should match definition in bpf_helpers.h */ 2448 enum libbpf_pin_type { 2449 LIBBPF_PIN_NONE, 2450 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 2451 LIBBPF_PIN_BY_NAME, 2452 }; 2453 2454 int parse_btf_map_def(const char *map_name, struct btf *btf, 2455 const struct btf_type *def_t, bool strict, 2456 struct btf_map_def *map_def, struct btf_map_def *inner_def) 2457 { 2458 const struct btf_type *t; 2459 const struct btf_member *m; 2460 bool is_inner = inner_def == NULL; 2461 int vlen, i; 2462 2463 vlen = btf_vlen(def_t); 2464 m = btf_members(def_t); 2465 for (i = 0; i < vlen; i++, m++) { 2466 const char *name = btf__name_by_offset(btf, m->name_off); 2467 2468 if (!name) { 2469 pr_warn("map '%s': invalid field #%d.\n", map_name, i); 2470 return -EINVAL; 2471 } 2472 if (strcmp(name, "type") == 0) { 2473 if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) 2474 return -EINVAL; 2475 
map_def->parts |= MAP_DEF_MAP_TYPE; 2476 } else if (strcmp(name, "max_entries") == 0) { 2477 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) 2478 return -EINVAL; 2479 map_def->parts |= MAP_DEF_MAX_ENTRIES; 2480 } else if (strcmp(name, "map_flags") == 0) { 2481 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) 2482 return -EINVAL; 2483 map_def->parts |= MAP_DEF_MAP_FLAGS; 2484 } else if (strcmp(name, "numa_node") == 0) { 2485 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) 2486 return -EINVAL; 2487 map_def->parts |= MAP_DEF_NUMA_NODE; 2488 } else if (strcmp(name, "key_size") == 0) { 2489 __u32 sz; 2490 2491 if (!get_map_field_int(map_name, btf, m, &sz)) 2492 return -EINVAL; 2493 if (map_def->key_size && map_def->key_size != sz) { 2494 pr_warn("map '%s': conflicting key size %u != %u.\n", 2495 map_name, map_def->key_size, sz); 2496 return -EINVAL; 2497 } 2498 map_def->key_size = sz; 2499 map_def->parts |= MAP_DEF_KEY_SIZE; 2500 } else if (strcmp(name, "key") == 0) { 2501 __s64 sz; 2502 2503 t = btf__type_by_id(btf, m->type); 2504 if (!t) { 2505 pr_warn("map '%s': key type [%d] not found.\n", 2506 map_name, m->type); 2507 return -EINVAL; 2508 } 2509 if (!btf_is_ptr(t)) { 2510 pr_warn("map '%s': key spec is not PTR: %s.\n", 2511 map_name, btf_kind_str(t)); 2512 return -EINVAL; 2513 } 2514 sz = btf__resolve_size(btf, t->type); 2515 if (sz < 0) { 2516 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", 2517 map_name, t->type, (ssize_t)sz); 2518 return sz; 2519 } 2520 if (map_def->key_size && map_def->key_size != sz) { 2521 pr_warn("map '%s': conflicting key size %u != %zd.\n", 2522 map_name, map_def->key_size, (ssize_t)sz); 2523 return -EINVAL; 2524 } 2525 map_def->key_size = sz; 2526 map_def->key_type_id = t->type; 2527 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; 2528 } else if (strcmp(name, "value_size") == 0) { 2529 __u32 sz; 2530 2531 if (!get_map_field_int(map_name, btf, m, &sz)) 2532 return -EINVAL; 2533 if (map_def->value_size && map_def->value_size != sz) { 2534 pr_warn("map '%s': conflicting value size %u != %u.\n", 2535 map_name, map_def->value_size, sz); 2536 return -EINVAL; 2537 } 2538 map_def->value_size = sz; 2539 map_def->parts |= MAP_DEF_VALUE_SIZE; 2540 } else if (strcmp(name, "value") == 0) { 2541 __s64 sz; 2542 2543 t = btf__type_by_id(btf, m->type); 2544 if (!t) { 2545 pr_warn("map '%s': value type [%d] not found.\n", 2546 map_name, m->type); 2547 return -EINVAL; 2548 } 2549 if (!btf_is_ptr(t)) { 2550 pr_warn("map '%s': value spec is not PTR: %s.\n", 2551 map_name, btf_kind_str(t)); 2552 return -EINVAL; 2553 } 2554 sz = btf__resolve_size(btf, t->type); 2555 if (sz < 0) { 2556 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", 2557 map_name, t->type, (ssize_t)sz); 2558 return sz; 2559 } 2560 if (map_def->value_size && map_def->value_size != sz) { 2561 pr_warn("map '%s': conflicting value size %u != %zd.\n", 2562 map_name, map_def->value_size, (ssize_t)sz); 2563 return -EINVAL; 2564 } 2565 map_def->value_size = sz; 2566 map_def->value_type_id = t->type; 2567 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; 2568 } 2569 else if (strcmp(name, "values") == 0) { 2570 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); 2571 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; 2572 const char *desc = is_map_in_map ? 
"map-in-map inner" : "prog-array value"; 2573 char inner_map_name[128]; 2574 int err; 2575 2576 if (is_inner) { 2577 pr_warn("map '%s': multi-level inner maps not supported.\n", 2578 map_name); 2579 return -ENOTSUP; 2580 } 2581 if (i != vlen - 1) { 2582 pr_warn("map '%s': '%s' member should be last.\n", 2583 map_name, name); 2584 return -EINVAL; 2585 } 2586 if (!is_map_in_map && !is_prog_array) { 2587 pr_warn("map '%s': should be map-in-map or prog-array.\n", 2588 map_name); 2589 return -ENOTSUP; 2590 } 2591 if (map_def->value_size && map_def->value_size != 4) { 2592 pr_warn("map '%s': conflicting value size %u != 4.\n", 2593 map_name, map_def->value_size); 2594 return -EINVAL; 2595 } 2596 map_def->value_size = 4; 2597 t = btf__type_by_id(btf, m->type); 2598 if (!t) { 2599 pr_warn("map '%s': %s type [%d] not found.\n", 2600 map_name, desc, m->type); 2601 return -EINVAL; 2602 } 2603 if (!btf_is_array(t) || btf_array(t)->nelems) { 2604 pr_warn("map '%s': %s spec is not a zero-sized array.\n", 2605 map_name, desc); 2606 return -EINVAL; 2607 } 2608 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); 2609 if (!btf_is_ptr(t)) { 2610 pr_warn("map '%s': %s def is of unexpected kind %s.\n", 2611 map_name, desc, btf_kind_str(t)); 2612 return -EINVAL; 2613 } 2614 t = skip_mods_and_typedefs(btf, t->type, NULL); 2615 if (is_prog_array) { 2616 if (!btf_is_func_proto(t)) { 2617 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", 2618 map_name, btf_kind_str(t)); 2619 return -EINVAL; 2620 } 2621 continue; 2622 } 2623 if (!btf_is_struct(t)) { 2624 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", 2625 map_name, btf_kind_str(t)); 2626 return -EINVAL; 2627 } 2628 2629 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); 2630 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); 2631 if (err) 2632 return err; 2633 2634 map_def->parts |= MAP_DEF_INNER_MAP; 2635 } else if (strcmp(name, "pinning") == 0) { 2636 __u32 val; 2637 2638 if (is_inner) { 2639 pr_warn("map '%s': inner def can't be pinned.\n", map_name); 2640 return -EINVAL; 2641 } 2642 if (!get_map_field_int(map_name, btf, m, &val)) 2643 return -EINVAL; 2644 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { 2645 pr_warn("map '%s': invalid pinning value %u.\n", 2646 map_name, val); 2647 return -EINVAL; 2648 } 2649 map_def->pinning = val; 2650 map_def->parts |= MAP_DEF_PINNING; 2651 } else if (strcmp(name, "map_extra") == 0) { 2652 __u64 map_extra; 2653 2654 if (!get_map_field_long(map_name, btf, m, &map_extra)) 2655 return -EINVAL; 2656 map_def->map_extra = map_extra; 2657 map_def->parts |= MAP_DEF_MAP_EXTRA; 2658 } else { 2659 if (strict) { 2660 pr_warn("map '%s': unknown field '%s'.\n", map_name, name); 2661 return -ENOTSUP; 2662 } 2663 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name); 2664 } 2665 } 2666 2667 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) { 2668 pr_warn("map '%s': map type isn't specified.\n", map_name); 2669 return -EINVAL; 2670 } 2671 2672 return 0; 2673 } 2674 2675 static size_t adjust_ringbuf_sz(size_t sz) 2676 { 2677 __u32 page_sz = sysconf(_SC_PAGE_SIZE); 2678 __u32 mul; 2679 2680 /* if user forgot to set any size, make sure they see error */ 2681 if (sz == 0) 2682 return 0; 2683 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be 2684 * a power-of-2 multiple of kernel's page size. If user diligently 2685 * satisified these conditions, pass the size through. 
2686 */ 2687 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) 2688 return sz; 2689 2690 /* Otherwise find closest (page_sz * power_of_2) product bigger than 2691 * user-set size to satisfy both user size request and kernel 2692 * requirements and substitute correct max_entries for map creation. 2693 */ 2694 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { 2695 if (mul * page_sz > sz) 2696 return mul * page_sz; 2697 } 2698 2699 /* if it's impossible to satisfy the conditions (i.e., user size is 2700 * very close to UINT_MAX but is not a power-of-2 multiple of 2701 * page_size) then just return original size and let kernel reject it 2702 */ 2703 return sz; 2704 } 2705 2706 static bool map_is_ringbuf(const struct bpf_map *map) 2707 { 2708 return map->def.type == BPF_MAP_TYPE_RINGBUF || 2709 map->def.type == BPF_MAP_TYPE_USER_RINGBUF; 2710 } 2711 2712 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) 2713 { 2714 map->def.type = def->map_type; 2715 map->def.key_size = def->key_size; 2716 map->def.value_size = def->value_size; 2717 map->def.max_entries = def->max_entries; 2718 map->def.map_flags = def->map_flags; 2719 map->map_extra = def->map_extra; 2720 2721 map->numa_node = def->numa_node; 2722 map->btf_key_type_id = def->key_type_id; 2723 map->btf_value_type_id = def->value_type_id; 2724 2725 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 2726 if (map_is_ringbuf(map)) 2727 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 2728 2729 if (def->parts & MAP_DEF_MAP_TYPE) 2730 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); 2731 2732 if (def->parts & MAP_DEF_KEY_TYPE) 2733 pr_debug("map '%s': found key [%u], sz = %u.\n", 2734 map->name, def->key_type_id, def->key_size); 2735 else if (def->parts & MAP_DEF_KEY_SIZE) 2736 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size); 2737 2738 if (def->parts & MAP_DEF_VALUE_TYPE) 2739 pr_debug("map '%s': found value [%u], sz = %u.\n", 2740 map->name, def->value_type_id, def->value_size); 2741 else if (def->parts & MAP_DEF_VALUE_SIZE) 2742 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size); 2743 2744 if (def->parts & MAP_DEF_MAX_ENTRIES) 2745 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); 2746 if (def->parts & MAP_DEF_MAP_FLAGS) 2747 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); 2748 if (def->parts & MAP_DEF_MAP_EXTRA) 2749 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, 2750 (unsigned long long)def->map_extra); 2751 if (def->parts & MAP_DEF_PINNING) 2752 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); 2753 if (def->parts & MAP_DEF_NUMA_NODE) 2754 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); 2755 2756 if (def->parts & MAP_DEF_INNER_MAP) 2757 pr_debug("map '%s': found inner map definition.\n", map->name); 2758 } 2759 2760 static const char *btf_var_linkage_str(__u32 linkage) 2761 { 2762 switch (linkage) { 2763 case BTF_VAR_STATIC: return "static"; 2764 case BTF_VAR_GLOBAL_ALLOCATED: return "global"; 2765 case BTF_VAR_GLOBAL_EXTERN: return "extern"; 2766 default: return "unknown"; 2767 } 2768 } 2769 2770 static int bpf_object__init_user_btf_map(struct bpf_object *obj, 2771 const struct btf_type *sec, 2772 int var_idx, int sec_idx, 2773 const Elf_Data *data, bool strict, 2774 const char *pin_root_path) 2775 { 2776 struct btf_map_def map_def = {}, inner_def = {}; 2777 const struct btf_type *var, 
*def; 2778 const struct btf_var_secinfo *vi; 2779 const struct btf_var *var_extra; 2780 const char *map_name; 2781 struct bpf_map *map; 2782 int err; 2783 2784 vi = btf_var_secinfos(sec) + var_idx; 2785 var = btf__type_by_id(obj->btf, vi->type); 2786 var_extra = btf_var(var); 2787 map_name = btf__name_by_offset(obj->btf, var->name_off); 2788 2789 if (map_name == NULL || map_name[0] == '\0') { 2790 pr_warn("map #%d: empty name.\n", var_idx); 2791 return -EINVAL; 2792 } 2793 if ((__u64)vi->offset + vi->size > data->d_size) { 2794 pr_warn("map '%s' BTF data is corrupted.\n", map_name); 2795 return -EINVAL; 2796 } 2797 if (!btf_is_var(var)) { 2798 pr_warn("map '%s': unexpected var kind %s.\n", 2799 map_name, btf_kind_str(var)); 2800 return -EINVAL; 2801 } 2802 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { 2803 pr_warn("map '%s': unsupported map linkage %s.\n", 2804 map_name, btf_var_linkage_str(var_extra->linkage)); 2805 return -EOPNOTSUPP; 2806 } 2807 2808 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 2809 if (!btf_is_struct(def)) { 2810 pr_warn("map '%s': unexpected def kind %s.\n", 2811 map_name, btf_kind_str(var)); 2812 return -EINVAL; 2813 } 2814 if (def->size > vi->size) { 2815 pr_warn("map '%s': invalid def size.\n", map_name); 2816 return -EINVAL; 2817 } 2818 2819 map = bpf_object__add_map(obj); 2820 if (IS_ERR(map)) 2821 return PTR_ERR(map); 2822 map->name = strdup(map_name); 2823 if (!map->name) { 2824 pr_warn("map '%s': failed to alloc map name.\n", map_name); 2825 return -ENOMEM; 2826 } 2827 map->libbpf_type = LIBBPF_MAP_UNSPEC; 2828 map->def.type = BPF_MAP_TYPE_UNSPEC; 2829 map->sec_idx = sec_idx; 2830 map->sec_offset = vi->offset; 2831 map->btf_var_idx = var_idx; 2832 pr_debug("map '%s': at sec_idx %d, offset %zu.\n", 2833 map_name, map->sec_idx, map->sec_offset); 2834 2835 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); 2836 if (err) 2837 return err; 2838 2839 fill_map_from_def(map, &map_def); 2840 2841 if (map_def.pinning == LIBBPF_PIN_BY_NAME) { 2842 err = build_map_pin_path(map, pin_root_path); 2843 if (err) { 2844 pr_warn("map '%s': couldn't build pin path.\n", map->name); 2845 return err; 2846 } 2847 } 2848 2849 if (map_def.parts & MAP_DEF_INNER_MAP) { 2850 map->inner_map = calloc(1, sizeof(*map->inner_map)); 2851 if (!map->inner_map) 2852 return -ENOMEM; 2853 map->inner_map->fd = create_placeholder_fd(); 2854 if (map->inner_map->fd < 0) 2855 return map->inner_map->fd; 2856 map->inner_map->sec_idx = sec_idx; 2857 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); 2858 if (!map->inner_map->name) 2859 return -ENOMEM; 2860 sprintf(map->inner_map->name, "%s.inner", map_name); 2861 2862 fill_map_from_def(map->inner_map, &inner_def); 2863 } 2864 2865 err = map_fill_btf_type_info(obj, map); 2866 if (err) 2867 return err; 2868 2869 return 0; 2870 } 2871 2872 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, 2873 const char *sec_name, int sec_idx, 2874 void *data, size_t data_sz) 2875 { 2876 const long page_sz = sysconf(_SC_PAGE_SIZE); 2877 size_t mmap_sz; 2878 2879 mmap_sz = bpf_map_mmap_sz(obj->arena_map); 2880 if (roundup(data_sz, page_sz) > mmap_sz) { 2881 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", 2882 sec_name, mmap_sz, data_sz); 2883 return -E2BIG; 2884 } 2885 2886 obj->arena_data = malloc(data_sz); 2887 if (!obj->arena_data) 2888 return -ENOMEM; 2889 memcpy(obj->arena_data, data, data_sz); 2890 
obj->arena_data_sz = data_sz; 2891 2892 /* make bpf_map__init_value() work for ARENA maps */ 2893 map->mmaped = obj->arena_data; 2894 2895 return 0; 2896 } 2897 2898 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, 2899 const char *pin_root_path) 2900 { 2901 const struct btf_type *sec = NULL; 2902 int nr_types, i, vlen, err; 2903 const struct btf_type *t; 2904 const char *name; 2905 Elf_Data *data; 2906 Elf_Scn *scn; 2907 2908 if (obj->efile.btf_maps_shndx < 0) 2909 return 0; 2910 2911 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); 2912 data = elf_sec_data(obj, scn); 2913 if (!scn || !data) { 2914 pr_warn("elf: failed to get %s map definitions for %s\n", 2915 MAPS_ELF_SEC, obj->path); 2916 return -EINVAL; 2917 } 2918 2919 nr_types = btf__type_cnt(obj->btf); 2920 for (i = 1; i < nr_types; i++) { 2921 t = btf__type_by_id(obj->btf, i); 2922 if (!btf_is_datasec(t)) 2923 continue; 2924 name = btf__name_by_offset(obj->btf, t->name_off); 2925 if (strcmp(name, MAPS_ELF_SEC) == 0) { 2926 sec = t; 2927 obj->efile.btf_maps_sec_btf_id = i; 2928 break; 2929 } 2930 } 2931 2932 if (!sec) { 2933 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); 2934 return -ENOENT; 2935 } 2936 2937 vlen = btf_vlen(sec); 2938 for (i = 0; i < vlen; i++) { 2939 err = bpf_object__init_user_btf_map(obj, sec, i, 2940 obj->efile.btf_maps_shndx, 2941 data, strict, 2942 pin_root_path); 2943 if (err) 2944 return err; 2945 } 2946 2947 for (i = 0; i < obj->nr_maps; i++) { 2948 struct bpf_map *map = &obj->maps[i]; 2949 2950 if (map->def.type != BPF_MAP_TYPE_ARENA) 2951 continue; 2952 2953 if (obj->arena_map) { 2954 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", 2955 map->name, obj->arena_map->name); 2956 return -EINVAL; 2957 } 2958 obj->arena_map = map; 2959 2960 if (obj->efile.arena_data) { 2961 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, 2962 obj->efile.arena_data->d_buf, 2963 obj->efile.arena_data->d_size); 2964 if (err) 2965 return err; 2966 } 2967 } 2968 if (obj->efile.arena_data && !obj->arena_map) { 2969 pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", 2970 ARENA_SEC); 2971 return -ENOENT; 2972 } 2973 2974 return 0; 2975 } 2976 2977 static int bpf_object__init_maps(struct bpf_object *obj, 2978 const struct bpf_object_open_opts *opts) 2979 { 2980 const char *pin_root_path; 2981 bool strict; 2982 int err = 0; 2983 2984 strict = !OPTS_GET(opts, relaxed_maps, false); 2985 pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 2986 2987 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 2988 err = err ?: bpf_object__init_global_data_maps(obj); 2989 err = err ?: bpf_object__init_kconfig_map(obj); 2990 err = err ?: bpf_object_init_struct_ops(obj); 2991 2992 return err; 2993 } 2994 2995 static bool section_have_execinstr(struct bpf_object *obj, int idx) 2996 { 2997 Elf64_Shdr *sh; 2998 2999 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); 3000 if (!sh) 3001 return false; 3002 3003 return sh->sh_flags & SHF_EXECINSTR; 3004 } 3005 3006 static bool starts_with_qmark(const char *s) 3007 { 3008 return s && s[0] == '?'; 3009 } 3010 3011 static bool btf_needs_sanitization(struct bpf_object *obj) 3012 { 3013 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3014 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3015 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3016 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 
3017 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3018 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3019 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3020 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3021 3022 return !has_func || !has_datasec || !has_func_global || !has_float || 3023 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; 3024 } 3025 3026 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) 3027 { 3028 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3029 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3030 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3031 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3032 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3033 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3034 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3035 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3036 int enum64_placeholder_id = 0; 3037 struct btf_type *t; 3038 int i, j, vlen; 3039 3040 for (i = 1; i < btf__type_cnt(btf); i++) { 3041 t = (struct btf_type *)btf__type_by_id(btf, i); 3042 3043 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { 3044 /* replace VAR/DECL_TAG with INT */ 3045 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); 3046 /* 3047 * using size = 1 is the safest choice, 4 will be too 3048 * big and cause kernel BTF validation failure if 3049 * original variable took less than 4 bytes 3050 */ 3051 t->size = 1; 3052 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); 3053 } else if (!has_datasec && btf_is_datasec(t)) { 3054 /* replace DATASEC with STRUCT */ 3055 const struct btf_var_secinfo *v = btf_var_secinfos(t); 3056 struct btf_member *m = btf_members(t); 3057 struct btf_type *vt; 3058 char *name; 3059 3060 name = (char *)btf__name_by_offset(btf, t->name_off); 3061 while (*name) { 3062 if (*name == '.' || *name == '?') 3063 *name = '_'; 3064 name++; 3065 } 3066 3067 vlen = btf_vlen(t); 3068 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); 3069 for (j = 0; j < vlen; j++, v++, m++) { 3070 /* order of field assignments is important */ 3071 m->offset = v->offset * 8; 3072 m->type = v->type; 3073 /* preserve variable name as member name */ 3074 vt = (void *)btf__type_by_id(btf, v->type); 3075 m->name_off = vt->name_off; 3076 } 3077 } else if (!has_qmark_datasec && btf_is_datasec(t) && 3078 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { 3079 /* replace '?' prefix with '_' for DATASEC names */ 3080 char *name; 3081 3082 name = (char *)btf__name_by_offset(btf, t->name_off); 3083 if (name[0] == '?') 3084 name[0] = '_'; 3085 } else if (!has_func && btf_is_func_proto(t)) { 3086 /* replace FUNC_PROTO with ENUM */ 3087 vlen = btf_vlen(t); 3088 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); 3089 t->size = sizeof(__u32); /* kernel enforced */ 3090 } else if (!has_func && btf_is_func(t)) { 3091 /* replace FUNC with TYPEDEF */ 3092 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 3093 } else if (!has_func_global && btf_is_func(t)) { 3094 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 3095 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 3096 } else if (!has_float && btf_is_float(t)) { 3097 /* replace FLOAT with an equally-sized empty STRUCT; 3098 * since C compilers do not accept e.g. 
"float" as a 3099 * valid struct name, make it anonymous 3100 */ 3101 t->name_off = 0; 3102 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); 3103 } else if (!has_type_tag && btf_is_type_tag(t)) { 3104 /* replace TYPE_TAG with a CONST */ 3105 t->name_off = 0; 3106 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); 3107 } else if (!has_enum64 && btf_is_enum(t)) { 3108 /* clear the kflag */ 3109 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); 3110 } else if (!has_enum64 && btf_is_enum64(t)) { 3111 /* replace ENUM64 with a union */ 3112 struct btf_member *m; 3113 3114 if (enum64_placeholder_id == 0) { 3115 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); 3116 if (enum64_placeholder_id < 0) 3117 return enum64_placeholder_id; 3118 3119 t = (struct btf_type *)btf__type_by_id(btf, i); 3120 } 3121 3122 m = btf_members(t); 3123 vlen = btf_vlen(t); 3124 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); 3125 for (j = 0; j < vlen; j++, m++) { 3126 m->type = enum64_placeholder_id; 3127 m->offset = 0; 3128 } 3129 } 3130 } 3131 3132 return 0; 3133 } 3134 3135 static bool libbpf_needs_btf(const struct bpf_object *obj) 3136 { 3137 return obj->efile.btf_maps_shndx >= 0 || 3138 obj->efile.has_st_ops || 3139 obj->nr_extern > 0; 3140 } 3141 3142 static bool kernel_needs_btf(const struct bpf_object *obj) 3143 { 3144 return obj->efile.has_st_ops; 3145 } 3146 3147 static int bpf_object__init_btf(struct bpf_object *obj, 3148 Elf_Data *btf_data, 3149 Elf_Data *btf_ext_data) 3150 { 3151 int err = -ENOENT; 3152 3153 if (btf_data) { 3154 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 3155 err = libbpf_get_error(obj->btf); 3156 if (err) { 3157 obj->btf = NULL; 3158 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); 3159 goto out; 3160 } 3161 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3162 btf__set_pointer_size(obj->btf, 8); 3163 } 3164 if (btf_ext_data) { 3165 struct btf_ext_info *ext_segs[3]; 3166 int seg_num, sec_num; 3167 3168 if (!obj->btf) { 3169 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", 3170 BTF_EXT_ELF_SEC, BTF_ELF_SEC); 3171 goto out; 3172 } 3173 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); 3174 err = libbpf_get_error(obj->btf_ext); 3175 if (err) { 3176 pr_warn("Error loading ELF section %s: %d. 
Ignored and continue.\n", 3177 BTF_EXT_ELF_SEC, err); 3178 obj->btf_ext = NULL; 3179 goto out; 3180 } 3181 3182 /* setup .BTF.ext to ELF section mapping */ 3183 ext_segs[0] = &obj->btf_ext->func_info; 3184 ext_segs[1] = &obj->btf_ext->line_info; 3185 ext_segs[2] = &obj->btf_ext->core_relo_info; 3186 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 3187 struct btf_ext_info *seg = ext_segs[seg_num]; 3188 const struct btf_ext_info_sec *sec; 3189 const char *sec_name; 3190 Elf_Scn *scn; 3191 3192 if (seg->sec_cnt == 0) 3193 continue; 3194 3195 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 3196 if (!seg->sec_idxs) { 3197 err = -ENOMEM; 3198 goto out; 3199 } 3200 3201 sec_num = 0; 3202 for_each_btf_ext_sec(seg, sec) { 3203 /* preventively increment index to avoid doing 3204 * this before every continue below 3205 */ 3206 sec_num++; 3207 3208 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 3209 if (str_is_empty(sec_name)) 3210 continue; 3211 scn = elf_sec_by_name(obj, sec_name); 3212 if (!scn) 3213 continue; 3214 3215 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 3216 } 3217 } 3218 } 3219 out: 3220 if (err && libbpf_needs_btf(obj)) { 3221 pr_warn("BTF is required, but is missing or corrupted.\n"); 3222 return err; 3223 } 3224 return 0; 3225 } 3226 3227 static int compare_vsi_off(const void *_a, const void *_b) 3228 { 3229 const struct btf_var_secinfo *a = _a; 3230 const struct btf_var_secinfo *b = _b; 3231 3232 return a->offset - b->offset; 3233 } 3234 3235 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, 3236 struct btf_type *t) 3237 { 3238 __u32 size = 0, i, vars = btf_vlen(t); 3239 const char *sec_name = btf__name_by_offset(btf, t->name_off); 3240 struct btf_var_secinfo *vsi; 3241 bool fixup_offsets = false; 3242 int err; 3243 3244 if (!sec_name) { 3245 pr_debug("No name found in string section for DATASEC kind.\n"); 3246 return -ENOENT; 3247 } 3248 3249 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and 3250 * variable offsets set at the previous step. Further, not every 3251 * extern BTF VAR has corresponding ELF symbol preserved, so we skip 3252 * all fixups altogether for such sections and go straight to sorting 3253 * VARs within their DATASEC. 3254 */ 3255 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) 3256 goto sort_vars; 3257 3258 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to 3259 * fix this up. But BPF static linker already fixes this up and fills 3260 * all the sizes and offsets during static linking. So this step has 3261 * to be optional. But the STV_HIDDEN handling is non-optional for any 3262 * non-extern DATASEC, so the variable fixup loop below handles both 3263 * functions at the same time, paying the cost of BTF VAR <-> ELF 3264 * symbol matching just once. 
3265 */ 3266 if (t->size == 0) { 3267 err = find_elf_sec_sz(obj, sec_name, &size); 3268 if (err || !size) { 3269 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", 3270 sec_name, size, err); 3271 return -ENOENT; 3272 } 3273 3274 t->size = size; 3275 fixup_offsets = true; 3276 } 3277 3278 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { 3279 const struct btf_type *t_var; 3280 struct btf_var *var; 3281 const char *var_name; 3282 Elf64_Sym *sym; 3283 3284 t_var = btf__type_by_id(btf, vsi->type); 3285 if (!t_var || !btf_is_var(t_var)) { 3286 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); 3287 return -EINVAL; 3288 } 3289 3290 var = btf_var(t_var); 3291 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) 3292 continue; 3293 3294 var_name = btf__name_by_offset(btf, t_var->name_off); 3295 if (!var_name) { 3296 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", 3297 sec_name, i); 3298 return -ENOENT; 3299 } 3300 3301 sym = find_elf_var_sym(obj, var_name); 3302 if (IS_ERR(sym)) { 3303 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", 3304 sec_name, var_name); 3305 return -ENOENT; 3306 } 3307 3308 if (fixup_offsets) 3309 vsi->offset = sym->st_value; 3310 3311 /* if variable is a global/weak symbol, but has restricted 3312 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR 3313 * as static. This follows similar logic for functions (BPF 3314 * subprogs) and influences libbpf's further decisions about 3315 * whether to make global data BPF array maps as 3316 * BPF_F_MMAPABLE. 3317 */ 3318 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 3319 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) 3320 var->linkage = BTF_VAR_STATIC; 3321 } 3322 3323 sort_vars: 3324 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 3325 return 0; 3326 } 3327 3328 static int bpf_object_fixup_btf(struct bpf_object *obj) 3329 { 3330 int i, n, err = 0; 3331 3332 if (!obj->btf) 3333 return 0; 3334 3335 n = btf__type_cnt(obj->btf); 3336 for (i = 1; i < n; i++) { 3337 struct btf_type *t = btf_type_by_id(obj->btf, i); 3338 3339 /* Loader needs to fix up some of the things compiler 3340 * couldn't get its hands on while emitting BTF. This 3341 * is section size and global variable offset. We use 3342 * the info from the ELF itself for this purpose. 
3343 */ 3344 if (btf_is_datasec(t)) { 3345 err = btf_fixup_datasec(obj, obj->btf, t); 3346 if (err) 3347 return err; 3348 } 3349 } 3350 3351 return 0; 3352 } 3353 3354 static bool prog_needs_vmlinux_btf(struct bpf_program *prog) 3355 { 3356 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || 3357 prog->type == BPF_PROG_TYPE_LSM) 3358 return true; 3359 3360 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 3361 * also need vmlinux BTF 3362 */ 3363 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 3364 return true; 3365 3366 return false; 3367 } 3368 3369 static bool map_needs_vmlinux_btf(struct bpf_map *map) 3370 { 3371 return bpf_map__is_struct_ops(map); 3372 } 3373 3374 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) 3375 { 3376 struct bpf_program *prog; 3377 struct bpf_map *map; 3378 int i; 3379 3380 /* CO-RE relocations need kernel BTF, only when btf_custom_path 3381 * is not specified 3382 */ 3383 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 3384 return true; 3385 3386 /* Support for typed ksyms needs kernel BTF */ 3387 for (i = 0; i < obj->nr_extern; i++) { 3388 const struct extern_desc *ext; 3389 3390 ext = &obj->externs[i]; 3391 if (ext->type == EXT_KSYM && ext->ksym.type_id) 3392 return true; 3393 } 3394 3395 bpf_object__for_each_program(prog, obj) { 3396 if (!prog->autoload) 3397 continue; 3398 if (prog_needs_vmlinux_btf(prog)) 3399 return true; 3400 } 3401 3402 bpf_object__for_each_map(map, obj) { 3403 if (map_needs_vmlinux_btf(map)) 3404 return true; 3405 } 3406 3407 return false; 3408 } 3409 3410 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) 3411 { 3412 int err; 3413 3414 /* btf_vmlinux could be loaded earlier */ 3415 if (obj->btf_vmlinux || obj->gen_loader) 3416 return 0; 3417 3418 if (!force && !obj_needs_vmlinux_btf(obj)) 3419 return 0; 3420 3421 obj->btf_vmlinux = btf__load_vmlinux_btf(); 3422 err = libbpf_get_error(obj->btf_vmlinux); 3423 if (err) { 3424 pr_warn("Error loading vmlinux BTF: %d\n", err); 3425 obj->btf_vmlinux = NULL; 3426 return err; 3427 } 3428 return 0; 3429 } 3430 3431 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) 3432 { 3433 struct btf *kern_btf = obj->btf; 3434 bool btf_mandatory, sanitize; 3435 int i, err = 0; 3436 3437 if (!obj->btf) 3438 return 0; 3439 3440 if (!kernel_supports(obj, FEAT_BTF)) { 3441 if (kernel_needs_btf(obj)) { 3442 err = -EOPNOTSUPP; 3443 goto report; 3444 } 3445 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); 3446 return 0; 3447 } 3448 3449 /* Even though some subprogs are global/weak, user might prefer more 3450 * permissive BPF verification process that BPF verifier performs for 3451 * static functions, taking into account more context from the caller 3452 * functions. In such case, they need to mark such subprogs with 3453 * __attribute__((visibility("hidden"))) and libbpf will adjust 3454 * corresponding FUNC BTF type to be marked as static and trigger more 3455 * involved BPF verification process. 
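 * A hypothetical BPF-side sketch (the __hidden macro from bpf_helpers.h
 * wraps exactly this attribute):
 *
 *   __hidden int scale(int v)
 *   {
 *           return v * 2;
 *   }
 *
 * For such a subprog the loop below rewrites its FUNC BTF info from
 * BTF_FUNC_GLOBAL to BTF_FUNC_STATIC before BTF is loaded into the
 * kernel.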
3456 */ 3457 for (i = 0; i < obj->nr_programs; i++) { 3458 struct bpf_program *prog = &obj->programs[i]; 3459 struct btf_type *t; 3460 const char *name; 3461 int j, n; 3462 3463 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) 3464 continue; 3465 3466 n = btf__type_cnt(obj->btf); 3467 for (j = 1; j < n; j++) { 3468 t = btf_type_by_id(obj->btf, j); 3469 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) 3470 continue; 3471 3472 name = btf__str_by_offset(obj->btf, t->name_off); 3473 if (strcmp(name, prog->name) != 0) 3474 continue; 3475 3476 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); 3477 break; 3478 } 3479 } 3480 3481 sanitize = btf_needs_sanitization(obj); 3482 if (sanitize) { 3483 const void *raw_data; 3484 __u32 sz; 3485 3486 /* clone BTF to sanitize a copy and leave the original intact */ 3487 raw_data = btf__raw_data(obj->btf, &sz); 3488 kern_btf = btf__new(raw_data, sz); 3489 err = libbpf_get_error(kern_btf); 3490 if (err) 3491 return err; 3492 3493 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3494 btf__set_pointer_size(obj->btf, 8); 3495 err = bpf_object__sanitize_btf(obj, kern_btf); 3496 if (err) 3497 return err; 3498 } 3499 3500 if (obj->gen_loader) { 3501 __u32 raw_size = 0; 3502 const void *raw_data = btf__raw_data(kern_btf, &raw_size); 3503 3504 if (!raw_data) 3505 return -ENOMEM; 3506 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); 3507 /* Pretend to have valid FD to pass various fd >= 0 checks. 3508 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 3509 */ 3510 btf__set_fd(kern_btf, 0); 3511 } else { 3512 /* currently BPF_BTF_LOAD only supports log_level 1 */ 3513 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, 3514 obj->log_level ? 1 : 0, obj->token_fd); 3515 } 3516 if (sanitize) { 3517 if (!err) { 3518 /* move fd to libbpf's BTF */ 3519 btf__set_fd(obj->btf, btf__fd(kern_btf)); 3520 btf__set_fd(kern_btf, -1); 3521 } 3522 btf__free(kern_btf); 3523 } 3524 report: 3525 if (err) { 3526 btf_mandatory = kernel_needs_btf(obj); 3527 pr_warn("Error loading .BTF into kernel: %d. %s\n", err, 3528 btf_mandatory ? "BTF is mandatory, can't proceed." 
3529 : "BTF is optional, ignoring."); 3530 if (!btf_mandatory) 3531 err = 0; 3532 } 3533 return err; 3534 } 3535 3536 static const char *elf_sym_str(const struct bpf_object *obj, size_t off) 3537 { 3538 const char *name; 3539 3540 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); 3541 if (!name) { 3542 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3543 off, obj->path, elf_errmsg(-1)); 3544 return NULL; 3545 } 3546 3547 return name; 3548 } 3549 3550 static const char *elf_sec_str(const struct bpf_object *obj, size_t off) 3551 { 3552 const char *name; 3553 3554 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); 3555 if (!name) { 3556 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3557 off, obj->path, elf_errmsg(-1)); 3558 return NULL; 3559 } 3560 3561 return name; 3562 } 3563 3564 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) 3565 { 3566 Elf_Scn *scn; 3567 3568 scn = elf_getscn(obj->efile.elf, idx); 3569 if (!scn) { 3570 pr_warn("elf: failed to get section(%zu) from %s: %s\n", 3571 idx, obj->path, elf_errmsg(-1)); 3572 return NULL; 3573 } 3574 return scn; 3575 } 3576 3577 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) 3578 { 3579 Elf_Scn *scn = NULL; 3580 Elf *elf = obj->efile.elf; 3581 const char *sec_name; 3582 3583 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3584 sec_name = elf_sec_name(obj, scn); 3585 if (!sec_name) 3586 return NULL; 3587 3588 if (strcmp(sec_name, name) != 0) 3589 continue; 3590 3591 return scn; 3592 } 3593 return NULL; 3594 } 3595 3596 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) 3597 { 3598 Elf64_Shdr *shdr; 3599 3600 if (!scn) 3601 return NULL; 3602 3603 shdr = elf64_getshdr(scn); 3604 if (!shdr) { 3605 pr_warn("elf: failed to get section(%zu) header from %s: %s\n", 3606 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3607 return NULL; 3608 } 3609 3610 return shdr; 3611 } 3612 3613 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) 3614 { 3615 const char *name; 3616 Elf64_Shdr *sh; 3617 3618 if (!scn) 3619 return NULL; 3620 3621 sh = elf_sec_hdr(obj, scn); 3622 if (!sh) 3623 return NULL; 3624 3625 name = elf_sec_str(obj, sh->sh_name); 3626 if (!name) { 3627 pr_warn("elf: failed to get section(%zu) name from %s: %s\n", 3628 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3629 return NULL; 3630 } 3631 3632 return name; 3633 } 3634 3635 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) 3636 { 3637 Elf_Data *data; 3638 3639 if (!scn) 3640 return NULL; 3641 3642 data = elf_getdata(scn, 0); 3643 if (!data) { 3644 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", 3645 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>", 3646 obj->path, elf_errmsg(-1)); 3647 return NULL; 3648 } 3649 3650 return data; 3651 } 3652 3653 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) 3654 { 3655 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) 3656 return NULL; 3657 3658 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; 3659 } 3660 3661 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) 3662 { 3663 if (idx >= data->d_size / sizeof(Elf64_Rel)) 3664 return NULL; 3665 3666 return (Elf64_Rel *)data->d_buf + idx; 3667 } 3668 3669 static bool is_sec_name_dwarf(const char *name) 3670 { 3671 /* approximation, but the actual list is too long */ 3672 return str_has_pfx(name, ".debug_"); 3673 } 3674 3675 static bool 
ignore_elf_section(Elf64_Shdr *hdr, const char *name) 3676 { 3677 /* no special handling of .strtab */ 3678 if (hdr->sh_type == SHT_STRTAB) 3679 return true; 3680 3681 /* ignore .llvm_addrsig section as well */ 3682 if (hdr->sh_type == SHT_LLVM_ADDRSIG) 3683 return true; 3684 3685 /* no subprograms will lead to an empty .text section, ignore it */ 3686 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && 3687 strcmp(name, ".text") == 0) 3688 return true; 3689 3690 /* DWARF sections */ 3691 if (is_sec_name_dwarf(name)) 3692 return true; 3693 3694 if (str_has_pfx(name, ".rel")) { 3695 name += sizeof(".rel") - 1; 3696 /* DWARF section relocations */ 3697 if (is_sec_name_dwarf(name)) 3698 return true; 3699 3700 /* .BTF and .BTF.ext don't need relocations */ 3701 if (strcmp(name, BTF_ELF_SEC) == 0 || 3702 strcmp(name, BTF_EXT_ELF_SEC) == 0) 3703 return true; 3704 } 3705 3706 return false; 3707 } 3708 3709 static int cmp_progs(const void *_a, const void *_b) 3710 { 3711 const struct bpf_program *a = _a; 3712 const struct bpf_program *b = _b; 3713 3714 if (a->sec_idx != b->sec_idx) 3715 return a->sec_idx < b->sec_idx ? -1 : 1; 3716 3717 /* sec_insn_off can't be the same within the section */ 3718 return a->sec_insn_off < b->sec_insn_off ? -1 : 1; 3719 } 3720 3721 static int bpf_object__elf_collect(struct bpf_object *obj) 3722 { 3723 struct elf_sec_desc *sec_desc; 3724 Elf *elf = obj->efile.elf; 3725 Elf_Data *btf_ext_data = NULL; 3726 Elf_Data *btf_data = NULL; 3727 int idx = 0, err = 0; 3728 const char *name; 3729 Elf_Data *data; 3730 Elf_Scn *scn; 3731 Elf64_Shdr *sh; 3732 3733 /* ELF section indices are 0-based, but sec #0 is special "invalid" 3734 * section. Since section count retrieved by elf_getshdrnum() does 3735 * include sec #0, it is already the necessary size of an array to keep 3736 * all the sections. 
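 * For example, an object file whose last real section has index 12
 * makes elf_getshdrnum() report 13 below, so the secs array allocated
 * with that count can be indexed directly by any section index 1..12.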
3737 */ 3738 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { 3739 pr_warn("elf: failed to get the number of sections for %s: %s\n", 3740 obj->path, elf_errmsg(-1)); 3741 return -LIBBPF_ERRNO__FORMAT; 3742 } 3743 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); 3744 if (!obj->efile.secs) 3745 return -ENOMEM; 3746 3747 /* a bunch of ELF parsing functionality depends on processing symbols, 3748 * so do the first pass and find the symbol table 3749 */ 3750 scn = NULL; 3751 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3752 sh = elf_sec_hdr(obj, scn); 3753 if (!sh) 3754 return -LIBBPF_ERRNO__FORMAT; 3755 3756 if (sh->sh_type == SHT_SYMTAB) { 3757 if (obj->efile.symbols) { 3758 pr_warn("elf: multiple symbol tables in %s\n", obj->path); 3759 return -LIBBPF_ERRNO__FORMAT; 3760 } 3761 3762 data = elf_sec_data(obj, scn); 3763 if (!data) 3764 return -LIBBPF_ERRNO__FORMAT; 3765 3766 idx = elf_ndxscn(scn); 3767 3768 obj->efile.symbols = data; 3769 obj->efile.symbols_shndx = idx; 3770 obj->efile.strtabidx = sh->sh_link; 3771 } 3772 } 3773 3774 if (!obj->efile.symbols) { 3775 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", 3776 obj->path); 3777 return -ENOENT; 3778 } 3779 3780 scn = NULL; 3781 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3782 idx = elf_ndxscn(scn); 3783 sec_desc = &obj->efile.secs[idx]; 3784 3785 sh = elf_sec_hdr(obj, scn); 3786 if (!sh) 3787 return -LIBBPF_ERRNO__FORMAT; 3788 3789 name = elf_sec_str(obj, sh->sh_name); 3790 if (!name) 3791 return -LIBBPF_ERRNO__FORMAT; 3792 3793 if (ignore_elf_section(sh, name)) 3794 continue; 3795 3796 data = elf_sec_data(obj, scn); 3797 if (!data) 3798 return -LIBBPF_ERRNO__FORMAT; 3799 3800 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 3801 idx, name, (unsigned long)data->d_size, 3802 (int)sh->sh_link, (unsigned long)sh->sh_flags, 3803 (int)sh->sh_type); 3804 3805 if (strcmp(name, "license") == 0) { 3806 err = bpf_object__init_license(obj, data->d_buf, data->d_size); 3807 if (err) 3808 return err; 3809 } else if (strcmp(name, "version") == 0) { 3810 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); 3811 if (err) 3812 return err; 3813 } else if (strcmp(name, "maps") == 0) { 3814 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); 3815 return -ENOTSUP; 3816 } else if (strcmp(name, MAPS_ELF_SEC) == 0) { 3817 obj->efile.btf_maps_shndx = idx; 3818 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 3819 if (sh->sh_type != SHT_PROGBITS) 3820 return -LIBBPF_ERRNO__FORMAT; 3821 btf_data = data; 3822 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 3823 if (sh->sh_type != SHT_PROGBITS) 3824 return -LIBBPF_ERRNO__FORMAT; 3825 btf_ext_data = data; 3826 } else if (sh->sh_type == SHT_SYMTAB) { 3827 /* already processed during the first pass above */ 3828 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { 3829 if (sh->sh_flags & SHF_EXECINSTR) { 3830 if (strcmp(name, ".text") == 0) 3831 obj->efile.text_shndx = idx; 3832 err = bpf_object__add_programs(obj, data, name, idx); 3833 if (err) 3834 return err; 3835 } else if (strcmp(name, DATA_SEC) == 0 || 3836 str_has_pfx(name, DATA_SEC ".")) { 3837 sec_desc->sec_type = SEC_DATA; 3838 sec_desc->shdr = sh; 3839 sec_desc->data = data; 3840 } else if (strcmp(name, RODATA_SEC) == 0 || 3841 str_has_pfx(name, RODATA_SEC ".")) { 3842 sec_desc->sec_type = SEC_RODATA; 3843 sec_desc->shdr = sh; 3844 sec_desc->data = data; 3845 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || 3846 
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || 3847 strcmp(name, "?" STRUCT_OPS_SEC) == 0 || 3848 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { 3849 sec_desc->sec_type = SEC_ST_OPS; 3850 sec_desc->shdr = sh; 3851 sec_desc->data = data; 3852 obj->efile.has_st_ops = true; 3853 } else if (strcmp(name, ARENA_SEC) == 0) { 3854 obj->efile.arena_data = data; 3855 obj->efile.arena_data_shndx = idx; 3856 } else { 3857 pr_info("elf: skipping unrecognized data section(%d) %s\n", 3858 idx, name); 3859 } 3860 } else if (sh->sh_type == SHT_REL) { 3861 int targ_sec_idx = sh->sh_info; /* points to other section */ 3862 3863 if (sh->sh_entsize != sizeof(Elf64_Rel) || 3864 targ_sec_idx >= obj->efile.sec_cnt) 3865 return -LIBBPF_ERRNO__FORMAT; 3866 3867 /* Only do relo for section with exec instructions */ 3868 if (!section_have_execinstr(obj, targ_sec_idx) && 3869 strcmp(name, ".rel" STRUCT_OPS_SEC) && 3870 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && 3871 strcmp(name, ".rel?" STRUCT_OPS_SEC) && 3872 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && 3873 strcmp(name, ".rel" MAPS_ELF_SEC)) { 3874 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", 3875 idx, name, targ_sec_idx, 3876 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); 3877 continue; 3878 } 3879 3880 sec_desc->sec_type = SEC_RELO; 3881 sec_desc->shdr = sh; 3882 sec_desc->data = data; 3883 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || 3884 str_has_pfx(name, BSS_SEC "."))) { 3885 sec_desc->sec_type = SEC_BSS; 3886 sec_desc->shdr = sh; 3887 sec_desc->data = data; 3888 } else { 3889 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, 3890 (size_t)sh->sh_size); 3891 } 3892 } 3893 3894 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { 3895 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); 3896 return -LIBBPF_ERRNO__FORMAT; 3897 } 3898 3899 /* sort BPF programs by section name and in-section instruction offset 3900 * for faster search 3901 */ 3902 if (obj->nr_programs) 3903 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); 3904 3905 return bpf_object__init_btf(obj, btf_data, btf_ext_data); 3906 } 3907 3908 static bool sym_is_extern(const Elf64_Sym *sym) 3909 { 3910 int bind = ELF64_ST_BIND(sym->st_info); 3911 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ 3912 return sym->st_shndx == SHN_UNDEF && 3913 (bind == STB_GLOBAL || bind == STB_WEAK) && 3914 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; 3915 } 3916 3917 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) 3918 { 3919 int bind = ELF64_ST_BIND(sym->st_info); 3920 int type = ELF64_ST_TYPE(sym->st_info); 3921 3922 /* in .text section */ 3923 if (sym->st_shndx != text_shndx) 3924 return false; 3925 3926 /* local function */ 3927 if (bind == STB_LOCAL && type == STT_SECTION) 3928 return true; 3929 3930 /* global function */ 3931 return bind == STB_GLOBAL && type == STT_FUNC; 3932 } 3933 3934 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 3935 { 3936 const struct btf_type *t; 3937 const char *tname; 3938 int i, n; 3939 3940 if (!btf) 3941 return -ESRCH; 3942 3943 n = btf__type_cnt(btf); 3944 for (i = 1; i < n; i++) { 3945 t = btf__type_by_id(btf, i); 3946 3947 if (!btf_is_var(t) && !btf_is_func(t)) 3948 continue; 3949 3950 tname = btf__name_by_offset(btf, t->name_off); 3951 if (strcmp(tname, ext_name)) 3952 continue; 3953 3954 if (btf_is_var(t) && 3955 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 3956 return -EINVAL; 3957 3958 
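/* similarly, an extern function must have extern linkage in its BTF FUNC type */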
if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 3959 return -EINVAL; 3960 3961 return i; 3962 } 3963 3964 return -ENOENT; 3965 } 3966 3967 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { 3968 const struct btf_var_secinfo *vs; 3969 const struct btf_type *t; 3970 int i, j, n; 3971 3972 if (!btf) 3973 return -ESRCH; 3974 3975 n = btf__type_cnt(btf); 3976 for (i = 1; i < n; i++) { 3977 t = btf__type_by_id(btf, i); 3978 3979 if (!btf_is_datasec(t)) 3980 continue; 3981 3982 vs = btf_var_secinfos(t); 3983 for (j = 0; j < btf_vlen(t); j++, vs++) { 3984 if (vs->type == ext_btf_id) 3985 return i; 3986 } 3987 } 3988 3989 return -ENOENT; 3990 } 3991 3992 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, 3993 bool *is_signed) 3994 { 3995 const struct btf_type *t; 3996 const char *name; 3997 3998 t = skip_mods_and_typedefs(btf, id, NULL); 3999 name = btf__name_by_offset(btf, t->name_off); 4000 4001 if (is_signed) 4002 *is_signed = false; 4003 switch (btf_kind(t)) { 4004 case BTF_KIND_INT: { 4005 int enc = btf_int_encoding(t); 4006 4007 if (enc & BTF_INT_BOOL) 4008 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; 4009 if (is_signed) 4010 *is_signed = enc & BTF_INT_SIGNED; 4011 if (t->size == 1) 4012 return KCFG_CHAR; 4013 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) 4014 return KCFG_UNKNOWN; 4015 return KCFG_INT; 4016 } 4017 case BTF_KIND_ENUM: 4018 if (t->size != 4) 4019 return KCFG_UNKNOWN; 4020 if (strcmp(name, "libbpf_tristate")) 4021 return KCFG_UNKNOWN; 4022 return KCFG_TRISTATE; 4023 case BTF_KIND_ENUM64: 4024 if (strcmp(name, "libbpf_tristate")) 4025 return KCFG_UNKNOWN; 4026 return KCFG_TRISTATE; 4027 case BTF_KIND_ARRAY: 4028 if (btf_array(t)->nelems == 0) 4029 return KCFG_UNKNOWN; 4030 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) 4031 return KCFG_UNKNOWN; 4032 return KCFG_CHAR_ARR; 4033 default: 4034 return KCFG_UNKNOWN; 4035 } 4036 } 4037 4038 static int cmp_externs(const void *_a, const void *_b) 4039 { 4040 const struct extern_desc *a = _a; 4041 const struct extern_desc *b = _b; 4042 4043 if (a->type != b->type) 4044 return a->type < b->type ? -1 : 1; 4045 4046 if (a->type == EXT_KCFG) { 4047 /* descending order by alignment requirements */ 4048 if (a->kcfg.align != b->kcfg.align) 4049 return a->kcfg.align > b->kcfg.align ? -1 : 1; 4050 /* ascending order by size, within same alignment class */ 4051 if (a->kcfg.sz != b->kcfg.sz) 4052 return a->kcfg.sz < b->kcfg.sz ? -1 : 1; 4053 } 4054 4055 /* resolve ties by name */ 4056 return strcmp(a->name, b->name); 4057 } 4058 4059 static int find_int_btf_id(const struct btf *btf) 4060 { 4061 const struct btf_type *t; 4062 int i, n; 4063 4064 n = btf__type_cnt(btf); 4065 for (i = 1; i < n; i++) { 4066 t = btf__type_by_id(btf, i); 4067 4068 if (btf_is_int(t) && btf_int_bits(t) == 32) 4069 return i; 4070 } 4071 4072 return 0; 4073 } 4074 4075 static int add_dummy_ksym_var(struct btf *btf) 4076 { 4077 int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 4078 const struct btf_var_secinfo *vs; 4079 const struct btf_type *sec; 4080 4081 if (!btf) 4082 return 0; 4083 4084 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 4085 BTF_KIND_DATASEC); 4086 if (sec_btf_id < 0) 4087 return 0; 4088 4089 sec = btf__type_by_id(btf, sec_btf_id); 4090 vs = btf_var_secinfos(sec); 4091 for (i = 0; i < btf_vlen(sec); i++, vs++) { 4092 const struct btf_type *vt; 4093 4094 vt = btf__type_by_id(btf, vs->type); 4095 if (btf_is_func(vt)) 4096 break; 4097 } 4098 4099 /* No func in ksyms sec. 
No need to add dummy var. */ 4100 if (i == btf_vlen(sec)) 4101 return 0; 4102 4103 int_btf_id = find_int_btf_id(btf); 4104 dummy_var_btf_id = btf__add_var(btf, 4105 "dummy_ksym", 4106 BTF_VAR_GLOBAL_ALLOCATED, 4107 int_btf_id); 4108 if (dummy_var_btf_id < 0) 4109 pr_warn("cannot create a dummy_ksym var\n"); 4110 4111 return dummy_var_btf_id; 4112 } 4113 4114 static int bpf_object__collect_externs(struct bpf_object *obj) 4115 { 4116 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 4117 const struct btf_type *t; 4118 struct extern_desc *ext; 4119 int i, n, off, dummy_var_btf_id; 4120 const char *ext_name, *sec_name; 4121 size_t ext_essent_len; 4122 Elf_Scn *scn; 4123 Elf64_Shdr *sh; 4124 4125 if (!obj->efile.symbols) 4126 return 0; 4127 4128 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 4129 sh = elf_sec_hdr(obj, scn); 4130 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) 4131 return -LIBBPF_ERRNO__FORMAT; 4132 4133 dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 4134 if (dummy_var_btf_id < 0) 4135 return dummy_var_btf_id; 4136 4137 n = sh->sh_size / sh->sh_entsize; 4138 pr_debug("looking for externs among %d symbols...\n", n); 4139 4140 for (i = 0; i < n; i++) { 4141 Elf64_Sym *sym = elf_sym_by_idx(obj, i); 4142 4143 if (!sym) 4144 return -LIBBPF_ERRNO__FORMAT; 4145 if (!sym_is_extern(sym)) 4146 continue; 4147 ext_name = elf_sym_str(obj, sym->st_name); 4148 if (!ext_name || !ext_name[0]) 4149 continue; 4150 4151 ext = obj->externs; 4152 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); 4153 if (!ext) 4154 return -ENOMEM; 4155 obj->externs = ext; 4156 ext = &ext[obj->nr_extern]; 4157 memset(ext, 0, sizeof(*ext)); 4158 obj->nr_extern++; 4159 4160 ext->btf_id = find_extern_btf_id(obj->btf, ext_name); 4161 if (ext->btf_id <= 0) { 4162 pr_warn("failed to find BTF for extern '%s': %d\n", 4163 ext_name, ext->btf_id); 4164 return ext->btf_id; 4165 } 4166 t = btf__type_by_id(obj->btf, ext->btf_id); 4167 ext->name = btf__name_by_offset(obj->btf, t->name_off); 4168 ext->sym_idx = i; 4169 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 4170 4171 ext_essent_len = bpf_core_essential_name_len(ext->name); 4172 ext->essent_name = NULL; 4173 if (ext_essent_len != strlen(ext->name)) { 4174 ext->essent_name = strndup(ext->name, ext_essent_len); 4175 if (!ext->essent_name) 4176 return -ENOMEM; 4177 } 4178 4179 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 4180 if (ext->sec_btf_id <= 0) { 4181 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", 4182 ext_name, ext->btf_id, ext->sec_btf_id); 4183 return ext->sec_btf_id; 4184 } 4185 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); 4186 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 4187 4188 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 4189 if (btf_is_func(t)) { 4190 pr_warn("extern function %s is unsupported under %s section\n", 4191 ext->name, KCONFIG_SEC); 4192 return -ENOTSUP; 4193 } 4194 kcfg_sec = sec; 4195 ext->type = EXT_KCFG; 4196 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); 4197 if (ext->kcfg.sz <= 0) { 4198 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", 4199 ext_name, ext->kcfg.sz); 4200 return ext->kcfg.sz; 4201 } 4202 ext->kcfg.align = btf__align_of(obj->btf, t->type); 4203 if (ext->kcfg.align <= 0) { 4204 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", 4205 ext_name, ext->kcfg.align); 4206 return -EINVAL; 4207 } 4208 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 4209 &ext->kcfg.is_signed); 4210 if (ext->kcfg.type == 
KCFG_UNKNOWN) { 4211 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 4212 return -ENOTSUP; 4213 } 4214 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 4215 ksym_sec = sec; 4216 ext->type = EXT_KSYM; 4217 skip_mods_and_typedefs(obj->btf, t->type, 4218 &ext->ksym.type_id); 4219 } else { 4220 pr_warn("unrecognized extern section '%s'\n", sec_name); 4221 return -ENOTSUP; 4222 } 4223 } 4224 pr_debug("collected %d externs total\n", obj->nr_extern); 4225 4226 if (!obj->nr_extern) 4227 return 0; 4228 4229 /* sort externs by type, for kcfg ones also by (align, size, name) */ 4230 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); 4231 4232 /* for .ksyms section, we need to turn all externs into allocated 4233 * variables in BTF to pass kernel verification; we do this by 4234 * pretending that each extern is a 8-byte variable 4235 */ 4236 if (ksym_sec) { 4237 /* find existing 4-byte integer type in BTF to use for fake 4238 * extern variables in DATASEC 4239 */ 4240 int int_btf_id = find_int_btf_id(obj->btf); 4241 /* For extern function, a dummy_var added earlier 4242 * will be used to replace the vs->type and 4243 * its name string will be used to refill 4244 * the missing param's name. 4245 */ 4246 const struct btf_type *dummy_var; 4247 4248 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 4249 for (i = 0; i < obj->nr_extern; i++) { 4250 ext = &obj->externs[i]; 4251 if (ext->type != EXT_KSYM) 4252 continue; 4253 pr_debug("extern (ksym) #%d: symbol %d, name %s\n", 4254 i, ext->sym_idx, ext->name); 4255 } 4256 4257 sec = ksym_sec; 4258 n = btf_vlen(sec); 4259 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { 4260 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4261 struct btf_type *vt; 4262 4263 vt = (void *)btf__type_by_id(obj->btf, vs->type); 4264 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 4265 ext = find_extern_by_name(obj, ext_name); 4266 if (!ext) { 4267 pr_warn("failed to find extern definition for BTF %s '%s'\n", 4268 btf_kind_str(vt), ext_name); 4269 return -ESRCH; 4270 } 4271 if (btf_is_func(vt)) { 4272 const struct btf_type *func_proto; 4273 struct btf_param *param; 4274 int j; 4275 4276 func_proto = btf__type_by_id(obj->btf, 4277 vt->type); 4278 param = btf_params(func_proto); 4279 /* Reuse the dummy_var string if the 4280 * func proto does not have param name. 
4281 */ 4282 for (j = 0; j < btf_vlen(func_proto); j++) 4283 if (param[j].type && !param[j].name_off) 4284 param[j].name_off = 4285 dummy_var->name_off; 4286 vs->type = dummy_var_btf_id; 4287 vt->info &= ~0xffff; 4288 vt->info |= BTF_FUNC_GLOBAL; 4289 } else { 4290 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4291 vt->type = int_btf_id; 4292 } 4293 vs->offset = off; 4294 vs->size = sizeof(int); 4295 } 4296 sec->size = off; 4297 } 4298 4299 if (kcfg_sec) { 4300 sec = kcfg_sec; 4301 /* for kcfg externs calculate their offsets within a .kconfig map */ 4302 off = 0; 4303 for (i = 0; i < obj->nr_extern; i++) { 4304 ext = &obj->externs[i]; 4305 if (ext->type != EXT_KCFG) 4306 continue; 4307 4308 ext->kcfg.data_off = roundup(off, ext->kcfg.align); 4309 off = ext->kcfg.data_off + ext->kcfg.sz; 4310 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", 4311 i, ext->sym_idx, ext->kcfg.data_off, ext->name); 4312 } 4313 sec->size = off; 4314 n = btf_vlen(sec); 4315 for (i = 0; i < n; i++) { 4316 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4317 4318 t = btf__type_by_id(obj->btf, vs->type); 4319 ext_name = btf__name_by_offset(obj->btf, t->name_off); 4320 ext = find_extern_by_name(obj, ext_name); 4321 if (!ext) { 4322 pr_warn("failed to find extern definition for BTF var '%s'\n", 4323 ext_name); 4324 return -ESRCH; 4325 } 4326 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4327 vs->offset = ext->kcfg.data_off; 4328 } 4329 } 4330 return 0; 4331 } 4332 4333 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) 4334 { 4335 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; 4336 } 4337 4338 struct bpf_program * 4339 bpf_object__find_program_by_name(const struct bpf_object *obj, 4340 const char *name) 4341 { 4342 struct bpf_program *prog; 4343 4344 bpf_object__for_each_program(prog, obj) { 4345 if (prog_is_subprog(obj, prog)) 4346 continue; 4347 if (!strcmp(prog->name, name)) 4348 return prog; 4349 } 4350 return errno = ENOENT, NULL; 4351 } 4352 4353 static bool bpf_object__shndx_is_data(const struct bpf_object *obj, 4354 int shndx) 4355 { 4356 switch (obj->efile.secs[shndx].sec_type) { 4357 case SEC_BSS: 4358 case SEC_DATA: 4359 case SEC_RODATA: 4360 return true; 4361 default: 4362 return false; 4363 } 4364 } 4365 4366 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 4367 int shndx) 4368 { 4369 return shndx == obj->efile.btf_maps_shndx; 4370 } 4371 4372 static enum libbpf_map_type 4373 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) 4374 { 4375 if (shndx == obj->efile.symbols_shndx) 4376 return LIBBPF_MAP_KCONFIG; 4377 4378 switch (obj->efile.secs[shndx].sec_type) { 4379 case SEC_BSS: 4380 return LIBBPF_MAP_BSS; 4381 case SEC_DATA: 4382 return LIBBPF_MAP_DATA; 4383 case SEC_RODATA: 4384 return LIBBPF_MAP_RODATA; 4385 default: 4386 return LIBBPF_MAP_UNSPEC; 4387 } 4388 } 4389 4390 static int bpf_program__record_reloc(struct bpf_program *prog, 4391 struct reloc_desc *reloc_desc, 4392 __u32 insn_idx, const char *sym_name, 4393 const Elf64_Sym *sym, const Elf64_Rel *rel) 4394 { 4395 struct bpf_insn *insn = &prog->insns[insn_idx]; 4396 size_t map_idx, nr_maps = prog->obj->nr_maps; 4397 struct bpf_object *obj = prog->obj; 4398 __u32 shdr_idx = sym->st_shndx; 4399 enum libbpf_map_type type; 4400 const char *sym_sec_name; 4401 struct bpf_map *map; 4402 4403 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 4404 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 
4405 prog->name, sym_name, insn_idx, insn->code); 4406 return -LIBBPF_ERRNO__RELOC; 4407 } 4408 4409 if (sym_is_extern(sym)) { 4410 int sym_idx = ELF64_R_SYM(rel->r_info); 4411 int i, n = obj->nr_extern; 4412 struct extern_desc *ext; 4413 4414 for (i = 0; i < n; i++) { 4415 ext = &obj->externs[i]; 4416 if (ext->sym_idx == sym_idx) 4417 break; 4418 } 4419 if (i >= n) { 4420 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", 4421 prog->name, sym_name, sym_idx); 4422 return -LIBBPF_ERRNO__RELOC; 4423 } 4424 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 4425 prog->name, i, ext->name, ext->sym_idx, insn_idx); 4426 if (insn->code == (BPF_JMP | BPF_CALL)) 4427 reloc_desc->type = RELO_EXTERN_CALL; 4428 else 4429 reloc_desc->type = RELO_EXTERN_LD64; 4430 reloc_desc->insn_idx = insn_idx; 4431 reloc_desc->ext_idx = i; 4432 return 0; 4433 } 4434 4435 /* sub-program call relocation */ 4436 if (is_call_insn(insn)) { 4437 if (insn->src_reg != BPF_PSEUDO_CALL) { 4438 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 4439 return -LIBBPF_ERRNO__RELOC; 4440 } 4441 /* text_shndx can be 0, if no default "main" program exists */ 4442 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 4443 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4444 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 4445 prog->name, sym_name, sym_sec_name); 4446 return -LIBBPF_ERRNO__RELOC; 4447 } 4448 if (sym->st_value % BPF_INSN_SZ) { 4449 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 4450 prog->name, sym_name, (size_t)sym->st_value); 4451 return -LIBBPF_ERRNO__RELOC; 4452 } 4453 reloc_desc->type = RELO_CALL; 4454 reloc_desc->insn_idx = insn_idx; 4455 reloc_desc->sym_off = sym->st_value; 4456 return 0; 4457 } 4458 4459 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { 4460 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", 4461 prog->name, sym_name, shdr_idx); 4462 return -LIBBPF_ERRNO__RELOC; 4463 } 4464 4465 /* loading subprog addresses */ 4466 if (sym_is_subprog(sym, obj->efile.text_shndx)) { 4467 /* global_func: sym->st_value = offset in the section, insn->imm = 0. 4468 * local_func: sym->st_value = 0, insn->imm = offset in the section. 
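 * Either way, st_value + insn->imm gives the subprog's byte offset within the section.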
4469 */ 4470 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { 4471 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", 4472 prog->name, sym_name, (size_t)sym->st_value, insn->imm); 4473 return -LIBBPF_ERRNO__RELOC; 4474 } 4475 4476 reloc_desc->type = RELO_SUBPROG_ADDR; 4477 reloc_desc->insn_idx = insn_idx; 4478 reloc_desc->sym_off = sym->st_value; 4479 return 0; 4480 } 4481 4482 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); 4483 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4484 4485 /* arena data relocation */ 4486 if (shdr_idx == obj->efile.arena_data_shndx) { 4487 reloc_desc->type = RELO_DATA; 4488 reloc_desc->insn_idx = insn_idx; 4489 reloc_desc->map_idx = obj->arena_map - obj->maps; 4490 reloc_desc->sym_off = sym->st_value; 4491 return 0; 4492 } 4493 4494 /* generic map reference relocation */ 4495 if (type == LIBBPF_MAP_UNSPEC) { 4496 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { 4497 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", 4498 prog->name, sym_name, sym_sec_name); 4499 return -LIBBPF_ERRNO__RELOC; 4500 } 4501 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4502 map = &obj->maps[map_idx]; 4503 if (map->libbpf_type != type || 4504 map->sec_idx != sym->st_shndx || 4505 map->sec_offset != sym->st_value) 4506 continue; 4507 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", 4508 prog->name, map_idx, map->name, map->sec_idx, 4509 map->sec_offset, insn_idx); 4510 break; 4511 } 4512 if (map_idx >= nr_maps) { 4513 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", 4514 prog->name, sym_sec_name, (size_t)sym->st_value); 4515 return -LIBBPF_ERRNO__RELOC; 4516 } 4517 reloc_desc->type = RELO_LD64; 4518 reloc_desc->insn_idx = insn_idx; 4519 reloc_desc->map_idx = map_idx; 4520 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ 4521 return 0; 4522 } 4523 4524 /* global data map relocation */ 4525 if (!bpf_object__shndx_is_data(obj, shdr_idx)) { 4526 pr_warn("prog '%s': bad data relo against section '%s'\n", 4527 prog->name, sym_sec_name); 4528 return -LIBBPF_ERRNO__RELOC; 4529 } 4530 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4531 map = &obj->maps[map_idx]; 4532 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) 4533 continue; 4534 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", 4535 prog->name, map_idx, map->name, map->sec_idx, 4536 map->sec_offset, insn_idx); 4537 break; 4538 } 4539 if (map_idx >= nr_maps) { 4540 pr_warn("prog '%s': data relo failed to find map for section '%s'\n", 4541 prog->name, sym_sec_name); 4542 return -LIBBPF_ERRNO__RELOC; 4543 } 4544 4545 reloc_desc->type = RELO_DATA; 4546 reloc_desc->insn_idx = insn_idx; 4547 reloc_desc->map_idx = map_idx; 4548 reloc_desc->sym_off = sym->st_value; 4549 return 0; 4550 } 4551 4552 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) 4553 { 4554 return insn_idx >= prog->sec_insn_off && 4555 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; 4556 } 4557 4558 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, 4559 size_t sec_idx, size_t insn_idx) 4560 { 4561 int l = 0, r = obj->nr_programs - 1, m; 4562 struct bpf_program *prog; 4563 4564 if (!obj->nr_programs) 4565 return NULL; 4566 4567 while (l < r) { 4568 m = l + (r - l + 1) / 2; 4569 prog = &obj->programs[m]; 4570 4571 if (prog->sec_idx < sec_idx || 4572 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) 4573 
l = m; 4574 else 4575 r = m - 1; 4576 } 4577 /* matching program could be at index l, but it still might be the 4578 * wrong one, so we need to double check conditions for the last time 4579 */ 4580 prog = &obj->programs[l]; 4581 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx)) 4582 return prog; 4583 return NULL; 4584 } 4585 4586 static int 4587 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) 4588 { 4589 const char *relo_sec_name, *sec_name; 4590 size_t sec_idx = shdr->sh_info, sym_idx; 4591 struct bpf_program *prog; 4592 struct reloc_desc *relos; 4593 int err, i, nrels; 4594 const char *sym_name; 4595 __u32 insn_idx; 4596 Elf_Scn *scn; 4597 Elf_Data *scn_data; 4598 Elf64_Sym *sym; 4599 Elf64_Rel *rel; 4600 4601 if (sec_idx >= obj->efile.sec_cnt) 4602 return -EINVAL; 4603 4604 scn = elf_sec_by_idx(obj, sec_idx); 4605 scn_data = elf_sec_data(obj, scn); 4606 if (!scn_data) 4607 return -LIBBPF_ERRNO__FORMAT; 4608 4609 relo_sec_name = elf_sec_str(obj, shdr->sh_name); 4610 sec_name = elf_sec_name(obj, scn); 4611 if (!relo_sec_name || !sec_name) 4612 return -EINVAL; 4613 4614 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", 4615 relo_sec_name, sec_idx, sec_name); 4616 nrels = shdr->sh_size / shdr->sh_entsize; 4617 4618 for (i = 0; i < nrels; i++) { 4619 rel = elf_rel_by_idx(data, i); 4620 if (!rel) { 4621 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); 4622 return -LIBBPF_ERRNO__FORMAT; 4623 } 4624 4625 sym_idx = ELF64_R_SYM(rel->r_info); 4626 sym = elf_sym_by_idx(obj, sym_idx); 4627 if (!sym) { 4628 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", 4629 relo_sec_name, sym_idx, i); 4630 return -LIBBPF_ERRNO__FORMAT; 4631 } 4632 4633 if (sym->st_shndx >= obj->efile.sec_cnt) { 4634 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", 4635 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); 4636 return -LIBBPF_ERRNO__FORMAT; 4637 } 4638 4639 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { 4640 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", 4641 relo_sec_name, (size_t)rel->r_offset, i); 4642 return -LIBBPF_ERRNO__FORMAT; 4643 } 4644 4645 insn_idx = rel->r_offset / BPF_INSN_SZ; 4646 /* relocations against static functions are recorded as 4647 * relocations against the section that contains a function; 4648 * in such case, symbol will be STT_SECTION and sym.st_name 4649 * will point to empty string (0), so fetch section name 4650 * instead 4651 */ 4652 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) 4653 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); 4654 else 4655 sym_name = elf_sym_str(obj, sym->st_name); 4656 sym_name = sym_name ?: "<?"; 4657 4658 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", 4659 relo_sec_name, i, insn_idx, sym_name); 4660 4661 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 4662 if (!prog) { 4663 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n", 4664 relo_sec_name, i, sec_name, insn_idx); 4665 continue; 4666 } 4667 4668 relos = libbpf_reallocarray(prog->reloc_desc, 4669 prog->nr_reloc + 1, sizeof(*relos)); 4670 if (!relos) 4671 return -ENOMEM; 4672 prog->reloc_desc = relos; 4673 4674 /* adjust insn_idx to local BPF program frame of reference */ 4675 insn_idx -= prog->sec_insn_off; 4676 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], 4677 insn_idx, sym_name, 
sym, rel); 4678 if (err) 4679 return err; 4680 4681 prog->nr_reloc++; 4682 } 4683 return 0; 4684 } 4685 4686 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) 4687 { 4688 int id; 4689 4690 if (!obj->btf) 4691 return -ENOENT; 4692 4693 /* if it's BTF-defined map, we don't need to search for type IDs. 4694 * For struct_ops map, it does not need btf_key_type_id and 4695 * btf_value_type_id. 4696 */ 4697 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) 4698 return 0; 4699 4700 /* 4701 * LLVM annotates global data differently in BTF, that is, 4702 * only as '.data', '.bss' or '.rodata'. 4703 */ 4704 if (!bpf_map__is_internal(map)) 4705 return -ENOENT; 4706 4707 id = btf__find_by_name(obj->btf, map->real_name); 4708 if (id < 0) 4709 return id; 4710 4711 map->btf_key_type_id = 0; 4712 map->btf_value_type_id = id; 4713 return 0; 4714 } 4715 4716 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) 4717 { 4718 char file[PATH_MAX], buff[4096]; 4719 FILE *fp; 4720 __u32 val; 4721 int err; 4722 4723 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4724 memset(info, 0, sizeof(*info)); 4725 4726 fp = fopen(file, "re"); 4727 if (!fp) { 4728 err = -errno; 4729 pr_warn("failed to open %s: %d. No procfs support?\n", file, 4730 err); 4731 return err; 4732 } 4733 4734 while (fgets(buff, sizeof(buff), fp)) { 4735 if (sscanf(buff, "map_type:\t%u", &val) == 1) 4736 info->type = val; 4737 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 4738 info->key_size = val; 4739 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 4740 info->value_size = val; 4741 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 4742 info->max_entries = val; 4743 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 4744 info->map_flags = val; 4745 } 4746 4747 fclose(fp); 4748 4749 return 0; 4750 } 4751 4752 bool bpf_map__autocreate(const struct bpf_map *map) 4753 { 4754 return map->autocreate; 4755 } 4756 4757 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4758 { 4759 if (map->obj->loaded) 4760 return libbpf_err(-EBUSY); 4761 4762 map->autocreate = autocreate; 4763 return 0; 4764 } 4765 4766 int bpf_map__reuse_fd(struct bpf_map *map, int fd) 4767 { 4768 struct bpf_map_info info; 4769 __u32 len = sizeof(info), name_len; 4770 int new_fd, err; 4771 char *new_name; 4772 4773 memset(&info, 0, len); 4774 err = bpf_map_get_info_by_fd(fd, &info, &len); 4775 if (err && errno == EINVAL) 4776 err = bpf_get_map_info_from_fdinfo(fd, &info); 4777 if (err) 4778 return libbpf_err(err); 4779 4780 name_len = strlen(info.name); 4781 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) 4782 new_name = strdup(map->name); 4783 else 4784 new_name = strdup(info.name); 4785 4786 if (!new_name) 4787 return libbpf_err(-errno); 4788 4789 /* 4790 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4791 * This is similar to what we do in ensure_good_fd(), but without 4792 * closing original FD. 
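 * (fcntl(F_DUPFD_CLOEXEC, 3) returns the lowest available FD >= 3 with close-on-exec set)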
4793 */ 4794 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4795 if (new_fd < 0) { 4796 err = -errno; 4797 goto err_free_new_name; 4798 } 4799 4800 err = reuse_fd(map->fd, new_fd); 4801 if (err) 4802 goto err_free_new_name; 4803 4804 free(map->name); 4805 4806 map->name = new_name; 4807 map->def.type = info.type; 4808 map->def.key_size = info.key_size; 4809 map->def.value_size = info.value_size; 4810 map->def.max_entries = info.max_entries; 4811 map->def.map_flags = info.map_flags; 4812 map->btf_key_type_id = info.btf_key_type_id; 4813 map->btf_value_type_id = info.btf_value_type_id; 4814 map->reused = true; 4815 map->map_extra = info.map_extra; 4816 4817 return 0; 4818 4819 err_free_new_name: 4820 free(new_name); 4821 return libbpf_err(err); 4822 } 4823 4824 __u32 bpf_map__max_entries(const struct bpf_map *map) 4825 { 4826 return map->def.max_entries; 4827 } 4828 4829 struct bpf_map *bpf_map__inner_map(struct bpf_map *map) 4830 { 4831 if (!bpf_map_type__is_map_in_map(map->def.type)) 4832 return errno = EINVAL, NULL; 4833 4834 return map->inner_map; 4835 } 4836 4837 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 4838 { 4839 if (map->obj->loaded) 4840 return libbpf_err(-EBUSY); 4841 4842 map->def.max_entries = max_entries; 4843 4844 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 4845 if (map_is_ringbuf(map)) 4846 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 4847 4848 return 0; 4849 } 4850 4851 static int bpf_object_prepare_token(struct bpf_object *obj) 4852 { 4853 const char *bpffs_path; 4854 int bpffs_fd = -1, token_fd, err; 4855 bool mandatory; 4856 enum libbpf_print_level level; 4857 4858 /* token is explicitly prevented */ 4859 if (obj->token_path && obj->token_path[0] == '\0') { 4860 pr_debug("object '%s': token is prevented, skipping...\n", obj->name); 4861 return 0; 4862 } 4863 4864 mandatory = obj->token_path != NULL; 4865 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; 4866 4867 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; 4868 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); 4869 if (bpffs_fd < 0) { 4870 err = -errno; 4871 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", 4872 obj->name, err, bpffs_path, 4873 mandatory ? "" : ", skipping optional step..."); 4874 return mandatory ? err : 0; 4875 } 4876 4877 token_fd = bpf_token_create(bpffs_fd, 0); 4878 close(bpffs_fd); 4879 if (token_fd < 0) { 4880 if (!mandatory && token_fd == -ENOENT) { 4881 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", 4882 obj->name, bpffs_path); 4883 return 0; 4884 } 4885 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", 4886 obj->name, token_fd, bpffs_path, 4887 mandatory ? "" : ", skipping optional step..."); 4888 return mandatory ? token_fd : 0; 4889 } 4890 4891 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); 4892 if (!obj->feat_cache) { 4893 close(token_fd); 4894 return -ENOMEM; 4895 } 4896 4897 obj->token_fd = token_fd; 4898 obj->feat_cache->token_fd = token_fd; 4899 4900 return 0; 4901 } 4902 4903 static int 4904 bpf_object__probe_loading(struct bpf_object *obj) 4905 { 4906 char *cp, errmsg[STRERR_BUFSIZE]; 4907 struct bpf_insn insns[] = { 4908 BPF_MOV64_IMM(BPF_REG_0, 0), 4909 BPF_EXIT_INSN(), 4910 }; 4911 int ret, insn_cnt = ARRAY_SIZE(insns); 4912 LIBBPF_OPTS(bpf_prog_load_opts, opts, 4913 .token_fd = obj->token_fd, 4914 .prog_flags = obj->token_fd ? 
BPF_F_TOKEN_FD : 0, 4915 ); 4916 4917 if (obj->gen_loader) 4918 return 0; 4919 4920 ret = bump_rlimit_memlock(); 4921 if (ret) 4922 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); 4923 4924 /* make sure basic loading works */ 4925 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); 4926 if (ret < 0) 4927 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); 4928 if (ret < 0) { 4929 ret = errno; 4930 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); 4931 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " 4932 "program. Make sure your kernel supports BPF " 4933 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " 4934 "set to big enough value.\n", __func__, cp, ret); 4935 return -ret; 4936 } 4937 close(ret); 4938 4939 return 0; 4940 } 4941 4942 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) 4943 { 4944 if (obj->gen_loader) 4945 /* To generate loader program assume the latest kernel 4946 * to avoid doing extra prog_load, map_create syscalls. 4947 */ 4948 return true; 4949 4950 if (obj->token_fd) 4951 return feat_supported(obj->feat_cache, feat_id); 4952 4953 return feat_supported(NULL, feat_id); 4954 } 4955 4956 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) 4957 { 4958 struct bpf_map_info map_info; 4959 char msg[STRERR_BUFSIZE]; 4960 __u32 map_info_len = sizeof(map_info); 4961 int err; 4962 4963 memset(&map_info, 0, map_info_len); 4964 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); 4965 if (err && errno == EINVAL) 4966 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); 4967 if (err) { 4968 pr_warn("failed to get map info for map FD %d: %s\n", map_fd, 4969 libbpf_strerror_r(errno, msg, sizeof(msg))); 4970 return false; 4971 } 4972 4973 return (map_info.type == map->def.type && 4974 map_info.key_size == map->def.key_size && 4975 map_info.value_size == map->def.value_size && 4976 map_info.max_entries == map->def.max_entries && 4977 map_info.map_flags == map->def.map_flags && 4978 map_info.map_extra == map->map_extra); 4979 } 4980 4981 static int 4982 bpf_object__reuse_map(struct bpf_map *map) 4983 { 4984 char *cp, errmsg[STRERR_BUFSIZE]; 4985 int err, pin_fd; 4986 4987 pin_fd = bpf_obj_get(map->pin_path); 4988 if (pin_fd < 0) { 4989 err = -errno; 4990 if (err == -ENOENT) { 4991 pr_debug("found no pinned map to reuse at '%s'\n", 4992 map->pin_path); 4993 return 0; 4994 } 4995 4996 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 4997 pr_warn("couldn't retrieve pinned map '%s': %s\n", 4998 map->pin_path, cp); 4999 return err; 5000 } 5001 5002 if (!map_is_reuse_compat(map, pin_fd)) { 5003 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", 5004 map->pin_path); 5005 close(pin_fd); 5006 return -EINVAL; 5007 } 5008 5009 err = bpf_map__reuse_fd(map, pin_fd); 5010 close(pin_fd); 5011 if (err) 5012 return err; 5013 5014 map->pinned = true; 5015 pr_debug("reused pinned map at '%s'\n", map->pin_path); 5016 5017 return 0; 5018 } 5019 5020 static int 5021 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) 5022 { 5023 enum libbpf_map_type map_type = map->libbpf_type; 5024 char *cp, errmsg[STRERR_BUFSIZE]; 5025 int err, zero = 0; 5026 5027 if (obj->gen_loader) { 5028 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, 5029 map->mmaped, map->def.value_size); 5030 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) 5031 
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); 5032 return 0; 5033 } 5034 5035 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); 5036 if (err) { 5037 err = -errno; 5038 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5039 pr_warn("Error setting initial map(%s) contents: %s\n", 5040 map->name, cp); 5041 return err; 5042 } 5043 5044 /* Freeze .rodata and .kconfig map as read-only from syscall side. */ 5045 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { 5046 err = bpf_map_freeze(map->fd); 5047 if (err) { 5048 err = -errno; 5049 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5050 pr_warn("Error freezing map(%s) as read-only: %s\n", 5051 map->name, cp); 5052 return err; 5053 } 5054 } 5055 return 0; 5056 } 5057 5058 static void bpf_map__destroy(struct bpf_map *map); 5059 5060 static bool map_is_created(const struct bpf_map *map) 5061 { 5062 return map->obj->loaded || map->reused; 5063 } 5064 5065 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 5066 { 5067 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 5068 struct bpf_map_def *def = &map->def; 5069 const char *map_name = NULL; 5070 int err = 0, map_fd; 5071 5072 if (kernel_supports(obj, FEAT_PROG_NAME)) 5073 map_name = map->name; 5074 create_attr.map_ifindex = map->map_ifindex; 5075 create_attr.map_flags = def->map_flags; 5076 create_attr.numa_node = map->numa_node; 5077 create_attr.map_extra = map->map_extra; 5078 create_attr.token_fd = obj->token_fd; 5079 if (obj->token_fd) 5080 create_attr.map_flags |= BPF_F_TOKEN_FD; 5081 5082 if (bpf_map__is_struct_ops(map)) { 5083 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5084 if (map->mod_btf_fd >= 0) { 5085 create_attr.value_type_btf_obj_fd = map->mod_btf_fd; 5086 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; 5087 } 5088 } 5089 5090 if (obj->btf && btf__fd(obj->btf) >= 0) { 5091 create_attr.btf_fd = btf__fd(obj->btf); 5092 create_attr.btf_key_type_id = map->btf_key_type_id; 5093 create_attr.btf_value_type_id = map->btf_value_type_id; 5094 } 5095 5096 if (bpf_map_type__is_map_in_map(def->type)) { 5097 if (map->inner_map) { 5098 err = map_set_def_max_entries(map->inner_map); 5099 if (err) 5100 return err; 5101 err = bpf_object__create_map(obj, map->inner_map, true); 5102 if (err) { 5103 pr_warn("map '%s': failed to create inner map: %d\n", 5104 map->name, err); 5105 return err; 5106 } 5107 map->inner_map_fd = map->inner_map->fd; 5108 } 5109 if (map->inner_map_fd >= 0) 5110 create_attr.inner_map_fd = map->inner_map_fd; 5111 } 5112 5113 switch (def->type) { 5114 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 5115 case BPF_MAP_TYPE_CGROUP_ARRAY: 5116 case BPF_MAP_TYPE_STACK_TRACE: 5117 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 5118 case BPF_MAP_TYPE_HASH_OF_MAPS: 5119 case BPF_MAP_TYPE_DEVMAP: 5120 case BPF_MAP_TYPE_DEVMAP_HASH: 5121 case BPF_MAP_TYPE_CPUMAP: 5122 case BPF_MAP_TYPE_XSKMAP: 5123 case BPF_MAP_TYPE_SOCKMAP: 5124 case BPF_MAP_TYPE_SOCKHASH: 5125 case BPF_MAP_TYPE_QUEUE: 5126 case BPF_MAP_TYPE_STACK: 5127 case BPF_MAP_TYPE_ARENA: 5128 create_attr.btf_fd = 0; 5129 create_attr.btf_key_type_id = 0; 5130 create_attr.btf_value_type_id = 0; 5131 map->btf_key_type_id = 0; 5132 map->btf_value_type_id = 0; 5133 break; 5134 case BPF_MAP_TYPE_STRUCT_OPS: 5135 create_attr.btf_value_type_id = 0; 5136 break; 5137 default: 5138 break; 5139 } 5140 5141 if (obj->gen_loader) { 5142 bpf_gen__map_create(obj->gen_loader, def->type, map_name, 5143 def->key_size, def->value_size, def->max_entries, 5144 
&create_attr, is_inner ? -1 : map - obj->maps);
5145 /* We keep pretending we have valid FD to pass various fd >= 0
5146 * checks by just keeping original placeholder FDs in place.
5147 * See bpf_object__add_map() comment.
5148 * This placeholder fd will not be used with any syscall and
5149 * will be reset to -1 eventually.
5150 */
5151 map_fd = map->fd;
5152 } else {
5153 map_fd = bpf_map_create(def->type, map_name,
5154 def->key_size, def->value_size,
5155 def->max_entries, &create_attr);
5156 }
5157 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5158 char *cp, errmsg[STRERR_BUFSIZE];
5159
5160 err = -errno;
5161 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5162 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
5163 map->name, cp, err);
5164 create_attr.btf_fd = 0;
5165 create_attr.btf_key_type_id = 0;
5166 create_attr.btf_value_type_id = 0;
5167 map->btf_key_type_id = 0;
5168 map->btf_value_type_id = 0;
5169 map_fd = bpf_map_create(def->type, map_name,
5170 def->key_size, def->value_size,
5171 def->max_entries, &create_attr);
5172 }
5173
5174 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5175 if (obj->gen_loader)
5176 map->inner_map->fd = -1;
5177 bpf_map__destroy(map->inner_map);
5178 zfree(&map->inner_map);
5179 }
5180
5181 if (map_fd < 0)
5182 return map_fd;
5183
5184 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5185 if (map->fd == map_fd)
5186 return 0;
5187
5188 /* Keep placeholder FD value but now point it to the BPF map object.
5189 * This way everything that relied on this map's FD (e.g., relocated
5190 * ldimm64 instructions) will stay valid and won't need adjustments.
5191 * map->fd stays valid but now points to what map_fd points to.
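 * (reuse_fd() makes map->fd refer to the same BPF map as map_fd and then closes map_fd)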
5192 */ 5193 return reuse_fd(map->fd, map_fd); 5194 } 5195 5196 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) 5197 { 5198 const struct bpf_map *targ_map; 5199 unsigned int i; 5200 int fd, err = 0; 5201 5202 for (i = 0; i < map->init_slots_sz; i++) { 5203 if (!map->init_slots[i]) 5204 continue; 5205 5206 targ_map = map->init_slots[i]; 5207 fd = targ_map->fd; 5208 5209 if (obj->gen_loader) { 5210 bpf_gen__populate_outer_map(obj->gen_loader, 5211 map - obj->maps, i, 5212 targ_map - obj->maps); 5213 } else { 5214 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5215 } 5216 if (err) { 5217 err = -errno; 5218 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", 5219 map->name, i, targ_map->name, fd, err); 5220 return err; 5221 } 5222 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", 5223 map->name, i, targ_map->name, fd); 5224 } 5225 5226 zfree(&map->init_slots); 5227 map->init_slots_sz = 0; 5228 5229 return 0; 5230 } 5231 5232 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) 5233 { 5234 const struct bpf_program *targ_prog; 5235 unsigned int i; 5236 int fd, err; 5237 5238 if (obj->gen_loader) 5239 return -ENOTSUP; 5240 5241 for (i = 0; i < map->init_slots_sz; i++) { 5242 if (!map->init_slots[i]) 5243 continue; 5244 5245 targ_prog = map->init_slots[i]; 5246 fd = bpf_program__fd(targ_prog); 5247 5248 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5249 if (err) { 5250 err = -errno; 5251 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", 5252 map->name, i, targ_prog->name, fd, err); 5253 return err; 5254 } 5255 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", 5256 map->name, i, targ_prog->name, fd); 5257 } 5258 5259 zfree(&map->init_slots); 5260 map->init_slots_sz = 0; 5261 5262 return 0; 5263 } 5264 5265 static int bpf_object_init_prog_arrays(struct bpf_object *obj) 5266 { 5267 struct bpf_map *map; 5268 int i, err; 5269 5270 for (i = 0; i < obj->nr_maps; i++) { 5271 map = &obj->maps[i]; 5272 5273 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) 5274 continue; 5275 5276 err = init_prog_array_slots(obj, map); 5277 if (err < 0) 5278 return err; 5279 } 5280 return 0; 5281 } 5282 5283 static int map_set_def_max_entries(struct bpf_map *map) 5284 { 5285 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { 5286 int nr_cpus; 5287 5288 nr_cpus = libbpf_num_possible_cpus(); 5289 if (nr_cpus < 0) { 5290 pr_warn("map '%s': failed to determine number of system CPUs: %d\n", 5291 map->name, nr_cpus); 5292 return nr_cpus; 5293 } 5294 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); 5295 map->def.max_entries = nr_cpus; 5296 } 5297 5298 return 0; 5299 } 5300 5301 static int 5302 bpf_object__create_maps(struct bpf_object *obj) 5303 { 5304 struct bpf_map *map; 5305 char *cp, errmsg[STRERR_BUFSIZE]; 5306 unsigned int i, j; 5307 int err; 5308 bool retried; 5309 5310 for (i = 0; i < obj->nr_maps; i++) { 5311 map = &obj->maps[i]; 5312 5313 /* To support old kernels, we skip creating global data maps 5314 * (.rodata, .data, .kconfig, etc); later on, during program 5315 * loading, if we detect that at least one of the to-be-loaded 5316 * programs is referencing any global data map, we'll error 5317 * out with program name and relocation index logged. 
5318 * This approach allows to accommodate Clang emitting 5319 * unnecessary .rodata.str1.1 sections for string literals, 5320 * but also it allows to have CO-RE applications that use 5321 * global variables in some of BPF programs, but not others. 5322 * If those global variable-using programs are not loaded at 5323 * runtime due to bpf_program__set_autoload(prog, false), 5324 * bpf_object loading will succeed just fine even on old 5325 * kernels. 5326 */ 5327 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) 5328 map->autocreate = false; 5329 5330 if (!map->autocreate) { 5331 pr_debug("map '%s': skipped auto-creating...\n", map->name); 5332 continue; 5333 } 5334 5335 err = map_set_def_max_entries(map); 5336 if (err) 5337 goto err_out; 5338 5339 retried = false; 5340 retry: 5341 if (map->pin_path) { 5342 err = bpf_object__reuse_map(map); 5343 if (err) { 5344 pr_warn("map '%s': error reusing pinned map\n", 5345 map->name); 5346 goto err_out; 5347 } 5348 if (retried && map->fd < 0) { 5349 pr_warn("map '%s': cannot find pinned map\n", 5350 map->name); 5351 err = -ENOENT; 5352 goto err_out; 5353 } 5354 } 5355 5356 if (map->reused) { 5357 pr_debug("map '%s': skipping creation (preset fd=%d)\n", 5358 map->name, map->fd); 5359 } else { 5360 err = bpf_object__create_map(obj, map, false); 5361 if (err) 5362 goto err_out; 5363 5364 pr_debug("map '%s': created successfully, fd=%d\n", 5365 map->name, map->fd); 5366 5367 if (bpf_map__is_internal(map)) { 5368 err = bpf_object__populate_internal_map(obj, map); 5369 if (err < 0) 5370 goto err_out; 5371 } 5372 if (map->def.type == BPF_MAP_TYPE_ARENA) { 5373 map->mmaped = mmap((void *)(long)map->map_extra, 5374 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, 5375 map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, 5376 map->fd, 0); 5377 if (map->mmaped == MAP_FAILED) { 5378 err = -errno; 5379 map->mmaped = NULL; 5380 pr_warn("map '%s': failed to mmap arena: %d\n", 5381 map->name, err); 5382 return err; 5383 } 5384 if (obj->arena_data) { 5385 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); 5386 zfree(&obj->arena_data); 5387 } 5388 } 5389 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { 5390 err = init_map_in_map_slots(obj, map); 5391 if (err < 0) 5392 goto err_out; 5393 } 5394 } 5395 5396 if (map->pin_path && !map->pinned) { 5397 err = bpf_map__pin(map, NULL); 5398 if (err) { 5399 if (!retried && err == -EEXIST) { 5400 retried = true; 5401 goto retry; 5402 } 5403 pr_warn("map '%s': failed to auto-pin at '%s': %d\n", 5404 map->name, map->pin_path, err); 5405 goto err_out; 5406 } 5407 } 5408 } 5409 5410 return 0; 5411 5412 err_out: 5413 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5414 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); 5415 pr_perm_msg(err); 5416 for (j = 0; j < i; j++) 5417 zclose(obj->maps[j].fd); 5418 return err; 5419 } 5420 5421 static bool bpf_core_is_flavor_sep(const char *s) 5422 { 5423 /* check X___Y name pattern, where X and Y are not underscores */ 5424 return s[0] != '_' && /* X */ 5425 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ 5426 s[4] != '_'; /* Y */ 5427 } 5428 5429 /* Given 'some_struct_name___with_flavor' return the length of a name prefix 5430 * before last triple underscore. Struct name part after last triple 5431 * underscore is ignored by BPF CO-RE relocation during relocation matching. 
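 * E.g., for "task_struct___2" the essential name is "task_struct" (length 11).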
5432 */ 5433 size_t bpf_core_essential_name_len(const char *name) 5434 { 5435 size_t n = strlen(name); 5436 int i; 5437 5438 for (i = n - 5; i >= 0; i--) { 5439 if (bpf_core_is_flavor_sep(name + i)) 5440 return i + 1; 5441 } 5442 return n; 5443 } 5444 5445 void bpf_core_free_cands(struct bpf_core_cand_list *cands) 5446 { 5447 if (!cands) 5448 return; 5449 5450 free(cands->cands); 5451 free(cands); 5452 } 5453 5454 int bpf_core_add_cands(struct bpf_core_cand *local_cand, 5455 size_t local_essent_len, 5456 const struct btf *targ_btf, 5457 const char *targ_btf_name, 5458 int targ_start_id, 5459 struct bpf_core_cand_list *cands) 5460 { 5461 struct bpf_core_cand *new_cands, *cand; 5462 const struct btf_type *t, *local_t; 5463 const char *targ_name, *local_name; 5464 size_t targ_essent_len; 5465 int n, i; 5466 5467 local_t = btf__type_by_id(local_cand->btf, local_cand->id); 5468 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); 5469 5470 n = btf__type_cnt(targ_btf); 5471 for (i = targ_start_id; i < n; i++) { 5472 t = btf__type_by_id(targ_btf, i); 5473 if (!btf_kind_core_compat(t, local_t)) 5474 continue; 5475 5476 targ_name = btf__name_by_offset(targ_btf, t->name_off); 5477 if (str_is_empty(targ_name)) 5478 continue; 5479 5480 targ_essent_len = bpf_core_essential_name_len(targ_name); 5481 if (targ_essent_len != local_essent_len) 5482 continue; 5483 5484 if (strncmp(local_name, targ_name, local_essent_len) != 0) 5485 continue; 5486 5487 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", 5488 local_cand->id, btf_kind_str(local_t), 5489 local_name, i, btf_kind_str(t), targ_name, 5490 targ_btf_name); 5491 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, 5492 sizeof(*cands->cands)); 5493 if (!new_cands) 5494 return -ENOMEM; 5495 5496 cand = &new_cands[cands->len]; 5497 cand->btf = targ_btf; 5498 cand->id = i; 5499 5500 cands->cands = new_cands; 5501 cands->len++; 5502 } 5503 return 0; 5504 } 5505 5506 static int load_module_btfs(struct bpf_object *obj) 5507 { 5508 struct bpf_btf_info info; 5509 struct module_btf *mod_btf; 5510 struct btf *btf; 5511 char name[64]; 5512 __u32 id = 0, len; 5513 int err, fd; 5514 5515 if (obj->btf_modules_loaded) 5516 return 0; 5517 5518 if (obj->gen_loader) 5519 return 0; 5520 5521 /* don't do this again, even if we find no module BTFs */ 5522 obj->btf_modules_loaded = true; 5523 5524 /* kernel too old to support module BTFs */ 5525 if (!kernel_supports(obj, FEAT_MODULE_BTF)) 5526 return 0; 5527 5528 while (true) { 5529 err = bpf_btf_get_next_id(id, &id); 5530 if (err && errno == ENOENT) 5531 return 0; 5532 if (err && errno == EPERM) { 5533 pr_debug("skipping module BTFs loading, missing privileges\n"); 5534 return 0; 5535 } 5536 if (err) { 5537 err = -errno; 5538 pr_warn("failed to iterate BTF objects: %d\n", err); 5539 return err; 5540 } 5541 5542 fd = bpf_btf_get_fd_by_id(id); 5543 if (fd < 0) { 5544 if (errno == ENOENT) 5545 continue; /* expected race: BTF was unloaded */ 5546 err = -errno; 5547 pr_warn("failed to get BTF object #%d FD: %d\n", id, err); 5548 return err; 5549 } 5550 5551 len = sizeof(info); 5552 memset(&info, 0, sizeof(info)); 5553 info.name = ptr_to_u64(name); 5554 info.name_len = sizeof(name); 5555 5556 err = bpf_btf_get_info_by_fd(fd, &info, &len); 5557 if (err) { 5558 err = -errno; 5559 pr_warn("failed to get BTF object #%d info: %d\n", id, err); 5560 goto err_out; 5561 } 5562 5563 /* ignore non-module BTFs */ 5564 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { 5565 
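/* vmlinux BTF is loaded separately (obj->btf_vmlinux); skip it and any non-kernel BTF objects, only module BTFs are collected here */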
close(fd);
5566 continue;
5567 }
5568
5569 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5570 err = libbpf_get_error(btf);
5571 if (err) {
5572 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5573 name, id, err);
5574 goto err_out;
5575 }
5576
5577 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5578 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5579 if (err)
5580 goto err_out;
5581
5582 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5583
5584 mod_btf->btf = btf;
5585 mod_btf->id = id;
5586 mod_btf->fd = fd;
5587 mod_btf->name = strdup(name);
5588 if (!mod_btf->name) {
5589 err = -ENOMEM;
5590 goto err_out;
5591 }
5592 continue;
5593
5594 err_out:
5595 close(fd);
5596 return err;
5597 }
5598
5599 return 0;
5600 }
5601
5602 static struct bpf_core_cand_list *
5603 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5604 {
5605 struct bpf_core_cand local_cand = {};
5606 struct bpf_core_cand_list *cands;
5607 const struct btf *main_btf;
5608 const struct btf_type *local_t;
5609 const char *local_name;
5610 size_t local_essent_len;
5611 int err, i;
5612
5613 local_cand.btf = local_btf;
5614 local_cand.id = local_type_id;
5615 local_t = btf__type_by_id(local_btf, local_type_id);
5616 if (!local_t)
5617 return ERR_PTR(-EINVAL);
5618
5619 local_name = btf__name_by_offset(local_btf, local_t->name_off);
5620 if (str_is_empty(local_name))
5621 return ERR_PTR(-EINVAL);
5622 local_essent_len = bpf_core_essential_name_len(local_name);
5623
5624 cands = calloc(1, sizeof(*cands));
5625 if (!cands)
5626 return ERR_PTR(-ENOMEM);
5627
5628 /* Attempt to find target candidates in vmlinux BTF first */
5629 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5630 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5631 if (err)
5632 goto err_out;
5633
5634 /* if vmlinux BTF has any candidate, don't go for module BTFs */
5635 if (cands->len)
5636 return cands;
5637
5638 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5639 if (obj->btf_vmlinux_override)
5640 return cands;
5641
5642 /* now look through module BTFs, trying to still find candidates */
5643 err = load_module_btfs(obj);
5644 if (err)
5645 goto err_out;
5646
5647 for (i = 0; i < obj->btf_module_cnt; i++) {
5648 err = bpf_core_add_cands(&local_cand, local_essent_len,
5649 obj->btf_modules[i].btf,
5650 obj->btf_modules[i].name,
5651 btf__type_cnt(obj->btf_vmlinux),
5652 cands);
5653 if (err)
5654 goto err_out;
5655 }
5656
5657 return cands;
5658 err_out:
5659 bpf_core_free_cands(cands);
5660 return ERR_PTR(err);
5661 }
5662
5663 /* Check local and target types for compatibility. This check is used for
5664 * type-based CO-RE relocations and follows slightly different rules than
5665 * field-based relocations. This function assumes that root types were already
5666 * checked for name match. Beyond that initial root-level name check, names
5667 * are completely ignored.
Compatibility rules are as follows:
5668 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5669 * kind should match for local and target types (i.e., STRUCT is not
5670 * compatible with UNION);
5671 * - for ENUMs, the size is ignored;
5672 * - for INT, size and signedness are ignored;
5673 * - for ARRAY, dimensionality is ignored, element types are checked for
5674 * compatibility recursively;
5675 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
5676 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5677 * - FUNC_PROTOs are compatible if they have compatible signatures: the same
5678 * number of input args and compatible return and argument types.
5679 * These rules are not set in stone and probably will be adjusted as we get
5680 * more experience with using BPF CO-RE relocations.
5681 */
5682 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5683 const struct btf *targ_btf, __u32 targ_id)
5684 {
5685 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5686 }
5687
5688 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5689 const struct btf *targ_btf, __u32 targ_id)
5690 {
5691 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5692 }
5693
5694 static size_t bpf_core_hash_fn(const long key, void *ctx)
5695 {
5696 return key;
5697 }
5698
5699 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5700 {
5701 return k1 == k2;
5702 }
5703
5704 static int record_relo_core(struct bpf_program *prog,
5705 const struct bpf_core_relo *core_relo, int insn_idx)
5706 {
5707 struct reloc_desc *relos, *relo;
5708
5709 relos = libbpf_reallocarray(prog->reloc_desc,
5710 prog->nr_reloc + 1, sizeof(*relos));
5711 if (!relos)
5712 return -ENOMEM;
5713 relo = &relos[prog->nr_reloc];
5714 relo->type = RELO_CORE;
5715 relo->insn_idx = insn_idx;
5716 relo->core_relo = core_relo;
5717 prog->reloc_desc = relos;
5718 prog->nr_reloc++;
5719 return 0;
5720 }
5721
5722 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5723 {
5724 struct reloc_desc *relo;
5725 int i;
5726
5727 for (i = 0; i < prog->nr_reloc; i++) {
5728 relo = &prog->reloc_desc[i];
5729 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5730 continue;
5731
5732 return relo->core_relo;
5733 }
5734
5735 return NULL;
5736 }
5737
5738 static int bpf_core_resolve_relo(struct bpf_program *prog,
5739 const struct bpf_core_relo *relo,
5740 int relo_idx,
5741 const struct btf *local_btf,
5742 struct hashmap *cand_cache,
5743 struct bpf_core_relo_res *targ_res)
5744 {
5745 struct bpf_core_spec specs_scratch[3] = {};
5746 struct bpf_core_cand_list *cands = NULL;
5747 const char *prog_name = prog->name;
5748 const struct btf_type *local_type;
5749 const char *local_name;
5750 __u32 local_id = relo->type_id;
5751 int err;
5752
5753 local_type = btf__type_by_id(local_btf, local_id);
5754 if (!local_type)
5755 return -EINVAL;
5756
5757 local_name = btf__name_by_offset(local_btf, local_type->name_off);
5758 if (!local_name)
5759 return -EINVAL;
5760
5761 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5762 !hashmap__find(cand_cache, local_id, &cands)) {
5763 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5764 if (IS_ERR(cands)) {
5765 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5766 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5767 local_name, PTR_ERR(cands));
5768 return PTR_ERR(cands);
5769 }
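/* cache the candidate list keyed by the local root type ID, so that other relocations against the same type can reuse it */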
5770 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); 5771 if (err) { 5772 bpf_core_free_cands(cands); 5773 return err; 5774 } 5775 } 5776 5777 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, 5778 targ_res); 5779 } 5780 5781 static int 5782 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) 5783 { 5784 const struct btf_ext_info_sec *sec; 5785 struct bpf_core_relo_res targ_res; 5786 const struct bpf_core_relo *rec; 5787 const struct btf_ext_info *seg; 5788 struct hashmap_entry *entry; 5789 struct hashmap *cand_cache = NULL; 5790 struct bpf_program *prog; 5791 struct bpf_insn *insn; 5792 const char *sec_name; 5793 int i, err = 0, insn_idx, sec_idx, sec_num; 5794 5795 if (obj->btf_ext->core_relo_info.len == 0) 5796 return 0; 5797 5798 if (targ_btf_path) { 5799 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); 5800 err = libbpf_get_error(obj->btf_vmlinux_override); 5801 if (err) { 5802 pr_warn("failed to parse target BTF: %d\n", err); 5803 return err; 5804 } 5805 } 5806 5807 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); 5808 if (IS_ERR(cand_cache)) { 5809 err = PTR_ERR(cand_cache); 5810 goto out; 5811 } 5812 5813 seg = &obj->btf_ext->core_relo_info; 5814 sec_num = 0; 5815 for_each_btf_ext_sec(seg, sec) { 5816 sec_idx = seg->sec_idxs[sec_num]; 5817 sec_num++; 5818 5819 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5820 if (str_is_empty(sec_name)) { 5821 err = -EINVAL; 5822 goto out; 5823 } 5824 5825 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 5826 5827 for_each_btf_ext_rec(seg, sec, i, rec) { 5828 if (rec->insn_off % BPF_INSN_SZ) 5829 return -EINVAL; 5830 insn_idx = rec->insn_off / BPF_INSN_SZ; 5831 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 5832 if (!prog) { 5833 /* When __weak subprog is "overridden" by another instance 5834 * of the subprog from a different object file, linker still 5835 * appends all the .BTF.ext info that used to belong to that 5836 * eliminated subprogram. 5837 * This is similar to what x86-64 linker does for relocations. 5838 * So just ignore such relocations just like we ignore 5839 * subprog instructions when discovering subprograms. 
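 * An illustrative reading (editorial, not from the original): if a.bpf.o and
 * b.bpf.o both define a __weak subprog and are statically linked, only one
 * body survives, but .BTF.ext still carries CO-RE records for the discarded
 * copy; those records reference instruction offsets that no longer map to
 * any known program, so find_prog_by_sec_insn() above returns NULL for them.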
5840 */ 5841 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", 5842 sec_name, i, insn_idx); 5843 continue; 5844 } 5845 /* no need to apply CO-RE relocation if the program is 5846 * not going to be loaded 5847 */ 5848 if (!prog->autoload) 5849 continue; 5850 5851 /* adjust insn_idx from section frame of reference to the local 5852 * program's frame of reference; (sub-)program code is not yet 5853 * relocated, so it's enough to just subtract in-section offset 5854 */ 5855 insn_idx = insn_idx - prog->sec_insn_off; 5856 if (insn_idx >= prog->insns_cnt) 5857 return -EINVAL; 5858 insn = &prog->insns[insn_idx]; 5859 5860 err = record_relo_core(prog, rec, insn_idx); 5861 if (err) { 5862 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", 5863 prog->name, i, err); 5864 goto out; 5865 } 5866 5867 if (prog->obj->gen_loader) 5868 continue; 5869 5870 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 5871 if (err) { 5872 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", 5873 prog->name, i, err); 5874 goto out; 5875 } 5876 5877 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); 5878 if (err) { 5879 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", 5880 prog->name, i, insn_idx, err); 5881 goto out; 5882 } 5883 } 5884 } 5885 5886 out: 5887 /* obj->btf_vmlinux and module BTFs are freed after object load */ 5888 btf__free(obj->btf_vmlinux_override); 5889 obj->btf_vmlinux_override = NULL; 5890 5891 if (!IS_ERR_OR_NULL(cand_cache)) { 5892 hashmap__for_each_entry(cand_cache, entry, i) { 5893 bpf_core_free_cands(entry->pvalue); 5894 } 5895 hashmap__free(cand_cache); 5896 } 5897 return err; 5898 } 5899 5900 /* base map load ldimm64 special constant, used also for log fixup logic */ 5901 #define POISON_LDIMM64_MAP_BASE 2001000000 5902 #define POISON_LDIMM64_MAP_PFX "200100" 5903 5904 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, 5905 int insn_idx, struct bpf_insn *insn, 5906 int map_idx, const struct bpf_map *map) 5907 { 5908 int i; 5909 5910 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", 5911 prog->name, relo_idx, insn_idx, map_idx, map->name); 5912 5913 /* we turn single ldimm64 into two identical invalid calls */ 5914 for (i = 0; i < 2; i++) { 5915 insn->code = BPF_JMP | BPF_CALL; 5916 insn->dst_reg = 0; 5917 insn->src_reg = 0; 5918 insn->off = 0; 5919 /* if this instruction is reachable (not a dead code), 5920 * verifier will complain with something like: 5921 * invalid func unknown#2001000123 5922 * where lower 123 is map index into obj->maps[] array 5923 */ 5924 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; 5925 5926 insn++; 5927 } 5928 } 5929 5930 /* unresolved kfunc call special constant, used also for log fixup logic */ 5931 #define POISON_CALL_KFUNC_BASE 2002000000 5932 #define POISON_CALL_KFUNC_PFX "2002" 5933 5934 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, 5935 int insn_idx, struct bpf_insn *insn, 5936 int ext_idx, const struct extern_desc *ext) 5937 { 5938 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", 5939 prog->name, relo_idx, insn_idx, ext->name); 5940 5941 /* we turn kfunc call into invalid helper call with identifiable constant */ 5942 insn->code = BPF_JMP | BPF_CALL; 5943 insn->dst_reg = 0; 5944 insn->src_reg = 0; 5945 insn->off = 0; 5946 /* if this instruction is reachable (not a dead code), 5947 * verifier will complain with something like: 5948 
* invalid func unknown#2001000123 5949 * where lower 123 is extern index into obj->externs[] array 5950 */ 5951 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; 5952 } 5953 5954 /* Relocate data references within program code: 5955 * - map references; 5956 * - global variable references; 5957 * - extern references. 5958 */ 5959 static int 5960 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) 5961 { 5962 int i; 5963 5964 for (i = 0; i < prog->nr_reloc; i++) { 5965 struct reloc_desc *relo = &prog->reloc_desc[i]; 5966 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 5967 const struct bpf_map *map; 5968 struct extern_desc *ext; 5969 5970 switch (relo->type) { 5971 case RELO_LD64: 5972 map = &obj->maps[relo->map_idx]; 5973 if (obj->gen_loader) { 5974 insn[0].src_reg = BPF_PSEUDO_MAP_IDX; 5975 insn[0].imm = relo->map_idx; 5976 } else if (map->autocreate) { 5977 insn[0].src_reg = BPF_PSEUDO_MAP_FD; 5978 insn[0].imm = map->fd; 5979 } else { 5980 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 5981 relo->map_idx, map); 5982 } 5983 break; 5984 case RELO_DATA: 5985 map = &obj->maps[relo->map_idx]; 5986 insn[1].imm = insn[0].imm + relo->sym_off; 5987 if (obj->gen_loader) { 5988 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 5989 insn[0].imm = relo->map_idx; 5990 } else if (map->autocreate) { 5991 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 5992 insn[0].imm = map->fd; 5993 } else { 5994 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 5995 relo->map_idx, map); 5996 } 5997 break; 5998 case RELO_EXTERN_LD64: 5999 ext = &obj->externs[relo->ext_idx]; 6000 if (ext->type == EXT_KCFG) { 6001 if (obj->gen_loader) { 6002 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6003 insn[0].imm = obj->kconfig_map_idx; 6004 } else { 6005 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6006 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; 6007 } 6008 insn[1].imm = ext->kcfg.data_off; 6009 } else /* EXT_KSYM */ { 6010 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ 6011 insn[0].src_reg = BPF_PSEUDO_BTF_ID; 6012 insn[0].imm = ext->ksym.kernel_btf_id; 6013 insn[1].imm = ext->ksym.kernel_btf_obj_fd; 6014 } else { /* typeless ksyms or unresolved typed ksyms */ 6015 insn[0].imm = (__u32)ext->ksym.addr; 6016 insn[1].imm = ext->ksym.addr >> 32; 6017 } 6018 } 6019 break; 6020 case RELO_EXTERN_CALL: 6021 ext = &obj->externs[relo->ext_idx]; 6022 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 6023 if (ext->is_set) { 6024 insn[0].imm = ext->ksym.kernel_btf_id; 6025 insn[0].off = ext->ksym.btf_fd_idx; 6026 } else { /* unresolved weak kfunc call */ 6027 poison_kfunc_call(prog, i, relo->insn_idx, insn, 6028 relo->ext_idx, ext); 6029 } 6030 break; 6031 case RELO_SUBPROG_ADDR: 6032 if (insn[0].src_reg != BPF_PSEUDO_FUNC) { 6033 pr_warn("prog '%s': relo #%d: bad insn\n", 6034 prog->name, i); 6035 return -EINVAL; 6036 } 6037 /* handled already */ 6038 break; 6039 case RELO_CALL: 6040 /* handled already */ 6041 break; 6042 case RELO_CORE: 6043 /* will be handled by bpf_program_record_relos() */ 6044 break; 6045 default: 6046 pr_warn("prog '%s': relo #%d: bad relo type %d\n", 6047 prog->name, i, relo->type); 6048 return -EINVAL; 6049 } 6050 } 6051 6052 return 0; 6053 } 6054 6055 static int adjust_prog_btf_ext_info(const struct bpf_object *obj, 6056 const struct bpf_program *prog, 6057 const struct btf_ext_info *ext_info, 6058 void **prog_info, __u32 *prog_rec_cnt, 6059 __u32 *prog_rec_sz) 6060 { 6061 void *copy_start = NULL, *copy_end = NULL; 6062 void *rec, *rec_end, *new_prog_info; 6063 const struct btf_ext_info_sec *sec; 
6064 size_t old_sz, new_sz; 6065 int i, sec_num, sec_idx, off_adj; 6066 6067 sec_num = 0; 6068 for_each_btf_ext_sec(ext_info, sec) { 6069 sec_idx = ext_info->sec_idxs[sec_num]; 6070 sec_num++; 6071 if (prog->sec_idx != sec_idx) 6072 continue; 6073 6074 for_each_btf_ext_rec(ext_info, sec, i, rec) { 6075 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; 6076 6077 if (insn_off < prog->sec_insn_off) 6078 continue; 6079 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) 6080 break; 6081 6082 if (!copy_start) 6083 copy_start = rec; 6084 copy_end = rec + ext_info->rec_size; 6085 } 6086 6087 if (!copy_start) 6088 return -ENOENT; 6089 6090 /* append func/line info of a given (sub-)program to the main 6091 * program func/line info 6092 */ 6093 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; 6094 new_sz = old_sz + (copy_end - copy_start); 6095 new_prog_info = realloc(*prog_info, new_sz); 6096 if (!new_prog_info) 6097 return -ENOMEM; 6098 *prog_info = new_prog_info; 6099 *prog_rec_cnt = new_sz / ext_info->rec_size; 6100 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); 6101 6102 /* Kernel instruction offsets are in units of 8-byte 6103 * instructions, while .BTF.ext instruction offsets generated 6104 * by Clang are in units of bytes. So convert Clang offsets 6105 * into kernel offsets and adjust offset according to program 6106 * relocated position. 6107 */ 6108 off_adj = prog->sub_insn_off - prog->sec_insn_off; 6109 rec = new_prog_info + old_sz; 6110 rec_end = new_prog_info + new_sz; 6111 for (; rec < rec_end; rec += ext_info->rec_size) { 6112 __u32 *insn_off = rec; 6113 6114 *insn_off = *insn_off / BPF_INSN_SZ + off_adj; 6115 } 6116 *prog_rec_sz = ext_info->rec_size; 6117 return 0; 6118 } 6119 6120 return -ENOENT; 6121 } 6122 6123 static int 6124 reloc_prog_func_and_line_info(const struct bpf_object *obj, 6125 struct bpf_program *main_prog, 6126 const struct bpf_program *prog) 6127 { 6128 int err; 6129 6130 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't 6131 * support func/line info 6132 */ 6133 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) 6134 return 0; 6135 6136 /* only attempt func info relocation if main program's func_info 6137 * relocation was successful 6138 */ 6139 if (main_prog != prog && !main_prog->func_info) 6140 goto line_info; 6141 6142 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, 6143 &main_prog->func_info, 6144 &main_prog->func_info_cnt, 6145 &main_prog->func_info_rec_size); 6146 if (err) { 6147 if (err != -ENOENT) { 6148 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", 6149 prog->name, err); 6150 return err; 6151 } 6152 if (main_prog->func_info) { 6153 /* 6154 * Some info has already been found but has problem 6155 * in the last btf_ext reloc. Must have to error out. 6156 */ 6157 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); 6158 return err; 6159 } 6160 /* Have problem loading the very first info. Ignore the rest. 
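 * (Editorial note: this is safe to ignore because func info is optional
 * metadata; the kernel will still load and verify the program without it,
 * only with less informative introspection and verifier output.)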
*/ 6161 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", 6162 prog->name); 6163 } 6164 6165 line_info: 6166 /* don't relocate line info if main program's relocation failed */ 6167 if (main_prog != prog && !main_prog->line_info) 6168 return 0; 6169 6170 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, 6171 &main_prog->line_info, 6172 &main_prog->line_info_cnt, 6173 &main_prog->line_info_rec_size); 6174 if (err) { 6175 if (err != -ENOENT) { 6176 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", 6177 prog->name, err); 6178 return err; 6179 } 6180 if (main_prog->line_info) { 6181 /* 6182 * Some info has already been found but has problem 6183 * in the last btf_ext reloc. Must have to error out. 6184 */ 6185 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); 6186 return err; 6187 } 6188 /* Have problem loading the very first info. Ignore the rest. */ 6189 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", 6190 prog->name); 6191 } 6192 return 0; 6193 } 6194 6195 static int cmp_relo_by_insn_idx(const void *key, const void *elem) 6196 { 6197 size_t insn_idx = *(const size_t *)key; 6198 const struct reloc_desc *relo = elem; 6199 6200 if (insn_idx == relo->insn_idx) 6201 return 0; 6202 return insn_idx < relo->insn_idx ? -1 : 1; 6203 } 6204 6205 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) 6206 { 6207 if (!prog->nr_reloc) 6208 return NULL; 6209 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, 6210 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); 6211 } 6212 6213 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) 6214 { 6215 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; 6216 struct reloc_desc *relos; 6217 int i; 6218 6219 if (main_prog == subprog) 6220 return 0; 6221 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); 6222 /* if new count is zero, reallocarray can return a valid NULL result; 6223 * in this case the previous pointer will be freed, so we *have to* 6224 * reassign old pointer to the new value (even if it's NULL) 6225 */ 6226 if (!relos && new_cnt) 6227 return -ENOMEM; 6228 if (subprog->nr_reloc) 6229 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, 6230 sizeof(*relos) * subprog->nr_reloc); 6231 6232 for (i = main_prog->nr_reloc; i < new_cnt; i++) 6233 relos[i].insn_idx += subprog->sub_insn_off; 6234 /* After insn_idx adjustment the 'relos' array is still sorted 6235 * by insn_idx and doesn't break bsearch. 
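 * (Editorial example: shifting every appended entry by the same constant
 * keeps their relative order, and they land after all existing entries
 * because sub_insn_off equals the main prog's instruction count at the time
 * the subprog code was appended, which is larger than any insn_idx already
 * recorded for the main prog.)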
6236 */ 6237 main_prog->reloc_desc = relos; 6238 main_prog->nr_reloc = new_cnt; 6239 return 0; 6240 } 6241 6242 static int 6243 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, 6244 struct bpf_program *subprog) 6245 { 6246 struct bpf_insn *insns; 6247 size_t new_cnt; 6248 int err; 6249 6250 subprog->sub_insn_off = main_prog->insns_cnt; 6251 6252 new_cnt = main_prog->insns_cnt + subprog->insns_cnt; 6253 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); 6254 if (!insns) { 6255 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); 6256 return -ENOMEM; 6257 } 6258 main_prog->insns = insns; 6259 main_prog->insns_cnt = new_cnt; 6260 6261 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, 6262 subprog->insns_cnt * sizeof(*insns)); 6263 6264 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", 6265 main_prog->name, subprog->insns_cnt, subprog->name); 6266 6267 /* The subprog insns are now appended. Append its relos too. */ 6268 err = append_subprog_relos(main_prog, subprog); 6269 if (err) 6270 return err; 6271 return 0; 6272 } 6273 6274 static int 6275 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, 6276 struct bpf_program *prog) 6277 { 6278 size_t sub_insn_idx, insn_idx; 6279 struct bpf_program *subprog; 6280 struct reloc_desc *relo; 6281 struct bpf_insn *insn; 6282 int err; 6283 6284 err = reloc_prog_func_and_line_info(obj, main_prog, prog); 6285 if (err) 6286 return err; 6287 6288 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { 6289 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6290 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) 6291 continue; 6292 6293 relo = find_prog_insn_relo(prog, insn_idx); 6294 if (relo && relo->type == RELO_EXTERN_CALL) 6295 /* kfunc relocations will be handled later 6296 * in bpf_object__relocate_data() 6297 */ 6298 continue; 6299 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { 6300 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", 6301 prog->name, insn_idx, relo->type); 6302 return -LIBBPF_ERRNO__RELOC; 6303 } 6304 if (relo) { 6305 /* sub-program instruction index is a combination of 6306 * an offset of a symbol pointed to by relocation and 6307 * call instruction's imm field; for global functions, 6308 * call always has imm = -1, but for static functions 6309 * relocation is against STT_SECTION and insn->imm 6310 * points to a start of a static function 6311 * 6312 * for subprog addr relocation, the relo->sym_off + insn->imm is 6313 * the byte offset in the corresponding section. 6314 */ 6315 if (relo->type == RELO_CALL) 6316 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; 6317 else 6318 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; 6319 } else if (insn_is_pseudo_func(insn)) { 6320 /* 6321 * RELO_SUBPROG_ADDR relo is always emitted even if both 6322 * functions are in the same section, so it shouldn't reach here. 
6323 */
6324 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6325 prog->name, insn_idx);
6326 return -LIBBPF_ERRNO__RELOC;
6327 } else {
6328 /* if subprogram call is to a static function within
6329 * the same ELF section, there won't be any relocation
6330 * emitted, but it also means there is no additional
6331 * offset necessary, insns->imm is relative to
6332 * instruction's original position within the section
6333 */
6334 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6335 }
6336
6337 /* we enforce that sub-programs should be in .text section */
6338 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6339 if (!subprog) {
6340 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6341 prog->name);
6342 return -LIBBPF_ERRNO__RELOC;
6343 }
6344
6345 /* if it's the first call instruction calling into this
6346 * subprogram (meaning this subprog hasn't been processed
6347 * yet) within the context of current main program:
6348 * - append it at the end of main program's instruction block;
6349 * - process it recursively, while current program is put on hold;
6350 * - if that subprogram calls some other not yet processed
6351 * subprogram, the same thing will happen recursively until
6352 * there are no more unprocessed subprograms left to append
6353 * and relocate.
6354 */
6355 if (subprog->sub_insn_off == 0) {
6356 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6357 if (err)
6358 return err;
6359 err = bpf_object__reloc_code(obj, main_prog, subprog);
6360 if (err)
6361 return err;
6362 }
6363
6364 /* main_prog->insns memory could have been re-allocated, so
6365 * calculate pointer again
6366 */
6367 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6368 /* calculate correct instruction position within current main
6369 * prog; each main prog can have a different set of
6370 * subprograms appended (potentially in different order as
6371 * well), so position of any subprog can be different for
6372 * different main programs
6373 */
6374 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6375
6376 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6377 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6378 }
6379
6380 return 0;
6381 }
6382
6383 /*
6384 * Relocate sub-program calls.
6385 *
6386 * The algorithm operates as follows. Each entry-point BPF program (referred to
6387 * as main prog) is processed separately. Each subprog (a non-entry function,
6388 * which can be called from either entry progs or other subprogs) gets its
6389 * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6390 * hasn't yet been appended and relocated within the current main prog. Once it's
6391 * relocated, sub_insn_off will point at the position within current main prog
6392 * where given subprog was appended. This will further be used to relocate all
6393 * the call instructions jumping into this subprog.
6394 *
6395 * We start with main program and process all call instructions. If the call
6396 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6397 * is zero), subprog instructions are appended at the end of main program's
6398 * instruction array. Then main program is "put on hold" while we recursively
6399 * process the newly appended subprogram. If that subprogram calls into another
6400 * subprogram that hasn't been appended, the new subprogram is appended again to
6401 * the *main* prog's instructions (subprog's instructions are always left
6402 * untouched, as they need to be in unmodified state for subsequent main progs
6403 * and subprog instructions are always sent only as part of a main prog) and
6404 * the process continues recursively. Once all the subprogs called from a main
6405 * prog or any of its subprogs are appended (and relocated), all their
6406 * positions within the finalized instructions array are known, so it's easy to
6407 * rewrite call instructions with correct relative offsets, corresponding to
6408 * the desired target subprog.
6409 *
6410 * It's important to realize that some subprogs might not be called from some
6411 * main prog and any of its called/used subprogs. Those will keep their
6412 * subprog->sub_insn_off as zero at all times and won't be appended to current
6413 * main prog and won't be relocated within the context of current main prog.
6414 * They might still be used from other main progs later.
6415 *
6416 * Visually this process can be shown as below. Suppose we have two main
6417 * programs mainA and mainB and BPF object contains three subprogs: subA,
6418 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6419 * subC both call subB:
6420 *
6421 *        +--------+ +-------+
6422 *        |        v v       |
6423 *     +--+---+ +--+-+-+ +---+--+
6424 *     | subA | | subB | | subC |
6425 *     +--+---+ +------+ +---+--+
6426 *        ^                  ^
6427 *        |                  |
6428 *    +---+-------+   +------+----+
6429 *    |   mainA   |   |   mainB   |
6430 *    +-----------+   +-----------+
6431 *
6432 * We'll start relocating mainA, will find subA, append it and start
6433 * processing subA recursively:
6434 *
6435 *     +-----------+------+
6436 *     |   mainA   | subA |
6437 *     +-----------+------+
6438 *
6439 * At this point we notice that subB is used from subA, so we append it and
6440 * relocate (there are no further subcalls from subB):
6441 *
6442 *     +-----------+------+------+
6443 *     |   mainA   | subA | subB |
6444 *     +-----------+------+------+
6445 *
6446 * At this point, we relocate subA calls, then go one level up and finish with
6447 * relocating mainA calls. mainA is done.
6448 *
6449 * For mainB the process is similar but results in a different order. We start
6450 * with mainB and skip subA and subB, as mainB never calls them (at least
6451 * directly), but we see subC is needed, so we append and start processing it:
6452 *
6453 *     +-----------+------+
6454 *     |   mainB   | subC |
6455 *     +-----------+------+
6456 * Now we see subC needs subB, so we go back to it, append and relocate it:
6457 *
6458 *     +-----------+------+------+
6459 *     |   mainB   | subC | subB |
6460 *     +-----------+------+------+
6461 *
6462 * At this point we unwind recursion, relocate calls in subC, then in mainB.
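 *
 * (Editorial worked example with hypothetical sizes: if mainA is 100 insns
 * long, subA is appended at sub_insn_off 100; a call to subA at mainA's
 * insn #10 then gets imm = 100 - (0 + 10) - 1 = 89, matching the pseudo-call
 * convention where the jump target is insn_idx + imm + 1.)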
6463 */ 6464 static int 6465 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) 6466 { 6467 struct bpf_program *subprog; 6468 int i, err; 6469 6470 /* mark all subprogs as not relocated (yet) within the context of 6471 * current main program 6472 */ 6473 for (i = 0; i < obj->nr_programs; i++) { 6474 subprog = &obj->programs[i]; 6475 if (!prog_is_subprog(obj, subprog)) 6476 continue; 6477 6478 subprog->sub_insn_off = 0; 6479 } 6480 6481 err = bpf_object__reloc_code(obj, prog, prog); 6482 if (err) 6483 return err; 6484 6485 return 0; 6486 } 6487 6488 static void 6489 bpf_object__free_relocs(struct bpf_object *obj) 6490 { 6491 struct bpf_program *prog; 6492 int i; 6493 6494 /* free up relocation descriptors */ 6495 for (i = 0; i < obj->nr_programs; i++) { 6496 prog = &obj->programs[i]; 6497 zfree(&prog->reloc_desc); 6498 prog->nr_reloc = 0; 6499 } 6500 } 6501 6502 static int cmp_relocs(const void *_a, const void *_b) 6503 { 6504 const struct reloc_desc *a = _a; 6505 const struct reloc_desc *b = _b; 6506 6507 if (a->insn_idx != b->insn_idx) 6508 return a->insn_idx < b->insn_idx ? -1 : 1; 6509 6510 /* no two relocations should have the same insn_idx, but ... */ 6511 if (a->type != b->type) 6512 return a->type < b->type ? -1 : 1; 6513 6514 return 0; 6515 } 6516 6517 static void bpf_object__sort_relos(struct bpf_object *obj) 6518 { 6519 int i; 6520 6521 for (i = 0; i < obj->nr_programs; i++) { 6522 struct bpf_program *p = &obj->programs[i]; 6523 6524 if (!p->nr_reloc) 6525 continue; 6526 6527 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); 6528 } 6529 } 6530 6531 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) 6532 { 6533 const char *str = "exception_callback:"; 6534 size_t pfx_len = strlen(str); 6535 int i, j, n; 6536 6537 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) 6538 return 0; 6539 6540 n = btf__type_cnt(obj->btf); 6541 for (i = 1; i < n; i++) { 6542 const char *name; 6543 struct btf_type *t; 6544 6545 t = btf_type_by_id(obj->btf, i); 6546 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) 6547 continue; 6548 6549 name = btf__str_by_offset(obj->btf, t->name_off); 6550 if (strncmp(name, str, pfx_len) != 0) 6551 continue; 6552 6553 t = btf_type_by_id(obj->btf, t->type); 6554 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 6555 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", 6556 prog->name); 6557 return -EINVAL; 6558 } 6559 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) 6560 continue; 6561 /* Multiple callbacks are specified for the same prog, 6562 * the verifier will eventually return an error for this 6563 * case, hence simply skip appending a subprog. 6564 */ 6565 if (prog->exception_cb_idx >= 0) { 6566 prog->exception_cb_idx = -1; 6567 break; 6568 } 6569 6570 name += pfx_len; 6571 if (str_is_empty(name)) { 6572 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", 6573 prog->name); 6574 return -EINVAL; 6575 } 6576 6577 for (j = 0; j < obj->nr_programs; j++) { 6578 struct bpf_program *subprog = &obj->programs[j]; 6579 6580 if (!prog_is_subprog(obj, subprog)) 6581 continue; 6582 if (strcmp(name, subprog->name) != 0) 6583 continue; 6584 /* Enforce non-hidden, as from verifier point of 6585 * view it expects global functions, whereas the 6586 * mark_btf_static fixes up linkage as static. 
6587 */ 6588 if (!subprog->sym_global || subprog->mark_btf_static) { 6589 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", 6590 prog->name, subprog->name); 6591 return -EINVAL; 6592 } 6593 /* Let's see if we already saw a static exception callback with the same name */ 6594 if (prog->exception_cb_idx >= 0) { 6595 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", 6596 prog->name, subprog->name); 6597 return -EINVAL; 6598 } 6599 prog->exception_cb_idx = j; 6600 break; 6601 } 6602 6603 if (prog->exception_cb_idx >= 0) 6604 continue; 6605 6606 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); 6607 return -ENOENT; 6608 } 6609 6610 return 0; 6611 } 6612 6613 static struct { 6614 enum bpf_prog_type prog_type; 6615 const char *ctx_name; 6616 } global_ctx_map[] = { 6617 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, 6618 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, 6619 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, 6620 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, 6621 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, 6622 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, 6623 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, 6624 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, 6625 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, 6626 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, 6627 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, 6628 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, 6629 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, 6630 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, 6631 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, 6632 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, 6633 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, 6634 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, 6635 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, 6636 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, 6637 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, 6638 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, 6639 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, 6640 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, 6641 { BPF_PROG_TYPE_XDP, "xdp_md" }, 6642 /* all other program types don't have "named" context structs */ 6643 }; 6644 6645 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, 6646 * for below __builtin_types_compatible_p() checks; 6647 * with this approach we don't need any extra arch-specific #ifdef guards 6648 */ 6649 struct pt_regs; 6650 struct user_pt_regs; 6651 struct user_regs_struct; 6652 6653 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, 6654 const char *subprog_name, int arg_idx, 6655 int arg_type_id, const char *ctx_name) 6656 { 6657 const struct btf_type *t; 6658 const char *tname; 6659 6660 /* check if existing parameter already matches verifier expectations */ 6661 t = skip_mods_and_typedefs(btf, arg_type_id, NULL); 6662 if (!btf_is_ptr(t)) 6663 goto out_warn; 6664 6665 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe 6666 * and perf_event programs, so check this case early on and forget 6667 * about it for subsequent checks 6668 */ 6669 while (btf_is_mod(t)) 6670 t = btf__type_by_id(btf, t->type); 6671 if (btf_is_typedef(t) && 6672 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { 6673 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6674 if (strcmp(tname, "bpf_user_pt_regs_t") == 0) 6675 return false; /* canonical type for kprobe/perf_event */ 6676 } 6677 
6678 /* now we can ignore typedefs moving forward */ 6679 t = skip_mods_and_typedefs(btf, t->type, NULL); 6680 6681 /* if it's `void *`, definitely fix up BTF info */ 6682 if (btf_is_void(t)) 6683 return true; 6684 6685 /* if it's already proper canonical type, no need to fix up */ 6686 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6687 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) 6688 return false; 6689 6690 /* special cases */ 6691 switch (prog->type) { 6692 case BPF_PROG_TYPE_KPROBE: 6693 /* `struct pt_regs *` is expected, but we need to fix up */ 6694 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6695 return true; 6696 break; 6697 case BPF_PROG_TYPE_PERF_EVENT: 6698 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && 6699 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6700 return true; 6701 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && 6702 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) 6703 return true; 6704 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && 6705 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) 6706 return true; 6707 break; 6708 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6709 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 6710 /* allow u64* as ctx */ 6711 if (btf_is_int(t) && t->size == 8) 6712 return true; 6713 break; 6714 default: 6715 break; 6716 } 6717 6718 out_warn: 6719 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", 6720 prog->name, subprog_name, arg_idx, ctx_name); 6721 return false; 6722 } 6723 6724 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) 6725 { 6726 int fn_id, fn_proto_id, ret_type_id, orig_proto_id; 6727 int i, err, arg_cnt, fn_name_off, linkage; 6728 struct btf_type *fn_t, *fn_proto_t, *t; 6729 struct btf_param *p; 6730 6731 /* caller already validated FUNC -> FUNC_PROTO validity */ 6732 fn_t = btf_type_by_id(btf, orig_fn_id); 6733 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6734 6735 /* Note that each btf__add_xxx() operation invalidates 6736 * all btf_type and string pointers, so we need to be 6737 * very careful when cloning BTF types. BTF type 6738 * pointers have to be always refetched. And to avoid 6739 * problems with invalidated string pointers, we 6740 * add empty strings initially, then just fix up 6741 * name_off offsets in place. Offsets are stable for 6742 * existing strings, so that works out. 
6743 */ 6744 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ 6745 linkage = btf_func_linkage(fn_t); 6746 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ 6747 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ 6748 arg_cnt = btf_vlen(fn_proto_t); 6749 6750 /* clone FUNC_PROTO and its params */ 6751 fn_proto_id = btf__add_func_proto(btf, ret_type_id); 6752 if (fn_proto_id < 0) 6753 return -EINVAL; 6754 6755 for (i = 0; i < arg_cnt; i++) { 6756 int name_off; 6757 6758 /* copy original parameter data */ 6759 t = btf_type_by_id(btf, orig_proto_id); 6760 p = &btf_params(t)[i]; 6761 name_off = p->name_off; 6762 6763 err = btf__add_func_param(btf, "", p->type); 6764 if (err) 6765 return err; 6766 6767 fn_proto_t = btf_type_by_id(btf, fn_proto_id); 6768 p = &btf_params(fn_proto_t)[i]; 6769 p->name_off = name_off; /* use remembered str offset */ 6770 } 6771 6772 /* clone FUNC now, btf__add_func() enforces non-empty name, so use 6773 * entry program's name as a placeholder, which we replace immediately 6774 * with original name_off 6775 */ 6776 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); 6777 if (fn_id < 0) 6778 return -EINVAL; 6779 6780 fn_t = btf_type_by_id(btf, fn_id); 6781 fn_t->name_off = fn_name_off; /* reuse original string */ 6782 6783 return fn_id; 6784 } 6785 6786 /* Check if main program or global subprog's function prototype has `arg:ctx` 6787 * argument tags, and, if necessary, substitute correct type to match what BPF 6788 * verifier would expect, taking into account specific program type. This 6789 * allows to support __arg_ctx tag transparently on old kernels that don't yet 6790 * have a native support for it in the verifier, making user's life much 6791 * easier. 6792 */ 6793 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) 6794 { 6795 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; 6796 struct bpf_func_info_min *func_rec; 6797 struct btf_type *fn_t, *fn_proto_t; 6798 struct btf *btf = obj->btf; 6799 const struct btf_type *t; 6800 struct btf_param *p; 6801 int ptr_id = 0, struct_id, tag_id, orig_fn_id; 6802 int i, n, arg_idx, arg_cnt, err, rec_idx; 6803 int *orig_ids; 6804 6805 /* no .BTF.ext, no problem */ 6806 if (!obj->btf_ext || !prog->func_info) 6807 return 0; 6808 6809 /* don't do any fix ups if kernel natively supports __arg_ctx */ 6810 if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) 6811 return 0; 6812 6813 /* some BPF program types just don't have named context structs, so 6814 * this fallback mechanism doesn't work for them 6815 */ 6816 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { 6817 if (global_ctx_map[i].prog_type != prog->type) 6818 continue; 6819 ctx_name = global_ctx_map[i].ctx_name; 6820 break; 6821 } 6822 if (!ctx_name) 6823 return 0; 6824 6825 /* remember original func BTF IDs to detect if we already cloned them */ 6826 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); 6827 if (!orig_ids) 6828 return -ENOMEM; 6829 for (i = 0; i < prog->func_info_cnt; i++) { 6830 func_rec = prog->func_info + prog->func_info_rec_size * i; 6831 orig_ids[i] = func_rec->type_id; 6832 } 6833 6834 /* go through each DECL_TAG with "arg:ctx" and see if it points to one 6835 * of our subprogs; if yes and subprog is global and needs adjustment, 6836 * clone and adjust FUNC -> FUNC_PROTO combo 6837 */ 6838 for (i = 1, n = btf__type_cnt(btf); i < n; i++) { 6839 /* only DECL_TAG with "arg:ctx" value are interesting */ 6840 t = btf__type_by_id(btf, i); 6841 if 
(!btf_is_decl_tag(t)) 6842 continue; 6843 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) 6844 continue; 6845 6846 /* only global funcs need adjustment, if at all */ 6847 orig_fn_id = t->type; 6848 fn_t = btf_type_by_id(btf, orig_fn_id); 6849 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) 6850 continue; 6851 6852 /* sanity check FUNC -> FUNC_PROTO chain, just in case */ 6853 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6854 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) 6855 continue; 6856 6857 /* find corresponding func_info record */ 6858 func_rec = NULL; 6859 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { 6860 if (orig_ids[rec_idx] == t->type) { 6861 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; 6862 break; 6863 } 6864 } 6865 /* current main program doesn't call into this subprog */ 6866 if (!func_rec) 6867 continue; 6868 6869 /* some more sanity checking of DECL_TAG */ 6870 arg_cnt = btf_vlen(fn_proto_t); 6871 arg_idx = btf_decl_tag(t)->component_idx; 6872 if (arg_idx < 0 || arg_idx >= arg_cnt) 6873 continue; 6874 6875 /* check if we should fix up argument type */ 6876 p = &btf_params(fn_proto_t)[arg_idx]; 6877 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; 6878 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) 6879 continue; 6880 6881 /* clone fn/fn_proto, unless we already did it for another arg */ 6882 if (func_rec->type_id == orig_fn_id) { 6883 int fn_id; 6884 6885 fn_id = clone_func_btf_info(btf, orig_fn_id, prog); 6886 if (fn_id < 0) { 6887 err = fn_id; 6888 goto err_out; 6889 } 6890 6891 /* point func_info record to a cloned FUNC type */ 6892 func_rec->type_id = fn_id; 6893 } 6894 6895 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; 6896 * we do it just once per main BPF program, as all global 6897 * funcs share the same program type, so need only PTR -> 6898 * STRUCT type chain 6899 */ 6900 if (ptr_id == 0) { 6901 struct_id = btf__add_struct(btf, ctx_name, 0); 6902 ptr_id = btf__add_ptr(btf, struct_id); 6903 if (ptr_id < 0 || struct_id < 0) { 6904 err = -EINVAL; 6905 goto err_out; 6906 } 6907 } 6908 6909 /* for completeness, clone DECL_TAG and point it to cloned param */ 6910 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); 6911 if (tag_id < 0) { 6912 err = -EINVAL; 6913 goto err_out; 6914 } 6915 6916 /* all the BTF manipulations invalidated pointers, refetch them */ 6917 fn_t = btf_type_by_id(btf, func_rec->type_id); 6918 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6919 6920 /* fix up type ID pointed to by param */ 6921 p = &btf_params(fn_proto_t)[arg_idx]; 6922 p->type = ptr_id; 6923 } 6924 6925 free(orig_ids); 6926 return 0; 6927 err_out: 6928 free(orig_ids); 6929 return err; 6930 } 6931 6932 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) 6933 { 6934 struct bpf_program *prog; 6935 size_t i, j; 6936 int err; 6937 6938 if (obj->btf_ext) { 6939 err = bpf_object__relocate_core(obj, targ_btf_path); 6940 if (err) { 6941 pr_warn("failed to perform CO-RE relocations: %d\n", 6942 err); 6943 return err; 6944 } 6945 bpf_object__sort_relos(obj); 6946 } 6947 6948 /* Before relocating calls pre-process relocations and mark 6949 * few ld_imm64 instructions that points to subprogs. 6950 * Otherwise bpf_object__reloc_code() later would have to consider 6951 * all ld_imm64 insns as relocation candidates. 
That would slow down relocation, since
6952 * the number of find_prog_insn_relo() calls would increase and most of
6953 * them would fail to find a relo.
6954 */
6955 for (i = 0; i < obj->nr_programs; i++) {
6956 prog = &obj->programs[i];
6957 for (j = 0; j < prog->nr_reloc; j++) {
6958 struct reloc_desc *relo = &prog->reloc_desc[j];
6959 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6960
6961 /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6962 if (relo->type == RELO_SUBPROG_ADDR)
6963 insn[0].src_reg = BPF_PSEUDO_FUNC;
6964 }
6965 }
6966
6967 /* relocate subprogram calls and append used subprograms to main
6968 * programs; each copy of subprogram code needs to be relocated
6969 * differently for each main program, because its code location might
6970 * have changed.
6971 * Append subprog relos to main programs to allow data relos to be
6972 * processed after text is completely relocated.
6973 */
6974 for (i = 0; i < obj->nr_programs; i++) {
6975 prog = &obj->programs[i];
6976 /* sub-program's sub-calls are relocated within the context of
6977 * its main program only
6978 */
6979 if (prog_is_subprog(obj, prog))
6980 continue;
6981 if (!prog->autoload)
6982 continue;
6983
6984 err = bpf_object__relocate_calls(obj, prog);
6985 if (err) {
6986 pr_warn("prog '%s': failed to relocate calls: %d\n",
6987 prog->name, err);
6988 return err;
6989 }
6990
6991 err = bpf_prog_assign_exc_cb(obj, prog);
6992 if (err)
6993 return err;
6994 /* Now, also append exception callback if it has not been done already. */
6995 if (prog->exception_cb_idx >= 0) {
6996 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
6997
6998 /* Calling the exception callback directly is disallowed, and the
6999 * verifier will reject that later. If it was already processed,
7000 * we can skip this step; otherwise, for all other valid cases, we
7001 * have to append the exception callback now.
7002 */ 7003 if (subprog->sub_insn_off == 0) { 7004 err = bpf_object__append_subprog_code(obj, prog, subprog); 7005 if (err) 7006 return err; 7007 err = bpf_object__reloc_code(obj, prog, subprog); 7008 if (err) 7009 return err; 7010 } 7011 } 7012 } 7013 for (i = 0; i < obj->nr_programs; i++) { 7014 prog = &obj->programs[i]; 7015 if (prog_is_subprog(obj, prog)) 7016 continue; 7017 if (!prog->autoload) 7018 continue; 7019 7020 /* Process data relos for main programs */ 7021 err = bpf_object__relocate_data(obj, prog); 7022 if (err) { 7023 pr_warn("prog '%s': failed to relocate data references: %d\n", 7024 prog->name, err); 7025 return err; 7026 } 7027 7028 /* Fix up .BTF.ext information, if necessary */ 7029 err = bpf_program_fixup_func_info(obj, prog); 7030 if (err) { 7031 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", 7032 prog->name, err); 7033 return err; 7034 } 7035 } 7036 7037 return 0; 7038 } 7039 7040 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 7041 Elf64_Shdr *shdr, Elf_Data *data); 7042 7043 static int bpf_object__collect_map_relos(struct bpf_object *obj, 7044 Elf64_Shdr *shdr, Elf_Data *data) 7045 { 7046 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); 7047 int i, j, nrels, new_sz; 7048 const struct btf_var_secinfo *vi = NULL; 7049 const struct btf_type *sec, *var, *def; 7050 struct bpf_map *map = NULL, *targ_map = NULL; 7051 struct bpf_program *targ_prog = NULL; 7052 bool is_prog_array, is_map_in_map; 7053 const struct btf_member *member; 7054 const char *name, *mname, *type; 7055 unsigned int moff; 7056 Elf64_Sym *sym; 7057 Elf64_Rel *rel; 7058 void *tmp; 7059 7060 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) 7061 return -EINVAL; 7062 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); 7063 if (!sec) 7064 return -EINVAL; 7065 7066 nrels = shdr->sh_size / shdr->sh_entsize; 7067 for (i = 0; i < nrels; i++) { 7068 rel = elf_rel_by_idx(data, i); 7069 if (!rel) { 7070 pr_warn(".maps relo #%d: failed to get ELF relo\n", i); 7071 return -LIBBPF_ERRNO__FORMAT; 7072 } 7073 7074 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 7075 if (!sym) { 7076 pr_warn(".maps relo #%d: symbol %zx not found\n", 7077 i, (size_t)ELF64_R_SYM(rel->r_info)); 7078 return -LIBBPF_ERRNO__FORMAT; 7079 } 7080 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 7081 7082 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", 7083 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, 7084 (size_t)rel->r_offset, sym->st_name, name); 7085 7086 for (j = 0; j < obj->nr_maps; j++) { 7087 map = &obj->maps[j]; 7088 if (map->sec_idx != obj->efile.btf_maps_shndx) 7089 continue; 7090 7091 vi = btf_var_secinfos(sec) + map->btf_var_idx; 7092 if (vi->offset <= rel->r_offset && 7093 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) 7094 break; 7095 } 7096 if (j == obj->nr_maps) { 7097 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", 7098 i, name, (size_t)rel->r_offset); 7099 return -EINVAL; 7100 } 7101 7102 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); 7103 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; 7104 type = is_map_in_map ? 
"map" : "prog"; 7105 if (is_map_in_map) { 7106 if (sym->st_shndx != obj->efile.btf_maps_shndx) { 7107 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", 7108 i, name); 7109 return -LIBBPF_ERRNO__RELOC; 7110 } 7111 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && 7112 map->def.key_size != sizeof(int)) { 7113 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", 7114 i, map->name, sizeof(int)); 7115 return -EINVAL; 7116 } 7117 targ_map = bpf_object__find_map_by_name(obj, name); 7118 if (!targ_map) { 7119 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", 7120 i, name); 7121 return -ESRCH; 7122 } 7123 } else if (is_prog_array) { 7124 targ_prog = bpf_object__find_program_by_name(obj, name); 7125 if (!targ_prog) { 7126 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", 7127 i, name); 7128 return -ESRCH; 7129 } 7130 if (targ_prog->sec_idx != sym->st_shndx || 7131 targ_prog->sec_insn_off * 8 != sym->st_value || 7132 prog_is_subprog(obj, targ_prog)) { 7133 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", 7134 i, name); 7135 return -LIBBPF_ERRNO__RELOC; 7136 } 7137 } else { 7138 return -EINVAL; 7139 } 7140 7141 var = btf__type_by_id(obj->btf, vi->type); 7142 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 7143 if (btf_vlen(def) == 0) 7144 return -EINVAL; 7145 member = btf_members(def) + btf_vlen(def) - 1; 7146 mname = btf__name_by_offset(obj->btf, member->name_off); 7147 if (strcmp(mname, "values")) 7148 return -EINVAL; 7149 7150 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; 7151 if (rel->r_offset - vi->offset < moff) 7152 return -EINVAL; 7153 7154 moff = rel->r_offset - vi->offset - moff; 7155 /* here we use BPF pointer size, which is always 64 bit, as we 7156 * are parsing ELF that was built for BPF target 7157 */ 7158 if (moff % bpf_ptr_sz) 7159 return -EINVAL; 7160 moff /= bpf_ptr_sz; 7161 if (moff >= map->init_slots_sz) { 7162 new_sz = moff + 1; 7163 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); 7164 if (!tmp) 7165 return -ENOMEM; 7166 map->init_slots = tmp; 7167 memset(map->init_slots + map->init_slots_sz, 0, 7168 (new_sz - map->init_slots_sz) * host_ptr_sz); 7169 map->init_slots_sz = new_sz; 7170 } 7171 map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; 7172 7173 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", 7174 i, map->name, moff, type, name); 7175 } 7176 7177 return 0; 7178 } 7179 7180 static int bpf_object__collect_relos(struct bpf_object *obj) 7181 { 7182 int i, err; 7183 7184 for (i = 0; i < obj->efile.sec_cnt; i++) { 7185 struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; 7186 Elf64_Shdr *shdr; 7187 Elf_Data *data; 7188 int idx; 7189 7190 if (sec_desc->sec_type != SEC_RELO) 7191 continue; 7192 7193 shdr = sec_desc->shdr; 7194 data = sec_desc->data; 7195 idx = shdr->sh_info; 7196 7197 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { 7198 pr_warn("internal error at %d\n", __LINE__); 7199 return -LIBBPF_ERRNO__INTERNAL; 7200 } 7201 7202 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) 7203 err = bpf_object__collect_st_ops_relos(obj, shdr, data); 7204 else if (idx == obj->efile.btf_maps_shndx) 7205 err = bpf_object__collect_map_relos(obj, shdr, data); 7206 else 7207 err = bpf_object__collect_prog_relos(obj, shdr, data); 7208 if (err) 7209 return err; 7210 } 7211 7212 bpf_object__sort_relos(obj); 7213 return 0; 7214 } 7215 7216 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) 7217 { 7218 if (BPF_CLASS(insn->code) == BPF_JMP && 7219 BPF_OP(insn->code) == BPF_CALL && 7220 BPF_SRC(insn->code) == BPF_K && 7221 insn->src_reg == 0 && 7222 insn->dst_reg == 0) { 7223 *func_id = insn->imm; 7224 return true; 7225 } 7226 return false; 7227 } 7228 7229 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) 7230 { 7231 struct bpf_insn *insn = prog->insns; 7232 enum bpf_func_id func_id; 7233 int i; 7234 7235 if (obj->gen_loader) 7236 return 0; 7237 7238 for (i = 0; i < prog->insns_cnt; i++, insn++) { 7239 if (!insn_is_helper_call(insn, &func_id)) 7240 continue; 7241 7242 /* on kernels that don't yet support 7243 * bpf_probe_read_{kernel,user}[_str] helpers, fall back 7244 * to bpf_probe_read() which works well for old kernels 7245 */ 7246 switch (func_id) { 7247 case BPF_FUNC_probe_read_kernel: 7248 case BPF_FUNC_probe_read_user: 7249 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7250 insn->imm = BPF_FUNC_probe_read; 7251 break; 7252 case BPF_FUNC_probe_read_kernel_str: 7253 case BPF_FUNC_probe_read_user_str: 7254 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7255 insn->imm = BPF_FUNC_probe_read_str; 7256 break; 7257 default: 7258 break; 7259 } 7260 } 7261 return 0; 7262 } 7263 7264 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 7265 int *btf_obj_fd, int *btf_type_id); 7266 7267 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ 7268 static int libbpf_prepare_prog_load(struct bpf_program *prog, 7269 struct bpf_prog_load_opts *opts, long cookie) 7270 { 7271 enum sec_def_flags def = cookie; 7272 7273 /* old kernels might not support specifying expected_attach_type */ 7274 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) 7275 opts->expected_attach_type = 0; 7276 7277 if (def & SEC_SLEEPABLE) 7278 opts->prog_flags |= BPF_F_SLEEPABLE; 7279 7280 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 7281 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 7282 7283 /* special check for usdt to use uprobe_multi link */ 7284 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) 7285 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; 7286 7287 if ((def & SEC_ATTACH_BTF) && 
!prog->attach_btf_id) {
7288 int btf_obj_fd = 0, btf_type_id = 0, err;
7289 const char *attach_name;
7290
7291 attach_name = strchr(prog->sec_name, '/');
7292 if (!attach_name) {
7293 /* if BPF program is annotated with just SEC("fentry")
7294 * (or similar) without declaratively specifying
7295 * target, then it is expected that target will be
7296 * specified with bpf_program__set_attach_target() at
7297 * runtime before BPF object load step. If not, then
7298 * there is nothing to load into the kernel as BPF
7299 * verifier won't be able to validate BPF program
7300 * correctness anyway.
7301 */
7302 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7303 prog->name);
7304 return -EINVAL;
7305 }
7306 attach_name++; /* skip over / */
7307
7308 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7309 if (err)
7310 return err;
7311
7312 /* cache resolved BTF FD and BTF type ID in the prog */
7313 prog->attach_btf_obj_fd = btf_obj_fd;
7314 prog->attach_btf_id = btf_type_id;
7315
7316 /* but by now libbpf common logic is not utilizing
7317 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore, because
7318 * this callback is called after opts were populated by
7319 * libbpf, so this callback has to update opts explicitly here
7320 */
7321 opts->attach_btf_obj_fd = btf_obj_fd;
7322 opts->attach_btf_id = btf_type_id;
7323 }
7324 return 0;
7325 }
7326
7327 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7328
7329 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7330 struct bpf_insn *insns, int insns_cnt,
7331 const char *license, __u32 kern_version, int *prog_fd)
7332 {
7333 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7334 const char *prog_name = NULL;
7335 char *cp, errmsg[STRERR_BUFSIZE];
7336 size_t log_buf_size = 0;
7337 char *log_buf = NULL, *tmp;
7338 bool own_log_buf = true;
7339 __u32 log_level = prog->log_level;
7340 int ret, err;
7341
7342 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7343 /*
7344 * The program type must be set. Most likely we couldn't find a proper
7345 * section definition at load time, and thus we didn't infer the type.
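 * (Editorial note: a program placed in a non-standard ELF section can still
 * be loaded if the caller sets the type explicitly, e.g. via
 * bpf_program__set_type(), before bpf_object__load().)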
7346 */ 7347 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", 7348 prog->name, prog->sec_name); 7349 return -EINVAL; 7350 } 7351 7352 if (!insns || !insns_cnt) 7353 return -EINVAL; 7354 7355 if (kernel_supports(obj, FEAT_PROG_NAME)) 7356 prog_name = prog->name; 7357 load_attr.attach_prog_fd = prog->attach_prog_fd; 7358 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; 7359 load_attr.attach_btf_id = prog->attach_btf_id; 7360 load_attr.kern_version = kern_version; 7361 load_attr.prog_ifindex = prog->prog_ifindex; 7362 7363 /* specify func_info/line_info only if kernel supports them */ 7364 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { 7365 load_attr.prog_btf_fd = btf__fd(obj->btf); 7366 load_attr.func_info = prog->func_info; 7367 load_attr.func_info_rec_size = prog->func_info_rec_size; 7368 load_attr.func_info_cnt = prog->func_info_cnt; 7369 load_attr.line_info = prog->line_info; 7370 load_attr.line_info_rec_size = prog->line_info_rec_size; 7371 load_attr.line_info_cnt = prog->line_info_cnt; 7372 } 7373 load_attr.log_level = log_level; 7374 load_attr.prog_flags = prog->prog_flags; 7375 load_attr.fd_array = obj->fd_array; 7376 7377 load_attr.token_fd = obj->token_fd; 7378 if (obj->token_fd) 7379 load_attr.prog_flags |= BPF_F_TOKEN_FD; 7380 7381 /* adjust load_attr if sec_def provides custom preload callback */ 7382 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { 7383 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); 7384 if (err < 0) { 7385 pr_warn("prog '%s': failed to prepare load attributes: %d\n", 7386 prog->name, err); 7387 return err; 7388 } 7389 insns = prog->insns; 7390 insns_cnt = prog->insns_cnt; 7391 } 7392 7393 /* allow prog_prepare_load_fn to change expected_attach_type */ 7394 load_attr.expected_attach_type = prog->expected_attach_type; 7395 7396 if (obj->gen_loader) { 7397 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, 7398 license, insns, insns_cnt, &load_attr, 7399 prog - obj->programs); 7400 *prog_fd = -1; 7401 return 0; 7402 } 7403 7404 retry_load: 7405 /* if log_level is zero, we don't request logs initially even if 7406 * custom log_buf is specified; if the program load fails, then we'll 7407 * bump log_level to 1 and use either custom log_buf or we'll allocate 7408 * our own and retry the load to get details on what failed 7409 */ 7410 if (log_level) { 7411 if (prog->log_buf) { 7412 log_buf = prog->log_buf; 7413 log_buf_size = prog->log_size; 7414 own_log_buf = false; 7415 } else if (obj->log_buf) { 7416 log_buf = obj->log_buf; 7417 log_buf_size = obj->log_size; 7418 own_log_buf = false; 7419 } else { 7420 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); 7421 tmp = realloc(log_buf, log_buf_size); 7422 if (!tmp) { 7423 ret = -ENOMEM; 7424 goto out; 7425 } 7426 log_buf = tmp; 7427 log_buf[0] = '\0'; 7428 own_log_buf = true; 7429 } 7430 } 7431 7432 load_attr.log_buf = log_buf; 7433 load_attr.log_size = log_buf_size; 7434 load_attr.log_level = log_level; 7435 7436 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); 7437 if (ret >= 0) { 7438 if (log_level && own_log_buf) { 7439 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7440 prog->name, log_buf); 7441 } 7442 7443 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { 7444 struct bpf_map *map; 7445 int i; 7446 7447 for (i = 0; i < obj->nr_maps; i++) { 7448 map = &prog->obj->maps[i]; 7449 if (map->libbpf_type != 
LIBBPF_MAP_RODATA) 7450 continue; 7451 7452 if (bpf_prog_bind_map(ret, map->fd, NULL)) { 7453 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7454 pr_warn("prog '%s': failed to bind map '%s': %s\n", 7455 prog->name, map->real_name, cp); 7456 /* Don't fail hard if can't bind rodata. */ 7457 } 7458 } 7459 } 7460 7461 *prog_fd = ret; 7462 ret = 0; 7463 goto out; 7464 } 7465 7466 if (log_level == 0) { 7467 log_level = 1; 7468 goto retry_load; 7469 } 7470 /* On ENOSPC, increase log buffer size and retry, unless custom 7471 * log_buf is specified. 7472 * Be careful to not overflow u32, though. Kernel's log buf size limit 7473 * isn't part of UAPI so it can always be bumped to full 4GB. So don't 7474 * multiply by 2 unless we are sure we'll fit within 32 bits. 7475 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 7476 */ 7477 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) 7478 goto retry_load; 7479 7480 ret = -errno; 7481 7482 /* post-process verifier log to improve error descriptions */ 7483 fixup_verifier_log(prog, log_buf, log_buf_size); 7484 7485 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7486 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); 7487 pr_perm_msg(ret); 7488 7489 if (own_log_buf && log_buf && log_buf[0] != '\0') { 7490 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7491 prog->name, log_buf); 7492 } 7493 7494 out: 7495 if (own_log_buf) 7496 free(log_buf); 7497 return ret; 7498 } 7499 7500 static char *find_prev_line(char *buf, char *cur) 7501 { 7502 char *p; 7503 7504 if (cur == buf) /* end of a log buf */ 7505 return NULL; 7506 7507 p = cur - 1; 7508 while (p - 1 >= buf && *(p - 1) != '\n') 7509 p--; 7510 7511 return p; 7512 } 7513 7514 static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 7515 char *orig, size_t orig_sz, const char *patch) 7516 { 7517 /* size of the remaining log content to the right from the to-be-replaced part */ 7518 size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 7519 size_t patch_sz = strlen(patch); 7520 7521 if (patch_sz != orig_sz) { 7522 /* If patch line(s) are longer than original piece of verifier log, 7523 * shift log contents by (patch_sz - orig_sz) bytes to the right 7524 * starting from after to-be-replaced part of the log. 7525 * 7526 * If patch line(s) are shorter than original piece of verifier log, 7527 * shift log contents by (orig_sz - patch_sz) bytes to the left 7528 * starting from after to-be-replaced part of the log 7529 * 7530 * We need to be careful about not overflowing available 7531 * buf_sz capacity. If that's the case, we'll truncate the end 7532 * of the original log, as necessary. 
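*
* A small worked example (all sizes are illustrative): with buf_sz = 16,
* log_sz = 12, and a 4-byte piece starting at offset 5 replaced by a
* 6-byte patch, rem_sz is 3; the 3 tail bytes are shifted right by 2 and
* the patch is copied over the old piece, growing the log to 14 bytes,
* which still fits within buf_sz.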
7533 */ 7534 if (patch_sz > orig_sz) { 7535 if (orig + patch_sz >= buf + buf_sz) { 7536 /* patch is big enough to cover remaining space completely */ 7537 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 7538 rem_sz = 0; 7539 } else if (patch_sz - orig_sz > buf_sz - log_sz) { 7540 /* patch causes part of remaining log to be truncated */ 7541 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 7542 } 7543 } 7544 /* shift remaining log to the right by calculated amount */ 7545 memmove(orig + patch_sz, orig + orig_sz, rem_sz); 7546 } 7547 7548 memcpy(orig, patch, patch_sz); 7549 } 7550 7551 static void fixup_log_failed_core_relo(struct bpf_program *prog, 7552 char *buf, size_t buf_sz, size_t log_sz, 7553 char *line1, char *line2, char *line3) 7554 { 7555 /* Expected log for failed and not properly guarded CO-RE relocation: 7556 * line1 -> 123: (85) call unknown#195896080 7557 * line2 -> invalid func unknown#195896080 7558 * line3 -> <anything else or end of buffer> 7559 * 7560 * "123" is the index of the instruction that was poisoned. We extract 7561 * instruction index to find corresponding CO-RE relocation and 7562 * replace this part of the log with more relevant information about 7563 * failed CO-RE relocation. 7564 */ 7565 const struct bpf_core_relo *relo; 7566 struct bpf_core_spec spec; 7567 char patch[512], spec_buf[256]; 7568 int insn_idx, err, spec_len; 7569 7570 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 7571 return; 7572 7573 relo = find_relo_core(prog, insn_idx); 7574 if (!relo) 7575 return; 7576 7577 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 7578 if (err) 7579 return; 7580 7581 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 7582 snprintf(patch, sizeof(patch), 7583 "%d: <invalid CO-RE relocation>\n" 7584 "failed to resolve CO-RE relocation %s%s\n", 7585 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); 7586 7587 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7588 } 7589 7590 static void fixup_log_missing_map_load(struct bpf_program *prog, 7591 char *buf, size_t buf_sz, size_t log_sz, 7592 char *line1, char *line2, char *line3) 7593 { 7594 /* Expected log for failed and not properly guarded map reference: 7595 * line1 -> 123: (85) call unknown#2001000345 7596 * line2 -> invalid func unknown#2001000345 7597 * line3 -> <anything else or end of buffer> 7598 * 7599 * "123" is the index of the instruction that was poisoned. 7600 * "345" in "2001000345" is a map index in obj->maps to fetch map name. 
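*
* After patching, those two log lines would read roughly as follows
* (the map name "my_map" is hypothetical):
*   123: <invalid BPF map reference>
*   BPF map 'my_map' is referenced but wasn't created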
7601 */ 7602 struct bpf_object *obj = prog->obj; 7603 const struct bpf_map *map; 7604 int insn_idx, map_idx; 7605 char patch[128]; 7606 7607 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) 7608 return; 7609 7610 map_idx -= POISON_LDIMM64_MAP_BASE; 7611 if (map_idx < 0 || map_idx >= obj->nr_maps) 7612 return; 7613 map = &obj->maps[map_idx]; 7614 7615 snprintf(patch, sizeof(patch), 7616 "%d: <invalid BPF map reference>\n" 7617 "BPF map '%s' is referenced but wasn't created\n", 7618 insn_idx, map->name); 7619 7620 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7621 } 7622 7623 static void fixup_log_missing_kfunc_call(struct bpf_program *prog, 7624 char *buf, size_t buf_sz, size_t log_sz, 7625 char *line1, char *line2, char *line3) 7626 { 7627 /* Expected log for failed and not properly guarded kfunc call: 7628 * line1 -> 123: (85) call unknown#2002000345 7629 * line2 -> invalid func unknown#2002000345 7630 * line3 -> <anything else or end of buffer> 7631 * 7632 * "123" is the index of the instruction that was poisoned. 7633 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. 7634 */ 7635 struct bpf_object *obj = prog->obj; 7636 const struct extern_desc *ext; 7637 int insn_idx, ext_idx; 7638 char patch[128]; 7639 7640 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) 7641 return; 7642 7643 ext_idx -= POISON_CALL_KFUNC_BASE; 7644 if (ext_idx < 0 || ext_idx >= obj->nr_extern) 7645 return; 7646 ext = &obj->externs[ext_idx]; 7647 7648 snprintf(patch, sizeof(patch), 7649 "%d: <invalid kfunc call>\n" 7650 "kfunc '%s' is referenced but wasn't resolved\n", 7651 insn_idx, ext->name); 7652 7653 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7654 } 7655 7656 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 7657 { 7658 /* look for familiar error patterns in last N lines of the log */ 7659 const size_t max_last_line_cnt = 10; 7660 char *prev_line, *cur_line, *next_line; 7661 size_t log_sz; 7662 int i; 7663 7664 if (!buf) 7665 return; 7666 7667 log_sz = strlen(buf) + 1; 7668 next_line = buf + log_sz - 1; 7669 7670 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 7671 cur_line = find_prev_line(buf, next_line); 7672 if (!cur_line) 7673 return; 7674 7675 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 7676 prev_line = find_prev_line(buf, cur_line); 7677 if (!prev_line) 7678 continue; 7679 7680 /* failed CO-RE relocation case */ 7681 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 7682 prev_line, cur_line, next_line); 7683 return; 7684 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { 7685 prev_line = find_prev_line(buf, cur_line); 7686 if (!prev_line) 7687 continue; 7688 7689 /* reference to uncreated BPF map */ 7690 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, 7691 prev_line, cur_line, next_line); 7692 return; 7693 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { 7694 prev_line = find_prev_line(buf, cur_line); 7695 if (!prev_line) 7696 continue; 7697 7698 /* reference to unresolved kfunc */ 7699 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, 7700 prev_line, cur_line, next_line); 7701 return; 7702 } 7703 } 7704 } 7705 7706 static int bpf_program_record_relos(struct bpf_program *prog) 7707 { 7708 struct bpf_object *obj = prog->obj; 7709 int i; 7710 7711 for (i = 0; i < prog->nr_reloc; i++) { 7712 struct reloc_desc *relo = &prog->reloc_desc[i]; 7713 
struct extern_desc *ext = &obj->externs[relo->ext_idx]; 7714 int kind; 7715 7716 switch (relo->type) { 7717 case RELO_EXTERN_LD64: 7718 if (ext->type != EXT_KSYM) 7719 continue; 7720 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 7721 BTF_KIND_VAR : BTF_KIND_FUNC; 7722 bpf_gen__record_extern(obj->gen_loader, ext->name, 7723 ext->is_weak, !ext->ksym.type_id, 7724 true, kind, relo->insn_idx); 7725 break; 7726 case RELO_EXTERN_CALL: 7727 bpf_gen__record_extern(obj->gen_loader, ext->name, 7728 ext->is_weak, false, false, BTF_KIND_FUNC, 7729 relo->insn_idx); 7730 break; 7731 case RELO_CORE: { 7732 struct bpf_core_relo cr = { 7733 .insn_off = relo->insn_idx * 8, 7734 .type_id = relo->core_relo->type_id, 7735 .access_str_off = relo->core_relo->access_str_off, 7736 .kind = relo->core_relo->kind, 7737 }; 7738 7739 bpf_gen__record_relo_core(obj->gen_loader, &cr); 7740 break; 7741 } 7742 default: 7743 continue; 7744 } 7745 } 7746 return 0; 7747 } 7748 7749 static int 7750 bpf_object__load_progs(struct bpf_object *obj, int log_level) 7751 { 7752 struct bpf_program *prog; 7753 size_t i; 7754 int err; 7755 7756 for (i = 0; i < obj->nr_programs; i++) { 7757 prog = &obj->programs[i]; 7758 err = bpf_object__sanitize_prog(obj, prog); 7759 if (err) 7760 return err; 7761 } 7762 7763 for (i = 0; i < obj->nr_programs; i++) { 7764 prog = &obj->programs[i]; 7765 if (prog_is_subprog(obj, prog)) 7766 continue; 7767 if (!prog->autoload) { 7768 pr_debug("prog '%s': skipped loading\n", prog->name); 7769 continue; 7770 } 7771 prog->log_level |= log_level; 7772 7773 if (obj->gen_loader) 7774 bpf_program_record_relos(prog); 7775 7776 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, 7777 obj->license, obj->kern_version, &prog->fd); 7778 if (err) { 7779 pr_warn("prog '%s': failed to load: %d\n", prog->name, err); 7780 return err; 7781 } 7782 } 7783 7784 bpf_object__free_relocs(obj); 7785 return 0; 7786 } 7787 7788 static const struct bpf_sec_def *find_sec_def(const char *sec_name); 7789 7790 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) 7791 { 7792 struct bpf_program *prog; 7793 int err; 7794 7795 bpf_object__for_each_program(prog, obj) { 7796 prog->sec_def = find_sec_def(prog->sec_name); 7797 if (!prog->sec_def) { 7798 /* couldn't guess, but user might manually specify */ 7799 pr_debug("prog '%s': unrecognized ELF section name '%s'\n", 7800 prog->name, prog->sec_name); 7801 continue; 7802 } 7803 7804 prog->type = prog->sec_def->prog_type; 7805 prog->expected_attach_type = prog->sec_def->expected_attach_type; 7806 7807 /* sec_def can have custom callback which should be called 7808 * after bpf_program is initialized to adjust its properties 7809 */ 7810 if (prog->sec_def->prog_setup_fn) { 7811 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); 7812 if (err < 0) { 7813 pr_warn("prog '%s': failed to initialize: %d\n", 7814 prog->name, err); 7815 return err; 7816 } 7817 } 7818 } 7819 7820 return 0; 7821 } 7822 7823 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, 7824 const struct bpf_object_open_opts *opts) 7825 { 7826 const char *obj_name, *kconfig, *btf_tmp_path, *token_path; 7827 struct bpf_object *obj; 7828 char tmp_name[64]; 7829 int err; 7830 char *log_buf; 7831 size_t log_size; 7832 __u32 log_level; 7833 7834 if (elf_version(EV_CURRENT) == EV_NONE) { 7835 pr_warn("failed to init libelf for %s\n", 7836 path ? 
: "(mem buf)"); 7837 return ERR_PTR(-LIBBPF_ERRNO__LIBELF); 7838 } 7839 7840 if (!OPTS_VALID(opts, bpf_object_open_opts)) 7841 return ERR_PTR(-EINVAL); 7842 7843 obj_name = OPTS_GET(opts, object_name, NULL); 7844 if (obj_buf) { 7845 if (!obj_name) { 7846 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", 7847 (unsigned long)obj_buf, 7848 (unsigned long)obj_buf_sz); 7849 obj_name = tmp_name; 7850 } 7851 path = obj_name; 7852 pr_debug("loading object '%s' from buffer\n", obj_name); 7853 } 7854 7855 log_buf = OPTS_GET(opts, kernel_log_buf, NULL); 7856 log_size = OPTS_GET(opts, kernel_log_size, 0); 7857 log_level = OPTS_GET(opts, kernel_log_level, 0); 7858 if (log_size > UINT_MAX) 7859 return ERR_PTR(-EINVAL); 7860 if (log_size && !log_buf) 7861 return ERR_PTR(-EINVAL); 7862 7863 token_path = OPTS_GET(opts, bpf_token_path, NULL); 7864 /* if user didn't specify bpf_token_path explicitly, check if 7865 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path 7866 * option 7867 */ 7868 if (!token_path) 7869 token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); 7870 if (token_path && strlen(token_path) >= PATH_MAX) 7871 return ERR_PTR(-ENAMETOOLONG); 7872 7873 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); 7874 if (IS_ERR(obj)) 7875 return obj; 7876 7877 obj->log_buf = log_buf; 7878 obj->log_size = log_size; 7879 obj->log_level = log_level; 7880 7881 if (token_path) { 7882 obj->token_path = strdup(token_path); 7883 if (!obj->token_path) { 7884 err = -ENOMEM; 7885 goto out; 7886 } 7887 } 7888 7889 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 7890 if (btf_tmp_path) { 7891 if (strlen(btf_tmp_path) >= PATH_MAX) { 7892 err = -ENAMETOOLONG; 7893 goto out; 7894 } 7895 obj->btf_custom_path = strdup(btf_tmp_path); 7896 if (!obj->btf_custom_path) { 7897 err = -ENOMEM; 7898 goto out; 7899 } 7900 } 7901 7902 kconfig = OPTS_GET(opts, kconfig, NULL); 7903 if (kconfig) { 7904 obj->kconfig = strdup(kconfig); 7905 if (!obj->kconfig) { 7906 err = -ENOMEM; 7907 goto out; 7908 } 7909 } 7910 7911 err = bpf_object__elf_init(obj); 7912 err = err ? : bpf_object__check_endianness(obj); 7913 err = err ? : bpf_object__elf_collect(obj); 7914 err = err ? : bpf_object__collect_externs(obj); 7915 err = err ? : bpf_object_fixup_btf(obj); 7916 err = err ? : bpf_object__init_maps(obj, opts); 7917 err = err ? : bpf_object_init_progs(obj, opts); 7918 err = err ? 
: bpf_object__collect_relos(obj); 7919 if (err) 7920 goto out; 7921 7922 bpf_object__elf_finish(obj); 7923 7924 return obj; 7925 out: 7926 bpf_object__close(obj); 7927 return ERR_PTR(err); 7928 } 7929 7930 struct bpf_object * 7931 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 7932 { 7933 if (!path) 7934 return libbpf_err_ptr(-EINVAL); 7935 7936 pr_debug("loading %s\n", path); 7937 7938 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); 7939 } 7940 7941 struct bpf_object *bpf_object__open(const char *path) 7942 { 7943 return bpf_object__open_file(path, NULL); 7944 } 7945 7946 struct bpf_object * 7947 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 7948 const struct bpf_object_open_opts *opts) 7949 { 7950 if (!obj_buf || obj_buf_sz == 0) 7951 return libbpf_err_ptr(-EINVAL); 7952 7953 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); 7954 } 7955 7956 static int bpf_object_unload(struct bpf_object *obj) 7957 { 7958 size_t i; 7959 7960 if (!obj) 7961 return libbpf_err(-EINVAL); 7962 7963 for (i = 0; i < obj->nr_maps; i++) { 7964 zclose(obj->maps[i].fd); 7965 if (obj->maps[i].st_ops) 7966 zfree(&obj->maps[i].st_ops->kern_vdata); 7967 } 7968 7969 for (i = 0; i < obj->nr_programs; i++) 7970 bpf_program__unload(&obj->programs[i]); 7971 7972 return 0; 7973 } 7974 7975 static int bpf_object__sanitize_maps(struct bpf_object *obj) 7976 { 7977 struct bpf_map *m; 7978 7979 bpf_object__for_each_map(m, obj) { 7980 if (!bpf_map__is_internal(m)) 7981 continue; 7982 if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) 7983 m->def.map_flags &= ~BPF_F_MMAPABLE; 7984 } 7985 7986 return 0; 7987 } 7988 7989 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) 7990 { 7991 char sym_type, sym_name[500]; 7992 unsigned long long sym_addr; 7993 int ret, err = 0; 7994 FILE *f; 7995 7996 f = fopen("/proc/kallsyms", "re"); 7997 if (!f) { 7998 err = -errno; 7999 pr_warn("failed to open /proc/kallsyms: %d\n", err); 8000 return err; 8001 } 8002 8003 while (true) { 8004 ret = fscanf(f, "%llx %c %499s%*[^\n]\n", 8005 &sym_addr, &sym_type, sym_name); 8006 if (ret == EOF && feof(f)) 8007 break; 8008 if (ret != 3) { 8009 pr_warn("failed to read kallsyms entry: %d\n", ret); 8010 err = -EINVAL; 8011 break; 8012 } 8013 8014 err = cb(sym_addr, sym_type, sym_name, ctx); 8015 if (err) 8016 break; 8017 } 8018 8019 fclose(f); 8020 return err; 8021 } 8022 8023 static int kallsyms_cb(unsigned long long sym_addr, char sym_type, 8024 const char *sym_name, void *ctx) 8025 { 8026 struct bpf_object *obj = ctx; 8027 const struct btf_type *t; 8028 struct extern_desc *ext; 8029 8030 ext = find_extern_by_name(obj, sym_name); 8031 if (!ext || ext->type != EXT_KSYM) 8032 return 0; 8033 8034 t = btf__type_by_id(obj->btf, ext->btf_id); 8035 if (!btf_is_var(t)) 8036 return 0; 8037 8038 if (ext->is_set && ext->ksym.addr != sym_addr) { 8039 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 8040 sym_name, ext->ksym.addr, sym_addr); 8041 return -EINVAL; 8042 } 8043 if (!ext->is_set) { 8044 ext->is_set = true; 8045 ext->ksym.addr = sym_addr; 8046 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 8047 } 8048 return 0; 8049 } 8050 8051 static int bpf_object__read_kallsyms_file(struct bpf_object *obj) 8052 { 8053 return libbpf_kallsyms_parse(kallsyms_cb, obj); 8054 } 8055 8056 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 8057 __u16 kind, struct btf **res_btf, 8058 struct module_btf **res_mod_btf) 8059 { 8060 struct 
module_btf *mod_btf; 8061 struct btf *btf; 8062 int i, id, err; 8063 8064 btf = obj->btf_vmlinux; 8065 mod_btf = NULL; 8066 id = btf__find_by_name_kind(btf, ksym_name, kind); 8067 8068 if (id == -ENOENT) { 8069 err = load_module_btfs(obj); 8070 if (err) 8071 return err; 8072 8073 for (i = 0; i < obj->btf_module_cnt; i++) { 8074 /* we assume module_btf's BTF FD is always >0 */ 8075 mod_btf = &obj->btf_modules[i]; 8076 btf = mod_btf->btf; 8077 id = btf__find_by_name_kind_own(btf, ksym_name, kind); 8078 if (id != -ENOENT) 8079 break; 8080 } 8081 } 8082 if (id <= 0) 8083 return -ESRCH; 8084 8085 *res_btf = btf; 8086 *res_mod_btf = mod_btf; 8087 return id; 8088 } 8089 8090 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 8091 struct extern_desc *ext) 8092 { 8093 const struct btf_type *targ_var, *targ_type; 8094 __u32 targ_type_id, local_type_id; 8095 struct module_btf *mod_btf = NULL; 8096 const char *targ_var_name; 8097 struct btf *btf = NULL; 8098 int id, err; 8099 8100 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); 8101 if (id < 0) { 8102 if (id == -ESRCH && ext->is_weak) 8103 return 0; 8104 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", 8105 ext->name); 8106 return id; 8107 } 8108 8109 /* find local type_id */ 8110 local_type_id = ext->ksym.type_id; 8111 8112 /* find target type_id */ 8113 targ_var = btf__type_by_id(btf, id); 8114 targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 8115 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 8116 8117 err = bpf_core_types_are_compat(obj->btf, local_type_id, 8118 btf, targ_type_id); 8119 if (err <= 0) { 8120 const struct btf_type *local_type; 8121 const char *targ_name, *local_name; 8122 8123 local_type = btf__type_by_id(obj->btf, local_type_id); 8124 local_name = btf__name_by_offset(obj->btf, local_type->name_off); 8125 targ_name = btf__name_by_offset(btf, targ_type->name_off); 8126 8127 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 8128 ext->name, local_type_id, 8129 btf_kind_str(local_type), local_name, targ_type_id, 8130 btf_kind_str(targ_type), targ_name); 8131 return -EINVAL; 8132 } 8133 8134 ext->is_set = true; 8135 ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; 8136 ext->ksym.kernel_btf_id = id; 8137 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 8138 ext->name, id, btf_kind_str(targ_var), targ_var_name); 8139 8140 return 0; 8141 } 8142 8143 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 8144 struct extern_desc *ext) 8145 { 8146 int local_func_proto_id, kfunc_proto_id, kfunc_id; 8147 struct module_btf *mod_btf = NULL; 8148 const struct btf_type *kern_func; 8149 struct btf *kern_btf = NULL; 8150 int ret; 8151 8152 local_func_proto_id = ext->ksym.type_id; 8153 8154 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 8155 &mod_btf); 8156 if (kfunc_id < 0) { 8157 if (kfunc_id == -ESRCH && ext->is_weak) 8158 return 0; 8159 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", 8160 ext->name); 8161 return kfunc_id; 8162 } 8163 8164 kern_func = btf__type_by_id(kern_btf, kfunc_id); 8165 kfunc_proto_id = kern_func->type; 8166 8167 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 8168 kern_btf, kfunc_proto_id); 8169 if (ret <= 0) { 8170 if (ext->is_weak) 8171 return 0; 8172 8173 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 8174 ext->name, local_func_proto_id, 8175 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); 8176 return -EINVAL; 8177 } 8178 8179 /* set index for module BTF fd in fd_array, if unset */ 8180 if (mod_btf && !mod_btf->fd_array_idx) { 8181 /* insn->off is s16 */ 8182 if (obj->fd_array_cnt == INT16_MAX) { 8183 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", 8184 ext->name, mod_btf->fd_array_idx); 8185 return -E2BIG; 8186 } 8187 /* Cannot use index 0 for module BTF fd */ 8188 if (!obj->fd_array_cnt) 8189 obj->fd_array_cnt = 1; 8190 8191 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), 8192 obj->fd_array_cnt + 1); 8193 if (ret) 8194 return ret; 8195 mod_btf->fd_array_idx = obj->fd_array_cnt; 8196 /* we assume module BTF FD is always >0 */ 8197 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; 8198 } 8199 8200 ext->is_set = true; 8201 ext->ksym.kernel_btf_id = kfunc_id; 8202 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; 8203 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() 8204 * populates FD into ld_imm64 insn when it's used to point to kfunc. 8205 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. 8206 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. 8207 */ 8208 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; 8209 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", 8210 ext->name, mod_btf ? 
mod_btf->name : "vmlinux", kfunc_id);
8211
8212 return 0;
8213 }
8214
8215 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8216 {
8217 const struct btf_type *t;
8218 struct extern_desc *ext;
8219 int i, err;
8220
8221 for (i = 0; i < obj->nr_extern; i++) {
8222 ext = &obj->externs[i];
8223 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8224 continue;
8225
8226 if (obj->gen_loader) {
8227 ext->is_set = true;
8228 ext->ksym.kernel_btf_obj_fd = 0;
8229 ext->ksym.kernel_btf_id = 0;
8230 continue;
8231 }
8232 t = btf__type_by_id(obj->btf, ext->btf_id);
8233 if (btf_is_var(t))
8234 err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8235 else
8236 err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8237 if (err)
8238 return err;
8239 }
8240 return 0;
8241 }
8242
8243 static int bpf_object__resolve_externs(struct bpf_object *obj,
8244 const char *extra_kconfig)
8245 {
8246 bool need_config = false, need_kallsyms = false;
8247 bool need_vmlinux_btf = false;
8248 struct extern_desc *ext;
8249 void *kcfg_data = NULL;
8250 int err, i;
8251
8252 if (obj->nr_extern == 0)
8253 return 0;
8254
8255 if (obj->kconfig_map_idx >= 0)
8256 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8257
8258 for (i = 0; i < obj->nr_extern; i++) {
8259 ext = &obj->externs[i];
8260
8261 if (ext->type == EXT_KSYM) {
8262 if (ext->ksym.type_id)
8263 need_vmlinux_btf = true;
8264 else
8265 need_kallsyms = true;
8266 continue;
8267 } else if (ext->type == EXT_KCFG) {
8268 void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8269 __u64 value = 0;
8270
8271 /* Kconfig externs need actual /proc/config.gz */
8272 if (str_has_pfx(ext->name, "CONFIG_")) {
8273 need_config = true;
8274 continue;
8275 }
8276
8277 /* Virtual kcfg externs are handled specially by libbpf */
8278 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8279 value = get_kernel_version();
8280 if (!value) {
8281 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8282 return -EINVAL;
8283 }
8284 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8285 value = kernel_supports(obj, FEAT_BPF_COOKIE);
8286 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8287 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8288 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8289 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8290 * __kconfig externs, where LINUX_ ones are virtual and filled out
8291 * by libbpf itself (their values don't come from Kconfig).
8292 * If a LINUX_xxx variable is not recognized by libbpf, but is marked
8293 * __weak, it defaults to zero value, just like for CONFIG_xxx
8294 * externs.
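*
* For illustration, typical BPF-side declarations look roughly like
* this (exact types and the __weak marker are assumptions of the sketch):
*   extern unsigned long CONFIG_HZ __kconfig;
*   extern int LINUX_KERNEL_VERSION __kconfig;
*   extern bool LINUX_HAS_BPF_COOKIE __kconfig __weak;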
8295 */ 8296 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 8297 return -EINVAL; 8298 } 8299 8300 err = set_kcfg_value_num(ext, ext_ptr, value); 8301 if (err) 8302 return err; 8303 pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 8304 ext->name, (long long)value); 8305 } else { 8306 pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 8307 return -EINVAL; 8308 } 8309 } 8310 if (need_config && extra_kconfig) { 8311 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); 8312 if (err) 8313 return -EINVAL; 8314 need_config = false; 8315 for (i = 0; i < obj->nr_extern; i++) { 8316 ext = &obj->externs[i]; 8317 if (ext->type == EXT_KCFG && !ext->is_set) { 8318 need_config = true; 8319 break; 8320 } 8321 } 8322 } 8323 if (need_config) { 8324 err = bpf_object__read_kconfig_file(obj, kcfg_data); 8325 if (err) 8326 return -EINVAL; 8327 } 8328 if (need_kallsyms) { 8329 err = bpf_object__read_kallsyms_file(obj); 8330 if (err) 8331 return -EINVAL; 8332 } 8333 if (need_vmlinux_btf) { 8334 err = bpf_object__resolve_ksyms_btf_id(obj); 8335 if (err) 8336 return -EINVAL; 8337 } 8338 for (i = 0; i < obj->nr_extern; i++) { 8339 ext = &obj->externs[i]; 8340 8341 if (!ext->is_set && !ext->is_weak) { 8342 pr_warn("extern '%s' (strong): not resolved\n", ext->name); 8343 return -ESRCH; 8344 } else if (!ext->is_set) { 8345 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 8346 ext->name); 8347 } 8348 } 8349 8350 return 0; 8351 } 8352 8353 static void bpf_map_prepare_vdata(const struct bpf_map *map) 8354 { 8355 struct bpf_struct_ops *st_ops; 8356 __u32 i; 8357 8358 st_ops = map->st_ops; 8359 for (i = 0; i < btf_vlen(st_ops->type); i++) { 8360 struct bpf_program *prog = st_ops->progs[i]; 8361 void *kern_data; 8362 int prog_fd; 8363 8364 if (!prog) 8365 continue; 8366 8367 prog_fd = bpf_program__fd(prog); 8368 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 8369 *(unsigned long *)kern_data = prog_fd; 8370 } 8371 } 8372 8373 static int bpf_object_prepare_struct_ops(struct bpf_object *obj) 8374 { 8375 struct bpf_map *map; 8376 int i; 8377 8378 for (i = 0; i < obj->nr_maps; i++) { 8379 map = &obj->maps[i]; 8380 8381 if (!bpf_map__is_struct_ops(map)) 8382 continue; 8383 8384 if (!map->autocreate) 8385 continue; 8386 8387 bpf_map_prepare_vdata(map); 8388 } 8389 8390 return 0; 8391 } 8392 8393 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 8394 { 8395 int err, i; 8396 8397 if (!obj) 8398 return libbpf_err(-EINVAL); 8399 8400 if (obj->loaded) { 8401 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 8402 return libbpf_err(-EINVAL); 8403 } 8404 8405 if (obj->gen_loader) 8406 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); 8407 8408 err = bpf_object_prepare_token(obj); 8409 err = err ? : bpf_object__probe_loading(obj); 8410 err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8411 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8412 err = err ? : bpf_object__sanitize_maps(obj); 8413 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8414 err = err ? : bpf_object_adjust_struct_ops_autoload(obj); 8415 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8416 err = err ? : bpf_object__sanitize_and_load_btf(obj); 8417 err = err ? : bpf_object__create_maps(obj); 8418 err = err ? : bpf_object__load_progs(obj, extra_log_level); 8419 err = err ? : bpf_object_init_prog_arrays(obj); 8420 err = err ? 
: bpf_object_prepare_struct_ops(obj); 8421 8422 if (obj->gen_loader) { 8423 /* reset FDs */ 8424 if (obj->btf) 8425 btf__set_fd(obj->btf, -1); 8426 if (!err) 8427 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 8428 } 8429 8430 /* clean up fd_array */ 8431 zfree(&obj->fd_array); 8432 8433 /* clean up module BTFs */ 8434 for (i = 0; i < obj->btf_module_cnt; i++) { 8435 close(obj->btf_modules[i].fd); 8436 btf__free(obj->btf_modules[i].btf); 8437 free(obj->btf_modules[i].name); 8438 } 8439 free(obj->btf_modules); 8440 8441 /* clean up vmlinux BTF */ 8442 btf__free(obj->btf_vmlinux); 8443 obj->btf_vmlinux = NULL; 8444 8445 obj->loaded = true; /* doesn't matter if successfully or not */ 8446 8447 if (err) 8448 goto out; 8449 8450 return 0; 8451 out: 8452 /* unpin any maps that were auto-pinned during load */ 8453 for (i = 0; i < obj->nr_maps; i++) 8454 if (obj->maps[i].pinned && !obj->maps[i].reused) 8455 bpf_map__unpin(&obj->maps[i], NULL); 8456 8457 bpf_object_unload(obj); 8458 pr_warn("failed to load object '%s'\n", obj->path); 8459 return libbpf_err(err); 8460 } 8461 8462 int bpf_object__load(struct bpf_object *obj) 8463 { 8464 return bpf_object_load(obj, 0, NULL); 8465 } 8466 8467 static int make_parent_dir(const char *path) 8468 { 8469 char *cp, errmsg[STRERR_BUFSIZE]; 8470 char *dname, *dir; 8471 int err = 0; 8472 8473 dname = strdup(path); 8474 if (dname == NULL) 8475 return -ENOMEM; 8476 8477 dir = dirname(dname); 8478 if (mkdir(dir, 0700) && errno != EEXIST) 8479 err = -errno; 8480 8481 free(dname); 8482 if (err) { 8483 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 8484 pr_warn("failed to mkdir %s: %s\n", path, cp); 8485 } 8486 return err; 8487 } 8488 8489 static int check_path(const char *path) 8490 { 8491 char *cp, errmsg[STRERR_BUFSIZE]; 8492 struct statfs st_fs; 8493 char *dname, *dir; 8494 int err = 0; 8495 8496 if (path == NULL) 8497 return -EINVAL; 8498 8499 dname = strdup(path); 8500 if (dname == NULL) 8501 return -ENOMEM; 8502 8503 dir = dirname(dname); 8504 if (statfs(dir, &st_fs)) { 8505 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 8506 pr_warn("failed to statfs %s: %s\n", dir, cp); 8507 err = -errno; 8508 } 8509 free(dname); 8510 8511 if (!err && st_fs.f_type != BPF_FS_MAGIC) { 8512 pr_warn("specified path %s is not on BPF FS\n", path); 8513 err = -EINVAL; 8514 } 8515 8516 return err; 8517 } 8518 8519 int bpf_program__pin(struct bpf_program *prog, const char *path) 8520 { 8521 char *cp, errmsg[STRERR_BUFSIZE]; 8522 int err; 8523 8524 if (prog->fd < 0) { 8525 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); 8526 return libbpf_err(-EINVAL); 8527 } 8528 8529 err = make_parent_dir(path); 8530 if (err) 8531 return libbpf_err(err); 8532 8533 err = check_path(path); 8534 if (err) 8535 return libbpf_err(err); 8536 8537 if (bpf_obj_pin(prog->fd, path)) { 8538 err = -errno; 8539 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 8540 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); 8541 return libbpf_err(err); 8542 } 8543 8544 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); 8545 return 0; 8546 } 8547 8548 int bpf_program__unpin(struct bpf_program *prog, const char *path) 8549 { 8550 int err; 8551 8552 if (prog->fd < 0) { 8553 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); 8554 return libbpf_err(-EINVAL); 8555 } 8556 8557 err = check_path(path); 8558 if (err) 8559 return libbpf_err(err); 8560 8561 err = unlink(path); 8562 if (err) 8563 return 
libbpf_err(-errno); 8564 8565 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); 8566 return 0; 8567 } 8568 8569 int bpf_map__pin(struct bpf_map *map, const char *path) 8570 { 8571 char *cp, errmsg[STRERR_BUFSIZE]; 8572 int err; 8573 8574 if (map == NULL) { 8575 pr_warn("invalid map pointer\n"); 8576 return libbpf_err(-EINVAL); 8577 } 8578 8579 if (map->fd < 0) { 8580 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); 8581 return libbpf_err(-EINVAL); 8582 } 8583 8584 if (map->pin_path) { 8585 if (path && strcmp(path, map->pin_path)) { 8586 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8587 bpf_map__name(map), map->pin_path, path); 8588 return libbpf_err(-EINVAL); 8589 } else if (map->pinned) { 8590 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", 8591 bpf_map__name(map), map->pin_path); 8592 return 0; 8593 } 8594 } else { 8595 if (!path) { 8596 pr_warn("missing a path to pin map '%s' at\n", 8597 bpf_map__name(map)); 8598 return libbpf_err(-EINVAL); 8599 } else if (map->pinned) { 8600 pr_warn("map '%s' already pinned\n", bpf_map__name(map)); 8601 return libbpf_err(-EEXIST); 8602 } 8603 8604 map->pin_path = strdup(path); 8605 if (!map->pin_path) { 8606 err = -errno; 8607 goto out_err; 8608 } 8609 } 8610 8611 err = make_parent_dir(map->pin_path); 8612 if (err) 8613 return libbpf_err(err); 8614 8615 err = check_path(map->pin_path); 8616 if (err) 8617 return libbpf_err(err); 8618 8619 if (bpf_obj_pin(map->fd, map->pin_path)) { 8620 err = -errno; 8621 goto out_err; 8622 } 8623 8624 map->pinned = true; 8625 pr_debug("pinned map '%s'\n", map->pin_path); 8626 8627 return 0; 8628 8629 out_err: 8630 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 8631 pr_warn("failed to pin map: %s\n", cp); 8632 return libbpf_err(err); 8633 } 8634 8635 int bpf_map__unpin(struct bpf_map *map, const char *path) 8636 { 8637 int err; 8638 8639 if (map == NULL) { 8640 pr_warn("invalid map pointer\n"); 8641 return libbpf_err(-EINVAL); 8642 } 8643 8644 if (map->pin_path) { 8645 if (path && strcmp(path, map->pin_path)) { 8646 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8647 bpf_map__name(map), map->pin_path, path); 8648 return libbpf_err(-EINVAL); 8649 } 8650 path = map->pin_path; 8651 } else if (!path) { 8652 pr_warn("no path to unpin map '%s' from\n", 8653 bpf_map__name(map)); 8654 return libbpf_err(-EINVAL); 8655 } 8656 8657 err = check_path(path); 8658 if (err) 8659 return libbpf_err(err); 8660 8661 err = unlink(path); 8662 if (err != 0) 8663 return libbpf_err(-errno); 8664 8665 map->pinned = false; 8666 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); 8667 8668 return 0; 8669 } 8670 8671 int bpf_map__set_pin_path(struct bpf_map *map, const char *path) 8672 { 8673 char *new = NULL; 8674 8675 if (path) { 8676 new = strdup(path); 8677 if (!new) 8678 return libbpf_err(-errno); 8679 } 8680 8681 free(map->pin_path); 8682 map->pin_path = new; 8683 return 0; 8684 } 8685 8686 __alias(bpf_map__pin_path) 8687 const char *bpf_map__get_pin_path(const struct bpf_map *map); 8688 8689 const char *bpf_map__pin_path(const struct bpf_map *map) 8690 { 8691 return map->pin_path; 8692 } 8693 8694 bool bpf_map__is_pinned(const struct bpf_map *map) 8695 { 8696 return map->pinned; 8697 } 8698 8699 static void sanitize_pin_path(char *s) 8700 { 8701 /* bpffs disallows periods in path names */ 8702 while (*s) { 8703 if (*s == '.') 8704 *s = '_'; 8705 s++; 8706 } 8707 } 8708 8709 int bpf_object__pin_maps(struct bpf_object 
*obj, const char *path) 8710 { 8711 struct bpf_map *map; 8712 int err; 8713 8714 if (!obj) 8715 return libbpf_err(-ENOENT); 8716 8717 if (!obj->loaded) { 8718 pr_warn("object not yet loaded; load it first\n"); 8719 return libbpf_err(-ENOENT); 8720 } 8721 8722 bpf_object__for_each_map(map, obj) { 8723 char *pin_path = NULL; 8724 char buf[PATH_MAX]; 8725 8726 if (!map->autocreate) 8727 continue; 8728 8729 if (path) { 8730 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8731 if (err) 8732 goto err_unpin_maps; 8733 sanitize_pin_path(buf); 8734 pin_path = buf; 8735 } else if (!map->pin_path) { 8736 continue; 8737 } 8738 8739 err = bpf_map__pin(map, pin_path); 8740 if (err) 8741 goto err_unpin_maps; 8742 } 8743 8744 return 0; 8745 8746 err_unpin_maps: 8747 while ((map = bpf_object__prev_map(obj, map))) { 8748 if (!map->pin_path) 8749 continue; 8750 8751 bpf_map__unpin(map, NULL); 8752 } 8753 8754 return libbpf_err(err); 8755 } 8756 8757 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) 8758 { 8759 struct bpf_map *map; 8760 int err; 8761 8762 if (!obj) 8763 return libbpf_err(-ENOENT); 8764 8765 bpf_object__for_each_map(map, obj) { 8766 char *pin_path = NULL; 8767 char buf[PATH_MAX]; 8768 8769 if (path) { 8770 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8771 if (err) 8772 return libbpf_err(err); 8773 sanitize_pin_path(buf); 8774 pin_path = buf; 8775 } else if (!map->pin_path) { 8776 continue; 8777 } 8778 8779 err = bpf_map__unpin(map, pin_path); 8780 if (err) 8781 return libbpf_err(err); 8782 } 8783 8784 return 0; 8785 } 8786 8787 int bpf_object__pin_programs(struct bpf_object *obj, const char *path) 8788 { 8789 struct bpf_program *prog; 8790 char buf[PATH_MAX]; 8791 int err; 8792 8793 if (!obj) 8794 return libbpf_err(-ENOENT); 8795 8796 if (!obj->loaded) { 8797 pr_warn("object not yet loaded; load it first\n"); 8798 return libbpf_err(-ENOENT); 8799 } 8800 8801 bpf_object__for_each_program(prog, obj) { 8802 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8803 if (err) 8804 goto err_unpin_programs; 8805 8806 err = bpf_program__pin(prog, buf); 8807 if (err) 8808 goto err_unpin_programs; 8809 } 8810 8811 return 0; 8812 8813 err_unpin_programs: 8814 while ((prog = bpf_object__prev_program(obj, prog))) { 8815 if (pathname_concat(buf, sizeof(buf), path, prog->name)) 8816 continue; 8817 8818 bpf_program__unpin(prog, buf); 8819 } 8820 8821 return libbpf_err(err); 8822 } 8823 8824 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) 8825 { 8826 struct bpf_program *prog; 8827 int err; 8828 8829 if (!obj) 8830 return libbpf_err(-ENOENT); 8831 8832 bpf_object__for_each_program(prog, obj) { 8833 char buf[PATH_MAX]; 8834 8835 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8836 if (err) 8837 return libbpf_err(err); 8838 8839 err = bpf_program__unpin(prog, buf); 8840 if (err) 8841 return libbpf_err(err); 8842 } 8843 8844 return 0; 8845 } 8846 8847 int bpf_object__pin(struct bpf_object *obj, const char *path) 8848 { 8849 int err; 8850 8851 err = bpf_object__pin_maps(obj, path); 8852 if (err) 8853 return libbpf_err(err); 8854 8855 err = bpf_object__pin_programs(obj, path); 8856 if (err) { 8857 bpf_object__unpin_maps(obj, path); 8858 return libbpf_err(err); 8859 } 8860 8861 return 0; 8862 } 8863 8864 int bpf_object__unpin(struct bpf_object *obj, const char *path) 8865 { 8866 int err; 8867 8868 err = bpf_object__unpin_programs(obj, path); 8869 if (err) 8870 return libbpf_err(err); 8871 8872 err = 
bpf_object__unpin_maps(obj, path); 8873 if (err) 8874 return libbpf_err(err); 8875 8876 return 0; 8877 } 8878 8879 static void bpf_map__destroy(struct bpf_map *map) 8880 { 8881 if (map->inner_map) { 8882 bpf_map__destroy(map->inner_map); 8883 zfree(&map->inner_map); 8884 } 8885 8886 zfree(&map->init_slots); 8887 map->init_slots_sz = 0; 8888 8889 if (map->mmaped && map->mmaped != map->obj->arena_data) 8890 munmap(map->mmaped, bpf_map_mmap_sz(map)); 8891 map->mmaped = NULL; 8892 8893 if (map->st_ops) { 8894 zfree(&map->st_ops->data); 8895 zfree(&map->st_ops->progs); 8896 zfree(&map->st_ops->kern_func_off); 8897 zfree(&map->st_ops); 8898 } 8899 8900 zfree(&map->name); 8901 zfree(&map->real_name); 8902 zfree(&map->pin_path); 8903 8904 if (map->fd >= 0) 8905 zclose(map->fd); 8906 } 8907 8908 void bpf_object__close(struct bpf_object *obj) 8909 { 8910 size_t i; 8911 8912 if (IS_ERR_OR_NULL(obj)) 8913 return; 8914 8915 usdt_manager_free(obj->usdt_man); 8916 obj->usdt_man = NULL; 8917 8918 bpf_gen__free(obj->gen_loader); 8919 bpf_object__elf_finish(obj); 8920 bpf_object_unload(obj); 8921 btf__free(obj->btf); 8922 btf__free(obj->btf_vmlinux); 8923 btf_ext__free(obj->btf_ext); 8924 8925 for (i = 0; i < obj->nr_maps; i++) 8926 bpf_map__destroy(&obj->maps[i]); 8927 8928 zfree(&obj->btf_custom_path); 8929 zfree(&obj->kconfig); 8930 8931 for (i = 0; i < obj->nr_extern; i++) 8932 zfree(&obj->externs[i].essent_name); 8933 8934 zfree(&obj->externs); 8935 obj->nr_extern = 0; 8936 8937 zfree(&obj->maps); 8938 obj->nr_maps = 0; 8939 8940 if (obj->programs && obj->nr_programs) { 8941 for (i = 0; i < obj->nr_programs; i++) 8942 bpf_program__exit(&obj->programs[i]); 8943 } 8944 zfree(&obj->programs); 8945 8946 zfree(&obj->feat_cache); 8947 zfree(&obj->token_path); 8948 if (obj->token_fd > 0) 8949 close(obj->token_fd); 8950 8951 zfree(&obj->arena_data); 8952 8953 free(obj); 8954 } 8955 8956 const char *bpf_object__name(const struct bpf_object *obj) 8957 { 8958 return obj ? obj->name : libbpf_err_ptr(-EINVAL); 8959 } 8960 8961 unsigned int bpf_object__kversion(const struct bpf_object *obj) 8962 { 8963 return obj ? obj->kern_version : 0; 8964 } 8965 8966 struct btf *bpf_object__btf(const struct bpf_object *obj) 8967 { 8968 return obj ? obj->btf : NULL; 8969 } 8970 8971 int bpf_object__btf_fd(const struct bpf_object *obj) 8972 { 8973 return obj->btf ? btf__fd(obj->btf) : -1; 8974 } 8975 8976 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 8977 { 8978 if (obj->loaded) 8979 return libbpf_err(-EINVAL); 8980 8981 obj->kern_version = kern_version; 8982 8983 return 0; 8984 } 8985 8986 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) 8987 { 8988 struct bpf_gen *gen; 8989 8990 if (!opts) 8991 return -EFAULT; 8992 if (!OPTS_VALID(opts, gen_loader_opts)) 8993 return -EINVAL; 8994 gen = calloc(sizeof(*gen), 1); 8995 if (!gen) 8996 return -ENOMEM; 8997 gen->opts = opts; 8998 obj->gen_loader = gen; 8999 return 0; 9000 } 9001 9002 static struct bpf_program * 9003 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, 9004 bool forward) 9005 { 9006 size_t nr_programs = obj->nr_programs; 9007 ssize_t idx; 9008 9009 if (!nr_programs) 9010 return NULL; 9011 9012 if (!p) 9013 /* Iter from the beginning */ 9014 return forward ? 
&obj->programs[0] : 9015 &obj->programs[nr_programs - 1]; 9016 9017 if (p->obj != obj) { 9018 pr_warn("error: program handler doesn't match object\n"); 9019 return errno = EINVAL, NULL; 9020 } 9021 9022 idx = (p - obj->programs) + (forward ? 1 : -1); 9023 if (idx >= obj->nr_programs || idx < 0) 9024 return NULL; 9025 return &obj->programs[idx]; 9026 } 9027 9028 struct bpf_program * 9029 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 9030 { 9031 struct bpf_program *prog = prev; 9032 9033 do { 9034 prog = __bpf_program__iter(prog, obj, true); 9035 } while (prog && prog_is_subprog(obj, prog)); 9036 9037 return prog; 9038 } 9039 9040 struct bpf_program * 9041 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) 9042 { 9043 struct bpf_program *prog = next; 9044 9045 do { 9046 prog = __bpf_program__iter(prog, obj, false); 9047 } while (prog && prog_is_subprog(obj, prog)); 9048 9049 return prog; 9050 } 9051 9052 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 9053 { 9054 prog->prog_ifindex = ifindex; 9055 } 9056 9057 const char *bpf_program__name(const struct bpf_program *prog) 9058 { 9059 return prog->name; 9060 } 9061 9062 const char *bpf_program__section_name(const struct bpf_program *prog) 9063 { 9064 return prog->sec_name; 9065 } 9066 9067 bool bpf_program__autoload(const struct bpf_program *prog) 9068 { 9069 return prog->autoload; 9070 } 9071 9072 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 9073 { 9074 if (prog->obj->loaded) 9075 return libbpf_err(-EINVAL); 9076 9077 prog->autoload = autoload; 9078 return 0; 9079 } 9080 9081 bool bpf_program__autoattach(const struct bpf_program *prog) 9082 { 9083 return prog->autoattach; 9084 } 9085 9086 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) 9087 { 9088 prog->autoattach = autoattach; 9089 } 9090 9091 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) 9092 { 9093 return prog->insns; 9094 } 9095 9096 size_t bpf_program__insn_cnt(const struct bpf_program *prog) 9097 { 9098 return prog->insns_cnt; 9099 } 9100 9101 int bpf_program__set_insns(struct bpf_program *prog, 9102 struct bpf_insn *new_insns, size_t new_insn_cnt) 9103 { 9104 struct bpf_insn *insns; 9105 9106 if (prog->obj->loaded) 9107 return -EBUSY; 9108 9109 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 9110 /* NULL is a valid return from reallocarray if the new count is zero */ 9111 if (!insns && new_insn_cnt) { 9112 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 9113 return -ENOMEM; 9114 } 9115 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 9116 9117 prog->insns = insns; 9118 prog->insns_cnt = new_insn_cnt; 9119 return 0; 9120 } 9121 9122 int bpf_program__fd(const struct bpf_program *prog) 9123 { 9124 if (!prog) 9125 return libbpf_err(-EINVAL); 9126 9127 if (prog->fd < 0) 9128 return libbpf_err(-ENOENT); 9129 9130 return prog->fd; 9131 } 9132 9133 __alias(bpf_program__type) 9134 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 9135 9136 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) 9137 { 9138 return prog->type; 9139 } 9140 9141 static size_t custom_sec_def_cnt; 9142 static struct bpf_sec_def *custom_sec_defs; 9143 static struct bpf_sec_def custom_fallback_def; 9144 static bool has_custom_fallback_def; 9145 static int last_custom_sec_def_handler_id; 9146 9147 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type 
type) 9148 { 9149 if (prog->obj->loaded) 9150 return libbpf_err(-EBUSY); 9151 9152 /* if type is not changed, do nothing */ 9153 if (prog->type == type) 9154 return 0; 9155 9156 prog->type = type; 9157 9158 /* If a program type was changed, we need to reset associated SEC() 9159 * handler, as it will be invalid now. The only exception is a generic 9160 * fallback handler, which by definition is program type-agnostic and 9161 * is a catch-all custom handler, optionally set by the application, 9162 * so should be able to handle any type of BPF program. 9163 */ 9164 if (prog->sec_def != &custom_fallback_def) 9165 prog->sec_def = NULL; 9166 return 0; 9167 } 9168 9169 __alias(bpf_program__expected_attach_type) 9170 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); 9171 9172 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) 9173 { 9174 return prog->expected_attach_type; 9175 } 9176 9177 int bpf_program__set_expected_attach_type(struct bpf_program *prog, 9178 enum bpf_attach_type type) 9179 { 9180 if (prog->obj->loaded) 9181 return libbpf_err(-EBUSY); 9182 9183 prog->expected_attach_type = type; 9184 return 0; 9185 } 9186 9187 __u32 bpf_program__flags(const struct bpf_program *prog) 9188 { 9189 return prog->prog_flags; 9190 } 9191 9192 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) 9193 { 9194 if (prog->obj->loaded) 9195 return libbpf_err(-EBUSY); 9196 9197 prog->prog_flags = flags; 9198 return 0; 9199 } 9200 9201 __u32 bpf_program__log_level(const struct bpf_program *prog) 9202 { 9203 return prog->log_level; 9204 } 9205 9206 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) 9207 { 9208 if (prog->obj->loaded) 9209 return libbpf_err(-EBUSY); 9210 9211 prog->log_level = log_level; 9212 return 0; 9213 } 9214 9215 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) 9216 { 9217 *log_size = prog->log_size; 9218 return prog->log_buf; 9219 } 9220 9221 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) 9222 { 9223 if (log_size && !log_buf) 9224 return -EINVAL; 9225 if (prog->log_size > UINT_MAX) 9226 return -EINVAL; 9227 if (prog->obj->loaded) 9228 return -EBUSY; 9229 9230 prog->log_buf = log_buf; 9231 prog->log_size = log_size; 9232 return 0; 9233 } 9234 9235 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ 9236 .sec = (char *)sec_pfx, \ 9237 .prog_type = BPF_PROG_TYPE_##ptype, \ 9238 .expected_attach_type = atype, \ 9239 .cookie = (long)(flags), \ 9240 .prog_prepare_load_fn = libbpf_prepare_prog_load, \ 9241 __VA_ARGS__ \ 9242 } 9243 9244 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9245 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9246 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9247 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9248 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9249 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9250 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9251 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9252 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9253 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9254 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9255 9256 static const struct bpf_sec_def section_defs[] = { 9257 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), 9258 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), 9259 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), 9260 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9261 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9262 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9263 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9264 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9265 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9266 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9267 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9268 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9269 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9270 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9271 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9272 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9273 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9274 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 9275 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 9276 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 9277 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 9278 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), 9279 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 9280 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9281 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9282 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9283 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), 9284 SEC_DEF("netkit/peer", SCHED_CLS, 
BPF_NETKIT_PEER, SEC_NONE), 9285 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9286 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9287 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9288 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9289 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9290 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9291 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), 9292 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), 9293 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), 9294 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), 9295 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9296 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9297 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9298 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), 9299 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), 9300 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 9301 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), 9302 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 9303 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), 9304 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 9305 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 9306 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), 9307 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 9308 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 9309 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 9310 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), 9311 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), 9312 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), 9313 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), 9314 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), 9315 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), 9316 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), 9317 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), 9318 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), 9319 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), 9320 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), 9321 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), 9322 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), 9323 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), 9324 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), 9325 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), 9326 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), 9327 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), 9328 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), 9329 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), 9330 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), 9331 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, 
SEC_ATTACHABLE), 9332 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), 9333 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), 9334 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), 9335 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), 9336 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), 9337 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), 9338 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), 9339 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), 9340 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), 9341 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), 9342 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), 9343 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), 9344 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), 9345 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), 9346 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), 9347 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), 9348 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), 9349 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), 9350 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), 9351 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), 9352 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), 9353 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), 9354 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), 9355 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), 9356 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), 9357 }; 9358 9359 int libbpf_register_prog_handler(const char *sec, 9360 enum bpf_prog_type prog_type, 9361 enum bpf_attach_type exp_attach_type, 9362 const struct libbpf_prog_handler_opts *opts) 9363 { 9364 struct bpf_sec_def *sec_def; 9365 9366 if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) 9367 return libbpf_err(-EINVAL); 9368 9369 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ 9370 return libbpf_err(-E2BIG); 9371 9372 if (sec) { 9373 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, 9374 sizeof(*sec_def)); 9375 if (!sec_def) 9376 return libbpf_err(-ENOMEM); 9377 9378 custom_sec_defs = sec_def; 9379 sec_def = &custom_sec_defs[custom_sec_def_cnt]; 9380 } else { 9381 if (has_custom_fallback_def) 9382 return libbpf_err(-EBUSY); 9383 9384 sec_def = &custom_fallback_def; 9385 } 9386 9387 sec_def->sec = sec ? 
strdup(sec) : NULL; 9388 if (sec && !sec_def->sec) 9389 return libbpf_err(-ENOMEM); 9390 9391 sec_def->prog_type = prog_type; 9392 sec_def->expected_attach_type = exp_attach_type; 9393 sec_def->cookie = OPTS_GET(opts, cookie, 0); 9394 9395 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); 9396 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); 9397 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); 9398 9399 sec_def->handler_id = ++last_custom_sec_def_handler_id; 9400 9401 if (sec) 9402 custom_sec_def_cnt++; 9403 else 9404 has_custom_fallback_def = true; 9405 9406 return sec_def->handler_id; 9407 } 9408 9409 int libbpf_unregister_prog_handler(int handler_id) 9410 { 9411 struct bpf_sec_def *sec_defs; 9412 int i; 9413 9414 if (handler_id <= 0) 9415 return libbpf_err(-EINVAL); 9416 9417 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { 9418 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); 9419 has_custom_fallback_def = false; 9420 return 0; 9421 } 9422 9423 for (i = 0; i < custom_sec_def_cnt; i++) { 9424 if (custom_sec_defs[i].handler_id == handler_id) 9425 break; 9426 } 9427 9428 if (i == custom_sec_def_cnt) 9429 return libbpf_err(-ENOENT); 9430 9431 free(custom_sec_defs[i].sec); 9432 for (i = i + 1; i < custom_sec_def_cnt; i++) 9433 custom_sec_defs[i - 1] = custom_sec_defs[i]; 9434 custom_sec_def_cnt--; 9435 9436 /* try to shrink the array, but it's ok if we couldn't */ 9437 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); 9438 /* if new count is zero, reallocarray can return a valid NULL result; 9439 * in this case the previous pointer will be freed, so we *have to* 9440 * reassign old pointer to the new value (even if it's NULL) 9441 */ 9442 if (sec_defs || custom_sec_def_cnt == 0) 9443 custom_sec_defs = sec_defs; 9444 9445 return 0; 9446 } 9447 9448 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) 9449 { 9450 size_t len = strlen(sec_def->sec); 9451 9452 /* "type/" always has to have proper SEC("type/extras") form */ 9453 if (sec_def->sec[len - 1] == '/') { 9454 if (str_has_pfx(sec_name, sec_def->sec)) 9455 return true; 9456 return false; 9457 } 9458 9459 /* "type+" means it can be either exact SEC("type") or 9460 * well-formed SEC("type/extras") with proper '/' separator 9461 */ 9462 if (sec_def->sec[len - 1] == '+') { 9463 len--; 9464 /* not even a prefix */ 9465 if (strncmp(sec_name, sec_def->sec, len) != 0) 9466 return false; 9467 /* exact match or has '/' separator */ 9468 if (sec_name[len] == '\0' || sec_name[len] == '/') 9469 return true; 9470 return false; 9471 } 9472 9473 return strcmp(sec_name, sec_def->sec) == 0; 9474 } 9475 9476 static const struct bpf_sec_def *find_sec_def(const char *sec_name) 9477 { 9478 const struct bpf_sec_def *sec_def; 9479 int i, n; 9480 9481 n = custom_sec_def_cnt; 9482 for (i = 0; i < n; i++) { 9483 sec_def = &custom_sec_defs[i]; 9484 if (sec_def_matches(sec_def, sec_name)) 9485 return sec_def; 9486 } 9487 9488 n = ARRAY_SIZE(section_defs); 9489 for (i = 0; i < n; i++) { 9490 sec_def = &section_defs[i]; 9491 if (sec_def_matches(sec_def, sec_name)) 9492 return sec_def; 9493 } 9494 9495 if (has_custom_fallback_def) 9496 return &custom_fallback_def; 9497 9498 return NULL; 9499 } 9500 9501 #define MAX_TYPE_NAME_SIZE 32 9502 9503 static char *libbpf_get_type_names(bool attach_type) 9504 { 9505 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; 9506 char *buf; 9507 9508 buf = malloc(len);
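/* Editor's note: an illustrative sketch (not part of libbpf) of how an
 * application can hook into the section machinery above. The handler name
 * "myhandler" and the callback my_attach_fn are hypothetical. A definition
 * registered as "myhandler+" matches both SEC("myhandler") and
 * SEC("myhandler/something"), per sec_def_matches():
 *
 *	static int my_attach_fn(const struct bpf_program *prog, long cookie,
 *				struct bpf_link **link)
 *	{
 *		*link = NULL;	// opt out of auto-attach in this example
 *		return 0;
 *	}
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
 *		.prog_attach_fn = my_attach_fn,
 *	);
 *	int id = libbpf_register_prog_handler("myhandler+", BPF_PROG_TYPE_KPROBE,
 *					      0, &opts);
 *	if (id < 0)
 *		return id;	// -EINVAL, -EBUSY, -ENOMEM, ...
 *	...
 *	libbpf_unregister_prog_handler(id);
 */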
9509 if (!buf) 9510 return NULL; 9511 9512 buf[0] = '\0'; 9513 /* Forge string buf with all available names */ 9514 for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 9515 const struct bpf_sec_def *sec_def = &section_defs[i]; 9516 9517 if (attach_type) { 9518 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9519 continue; 9520 9521 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9522 continue; 9523 } 9524 9525 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { 9526 free(buf); 9527 return NULL; 9528 } 9529 strcat(buf, " "); 9530 strcat(buf, section_defs[i].sec); 9531 } 9532 9533 return buf; 9534 } 9535 9536 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 9537 enum bpf_attach_type *expected_attach_type) 9538 { 9539 const struct bpf_sec_def *sec_def; 9540 char *type_names; 9541 9542 if (!name) 9543 return libbpf_err(-EINVAL); 9544 9545 sec_def = find_sec_def(name); 9546 if (sec_def) { 9547 *prog_type = sec_def->prog_type; 9548 *expected_attach_type = sec_def->expected_attach_type; 9549 return 0; 9550 } 9551 9552 pr_debug("failed to guess program type from ELF section '%s'\n", name); 9553 type_names = libbpf_get_type_names(false); 9554 if (type_names != NULL) { 9555 pr_debug("supported section(type) names are:%s\n", type_names); 9556 free(type_names); 9557 } 9558 9559 return libbpf_err(-ESRCH); 9560 } 9561 9562 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) 9563 { 9564 if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) 9565 return NULL; 9566 9567 return attach_type_name[t]; 9568 } 9569 9570 const char *libbpf_bpf_link_type_str(enum bpf_link_type t) 9571 { 9572 if (t < 0 || t >= ARRAY_SIZE(link_type_name)) 9573 return NULL; 9574 9575 return link_type_name[t]; 9576 } 9577 9578 const char *libbpf_bpf_map_type_str(enum bpf_map_type t) 9579 { 9580 if (t < 0 || t >= ARRAY_SIZE(map_type_name)) 9581 return NULL; 9582 9583 return map_type_name[t]; 9584 } 9585 9586 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) 9587 { 9588 if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) 9589 return NULL; 9590 9591 return prog_type_name[t]; 9592 } 9593 9594 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, 9595 int sec_idx, 9596 size_t offset) 9597 { 9598 struct bpf_map *map; 9599 size_t i; 9600 9601 for (i = 0; i < obj->nr_maps; i++) { 9602 map = &obj->maps[i]; 9603 if (!bpf_map__is_struct_ops(map)) 9604 continue; 9605 if (map->sec_idx == sec_idx && 9606 map->sec_offset <= offset && 9607 offset - map->sec_offset < map->def.value_size) 9608 return map; 9609 } 9610 9611 return NULL; 9612 } 9613 9614 /* Collect the reloc from ELF, populate the st_ops->progs[], and update 9615 * st_ops->data for shadow type.
9616 */ 9617 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 9618 Elf64_Shdr *shdr, Elf_Data *data) 9619 { 9620 const struct btf_member *member; 9621 struct bpf_struct_ops *st_ops; 9622 struct bpf_program *prog; 9623 unsigned int shdr_idx; 9624 const struct btf *btf; 9625 struct bpf_map *map; 9626 unsigned int moff, insn_idx; 9627 const char *name; 9628 __u32 member_idx; 9629 Elf64_Sym *sym; 9630 Elf64_Rel *rel; 9631 int i, nrels; 9632 9633 btf = obj->btf; 9634 nrels = shdr->sh_size / shdr->sh_entsize; 9635 for (i = 0; i < nrels; i++) { 9636 rel = elf_rel_by_idx(data, i); 9637 if (!rel) { 9638 pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 9639 return -LIBBPF_ERRNO__FORMAT; 9640 } 9641 9642 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 9643 if (!sym) { 9644 pr_warn("struct_ops reloc: symbol %zx not found\n", 9645 (size_t)ELF64_R_SYM(rel->r_info)); 9646 return -LIBBPF_ERRNO__FORMAT; 9647 } 9648 9649 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 9650 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); 9651 if (!map) { 9652 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", 9653 (size_t)rel->r_offset); 9654 return -EINVAL; 9655 } 9656 9657 moff = rel->r_offset - map->sec_offset; 9658 shdr_idx = sym->st_shndx; 9659 st_ops = map->st_ops; 9660 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 9661 map->name, 9662 (long long)(rel->r_info >> 32), 9663 (long long)sym->st_value, 9664 shdr_idx, (size_t)rel->r_offset, 9665 map->sec_offset, sym->st_name, name); 9666 9667 if (shdr_idx >= SHN_LORESERVE) { 9668 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", 9669 map->name, (size_t)rel->r_offset, shdr_idx); 9670 return -LIBBPF_ERRNO__RELOC; 9671 } 9672 if (sym->st_value % BPF_INSN_SZ) { 9673 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", 9674 map->name, (unsigned long long)sym->st_value); 9675 return -LIBBPF_ERRNO__FORMAT; 9676 } 9677 insn_idx = sym->st_value / BPF_INSN_SZ; 9678 9679 member = find_member_by_offset(st_ops->type, moff * 8); 9680 if (!member) { 9681 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 9682 map->name, moff); 9683 return -EINVAL; 9684 } 9685 member_idx = member - btf_members(st_ops->type); 9686 name = btf__name_by_offset(btf, member->name_off); 9687 9688 if (!resolve_func_ptr(btf, member->type, NULL)) { 9689 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 9690 map->name, name); 9691 return -EINVAL; 9692 } 9693 9694 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); 9695 if (!prog) { 9696 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 9697 map->name, shdr_idx, name); 9698 return -EINVAL; 9699 } 9700 9701 /* prevent the use of BPF prog with invalid type */ 9702 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 9703 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", 9704 map->name, prog->name); 9705 return -EINVAL; 9706 } 9707 9708 st_ops->progs[member_idx] = prog; 9709 9710 /* st_ops->data will be exposed to users, being returned by 9711 * bpf_map__initial_value() as a pointer to the shadow 9712 * type. All function pointers in the original struct type 9713 * should be converted to a pointer to struct bpf_program 9714 * in the shadow type. 
9715 */ 9716 *((struct bpf_program **)(st_ops->data + moff)) = prog; 9717 } 9718 9719 return 0; 9720 } 9721 9722 #define BTF_TRACE_PREFIX "btf_trace_" 9723 #define BTF_LSM_PREFIX "bpf_lsm_" 9724 #define BTF_ITER_PREFIX "bpf_iter_" 9725 #define BTF_MAX_NAME_SIZE 128 9726 9727 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, 9728 const char **prefix, int *kind) 9729 { 9730 switch (attach_type) { 9731 case BPF_TRACE_RAW_TP: 9732 *prefix = BTF_TRACE_PREFIX; 9733 *kind = BTF_KIND_TYPEDEF; 9734 break; 9735 case BPF_LSM_MAC: 9736 case BPF_LSM_CGROUP: 9737 *prefix = BTF_LSM_PREFIX; 9738 *kind = BTF_KIND_FUNC; 9739 break; 9740 case BPF_TRACE_ITER: 9741 *prefix = BTF_ITER_PREFIX; 9742 *kind = BTF_KIND_FUNC; 9743 break; 9744 default: 9745 *prefix = ""; 9746 *kind = BTF_KIND_FUNC; 9747 } 9748 } 9749 9750 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 9751 const char *name, __u32 kind) 9752 { 9753 char btf_type_name[BTF_MAX_NAME_SIZE]; 9754 int ret; 9755 9756 ret = snprintf(btf_type_name, sizeof(btf_type_name), 9757 "%s%s", prefix, name); 9758 /* snprintf returns the number of characters written excluding the 9759 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 9760 * indicates truncation. 9761 */ 9762 if (ret < 0 || ret >= sizeof(btf_type_name)) 9763 return -ENAMETOOLONG; 9764 return btf__find_by_name_kind(btf, btf_type_name, kind); 9765 } 9766 9767 static inline int find_attach_btf_id(struct btf *btf, const char *name, 9768 enum bpf_attach_type attach_type) 9769 { 9770 const char *prefix; 9771 int kind; 9772 9773 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); 9774 return find_btf_by_prefix_kind(btf, prefix, name, kind); 9775 } 9776 9777 int libbpf_find_vmlinux_btf_id(const char *name, 9778 enum bpf_attach_type attach_type) 9779 { 9780 struct btf *btf; 9781 int err; 9782 9783 btf = btf__load_vmlinux_btf(); 9784 err = libbpf_get_error(btf); 9785 if (err) { 9786 pr_warn("vmlinux BTF is not found\n"); 9787 return libbpf_err(err); 9788 } 9789 9790 err = find_attach_btf_id(btf, name, attach_type); 9791 if (err <= 0) 9792 pr_warn("%s is not found in vmlinux BTF\n", name); 9793 9794 btf__free(btf); 9795 return libbpf_err(err); 9796 } 9797 9798 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) 9799 { 9800 struct bpf_prog_info info; 9801 __u32 info_len = sizeof(info); 9802 struct btf *btf; 9803 int err; 9804 9805 memset(&info, 0, info_len); 9806 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); 9807 if (err) { 9808 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", 9809 attach_prog_fd, err); 9810 return err; 9811 } 9812 9813 err = -EINVAL; 9814 if (!info.btf_id) { 9815 pr_warn("The target program doesn't have BTF\n"); 9816 goto out; 9817 } 9818 btf = btf__load_from_kernel_by_id(info.btf_id); 9819 err = libbpf_get_error(btf); 9820 if (err) { 9821 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); 9822 goto out; 9823 } 9824 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 9825 btf__free(btf); 9826 if (err <= 0) { 9827 pr_warn("%s is not found in prog's BTF\n", name); 9828 goto out; 9829 } 9830 out: 9831 return err; 9832 } 9833 9834 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, 9835 enum bpf_attach_type attach_type, 9836 int *btf_obj_fd, int *btf_type_id) 9837 { 9838 int ret, i; 9839 9840 ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); 9841 if (ret > 0) { 9842 *btf_obj_fd = 0; /* vmlinux BTF */ 9843 *btf_type_id 
= ret; 9844 return 0; 9845 } 9846 if (ret != -ENOENT) 9847 return ret; 9848 9849 ret = load_module_btfs(obj); 9850 if (ret) 9851 return ret; 9852 9853 for (i = 0; i < obj->btf_module_cnt; i++) { 9854 const struct module_btf *mod = &obj->btf_modules[i]; 9855 9856 ret = find_attach_btf_id(mod->btf, attach_name, attach_type); 9857 if (ret > 0) { 9858 *btf_obj_fd = mod->fd; 9859 *btf_type_id = ret; 9860 return 0; 9861 } 9862 if (ret == -ENOENT) 9863 continue; 9864 9865 return ret; 9866 } 9867 9868 return -ESRCH; 9869 } 9870 9871 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 9872 int *btf_obj_fd, int *btf_type_id) 9873 { 9874 enum bpf_attach_type attach_type = prog->expected_attach_type; 9875 __u32 attach_prog_fd = prog->attach_prog_fd; 9876 int err = 0; 9877 9878 /* BPF program's BTF ID */ 9879 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { 9880 if (!attach_prog_fd) { 9881 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 9882 return -EINVAL; 9883 } 9884 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); 9885 if (err < 0) { 9886 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", 9887 prog->name, attach_prog_fd, attach_name, err); 9888 return err; 9889 } 9890 *btf_obj_fd = 0; 9891 *btf_type_id = err; 9892 return 0; 9893 } 9894 9895 /* kernel/module BTF ID */ 9896 if (prog->obj->gen_loader) { 9897 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); 9898 *btf_obj_fd = 0; 9899 *btf_type_id = 1; 9900 } else { 9901 err = find_kernel_btf_id(prog->obj, attach_name, 9902 attach_type, btf_obj_fd, 9903 btf_type_id); 9904 } 9905 if (err) { 9906 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", 9907 prog->name, attach_name, err); 9908 return err; 9909 } 9910 return 0; 9911 } 9912 9913 int libbpf_attach_type_by_name(const char *name, 9914 enum bpf_attach_type *attach_type) 9915 { 9916 char *type_names; 9917 const struct bpf_sec_def *sec_def; 9918 9919 if (!name) 9920 return libbpf_err(-EINVAL); 9921 9922 sec_def = find_sec_def(name); 9923 if (!sec_def) { 9924 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 9925 type_names = libbpf_get_type_names(true); 9926 if (type_names != NULL) { 9927 pr_debug("attachable section(type) names are:%s\n", type_names); 9928 free(type_names); 9929 } 9930 9931 return libbpf_err(-EINVAL); 9932 } 9933 9934 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9935 return libbpf_err(-EINVAL); 9936 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9937 return libbpf_err(-EINVAL); 9938 9939 *attach_type = sec_def->expected_attach_type; 9940 return 0; 9941 } 9942 9943 int bpf_map__fd(const struct bpf_map *map) 9944 { 9945 if (!map) 9946 return libbpf_err(-EINVAL); 9947 if (!map_is_created(map)) 9948 return -1; 9949 return map->fd; 9950 } 9951 9952 static bool map_uses_real_name(const struct bpf_map *map) 9953 { 9954 /* Since libbpf started to support custom .data.* and .rodata.* maps, 9955 * their user-visible name differs from kernel-visible name. Users see 9956 * such map's corresponding ELF section name as a map name. 9957 * This check distinguishes .data/.rodata from .data.* and .rodata.* 9958 * maps to know which name has to be returned to the user. 
9959 */ 9960 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) 9961 return true; 9962 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) 9963 return true; 9964 return false; 9965 } 9966 9967 const char *bpf_map__name(const struct bpf_map *map) 9968 { 9969 if (!map) 9970 return NULL; 9971 9972 if (map_uses_real_name(map)) 9973 return map->real_name; 9974 9975 return map->name; 9976 } 9977 9978 enum bpf_map_type bpf_map__type(const struct bpf_map *map) 9979 { 9980 return map->def.type; 9981 } 9982 9983 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) 9984 { 9985 if (map_is_created(map)) 9986 return libbpf_err(-EBUSY); 9987 map->def.type = type; 9988 return 0; 9989 } 9990 9991 __u32 bpf_map__map_flags(const struct bpf_map *map) 9992 { 9993 return map->def.map_flags; 9994 } 9995 9996 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) 9997 { 9998 if (map_is_created(map)) 9999 return libbpf_err(-EBUSY); 10000 map->def.map_flags = flags; 10001 return 0; 10002 } 10003 10004 __u64 bpf_map__map_extra(const struct bpf_map *map) 10005 { 10006 return map->map_extra; 10007 } 10008 10009 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) 10010 { 10011 if (map_is_created(map)) 10012 return libbpf_err(-EBUSY); 10013 map->map_extra = map_extra; 10014 return 0; 10015 } 10016 10017 __u32 bpf_map__numa_node(const struct bpf_map *map) 10018 { 10019 return map->numa_node; 10020 } 10021 10022 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) 10023 { 10024 if (map_is_created(map)) 10025 return libbpf_err(-EBUSY); 10026 map->numa_node = numa_node; 10027 return 0; 10028 } 10029 10030 __u32 bpf_map__key_size(const struct bpf_map *map) 10031 { 10032 return map->def.key_size; 10033 } 10034 10035 int bpf_map__set_key_size(struct bpf_map *map, __u32 size) 10036 { 10037 if (map_is_created(map)) 10038 return libbpf_err(-EBUSY); 10039 map->def.key_size = size; 10040 return 0; 10041 } 10042 10043 __u32 bpf_map__value_size(const struct bpf_map *map) 10044 { 10045 return map->def.value_size; 10046 } 10047 10048 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 10049 { 10050 struct btf *btf; 10051 struct btf_type *datasec_type, *var_type; 10052 struct btf_var_secinfo *var; 10053 const struct btf_type *array_type; 10054 const struct btf_array *array; 10055 int vlen, element_sz, new_array_id; 10056 __u32 nr_elements; 10057 10058 /* check btf existence */ 10059 btf = bpf_object__btf(map->obj); 10060 if (!btf) 10061 return -ENOENT; 10062 10063 /* verify map is datasec */ 10064 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 10065 if (!btf_is_datasec(datasec_type)) { 10066 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 10067 bpf_map__name(map)); 10068 return -EINVAL; 10069 } 10070 10071 /* verify datasec has at least one var */ 10072 vlen = btf_vlen(datasec_type); 10073 if (vlen == 0) { 10074 pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 10075 bpf_map__name(map)); 10076 return -EINVAL; 10077 } 10078 10079 /* verify last var in the datasec is an array */ 10080 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10081 var_type = btf_type_by_id(btf, var->type); 10082 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 10083 if (!btf_is_array(array_type)) { 10084 pr_warn("map '%s': cannot be resized, last var must be an array\n", 10085 bpf_map__name(map)); 10086 return -EINVAL; 10087 } 10088 10089 /* verify request size 
aligns with array */ 10090 array = btf_array(array_type); 10091 element_sz = btf__resolve_size(btf, array->type); 10092 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 10093 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 10094 bpf_map__name(map), element_sz, size); 10095 return -EINVAL; 10096 } 10097 10098 /* create a new array based on the existing array, but with new length */ 10099 nr_elements = (size - var->offset) / element_sz; 10100 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 10101 if (new_array_id < 0) 10102 return new_array_id; 10103 10104 /* adding a new btf type invalidates existing pointers to btf objects, 10105 * so refresh pointers before proceeding 10106 */ 10107 datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 10108 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10109 var_type = btf_type_by_id(btf, var->type); 10110 10111 /* finally update btf info */ 10112 datasec_type->size = size; 10113 var->size = size - var->offset; 10114 var_type->type = new_array_id; 10115 10116 return 0; 10117 } 10118 10119 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 10120 { 10121 if (map->obj->loaded || map->reused) 10122 return libbpf_err(-EBUSY); 10123 10124 if (map->mmaped) { 10125 size_t mmap_old_sz, mmap_new_sz; 10126 int err; 10127 10128 if (map->def.type != BPF_MAP_TYPE_ARRAY) 10129 return -EOPNOTSUPP; 10130 10131 mmap_old_sz = bpf_map_mmap_sz(map); 10132 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); 10133 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 10134 if (err) { 10135 pr_warn("map '%s': failed to resize memory-mapped region: %d\n", 10136 bpf_map__name(map), err); 10137 return err; 10138 } 10139 err = map_btf_datasec_resize(map, size); 10140 if (err && err != -ENOENT) { 10141 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", 10142 bpf_map__name(map), err); 10143 map->btf_value_type_id = 0; 10144 map->btf_key_type_id = 0; 10145 } 10146 } 10147 10148 map->def.value_size = size; 10149 return 0; 10150 } 10151 10152 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) 10153 { 10154 return map ? map->btf_key_type_id : 0; 10155 } 10156 10157 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) 10158 { 10159 return map ? 
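/* Editor's note: an illustrative sketch (not part of libbpf) of resizing a
 * global-data (mmap-ed ARRAY) map before load, which is what
 * map_btf_datasec_resize() above supports; the custom section name
 * ".data.buf" is hypothetical:
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, ".data.buf");
 *	int err = bpf_map__set_value_size(m, 1024 * 1024);
 *	// the last variable in the datasec must be an array for the BTF resize
 */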
map->btf_value_type_id : 0; 10160 } 10161 10162 int bpf_map__set_initial_value(struct bpf_map *map, 10163 const void *data, size_t size) 10164 { 10165 size_t actual_sz; 10166 10167 if (map->obj->loaded || map->reused) 10168 return libbpf_err(-EBUSY); 10169 10170 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) 10171 return libbpf_err(-EINVAL); 10172 10173 if (map->def.type == BPF_MAP_TYPE_ARENA) 10174 actual_sz = map->obj->arena_data_sz; 10175 else 10176 actual_sz = map->def.value_size; 10177 if (size != actual_sz) 10178 return libbpf_err(-EINVAL); 10179 10180 memcpy(map->mmaped, data, size); 10181 return 0; 10182 } 10183 10184 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) 10185 { 10186 if (bpf_map__is_struct_ops(map)) { 10187 if (psize) 10188 *psize = map->def.value_size; 10189 return map->st_ops->data; 10190 } 10191 10192 if (!map->mmaped) 10193 return NULL; 10194 10195 if (map->def.type == BPF_MAP_TYPE_ARENA) 10196 *psize = map->obj->arena_data_sz; 10197 else 10198 *psize = map->def.value_size; 10199 10200 return map->mmaped; 10201 } 10202 10203 bool bpf_map__is_internal(const struct bpf_map *map) 10204 { 10205 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 10206 } 10207 10208 __u32 bpf_map__ifindex(const struct bpf_map *map) 10209 { 10210 return map->map_ifindex; 10211 } 10212 10213 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) 10214 { 10215 if (map_is_created(map)) 10216 return libbpf_err(-EBUSY); 10217 map->map_ifindex = ifindex; 10218 return 0; 10219 } 10220 10221 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) 10222 { 10223 if (!bpf_map_type__is_map_in_map(map->def.type)) { 10224 pr_warn("error: unsupported map type\n"); 10225 return libbpf_err(-EINVAL); 10226 } 10227 if (map->inner_map_fd != -1) { 10228 pr_warn("error: inner_map_fd already specified\n"); 10229 return libbpf_err(-EINVAL); 10230 } 10231 if (map->inner_map) { 10232 bpf_map__destroy(map->inner_map); 10233 zfree(&map->inner_map); 10234 } 10235 map->inner_map_fd = fd; 10236 return 0; 10237 } 10238 10239 static struct bpf_map * 10240 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) 10241 { 10242 ssize_t idx; 10243 struct bpf_map *s, *e; 10244 10245 if (!obj || !obj->maps) 10246 return errno = EINVAL, NULL; 10247 10248 s = obj->maps; 10249 e = obj->maps + obj->nr_maps; 10250 10251 if ((m < s) || (m >= e)) { 10252 pr_warn("error in %s: map handler doesn't belong to object\n", 10253 __func__); 10254 return errno = EINVAL, NULL; 10255 } 10256 10257 idx = (m - obj->maps) + i; 10258 if (idx >= obj->nr_maps || idx < 0) 10259 return NULL; 10260 return &obj->maps[idx]; 10261 } 10262 10263 struct bpf_map * 10264 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 10265 { 10266 if (prev == NULL) 10267 return obj->maps; 10268 10269 return __bpf_map__iter(prev, obj, 1); 10270 } 10271 10272 struct bpf_map * 10273 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) 10274 { 10275 if (next == NULL) { 10276 if (!obj->nr_maps) 10277 return NULL; 10278 return obj->maps + obj->nr_maps - 1; 10279 } 10280 10281 return __bpf_map__iter(next, obj, -1); 10282 } 10283 10284 struct bpf_map * 10285 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) 10286 { 10287 struct bpf_map *pos; 10288 10289 bpf_object__for_each_map(pos, obj) { 10290 /* if it's a special internal map name (which always starts 10291 * with dot) then check if that special name matches the 10292 * real map name (ELF 
section name) 10293 */ 10294 if (name[0] == '.') { 10295 if (pos->real_name && strcmp(pos->real_name, name) == 0) 10296 return pos; 10297 continue; 10298 } 10299 /* otherwise map name has to be an exact match */ 10300 if (map_uses_real_name(pos)) { 10301 if (strcmp(pos->real_name, name) == 0) 10302 return pos; 10303 continue; 10304 } 10305 if (strcmp(pos->name, name) == 0) 10306 return pos; 10307 } 10308 return errno = ENOENT, NULL; 10309 } 10310 10311 int 10312 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) 10313 { 10314 return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 10315 } 10316 10317 static int validate_map_op(const struct bpf_map *map, size_t key_sz, 10318 size_t value_sz, bool check_value_sz) 10319 { 10320 if (!map_is_created(map)) /* map is not yet created */ 10321 return -ENOENT; 10322 10323 if (map->def.key_size != key_sz) { 10324 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", 10325 map->name, key_sz, map->def.key_size); 10326 return -EINVAL; 10327 } 10328 10329 if (map->fd < 0) { 10330 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); 10331 return -EINVAL; 10332 } 10333 10334 if (!check_value_sz) 10335 return 0; 10336 10337 switch (map->def.type) { 10338 case BPF_MAP_TYPE_PERCPU_ARRAY: 10339 case BPF_MAP_TYPE_PERCPU_HASH: 10340 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 10341 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { 10342 int num_cpu = libbpf_num_possible_cpus(); 10343 size_t elem_sz = roundup(map->def.value_size, 8); 10344 10345 if (value_sz != num_cpu * elem_sz) { 10346 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", 10347 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); 10348 return -EINVAL; 10349 } 10350 break; 10351 } 10352 default: 10353 if (map->def.value_size != value_sz) { 10354 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", 10355 map->name, value_sz, map->def.value_size); 10356 return -EINVAL; 10357 } 10358 break; 10359 } 10360 return 0; 10361 } 10362 10363 int bpf_map__lookup_elem(const struct bpf_map *map, 10364 const void *key, size_t key_sz, 10365 void *value, size_t value_sz, __u64 flags) 10366 { 10367 int err; 10368 10369 err = validate_map_op(map, key_sz, value_sz, true); 10370 if (err) 10371 return libbpf_err(err); 10372 10373 return bpf_map_lookup_elem_flags(map->fd, key, value, flags); 10374 } 10375 10376 int bpf_map__update_elem(const struct bpf_map *map, 10377 const void *key, size_t key_sz, 10378 const void *value, size_t value_sz, __u64 flags) 10379 { 10380 int err; 10381 10382 err = validate_map_op(map, key_sz, value_sz, true); 10383 if (err) 10384 return libbpf_err(err); 10385 10386 return bpf_map_update_elem(map->fd, key, value, flags); 10387 } 10388 10389 int bpf_map__delete_elem(const struct bpf_map *map, 10390 const void *key, size_t key_sz, __u64 flags) 10391 { 10392 int err; 10393 10394 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10395 if (err) 10396 return libbpf_err(err); 10397 10398 return bpf_map_delete_elem_flags(map->fd, key, flags); 10399 } 10400 10401 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, 10402 const void *key, size_t key_sz, 10403 void *value, size_t value_sz, __u64 flags) 10404 { 10405 int err; 10406 10407 err = validate_map_op(map, key_sz, value_sz, true); 10408 if (err) 10409 return libbpf_err(err); 10410 10411 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); 10412 } 10413 10414 int 
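/* Editor's note: an illustrative sketch (not part of libbpf) of the strict
 * size checks enforced by validate_map_op() above; map handle and value
 * layout are hypothetical. For per-CPU maps the value buffer must cover all
 * possible CPUs, each element rounded up to 8 bytes:
 *
 *	__u32 key = 0, val = 42;
 *	int err = bpf_map__update_elem(map, &key, sizeof(key),
 *				       &val, sizeof(val), BPF_ANY);
 *
 *	// per-CPU map with 4-byte values:
 *	int ncpu = libbpf_num_possible_cpus();
 *	__u64 *vals = calloc(ncpu, 8);	// roundup(4, 8) == 8
 *	err = bpf_map__lookup_elem(map, &key, sizeof(key),
 *				   vals, (size_t)ncpu * 8, 0);
 */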
bpf_map__get_next_key(const struct bpf_map *map, 10415 const void *cur_key, void *next_key, size_t key_sz) 10416 { 10417 int err; 10418 10419 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10420 if (err) 10421 return libbpf_err(err); 10422 10423 return bpf_map_get_next_key(map->fd, cur_key, next_key); 10424 } 10425 10426 long libbpf_get_error(const void *ptr) 10427 { 10428 if (!IS_ERR_OR_NULL(ptr)) 10429 return 0; 10430 10431 if (IS_ERR(ptr)) 10432 errno = -PTR_ERR(ptr); 10433 10434 /* If ptr == NULL, then errno should be already set by the failing 10435 * API, because libbpf never returns NULL on success and it now always 10436 * sets errno on error. So no extra errno handling for ptr == NULL 10437 * case. 10438 */ 10439 return -errno; 10440 } 10441 10442 /* Replace link's underlying BPF program with the new one */ 10443 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) 10444 { 10445 int ret; 10446 int prog_fd = bpf_program__fd(prog); 10447 10448 if (prog_fd < 0) { 10449 pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n", 10450 prog->name); 10451 return libbpf_err(-EINVAL); 10452 } 10453 10454 ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL); 10455 return libbpf_err_errno(ret); 10456 } 10457 10458 /* Release "ownership" of the underlying BPF resource (typically, a BPF program 10459 * attached to some BPF hook, e.g., tracepoint, kprobe, etc.). A disconnected 10460 * link, when destroyed through a bpf_link__destroy() call, won't attempt to 10461 * detach/unregister that BPF resource. This is useful in situations where, 10462 * say, an attached BPF program has to outlive the userspace program that attached it 10463 * in the system. Depending on the type of BPF program, though, there might be 10464 * additional steps (like pinning the BPF program in BPF FS) necessary to ensure 10465 * that exit of the userspace program doesn't trigger automatic detachment and cleanup 10466 * inside the kernel.
10467 */ 10468 void bpf_link__disconnect(struct bpf_link *link) 10469 { 10470 link->disconnected = true; 10471 } 10472 10473 int bpf_link__destroy(struct bpf_link *link) 10474 { 10475 int err = 0; 10476 10477 if (IS_ERR_OR_NULL(link)) 10478 return 0; 10479 10480 if (!link->disconnected && link->detach) 10481 err = link->detach(link); 10482 if (link->pin_path) 10483 free(link->pin_path); 10484 if (link->dealloc) 10485 link->dealloc(link); 10486 else 10487 free(link); 10488 10489 return libbpf_err(err); 10490 } 10491 10492 int bpf_link__fd(const struct bpf_link *link) 10493 { 10494 return link->fd; 10495 } 10496 10497 const char *bpf_link__pin_path(const struct bpf_link *link) 10498 { 10499 return link->pin_path; 10500 } 10501 10502 static int bpf_link__detach_fd(struct bpf_link *link) 10503 { 10504 return libbpf_err_errno(close(link->fd)); 10505 } 10506 10507 struct bpf_link *bpf_link__open(const char *path) 10508 { 10509 struct bpf_link *link; 10510 int fd; 10511 10512 fd = bpf_obj_get(path); 10513 if (fd < 0) { 10514 fd = -errno; 10515 pr_warn("failed to open link at %s: %d\n", path, fd); 10516 return libbpf_err_ptr(fd); 10517 } 10518 10519 link = calloc(1, sizeof(*link)); 10520 if (!link) { 10521 close(fd); 10522 return libbpf_err_ptr(-ENOMEM); 10523 } 10524 link->detach = &bpf_link__detach_fd; 10525 link->fd = fd; 10526 10527 link->pin_path = strdup(path); 10528 if (!link->pin_path) { 10529 bpf_link__destroy(link); 10530 return libbpf_err_ptr(-ENOMEM); 10531 } 10532 10533 return link; 10534 } 10535 10536 int bpf_link__detach(struct bpf_link *link) 10537 { 10538 return bpf_link_detach(link->fd) ? -errno : 0; 10539 } 10540 10541 int bpf_link__pin(struct bpf_link *link, const char *path) 10542 { 10543 int err; 10544 10545 if (link->pin_path) 10546 return libbpf_err(-EBUSY); 10547 err = make_parent_dir(path); 10548 if (err) 10549 return libbpf_err(err); 10550 err = check_path(path); 10551 if (err) 10552 return libbpf_err(err); 10553 10554 link->pin_path = strdup(path); 10555 if (!link->pin_path) 10556 return libbpf_err(-ENOMEM); 10557 10558 if (bpf_obj_pin(link->fd, link->pin_path)) { 10559 err = -errno; 10560 zfree(&link->pin_path); 10561 return libbpf_err(err); 10562 } 10563 10564 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 10565 return 0; 10566 } 10567 10568 int bpf_link__unpin(struct bpf_link *link) 10569 { 10570 int err; 10571 10572 if (!link->pin_path) 10573 return libbpf_err(-EINVAL); 10574 10575 err = unlink(link->pin_path); 10576 if (err != 0) 10577 return -errno; 10578 10579 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 10580 zfree(&link->pin_path); 10581 return 0; 10582 } 10583 10584 struct bpf_link_perf { 10585 struct bpf_link link; 10586 int perf_event_fd; 10587 /* legacy kprobe support: keep track of probe identifier and type */ 10588 char *legacy_probe_name; 10589 bool legacy_is_kprobe; 10590 bool legacy_is_retprobe; 10591 }; 10592 10593 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); 10594 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); 10595 10596 static int bpf_link_perf_detach(struct bpf_link *link) 10597 { 10598 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10599 int err = 0; 10600 10601 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 10602 err = -errno; 10603 10604 if (perf_link->perf_event_fd != link->fd) 10605 close(perf_link->perf_event_fd); 10606 close(link->fd); 10607 10608 /* legacy uprobe/kprobe needs to 
be removed after perf event fd closure */ 10609 if (perf_link->legacy_probe_name) { 10610 if (perf_link->legacy_is_kprobe) { 10611 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, 10612 perf_link->legacy_is_retprobe); 10613 } else { 10614 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, 10615 perf_link->legacy_is_retprobe); 10616 } 10617 } 10618 10619 return err; 10620 } 10621 10622 static void bpf_link_perf_dealloc(struct bpf_link *link) 10623 { 10624 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10625 10626 free(perf_link->legacy_probe_name); 10627 free(perf_link); 10628 } 10629 10630 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, 10631 const struct bpf_perf_event_opts *opts) 10632 { 10633 char errmsg[STRERR_BUFSIZE]; 10634 struct bpf_link_perf *link; 10635 int prog_fd, link_fd = -1, err; 10636 bool force_ioctl_attach; 10637 10638 if (!OPTS_VALID(opts, bpf_perf_event_opts)) 10639 return libbpf_err_ptr(-EINVAL); 10640 10641 if (pfd < 0) { 10642 pr_warn("prog '%s': invalid perf event FD %d\n", 10643 prog->name, pfd); 10644 return libbpf_err_ptr(-EINVAL); 10645 } 10646 prog_fd = bpf_program__fd(prog); 10647 if (prog_fd < 0) { 10648 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 10649 prog->name); 10650 return libbpf_err_ptr(-EINVAL); 10651 } 10652 10653 link = calloc(1, sizeof(*link)); 10654 if (!link) 10655 return libbpf_err_ptr(-ENOMEM); 10656 link->link.detach = &bpf_link_perf_detach; 10657 link->link.dealloc = &bpf_link_perf_dealloc; 10658 link->perf_event_fd = pfd; 10659 10660 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); 10661 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { 10662 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, 10663 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); 10664 10665 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); 10666 if (link_fd < 0) { 10667 err = -errno; 10668 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", 10669 prog->name, pfd, 10670 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10671 goto err_out; 10672 } 10673 link->link.fd = link_fd; 10674 } else { 10675 if (OPTS_GET(opts, bpf_cookie, 0)) { 10676 pr_warn("prog '%s': user context value is not supported\n", prog->name); 10677 err = -EOPNOTSUPP; 10678 goto err_out; 10679 } 10680 10681 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { 10682 err = -errno; 10683 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", 10684 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10685 if (err == -EPROTO) 10686 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", 10687 prog->name, pfd); 10688 goto err_out; 10689 } 10690 link->link.fd = pfd; 10691 } 10692 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 10693 err = -errno; 10694 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", 10695 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10696 goto err_out; 10697 } 10698 10699 return &link->link; 10700 err_out: 10701 if (link_fd >= 0) 10702 close(link_fd); 10703 free(link); 10704 return libbpf_err_ptr(err); 10705 } 10706 10707 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) 10708 { 10709 return bpf_program__attach_perf_event_opts(prog, pfd, NULL); 10710 } 10711 10712 /* 10713 * this function is expected to parse 
integer in the range of [0, 2^31-1] from 10714 * given file using scanf format string fmt. If actual parsed value is 10715 * negative, the result might be indistinguishable from error 10716 */ 10717 static int parse_uint_from_file(const char *file, const char *fmt) 10718 { 10719 char buf[STRERR_BUFSIZE]; 10720 int err, ret; 10721 FILE *f; 10722 10723 f = fopen(file, "re"); 10724 if (!f) { 10725 err = -errno; 10726 pr_debug("failed to open '%s': %s\n", file, 10727 libbpf_strerror_r(err, buf, sizeof(buf))); 10728 return err; 10729 } 10730 err = fscanf(f, fmt, &ret); 10731 if (err != 1) { 10732 err = err == EOF ? -EIO : -errno; 10733 pr_debug("failed to parse '%s': %s\n", file, 10734 libbpf_strerror_r(err, buf, sizeof(buf))); 10735 fclose(f); 10736 return err; 10737 } 10738 fclose(f); 10739 return ret; 10740 } 10741 10742 static int determine_kprobe_perf_type(void) 10743 { 10744 const char *file = "/sys/bus/event_source/devices/kprobe/type"; 10745 10746 return parse_uint_from_file(file, "%d\n"); 10747 } 10748 10749 static int determine_uprobe_perf_type(void) 10750 { 10751 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 10752 10753 return parse_uint_from_file(file, "%d\n"); 10754 } 10755 10756 static int determine_kprobe_retprobe_bit(void) 10757 { 10758 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; 10759 10760 return parse_uint_from_file(file, "config:%d\n"); 10761 } 10762 10763 static int determine_uprobe_retprobe_bit(void) 10764 { 10765 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 10766 10767 return parse_uint_from_file(file, "config:%d\n"); 10768 } 10769 10770 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 10771 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 10772 10773 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, 10774 uint64_t offset, int pid, size_t ref_ctr_off) 10775 { 10776 const size_t attr_sz = sizeof(struct perf_event_attr); 10777 struct perf_event_attr attr; 10778 char errmsg[STRERR_BUFSIZE]; 10779 int type, pfd; 10780 10781 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 10782 return -EINVAL; 10783 10784 memset(&attr, 0, attr_sz); 10785 10786 type = uprobe ? determine_uprobe_perf_type() 10787 : determine_kprobe_perf_type(); 10788 if (type < 0) { 10789 pr_warn("failed to determine %s perf type: %s\n", 10790 uprobe ? "uprobe" : "kprobe", 10791 libbpf_strerror_r(type, errmsg, sizeof(errmsg))); 10792 return type; 10793 } 10794 if (retprobe) { 10795 int bit = uprobe ? determine_uprobe_retprobe_bit() 10796 : determine_kprobe_retprobe_bit(); 10797 10798 if (bit < 0) { 10799 pr_warn("failed to determine %s retprobe bit: %s\n", 10800 uprobe ? "uprobe" : "kprobe", 10801 libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); 10802 return bit; 10803 } 10804 attr.config |= 1 << bit; 10805 } 10806 attr.size = attr_sz; 10807 attr.type = type; 10808 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; 10809 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ 10810 attr.config2 = offset; /* kprobe_addr or probe_offset */ 10811 10812 /* pid filter is meaningful only for uprobes */ 10813 pfd = syscall(__NR_perf_event_open, &attr, 10814 pid < 0 ? -1 : pid /* pid */, 10815 pid == -1 ? 0 : -1 /* cpu */, 10816 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10817 return pfd >= 0 ? pfd : -errno; 10818 } 10819 10820 static int append_to_file(const char *file, const char *fmt, ...) 
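/* Editor's note: an illustrative sketch (not part of libbpf) of feeding a
 * caller-created perf event into bpf_program__attach_perf_event() above; the
 * program handle "prog" is hypothetical and error handling is elided:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.size = sizeof(attr),
 *		.config = PERF_COUNT_SW_CPU_CLOCK,
 *		.freq = 1,
 *		.sample_freq = 99,
 *	};
 *	int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *			  PERF_FLAG_FD_CLOEXEC);	// all pids, CPU 0
 *	struct bpf_link *lnk = bpf_program__attach_perf_event(prog, pfd);
 *	if (!lnk)
 *		...	// errno is set, per the libbpf_get_error() convention
 */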
10821 { 10822 int fd, n, err = 0; 10823 va_list ap; 10824 char buf[1024]; 10825 10826 va_start(ap, fmt); 10827 n = vsnprintf(buf, sizeof(buf), fmt, ap); 10828 va_end(ap); 10829 10830 if (n < 0 || n >= sizeof(buf)) 10831 return -EINVAL; 10832 10833 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); 10834 if (fd < 0) 10835 return -errno; 10836 10837 if (write(fd, buf, n) < 0) 10838 err = -errno; 10839 10840 close(fd); 10841 return err; 10842 } 10843 10844 #define DEBUGFS "/sys/kernel/debug/tracing" 10845 #define TRACEFS "/sys/kernel/tracing" 10846 10847 static bool use_debugfs(void) 10848 { 10849 static int has_debugfs = -1; 10850 10851 if (has_debugfs < 0) 10852 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; 10853 10854 return has_debugfs == 1; 10855 } 10856 10857 static const char *tracefs_path(void) 10858 { 10859 return use_debugfs() ? DEBUGFS : TRACEFS; 10860 } 10861 10862 static const char *tracefs_kprobe_events(void) 10863 { 10864 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; 10865 } 10866 10867 static const char *tracefs_uprobe_events(void) 10868 { 10869 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; 10870 } 10871 10872 static const char *tracefs_available_filter_functions(void) 10873 { 10874 return use_debugfs() ? DEBUGFS"/available_filter_functions" 10875 : TRACEFS"/available_filter_functions"; 10876 } 10877 10878 static const char *tracefs_available_filter_functions_addrs(void) 10879 { 10880 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" 10881 : TRACEFS"/available_filter_functions_addrs"; 10882 } 10883 10884 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, 10885 const char *kfunc_name, size_t offset) 10886 { 10887 static int index = 0; 10888 int i; 10889 10890 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, 10891 __sync_fetch_and_add(&index, 1)); 10892 10893 /* sanitize binary_path in the probe name */ 10894 for (i = 0; buf[i]; i++) { 10895 if (!isalnum(buf[i])) 10896 buf[i] = '_'; 10897 } 10898 } 10899 10900 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, 10901 const char *kfunc_name, size_t offset) 10902 { 10903 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", 10904 retprobe ? 'r' : 'p', 10905 retprobe ? "kretprobes" : "kprobes", 10906 probe_name, kfunc_name, offset); 10907 } 10908 10909 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) 10910 { 10911 return append_to_file(tracefs_kprobe_events(), "-:%s/%s", 10912 retprobe ? "kretprobes" : "kprobes", probe_name); 10913 } 10914 10915 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) 10916 { 10917 char file[256]; 10918 10919 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 10920 tracefs_path(), retprobe ? 
"kretprobes" : "kprobes", probe_name); 10921 10922 return parse_uint_from_file(file, "%d\n"); 10923 } 10924 10925 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, 10926 const char *kfunc_name, size_t offset, int pid) 10927 { 10928 const size_t attr_sz = sizeof(struct perf_event_attr); 10929 struct perf_event_attr attr; 10930 char errmsg[STRERR_BUFSIZE]; 10931 int type, pfd, err; 10932 10933 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); 10934 if (err < 0) { 10935 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", 10936 kfunc_name, offset, 10937 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10938 return err; 10939 } 10940 type = determine_kprobe_perf_type_legacy(probe_name, retprobe); 10941 if (type < 0) { 10942 err = type; 10943 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", 10944 kfunc_name, offset, 10945 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10946 goto err_clean_legacy; 10947 } 10948 10949 memset(&attr, 0, attr_sz); 10950 attr.size = attr_sz; 10951 attr.config = type; 10952 attr.type = PERF_TYPE_TRACEPOINT; 10953 10954 pfd = syscall(__NR_perf_event_open, &attr, 10955 pid < 0 ? -1 : pid, /* pid */ 10956 pid == -1 ? 0 : -1, /* cpu */ 10957 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10958 if (pfd < 0) { 10959 err = -errno; 10960 pr_warn("legacy kprobe perf_event_open() failed: %s\n", 10961 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10962 goto err_clean_legacy; 10963 } 10964 return pfd; 10965 10966 err_clean_legacy: 10967 /* Clear the newly added legacy kprobe_event */ 10968 remove_kprobe_event_legacy(probe_name, retprobe); 10969 return err; 10970 } 10971 10972 static const char *arch_specific_syscall_pfx(void) 10973 { 10974 #if defined(__x86_64__) 10975 return "x64"; 10976 #elif defined(__i386__) 10977 return "ia32"; 10978 #elif defined(__s390x__) 10979 return "s390x"; 10980 #elif defined(__s390__) 10981 return "s390"; 10982 #elif defined(__arm__) 10983 return "arm"; 10984 #elif defined(__aarch64__) 10985 return "arm64"; 10986 #elif defined(__mips__) 10987 return "mips"; 10988 #elif defined(__riscv) 10989 return "riscv"; 10990 #elif defined(__powerpc__) 10991 return "powerpc"; 10992 #elif defined(__powerpc64__) 10993 return "powerpc64"; 10994 #else 10995 return NULL; 10996 #endif 10997 } 10998 10999 int probe_kern_syscall_wrapper(int token_fd) 11000 { 11001 char syscall_name[64]; 11002 const char *ksys_pfx; 11003 11004 ksys_pfx = arch_specific_syscall_pfx(); 11005 if (!ksys_pfx) 11006 return 0; 11007 11008 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 11009 11010 if (determine_kprobe_perf_type() >= 0) { 11011 int pfd; 11012 11013 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 11014 if (pfd >= 0) 11015 close(pfd); 11016 11017 return pfd >= 0 ? 
1 : 0; 11018 } else { /* legacy mode */ 11019 char probe_name[128]; 11020 11021 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 11022 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 11023 return 0; 11024 11025 (void)remove_kprobe_event_legacy(probe_name, false); 11026 return 1; 11027 } 11028 } 11029 11030 struct bpf_link * 11031 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 11032 const char *func_name, 11033 const struct bpf_kprobe_opts *opts) 11034 { 11035 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11036 enum probe_attach_mode attach_mode; 11037 char errmsg[STRERR_BUFSIZE]; 11038 char *legacy_probe = NULL; 11039 struct bpf_link *link; 11040 size_t offset; 11041 bool retprobe, legacy; 11042 int pfd, err; 11043 11044 if (!OPTS_VALID(opts, bpf_kprobe_opts)) 11045 return libbpf_err_ptr(-EINVAL); 11046 11047 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11048 retprobe = OPTS_GET(opts, retprobe, false); 11049 offset = OPTS_GET(opts, offset, 0); 11050 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11051 11052 legacy = determine_kprobe_perf_type() < 0; 11053 switch (attach_mode) { 11054 case PROBE_ATTACH_MODE_LEGACY: 11055 legacy = true; 11056 pe_opts.force_ioctl_attach = true; 11057 break; 11058 case PROBE_ATTACH_MODE_PERF: 11059 if (legacy) 11060 return libbpf_err_ptr(-ENOTSUP); 11061 pe_opts.force_ioctl_attach = true; 11062 break; 11063 case PROBE_ATTACH_MODE_LINK: 11064 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11065 return libbpf_err_ptr(-ENOTSUP); 11066 break; 11067 case PROBE_ATTACH_MODE_DEFAULT: 11068 break; 11069 default: 11070 return libbpf_err_ptr(-EINVAL); 11071 } 11072 11073 if (!legacy) { 11074 pfd = perf_event_open_probe(false /* uprobe */, retprobe, 11075 func_name, offset, 11076 -1 /* pid */, 0 /* ref_ctr_off */); 11077 } else { 11078 char probe_name[256]; 11079 11080 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), 11081 func_name, offset); 11082 11083 legacy_probe = strdup(probe_name); 11084 if (!legacy_probe) 11085 return libbpf_err_ptr(-ENOMEM); 11086 11087 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, 11088 offset, -1 /* pid */); 11089 } 11090 if (pfd < 0) { 11091 err = -errno; 11092 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", 11093 prog->name, retprobe ? "kretprobe" : "kprobe", 11094 func_name, offset, 11095 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11096 goto err_out; 11097 } 11098 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11099 err = libbpf_get_error(link); 11100 if (err) { 11101 close(pfd); 11102 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", 11103 prog->name, retprobe ? 
"kretprobe" : "kprobe", 11104 func_name, offset, 11105 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11106 goto err_clean_legacy; 11107 } 11108 if (legacy) { 11109 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 11110 11111 perf_link->legacy_probe_name = legacy_probe; 11112 perf_link->legacy_is_kprobe = true; 11113 perf_link->legacy_is_retprobe = retprobe; 11114 } 11115 11116 return link; 11117 11118 err_clean_legacy: 11119 if (legacy) 11120 remove_kprobe_event_legacy(legacy_probe, retprobe); 11121 err_out: 11122 free(legacy_probe); 11123 return libbpf_err_ptr(err); 11124 } 11125 11126 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, 11127 bool retprobe, 11128 const char *func_name) 11129 { 11130 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 11131 .retprobe = retprobe, 11132 ); 11133 11134 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 11135 } 11136 11137 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 11138 const char *syscall_name, 11139 const struct bpf_ksyscall_opts *opts) 11140 { 11141 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 11142 char func_name[128]; 11143 11144 if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 11145 return libbpf_err_ptr(-EINVAL); 11146 11147 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 11148 /* arch_specific_syscall_pfx() should never return NULL here 11149 * because it is guarded by kernel_supports(). However, since 11150 * compiler does not know that we have an explicit conditional 11151 * as well. 11152 */ 11153 snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 11154 arch_specific_syscall_pfx() ? : "", syscall_name); 11155 } else { 11156 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 11157 } 11158 11159 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 11160 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11161 11162 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 11163 } 11164 11165 /* Adapted from perf/util/string.c */ 11166 bool glob_match(const char *str, const char *pat) 11167 { 11168 while (*str && *pat && *pat != '*') { 11169 if (*pat == '?') { /* Matches any single character */ 11170 str++; 11171 pat++; 11172 continue; 11173 } 11174 if (*str != *pat) 11175 return false; 11176 str++; 11177 pat++; 11178 } 11179 /* Check wild card */ 11180 if (*pat == '*') { 11181 while (*pat == '*') 11182 pat++; 11183 if (!*pat) /* Tail wild card matches all */ 11184 return true; 11185 while (*str) 11186 if (glob_match(str++, pat)) 11187 return true; 11188 } 11189 return !*str && !*pat; 11190 } 11191 11192 struct kprobe_multi_resolve { 11193 const char *pattern; 11194 unsigned long *addrs; 11195 size_t cap; 11196 size_t cnt; 11197 }; 11198 11199 struct avail_kallsyms_data { 11200 char **syms; 11201 size_t cnt; 11202 struct kprobe_multi_resolve *res; 11203 }; 11204 11205 static int avail_func_cmp(const void *a, const void *b) 11206 { 11207 return strcmp(*(const char **)a, *(const char **)b); 11208 } 11209 11210 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, 11211 const char *sym_name, void *ctx) 11212 { 11213 struct avail_kallsyms_data *data = ctx; 11214 struct kprobe_multi_resolve *res = data->res; 11215 int err; 11216 11217 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) 11218 return 0; 11219 11220 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); 11221 if (err) 11222 return err; 11223 11224 
res->addrs[res->cnt++] = (unsigned long)sym_addr; 11225 return 0; 11226 } 11227 11228 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) 11229 { 11230 const char *available_functions_file = tracefs_available_filter_functions(); 11231 struct avail_kallsyms_data data; 11232 char sym_name[500]; 11233 FILE *f; 11234 int err = 0, ret, i; 11235 char **syms = NULL; 11236 size_t cap = 0, cnt = 0; 11237 11238 f = fopen(available_functions_file, "re"); 11239 if (!f) { 11240 err = -errno; 11241 pr_warn("failed to open %s: %d\n", available_functions_file, err); 11242 return err; 11243 } 11244 11245 while (true) { 11246 char *name; 11247 11248 ret = fscanf(f, "%499s%*[^\n]\n", sym_name); 11249 if (ret == EOF && feof(f)) 11250 break; 11251 11252 if (ret != 1) { 11253 pr_warn("failed to parse available_filter_functions entry: %d\n", ret); 11254 err = -EINVAL; 11255 goto cleanup; 11256 } 11257 11258 if (!glob_match(sym_name, res->pattern)) 11259 continue; 11260 11261 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); 11262 if (err) 11263 goto cleanup; 11264 11265 name = strdup(sym_name); 11266 if (!name) { 11267 err = -errno; 11268 goto cleanup; 11269 } 11270 11271 syms[cnt++] = name; 11272 } 11273 11274 /* no entries found, bail out */ 11275 if (cnt == 0) { 11276 err = -ENOENT; 11277 goto cleanup; 11278 } 11279 11280 /* sort available functions */ 11281 qsort(syms, cnt, sizeof(*syms), avail_func_cmp); 11282 11283 data.syms = syms; 11284 data.res = res; 11285 data.cnt = cnt; 11286 libbpf_kallsyms_parse(avail_kallsyms_cb, &data); 11287 11288 if (res->cnt == 0) 11289 err = -ENOENT; 11290 11291 cleanup: 11292 for (i = 0; i < cnt; i++) 11293 free((char *)syms[i]); 11294 free(syms); 11295 11296 fclose(f); 11297 return err; 11298 } 11299 11300 static bool has_available_filter_functions_addrs(void) 11301 { 11302 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; 11303 } 11304 11305 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) 11306 { 11307 const char *available_path = tracefs_available_filter_functions_addrs(); 11308 char sym_name[500]; 11309 FILE *f; 11310 int ret, err = 0; 11311 unsigned long long sym_addr; 11312 11313 f = fopen(available_path, "re"); 11314 if (!f) { 11315 err = -errno; 11316 pr_warn("failed to open %s: %d\n", available_path, err); 11317 return err; 11318 } 11319 11320 while (true) { 11321 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); 11322 if (ret == EOF && feof(f)) 11323 break; 11324 11325 if (ret != 2) { 11326 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", 11327 ret); 11328 err = -EINVAL; 11329 goto cleanup; 11330 } 11331 11332 if (!glob_match(sym_name, res->pattern)) 11333 continue; 11334 11335 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, 11336 sizeof(*res->addrs), res->cnt + 1); 11337 if (err) 11338 goto cleanup; 11339 11340 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11341 } 11342 11343 if (res->cnt == 0) 11344 err = -ENOENT; 11345 11346 cleanup: 11347 fclose(f); 11348 return err; 11349 } 11350 11351 struct bpf_link * 11352 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 11353 const char *pattern, 11354 const struct bpf_kprobe_multi_opts *opts) 11355 { 11356 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11357 struct kprobe_multi_resolve res = { 11358 .pattern = pattern, 11359 }; 11360 struct bpf_link *link = NULL; 11361 char errmsg[STRERR_BUFSIZE]; 11362 const unsigned long *addrs; 11363 int err, link_fd, prog_fd; 
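/* Editor's note: illustrative attach sketches (not part of libbpf) for the
 * kprobe attach APIs in this file; the program handle and kernel symbol names
 * are only examples. A single kprobe with a BPF cookie, a kretprobe, and a
 * multi-kprobe link driven either by a glob pattern or by explicit
 * symbol/cookie arrays:
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, kopts, .bpf_cookie = 0x1234);
 *	struct bpf_link *kl = bpf_program__attach_kprobe_opts(prog,
 *						"tcp_v4_connect", &kopts);
 *	struct bpf_link *krl = bpf_program__attach_kprobe(prog, true,
 *						"tcp_v4_connect");
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, mopts);
 *	struct bpf_link *ml;
 *	ml = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &mopts);
 *
 *	// or explicit symbols with per-symbol cookies (pattern must be NULL):
 *	const char *syms[] = { "tcp_v4_connect", "tcp_v6_connect" };
 *	__u64 cookies[] = { 1, 2 };
 *	mopts.syms = syms;
 *	mopts.cookies = cookies;
 *	mopts.cnt = 2;
 *	ml = bpf_program__attach_kprobe_multi_opts(prog, NULL, &mopts);
 */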
11364 const __u64 *cookies; 11365 const char **syms; 11366 bool retprobe; 11367 size_t cnt; 11368 11369 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) 11370 return libbpf_err_ptr(-EINVAL); 11371 11372 prog_fd = bpf_program__fd(prog); 11373 if (prog_fd < 0) { 11374 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 11375 prog->name); 11376 return libbpf_err_ptr(-EINVAL); 11377 } 11378 11379 syms = OPTS_GET(opts, syms, false); 11380 addrs = OPTS_GET(opts, addrs, false); 11381 cnt = OPTS_GET(opts, cnt, false); 11382 cookies = OPTS_GET(opts, cookies, false); 11383 11384 if (!pattern && !addrs && !syms) 11385 return libbpf_err_ptr(-EINVAL); 11386 if (pattern && (addrs || syms || cookies || cnt)) 11387 return libbpf_err_ptr(-EINVAL); 11388 if (!pattern && !cnt) 11389 return libbpf_err_ptr(-EINVAL); 11390 if (addrs && syms) 11391 return libbpf_err_ptr(-EINVAL); 11392 11393 if (pattern) { 11394 if (has_available_filter_functions_addrs()) 11395 err = libbpf_available_kprobes_parse(&res); 11396 else 11397 err = libbpf_available_kallsyms_parse(&res); 11398 if (err) 11399 goto error; 11400 addrs = res.addrs; 11401 cnt = res.cnt; 11402 } 11403 11404 retprobe = OPTS_GET(opts, retprobe, false); 11405 11406 lopts.kprobe_multi.syms = syms; 11407 lopts.kprobe_multi.addrs = addrs; 11408 lopts.kprobe_multi.cookies = cookies; 11409 lopts.kprobe_multi.cnt = cnt; 11410 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0; 11411 11412 link = calloc(1, sizeof(*link)); 11413 if (!link) { 11414 err = -ENOMEM; 11415 goto error; 11416 } 11417 link->detach = &bpf_link__detach_fd; 11418 11419 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts); 11420 if (link_fd < 0) { 11421 err = -errno; 11422 pr_warn("prog '%s': failed to attach: %s\n", 11423 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11424 goto error; 11425 } 11426 link->fd = link_fd; 11427 free(res.addrs); 11428 return link; 11429 11430 error: 11431 free(link); 11432 free(res.addrs); 11433 return libbpf_err_ptr(err); 11434 } 11435 11436 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11437 { 11438 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); 11439 unsigned long offset = 0; 11440 const char *func_name; 11441 char *func; 11442 int n; 11443 11444 *link = NULL; 11445 11446 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ 11447 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) 11448 return 0; 11449 11450 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); 11451 if (opts.retprobe) 11452 func_name = prog->sec_name + sizeof("kretprobe/") - 1; 11453 else 11454 func_name = prog->sec_name + sizeof("kprobe/") - 1; 11455 11456 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); 11457 if (n < 1) { 11458 pr_warn("kprobe name is invalid: %s\n", func_name); 11459 return -EINVAL; 11460 } 11461 if (opts.retprobe && offset != 0) { 11462 free(func); 11463 pr_warn("kretprobes do not support offset specification\n"); 11464 return -EINVAL; 11465 } 11466 11467 opts.offset = offset; 11468 *link = bpf_program__attach_kprobe_opts(prog, func, &opts); 11469 free(func); 11470 return libbpf_get_error(*link); 11471 } 11472 11473 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11474 { 11475 LIBBPF_OPTS(bpf_ksyscall_opts, opts); 11476 const char *syscall_name; 11477 11478 *link = NULL; 11479 11480 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ 11481 if 
(strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11482		return 0;
11483
11484	opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11485	if (opts.retprobe)
11486		syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11487	else
11488		syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11489
11490	*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11491	return *link ? 0 : -errno;
11492 }
11493
11494 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11495 {
11496	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11497	const char *spec;
11498	char *pattern;
11499	int n;
11500
11501	*link = NULL;
11502
11503	/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11504	if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11505	    strcmp(prog->sec_name, "kretprobe.multi") == 0)
11506		return 0;
11507
11508	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11509	if (opts.retprobe)
11510		spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11511	else
11512		spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11513
11514	n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11515	if (n < 1) {
11516		/* pattern is not assigned by sscanf() on failure, so report spec */
		pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11517		return -EINVAL;
11518	}
11519
11520	*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11521	free(pattern);
11522	return libbpf_get_error(*link);
11523 }
11524
11525 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11526 {
11527	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11528	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11529	int n, ret = -EINVAL;
11530
11531	*link = NULL;
11532
11533	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11534		   &probe_type, &binary_path, &func_name);
11535	switch (n) {
11536	case 1:
11537		/* handle SEC("u[ret]probe.multi") - format is valid, but auto-attach is impossible. */
11538		ret = 0;
11539		break;
11540	case 3:
11541		opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
11542		*link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11543		ret = libbpf_get_error(*link);
11544		break;
11545	default:
11546		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11547			prog->sec_name);
11548		break;
11549	}
11550	free(probe_type);
11551	free(binary_path);
11552	free(func_name);
11553	return ret;
11554 }
11555
11556 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
11557					 const char *binary_path, uint64_t offset)
11558 {
11559	int i;
11560
11561	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
11562
11563	/* sanitize binary_path in the probe name */
11564	for (i = 0; buf[i]; i++) {
11565		if (!isalnum(buf[i]))
11566			buf[i] = '_';
11567	}
11568 }
11569
11570 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11571					  const char *binary_path, size_t offset)
11572 {
11573	return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11574			      retprobe ? 'r' : 'p',
11575			      retprobe ? "uretprobes" : "uprobes",
11576			      probe_name, binary_path, offset);
11577 }
11578
11579 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11580 {
11581	return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11582			      retprobe ?
"uretprobes" : "uprobes", probe_name); 11583 } 11584 11585 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11586 { 11587 char file[512]; 11588 11589 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11590 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); 11591 11592 return parse_uint_from_file(file, "%d\n"); 11593 } 11594 11595 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, 11596 const char *binary_path, size_t offset, int pid) 11597 { 11598 const size_t attr_sz = sizeof(struct perf_event_attr); 11599 struct perf_event_attr attr; 11600 int type, pfd, err; 11601 11602 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); 11603 if (err < 0) { 11604 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", 11605 binary_path, (size_t)offset, err); 11606 return err; 11607 } 11608 type = determine_uprobe_perf_type_legacy(probe_name, retprobe); 11609 if (type < 0) { 11610 err = type; 11611 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", 11612 binary_path, offset, err); 11613 goto err_clean_legacy; 11614 } 11615 11616 memset(&attr, 0, attr_sz); 11617 attr.size = attr_sz; 11618 attr.config = type; 11619 attr.type = PERF_TYPE_TRACEPOINT; 11620 11621 pfd = syscall(__NR_perf_event_open, &attr, 11622 pid < 0 ? -1 : pid, /* pid */ 11623 pid == -1 ? 0 : -1, /* cpu */ 11624 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11625 if (pfd < 0) { 11626 err = -errno; 11627 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); 11628 goto err_clean_legacy; 11629 } 11630 return pfd; 11631 11632 err_clean_legacy: 11633 /* Clear the newly added legacy uprobe_event */ 11634 remove_uprobe_event_legacy(probe_name, retprobe); 11635 return err; 11636 } 11637 11638 /* Find offset of function name in archive specified by path. Currently 11639 * supported are .zip files that do not compress their contents, as used on 11640 * Android in the form of APKs, for example. "file_name" is the name of the ELF 11641 * file inside the archive. "func_name" matches symbol name or name@@LIB for 11642 * library functions. 
11643 * 11644 * An overview of the APK format specifically provided here: 11645 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents 11646 */ 11647 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, 11648 const char *func_name) 11649 { 11650 struct zip_archive *archive; 11651 struct zip_entry entry; 11652 long ret; 11653 Elf *elf; 11654 11655 archive = zip_archive_open(archive_path); 11656 if (IS_ERR(archive)) { 11657 ret = PTR_ERR(archive); 11658 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); 11659 return ret; 11660 } 11661 11662 ret = zip_archive_find_entry(archive, file_name, &entry); 11663 if (ret) { 11664 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, 11665 archive_path, ret); 11666 goto out; 11667 } 11668 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, 11669 (unsigned long)entry.data_offset); 11670 11671 if (entry.compression) { 11672 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, 11673 archive_path); 11674 ret = -LIBBPF_ERRNO__FORMAT; 11675 goto out; 11676 } 11677 11678 elf = elf_memory((void *)entry.data, entry.data_length); 11679 if (!elf) { 11680 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, 11681 elf_errmsg(-1)); 11682 ret = -LIBBPF_ERRNO__LIBELF; 11683 goto out; 11684 } 11685 11686 ret = elf_find_func_offset(elf, file_name, func_name); 11687 if (ret > 0) { 11688 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", 11689 func_name, file_name, archive_path, entry.data_offset, ret, 11690 ret + entry.data_offset); 11691 ret += entry.data_offset; 11692 } 11693 elf_end(elf); 11694 11695 out: 11696 zip_archive_close(archive); 11697 return ret; 11698 } 11699 11700 static const char *arch_specific_lib_paths(void) 11701 { 11702 /* 11703 * Based on https://packages.debian.org/sid/libc6. 11704 * 11705 * Assume that the traced program is built for the same architecture 11706 * as libbpf, which should cover the vast majority of cases. 11707 */ 11708 #if defined(__x86_64__) 11709 return "/lib/x86_64-linux-gnu"; 11710 #elif defined(__i386__) 11711 return "/lib/i386-linux-gnu"; 11712 #elif defined(__s390x__) 11713 return "/lib/s390x-linux-gnu"; 11714 #elif defined(__s390__) 11715 return "/lib/s390-linux-gnu"; 11716 #elif defined(__arm__) && defined(__SOFTFP__) 11717 return "/lib/arm-linux-gnueabi"; 11718 #elif defined(__arm__) && !defined(__SOFTFP__) 11719 return "/lib/arm-linux-gnueabihf"; 11720 #elif defined(__aarch64__) 11721 return "/lib/aarch64-linux-gnu"; 11722 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 11723 return "/lib/mips64el-linux-gnuabi64"; 11724 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 11725 return "/lib/mipsel-linux-gnu"; 11726 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 11727 return "/lib/powerpc64le-linux-gnu"; 11728 #elif defined(__sparc__) && defined(__arch64__) 11729 return "/lib/sparc64-linux-gnu"; 11730 #elif defined(__riscv) && __riscv_xlen == 64 11731 return "/lib/riscv64-linux-gnu"; 11732 #else 11733 return NULL; 11734 #endif 11735 } 11736 11737 /* Get full path to program/shared library. 
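 *
 * The file is searched for in LD_LIBRARY_PATH or PATH (depending on whether
 * it names a shared library), then in a few default directories, including an
 * arch-specific one; the first candidate with the required permissions
 * (readable, plus executable for programs) is returned.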
*/ 11738 static int resolve_full_path(const char *file, char *result, size_t result_sz) 11739 { 11740 const char *search_paths[3] = {}; 11741 int i, perm; 11742 11743 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { 11744 search_paths[0] = getenv("LD_LIBRARY_PATH"); 11745 search_paths[1] = "/usr/lib64:/usr/lib"; 11746 search_paths[2] = arch_specific_lib_paths(); 11747 perm = R_OK; 11748 } else { 11749 search_paths[0] = getenv("PATH"); 11750 search_paths[1] = "/usr/bin:/usr/sbin"; 11751 perm = R_OK | X_OK; 11752 } 11753 11754 for (i = 0; i < ARRAY_SIZE(search_paths); i++) { 11755 const char *s; 11756 11757 if (!search_paths[i]) 11758 continue; 11759 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { 11760 char *next_path; 11761 int seg_len; 11762 11763 if (s[0] == ':') 11764 s++; 11765 next_path = strchr(s, ':'); 11766 seg_len = next_path ? next_path - s : strlen(s); 11767 if (!seg_len) 11768 continue; 11769 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); 11770 /* ensure it has required permissions */ 11771 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) 11772 continue; 11773 pr_debug("resolved '%s' to '%s'\n", file, result); 11774 return 0; 11775 } 11776 } 11777 return -ENOENT; 11778 } 11779 11780 struct bpf_link * 11781 bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 11782 pid_t pid, 11783 const char *path, 11784 const char *func_pattern, 11785 const struct bpf_uprobe_multi_opts *opts) 11786 { 11787 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 11788 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11789 unsigned long *resolved_offsets = NULL; 11790 int err = 0, link_fd, prog_fd; 11791 struct bpf_link *link = NULL; 11792 char errmsg[STRERR_BUFSIZE]; 11793 char full_path[PATH_MAX]; 11794 const __u64 *cookies; 11795 const char **syms; 11796 size_t cnt; 11797 11798 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 11799 return libbpf_err_ptr(-EINVAL); 11800 11801 prog_fd = bpf_program__fd(prog); 11802 if (prog_fd < 0) { 11803 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 11804 prog->name); 11805 return libbpf_err_ptr(-EINVAL); 11806 } 11807 11808 syms = OPTS_GET(opts, syms, NULL); 11809 offsets = OPTS_GET(opts, offsets, NULL); 11810 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 11811 cookies = OPTS_GET(opts, cookies, NULL); 11812 cnt = OPTS_GET(opts, cnt, 0); 11813 11814 /* 11815 * User can specify 2 mutually exclusive set of inputs: 11816 * 11817 * 1) use only path/func_pattern/pid arguments 11818 * 11819 * 2) use path/pid with allowed combinations of: 11820 * syms/offsets/ref_ctr_offsets/cookies/cnt 11821 * 11822 * - syms and offsets are mutually exclusive 11823 * - ref_ctr_offsets and cookies are optional 11824 * 11825 * Any other usage results in error. 
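 *
 * For example (illustrative only), attaching to every function matching
 * "foo_*" in a hypothetical ./mybinary within the calling process could look
 * like:
 *
 *   bpf_program__attach_uprobe_multi(prog, getpid(), "./mybinary", "foo_*", NULL);
 *
 * whereas the second mode passes a NULL func_pattern and fills opts->syms (or
 * opts->offsets) together with opts->cnt.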
11826 */ 11827 11828 if (!path) 11829 return libbpf_err_ptr(-EINVAL); 11830 if (!func_pattern && cnt == 0) 11831 return libbpf_err_ptr(-EINVAL); 11832 11833 if (func_pattern) { 11834 if (syms || offsets || ref_ctr_offsets || cookies || cnt) 11835 return libbpf_err_ptr(-EINVAL); 11836 } else { 11837 if (!!syms == !!offsets) 11838 return libbpf_err_ptr(-EINVAL); 11839 } 11840 11841 if (func_pattern) { 11842 if (!strchr(path, '/')) { 11843 err = resolve_full_path(path, full_path, sizeof(full_path)); 11844 if (err) { 11845 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11846 prog->name, path, err); 11847 return libbpf_err_ptr(err); 11848 } 11849 path = full_path; 11850 } 11851 11852 err = elf_resolve_pattern_offsets(path, func_pattern, 11853 &resolved_offsets, &cnt); 11854 if (err < 0) 11855 return libbpf_err_ptr(err); 11856 offsets = resolved_offsets; 11857 } else if (syms) { 11858 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); 11859 if (err < 0) 11860 return libbpf_err_ptr(err); 11861 offsets = resolved_offsets; 11862 } 11863 11864 lopts.uprobe_multi.path = path; 11865 lopts.uprobe_multi.offsets = offsets; 11866 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 11867 lopts.uprobe_multi.cookies = cookies; 11868 lopts.uprobe_multi.cnt = cnt; 11869 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; 11870 11871 if (pid == 0) 11872 pid = getpid(); 11873 if (pid > 0) 11874 lopts.uprobe_multi.pid = pid; 11875 11876 link = calloc(1, sizeof(*link)); 11877 if (!link) { 11878 err = -ENOMEM; 11879 goto error; 11880 } 11881 link->detach = &bpf_link__detach_fd; 11882 11883 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); 11884 if (link_fd < 0) { 11885 err = -errno; 11886 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 11887 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11888 goto error; 11889 } 11890 link->fd = link_fd; 11891 free(resolved_offsets); 11892 return link; 11893 11894 error: 11895 free(resolved_offsets); 11896 free(link); 11897 return libbpf_err_ptr(err); 11898 } 11899 11900 LIBBPF_API struct bpf_link * 11901 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, 11902 const char *binary_path, size_t func_offset, 11903 const struct bpf_uprobe_opts *opts) 11904 { 11905 const char *archive_path = NULL, *archive_sep = NULL; 11906 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; 11907 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11908 enum probe_attach_mode attach_mode; 11909 char full_path[PATH_MAX]; 11910 struct bpf_link *link; 11911 size_t ref_ctr_off; 11912 int pfd, err; 11913 bool retprobe, legacy; 11914 const char *func_name; 11915 11916 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 11917 return libbpf_err_ptr(-EINVAL); 11918 11919 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11920 retprobe = OPTS_GET(opts, retprobe, false); 11921 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); 11922 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11923 11924 if (!binary_path) 11925 return libbpf_err_ptr(-EINVAL); 11926 11927 /* Check if "binary_path" refers to an archive. 
*/ 11928 archive_sep = strstr(binary_path, "!/"); 11929 if (archive_sep) { 11930 full_path[0] = '\0'; 11931 libbpf_strlcpy(full_path, binary_path, 11932 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); 11933 archive_path = full_path; 11934 binary_path = archive_sep + 2; 11935 } else if (!strchr(binary_path, '/')) { 11936 err = resolve_full_path(binary_path, full_path, sizeof(full_path)); 11937 if (err) { 11938 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11939 prog->name, binary_path, err); 11940 return libbpf_err_ptr(err); 11941 } 11942 binary_path = full_path; 11943 } 11944 func_name = OPTS_GET(opts, func_name, NULL); 11945 if (func_name) { 11946 long sym_off; 11947 11948 if (archive_path) { 11949 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, 11950 func_name); 11951 binary_path = archive_path; 11952 } else { 11953 sym_off = elf_find_func_offset_from_file(binary_path, func_name); 11954 } 11955 if (sym_off < 0) 11956 return libbpf_err_ptr(sym_off); 11957 func_offset += sym_off; 11958 } 11959 11960 legacy = determine_uprobe_perf_type() < 0; 11961 switch (attach_mode) { 11962 case PROBE_ATTACH_MODE_LEGACY: 11963 legacy = true; 11964 pe_opts.force_ioctl_attach = true; 11965 break; 11966 case PROBE_ATTACH_MODE_PERF: 11967 if (legacy) 11968 return libbpf_err_ptr(-ENOTSUP); 11969 pe_opts.force_ioctl_attach = true; 11970 break; 11971 case PROBE_ATTACH_MODE_LINK: 11972 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11973 return libbpf_err_ptr(-ENOTSUP); 11974 break; 11975 case PROBE_ATTACH_MODE_DEFAULT: 11976 break; 11977 default: 11978 return libbpf_err_ptr(-EINVAL); 11979 } 11980 11981 if (!legacy) { 11982 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, 11983 func_offset, pid, ref_ctr_off); 11984 } else { 11985 char probe_name[PATH_MAX + 64]; 11986 11987 if (ref_ctr_off) 11988 return libbpf_err_ptr(-EINVAL); 11989 11990 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), 11991 binary_path, func_offset); 11992 11993 legacy_probe = strdup(probe_name); 11994 if (!legacy_probe) 11995 return libbpf_err_ptr(-ENOMEM); 11996 11997 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, 11998 binary_path, func_offset, pid); 11999 } 12000 if (pfd < 0) { 12001 err = -errno; 12002 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", 12003 prog->name, retprobe ? "uretprobe" : "uprobe", 12004 binary_path, func_offset, 12005 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12006 goto err_out; 12007 } 12008 12009 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12010 err = libbpf_get_error(link); 12011 if (err) { 12012 close(pfd); 12013 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", 12014 prog->name, retprobe ? 
"uretprobe" : "uprobe", 12015 binary_path, func_offset, 12016 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12017 goto err_clean_legacy; 12018 } 12019 if (legacy) { 12020 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 12021 12022 perf_link->legacy_probe_name = legacy_probe; 12023 perf_link->legacy_is_kprobe = false; 12024 perf_link->legacy_is_retprobe = retprobe; 12025 } 12026 return link; 12027 12028 err_clean_legacy: 12029 if (legacy) 12030 remove_uprobe_event_legacy(legacy_probe, retprobe); 12031 err_out: 12032 free(legacy_probe); 12033 return libbpf_err_ptr(err); 12034 } 12035 12036 /* Format of u[ret]probe section definition supporting auto-attach: 12037 * u[ret]probe/binary:function[+offset] 12038 * 12039 * binary can be an absolute/relative path or a filename; the latter is resolved to a 12040 * full binary path via bpf_program__attach_uprobe_opts. 12041 * 12042 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be 12043 * specified (and auto-attach is not possible) or the above format is specified for 12044 * auto-attach. 12045 */ 12046 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12047 { 12048 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); 12049 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; 12050 int n, c, ret = -EINVAL; 12051 long offset = 0; 12052 12053 *link = NULL; 12054 12055 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 12056 &probe_type, &binary_path, &func_name); 12057 switch (n) { 12058 case 1: 12059 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 12060 ret = 0; 12061 break; 12062 case 2: 12063 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", 12064 prog->name, prog->sec_name); 12065 break; 12066 case 3: 12067 /* check if user specifies `+offset`, if yes, this should be 12068 * the last part of the string, make sure sscanf read to EOL 12069 */ 12070 func_off = strrchr(func_name, '+'); 12071 if (func_off) { 12072 n = sscanf(func_off, "+%li%n", &offset, &c); 12073 if (n == 1 && *(func_off + c) == '\0') 12074 func_off[0] = '\0'; 12075 else 12076 offset = 0; 12077 } 12078 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || 12079 strcmp(probe_type, "uretprobe.s") == 0; 12080 if (opts.retprobe && offset != 0) { 12081 pr_warn("prog '%s': uretprobes do not support offset specification\n", 12082 prog->name); 12083 break; 12084 } 12085 opts.func_name = func_name; 12086 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); 12087 ret = libbpf_get_error(*link); 12088 break; 12089 default: 12090 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 12091 prog->sec_name); 12092 break; 12093 } 12094 free(probe_type); 12095 free(binary_path); 12096 free(func_name); 12097 12098 return ret; 12099 } 12100 12101 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, 12102 bool retprobe, pid_t pid, 12103 const char *binary_path, 12104 size_t func_offset) 12105 { 12106 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); 12107 12108 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); 12109 } 12110 12111 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, 12112 pid_t pid, const char *binary_path, 12113 const char *usdt_provider, const char *usdt_name, 12114 const struct bpf_usdt_opts *opts) 12115 { 12116 char resolved_path[512]; 12117 struct 
bpf_object *obj = prog->obj; 12118 struct bpf_link *link; 12119 __u64 usdt_cookie; 12120 int err; 12121 12122 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 12123 return libbpf_err_ptr(-EINVAL); 12124 12125 if (bpf_program__fd(prog) < 0) { 12126 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12127 prog->name); 12128 return libbpf_err_ptr(-EINVAL); 12129 } 12130 12131 if (!binary_path) 12132 return libbpf_err_ptr(-EINVAL); 12133 12134 if (!strchr(binary_path, '/')) { 12135 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); 12136 if (err) { 12137 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 12138 prog->name, binary_path, err); 12139 return libbpf_err_ptr(err); 12140 } 12141 binary_path = resolved_path; 12142 } 12143 12144 /* USDT manager is instantiated lazily on first USDT attach. It will 12145 * be destroyed together with BPF object in bpf_object__close(). 12146 */ 12147 if (IS_ERR(obj->usdt_man)) 12148 return libbpf_ptr(obj->usdt_man); 12149 if (!obj->usdt_man) { 12150 obj->usdt_man = usdt_manager_new(obj); 12151 if (IS_ERR(obj->usdt_man)) 12152 return libbpf_ptr(obj->usdt_man); 12153 } 12154 12155 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); 12156 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, 12157 usdt_provider, usdt_name, usdt_cookie); 12158 err = libbpf_get_error(link); 12159 if (err) 12160 return libbpf_err_ptr(err); 12161 return link; 12162 } 12163 12164 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12165 { 12166 char *path = NULL, *provider = NULL, *name = NULL; 12167 const char *sec_name; 12168 int n, err; 12169 12170 sec_name = bpf_program__section_name(prog); 12171 if (strcmp(sec_name, "usdt") == 0) { 12172 /* no auto-attach for just SEC("usdt") */ 12173 *link = NULL; 12174 return 0; 12175 } 12176 12177 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); 12178 if (n != 3) { 12179 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", 12180 sec_name); 12181 err = -EINVAL; 12182 } else { 12183 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, 12184 provider, name, NULL); 12185 err = libbpf_get_error(*link); 12186 } 12187 free(path); 12188 free(provider); 12189 free(name); 12190 return err; 12191 } 12192 12193 static int determine_tracepoint_id(const char *tp_category, 12194 const char *tp_name) 12195 { 12196 char file[PATH_MAX]; 12197 int ret; 12198 12199 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", 12200 tracefs_path(), tp_category, tp_name); 12201 if (ret < 0) 12202 return -errno; 12203 if (ret >= sizeof(file)) { 12204 pr_debug("tracepoint %s/%s path is too long\n", 12205 tp_category, tp_name); 12206 return -E2BIG; 12207 } 12208 return parse_uint_from_file(file, "%d\n"); 12209 } 12210 12211 static int perf_event_open_tracepoint(const char *tp_category, 12212 const char *tp_name) 12213 { 12214 const size_t attr_sz = sizeof(struct perf_event_attr); 12215 struct perf_event_attr attr; 12216 char errmsg[STRERR_BUFSIZE]; 12217 int tp_id, pfd, err; 12218 12219 tp_id = determine_tracepoint_id(tp_category, tp_name); 12220 if (tp_id < 0) { 12221 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", 12222 tp_category, tp_name, 12223 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); 12224 return tp_id; 12225 } 12226 12227 memset(&attr, 0, attr_sz); 12228 attr.type = PERF_TYPE_TRACEPOINT; 12229 attr.size = attr_sz; 12230 attr.config = tp_id; 12231 12232 pfd 
= syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, 12233 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 12234 if (pfd < 0) { 12235 err = -errno; 12236 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", 12237 tp_category, tp_name, 12238 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12239 return err; 12240 } 12241 return pfd; 12242 } 12243 12244 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, 12245 const char *tp_category, 12246 const char *tp_name, 12247 const struct bpf_tracepoint_opts *opts) 12248 { 12249 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12250 char errmsg[STRERR_BUFSIZE]; 12251 struct bpf_link *link; 12252 int pfd, err; 12253 12254 if (!OPTS_VALID(opts, bpf_tracepoint_opts)) 12255 return libbpf_err_ptr(-EINVAL); 12256 12257 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12258 12259 pfd = perf_event_open_tracepoint(tp_category, tp_name); 12260 if (pfd < 0) { 12261 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", 12262 prog->name, tp_category, tp_name, 12263 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12264 return libbpf_err_ptr(pfd); 12265 } 12266 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12267 err = libbpf_get_error(link); 12268 if (err) { 12269 close(pfd); 12270 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", 12271 prog->name, tp_category, tp_name, 12272 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12273 return libbpf_err_ptr(err); 12274 } 12275 return link; 12276 } 12277 12278 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, 12279 const char *tp_category, 12280 const char *tp_name) 12281 { 12282 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); 12283 } 12284 12285 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12286 { 12287 char *sec_name, *tp_cat, *tp_name; 12288 12289 *link = NULL; 12290 12291 /* no auto-attach for SEC("tp") or SEC("tracepoint") */ 12292 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) 12293 return 0; 12294 12295 sec_name = strdup(prog->sec_name); 12296 if (!sec_name) 12297 return -ENOMEM; 12298 12299 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ 12300 if (str_has_pfx(prog->sec_name, "tp/")) 12301 tp_cat = sec_name + sizeof("tp/") - 1; 12302 else 12303 tp_cat = sec_name + sizeof("tracepoint/") - 1; 12304 tp_name = strchr(tp_cat, '/'); 12305 if (!tp_name) { 12306 free(sec_name); 12307 return -EINVAL; 12308 } 12309 *tp_name = '\0'; 12310 tp_name++; 12311 12312 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); 12313 free(sec_name); 12314 return libbpf_get_error(*link); 12315 } 12316 12317 struct bpf_link * 12318 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, 12319 const char *tp_name, 12320 struct bpf_raw_tracepoint_opts *opts) 12321 { 12322 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); 12323 char errmsg[STRERR_BUFSIZE]; 12324 struct bpf_link *link; 12325 int prog_fd, pfd; 12326 12327 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) 12328 return libbpf_err_ptr(-EINVAL); 12329 12330 prog_fd = bpf_program__fd(prog); 12331 if (prog_fd < 0) { 12332 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12333 return libbpf_err_ptr(-EINVAL); 12334 } 12335 12336 link = calloc(1, sizeof(*link)); 12337 if (!link) 12338 return libbpf_err_ptr(-ENOMEM); 12339 link->detach = &bpf_link__detach_fd; 12340 12341 raw_opts.tp_name = 
tp_name; 12342 raw_opts.cookie = OPTS_GET(opts, cookie, 0); 12343 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); 12344 if (pfd < 0) { 12345 pfd = -errno; 12346 free(link); 12347 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", 12348 prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12349 return libbpf_err_ptr(pfd); 12350 } 12351 link->fd = pfd; 12352 return link; 12353 } 12354 12355 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, 12356 const char *tp_name) 12357 { 12358 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); 12359 } 12360 12361 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12362 { 12363 static const char *const prefixes[] = { 12364 "raw_tp", 12365 "raw_tracepoint", 12366 "raw_tp.w", 12367 "raw_tracepoint.w", 12368 }; 12369 size_t i; 12370 const char *tp_name = NULL; 12371 12372 *link = NULL; 12373 12374 for (i = 0; i < ARRAY_SIZE(prefixes); i++) { 12375 size_t pfx_len; 12376 12377 if (!str_has_pfx(prog->sec_name, prefixes[i])) 12378 continue; 12379 12380 pfx_len = strlen(prefixes[i]); 12381 /* no auto-attach case of, e.g., SEC("raw_tp") */ 12382 if (prog->sec_name[pfx_len] == '\0') 12383 return 0; 12384 12385 if (prog->sec_name[pfx_len] != '/') 12386 continue; 12387 12388 tp_name = prog->sec_name + pfx_len + 1; 12389 break; 12390 } 12391 12392 if (!tp_name) { 12393 pr_warn("prog '%s': invalid section name '%s'\n", 12394 prog->name, prog->sec_name); 12395 return -EINVAL; 12396 } 12397 12398 *link = bpf_program__attach_raw_tracepoint(prog, tp_name); 12399 return libbpf_get_error(*link); 12400 } 12401 12402 /* Common logic for all BPF program types that attach to a btf_id */ 12403 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, 12404 const struct bpf_trace_opts *opts) 12405 { 12406 LIBBPF_OPTS(bpf_link_create_opts, link_opts); 12407 char errmsg[STRERR_BUFSIZE]; 12408 struct bpf_link *link; 12409 int prog_fd, pfd; 12410 12411 if (!OPTS_VALID(opts, bpf_trace_opts)) 12412 return libbpf_err_ptr(-EINVAL); 12413 12414 prog_fd = bpf_program__fd(prog); 12415 if (prog_fd < 0) { 12416 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12417 return libbpf_err_ptr(-EINVAL); 12418 } 12419 12420 link = calloc(1, sizeof(*link)); 12421 if (!link) 12422 return libbpf_err_ptr(-ENOMEM); 12423 link->detach = &bpf_link__detach_fd; 12424 12425 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ 12426 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); 12427 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); 12428 if (pfd < 0) { 12429 pfd = -errno; 12430 free(link); 12431 pr_warn("prog '%s': failed to attach: %s\n", 12432 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12433 return libbpf_err_ptr(pfd); 12434 } 12435 link->fd = pfd; 12436 return link; 12437 } 12438 12439 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) 12440 { 12441 return bpf_program__attach_btf_id(prog, NULL); 12442 } 12443 12444 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, 12445 const struct bpf_trace_opts *opts) 12446 { 12447 return bpf_program__attach_btf_id(prog, opts); 12448 } 12449 12450 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) 12451 { 12452 return bpf_program__attach_btf_id(prog, NULL); 12453 } 12454 12455 static int attach_trace(const struct bpf_program *prog, 
long cookie, struct bpf_link **link) 12456 { 12457 *link = bpf_program__attach_trace(prog); 12458 return libbpf_get_error(*link); 12459 } 12460 12461 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12462 { 12463 *link = bpf_program__attach_lsm(prog); 12464 return libbpf_get_error(*link); 12465 } 12466 12467 static struct bpf_link * 12468 bpf_program_attach_fd(const struct bpf_program *prog, 12469 int target_fd, const char *target_name, 12470 const struct bpf_link_create_opts *opts) 12471 { 12472 enum bpf_attach_type attach_type; 12473 char errmsg[STRERR_BUFSIZE]; 12474 struct bpf_link *link; 12475 int prog_fd, link_fd; 12476 12477 prog_fd = bpf_program__fd(prog); 12478 if (prog_fd < 0) { 12479 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12480 return libbpf_err_ptr(-EINVAL); 12481 } 12482 12483 link = calloc(1, sizeof(*link)); 12484 if (!link) 12485 return libbpf_err_ptr(-ENOMEM); 12486 link->detach = &bpf_link__detach_fd; 12487 12488 attach_type = bpf_program__expected_attach_type(prog); 12489 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); 12490 if (link_fd < 0) { 12491 link_fd = -errno; 12492 free(link); 12493 pr_warn("prog '%s': failed to attach to %s: %s\n", 12494 prog->name, target_name, 12495 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12496 return libbpf_err_ptr(link_fd); 12497 } 12498 link->fd = link_fd; 12499 return link; 12500 } 12501 12502 struct bpf_link * 12503 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) 12504 { 12505 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); 12506 } 12507 12508 struct bpf_link * 12509 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) 12510 { 12511 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); 12512 } 12513 12514 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) 12515 { 12516 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12517 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); 12518 } 12519 12520 struct bpf_link * 12521 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, 12522 const struct bpf_tcx_opts *opts) 12523 { 12524 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12525 __u32 relative_id; 12526 int relative_fd; 12527 12528 if (!OPTS_VALID(opts, bpf_tcx_opts)) 12529 return libbpf_err_ptr(-EINVAL); 12530 12531 relative_id = OPTS_GET(opts, relative_id, 0); 12532 relative_fd = OPTS_GET(opts, relative_fd, 0); 12533 12534 /* validate we don't have unexpected combinations of non-zero fields */ 12535 if (!ifindex) { 12536 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12537 prog->name); 12538 return libbpf_err_ptr(-EINVAL); 12539 } 12540 if (relative_fd && relative_id) { 12541 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12542 prog->name); 12543 return libbpf_err_ptr(-EINVAL); 12544 } 12545 12546 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); 12547 link_create_opts.tcx.relative_fd = relative_fd; 12548 link_create_opts.tcx.relative_id = relative_id; 12549 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12550 12551 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12552 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); 12553 } 12554 12555 struct bpf_link * 12556 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, 12557 const struct bpf_netkit_opts *opts) 12558 { 
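	/* Same flow as bpf_program__attach_tcx(), but the resulting link is a
	 * netkit link on the given netkit device.
	 */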
12559 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12560 __u32 relative_id; 12561 int relative_fd; 12562 12563 if (!OPTS_VALID(opts, bpf_netkit_opts)) 12564 return libbpf_err_ptr(-EINVAL); 12565 12566 relative_id = OPTS_GET(opts, relative_id, 0); 12567 relative_fd = OPTS_GET(opts, relative_fd, 0); 12568 12569 /* validate we don't have unexpected combinations of non-zero fields */ 12570 if (!ifindex) { 12571 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12572 prog->name); 12573 return libbpf_err_ptr(-EINVAL); 12574 } 12575 if (relative_fd && relative_id) { 12576 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12577 prog->name); 12578 return libbpf_err_ptr(-EINVAL); 12579 } 12580 12581 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); 12582 link_create_opts.netkit.relative_fd = relative_fd; 12583 link_create_opts.netkit.relative_id = relative_id; 12584 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12585 12586 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); 12587 } 12588 12589 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, 12590 int target_fd, 12591 const char *attach_func_name) 12592 { 12593 int btf_id; 12594 12595 if (!!target_fd != !!attach_func_name) { 12596 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", 12597 prog->name); 12598 return libbpf_err_ptr(-EINVAL); 12599 } 12600 12601 if (prog->type != BPF_PROG_TYPE_EXT) { 12602 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", 12603 prog->name); 12604 return libbpf_err_ptr(-EINVAL); 12605 } 12606 12607 if (target_fd) { 12608 LIBBPF_OPTS(bpf_link_create_opts, target_opts); 12609 12610 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); 12611 if (btf_id < 0) 12612 return libbpf_err_ptr(btf_id); 12613 12614 target_opts.target_btf_id = btf_id; 12615 12616 return bpf_program_attach_fd(prog, target_fd, "freplace", 12617 &target_opts); 12618 } else { 12619 /* no target, so use raw_tracepoint_open for compatibility 12620 * with old kernels 12621 */ 12622 return bpf_program__attach_trace(prog); 12623 } 12624 } 12625 12626 struct bpf_link * 12627 bpf_program__attach_iter(const struct bpf_program *prog, 12628 const struct bpf_iter_attach_opts *opts) 12629 { 12630 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12631 char errmsg[STRERR_BUFSIZE]; 12632 struct bpf_link *link; 12633 int prog_fd, link_fd; 12634 __u32 target_fd = 0; 12635 12636 if (!OPTS_VALID(opts, bpf_iter_attach_opts)) 12637 return libbpf_err_ptr(-EINVAL); 12638 12639 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); 12640 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); 12641 12642 prog_fd = bpf_program__fd(prog); 12643 if (prog_fd < 0) { 12644 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12645 return libbpf_err_ptr(-EINVAL); 12646 } 12647 12648 link = calloc(1, sizeof(*link)); 12649 if (!link) 12650 return libbpf_err_ptr(-ENOMEM); 12651 link->detach = &bpf_link__detach_fd; 12652 12653 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, 12654 &link_create_opts); 12655 if (link_fd < 0) { 12656 link_fd = -errno; 12657 free(link); 12658 pr_warn("prog '%s': failed to attach to iterator: %s\n", 12659 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12660 return libbpf_err_ptr(link_fd); 12661 } 12662 link->fd = link_fd; 12663 return link; 12664 } 12665 12666 static int attach_iter(const 
struct bpf_program *prog, long cookie, struct bpf_link **link) 12667 { 12668 *link = bpf_program__attach_iter(prog, NULL); 12669 return libbpf_get_error(*link); 12670 } 12671 12672 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, 12673 const struct bpf_netfilter_opts *opts) 12674 { 12675 LIBBPF_OPTS(bpf_link_create_opts, lopts); 12676 struct bpf_link *link; 12677 int prog_fd, link_fd; 12678 12679 if (!OPTS_VALID(opts, bpf_netfilter_opts)) 12680 return libbpf_err_ptr(-EINVAL); 12681 12682 prog_fd = bpf_program__fd(prog); 12683 if (prog_fd < 0) { 12684 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12685 return libbpf_err_ptr(-EINVAL); 12686 } 12687 12688 link = calloc(1, sizeof(*link)); 12689 if (!link) 12690 return libbpf_err_ptr(-ENOMEM); 12691 12692 link->detach = &bpf_link__detach_fd; 12693 12694 lopts.netfilter.pf = OPTS_GET(opts, pf, 0); 12695 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); 12696 lopts.netfilter.priority = OPTS_GET(opts, priority, 0); 12697 lopts.netfilter.flags = OPTS_GET(opts, flags, 0); 12698 12699 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); 12700 if (link_fd < 0) { 12701 char errmsg[STRERR_BUFSIZE]; 12702 12703 link_fd = -errno; 12704 free(link); 12705 pr_warn("prog '%s': failed to attach to netfilter: %s\n", 12706 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12707 return libbpf_err_ptr(link_fd); 12708 } 12709 link->fd = link_fd; 12710 12711 return link; 12712 } 12713 12714 struct bpf_link *bpf_program__attach(const struct bpf_program *prog) 12715 { 12716 struct bpf_link *link = NULL; 12717 int err; 12718 12719 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 12720 return libbpf_err_ptr(-EOPNOTSUPP); 12721 12722 if (bpf_program__fd(prog) < 0) { 12723 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12724 prog->name); 12725 return libbpf_err_ptr(-EINVAL); 12726 } 12727 12728 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); 12729 if (err) 12730 return libbpf_err_ptr(err); 12731 12732 /* When calling bpf_program__attach() explicitly, auto-attach support 12733 * is expected to work, so NULL returned link is considered an error. 12734 * This is different for skeleton's attach, see comment in 12735 * bpf_object__attach_skeleton(). 12736 */ 12737 if (!link) 12738 return libbpf_err_ptr(-EOPNOTSUPP); 12739 12740 return link; 12741 } 12742 12743 struct bpf_link_struct_ops { 12744 struct bpf_link link; 12745 int map_fd; 12746 }; 12747 12748 static int bpf_link__detach_struct_ops(struct bpf_link *link) 12749 { 12750 struct bpf_link_struct_ops *st_link; 12751 __u32 zero = 0; 12752 12753 st_link = container_of(link, struct bpf_link_struct_ops, link); 12754 12755 if (st_link->map_fd < 0) 12756 /* w/o a real link */ 12757 return bpf_map_delete_elem(link->fd, &zero); 12758 12759 return close(link->fd); 12760 } 12761 12762 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) 12763 { 12764 struct bpf_link_struct_ops *link; 12765 __u32 zero = 0; 12766 int err, fd; 12767 12768 if (!bpf_map__is_struct_ops(map)) 12769 return libbpf_err_ptr(-EINVAL); 12770 12771 if (map->fd < 0) { 12772 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); 12773 return libbpf_err_ptr(-EINVAL); 12774 } 12775 12776 link = calloc(1, sizeof(*link)); 12777 if (!link) 12778 return libbpf_err_ptr(-EINVAL); 12779 12780 /* kern_vdata should be prepared during the loading phase. 
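	 * bpf_object__load() converts the user-visible struct_ops data into the
	 * kernel's layout and stores the result in kern_vdata, so it can be
	 * written into the map directly here.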
	 */
12781	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12782	/* It can be EBUSY if the map has been used to create or
12783	 * update a link before. We don't allow updating the value of
12784	 * a struct_ops once it is set. That ensures that the value
12785	 * never changes. So, it is safe to skip EBUSY.
12786	 */
12787	if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12788		free(link);
12789		return libbpf_err_ptr(err);
12790	}
12791
12792	link->link.detach = bpf_link__detach_struct_ops;
12793
12794	if (!(map->def.map_flags & BPF_F_LINK)) {
12795		/* w/o a real link */
12796		link->link.fd = map->fd;
12797		link->map_fd = -1;
12798		return &link->link;
12799	}
12800
12801	fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12802	if (fd < 0) {
12803		free(link);
12804		return libbpf_err_ptr(fd);
12805	}
12806
12807	link->link.fd = fd;
12808	link->map_fd = map->fd;
12809
12810	return &link->link;
12811 }
12812
12813 /*
12814  * Swap the backing struct_ops map of a link with a new struct_ops map.
12815  */
12816 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12817 {
12818	struct bpf_link_struct_ops *st_ops_link;
12819	__u32 zero = 0;
12820	int err;
12821
12822	if (!bpf_map__is_struct_ops(map))
12823		return -EINVAL;
12824
12825	if (map->fd < 0) {
12826		pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
12827		return -EINVAL;
12828	}
12829
12830	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12831	/* Ensure the type of the link is correct */
12832	if (st_ops_link->map_fd < 0)
12833		return -EINVAL;
12834
12835	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12836	/* It can be EBUSY if the map has been used to create or
12837	 * update a link before. We don't allow updating the value of
12838	 * a struct_ops once it is set. That ensures that the value
12839	 * never changes. So, it is safe to skip EBUSY.
12840 */ 12841 if (err && err != -EBUSY) 12842 return err; 12843 12844 err = bpf_link_update(link->fd, map->fd, NULL); 12845 if (err < 0) 12846 return err; 12847 12848 st_ops_link->map_fd = map->fd; 12849 12850 return 0; 12851 } 12852 12853 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, 12854 void *private_data); 12855 12856 static enum bpf_perf_event_ret 12857 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 12858 void **copy_mem, size_t *copy_size, 12859 bpf_perf_event_print_t fn, void *private_data) 12860 { 12861 struct perf_event_mmap_page *header = mmap_mem; 12862 __u64 data_head = ring_buffer_read_head(header); 12863 __u64 data_tail = header->data_tail; 12864 void *base = ((__u8 *)header) + page_size; 12865 int ret = LIBBPF_PERF_EVENT_CONT; 12866 struct perf_event_header *ehdr; 12867 size_t ehdr_size; 12868 12869 while (data_head != data_tail) { 12870 ehdr = base + (data_tail & (mmap_size - 1)); 12871 ehdr_size = ehdr->size; 12872 12873 if (((void *)ehdr) + ehdr_size > base + mmap_size) { 12874 void *copy_start = ehdr; 12875 size_t len_first = base + mmap_size - copy_start; 12876 size_t len_secnd = ehdr_size - len_first; 12877 12878 if (*copy_size < ehdr_size) { 12879 free(*copy_mem); 12880 *copy_mem = malloc(ehdr_size); 12881 if (!*copy_mem) { 12882 *copy_size = 0; 12883 ret = LIBBPF_PERF_EVENT_ERROR; 12884 break; 12885 } 12886 *copy_size = ehdr_size; 12887 } 12888 12889 memcpy(*copy_mem, copy_start, len_first); 12890 memcpy(*copy_mem + len_first, base, len_secnd); 12891 ehdr = *copy_mem; 12892 } 12893 12894 ret = fn(ehdr, private_data); 12895 data_tail += ehdr_size; 12896 if (ret != LIBBPF_PERF_EVENT_CONT) 12897 break; 12898 } 12899 12900 ring_buffer_write_tail(header, data_tail); 12901 return libbpf_err(ret); 12902 } 12903 12904 struct perf_buffer; 12905 12906 struct perf_buffer_params { 12907 struct perf_event_attr *attr; 12908 /* if event_cb is specified, it takes precendence */ 12909 perf_buffer_event_fn event_cb; 12910 /* sample_cb and lost_cb are higher-level common-case callbacks */ 12911 perf_buffer_sample_fn sample_cb; 12912 perf_buffer_lost_fn lost_cb; 12913 void *ctx; 12914 int cpu_cnt; 12915 int *cpus; 12916 int *map_keys; 12917 }; 12918 12919 struct perf_cpu_buf { 12920 struct perf_buffer *pb; 12921 void *base; /* mmap()'ed memory */ 12922 void *buf; /* for reconstructing segmented data */ 12923 size_t buf_size; 12924 int fd; 12925 int cpu; 12926 int map_key; 12927 }; 12928 12929 struct perf_buffer { 12930 perf_buffer_event_fn event_cb; 12931 perf_buffer_sample_fn sample_cb; 12932 perf_buffer_lost_fn lost_cb; 12933 void *ctx; /* passed into callbacks */ 12934 12935 size_t page_size; 12936 size_t mmap_size; 12937 struct perf_cpu_buf **cpu_bufs; 12938 struct epoll_event *events; 12939 int cpu_cnt; /* number of allocated CPU buffers */ 12940 int epoll_fd; /* perf event FD */ 12941 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ 12942 }; 12943 12944 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, 12945 struct perf_cpu_buf *cpu_buf) 12946 { 12947 if (!cpu_buf) 12948 return; 12949 if (cpu_buf->base && 12950 munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) 12951 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); 12952 if (cpu_buf->fd >= 0) { 12953 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); 12954 close(cpu_buf->fd); 12955 } 12956 free(cpu_buf->buf); 12957 free(cpu_buf); 12958 } 12959 12960 void perf_buffer__free(struct perf_buffer *pb) 12961 { 12962 int i; 12963 12964 if 
(IS_ERR_OR_NULL(pb)) 12965 return; 12966 if (pb->cpu_bufs) { 12967 for (i = 0; i < pb->cpu_cnt; i++) { 12968 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 12969 12970 if (!cpu_buf) 12971 continue; 12972 12973 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); 12974 perf_buffer__free_cpu_buf(pb, cpu_buf); 12975 } 12976 free(pb->cpu_bufs); 12977 } 12978 if (pb->epoll_fd >= 0) 12979 close(pb->epoll_fd); 12980 free(pb->events); 12981 free(pb); 12982 } 12983 12984 static struct perf_cpu_buf * 12985 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, 12986 int cpu, int map_key) 12987 { 12988 struct perf_cpu_buf *cpu_buf; 12989 char msg[STRERR_BUFSIZE]; 12990 int err; 12991 12992 cpu_buf = calloc(1, sizeof(*cpu_buf)); 12993 if (!cpu_buf) 12994 return ERR_PTR(-ENOMEM); 12995 12996 cpu_buf->pb = pb; 12997 cpu_buf->cpu = cpu; 12998 cpu_buf->map_key = map_key; 12999 13000 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, 13001 -1, PERF_FLAG_FD_CLOEXEC); 13002 if (cpu_buf->fd < 0) { 13003 err = -errno; 13004 pr_warn("failed to open perf buffer event on cpu #%d: %s\n", 13005 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 13006 goto error; 13007 } 13008 13009 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, 13010 PROT_READ | PROT_WRITE, MAP_SHARED, 13011 cpu_buf->fd, 0); 13012 if (cpu_buf->base == MAP_FAILED) { 13013 cpu_buf->base = NULL; 13014 err = -errno; 13015 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", 13016 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 13017 goto error; 13018 } 13019 13020 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 13021 err = -errno; 13022 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", 13023 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 13024 goto error; 13025 } 13026 13027 return cpu_buf; 13028 13029 error: 13030 perf_buffer__free_cpu_buf(pb, cpu_buf); 13031 return (struct perf_cpu_buf *)ERR_PTR(err); 13032 } 13033 13034 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13035 struct perf_buffer_params *p); 13036 13037 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, 13038 perf_buffer_sample_fn sample_cb, 13039 perf_buffer_lost_fn lost_cb, 13040 void *ctx, 13041 const struct perf_buffer_opts *opts) 13042 { 13043 const size_t attr_sz = sizeof(struct perf_event_attr); 13044 struct perf_buffer_params p = {}; 13045 struct perf_event_attr attr; 13046 __u32 sample_period; 13047 13048 if (!OPTS_VALID(opts, perf_buffer_opts)) 13049 return libbpf_err_ptr(-EINVAL); 13050 13051 sample_period = OPTS_GET(opts, sample_period, 1); 13052 if (!sample_period) 13053 sample_period = 1; 13054 13055 memset(&attr, 0, attr_sz); 13056 attr.size = attr_sz; 13057 attr.config = PERF_COUNT_SW_BPF_OUTPUT; 13058 attr.type = PERF_TYPE_SOFTWARE; 13059 attr.sample_type = PERF_SAMPLE_RAW; 13060 attr.sample_period = sample_period; 13061 attr.wakeup_events = sample_period; 13062 13063 p.attr = &attr; 13064 p.sample_cb = sample_cb; 13065 p.lost_cb = lost_cb; 13066 p.ctx = ctx; 13067 13068 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13069 } 13070 13071 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, 13072 struct perf_event_attr *attr, 13073 perf_buffer_event_fn event_cb, void *ctx, 13074 const struct perf_buffer_raw_opts *opts) 13075 { 13076 struct perf_buffer_params p = {}; 13077 13078 if (!attr) 13079 return libbpf_err_ptr(-EINVAL); 13080 13081 if (!OPTS_VALID(opts, perf_buffer_raw_opts)) 13082 return libbpf_err_ptr(-EINVAL); 13083 
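	/* In the "raw" variant the caller supplies the full perf_event_attr and,
	 * optionally, an explicit list of CPUs and map keys (cpu_cnt/cpus/map_keys);
	 * records are handed to event_cb unparsed.
	 */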
	p.attr = attr;
	p.event_cb = event_cb;
	p.ctx = ctx;
	p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
	p.cpus = OPTS_GET(opts, cpus, NULL);
	p.map_keys = OPTS_GET(opts, map_keys, NULL);

	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}

static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
					      struct perf_buffer_params *p)
{
	const char *online_cpus_file = "/sys/devices/system/cpu/online";
	struct bpf_map_info map;
	char msg[STRERR_BUFSIZE];
	struct perf_buffer *pb;
	bool *online = NULL;
	__u32 map_info_len;
	int err, i, j, n;

	if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
		pr_warn("page count should be power of two, but is %zu\n",
			page_cnt);
		return ERR_PTR(-EINVAL);
	}

	/* best-effort sanity checks */
	memset(&map, 0, sizeof(map));
	map_info_len = sizeof(map);
	err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
	if (err) {
		err = -errno;
		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
		 * -EBADFD, -EFAULT, or -E2BIG on real error
		 */
		if (err != -EINVAL) {
			pr_warn("failed to get map info for map FD %d: %s\n",
				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
			return ERR_PTR(err);
		}
		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
			 map_fd);
	} else {
		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
				map.name);
			return ERR_PTR(-EINVAL);
		}
	}

	pb = calloc(1, sizeof(*pb));
	if (!pb)
		return ERR_PTR(-ENOMEM);

	pb->event_cb = p->event_cb;
	pb->sample_cb = p->sample_cb;
	pb->lost_cb = p->lost_cb;
	pb->ctx = p->ctx;

	pb->page_size = getpagesize();
	pb->mmap_size = pb->page_size * page_cnt;
	pb->map_fd = map_fd;

	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (pb->epoll_fd < 0) {
		err = -errno;
		pr_warn("failed to create epoll instance: %s\n",
			libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	if (p->cpu_cnt > 0) {
		pb->cpu_cnt = p->cpu_cnt;
	} else {
		pb->cpu_cnt = libbpf_num_possible_cpus();
		if (pb->cpu_cnt < 0) {
			err = pb->cpu_cnt;
			goto error;
		}
		if (map.max_entries && map.max_entries < pb->cpu_cnt)
			pb->cpu_cnt = map.max_entries;
	}

	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
	if (!pb->events) {
		err = -ENOMEM;
		pr_warn("failed to allocate events: out of memory\n");
		goto error;
	}
	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
	if (!pb->cpu_bufs) {
		err = -ENOMEM;
		pr_warn("failed to allocate buffers: out of memory\n");
		goto error;
	}

	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
	if (err) {
		pr_warn("failed to get online CPU mask: %d\n", err);
		goto error;
	}

	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf;
		int cpu, map_key;

		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;

		/* in case user didn't explicitly request particular CPUs to
		 * be attached to, skip offline/not present CPUs
		 */
		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
			continue;

		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
		if (IS_ERR(cpu_buf)) {
			err = PTR_ERR(cpu_buf);
			goto error;
		}

		pb->cpu_bufs[j] = cpu_buf;

		err = bpf_map_update_elem(pb->map_fd, &map_key,
					  &cpu_buf->fd, 0);
		if (err) {
			err = -errno;
			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
				cpu, map_key, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}

		pb->events[j].events = EPOLLIN;
		pb->events[j].data.ptr = cpu_buf;
		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
			      &pb->events[j]) < 0) {
			err = -errno;
			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
				cpu, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}
		j++;
	}
	pb->cpu_cnt = j;
	free(online);

	return pb;

error:
	free(online);
	if (pb)
		perf_buffer__free(pb);
	return ERR_PTR(err);
}

struct perf_sample_raw {
	struct perf_event_header header;
	uint32_t size;
	char data[];
};

struct perf_sample_lost {
	struct perf_event_header header;
	uint64_t id;
	uint64_t lost;
	uint64_t sample_id;
};

static enum bpf_perf_event_ret
perf_buffer__process_record(struct perf_event_header *e, void *ctx)
{
	struct perf_cpu_buf *cpu_buf = ctx;
	struct perf_buffer *pb = cpu_buf->pb;
	void *data = e;

	/* user wants full control over parsing perf event */
	if (pb->event_cb)
		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);

	switch (e->type) {
	case PERF_RECORD_SAMPLE: {
		struct perf_sample_raw *s = data;

		if (pb->sample_cb)
			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
		break;
	}
	case PERF_RECORD_LOST: {
		struct perf_sample_lost *s = data;

		if (pb->lost_cb)
			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
		break;
	}
	default:
		pr_warn("unknown perf sample type %d\n", e->type);
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static int perf_buffer__process_records(struct perf_buffer *pb,
					struct perf_cpu_buf *cpu_buf)
{
	enum bpf_perf_event_ret ret;

	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
				     pb->page_size, &cpu_buf->buf,
				     &cpu_buf->buf_size,
				     perf_buffer__process_record, cpu_buf);
	if (ret != LIBBPF_PERF_EVENT_CONT)
		return ret;
	return 0;
}

int perf_buffer__epoll_fd(const struct perf_buffer *pb)
{
	return pb->epoll_fd;
}

int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
	int i, cnt, err;

	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
	if (cnt < 0)
		return -errno;

	for (i = 0; i < cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("error while processing records: %d\n", err);
			return libbpf_err(err);
		}
	}
	return cnt;
}

/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
 * manager.
 */
size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
{
	return pb->cpu_cnt;
}

/*
 * Return perf_event FD of a ring buffer in *buf_idx* slot of
 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
 * select()/poll()/epoll() Linux syscalls.
 */
int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return cpu_buf->fd;
}

int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	*buf = cpu_buf->base;
	*buf_size = pb->mmap_size;
	return 0;
}

/*
 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
 * consume, do nothing and return success.
 * Returns:
 *   - 0 on success;
 *   - <0 on failure.
 */
int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return perf_buffer__process_records(pb, cpu_buf);
}

int perf_buffer__consume(struct perf_buffer *pb)
{
	int i, err;

	for (i = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];

		if (!cpu_buf)
			continue;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
			return libbpf_err(err);
		}
	}
	return 0;
}

int bpf_program__set_attach_target(struct bpf_program *prog,
				   int attach_prog_fd,
				   const char *attach_func_name)
{
	int btf_obj_fd = 0, btf_id = 0, err;

	if (!prog || attach_prog_fd < 0)
		return libbpf_err(-EINVAL);

	if (prog->obj->loaded)
		return libbpf_err(-EINVAL);

	if (attach_prog_fd && !attach_func_name) {
		/* remember attach_prog_fd and let bpf_program__load() find
		 * BTF ID during the program load
		 */
		prog->attach_prog_fd = attach_prog_fd;
		return 0;
	}

	if (attach_prog_fd) {
		btf_id = libbpf_find_prog_btf_id(attach_func_name,
						 attach_prog_fd);
		if (btf_id < 0)
			return libbpf_err(btf_id);
	} else {
		if (!attach_func_name)
			return libbpf_err(-EINVAL);

		/* load btf_vmlinux, if not yet */
		err = bpf_object__load_vmlinux_btf(prog->obj, true);
		if (err)
			return libbpf_err(err);
		err = find_kernel_btf_id(prog->obj, attach_func_name,
					 prog->expected_attach_type,
					 &btf_obj_fd, &btf_id);
		if (err)
			return libbpf_err(err);
	}

	prog->attach_btf_id = btf_id;
	prog->attach_btf_obj_fd = btf_obj_fd;
	prog->attach_prog_fd = attach_prog_fd;
	return 0;
}

int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		if (start < 0 || start > end) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		tmp = realloc(*mask, end + 1);
		if (!tmp) {
			err = -ENOMEM;
			goto cleanup;
		}
		*mask = tmp;
		memset(tmp + *mask_sz, 0, start - *mask_sz);
		memset(tmp + start, 1, end - start + 1);
		*mask_sz = end + 1;
		s += len;
	}
	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}
	return 0;
cleanup:
	free(*mask);
	*mask = NULL;
	return err;
}

int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
{
	int fd, err = 0, len;
	char buf[128];

	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		err = -errno;
		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
		return err;
	}
	len = read(fd, buf, sizeof(buf));
	close(fd);
	if (len <= 0) {
		err = len ? -errno : -EINVAL;
		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
		return err;
	}
	if (len >= sizeof(buf)) {
		pr_warn("CPU mask is too big in file %s\n", fcpu);
		return -E2BIG;
	}
	buf[len] = '\0';

	return parse_cpu_mask_str(buf, mask, mask_sz);
}

int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int err, n, i, tmp_cpus;
	bool *mask;

	tmp_cpus = READ_ONCE(cpus);
	if (tmp_cpus > 0)
		return tmp_cpus;

	err = parse_cpu_mask_file(fcpu, &mask, &n);
	if (err)
		return libbpf_err(err);

	tmp_cpus = 0;
	for (i = 0; i < n; i++) {
		if (mask[i])
			tmp_cpus++;
	}
	free(mask);

	WRITE_ONCE(cpus, tmp_cpus);
	return tmp_cpus;
}

static int populate_skeleton_maps(const struct bpf_object *obj,
				  struct bpf_map_skeleton *maps,
				  size_t map_cnt)
{
	int i;

	for (i = 0; i < map_cnt; i++) {
		struct bpf_map **map = maps[i].map;
		const char *name = maps[i].name;
		void **mmaped = maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}
	return 0;
}

static int populate_skeleton_progs(const struct bpf_object *obj,
				   struct bpf_prog_skeleton *progs,
				   size_t prog_cnt)
{
	int i;

	for (i = 0; i < prog_cnt; i++) {
		struct bpf_program **prog = progs[i].prog;
		const char *name = progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}
	return 0;
}

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int err;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overwriting object name for skeletons is discouraged,
	 * as it breaks global data maps, because they contain object name
	 * prefix as their own map name prefix. When skeleton is generated,
	 * bpftool is making an assumption that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	err = libbpf_get_error(obj);
	if (err) {
		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
			s->name, err);
		return libbpf_err(err);
	}

	*s->obj = obj;
	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	return 0;
}

int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
{
	int err, len, var_idx, i;
	const char *var_name;
	const struct bpf_map *map;
	struct btf *btf;
	__u32 map_type_id;
	const struct btf_type *map_type, *var_type;
	const struct bpf_var_skeleton *var_skel;
	struct btf_var_secinfo *var;

	if (!s->obj)
		return libbpf_err(-EINVAL);

	btf = bpf_object__btf(s->obj);
	if (!btf) {
		pr_warn("subskeletons require BTF at runtime (object %s)\n",
			bpf_object__name(s->obj));
		return libbpf_err(-errno);
	}

	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton maps: %d\n", err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton progs: %d\n", err);
		return libbpf_err(err);
	}

	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
		var_skel = &s->vars[var_idx];
		map = *var_skel->map;
		map_type_id = bpf_map__btf_value_type_id(map);
		map_type = btf__type_by_id(btf, map_type_id);

		if (!btf_is_datasec(map_type)) {
			pr_warn("type for map '%1$s' is not a datasec: %2$s",
				bpf_map__name(map),
				__btf_kind_str(btf_kind(map_type)));
			return libbpf_err(-EINVAL);
		}

		len = btf_vlen(map_type);
		var = btf_var_secinfos(map_type);
		for (i = 0; i < len; i++, var++) {
			var_type = btf__type_by_id(btf, var->type);
			var_name = btf__name_by_offset(btf, var_type->name_off);
			if (strcmp(var_name, var_skel->name) == 0) {
				*var_skel->addr = map->mmaped + var->offset;
				break;
			}
		}
	}
	return 0;
}

void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
{
	if (!s)
		return;
	free(s->maps);
	free(s->progs);
	free(s->vars);
	free(s);
}

int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map);
		int prot, map_fd = map->fd;
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.type == BPF_MAP_TYPE_ARENA) {
			*mmaped = map->mmaped;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap anonymous mmap()-ed "map initialization image" as
		 * a BPF map-backed mmap()-ed memory, but preserving the same
		 * memory address. This will cause kernel to change process'
		 * page table to point to a different piece of kernel memory,
		 * but from userspace point of view memory address (and its
		 * contents, being identical at this point) will stay the
		 * same. This mapping will be released by bpf_object__close()
		 * as per normal clean up procedure, so we don't need to worry
		 * about it from skeleton's clean up perspective.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				bpf_map__name(map), err);
			return libbpf_err(err);
		}
	}

	return 0;
}

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;

		if (!prog->autoload || !prog->autoattach)
			continue;

		/* auto-attaching not supported for this program */
		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
			continue;

		/* if user already set the link manually, don't attempt auto-attach */
		if (*link)
			continue;

		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
		if (err) {
			pr_warn("prog '%s': failed to auto-attach: %d\n",
				bpf_program__name(prog), err);
			return libbpf_err(err);
		}

		/* For some SEC() definitions auto-attach is supported only in
		 * some cases (e.g., if the definition completely specifies
		 * target information). SEC("uprobe") is one such case: if the
		 * user specified target binary and function name, such a BPF
		 * program can be auto-attached; otherwise it shouldn't cause
		 * skeleton attach to fail and should just be skipped.
		 * attach_fn signals such a case by returning 0 (no error) and
		 * setting link to NULL.
		 */
	}

	return 0;
}

void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (!s)
		return;

	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}
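
/*
 * Usage sketch (editorial addition, kept inside a comment so it is not
 * compiled into libbpf): a minimal consumer loop over the perf_buffer API
 * above. The map name "events", the callback names, and the 64-page ring
 * size are illustrative assumptions, not anything required by the API.
 *
 *	static void handle_event(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// data/size is the raw payload passed to
 *		// bpf_perf_event_output() on the BPF side
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 lost_cnt)
 *	{
 *		fprintf(stderr, "lost %llu samples on CPU #%d\n",
 *			(unsigned long long)lost_cnt, cpu);
 *	}
 *
 *	static int consume_events(struct bpf_object *obj)
 *	{
 *		struct perf_buffer *pb;
 *		int map_fd, err;
 *
 *		map_fd = bpf_object__find_map_fd_by_name(obj, "events");
 *		if (map_fd < 0)
 *			return map_fd;
 *
 *		// 64 pages of ring buffer per CPU; assumes libbpf 1.0 strict
 *		// mode, where NULL + errno is returned on failure
 *		pb = perf_buffer__new(map_fd, 64, handle_event, handle_lost,
 *				      NULL, NULL);
 *		if (!pb)
 *			return -errno;
 *
 *		// perf_buffer__poll() returns the number of ready rings,
 *		// 0 on timeout, or a negative error (e.g., -EINTR)
 *		while ((err = perf_buffer__poll(pb, 100)) >= 0)
 *			;
 *
 *		perf_buffer__free(pb);
 *		return err == -EINTR ? 0 : err;
 *	}
 */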
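
/*
 * Usage sketch (editorial addition): feeding a perf_buffer into an existing
 * epoll-based event loop through perf_buffer__epoll_fd(), draining already
 * buffered data first with perf_buffer__consume(). The function name and the
 * caller-owned loop_epoll_fd are illustrative assumptions.
 *
 *	static int register_perf_buffer(struct perf_buffer *pb, int loop_epoll_fd)
 *	{
 *		struct epoll_event ev = {
 *			.events = EPOLLIN,
 *			.data.ptr = pb,
 *		};
 *		int err;
 *
 *		// consume whatever is already pending, without blocking
 *		err = perf_buffer__consume(pb);
 *		if (err)
 *			return err;
 *
 *		// when loop_epoll_fd later reports this FD as readable, call
 *		// perf_buffer__poll(pb, 0) or perf_buffer__consume(pb) again
 *		return epoll_ctl(loop_epoll_fd, EPOLL_CTL_ADD,
 *				 perf_buffer__epoll_fd(pb), &ev);
 *	}
 */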
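
/*
 * Usage sketch (editorial addition): retargeting a SEC("fentry") program to
 * a kernel function chosen at runtime via bpf_program__set_attach_target(),
 * which must happen before the object is loaded (see the prog->obj->loaded
 * check above). The program name "handle_entry" is an illustrative
 * assumption.
 *
 *	static int load_retargeted(struct bpf_object *obj, const char *kfunc)
 *	{
 *		struct bpf_program *prog;
 *		int err;
 *
 *		prog = bpf_object__find_program_by_name(obj, "handle_entry");
 *		if (!prog)
 *			return -ESRCH;
 *
 *		// attach_prog_fd == 0 means the BTF ID of kfunc is resolved
 *		// against kernel (vmlinux/module) BTF
 *		err = bpf_program__set_attach_target(prog, 0, kfunc);
 *		if (err)
 *			return err;
 *
 *		return bpf_object__load(obj);
 *	}
 */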
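
/*
 * Editorial example of the mask produced by parse_cpu_mask_str() above: the
 * input is the sysfs "cpulist" format of comma-separated decimal CPUs and
 * inclusive ranges, the output is a bool-per-CPU array the caller frees.
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,4\n", &mask, &n);
 *	if (!err) {
 *		// n == 5 and mask = {1, 1, 1, 0, 1}
 *		free(mask);
 *	}
 */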
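
/*
 * Usage sketch (editorial addition): libbpf_num_possible_cpus() gives the
 * number of value slots returned by lookups on BPF_MAP_TYPE_PERCPU_* maps,
 * so it is the right size for the per-CPU value array. Function and variable
 * names are illustrative assumptions; per-CPU value slots are padded to
 * 8 bytes, which __u64 already satisfies.
 *
 *	static int sum_percpu_counter(int map_fd, __u32 key, __u64 *sum)
 *	{
 *		int i, err, ncpus = libbpf_num_possible_cpus();
 *		__u64 *vals;
 *
 *		if (ncpus < 0)
 *			return ncpus;
 *
 *		vals = calloc(ncpus, sizeof(*vals));
 *		if (!vals)
 *			return -ENOMEM;
 *
 *		err = bpf_map_lookup_elem(map_fd, &key, vals);
 *		if (!err) {
 *			*sum = 0;
 *			for (i = 0; i < ncpus; i++)
 *				*sum += vals[i];
 *		}
 *		free(vals);
 *		return err;
 *	}
 */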
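
/*
 * Usage sketch (editorial addition): the skeleton functions above are
 * normally driven through bpftool-generated <name>.skel.h wrappers rather
 * than called directly. Assuming a hypothetical "minimal.skel.h" produced by
 * "bpftool gen skeleton minimal.bpf.o" (the skeleton struct name, the global
 * variable my_cfg, and the presence of a .rodata section are all
 * assumptions), the typical lifecycle maps onto the functions above like
 * this:
 *
 *	#include "minimal.skel.h"
 *
 *	static int run(void)
 *	{
 *		struct minimal_bpf *skel;
 *		int err;
 *
 *		skel = minimal_bpf__open();		// bpf_object__open_skeleton()
 *		if (!skel)
 *			return -errno;
 *
 *		skel->rodata->my_cfg = 42;		// tune global data before load
 *
 *		err = minimal_bpf__load(skel);		// bpf_object__load_skeleton()
 *		if (err)
 *			goto out;
 *
 *		err = minimal_bpf__attach(skel);	// bpf_object__attach_skeleton()
 *	out:
 *		minimal_bpf__destroy(skel);		// bpf_object__destroy_skeleton()
 *		return err;
 *	}
 */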