1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 3 /* 4 * Common eBPF ELF object loading operations. 5 * 6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> 7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> 8 * Copyright (C) 2015 Huawei Inc. 9 * Copyright (C) 2017 Nicira, Inc. 10 * Copyright (C) 2019 Isovalent, Inc. 11 */ 12 13 #ifndef _GNU_SOURCE 14 #define _GNU_SOURCE 15 #endif 16 #include <stdlib.h> 17 #include <stdio.h> 18 #include <stdarg.h> 19 #include <libgen.h> 20 #include <inttypes.h> 21 #include <limits.h> 22 #include <string.h> 23 #include <unistd.h> 24 #include <endian.h> 25 #include <fcntl.h> 26 #include <errno.h> 27 #include <ctype.h> 28 #include <asm/unistd.h> 29 #include <linux/err.h> 30 #include <linux/kernel.h> 31 #include <linux/bpf.h> 32 #include <linux/btf.h> 33 #include <linux/filter.h> 34 #include <linux/limits.h> 35 #include <linux/perf_event.h> 36 #include <linux/bpf_perf_event.h> 37 #include <linux/ring_buffer.h> 38 #include <sys/epoll.h> 39 #include <sys/ioctl.h> 40 #include <sys/mman.h> 41 #include <sys/stat.h> 42 #include <sys/types.h> 43 #include <sys/vfs.h> 44 #include <sys/utsname.h> 45 #include <sys/resource.h> 46 #include <libelf.h> 47 #include <gelf.h> 48 #include <zlib.h> 49 50 #include "libbpf.h" 51 #include "bpf.h" 52 #include "btf.h" 53 #include "str_error.h" 54 #include "libbpf_internal.h" 55 #include "hashmap.h" 56 #include "bpf_gen_internal.h" 57 #include "zip.h" 58 59 #ifndef BPF_FS_MAGIC 60 #define BPF_FS_MAGIC 0xcafe4a11 61 #endif 62 63 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" 64 65 #define BPF_INSN_SZ (sizeof(struct bpf_insn)) 66 67 /* vsprintf() in __base_pr() uses nonliteral format string. It may break 68 * compilation if user enables corresponding warning. Disable it explicitly. 
69 */ 70 #pragma GCC diagnostic ignored "-Wformat-nonliteral" 71 72 #define __printf(a, b) __attribute__((format(printf, a, b))) 73 74 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); 75 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); 76 static int map_set_def_max_entries(struct bpf_map *map); 77 78 static const char * const attach_type_name[] = { 79 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", 80 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", 81 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", 82 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", 83 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", 84 [BPF_CGROUP_DEVICE] = "cgroup_device", 85 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", 86 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", 87 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", 88 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", 89 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", 90 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", 91 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", 92 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", 93 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", 94 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", 95 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", 96 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", 97 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", 98 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", 99 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", 100 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", 101 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", 102 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", 103 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", 104 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", 105 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", 106 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", 107 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", 108 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", 109 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", 110 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", 111 [BPF_LIRC_MODE2] = "lirc_mode2", 112 [BPF_FLOW_DISSECTOR] = "flow_dissector", 113 [BPF_TRACE_RAW_TP] = "trace_raw_tp", 114 [BPF_TRACE_FENTRY] = "trace_fentry", 115 [BPF_TRACE_FEXIT] = "trace_fexit", 116 [BPF_MODIFY_RETURN] = "modify_return", 117 [BPF_LSM_MAC] = "lsm_mac", 118 [BPF_LSM_CGROUP] = "lsm_cgroup", 119 [BPF_SK_LOOKUP] = "sk_lookup", 120 [BPF_TRACE_ITER] = "trace_iter", 121 [BPF_XDP_DEVMAP] = "xdp_devmap", 122 [BPF_XDP_CPUMAP] = "xdp_cpumap", 123 [BPF_XDP] = "xdp", 124 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", 125 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", 126 [BPF_PERF_EVENT] = "perf_event", 127 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", 128 [BPF_STRUCT_OPS] = "struct_ops", 129 [BPF_NETFILTER] = "netfilter", 130 [BPF_TCX_INGRESS] = "tcx_ingress", 131 [BPF_TCX_EGRESS] = "tcx_egress", 132 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", 133 [BPF_NETKIT_PRIMARY] = "netkit_primary", 134 [BPF_NETKIT_PEER] = "netkit_peer", 135 [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", 136 [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", 137 }; 138 139 static const char * const link_type_name[] = { 140 [BPF_LINK_TYPE_UNSPEC] = "unspec", 141 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", 142 [BPF_LINK_TYPE_TRACING] = "tracing", 143 [BPF_LINK_TYPE_CGROUP] = "cgroup", 144 [BPF_LINK_TYPE_ITER] = "iter", 
145 [BPF_LINK_TYPE_NETNS] = "netns", 146 [BPF_LINK_TYPE_XDP] = "xdp", 147 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", 148 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", 149 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", 150 [BPF_LINK_TYPE_NETFILTER] = "netfilter", 151 [BPF_LINK_TYPE_TCX] = "tcx", 152 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", 153 [BPF_LINK_TYPE_NETKIT] = "netkit", 154 [BPF_LINK_TYPE_SOCKMAP] = "sockmap", 155 }; 156 157 static const char * const map_type_name[] = { 158 [BPF_MAP_TYPE_UNSPEC] = "unspec", 159 [BPF_MAP_TYPE_HASH] = "hash", 160 [BPF_MAP_TYPE_ARRAY] = "array", 161 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", 162 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", 163 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", 164 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", 165 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", 166 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", 167 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", 168 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", 169 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", 170 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", 171 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", 172 [BPF_MAP_TYPE_DEVMAP] = "devmap", 173 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", 174 [BPF_MAP_TYPE_SOCKMAP] = "sockmap", 175 [BPF_MAP_TYPE_CPUMAP] = "cpumap", 176 [BPF_MAP_TYPE_XSKMAP] = "xskmap", 177 [BPF_MAP_TYPE_SOCKHASH] = "sockhash", 178 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", 179 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", 180 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", 181 [BPF_MAP_TYPE_QUEUE] = "queue", 182 [BPF_MAP_TYPE_STACK] = "stack", 183 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", 184 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", 185 [BPF_MAP_TYPE_RINGBUF] = "ringbuf", 186 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", 187 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", 188 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", 189 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", 190 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", 191 [BPF_MAP_TYPE_ARENA] = "arena", 192 }; 193 194 static const char * const prog_type_name[] = { 195 [BPF_PROG_TYPE_UNSPEC] = "unspec", 196 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", 197 [BPF_PROG_TYPE_KPROBE] = "kprobe", 198 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", 199 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", 200 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", 201 [BPF_PROG_TYPE_XDP] = "xdp", 202 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", 203 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", 204 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", 205 [BPF_PROG_TYPE_LWT_IN] = "lwt_in", 206 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", 207 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", 208 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", 209 [BPF_PROG_TYPE_SK_SKB] = "sk_skb", 210 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", 211 [BPF_PROG_TYPE_SK_MSG] = "sk_msg", 212 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", 213 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", 214 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", 215 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", 216 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", 217 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", 218 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", 219 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", 220 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", 221 [BPF_PROG_TYPE_TRACING] = "tracing", 222 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", 223 [BPF_PROG_TYPE_EXT] = "ext", 224 [BPF_PROG_TYPE_LSM] = "lsm", 225 [BPF_PROG_TYPE_SK_LOOKUP] = 
"sk_lookup", 226 [BPF_PROG_TYPE_SYSCALL] = "syscall", 227 [BPF_PROG_TYPE_NETFILTER] = "netfilter", 228 }; 229 230 static int __base_pr(enum libbpf_print_level level, const char *format, 231 va_list args) 232 { 233 const char *env_var = "LIBBPF_LOG_LEVEL"; 234 static enum libbpf_print_level min_level = LIBBPF_INFO; 235 static bool initialized; 236 237 if (!initialized) { 238 char *verbosity; 239 240 initialized = true; 241 verbosity = getenv(env_var); 242 if (verbosity) { 243 if (strcasecmp(verbosity, "warn") == 0) 244 min_level = LIBBPF_WARN; 245 else if (strcasecmp(verbosity, "debug") == 0) 246 min_level = LIBBPF_DEBUG; 247 else if (strcasecmp(verbosity, "info") == 0) 248 min_level = LIBBPF_INFO; 249 else 250 fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", 251 env_var, verbosity); 252 } 253 } 254 255 /* if too verbose, skip logging */ 256 if (level > min_level) 257 return 0; 258 259 return vfprintf(stderr, format, args); 260 } 261 262 static libbpf_print_fn_t __libbpf_pr = __base_pr; 263 264 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) 265 { 266 libbpf_print_fn_t old_print_fn; 267 268 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED); 269 270 return old_print_fn; 271 } 272 273 __printf(2, 3) 274 void libbpf_print(enum libbpf_print_level level, const char *format, ...) 275 { 276 va_list args; 277 int old_errno; 278 libbpf_print_fn_t print_fn; 279 280 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED); 281 if (!print_fn) 282 return; 283 284 old_errno = errno; 285 286 va_start(args, format); 287 __libbpf_pr(level, format, args); 288 va_end(args); 289 290 errno = old_errno; 291 } 292 293 static void pr_perm_msg(int err) 294 { 295 struct rlimit limit; 296 char buf[100]; 297 298 if (err != -EPERM || geteuid() != 0) 299 return; 300 301 err = getrlimit(RLIMIT_MEMLOCK, &limit); 302 if (err) 303 return; 304 305 if (limit.rlim_cur == RLIM_INFINITY) 306 return; 307 308 if (limit.rlim_cur < 1024) 309 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); 310 else if (limit.rlim_cur < 1024*1024) 311 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); 312 else 313 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); 314 315 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", 316 buf); 317 } 318 319 #define STRERR_BUFSIZE 128 320 321 /* Copied from tools/perf/util/util.h */ 322 #ifndef zfree 323 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) 324 #endif 325 326 #ifndef zclose 327 # define zclose(fd) ({ \ 328 int ___err = 0; \ 329 if ((fd) >= 0) \ 330 ___err = close((fd)); \ 331 fd = -1; \ 332 ___err; }) 333 #endif 334 335 static inline __u64 ptr_to_u64(const void *ptr) 336 { 337 return (__u64) (unsigned long) ptr; 338 } 339 340 int libbpf_set_strict_mode(enum libbpf_strict_mode mode) 341 { 342 /* as of v1.0 libbpf_set_strict_mode() is a no-op */ 343 return 0; 344 } 345 346 __u32 libbpf_major_version(void) 347 { 348 return LIBBPF_MAJOR_VERSION; 349 } 350 351 __u32 libbpf_minor_version(void) 352 { 353 return LIBBPF_MINOR_VERSION; 354 } 355 356 const char *libbpf_version_string(void) 357 { 358 #define __S(X) #X 359 #define _S(X) __S(X) 360 return "v" _S(LIBBPF_MAJOR_VERSION) "." 
_S(LIBBPF_MINOR_VERSION); 361 #undef _S 362 #undef __S 363 } 364 365 enum reloc_type { 366 RELO_LD64, 367 RELO_CALL, 368 RELO_DATA, 369 RELO_EXTERN_LD64, 370 RELO_EXTERN_CALL, 371 RELO_SUBPROG_ADDR, 372 RELO_CORE, 373 }; 374 375 struct reloc_desc { 376 enum reloc_type type; 377 int insn_idx; 378 union { 379 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ 380 struct { 381 int map_idx; 382 int sym_off; 383 int ext_idx; 384 }; 385 }; 386 }; 387 388 /* stored as sec_def->cookie for all libbpf-supported SEC()s */ 389 enum sec_def_flags { 390 SEC_NONE = 0, 391 /* expected_attach_type is optional, if kernel doesn't support that */ 392 SEC_EXP_ATTACH_OPT = 1, 393 /* legacy, only used by libbpf_get_type_names() and 394 * libbpf_attach_type_by_name(), not used by libbpf itself at all. 395 * This used to be associated with cgroup (and few other) BPF programs 396 * that were attachable through BPF_PROG_ATTACH command. Pretty 397 * meaningless nowadays, though. 398 */ 399 SEC_ATTACHABLE = 2, 400 SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, 401 /* attachment target is specified through BTF ID in either kernel or 402 * other BPF program's BTF object 403 */ 404 SEC_ATTACH_BTF = 4, 405 /* BPF program type allows sleeping/blocking in kernel */ 406 SEC_SLEEPABLE = 8, 407 /* BPF program support non-linear XDP buffer */ 408 SEC_XDP_FRAGS = 16, 409 /* Setup proper attach type for usdt probes. */ 410 SEC_USDT = 32, 411 }; 412 413 struct bpf_sec_def { 414 char *sec; 415 enum bpf_prog_type prog_type; 416 enum bpf_attach_type expected_attach_type; 417 long cookie; 418 int handler_id; 419 420 libbpf_prog_setup_fn_t prog_setup_fn; 421 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; 422 libbpf_prog_attach_fn_t prog_attach_fn; 423 }; 424 425 /* 426 * bpf_prog should be a better name but it has been used in 427 * linux/filter.h. 428 */ 429 struct bpf_program { 430 char *name; 431 char *sec_name; 432 size_t sec_idx; 433 const struct bpf_sec_def *sec_def; 434 /* this program's instruction offset (in number of instructions) 435 * within its containing ELF section 436 */ 437 size_t sec_insn_off; 438 /* number of original instructions in ELF section belonging to this 439 * program, not taking into account subprogram instructions possible 440 * appended later during relocation 441 */ 442 size_t sec_insn_cnt; 443 /* Offset (in number of instructions) of the start of instruction 444 * belonging to this BPF program within its containing main BPF 445 * program. For the entry-point (main) BPF program, this is always 446 * zero. For a sub-program, this gets reset before each of main BPF 447 * programs are processed and relocated and is used to determined 448 * whether sub-program was already appended to the main program, and 449 * if yes, at which instruction offset. 450 */ 451 size_t sub_insn_off; 452 453 /* instructions that belong to BPF program; insns[0] is located at 454 * sec_insn_off instruction within its ELF section in ELF file, so 455 * when mapping ELF file instruction index to the local instruction, 456 * one needs to subtract sec_insn_off; and vice versa. 
457 */ 458 struct bpf_insn *insns; 459 /* actual number of instruction in this BPF program's image; for 460 * entry-point BPF programs this includes the size of main program 461 * itself plus all the used sub-programs, appended at the end 462 */ 463 size_t insns_cnt; 464 465 struct reloc_desc *reloc_desc; 466 int nr_reloc; 467 468 /* BPF verifier log settings */ 469 char *log_buf; 470 size_t log_size; 471 __u32 log_level; 472 473 struct bpf_object *obj; 474 475 int fd; 476 bool autoload; 477 bool autoattach; 478 bool sym_global; 479 bool mark_btf_static; 480 enum bpf_prog_type type; 481 enum bpf_attach_type expected_attach_type; 482 int exception_cb_idx; 483 484 int prog_ifindex; 485 __u32 attach_btf_obj_fd; 486 __u32 attach_btf_id; 487 __u32 attach_prog_fd; 488 489 void *func_info; 490 __u32 func_info_rec_size; 491 __u32 func_info_cnt; 492 493 void *line_info; 494 __u32 line_info_rec_size; 495 __u32 line_info_cnt; 496 __u32 prog_flags; 497 }; 498 499 struct bpf_struct_ops { 500 struct bpf_program **progs; 501 __u32 *kern_func_off; 502 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ 503 void *data; 504 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in 505 * btf_vmlinux's format. 506 * struct bpf_struct_ops_tcp_congestion_ops { 507 * [... some other kernel fields ...] 508 * struct tcp_congestion_ops data; 509 * } 510 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) 511 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" 512 * from "data". 513 */ 514 void *kern_vdata; 515 __u32 type_id; 516 }; 517 518 #define DATA_SEC ".data" 519 #define BSS_SEC ".bss" 520 #define RODATA_SEC ".rodata" 521 #define KCONFIG_SEC ".kconfig" 522 #define KSYMS_SEC ".ksyms" 523 #define STRUCT_OPS_SEC ".struct_ops" 524 #define STRUCT_OPS_LINK_SEC ".struct_ops.link" 525 #define ARENA_SEC ".addr_space.1" 526 527 enum libbpf_map_type { 528 LIBBPF_MAP_UNSPEC, 529 LIBBPF_MAP_DATA, 530 LIBBPF_MAP_BSS, 531 LIBBPF_MAP_RODATA, 532 LIBBPF_MAP_KCONFIG, 533 }; 534 535 struct bpf_map_def { 536 unsigned int type; 537 unsigned int key_size; 538 unsigned int value_size; 539 unsigned int max_entries; 540 unsigned int map_flags; 541 }; 542 543 struct bpf_map { 544 struct bpf_object *obj; 545 char *name; 546 /* real_name is defined for special internal maps (.rodata*, 547 * .data*, .bss, .kconfig) and preserves their original ELF section 548 * name. This is important to be able to find corresponding BTF 549 * DATASEC information. 
550 */ 551 char *real_name; 552 int fd; 553 int sec_idx; 554 size_t sec_offset; 555 int map_ifindex; 556 int inner_map_fd; 557 struct bpf_map_def def; 558 __u32 numa_node; 559 __u32 btf_var_idx; 560 int mod_btf_fd; 561 __u32 btf_key_type_id; 562 __u32 btf_value_type_id; 563 __u32 btf_vmlinux_value_type_id; 564 enum libbpf_map_type libbpf_type; 565 void *mmaped; 566 struct bpf_struct_ops *st_ops; 567 struct bpf_map *inner_map; 568 void **init_slots; 569 int init_slots_sz; 570 char *pin_path; 571 bool pinned; 572 bool reused; 573 bool autocreate; 574 bool autoattach; 575 __u64 map_extra; 576 }; 577 578 enum extern_type { 579 EXT_UNKNOWN, 580 EXT_KCFG, 581 EXT_KSYM, 582 }; 583 584 enum kcfg_type { 585 KCFG_UNKNOWN, 586 KCFG_CHAR, 587 KCFG_BOOL, 588 KCFG_INT, 589 KCFG_TRISTATE, 590 KCFG_CHAR_ARR, 591 }; 592 593 struct extern_desc { 594 enum extern_type type; 595 int sym_idx; 596 int btf_id; 597 int sec_btf_id; 598 const char *name; 599 char *essent_name; 600 bool is_set; 601 bool is_weak; 602 union { 603 struct { 604 enum kcfg_type type; 605 int sz; 606 int align; 607 int data_off; 608 bool is_signed; 609 } kcfg; 610 struct { 611 unsigned long long addr; 612 613 /* target btf_id of the corresponding kernel var. */ 614 int kernel_btf_obj_fd; 615 int kernel_btf_id; 616 617 /* local btf_id of the ksym extern's type. */ 618 __u32 type_id; 619 /* BTF fd index to be patched in for insn->off, this is 620 * 0 for vmlinux BTF, index in obj->fd_array for module 621 * BTF 622 */ 623 __s16 btf_fd_idx; 624 } ksym; 625 }; 626 }; 627 628 struct module_btf { 629 struct btf *btf; 630 char *name; 631 __u32 id; 632 int fd; 633 int fd_array_idx; 634 }; 635 636 enum sec_type { 637 SEC_UNUSED = 0, 638 SEC_RELO, 639 SEC_BSS, 640 SEC_DATA, 641 SEC_RODATA, 642 SEC_ST_OPS, 643 }; 644 645 struct elf_sec_desc { 646 enum sec_type sec_type; 647 Elf64_Shdr *shdr; 648 Elf_Data *data; 649 }; 650 651 struct elf_state { 652 int fd; 653 const void *obj_buf; 654 size_t obj_buf_sz; 655 Elf *elf; 656 Elf64_Ehdr *ehdr; 657 Elf_Data *symbols; 658 Elf_Data *arena_data; 659 size_t shstrndx; /* section index for section name strings */ 660 size_t strtabidx; 661 struct elf_sec_desc *secs; 662 size_t sec_cnt; 663 int btf_maps_shndx; 664 __u32 btf_maps_sec_btf_id; 665 int text_shndx; 666 int symbols_shndx; 667 bool has_st_ops; 668 int arena_data_shndx; 669 }; 670 671 struct usdt_manager; 672 673 enum bpf_object_state { 674 OBJ_OPEN, 675 OBJ_PREPARED, 676 OBJ_LOADED, 677 }; 678 679 struct bpf_object { 680 char name[BPF_OBJ_NAME_LEN]; 681 char license[64]; 682 __u32 kern_version; 683 684 enum bpf_object_state state; 685 struct bpf_program *programs; 686 size_t nr_programs; 687 struct bpf_map *maps; 688 size_t nr_maps; 689 size_t maps_cap; 690 691 char *kconfig; 692 struct extern_desc *externs; 693 int nr_extern; 694 int kconfig_map_idx; 695 696 bool has_subcalls; 697 bool has_rodata; 698 699 struct bpf_gen *gen_loader; 700 701 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ 702 struct elf_state efile; 703 704 unsigned char byteorder; 705 706 struct btf *btf; 707 struct btf_ext *btf_ext; 708 709 /* Parse and load BTF vmlinux if any of the programs in the object need 710 * it at load time. 711 */ 712 struct btf *btf_vmlinux; 713 /* Path to the custom BTF to be used for BPF CO-RE relocations as an 714 * override for vmlinux BTF. 
715 */ 716 char *btf_custom_path; 717 /* vmlinux BTF override for CO-RE relocations */ 718 struct btf *btf_vmlinux_override; 719 /* Lazily initialized kernel module BTFs */ 720 struct module_btf *btf_modules; 721 bool btf_modules_loaded; 722 size_t btf_module_cnt; 723 size_t btf_module_cap; 724 725 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ 726 char *log_buf; 727 size_t log_size; 728 __u32 log_level; 729 730 int *fd_array; 731 size_t fd_array_cap; 732 size_t fd_array_cnt; 733 734 struct usdt_manager *usdt_man; 735 736 struct bpf_map *arena_map; 737 void *arena_data; 738 size_t arena_data_sz; 739 740 struct kern_feature_cache *feat_cache; 741 char *token_path; 742 int token_fd; 743 744 char path[]; 745 }; 746 747 static const char *elf_sym_str(const struct bpf_object *obj, size_t off); 748 static const char *elf_sec_str(const struct bpf_object *obj, size_t off); 749 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); 750 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); 751 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); 752 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); 753 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); 754 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); 755 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); 756 757 void bpf_program__unload(struct bpf_program *prog) 758 { 759 if (!prog) 760 return; 761 762 zclose(prog->fd); 763 764 zfree(&prog->func_info); 765 zfree(&prog->line_info); 766 } 767 768 static void bpf_program__exit(struct bpf_program *prog) 769 { 770 if (!prog) 771 return; 772 773 bpf_program__unload(prog); 774 zfree(&prog->name); 775 zfree(&prog->sec_name); 776 zfree(&prog->insns); 777 zfree(&prog->reloc_desc); 778 779 prog->nr_reloc = 0; 780 prog->insns_cnt = 0; 781 prog->sec_idx = -1; 782 } 783 784 static bool insn_is_subprog_call(const struct bpf_insn *insn) 785 { 786 return BPF_CLASS(insn->code) == BPF_JMP && 787 BPF_OP(insn->code) == BPF_CALL && 788 BPF_SRC(insn->code) == BPF_K && 789 insn->src_reg == BPF_PSEUDO_CALL && 790 insn->dst_reg == 0 && 791 insn->off == 0; 792 } 793 794 static bool is_call_insn(const struct bpf_insn *insn) 795 { 796 return insn->code == (BPF_JMP | BPF_CALL); 797 } 798 799 static bool insn_is_pseudo_func(struct bpf_insn *insn) 800 { 801 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC; 802 } 803 804 static int 805 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, 806 const char *name, size_t sec_idx, const char *sec_name, 807 size_t sec_off, void *insn_data, size_t insn_data_sz) 808 { 809 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { 810 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", 811 sec_name, name, sec_off, insn_data_sz); 812 return -EINVAL; 813 } 814 815 memset(prog, 0, sizeof(*prog)); 816 prog->obj = obj; 817 818 prog->sec_idx = sec_idx; 819 prog->sec_insn_off = sec_off / BPF_INSN_SZ; 820 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; 821 /* insns_cnt can later be increased by appending used subprograms */ 822 prog->insns_cnt = prog->sec_insn_cnt; 823 824 prog->type = BPF_PROG_TYPE_UNSPEC; 825 prog->fd = -1; 826 prog->exception_cb_idx = -1; 827 828 /* libbpf's convention for SEC("?abc...") is that it's just like 829 * SEC("abc...") but the corresponding bpf_program starts out with 830 * autoload set to false. 
831 */ 832 if (sec_name[0] == '?') { 833 prog->autoload = false; 834 /* from now on forget there was ? in section name */ 835 sec_name++; 836 } else { 837 prog->autoload = true; 838 } 839 840 prog->autoattach = true; 841 842 /* inherit object's log_level */ 843 prog->log_level = obj->log_level; 844 845 prog->sec_name = strdup(sec_name); 846 if (!prog->sec_name) 847 goto errout; 848 849 prog->name = strdup(name); 850 if (!prog->name) 851 goto errout; 852 853 prog->insns = malloc(insn_data_sz); 854 if (!prog->insns) 855 goto errout; 856 memcpy(prog->insns, insn_data, insn_data_sz); 857 858 return 0; 859 errout: 860 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); 861 bpf_program__exit(prog); 862 return -ENOMEM; 863 } 864 865 static int 866 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, 867 const char *sec_name, int sec_idx) 868 { 869 Elf_Data *symbols = obj->efile.symbols; 870 struct bpf_program *prog, *progs; 871 void *data = sec_data->d_buf; 872 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; 873 int nr_progs, err, i; 874 const char *name; 875 Elf64_Sym *sym; 876 877 progs = obj->programs; 878 nr_progs = obj->nr_programs; 879 nr_syms = symbols->d_size / sizeof(Elf64_Sym); 880 881 for (i = 0; i < nr_syms; i++) { 882 sym = elf_sym_by_idx(obj, i); 883 884 if (sym->st_shndx != sec_idx) 885 continue; 886 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) 887 continue; 888 889 prog_sz = sym->st_size; 890 sec_off = sym->st_value; 891 892 name = elf_sym_str(obj, sym->st_name); 893 if (!name) { 894 pr_warn("sec '%s': failed to get symbol name for offset %zu\n", 895 sec_name, sec_off); 896 return -LIBBPF_ERRNO__FORMAT; 897 } 898 899 if (sec_off + prog_sz > sec_sz) { 900 pr_warn("sec '%s': program at offset %zu crosses section boundary\n", 901 sec_name, sec_off); 902 return -LIBBPF_ERRNO__FORMAT; 903 } 904 905 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { 906 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); 907 return -ENOTSUP; 908 } 909 910 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", 911 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); 912 913 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs)); 914 if (!progs) { 915 /* 916 * In this case the original obj->programs 917 * is still valid, so don't need special treat for 918 * bpf_close_object(). 
919 */ 920 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n", 921 sec_name, name); 922 return -ENOMEM; 923 } 924 obj->programs = progs; 925 926 prog = &progs[nr_progs]; 927 928 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name, 929 sec_off, data + sec_off, prog_sz); 930 if (err) 931 return err; 932 933 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) 934 prog->sym_global = true; 935 936 /* if function is a global/weak symbol, but has restricted 937 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC 938 * as static to enable more permissive BPF verification mode 939 * with more outside context available to BPF verifier 940 */ 941 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 942 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) 943 prog->mark_btf_static = true; 944 945 nr_progs++; 946 obj->nr_programs = nr_progs; 947 } 948 949 return 0; 950 } 951 952 static void bpf_object_bswap_progs(struct bpf_object *obj) 953 { 954 struct bpf_program *prog = obj->programs; 955 struct bpf_insn *insn; 956 int p, i; 957 958 for (p = 0; p < obj->nr_programs; p++, prog++) { 959 insn = prog->insns; 960 for (i = 0; i < prog->insns_cnt; i++, insn++) 961 bpf_insn_bswap(insn); 962 } 963 pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); 964 } 965 966 static const struct btf_member * 967 find_member_by_offset(const struct btf_type *t, __u32 bit_offset) 968 { 969 struct btf_member *m; 970 int i; 971 972 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 973 if (btf_member_bit_offset(t, i) == bit_offset) 974 return m; 975 } 976 977 return NULL; 978 } 979 980 static const struct btf_member * 981 find_member_by_name(const struct btf *btf, const struct btf_type *t, 982 const char *name) 983 { 984 struct btf_member *m; 985 int i; 986 987 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 988 if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) 989 return m; 990 } 991 992 return NULL; 993 } 994 995 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 996 __u16 kind, struct btf **res_btf, 997 struct module_btf **res_mod_btf); 998 999 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" 1000 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 1001 const char *name, __u32 kind); 1002 1003 static int 1004 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, 1005 struct module_btf **mod_btf, 1006 const struct btf_type **type, __u32 *type_id, 1007 const struct btf_type **vtype, __u32 *vtype_id, 1008 const struct btf_member **data_member) 1009 { 1010 const struct btf_type *kern_type, *kern_vtype; 1011 const struct btf_member *kern_data_member; 1012 struct btf *btf = NULL; 1013 __s32 kern_vtype_id, kern_type_id; 1014 char tname[256]; 1015 __u32 i; 1016 1017 snprintf(tname, sizeof(tname), "%.*s", 1018 (int)bpf_core_essential_name_len(tname_raw), tname_raw); 1019 1020 kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, 1021 &btf, mod_btf); 1022 if (kern_type_id < 0) { 1023 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", 1024 tname); 1025 return kern_type_id; 1026 } 1027 kern_type = btf__type_by_id(btf, kern_type_id); 1028 1029 /* Find the corresponding "map_value" type that will be used 1030 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, 1031 * find "struct bpf_struct_ops_tcp_congestion_ops" from the 1032 * btf_vmlinux. 
1033 */ 1034 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, 1035 tname, BTF_KIND_STRUCT); 1036 if (kern_vtype_id < 0) { 1037 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", 1038 STRUCT_OPS_VALUE_PREFIX, tname); 1039 return kern_vtype_id; 1040 } 1041 kern_vtype = btf__type_by_id(btf, kern_vtype_id); 1042 1043 /* Find "struct tcp_congestion_ops" from 1044 * struct bpf_struct_ops_tcp_congestion_ops { 1045 * [ ... ] 1046 * struct tcp_congestion_ops data; 1047 * } 1048 */ 1049 kern_data_member = btf_members(kern_vtype); 1050 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { 1051 if (kern_data_member->type == kern_type_id) 1052 break; 1053 } 1054 if (i == btf_vlen(kern_vtype)) { 1055 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", 1056 tname, STRUCT_OPS_VALUE_PREFIX, tname); 1057 return -EINVAL; 1058 } 1059 1060 *type = kern_type; 1061 *type_id = kern_type_id; 1062 *vtype = kern_vtype; 1063 *vtype_id = kern_vtype_id; 1064 *data_member = kern_data_member; 1065 1066 return 0; 1067 } 1068 1069 static bool bpf_map__is_struct_ops(const struct bpf_map *map) 1070 { 1071 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; 1072 } 1073 1074 static bool is_valid_st_ops_program(struct bpf_object *obj, 1075 const struct bpf_program *prog) 1076 { 1077 int i; 1078 1079 for (i = 0; i < obj->nr_programs; i++) { 1080 if (&obj->programs[i] == prog) 1081 return prog->type == BPF_PROG_TYPE_STRUCT_OPS; 1082 } 1083 1084 return false; 1085 } 1086 1087 /* For each struct_ops program P, referenced from some struct_ops map M, 1088 * enable P.autoload if there are Ms for which M.autocreate is true, 1089 * disable P.autoload if for all Ms M.autocreate is false. 1090 * Don't change P.autoload for programs that are not referenced from any maps. 
1091 */ 1092 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) 1093 { 1094 struct bpf_program *prog, *slot_prog; 1095 struct bpf_map *map; 1096 int i, j, k, vlen; 1097 1098 for (i = 0; i < obj->nr_programs; ++i) { 1099 int should_load = false; 1100 int use_cnt = 0; 1101 1102 prog = &obj->programs[i]; 1103 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) 1104 continue; 1105 1106 for (j = 0; j < obj->nr_maps; ++j) { 1107 const struct btf_type *type; 1108 1109 map = &obj->maps[j]; 1110 if (!bpf_map__is_struct_ops(map)) 1111 continue; 1112 1113 type = btf__type_by_id(obj->btf, map->st_ops->type_id); 1114 vlen = btf_vlen(type); 1115 for (k = 0; k < vlen; ++k) { 1116 slot_prog = map->st_ops->progs[k]; 1117 if (prog != slot_prog) 1118 continue; 1119 1120 use_cnt++; 1121 if (map->autocreate) 1122 should_load = true; 1123 } 1124 } 1125 if (use_cnt) 1126 prog->autoload = should_load; 1127 } 1128 1129 return 0; 1130 } 1131 1132 /* Init the map's fields that depend on kern_btf */ 1133 static int bpf_map__init_kern_struct_ops(struct bpf_map *map) 1134 { 1135 const struct btf_member *member, *kern_member, *kern_data_member; 1136 const struct btf_type *type, *kern_type, *kern_vtype; 1137 __u32 i, kern_type_id, kern_vtype_id, kern_data_off; 1138 struct bpf_object *obj = map->obj; 1139 const struct btf *btf = obj->btf; 1140 struct bpf_struct_ops *st_ops; 1141 const struct btf *kern_btf; 1142 struct module_btf *mod_btf = NULL; 1143 void *data, *kern_data; 1144 const char *tname; 1145 int err; 1146 1147 st_ops = map->st_ops; 1148 type = btf__type_by_id(btf, st_ops->type_id); 1149 tname = btf__name_by_offset(btf, type->name_off); 1150 err = find_struct_ops_kern_types(obj, tname, &mod_btf, 1151 &kern_type, &kern_type_id, 1152 &kern_vtype, &kern_vtype_id, 1153 &kern_data_member); 1154 if (err) 1155 return err; 1156 1157 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; 1158 1159 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", 1160 map->name, st_ops->type_id, kern_type_id, kern_vtype_id); 1161 1162 map->mod_btf_fd = mod_btf ? 
mod_btf->fd : -1; 1163 map->def.value_size = kern_vtype->size; 1164 map->btf_vmlinux_value_type_id = kern_vtype_id; 1165 1166 st_ops->kern_vdata = calloc(1, kern_vtype->size); 1167 if (!st_ops->kern_vdata) 1168 return -ENOMEM; 1169 1170 data = st_ops->data; 1171 kern_data_off = kern_data_member->offset / 8; 1172 kern_data = st_ops->kern_vdata + kern_data_off; 1173 1174 member = btf_members(type); 1175 for (i = 0; i < btf_vlen(type); i++, member++) { 1176 const struct btf_type *mtype, *kern_mtype; 1177 __u32 mtype_id, kern_mtype_id; 1178 void *mdata, *kern_mdata; 1179 struct bpf_program *prog; 1180 __s64 msize, kern_msize; 1181 __u32 moff, kern_moff; 1182 __u32 kern_member_idx; 1183 const char *mname; 1184 1185 mname = btf__name_by_offset(btf, member->name_off); 1186 moff = member->offset / 8; 1187 mdata = data + moff; 1188 msize = btf__resolve_size(btf, member->type); 1189 if (msize < 0) { 1190 pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n", 1191 map->name, mname); 1192 return msize; 1193 } 1194 1195 kern_member = find_member_by_name(kern_btf, kern_type, mname); 1196 if (!kern_member) { 1197 if (!libbpf_is_mem_zeroed(mdata, msize)) { 1198 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", 1199 map->name, mname); 1200 return -ENOTSUP; 1201 } 1202 1203 if (st_ops->progs[i]) { 1204 /* If we had declaratively set struct_ops callback, we need to 1205 * force its autoload to false, because it doesn't have 1206 * a chance of succeeding from POV of the current struct_ops map. 1207 * If this program is still referenced somewhere else, though, 1208 * then bpf_object_adjust_struct_ops_autoload() will update its 1209 * autoload accordingly. 1210 */ 1211 st_ops->progs[i]->autoload = false; 1212 st_ops->progs[i] = NULL; 1213 } 1214 1215 /* Skip all-zero/NULL fields if they are not present in the kernel BTF */ 1216 pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n", 1217 map->name, mname); 1218 continue; 1219 } 1220 1221 kern_member_idx = kern_member - btf_members(kern_type); 1222 if (btf_member_bitfield_size(type, i) || 1223 btf_member_bitfield_size(kern_type, kern_member_idx)) { 1224 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", 1225 map->name, mname); 1226 return -ENOTSUP; 1227 } 1228 1229 kern_moff = kern_member->offset / 8; 1230 kern_mdata = kern_data + kern_moff; 1231 1232 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); 1233 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, 1234 &kern_mtype_id); 1235 if (BTF_INFO_KIND(mtype->info) != 1236 BTF_INFO_KIND(kern_mtype->info)) { 1237 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", 1238 map->name, mname, BTF_INFO_KIND(mtype->info), 1239 BTF_INFO_KIND(kern_mtype->info)); 1240 return -ENOTSUP; 1241 } 1242 1243 if (btf_is_ptr(mtype)) { 1244 prog = *(void **)mdata; 1245 /* just like for !kern_member case above, reset declaratively 1246 * set (at compile time) program's autload to false, 1247 * if user replaced it with another program or NULL 1248 */ 1249 if (st_ops->progs[i] && st_ops->progs[i] != prog) 1250 st_ops->progs[i]->autoload = false; 1251 1252 /* Update the value from the shadow type */ 1253 st_ops->progs[i] = prog; 1254 if (!prog) 1255 continue; 1256 1257 if (!is_valid_st_ops_program(obj, prog)) { 1258 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", 1259 map->name, mname); 1260 return -ENOTSUP; 1261 } 1262 1263 kern_mtype = 
skip_mods_and_typedefs(kern_btf, 1264 kern_mtype->type, 1265 &kern_mtype_id); 1266 1267 /* mtype->type must be a func_proto which was 1268 * guaranteed in bpf_object__collect_st_ops_relos(), 1269 * so only check kern_mtype for func_proto here. 1270 */ 1271 if (!btf_is_func_proto(kern_mtype)) { 1272 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", 1273 map->name, mname); 1274 return -ENOTSUP; 1275 } 1276 1277 if (mod_btf) 1278 prog->attach_btf_obj_fd = mod_btf->fd; 1279 1280 /* if we haven't yet processed this BPF program, record proper 1281 * attach_btf_id and member_idx 1282 */ 1283 if (!prog->attach_btf_id) { 1284 prog->attach_btf_id = kern_type_id; 1285 prog->expected_attach_type = kern_member_idx; 1286 } 1287 1288 /* struct_ops BPF prog can be re-used between multiple 1289 * .struct_ops & .struct_ops.link as long as it's the 1290 * same struct_ops struct definition and the same 1291 * function pointer field 1292 */ 1293 if (prog->attach_btf_id != kern_type_id) { 1294 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n", 1295 map->name, mname, prog->name, prog->sec_name, prog->type, 1296 prog->attach_btf_id, kern_type_id); 1297 return -EINVAL; 1298 } 1299 if (prog->expected_attach_type != kern_member_idx) { 1300 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n", 1301 map->name, mname, prog->name, prog->sec_name, prog->type, 1302 prog->expected_attach_type, kern_member_idx); 1303 return -EINVAL; 1304 } 1305 1306 st_ops->kern_func_off[i] = kern_data_off + kern_moff; 1307 1308 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", 1309 map->name, mname, prog->name, moff, 1310 kern_moff); 1311 1312 continue; 1313 } 1314 1315 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); 1316 if (kern_msize < 0 || msize != kern_msize) { 1317 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", 1318 map->name, mname, (ssize_t)msize, 1319 (ssize_t)kern_msize); 1320 return -ENOTSUP; 1321 } 1322 1323 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", 1324 map->name, mname, (unsigned int)msize, 1325 moff, kern_moff); 1326 memcpy(kern_mdata, mdata, msize); 1327 } 1328 1329 return 0; 1330 } 1331 1332 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) 1333 { 1334 struct bpf_map *map; 1335 size_t i; 1336 int err; 1337 1338 for (i = 0; i < obj->nr_maps; i++) { 1339 map = &obj->maps[i]; 1340 1341 if (!bpf_map__is_struct_ops(map)) 1342 continue; 1343 1344 if (!map->autocreate) 1345 continue; 1346 1347 err = bpf_map__init_kern_struct_ops(map); 1348 if (err) 1349 return err; 1350 } 1351 1352 return 0; 1353 } 1354 1355 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, 1356 int shndx, Elf_Data *data) 1357 { 1358 const struct btf_type *type, *datasec; 1359 const struct btf_var_secinfo *vsi; 1360 struct bpf_struct_ops *st_ops; 1361 const char *tname, *var_name; 1362 __s32 type_id, datasec_id; 1363 const struct btf *btf; 1364 struct bpf_map *map; 1365 __u32 i; 1366 1367 if (shndx == -1) 1368 return 0; 1369 1370 btf = obj->btf; 1371 datasec_id = btf__find_by_name_kind(btf, sec_name, 1372 BTF_KIND_DATASEC); 1373 if (datasec_id < 0) { 1374 pr_warn("struct_ops init: DATASEC %s not found\n", 1375 sec_name); 1376 return -EINVAL; 1377 } 1378 1379 datasec = 
btf__type_by_id(btf, datasec_id); 1380 vsi = btf_var_secinfos(datasec); 1381 for (i = 0; i < btf_vlen(datasec); i++, vsi++) { 1382 type = btf__type_by_id(obj->btf, vsi->type); 1383 var_name = btf__name_by_offset(obj->btf, type->name_off); 1384 1385 type_id = btf__resolve_type(obj->btf, vsi->type); 1386 if (type_id < 0) { 1387 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", 1388 vsi->type, sec_name); 1389 return -EINVAL; 1390 } 1391 1392 type = btf__type_by_id(obj->btf, type_id); 1393 tname = btf__name_by_offset(obj->btf, type->name_off); 1394 if (!tname[0]) { 1395 pr_warn("struct_ops init: anonymous type is not supported\n"); 1396 return -ENOTSUP; 1397 } 1398 if (!btf_is_struct(type)) { 1399 pr_warn("struct_ops init: %s is not a struct\n", tname); 1400 return -EINVAL; 1401 } 1402 1403 map = bpf_object__add_map(obj); 1404 if (IS_ERR(map)) 1405 return PTR_ERR(map); 1406 1407 map->sec_idx = shndx; 1408 map->sec_offset = vsi->offset; 1409 map->name = strdup(var_name); 1410 if (!map->name) 1411 return -ENOMEM; 1412 map->btf_value_type_id = type_id; 1413 1414 /* Follow same convention as for programs autoload: 1415 * SEC("?.struct_ops") means map is not created by default. 1416 */ 1417 if (sec_name[0] == '?') { 1418 map->autocreate = false; 1419 /* from now on forget there was ? in section name */ 1420 sec_name++; 1421 } 1422 1423 map->def.type = BPF_MAP_TYPE_STRUCT_OPS; 1424 map->def.key_size = sizeof(int); 1425 map->def.value_size = type->size; 1426 map->def.max_entries = 1; 1427 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; 1428 map->autoattach = true; 1429 1430 map->st_ops = calloc(1, sizeof(*map->st_ops)); 1431 if (!map->st_ops) 1432 return -ENOMEM; 1433 st_ops = map->st_ops; 1434 st_ops->data = malloc(type->size); 1435 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); 1436 st_ops->kern_func_off = malloc(btf_vlen(type) * 1437 sizeof(*st_ops->kern_func_off)); 1438 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) 1439 return -ENOMEM; 1440 1441 if (vsi->offset + type->size > data->d_size) { 1442 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", 1443 var_name, sec_name); 1444 return -EINVAL; 1445 } 1446 1447 memcpy(st_ops->data, 1448 data->d_buf + vsi->offset, 1449 type->size); 1450 st_ops->type_id = type_id; 1451 1452 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", 1453 tname, type_id, var_name, vsi->offset); 1454 } 1455 1456 return 0; 1457 } 1458 1459 static int bpf_object_init_struct_ops(struct bpf_object *obj) 1460 { 1461 const char *sec_name; 1462 int sec_idx, err; 1463 1464 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) { 1465 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx]; 1466 1467 if (desc->sec_type != SEC_ST_OPS) 1468 continue; 1469 1470 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1471 if (!sec_name) 1472 return -LIBBPF_ERRNO__FORMAT; 1473 1474 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data); 1475 if (err) 1476 return err; 1477 } 1478 1479 return 0; 1480 } 1481 1482 static struct bpf_object *bpf_object__new(const char *path, 1483 const void *obj_buf, 1484 size_t obj_buf_sz, 1485 const char *obj_name) 1486 { 1487 struct bpf_object *obj; 1488 char *end; 1489 1490 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); 1491 if (!obj) { 1492 pr_warn("alloc memory failed for %s\n", path); 1493 return ERR_PTR(-ENOMEM); 1494 } 1495 1496 strcpy(obj->path, path); 1497 if (obj_name) { 1498 
libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); 1499 } else { 1500 /* Using basename() GNU version which doesn't modify arg. */ 1501 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); 1502 end = strchr(obj->name, '.'); 1503 if (end) 1504 *end = 0; 1505 } 1506 1507 obj->efile.fd = -1; 1508 /* 1509 * Caller of this function should also call 1510 * bpf_object__elf_finish() after data collection to return 1511 * obj_buf to user. If not, we should duplicate the buffer to 1512 * avoid user freeing them before elf finish. 1513 */ 1514 obj->efile.obj_buf = obj_buf; 1515 obj->efile.obj_buf_sz = obj_buf_sz; 1516 obj->efile.btf_maps_shndx = -1; 1517 obj->kconfig_map_idx = -1; 1518 1519 obj->kern_version = get_kernel_version(); 1520 obj->state = OBJ_OPEN; 1521 1522 return obj; 1523 } 1524 1525 static void bpf_object__elf_finish(struct bpf_object *obj) 1526 { 1527 if (!obj->efile.elf) 1528 return; 1529 1530 elf_end(obj->efile.elf); 1531 obj->efile.elf = NULL; 1532 obj->efile.ehdr = NULL; 1533 obj->efile.symbols = NULL; 1534 obj->efile.arena_data = NULL; 1535 1536 zfree(&obj->efile.secs); 1537 obj->efile.sec_cnt = 0; 1538 zclose(obj->efile.fd); 1539 obj->efile.obj_buf = NULL; 1540 obj->efile.obj_buf_sz = 0; 1541 } 1542 1543 static int bpf_object__elf_init(struct bpf_object *obj) 1544 { 1545 Elf64_Ehdr *ehdr; 1546 int err = 0; 1547 Elf *elf; 1548 1549 if (obj->efile.elf) { 1550 pr_warn("elf: init internal error\n"); 1551 return -LIBBPF_ERRNO__LIBELF; 1552 } 1553 1554 if (obj->efile.obj_buf_sz > 0) { 1555 /* obj_buf should have been validated by bpf_object__open_mem(). */ 1556 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); 1557 } else { 1558 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); 1559 if (obj->efile.fd < 0) { 1560 err = -errno; 1561 pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err)); 1562 return err; 1563 } 1564 1565 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); 1566 } 1567 1568 if (!elf) { 1569 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); 1570 err = -LIBBPF_ERRNO__LIBELF; 1571 goto errout; 1572 } 1573 1574 obj->efile.elf = elf; 1575 1576 if (elf_kind(elf) != ELF_K_ELF) { 1577 err = -LIBBPF_ERRNO__FORMAT; 1578 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); 1579 goto errout; 1580 } 1581 1582 if (gelf_getclass(elf) != ELFCLASS64) { 1583 err = -LIBBPF_ERRNO__FORMAT; 1584 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); 1585 goto errout; 1586 } 1587 1588 obj->efile.ehdr = ehdr = elf64_getehdr(elf); 1589 if (!obj->efile.ehdr) { 1590 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); 1591 err = -LIBBPF_ERRNO__FORMAT; 1592 goto errout; 1593 } 1594 1595 /* Validate ELF object endianness... */ 1596 if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && 1597 ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { 1598 err = -LIBBPF_ERRNO__ENDIAN; 1599 pr_warn("elf: '%s' has unknown byte order\n", obj->path); 1600 goto errout; 1601 } 1602 /* and save after bpf_object_open() frees ELF data */ 1603 obj->byteorder = ehdr->e_ident[EI_DATA]; 1604 1605 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { 1606 pr_warn("elf: failed to get section names section index for %s: %s\n", 1607 obj->path, elf_errmsg(-1)); 1608 err = -LIBBPF_ERRNO__FORMAT; 1609 goto errout; 1610 } 1611 1612 /* ELF is corrupted/truncated, avoid calling elf_strptr. 
*/ 1613 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { 1614 pr_warn("elf: failed to get section names strings from %s: %s\n", 1615 obj->path, elf_errmsg(-1)); 1616 err = -LIBBPF_ERRNO__FORMAT; 1617 goto errout; 1618 } 1619 1620 /* Old LLVM set e_machine to EM_NONE */ 1621 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { 1622 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); 1623 err = -LIBBPF_ERRNO__FORMAT; 1624 goto errout; 1625 } 1626 1627 return 0; 1628 errout: 1629 bpf_object__elf_finish(obj); 1630 return err; 1631 } 1632 1633 static bool is_native_endianness(struct bpf_object *obj) 1634 { 1635 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 1636 return obj->byteorder == ELFDATA2LSB; 1637 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 1638 return obj->byteorder == ELFDATA2MSB; 1639 #else 1640 # error "Unrecognized __BYTE_ORDER__" 1641 #endif 1642 } 1643 1644 static int 1645 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) 1646 { 1647 if (!data) { 1648 pr_warn("invalid license section in %s\n", obj->path); 1649 return -LIBBPF_ERRNO__FORMAT; 1650 } 1651 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't 1652 * go over allowed ELF data section buffer 1653 */ 1654 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); 1655 pr_debug("license of %s is %s\n", obj->path, obj->license); 1656 return 0; 1657 } 1658 1659 static int 1660 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) 1661 { 1662 __u32 kver; 1663 1664 if (!data || size != sizeof(kver)) { 1665 pr_warn("invalid kver section in %s\n", obj->path); 1666 return -LIBBPF_ERRNO__FORMAT; 1667 } 1668 memcpy(&kver, data, sizeof(kver)); 1669 obj->kern_version = kver; 1670 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); 1671 return 0; 1672 } 1673 1674 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) 1675 { 1676 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || 1677 type == BPF_MAP_TYPE_HASH_OF_MAPS) 1678 return true; 1679 return false; 1680 } 1681 1682 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) 1683 { 1684 Elf_Data *data; 1685 Elf_Scn *scn; 1686 1687 if (!name) 1688 return -EINVAL; 1689 1690 scn = elf_sec_by_name(obj, name); 1691 data = elf_sec_data(obj, scn); 1692 if (data) { 1693 *size = data->d_size; 1694 return 0; /* found it */ 1695 } 1696 1697 return -ENOENT; 1698 } 1699 1700 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) 1701 { 1702 Elf_Data *symbols = obj->efile.symbols; 1703 const char *sname; 1704 size_t si; 1705 1706 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { 1707 Elf64_Sym *sym = elf_sym_by_idx(obj, si); 1708 1709 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) 1710 continue; 1711 1712 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && 1713 ELF64_ST_BIND(sym->st_info) != STB_WEAK) 1714 continue; 1715 1716 sname = elf_sym_str(obj, sym->st_name); 1717 if (!sname) { 1718 pr_warn("failed to get sym name string for var %s\n", name); 1719 return ERR_PTR(-EIO); 1720 } 1721 if (strcmp(name, sname) == 0) 1722 return sym; 1723 } 1724 1725 return ERR_PTR(-ENOENT); 1726 } 1727 1728 /* Some versions of Android don't provide memfd_create() in their libc 1729 * implementation, so avoid complications and just go straight to Linux 1730 * syscall. 
1731 */ 1732 static int sys_memfd_create(const char *name, unsigned flags) 1733 { 1734 return syscall(__NR_memfd_create, name, flags); 1735 } 1736 1737 #ifndef MFD_CLOEXEC 1738 #define MFD_CLOEXEC 0x0001U 1739 #endif 1740 #ifndef MFD_NOEXEC_SEAL 1741 #define MFD_NOEXEC_SEAL 0x0008U 1742 #endif 1743 1744 static int create_placeholder_fd(void) 1745 { 1746 unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL; 1747 const char *name = "libbpf-placeholder-fd"; 1748 int fd; 1749 1750 fd = ensure_good_fd(sys_memfd_create(name, flags)); 1751 if (fd >= 0) 1752 return fd; 1753 else if (errno != EINVAL) 1754 return -errno; 1755 1756 /* Possibly running on kernel without MFD_NOEXEC_SEAL */ 1757 fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL)); 1758 if (fd < 0) 1759 return -errno; 1760 return fd; 1761 } 1762 1763 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) 1764 { 1765 struct bpf_map *map; 1766 int err; 1767 1768 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, 1769 sizeof(*obj->maps), obj->nr_maps + 1); 1770 if (err) 1771 return ERR_PTR(err); 1772 1773 map = &obj->maps[obj->nr_maps++]; 1774 map->obj = obj; 1775 /* Preallocate map FD without actually creating BPF map just yet. 1776 * These map FD "placeholders" will be reused later without changing 1777 * FD value when map is actually created in the kernel. 1778 * 1779 * This is useful to be able to perform BPF program relocations 1780 * without having to create BPF maps before that step. This allows us 1781 * to finalize and load BTF very late in BPF object's loading phase, 1782 * right before BPF maps have to be created and BPF programs have to 1783 * be loaded. By having these map FD placeholders we can perform all 1784 * the sanitizations, relocations, and any other adjustments before we 1785 * start creating actual BPF kernel objects (BTF, maps, progs). 
1786 */ 1787 map->fd = create_placeholder_fd(); 1788 if (map->fd < 0) 1789 return ERR_PTR(map->fd); 1790 map->inner_map_fd = -1; 1791 map->autocreate = true; 1792 1793 return map; 1794 } 1795 1796 static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) 1797 { 1798 const long page_sz = sysconf(_SC_PAGE_SIZE); 1799 size_t map_sz; 1800 1801 map_sz = (size_t)roundup(value_sz, 8) * max_entries; 1802 map_sz = roundup(map_sz, page_sz); 1803 return map_sz; 1804 } 1805 1806 static size_t bpf_map_mmap_sz(const struct bpf_map *map) 1807 { 1808 const long page_sz = sysconf(_SC_PAGE_SIZE); 1809 1810 switch (map->def.type) { 1811 case BPF_MAP_TYPE_ARRAY: 1812 return array_map_mmap_sz(map->def.value_size, map->def.max_entries); 1813 case BPF_MAP_TYPE_ARENA: 1814 return page_sz * map->def.max_entries; 1815 default: 1816 return 0; /* not supported */ 1817 } 1818 } 1819 1820 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) 1821 { 1822 void *mmaped; 1823 1824 if (!map->mmaped) 1825 return -EINVAL; 1826 1827 if (old_sz == new_sz) 1828 return 0; 1829 1830 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1831 if (mmaped == MAP_FAILED) 1832 return -errno; 1833 1834 memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); 1835 munmap(map->mmaped, old_sz); 1836 map->mmaped = mmaped; 1837 return 0; 1838 } 1839 1840 static char *internal_map_name(struct bpf_object *obj, const char *real_name) 1841 { 1842 char map_name[BPF_OBJ_NAME_LEN], *p; 1843 int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); 1844 1845 /* This is one of the more confusing parts of libbpf for various 1846 * reasons, some of which are historical. The original idea for naming 1847 * internal names was to include as much of BPF object name prefix as 1848 * possible, so that it can be distinguished from similar internal 1849 * maps of a different BPF object. 1850 * As an example, let's say we have bpf_object named 'my_object_name' 1851 * and internal map corresponding to '.rodata' ELF section. The final 1852 * map name advertised to user and to the kernel will be 1853 * 'my_objec.rodata', taking first 8 characters of object name and 1854 * entire 7 characters of '.rodata'. 1855 * Somewhat confusingly, if internal map ELF section name is shorter 1856 * than 7 characters, e.g., '.bss', we still reserve 7 characters 1857 * for the suffix, even though we only have 4 actual characters, and 1858 * resulting map will be called 'my_objec.bss', not even using all 15 1859 * characters allowed by the kernel. Oh well, at least the truncated 1860 * object name is somewhat consistent in this case. But if the map 1861 * name is '.kconfig', we'll still have entirety of '.kconfig' added 1862 * (8 chars) and thus will be left with only first 7 characters of the 1863 * object name ('my_obje'). Happy guessing, user, that the final map 1864 * name will be "my_obje.kconfig". 1865 * Now, with libbpf starting to support arbitrarily named .rodata.* 1866 * and .data.* data sections, it's possible that ELF section name is 1867 * longer than allowed 15 chars, so we now need to be careful to take 1868 * only up to 15 first characters of ELF name, taking no BPF object 1869 * name characters at all. So '.rodata.abracadabra' will result in 1870 * '.rodata.abracad' kernel and user-visible name. 
1871 * We need to keep this convoluted logic intact for .data, .bss and
1872 * .rodata maps, but for new custom .data.custom and .rodata.custom
1873 * maps we use their ELF names as is, not prepending bpf_object name
1874 * in front. We still need to truncate them to 15 characters for the
1875 * kernel. Full name can be recovered for such maps by using DATASEC
1876 * BTF type associated with such map's value type, though.
1877 */
1878 if (sfx_len >= BPF_OBJ_NAME_LEN)
1879 sfx_len = BPF_OBJ_NAME_LEN - 1;
1880
1881 /* if there are two or more dots in map name, it's a custom dot map */
1882 if (strchr(real_name + 1, '.') != NULL)
1883 pfx_len = 0;
1884 else
1885 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1886
1887 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1888 sfx_len, real_name);
1889
1890 /* sanitize map name to characters allowed by kernel */
1891 for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1892 if (!isalnum(*p) && *p != '_' && *p != '.')
1893 *p = '_';
1894
1895 return strdup(map_name);
1896 }
1897
1898 static int
1899 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1900
1901 /* Internal BPF map is mmap()'able only if at least one of the corresponding
1902 * DATASEC's VARs is to be exposed through BPF skeleton. I.e., it's a GLOBAL
1903 * variable and it's not marked as __hidden (which turns it into, effectively,
1904 * a STATIC variable).
1905 */
1906 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1907 {
1908 const struct btf_type *t, *vt;
1909 struct btf_var_secinfo *vsi;
1910 int i, n;
1911
1912 if (!map->btf_value_type_id)
1913 return false;
1914
1915 t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1916 if (!btf_is_datasec(t))
1917 return false;
1918
1919 vsi = btf_var_secinfos(t);
1920 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1921 vt = btf__type_by_id(obj->btf, vsi->type);
1922 if (!btf_is_var(vt))
1923 continue;
1924
1925 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1926 return true;
1927 }
1928
1929 return false;
1930 }
1931
1932 static int
1933 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1934 const char *real_name, int sec_idx, void *data, size_t data_sz)
1935 {
1936 struct bpf_map_def *def;
1937 struct bpf_map *map;
1938 size_t mmap_sz;
1939 int err;
1940
1941 map = bpf_object__add_map(obj);
1942 if (IS_ERR(map))
1943 return PTR_ERR(map);
1944
1945 map->libbpf_type = type;
1946 map->sec_idx = sec_idx;
1947 map->sec_offset = 0;
1948 map->real_name = strdup(real_name);
1949 map->name = internal_map_name(obj, real_name);
1950 if (!map->real_name || !map->name) {
1951 zfree(&map->real_name);
1952 zfree(&map->name);
1953 return -ENOMEM;
1954 }
1955
1956 def = &map->def;
1957 def->type = BPF_MAP_TYPE_ARRAY;
1958 def->key_size = sizeof(int);
1959 def->value_size = data_sz;
1960 def->max_entries = 1;
1961 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1962 ?
BPF_F_RDONLY_PROG : 0; 1963 1964 /* failures are fine because of maps like .rodata.str1.1 */ 1965 (void) map_fill_btf_type_info(obj, map); 1966 1967 if (map_is_mmapable(obj, map)) 1968 def->map_flags |= BPF_F_MMAPABLE; 1969 1970 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 1971 map->name, map->sec_idx, map->sec_offset, def->map_flags); 1972 1973 mmap_sz = bpf_map_mmap_sz(map); 1974 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1975 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1976 if (map->mmaped == MAP_FAILED) { 1977 err = -errno; 1978 map->mmaped = NULL; 1979 pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); 1980 zfree(&map->real_name); 1981 zfree(&map->name); 1982 return err; 1983 } 1984 1985 if (data) 1986 memcpy(map->mmaped, data, data_sz); 1987 1988 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); 1989 return 0; 1990 } 1991 1992 static int bpf_object__init_global_data_maps(struct bpf_object *obj) 1993 { 1994 struct elf_sec_desc *sec_desc; 1995 const char *sec_name; 1996 int err = 0, sec_idx; 1997 1998 /* 1999 * Populate obj->maps with libbpf internal maps. 2000 */ 2001 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { 2002 sec_desc = &obj->efile.secs[sec_idx]; 2003 2004 /* Skip recognized sections with size 0. */ 2005 if (!sec_desc->data || sec_desc->data->d_size == 0) 2006 continue; 2007 2008 switch (sec_desc->sec_type) { 2009 case SEC_DATA: 2010 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 2011 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, 2012 sec_name, sec_idx, 2013 sec_desc->data->d_buf, 2014 sec_desc->data->d_size); 2015 break; 2016 case SEC_RODATA: 2017 obj->has_rodata = true; 2018 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 2019 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, 2020 sec_name, sec_idx, 2021 sec_desc->data->d_buf, 2022 sec_desc->data->d_size); 2023 break; 2024 case SEC_BSS: 2025 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 2026 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, 2027 sec_name, sec_idx, 2028 NULL, 2029 sec_desc->data->d_size); 2030 break; 2031 default: 2032 /* skip */ 2033 break; 2034 } 2035 if (err) 2036 return err; 2037 } 2038 return 0; 2039 } 2040 2041 2042 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, 2043 const void *name) 2044 { 2045 int i; 2046 2047 for (i = 0; i < obj->nr_extern; i++) { 2048 if (strcmp(obj->externs[i].name, name) == 0) 2049 return &obj->externs[i]; 2050 } 2051 return NULL; 2052 } 2053 2054 static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj, 2055 const void *name, int len) 2056 { 2057 const char *ext_name; 2058 int i; 2059 2060 for (i = 0; i < obj->nr_extern; i++) { 2061 ext_name = obj->externs[i].name; 2062 if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0) 2063 return &obj->externs[i]; 2064 } 2065 return NULL; 2066 } 2067 2068 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, 2069 char value) 2070 { 2071 switch (ext->kcfg.type) { 2072 case KCFG_BOOL: 2073 if (value == 'm') { 2074 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", 2075 ext->name, value); 2076 return -EINVAL; 2077 } 2078 *(bool *)ext_val = value == 'y' ? 
true : false; 2079 break; 2080 case KCFG_TRISTATE: 2081 if (value == 'y') 2082 *(enum libbpf_tristate *)ext_val = TRI_YES; 2083 else if (value == 'm') 2084 *(enum libbpf_tristate *)ext_val = TRI_MODULE; 2085 else /* value == 'n' */ 2086 *(enum libbpf_tristate *)ext_val = TRI_NO; 2087 break; 2088 case KCFG_CHAR: 2089 *(char *)ext_val = value; 2090 break; 2091 case KCFG_UNKNOWN: 2092 case KCFG_INT: 2093 case KCFG_CHAR_ARR: 2094 default: 2095 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", 2096 ext->name, value); 2097 return -EINVAL; 2098 } 2099 ext->is_set = true; 2100 return 0; 2101 } 2102 2103 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, 2104 const char *value) 2105 { 2106 size_t len; 2107 2108 if (ext->kcfg.type != KCFG_CHAR_ARR) { 2109 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", 2110 ext->name, value); 2111 return -EINVAL; 2112 } 2113 2114 len = strlen(value); 2115 if (len < 2 || value[len - 1] != '"') { 2116 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", 2117 ext->name, value); 2118 return -EINVAL; 2119 } 2120 2121 /* strip quotes */ 2122 len -= 2; 2123 if (len >= ext->kcfg.sz) { 2124 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", 2125 ext->name, value, len, ext->kcfg.sz - 1); 2126 len = ext->kcfg.sz - 1; 2127 } 2128 memcpy(ext_val, value + 1, len); 2129 ext_val[len] = '\0'; 2130 ext->is_set = true; 2131 return 0; 2132 } 2133 2134 static int parse_u64(const char *value, __u64 *res) 2135 { 2136 char *value_end; 2137 int err; 2138 2139 errno = 0; 2140 *res = strtoull(value, &value_end, 0); 2141 if (errno) { 2142 err = -errno; 2143 pr_warn("failed to parse '%s': %s\n", value, errstr(err)); 2144 return err; 2145 } 2146 if (*value_end) { 2147 pr_warn("failed to parse '%s' as integer completely\n", value); 2148 return -EINVAL; 2149 } 2150 return 0; 2151 } 2152 2153 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) 2154 { 2155 int bit_sz = ext->kcfg.sz * 8; 2156 2157 if (ext->kcfg.sz == 8) 2158 return true; 2159 2160 /* Validate that value stored in u64 fits in integer of `ext->sz` 2161 * bytes size without any loss of information. If the target integer 2162 * is signed, we rely on the following limits of integer type of 2163 * Y bits and subsequent transformation: 2164 * 2165 * -2^(Y-1) <= X <= 2^(Y-1) - 1 2166 * 0 <= X + 2^(Y-1) <= 2^Y - 1 2167 * 0 <= X + 2^(Y-1) < 2^Y 2168 * 2169 * For unsigned target integer, check that all the (64 - Y) bits are 2170 * zero. 
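 *
 * Worked example for a 1-byte signed target (Y = 8, so 2^(Y-1) = 128):
 *
 *   v = 127         -> 127 + 128 = 255 < 256, accepted
 *   v = 128         -> 128 + 128 = 256,       rejected
 *   v = (__u64)-128 -> wraps around to 0,     accepted
 *   v = (__u64)-129 -> wraps to 2^64 - 1,     rejected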
2171 */ 2172 if (ext->kcfg.is_signed) 2173 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); 2174 else 2175 return (v >> bit_sz) == 0; 2176 } 2177 2178 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 2179 __u64 value) 2180 { 2181 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 2182 ext->kcfg.type != KCFG_BOOL) { 2183 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 2184 ext->name, (unsigned long long)value); 2185 return -EINVAL; 2186 } 2187 if (ext->kcfg.type == KCFG_BOOL && value > 1) { 2188 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 2189 ext->name, (unsigned long long)value); 2190 return -EINVAL; 2191 2192 } 2193 if (!is_kcfg_value_in_range(ext, value)) { 2194 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 2195 ext->name, (unsigned long long)value, ext->kcfg.sz); 2196 return -ERANGE; 2197 } 2198 switch (ext->kcfg.sz) { 2199 case 1: 2200 *(__u8 *)ext_val = value; 2201 break; 2202 case 2: 2203 *(__u16 *)ext_val = value; 2204 break; 2205 case 4: 2206 *(__u32 *)ext_val = value; 2207 break; 2208 case 8: 2209 *(__u64 *)ext_val = value; 2210 break; 2211 default: 2212 return -EINVAL; 2213 } 2214 ext->is_set = true; 2215 return 0; 2216 } 2217 2218 static int bpf_object__process_kconfig_line(struct bpf_object *obj, 2219 char *buf, void *data) 2220 { 2221 struct extern_desc *ext; 2222 char *sep, *value; 2223 int len, err = 0; 2224 void *ext_val; 2225 __u64 num; 2226 2227 if (!str_has_pfx(buf, "CONFIG_")) 2228 return 0; 2229 2230 sep = strchr(buf, '='); 2231 if (!sep) { 2232 pr_warn("failed to parse '%s': no separator\n", buf); 2233 return -EINVAL; 2234 } 2235 2236 /* Trim ending '\n' */ 2237 len = strlen(buf); 2238 if (buf[len - 1] == '\n') 2239 buf[len - 1] = '\0'; 2240 /* Split on '=' and ensure that a value is present. */ 2241 *sep = '\0'; 2242 if (!sep[1]) { 2243 *sep = '='; 2244 pr_warn("failed to parse '%s': no value\n", buf); 2245 return -EINVAL; 2246 } 2247 2248 ext = find_extern_by_name(obj, buf); 2249 if (!ext || ext->is_set) 2250 return 0; 2251 2252 ext_val = data + ext->kcfg.data_off; 2253 value = sep + 1; 2254 2255 switch (*value) { 2256 case 'y': case 'n': case 'm': 2257 err = set_kcfg_value_tri(ext, ext_val, *value); 2258 break; 2259 case '"': 2260 err = set_kcfg_value_str(ext, ext_val, value); 2261 break; 2262 default: 2263 /* assume integer */ 2264 err = parse_u64(value, &num); 2265 if (err) { 2266 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 2267 return err; 2268 } 2269 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 2270 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 2271 return -EINVAL; 2272 } 2273 err = set_kcfg_value_num(ext, ext_val, num); 2274 break; 2275 } 2276 if (err) 2277 return err; 2278 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 2279 return 0; 2280 } 2281 2282 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) 2283 { 2284 char buf[PATH_MAX]; 2285 struct utsname uts; 2286 int len, err = 0; 2287 gzFile file; 2288 2289 uname(&uts); 2290 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); 2291 if (len < 0) 2292 return -EINVAL; 2293 else if (len >= PATH_MAX) 2294 return -ENAMETOOLONG; 2295 2296 /* gzopen also accepts uncompressed files. 
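 * This is why the plain-text /boot/config-<release> and the gzipped
 * /proc/config.gz below can both be read through the same gzgets() loop.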
*/ 2297 file = gzopen(buf, "re"); 2298 if (!file) 2299 file = gzopen("/proc/config.gz", "re"); 2300 2301 if (!file) { 2302 pr_warn("failed to open system Kconfig\n"); 2303 return -ENOENT; 2304 } 2305 2306 while (gzgets(file, buf, sizeof(buf))) { 2307 err = bpf_object__process_kconfig_line(obj, buf, data); 2308 if (err) { 2309 pr_warn("error parsing system Kconfig line '%s': %s\n", 2310 buf, errstr(err)); 2311 goto out; 2312 } 2313 } 2314 2315 out: 2316 gzclose(file); 2317 return err; 2318 } 2319 2320 static int bpf_object__read_kconfig_mem(struct bpf_object *obj, 2321 const char *config, void *data) 2322 { 2323 char buf[PATH_MAX]; 2324 int err = 0; 2325 FILE *file; 2326 2327 file = fmemopen((void *)config, strlen(config), "r"); 2328 if (!file) { 2329 err = -errno; 2330 pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); 2331 return err; 2332 } 2333 2334 while (fgets(buf, sizeof(buf), file)) { 2335 err = bpf_object__process_kconfig_line(obj, buf, data); 2336 if (err) { 2337 pr_warn("error parsing in-memory Kconfig line '%s': %s\n", 2338 buf, errstr(err)); 2339 break; 2340 } 2341 } 2342 2343 fclose(file); 2344 return err; 2345 } 2346 2347 static int bpf_object__init_kconfig_map(struct bpf_object *obj) 2348 { 2349 struct extern_desc *last_ext = NULL, *ext; 2350 size_t map_sz; 2351 int i, err; 2352 2353 for (i = 0; i < obj->nr_extern; i++) { 2354 ext = &obj->externs[i]; 2355 if (ext->type == EXT_KCFG) 2356 last_ext = ext; 2357 } 2358 2359 if (!last_ext) 2360 return 0; 2361 2362 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; 2363 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, 2364 ".kconfig", obj->efile.symbols_shndx, 2365 NULL, map_sz); 2366 if (err) 2367 return err; 2368 2369 obj->kconfig_map_idx = obj->nr_maps - 1; 2370 2371 return 0; 2372 } 2373 2374 const struct btf_type * 2375 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) 2376 { 2377 const struct btf_type *t = btf__type_by_id(btf, id); 2378 2379 if (res_id) 2380 *res_id = id; 2381 2382 while (btf_is_mod(t) || btf_is_typedef(t)) { 2383 if (res_id) 2384 *res_id = t->type; 2385 t = btf__type_by_id(btf, t->type); 2386 } 2387 2388 return t; 2389 } 2390 2391 static const struct btf_type * 2392 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 2393 { 2394 const struct btf_type *t; 2395 2396 t = skip_mods_and_typedefs(btf, id, NULL); 2397 if (!btf_is_ptr(t)) 2398 return NULL; 2399 2400 t = skip_mods_and_typedefs(btf, t->type, res_id); 2401 2402 return btf_is_func_proto(t) ? 
t : NULL; 2403 } 2404 2405 static const char *__btf_kind_str(__u16 kind) 2406 { 2407 switch (kind) { 2408 case BTF_KIND_UNKN: return "void"; 2409 case BTF_KIND_INT: return "int"; 2410 case BTF_KIND_PTR: return "ptr"; 2411 case BTF_KIND_ARRAY: return "array"; 2412 case BTF_KIND_STRUCT: return "struct"; 2413 case BTF_KIND_UNION: return "union"; 2414 case BTF_KIND_ENUM: return "enum"; 2415 case BTF_KIND_FWD: return "fwd"; 2416 case BTF_KIND_TYPEDEF: return "typedef"; 2417 case BTF_KIND_VOLATILE: return "volatile"; 2418 case BTF_KIND_CONST: return "const"; 2419 case BTF_KIND_RESTRICT: return "restrict"; 2420 case BTF_KIND_FUNC: return "func"; 2421 case BTF_KIND_FUNC_PROTO: return "func_proto"; 2422 case BTF_KIND_VAR: return "var"; 2423 case BTF_KIND_DATASEC: return "datasec"; 2424 case BTF_KIND_FLOAT: return "float"; 2425 case BTF_KIND_DECL_TAG: return "decl_tag"; 2426 case BTF_KIND_TYPE_TAG: return "type_tag"; 2427 case BTF_KIND_ENUM64: return "enum64"; 2428 default: return "unknown"; 2429 } 2430 } 2431 2432 const char *btf_kind_str(const struct btf_type *t) 2433 { 2434 return __btf_kind_str(btf_kind(t)); 2435 } 2436 2437 /* 2438 * Fetch integer attribute of BTF map definition. Such attributes are 2439 * represented using a pointer to an array, in which dimensionality of array 2440 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; 2441 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF 2442 * type definition, while using only sizeof(void *) space in ELF data section. 2443 */ 2444 static bool get_map_field_int(const char *map_name, const struct btf *btf, 2445 const struct btf_member *m, __u32 *res) 2446 { 2447 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2448 const char *name = btf__name_by_offset(btf, m->name_off); 2449 const struct btf_array *arr_info; 2450 const struct btf_type *arr_t; 2451 2452 if (!btf_is_ptr(t)) { 2453 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", 2454 map_name, name, btf_kind_str(t)); 2455 return false; 2456 } 2457 2458 arr_t = btf__type_by_id(btf, t->type); 2459 if (!arr_t) { 2460 pr_warn("map '%s': attr '%s': type [%u] not found.\n", 2461 map_name, name, t->type); 2462 return false; 2463 } 2464 if (!btf_is_array(arr_t)) { 2465 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", 2466 map_name, name, btf_kind_str(arr_t)); 2467 return false; 2468 } 2469 arr_info = btf_array(arr_t); 2470 *res = arr_info->nelems; 2471 return true; 2472 } 2473 2474 static bool get_map_field_long(const char *map_name, const struct btf *btf, 2475 const struct btf_member *m, __u64 *res) 2476 { 2477 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2478 const char *name = btf__name_by_offset(btf, m->name_off); 2479 2480 if (btf_is_ptr(t)) { 2481 __u32 res32; 2482 bool ret; 2483 2484 ret = get_map_field_int(map_name, btf, m, &res32); 2485 if (ret) 2486 *res = (__u64)res32; 2487 return ret; 2488 } 2489 2490 if (!btf_is_enum(t) && !btf_is_enum64(t)) { 2491 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", 2492 map_name, name, btf_kind_str(t)); 2493 return false; 2494 } 2495 2496 if (btf_vlen(t) != 1) { 2497 pr_warn("map '%s': attr '%s': invalid __ulong\n", 2498 map_name, name); 2499 return false; 2500 } 2501 2502 if (btf_is_enum(t)) { 2503 const struct btf_enum *e = btf_enum(t); 2504 2505 *res = e->val; 2506 } else { 2507 const struct btf_enum64 *e = btf_enum64(t); 2508 2509 *res = btf_enum64_value(e); 2510 } 2511 return true; 2512 } 2513 2514 static int 
pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) 2515 { 2516 int len; 2517 2518 len = snprintf(buf, buf_sz, "%s/%s", path, name); 2519 if (len < 0) 2520 return -EINVAL; 2521 if (len >= buf_sz) 2522 return -ENAMETOOLONG; 2523 2524 return 0; 2525 } 2526 2527 static int build_map_pin_path(struct bpf_map *map, const char *path) 2528 { 2529 char buf[PATH_MAX]; 2530 int err; 2531 2532 if (!path) 2533 path = BPF_FS_DEFAULT_PATH; 2534 2535 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 2536 if (err) 2537 return err; 2538 2539 return bpf_map__set_pin_path(map, buf); 2540 } 2541 2542 /* should match definition in bpf_helpers.h */ 2543 enum libbpf_pin_type { 2544 LIBBPF_PIN_NONE, 2545 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 2546 LIBBPF_PIN_BY_NAME, 2547 }; 2548 2549 int parse_btf_map_def(const char *map_name, struct btf *btf, 2550 const struct btf_type *def_t, bool strict, 2551 struct btf_map_def *map_def, struct btf_map_def *inner_def) 2552 { 2553 const struct btf_type *t; 2554 const struct btf_member *m; 2555 bool is_inner = inner_def == NULL; 2556 int vlen, i; 2557 2558 vlen = btf_vlen(def_t); 2559 m = btf_members(def_t); 2560 for (i = 0; i < vlen; i++, m++) { 2561 const char *name = btf__name_by_offset(btf, m->name_off); 2562 2563 if (!name) { 2564 pr_warn("map '%s': invalid field #%d.\n", map_name, i); 2565 return -EINVAL; 2566 } 2567 if (strcmp(name, "type") == 0) { 2568 if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) 2569 return -EINVAL; 2570 map_def->parts |= MAP_DEF_MAP_TYPE; 2571 } else if (strcmp(name, "max_entries") == 0) { 2572 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) 2573 return -EINVAL; 2574 map_def->parts |= MAP_DEF_MAX_ENTRIES; 2575 } else if (strcmp(name, "map_flags") == 0) { 2576 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) 2577 return -EINVAL; 2578 map_def->parts |= MAP_DEF_MAP_FLAGS; 2579 } else if (strcmp(name, "numa_node") == 0) { 2580 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) 2581 return -EINVAL; 2582 map_def->parts |= MAP_DEF_NUMA_NODE; 2583 } else if (strcmp(name, "key_size") == 0) { 2584 __u32 sz; 2585 2586 if (!get_map_field_int(map_name, btf, m, &sz)) 2587 return -EINVAL; 2588 if (map_def->key_size && map_def->key_size != sz) { 2589 pr_warn("map '%s': conflicting key size %u != %u.\n", 2590 map_name, map_def->key_size, sz); 2591 return -EINVAL; 2592 } 2593 map_def->key_size = sz; 2594 map_def->parts |= MAP_DEF_KEY_SIZE; 2595 } else if (strcmp(name, "key") == 0) { 2596 __s64 sz; 2597 2598 t = btf__type_by_id(btf, m->type); 2599 if (!t) { 2600 pr_warn("map '%s': key type [%d] not found.\n", 2601 map_name, m->type); 2602 return -EINVAL; 2603 } 2604 if (!btf_is_ptr(t)) { 2605 pr_warn("map '%s': key spec is not PTR: %s.\n", 2606 map_name, btf_kind_str(t)); 2607 return -EINVAL; 2608 } 2609 sz = btf__resolve_size(btf, t->type); 2610 if (sz < 0) { 2611 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", 2612 map_name, t->type, (ssize_t)sz); 2613 return sz; 2614 } 2615 if (map_def->key_size && map_def->key_size != sz) { 2616 pr_warn("map '%s': conflicting key size %u != %zd.\n", 2617 map_name, map_def->key_size, (ssize_t)sz); 2618 return -EINVAL; 2619 } 2620 map_def->key_size = sz; 2621 map_def->key_type_id = t->type; 2622 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; 2623 } else if (strcmp(name, "value_size") == 0) { 2624 __u32 sz; 2625 2626 if (!get_map_field_int(map_name, btf, m, &sz)) 2627 return -EINVAL; 
2628 if (map_def->value_size && map_def->value_size != sz) { 2629 pr_warn("map '%s': conflicting value size %u != %u.\n", 2630 map_name, map_def->value_size, sz); 2631 return -EINVAL; 2632 } 2633 map_def->value_size = sz; 2634 map_def->parts |= MAP_DEF_VALUE_SIZE; 2635 } else if (strcmp(name, "value") == 0) { 2636 __s64 sz; 2637 2638 t = btf__type_by_id(btf, m->type); 2639 if (!t) { 2640 pr_warn("map '%s': value type [%d] not found.\n", 2641 map_name, m->type); 2642 return -EINVAL; 2643 } 2644 if (!btf_is_ptr(t)) { 2645 pr_warn("map '%s': value spec is not PTR: %s.\n", 2646 map_name, btf_kind_str(t)); 2647 return -EINVAL; 2648 } 2649 sz = btf__resolve_size(btf, t->type); 2650 if (sz < 0) { 2651 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", 2652 map_name, t->type, (ssize_t)sz); 2653 return sz; 2654 } 2655 if (map_def->value_size && map_def->value_size != sz) { 2656 pr_warn("map '%s': conflicting value size %u != %zd.\n", 2657 map_name, map_def->value_size, (ssize_t)sz); 2658 return -EINVAL; 2659 } 2660 map_def->value_size = sz; 2661 map_def->value_type_id = t->type; 2662 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; 2663 } 2664 else if (strcmp(name, "values") == 0) { 2665 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); 2666 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; 2667 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value"; 2668 char inner_map_name[128]; 2669 int err; 2670 2671 if (is_inner) { 2672 pr_warn("map '%s': multi-level inner maps not supported.\n", 2673 map_name); 2674 return -ENOTSUP; 2675 } 2676 if (i != vlen - 1) { 2677 pr_warn("map '%s': '%s' member should be last.\n", 2678 map_name, name); 2679 return -EINVAL; 2680 } 2681 if (!is_map_in_map && !is_prog_array) { 2682 pr_warn("map '%s': should be map-in-map or prog-array.\n", 2683 map_name); 2684 return -ENOTSUP; 2685 } 2686 if (map_def->value_size && map_def->value_size != 4) { 2687 pr_warn("map '%s': conflicting value size %u != 4.\n", 2688 map_name, map_def->value_size); 2689 return -EINVAL; 2690 } 2691 map_def->value_size = 4; 2692 t = btf__type_by_id(btf, m->type); 2693 if (!t) { 2694 pr_warn("map '%s': %s type [%d] not found.\n", 2695 map_name, desc, m->type); 2696 return -EINVAL; 2697 } 2698 if (!btf_is_array(t) || btf_array(t)->nelems) { 2699 pr_warn("map '%s': %s spec is not a zero-sized array.\n", 2700 map_name, desc); 2701 return -EINVAL; 2702 } 2703 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); 2704 if (!btf_is_ptr(t)) { 2705 pr_warn("map '%s': %s def is of unexpected kind %s.\n", 2706 map_name, desc, btf_kind_str(t)); 2707 return -EINVAL; 2708 } 2709 t = skip_mods_and_typedefs(btf, t->type, NULL); 2710 if (is_prog_array) { 2711 if (!btf_is_func_proto(t)) { 2712 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", 2713 map_name, btf_kind_str(t)); 2714 return -EINVAL; 2715 } 2716 continue; 2717 } 2718 if (!btf_is_struct(t)) { 2719 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", 2720 map_name, btf_kind_str(t)); 2721 return -EINVAL; 2722 } 2723 2724 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); 2725 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); 2726 if (err) 2727 return err; 2728 2729 map_def->parts |= MAP_DEF_INNER_MAP; 2730 } else if (strcmp(name, "pinning") == 0) { 2731 __u32 val; 2732 2733 if (is_inner) { 2734 pr_warn("map '%s': inner def can't be pinned.\n", map_name); 2735 return -EINVAL; 2736 } 2737 if 
(!get_map_field_int(map_name, btf, m, &val))
2738 return -EINVAL;
2739 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2740 pr_warn("map '%s': invalid pinning value %u.\n",
2741 map_name, val);
2742 return -EINVAL;
2743 }
2744 map_def->pinning = val;
2745 map_def->parts |= MAP_DEF_PINNING;
2746 } else if (strcmp(name, "map_extra") == 0) {
2747 __u64 map_extra;
2748
2749 if (!get_map_field_long(map_name, btf, m, &map_extra))
2750 return -EINVAL;
2751 map_def->map_extra = map_extra;
2752 map_def->parts |= MAP_DEF_MAP_EXTRA;
2753 } else {
2754 if (strict) {
2755 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2756 return -ENOTSUP;
2757 }
2758 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2759 }
2760 }
2761
2762 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2763 pr_warn("map '%s': map type isn't specified.\n", map_name);
2764 return -EINVAL;
2765 }
2766
2767 return 0;
2768 }
2769
2770 static size_t adjust_ringbuf_sz(size_t sz)
2771 {
2772 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2773 __u32 mul;
2774
2775 /* if user forgot to set any size, make sure they see error */
2776 if (sz == 0)
2777 return 0;
2778 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2779 * a power-of-2 multiple of kernel's page size. If user diligently
2780 * satisfied these conditions, pass the size through.
2781 */
2782 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2783 return sz;
2784
2785 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2786 * user-set size to satisfy both user size request and kernel
2787 * requirements and substitute correct max_entries for map creation.
2788 */
2789 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2790 if (mul * page_sz > sz)
2791 return mul * page_sz;
2792 }
2793
2794 /* if it's impossible to satisfy the conditions (i.e., user size is
2795 * very close to UINT_MAX but is not a power-of-2 multiple of
2796 * page_size) then just return original size and let kernel reject it
2797 */
2798 return sz;
2799 }
2800
2801 static bool map_is_ringbuf(const struct bpf_map *map)
2802 {
2803 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2804 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2805 }
2806
2807 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2808 {
2809 map->def.type = def->map_type;
2810 map->def.key_size = def->key_size;
2811 map->def.value_size = def->value_size;
2812 map->def.max_entries = def->max_entries;
2813 map->def.map_flags = def->map_flags;
2814 map->map_extra = def->map_extra;
2815
2816 map->numa_node = def->numa_node;
2817 map->btf_key_type_id = def->key_type_id;
2818 map->btf_value_type_id = def->value_type_id;
2819
2820 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2821 if (map_is_ringbuf(map))
2822 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2823
2824 if (def->parts & MAP_DEF_MAP_TYPE)
2825 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2826
2827 if (def->parts & MAP_DEF_KEY_TYPE)
2828 pr_debug("map '%s': found key [%u], sz = %u.\n",
2829 map->name, def->key_type_id, def->key_size);
2830 else if (def->parts & MAP_DEF_KEY_SIZE)
2831 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2832
2833 if (def->parts & MAP_DEF_VALUE_TYPE)
2834 pr_debug("map '%s': found value [%u], sz = %u.\n",
2835 map->name, def->value_type_id, def->value_size);
2836 else if (def->parts & MAP_DEF_VALUE_SIZE)
2837 pr_debug("map '%s': found value_size = %u.\n", map->name,
def->value_size); 2838 2839 if (def->parts & MAP_DEF_MAX_ENTRIES) 2840 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); 2841 if (def->parts & MAP_DEF_MAP_FLAGS) 2842 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); 2843 if (def->parts & MAP_DEF_MAP_EXTRA) 2844 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, 2845 (unsigned long long)def->map_extra); 2846 if (def->parts & MAP_DEF_PINNING) 2847 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); 2848 if (def->parts & MAP_DEF_NUMA_NODE) 2849 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); 2850 2851 if (def->parts & MAP_DEF_INNER_MAP) 2852 pr_debug("map '%s': found inner map definition.\n", map->name); 2853 } 2854 2855 static const char *btf_var_linkage_str(__u32 linkage) 2856 { 2857 switch (linkage) { 2858 case BTF_VAR_STATIC: return "static"; 2859 case BTF_VAR_GLOBAL_ALLOCATED: return "global"; 2860 case BTF_VAR_GLOBAL_EXTERN: return "extern"; 2861 default: return "unknown"; 2862 } 2863 } 2864 2865 static int bpf_object__init_user_btf_map(struct bpf_object *obj, 2866 const struct btf_type *sec, 2867 int var_idx, int sec_idx, 2868 const Elf_Data *data, bool strict, 2869 const char *pin_root_path) 2870 { 2871 struct btf_map_def map_def = {}, inner_def = {}; 2872 const struct btf_type *var, *def; 2873 const struct btf_var_secinfo *vi; 2874 const struct btf_var *var_extra; 2875 const char *map_name; 2876 struct bpf_map *map; 2877 int err; 2878 2879 vi = btf_var_secinfos(sec) + var_idx; 2880 var = btf__type_by_id(obj->btf, vi->type); 2881 var_extra = btf_var(var); 2882 map_name = btf__name_by_offset(obj->btf, var->name_off); 2883 2884 if (map_name == NULL || map_name[0] == '\0') { 2885 pr_warn("map #%d: empty name.\n", var_idx); 2886 return -EINVAL; 2887 } 2888 if ((__u64)vi->offset + vi->size > data->d_size) { 2889 pr_warn("map '%s' BTF data is corrupted.\n", map_name); 2890 return -EINVAL; 2891 } 2892 if (!btf_is_var(var)) { 2893 pr_warn("map '%s': unexpected var kind %s.\n", 2894 map_name, btf_kind_str(var)); 2895 return -EINVAL; 2896 } 2897 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { 2898 pr_warn("map '%s': unsupported map linkage %s.\n", 2899 map_name, btf_var_linkage_str(var_extra->linkage)); 2900 return -EOPNOTSUPP; 2901 } 2902 2903 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 2904 if (!btf_is_struct(def)) { 2905 pr_warn("map '%s': unexpected def kind %s.\n", 2906 map_name, btf_kind_str(var)); 2907 return -EINVAL; 2908 } 2909 if (def->size > vi->size) { 2910 pr_warn("map '%s': invalid def size.\n", map_name); 2911 return -EINVAL; 2912 } 2913 2914 map = bpf_object__add_map(obj); 2915 if (IS_ERR(map)) 2916 return PTR_ERR(map); 2917 map->name = strdup(map_name); 2918 if (!map->name) { 2919 pr_warn("map '%s': failed to alloc map name.\n", map_name); 2920 return -ENOMEM; 2921 } 2922 map->libbpf_type = LIBBPF_MAP_UNSPEC; 2923 map->def.type = BPF_MAP_TYPE_UNSPEC; 2924 map->sec_idx = sec_idx; 2925 map->sec_offset = vi->offset; 2926 map->btf_var_idx = var_idx; 2927 pr_debug("map '%s': at sec_idx %d, offset %zu.\n", 2928 map_name, map->sec_idx, map->sec_offset); 2929 2930 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); 2931 if (err) 2932 return err; 2933 2934 fill_map_from_def(map, &map_def); 2935 2936 if (map_def.pinning == LIBBPF_PIN_BY_NAME) { 2937 err = build_map_pin_path(map, pin_root_path); 2938 if (err) { 2939 pr_warn("map '%s': couldn't build pin path.\n", map->name); 2940 
return err; 2941 } 2942 } 2943 2944 if (map_def.parts & MAP_DEF_INNER_MAP) { 2945 map->inner_map = calloc(1, sizeof(*map->inner_map)); 2946 if (!map->inner_map) 2947 return -ENOMEM; 2948 map->inner_map->fd = create_placeholder_fd(); 2949 if (map->inner_map->fd < 0) 2950 return map->inner_map->fd; 2951 map->inner_map->sec_idx = sec_idx; 2952 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); 2953 if (!map->inner_map->name) 2954 return -ENOMEM; 2955 sprintf(map->inner_map->name, "%s.inner", map_name); 2956 2957 fill_map_from_def(map->inner_map, &inner_def); 2958 } 2959 2960 err = map_fill_btf_type_info(obj, map); 2961 if (err) 2962 return err; 2963 2964 return 0; 2965 } 2966 2967 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, 2968 const char *sec_name, int sec_idx, 2969 void *data, size_t data_sz) 2970 { 2971 const long page_sz = sysconf(_SC_PAGE_SIZE); 2972 size_t mmap_sz; 2973 2974 mmap_sz = bpf_map_mmap_sz(obj->arena_map); 2975 if (roundup(data_sz, page_sz) > mmap_sz) { 2976 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", 2977 sec_name, mmap_sz, data_sz); 2978 return -E2BIG; 2979 } 2980 2981 obj->arena_data = malloc(data_sz); 2982 if (!obj->arena_data) 2983 return -ENOMEM; 2984 memcpy(obj->arena_data, data, data_sz); 2985 obj->arena_data_sz = data_sz; 2986 2987 /* make bpf_map__init_value() work for ARENA maps */ 2988 map->mmaped = obj->arena_data; 2989 2990 return 0; 2991 } 2992 2993 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, 2994 const char *pin_root_path) 2995 { 2996 const struct btf_type *sec = NULL; 2997 int nr_types, i, vlen, err; 2998 const struct btf_type *t; 2999 const char *name; 3000 Elf_Data *data; 3001 Elf_Scn *scn; 3002 3003 if (obj->efile.btf_maps_shndx < 0) 3004 return 0; 3005 3006 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); 3007 data = elf_sec_data(obj, scn); 3008 if (!scn || !data) { 3009 pr_warn("elf: failed to get %s map definitions for %s\n", 3010 MAPS_ELF_SEC, obj->path); 3011 return -EINVAL; 3012 } 3013 3014 nr_types = btf__type_cnt(obj->btf); 3015 for (i = 1; i < nr_types; i++) { 3016 t = btf__type_by_id(obj->btf, i); 3017 if (!btf_is_datasec(t)) 3018 continue; 3019 name = btf__name_by_offset(obj->btf, t->name_off); 3020 if (strcmp(name, MAPS_ELF_SEC) == 0) { 3021 sec = t; 3022 obj->efile.btf_maps_sec_btf_id = i; 3023 break; 3024 } 3025 } 3026 3027 if (!sec) { 3028 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); 3029 return -ENOENT; 3030 } 3031 3032 vlen = btf_vlen(sec); 3033 for (i = 0; i < vlen; i++) { 3034 err = bpf_object__init_user_btf_map(obj, sec, i, 3035 obj->efile.btf_maps_shndx, 3036 data, strict, 3037 pin_root_path); 3038 if (err) 3039 return err; 3040 } 3041 3042 for (i = 0; i < obj->nr_maps; i++) { 3043 struct bpf_map *map = &obj->maps[i]; 3044 3045 if (map->def.type != BPF_MAP_TYPE_ARENA) 3046 continue; 3047 3048 if (obj->arena_map) { 3049 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", 3050 map->name, obj->arena_map->name); 3051 return -EINVAL; 3052 } 3053 obj->arena_map = map; 3054 3055 if (obj->efile.arena_data) { 3056 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, 3057 obj->efile.arena_data->d_buf, 3058 obj->efile.arena_data->d_size); 3059 if (err) 3060 return err; 3061 } 3062 } 3063 if (obj->efile.arena_data && !obj->arena_map) { 3064 pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be 
explicitly declared in SEC(\".maps\")\n", 3065 ARENA_SEC); 3066 return -ENOENT; 3067 } 3068 3069 return 0; 3070 } 3071 3072 static int bpf_object__init_maps(struct bpf_object *obj, 3073 const struct bpf_object_open_opts *opts) 3074 { 3075 const char *pin_root_path; 3076 bool strict; 3077 int err = 0; 3078 3079 strict = !OPTS_GET(opts, relaxed_maps, false); 3080 pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 3081 3082 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 3083 err = err ?: bpf_object__init_global_data_maps(obj); 3084 err = err ?: bpf_object__init_kconfig_map(obj); 3085 err = err ?: bpf_object_init_struct_ops(obj); 3086 3087 return err; 3088 } 3089 3090 static bool section_have_execinstr(struct bpf_object *obj, int idx) 3091 { 3092 Elf64_Shdr *sh; 3093 3094 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); 3095 if (!sh) 3096 return false; 3097 3098 return sh->sh_flags & SHF_EXECINSTR; 3099 } 3100 3101 static bool starts_with_qmark(const char *s) 3102 { 3103 return s && s[0] == '?'; 3104 } 3105 3106 static bool btf_needs_sanitization(struct bpf_object *obj) 3107 { 3108 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3109 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3110 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3111 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3112 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3113 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3114 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3115 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3116 3117 return !has_func || !has_datasec || !has_func_global || !has_float || 3118 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; 3119 } 3120 3121 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) 3122 { 3123 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3124 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3125 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3126 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3127 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3128 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3129 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3130 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3131 int enum64_placeholder_id = 0; 3132 struct btf_type *t; 3133 int i, j, vlen; 3134 3135 for (i = 1; i < btf__type_cnt(btf); i++) { 3136 t = (struct btf_type *)btf__type_by_id(btf, i); 3137 3138 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { 3139 /* replace VAR/DECL_TAG with INT */ 3140 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); 3141 /* 3142 * using size = 1 is the safest choice, 4 will be too 3143 * big and cause kernel BTF validation failure if 3144 * original variable took less than 4 bytes 3145 */ 3146 t->size = 1; 3147 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); 3148 } else if (!has_datasec && btf_is_datasec(t)) { 3149 /* replace DATASEC with STRUCT */ 3150 const struct btf_var_secinfo *v = btf_var_secinfos(t); 3151 struct btf_member *m = btf_members(t); 3152 struct btf_type *vt; 3153 char *name; 3154 3155 name = (char *)btf__name_by_offset(btf, t->name_off); 3156 while (*name) { 3157 if (*name == '.' 
|| *name == '?') 3158 *name = '_'; 3159 name++; 3160 } 3161 3162 vlen = btf_vlen(t); 3163 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); 3164 for (j = 0; j < vlen; j++, v++, m++) { 3165 /* order of field assignments is important */ 3166 m->offset = v->offset * 8; 3167 m->type = v->type; 3168 /* preserve variable name as member name */ 3169 vt = (void *)btf__type_by_id(btf, v->type); 3170 m->name_off = vt->name_off; 3171 } 3172 } else if (!has_qmark_datasec && btf_is_datasec(t) && 3173 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { 3174 /* replace '?' prefix with '_' for DATASEC names */ 3175 char *name; 3176 3177 name = (char *)btf__name_by_offset(btf, t->name_off); 3178 if (name[0] == '?') 3179 name[0] = '_'; 3180 } else if (!has_func && btf_is_func_proto(t)) { 3181 /* replace FUNC_PROTO with ENUM */ 3182 vlen = btf_vlen(t); 3183 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); 3184 t->size = sizeof(__u32); /* kernel enforced */ 3185 } else if (!has_func && btf_is_func(t)) { 3186 /* replace FUNC with TYPEDEF */ 3187 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 3188 } else if (!has_func_global && btf_is_func(t)) { 3189 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 3190 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 3191 } else if (!has_float && btf_is_float(t)) { 3192 /* replace FLOAT with an equally-sized empty STRUCT; 3193 * since C compilers do not accept e.g. "float" as a 3194 * valid struct name, make it anonymous 3195 */ 3196 t->name_off = 0; 3197 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); 3198 } else if (!has_type_tag && btf_is_type_tag(t)) { 3199 /* replace TYPE_TAG with a CONST */ 3200 t->name_off = 0; 3201 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); 3202 } else if (!has_enum64 && btf_is_enum(t)) { 3203 /* clear the kflag */ 3204 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); 3205 } else if (!has_enum64 && btf_is_enum64(t)) { 3206 /* replace ENUM64 with a union */ 3207 struct btf_member *m; 3208 3209 if (enum64_placeholder_id == 0) { 3210 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); 3211 if (enum64_placeholder_id < 0) 3212 return enum64_placeholder_id; 3213 3214 t = (struct btf_type *)btf__type_by_id(btf, i); 3215 } 3216 3217 m = btf_members(t); 3218 vlen = btf_vlen(t); 3219 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); 3220 for (j = 0; j < vlen; j++, m++) { 3221 m->type = enum64_placeholder_id; 3222 m->offset = 0; 3223 } 3224 } 3225 } 3226 3227 return 0; 3228 } 3229 3230 static bool libbpf_needs_btf(const struct bpf_object *obj) 3231 { 3232 return obj->efile.btf_maps_shndx >= 0 || 3233 obj->efile.has_st_ops || 3234 obj->nr_extern > 0; 3235 } 3236 3237 static bool kernel_needs_btf(const struct bpf_object *obj) 3238 { 3239 return obj->efile.has_st_ops; 3240 } 3241 3242 static int bpf_object__init_btf(struct bpf_object *obj, 3243 Elf_Data *btf_data, 3244 Elf_Data *btf_ext_data) 3245 { 3246 int err = -ENOENT; 3247 3248 if (btf_data) { 3249 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 3250 err = libbpf_get_error(obj->btf); 3251 if (err) { 3252 obj->btf = NULL; 3253 pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); 3254 goto out; 3255 } 3256 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3257 btf__set_pointer_size(obj->btf, 8); 3258 } 3259 if (btf_ext_data) { 3260 struct btf_ext_info *ext_segs[3]; 3261 int seg_num, sec_num; 3262 3263 if (!obj->btf) { 3264 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", 3265 BTF_EXT_ELF_SEC, BTF_ELF_SEC); 3266 
goto out; 3267 } 3268 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); 3269 err = libbpf_get_error(obj->btf_ext); 3270 if (err) { 3271 pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", 3272 BTF_EXT_ELF_SEC, errstr(err)); 3273 obj->btf_ext = NULL; 3274 goto out; 3275 } 3276 3277 /* setup .BTF.ext to ELF section mapping */ 3278 ext_segs[0] = &obj->btf_ext->func_info; 3279 ext_segs[1] = &obj->btf_ext->line_info; 3280 ext_segs[2] = &obj->btf_ext->core_relo_info; 3281 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 3282 struct btf_ext_info *seg = ext_segs[seg_num]; 3283 const struct btf_ext_info_sec *sec; 3284 const char *sec_name; 3285 Elf_Scn *scn; 3286 3287 if (seg->sec_cnt == 0) 3288 continue; 3289 3290 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 3291 if (!seg->sec_idxs) { 3292 err = -ENOMEM; 3293 goto out; 3294 } 3295 3296 sec_num = 0; 3297 for_each_btf_ext_sec(seg, sec) { 3298 /* preventively increment index to avoid doing 3299 * this before every continue below 3300 */ 3301 sec_num++; 3302 3303 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 3304 if (str_is_empty(sec_name)) 3305 continue; 3306 scn = elf_sec_by_name(obj, sec_name); 3307 if (!scn) 3308 continue; 3309 3310 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 3311 } 3312 } 3313 } 3314 out: 3315 if (err && libbpf_needs_btf(obj)) { 3316 pr_warn("BTF is required, but is missing or corrupted.\n"); 3317 return err; 3318 } 3319 return 0; 3320 } 3321 3322 static int compare_vsi_off(const void *_a, const void *_b) 3323 { 3324 const struct btf_var_secinfo *a = _a; 3325 const struct btf_var_secinfo *b = _b; 3326 3327 return a->offset - b->offset; 3328 } 3329 3330 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, 3331 struct btf_type *t) 3332 { 3333 __u32 size = 0, i, vars = btf_vlen(t); 3334 const char *sec_name = btf__name_by_offset(btf, t->name_off); 3335 struct btf_var_secinfo *vsi; 3336 bool fixup_offsets = false; 3337 int err; 3338 3339 if (!sec_name) { 3340 pr_debug("No name found in string section for DATASEC kind.\n"); 3341 return -ENOENT; 3342 } 3343 3344 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and 3345 * variable offsets set at the previous step. Further, not every 3346 * extern BTF VAR has corresponding ELF symbol preserved, so we skip 3347 * all fixups altogether for such sections and go straight to sorting 3348 * VARs within their DATASEC. 3349 */ 3350 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) 3351 goto sort_vars; 3352 3353 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to 3354 * fix this up. But BPF static linker already fixes this up and fills 3355 * all the sizes and offsets during static linking. So this step has 3356 * to be optional. But the STV_HIDDEN handling is non-optional for any 3357 * non-extern DATASEC, so the variable fixup loop below handles both 3358 * functions at the same time, paying the cost of BTF VAR <-> ELF 3359 * symbol matching just once. 
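 * For example, if Clang emitted DATASEC '.data' with size 0, t->size below
 * is taken from the '.data' ELF section header, and each VAR's offset is
 * taken from st_value of the matching ELF symbol.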
3360 */ 3361 if (t->size == 0) { 3362 err = find_elf_sec_sz(obj, sec_name, &size); 3363 if (err || !size) { 3364 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", 3365 sec_name, size, errstr(err)); 3366 return -ENOENT; 3367 } 3368 3369 t->size = size; 3370 fixup_offsets = true; 3371 } 3372 3373 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { 3374 const struct btf_type *t_var; 3375 struct btf_var *var; 3376 const char *var_name; 3377 Elf64_Sym *sym; 3378 3379 t_var = btf__type_by_id(btf, vsi->type); 3380 if (!t_var || !btf_is_var(t_var)) { 3381 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); 3382 return -EINVAL; 3383 } 3384 3385 var = btf_var(t_var); 3386 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) 3387 continue; 3388 3389 var_name = btf__name_by_offset(btf, t_var->name_off); 3390 if (!var_name) { 3391 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", 3392 sec_name, i); 3393 return -ENOENT; 3394 } 3395 3396 sym = find_elf_var_sym(obj, var_name); 3397 if (IS_ERR(sym)) { 3398 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", 3399 sec_name, var_name); 3400 return -ENOENT; 3401 } 3402 3403 if (fixup_offsets) 3404 vsi->offset = sym->st_value; 3405 3406 /* if variable is a global/weak symbol, but has restricted 3407 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR 3408 * as static. This follows similar logic for functions (BPF 3409 * subprogs) and influences libbpf's further decisions about 3410 * whether to make global data BPF array maps as 3411 * BPF_F_MMAPABLE. 3412 */ 3413 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 3414 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) 3415 var->linkage = BTF_VAR_STATIC; 3416 } 3417 3418 sort_vars: 3419 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 3420 return 0; 3421 } 3422 3423 static int bpf_object_fixup_btf(struct bpf_object *obj) 3424 { 3425 int i, n, err = 0; 3426 3427 if (!obj->btf) 3428 return 0; 3429 3430 n = btf__type_cnt(obj->btf); 3431 for (i = 1; i < n; i++) { 3432 struct btf_type *t = btf_type_by_id(obj->btf, i); 3433 3434 /* Loader needs to fix up some of the things compiler 3435 * couldn't get its hands on while emitting BTF. This 3436 * is section size and global variable offset. We use 3437 * the info from the ELF itself for this purpose. 
3438 */ 3439 if (btf_is_datasec(t)) { 3440 err = btf_fixup_datasec(obj, obj->btf, t); 3441 if (err) 3442 return err; 3443 } 3444 } 3445 3446 return 0; 3447 } 3448 3449 static bool prog_needs_vmlinux_btf(struct bpf_program *prog) 3450 { 3451 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || 3452 prog->type == BPF_PROG_TYPE_LSM) 3453 return true; 3454 3455 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 3456 * also need vmlinux BTF 3457 */ 3458 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 3459 return true; 3460 3461 return false; 3462 } 3463 3464 static bool map_needs_vmlinux_btf(struct bpf_map *map) 3465 { 3466 return bpf_map__is_struct_ops(map); 3467 } 3468 3469 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) 3470 { 3471 struct bpf_program *prog; 3472 struct bpf_map *map; 3473 int i; 3474 3475 /* CO-RE relocations need kernel BTF, only when btf_custom_path 3476 * is not specified 3477 */ 3478 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 3479 return true; 3480 3481 /* Support for typed ksyms needs kernel BTF */ 3482 for (i = 0; i < obj->nr_extern; i++) { 3483 const struct extern_desc *ext; 3484 3485 ext = &obj->externs[i]; 3486 if (ext->type == EXT_KSYM && ext->ksym.type_id) 3487 return true; 3488 } 3489 3490 bpf_object__for_each_program(prog, obj) { 3491 if (!prog->autoload) 3492 continue; 3493 if (prog_needs_vmlinux_btf(prog)) 3494 return true; 3495 } 3496 3497 bpf_object__for_each_map(map, obj) { 3498 if (map_needs_vmlinux_btf(map)) 3499 return true; 3500 } 3501 3502 return false; 3503 } 3504 3505 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) 3506 { 3507 int err; 3508 3509 /* btf_vmlinux could be loaded earlier */ 3510 if (obj->btf_vmlinux || obj->gen_loader) 3511 return 0; 3512 3513 if (!force && !obj_needs_vmlinux_btf(obj)) 3514 return 0; 3515 3516 obj->btf_vmlinux = btf__load_vmlinux_btf(); 3517 err = libbpf_get_error(obj->btf_vmlinux); 3518 if (err) { 3519 pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); 3520 obj->btf_vmlinux = NULL; 3521 return err; 3522 } 3523 return 0; 3524 } 3525 3526 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) 3527 { 3528 struct btf *kern_btf = obj->btf; 3529 bool btf_mandatory, sanitize; 3530 int i, err = 0; 3531 3532 if (!obj->btf) 3533 return 0; 3534 3535 if (!kernel_supports(obj, FEAT_BTF)) { 3536 if (kernel_needs_btf(obj)) { 3537 err = -EOPNOTSUPP; 3538 goto report; 3539 } 3540 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); 3541 return 0; 3542 } 3543 3544 /* Even though some subprogs are global/weak, user might prefer more 3545 * permissive BPF verification process that BPF verifier performs for 3546 * static functions, taking into account more context from the caller 3547 * functions. In such case, they need to mark such subprogs with 3548 * __attribute__((visibility("hidden"))) and libbpf will adjust 3549 * corresponding FUNC BTF type to be marked as static and trigger more 3550 * involved BPF verification process. 
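 *
 * For example, a user can hide a subprog like this (hypothetical subprog
 * name; __hidden comes from bpf_helpers.h and expands to
 * __attribute__((visibility("hidden")))):
 *
 *	__hidden int my_subprog(int x)
 *	{
 *		...
 *	}
 *
 * For such a subprog, the loop below rewrites its FUNC BTF linkage from
 * BTF_FUNC_GLOBAL to BTF_FUNC_STATIC before BTF is loaded into the kernel.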
3551 */ 3552 for (i = 0; i < obj->nr_programs; i++) { 3553 struct bpf_program *prog = &obj->programs[i]; 3554 struct btf_type *t; 3555 const char *name; 3556 int j, n; 3557 3558 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) 3559 continue; 3560 3561 n = btf__type_cnt(obj->btf); 3562 for (j = 1; j < n; j++) { 3563 t = btf_type_by_id(obj->btf, j); 3564 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) 3565 continue; 3566 3567 name = btf__str_by_offset(obj->btf, t->name_off); 3568 if (strcmp(name, prog->name) != 0) 3569 continue; 3570 3571 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); 3572 break; 3573 } 3574 } 3575 3576 sanitize = btf_needs_sanitization(obj); 3577 if (sanitize) { 3578 const void *raw_data; 3579 __u32 sz; 3580 3581 /* clone BTF to sanitize a copy and leave the original intact */ 3582 raw_data = btf__raw_data(obj->btf, &sz); 3583 kern_btf = btf__new(raw_data, sz); 3584 err = libbpf_get_error(kern_btf); 3585 if (err) 3586 return err; 3587 3588 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3589 btf__set_pointer_size(obj->btf, 8); 3590 err = bpf_object__sanitize_btf(obj, kern_btf); 3591 if (err) 3592 return err; 3593 } 3594 3595 if (obj->gen_loader) { 3596 __u32 raw_size = 0; 3597 const void *raw_data = btf__raw_data(kern_btf, &raw_size); 3598 3599 if (!raw_data) 3600 return -ENOMEM; 3601 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); 3602 /* Pretend to have valid FD to pass various fd >= 0 checks. 3603 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 3604 */ 3605 btf__set_fd(kern_btf, 0); 3606 } else { 3607 /* currently BPF_BTF_LOAD only supports log_level 1 */ 3608 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, 3609 obj->log_level ? 1 : 0, obj->token_fd); 3610 } 3611 if (sanitize) { 3612 if (!err) { 3613 /* move fd to libbpf's BTF */ 3614 btf__set_fd(obj->btf, btf__fd(kern_btf)); 3615 btf__set_fd(kern_btf, -1); 3616 } 3617 btf__free(kern_btf); 3618 } 3619 report: 3620 if (err) { 3621 btf_mandatory = kernel_needs_btf(obj); 3622 if (btf_mandatory) { 3623 pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", 3624 errstr(err)); 3625 } else { 3626 pr_info("Error loading .BTF into kernel: %s. 
BTF is optional, ignoring.\n", 3627 errstr(err)); 3628 err = 0; 3629 } 3630 } 3631 return err; 3632 } 3633 3634 static const char *elf_sym_str(const struct bpf_object *obj, size_t off) 3635 { 3636 const char *name; 3637 3638 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); 3639 if (!name) { 3640 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3641 off, obj->path, elf_errmsg(-1)); 3642 return NULL; 3643 } 3644 3645 return name; 3646 } 3647 3648 static const char *elf_sec_str(const struct bpf_object *obj, size_t off) 3649 { 3650 const char *name; 3651 3652 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); 3653 if (!name) { 3654 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3655 off, obj->path, elf_errmsg(-1)); 3656 return NULL; 3657 } 3658 3659 return name; 3660 } 3661 3662 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) 3663 { 3664 Elf_Scn *scn; 3665 3666 scn = elf_getscn(obj->efile.elf, idx); 3667 if (!scn) { 3668 pr_warn("elf: failed to get section(%zu) from %s: %s\n", 3669 idx, obj->path, elf_errmsg(-1)); 3670 return NULL; 3671 } 3672 return scn; 3673 } 3674 3675 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) 3676 { 3677 Elf_Scn *scn = NULL; 3678 Elf *elf = obj->efile.elf; 3679 const char *sec_name; 3680 3681 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3682 sec_name = elf_sec_name(obj, scn); 3683 if (!sec_name) 3684 return NULL; 3685 3686 if (strcmp(sec_name, name) != 0) 3687 continue; 3688 3689 return scn; 3690 } 3691 return NULL; 3692 } 3693 3694 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) 3695 { 3696 Elf64_Shdr *shdr; 3697 3698 if (!scn) 3699 return NULL; 3700 3701 shdr = elf64_getshdr(scn); 3702 if (!shdr) { 3703 pr_warn("elf: failed to get section(%zu) header from %s: %s\n", 3704 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3705 return NULL; 3706 } 3707 3708 return shdr; 3709 } 3710 3711 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) 3712 { 3713 const char *name; 3714 Elf64_Shdr *sh; 3715 3716 if (!scn) 3717 return NULL; 3718 3719 sh = elf_sec_hdr(obj, scn); 3720 if (!sh) 3721 return NULL; 3722 3723 name = elf_sec_str(obj, sh->sh_name); 3724 if (!name) { 3725 pr_warn("elf: failed to get section(%zu) name from %s: %s\n", 3726 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3727 return NULL; 3728 } 3729 3730 return name; 3731 } 3732 3733 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) 3734 { 3735 Elf_Data *data; 3736 3737 if (!scn) 3738 return NULL; 3739 3740 data = elf_getdata(scn, 0); 3741 if (!data) { 3742 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", 3743 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>", 3744 obj->path, elf_errmsg(-1)); 3745 return NULL; 3746 } 3747 3748 return data; 3749 } 3750 3751 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) 3752 { 3753 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) 3754 return NULL; 3755 3756 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; 3757 } 3758 3759 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) 3760 { 3761 if (idx >= data->d_size / sizeof(Elf64_Rel)) 3762 return NULL; 3763 3764 return (Elf64_Rel *)data->d_buf + idx; 3765 } 3766 3767 static bool is_sec_name_dwarf(const char *name) 3768 { 3769 /* approximation, but the actual list is too long */ 3770 return str_has_pfx(name, ".debug_"); 3771 } 3772 3773 static bool 
ignore_elf_section(Elf64_Shdr *hdr, const char *name) 3774 { 3775 /* no special handling of .strtab */ 3776 if (hdr->sh_type == SHT_STRTAB) 3777 return true; 3778 3779 /* ignore .llvm_addrsig section as well */ 3780 if (hdr->sh_type == SHT_LLVM_ADDRSIG) 3781 return true; 3782 3783 /* no subprograms will lead to an empty .text section, ignore it */ 3784 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && 3785 strcmp(name, ".text") == 0) 3786 return true; 3787 3788 /* DWARF sections */ 3789 if (is_sec_name_dwarf(name)) 3790 return true; 3791 3792 if (str_has_pfx(name, ".rel")) { 3793 name += sizeof(".rel") - 1; 3794 /* DWARF section relocations */ 3795 if (is_sec_name_dwarf(name)) 3796 return true; 3797 3798 /* .BTF and .BTF.ext don't need relocations */ 3799 if (strcmp(name, BTF_ELF_SEC) == 0 || 3800 strcmp(name, BTF_EXT_ELF_SEC) == 0) 3801 return true; 3802 } 3803 3804 return false; 3805 } 3806 3807 static int cmp_progs(const void *_a, const void *_b) 3808 { 3809 const struct bpf_program *a = _a; 3810 const struct bpf_program *b = _b; 3811 3812 if (a->sec_idx != b->sec_idx) 3813 return a->sec_idx < b->sec_idx ? -1 : 1; 3814 3815 /* sec_insn_off can't be the same within the section */ 3816 return a->sec_insn_off < b->sec_insn_off ? -1 : 1; 3817 } 3818 3819 static int bpf_object__elf_collect(struct bpf_object *obj) 3820 { 3821 struct elf_sec_desc *sec_desc; 3822 Elf *elf = obj->efile.elf; 3823 Elf_Data *btf_ext_data = NULL; 3824 Elf_Data *btf_data = NULL; 3825 int idx = 0, err = 0; 3826 const char *name; 3827 Elf_Data *data; 3828 Elf_Scn *scn; 3829 Elf64_Shdr *sh; 3830 3831 /* ELF section indices are 0-based, but sec #0 is special "invalid" 3832 * section. Since section count retrieved by elf_getshdrnum() does 3833 * include sec #0, it is already the necessary size of an array to keep 3834 * all the sections. 
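 * For example (editor's note, illustrative only): if the highest section
 * index in the file is 20, elf_getshdrnum() reports 21, so the calloc()
 * below can be indexed directly by raw section index without any
 * off-by-one adjustment.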
3835 */ 3836 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { 3837 pr_warn("elf: failed to get the number of sections for %s: %s\n", 3838 obj->path, elf_errmsg(-1)); 3839 return -LIBBPF_ERRNO__FORMAT; 3840 } 3841 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); 3842 if (!obj->efile.secs) 3843 return -ENOMEM; 3844 3845 /* a bunch of ELF parsing functionality depends on processing symbols, 3846 * so do the first pass and find the symbol table 3847 */ 3848 scn = NULL; 3849 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3850 sh = elf_sec_hdr(obj, scn); 3851 if (!sh) 3852 return -LIBBPF_ERRNO__FORMAT; 3853 3854 if (sh->sh_type == SHT_SYMTAB) { 3855 if (obj->efile.symbols) { 3856 pr_warn("elf: multiple symbol tables in %s\n", obj->path); 3857 return -LIBBPF_ERRNO__FORMAT; 3858 } 3859 3860 data = elf_sec_data(obj, scn); 3861 if (!data) 3862 return -LIBBPF_ERRNO__FORMAT; 3863 3864 idx = elf_ndxscn(scn); 3865 3866 obj->efile.symbols = data; 3867 obj->efile.symbols_shndx = idx; 3868 obj->efile.strtabidx = sh->sh_link; 3869 } 3870 } 3871 3872 if (!obj->efile.symbols) { 3873 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", 3874 obj->path); 3875 return -ENOENT; 3876 } 3877 3878 scn = NULL; 3879 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3880 idx = elf_ndxscn(scn); 3881 sec_desc = &obj->efile.secs[idx]; 3882 3883 sh = elf_sec_hdr(obj, scn); 3884 if (!sh) 3885 return -LIBBPF_ERRNO__FORMAT; 3886 3887 name = elf_sec_str(obj, sh->sh_name); 3888 if (!name) 3889 return -LIBBPF_ERRNO__FORMAT; 3890 3891 if (ignore_elf_section(sh, name)) 3892 continue; 3893 3894 data = elf_sec_data(obj, scn); 3895 if (!data) 3896 return -LIBBPF_ERRNO__FORMAT; 3897 3898 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 3899 idx, name, (unsigned long)data->d_size, 3900 (int)sh->sh_link, (unsigned long)sh->sh_flags, 3901 (int)sh->sh_type); 3902 3903 if (strcmp(name, "license") == 0) { 3904 err = bpf_object__init_license(obj, data->d_buf, data->d_size); 3905 if (err) 3906 return err; 3907 } else if (strcmp(name, "version") == 0) { 3908 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); 3909 if (err) 3910 return err; 3911 } else if (strcmp(name, "maps") == 0) { 3912 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); 3913 return -ENOTSUP; 3914 } else if (strcmp(name, MAPS_ELF_SEC) == 0) { 3915 obj->efile.btf_maps_shndx = idx; 3916 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 3917 if (sh->sh_type != SHT_PROGBITS) 3918 return -LIBBPF_ERRNO__FORMAT; 3919 btf_data = data; 3920 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 3921 if (sh->sh_type != SHT_PROGBITS) 3922 return -LIBBPF_ERRNO__FORMAT; 3923 btf_ext_data = data; 3924 } else if (sh->sh_type == SHT_SYMTAB) { 3925 /* already processed during the first pass above */ 3926 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { 3927 if (sh->sh_flags & SHF_EXECINSTR) { 3928 if (strcmp(name, ".text") == 0) 3929 obj->efile.text_shndx = idx; 3930 err = bpf_object__add_programs(obj, data, name, idx); 3931 if (err) 3932 return err; 3933 } else if (strcmp(name, DATA_SEC) == 0 || 3934 str_has_pfx(name, DATA_SEC ".")) { 3935 sec_desc->sec_type = SEC_DATA; 3936 sec_desc->shdr = sh; 3937 sec_desc->data = data; 3938 } else if (strcmp(name, RODATA_SEC) == 0 || 3939 str_has_pfx(name, RODATA_SEC ".")) { 3940 sec_desc->sec_type = SEC_RODATA; 3941 sec_desc->shdr = sh; 3942 sec_desc->data = data; 3943 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || 3944 
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || 3945 strcmp(name, "?" STRUCT_OPS_SEC) == 0 || 3946 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { 3947 sec_desc->sec_type = SEC_ST_OPS; 3948 sec_desc->shdr = sh; 3949 sec_desc->data = data; 3950 obj->efile.has_st_ops = true; 3951 } else if (strcmp(name, ARENA_SEC) == 0) { 3952 obj->efile.arena_data = data; 3953 obj->efile.arena_data_shndx = idx; 3954 } else { 3955 pr_info("elf: skipping unrecognized data section(%d) %s\n", 3956 idx, name); 3957 } 3958 } else if (sh->sh_type == SHT_REL) { 3959 int targ_sec_idx = sh->sh_info; /* points to other section */ 3960 3961 if (sh->sh_entsize != sizeof(Elf64_Rel) || 3962 targ_sec_idx >= obj->efile.sec_cnt) 3963 return -LIBBPF_ERRNO__FORMAT; 3964 3965 /* Only do relo for section with exec instructions */ 3966 if (!section_have_execinstr(obj, targ_sec_idx) && 3967 strcmp(name, ".rel" STRUCT_OPS_SEC) && 3968 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && 3969 strcmp(name, ".rel?" STRUCT_OPS_SEC) && 3970 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && 3971 strcmp(name, ".rel" MAPS_ELF_SEC)) { 3972 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", 3973 idx, name, targ_sec_idx, 3974 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); 3975 continue; 3976 } 3977 3978 sec_desc->sec_type = SEC_RELO; 3979 sec_desc->shdr = sh; 3980 sec_desc->data = data; 3981 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || 3982 str_has_pfx(name, BSS_SEC "."))) { 3983 sec_desc->sec_type = SEC_BSS; 3984 sec_desc->shdr = sh; 3985 sec_desc->data = data; 3986 } else { 3987 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, 3988 (size_t)sh->sh_size); 3989 } 3990 } 3991 3992 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { 3993 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); 3994 return -LIBBPF_ERRNO__FORMAT; 3995 } 3996 3997 /* change BPF program insns to native endianness for introspection */ 3998 if (!is_native_endianness(obj)) 3999 bpf_object_bswap_progs(obj); 4000 4001 /* sort BPF programs by section name and in-section instruction offset 4002 * for faster search 4003 */ 4004 if (obj->nr_programs) 4005 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); 4006 4007 return bpf_object__init_btf(obj, btf_data, btf_ext_data); 4008 } 4009 4010 static bool sym_is_extern(const Elf64_Sym *sym) 4011 { 4012 int bind = ELF64_ST_BIND(sym->st_info); 4013 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ 4014 return sym->st_shndx == SHN_UNDEF && 4015 (bind == STB_GLOBAL || bind == STB_WEAK) && 4016 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; 4017 } 4018 4019 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) 4020 { 4021 int bind = ELF64_ST_BIND(sym->st_info); 4022 int type = ELF64_ST_TYPE(sym->st_info); 4023 4024 /* in .text section */ 4025 if (sym->st_shndx != text_shndx) 4026 return false; 4027 4028 /* local function */ 4029 if (bind == STB_LOCAL && type == STT_SECTION) 4030 return true; 4031 4032 /* global function */ 4033 return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; 4034 } 4035 4036 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 4037 { 4038 const struct btf_type *t; 4039 const char *tname; 4040 int i, n; 4041 4042 if (!btf) 4043 return -ESRCH; 4044 4045 n = btf__type_cnt(btf); 4046 for (i = 1; i < n; i++) { 4047 t = btf__type_by_id(btf, i); 4048 4049 if (!btf_is_var(t) && !btf_is_func(t)) 4050 continue; 4051 4052 tname = btf__name_by_offset(btf, 
t->name_off); 4053 if (strcmp(tname, ext_name)) 4054 continue; 4055 4056 if (btf_is_var(t) && 4057 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 4058 return -EINVAL; 4059 4060 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 4061 return -EINVAL; 4062 4063 return i; 4064 } 4065 4066 return -ENOENT; 4067 } 4068 4069 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { 4070 const struct btf_var_secinfo *vs; 4071 const struct btf_type *t; 4072 int i, j, n; 4073 4074 if (!btf) 4075 return -ESRCH; 4076 4077 n = btf__type_cnt(btf); 4078 for (i = 1; i < n; i++) { 4079 t = btf__type_by_id(btf, i); 4080 4081 if (!btf_is_datasec(t)) 4082 continue; 4083 4084 vs = btf_var_secinfos(t); 4085 for (j = 0; j < btf_vlen(t); j++, vs++) { 4086 if (vs->type == ext_btf_id) 4087 return i; 4088 } 4089 } 4090 4091 return -ENOENT; 4092 } 4093 4094 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, 4095 bool *is_signed) 4096 { 4097 const struct btf_type *t; 4098 const char *name; 4099 4100 t = skip_mods_and_typedefs(btf, id, NULL); 4101 name = btf__name_by_offset(btf, t->name_off); 4102 4103 if (is_signed) 4104 *is_signed = false; 4105 switch (btf_kind(t)) { 4106 case BTF_KIND_INT: { 4107 int enc = btf_int_encoding(t); 4108 4109 if (enc & BTF_INT_BOOL) 4110 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; 4111 if (is_signed) 4112 *is_signed = enc & BTF_INT_SIGNED; 4113 if (t->size == 1) 4114 return KCFG_CHAR; 4115 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) 4116 return KCFG_UNKNOWN; 4117 return KCFG_INT; 4118 } 4119 case BTF_KIND_ENUM: 4120 if (t->size != 4) 4121 return KCFG_UNKNOWN; 4122 if (strcmp(name, "libbpf_tristate")) 4123 return KCFG_UNKNOWN; 4124 return KCFG_TRISTATE; 4125 case BTF_KIND_ENUM64: 4126 if (strcmp(name, "libbpf_tristate")) 4127 return KCFG_UNKNOWN; 4128 return KCFG_TRISTATE; 4129 case BTF_KIND_ARRAY: 4130 if (btf_array(t)->nelems == 0) 4131 return KCFG_UNKNOWN; 4132 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) 4133 return KCFG_UNKNOWN; 4134 return KCFG_CHAR_ARR; 4135 default: 4136 return KCFG_UNKNOWN; 4137 } 4138 } 4139 4140 static int cmp_externs(const void *_a, const void *_b) 4141 { 4142 const struct extern_desc *a = _a; 4143 const struct extern_desc *b = _b; 4144 4145 if (a->type != b->type) 4146 return a->type < b->type ? -1 : 1; 4147 4148 if (a->type == EXT_KCFG) { 4149 /* descending order by alignment requirements */ 4150 if (a->kcfg.align != b->kcfg.align) 4151 return a->kcfg.align > b->kcfg.align ? -1 : 1; 4152 /* ascending order by size, within same alignment class */ 4153 if (a->kcfg.sz != b->kcfg.sz) 4154 return a->kcfg.sz < b->kcfg.sz ? 
-1 : 1; 4155 } 4156 4157 /* resolve ties by name */ 4158 return strcmp(a->name, b->name); 4159 } 4160 4161 static int find_int_btf_id(const struct btf *btf) 4162 { 4163 const struct btf_type *t; 4164 int i, n; 4165 4166 n = btf__type_cnt(btf); 4167 for (i = 1; i < n; i++) { 4168 t = btf__type_by_id(btf, i); 4169 4170 if (btf_is_int(t) && btf_int_bits(t) == 32) 4171 return i; 4172 } 4173 4174 return 0; 4175 } 4176 4177 static int add_dummy_ksym_var(struct btf *btf) 4178 { 4179 int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 4180 const struct btf_var_secinfo *vs; 4181 const struct btf_type *sec; 4182 4183 if (!btf) 4184 return 0; 4185 4186 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 4187 BTF_KIND_DATASEC); 4188 if (sec_btf_id < 0) 4189 return 0; 4190 4191 sec = btf__type_by_id(btf, sec_btf_id); 4192 vs = btf_var_secinfos(sec); 4193 for (i = 0; i < btf_vlen(sec); i++, vs++) { 4194 const struct btf_type *vt; 4195 4196 vt = btf__type_by_id(btf, vs->type); 4197 if (btf_is_func(vt)) 4198 break; 4199 } 4200 4201 /* No func in ksyms sec. No need to add dummy var. */ 4202 if (i == btf_vlen(sec)) 4203 return 0; 4204 4205 int_btf_id = find_int_btf_id(btf); 4206 dummy_var_btf_id = btf__add_var(btf, 4207 "dummy_ksym", 4208 BTF_VAR_GLOBAL_ALLOCATED, 4209 int_btf_id); 4210 if (dummy_var_btf_id < 0) 4211 pr_warn("cannot create a dummy_ksym var\n"); 4212 4213 return dummy_var_btf_id; 4214 } 4215 4216 static int bpf_object__collect_externs(struct bpf_object *obj) 4217 { 4218 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 4219 const struct btf_type *t; 4220 struct extern_desc *ext; 4221 int i, n, off, dummy_var_btf_id; 4222 const char *ext_name, *sec_name; 4223 size_t ext_essent_len; 4224 Elf_Scn *scn; 4225 Elf64_Shdr *sh; 4226 4227 if (!obj->efile.symbols) 4228 return 0; 4229 4230 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 4231 sh = elf_sec_hdr(obj, scn); 4232 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) 4233 return -LIBBPF_ERRNO__FORMAT; 4234 4235 dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 4236 if (dummy_var_btf_id < 0) 4237 return dummy_var_btf_id; 4238 4239 n = sh->sh_size / sh->sh_entsize; 4240 pr_debug("looking for externs among %d symbols...\n", n); 4241 4242 for (i = 0; i < n; i++) { 4243 Elf64_Sym *sym = elf_sym_by_idx(obj, i); 4244 4245 if (!sym) 4246 return -LIBBPF_ERRNO__FORMAT; 4247 if (!sym_is_extern(sym)) 4248 continue; 4249 ext_name = elf_sym_str(obj, sym->st_name); 4250 if (!ext_name || !ext_name[0]) 4251 continue; 4252 4253 ext = obj->externs; 4254 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); 4255 if (!ext) 4256 return -ENOMEM; 4257 obj->externs = ext; 4258 ext = &ext[obj->nr_extern]; 4259 memset(ext, 0, sizeof(*ext)); 4260 obj->nr_extern++; 4261 4262 ext->btf_id = find_extern_btf_id(obj->btf, ext_name); 4263 if (ext->btf_id <= 0) { 4264 pr_warn("failed to find BTF for extern '%s': %d\n", 4265 ext_name, ext->btf_id); 4266 return ext->btf_id; 4267 } 4268 t = btf__type_by_id(obj->btf, ext->btf_id); 4269 ext->name = btf__name_by_offset(obj->btf, t->name_off); 4270 ext->sym_idx = i; 4271 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 4272 4273 ext_essent_len = bpf_core_essential_name_len(ext->name); 4274 ext->essent_name = NULL; 4275 if (ext_essent_len != strlen(ext->name)) { 4276 ext->essent_name = strndup(ext->name, ext_essent_len); 4277 if (!ext->essent_name) 4278 return -ENOMEM; 4279 } 4280 4281 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 4282 if (ext->sec_btf_id <= 0) { 4283 pr_warn("failed to find BTF 
for extern '%s' [%d] section: %d\n", 4284 ext_name, ext->btf_id, ext->sec_btf_id); 4285 return ext->sec_btf_id; 4286 } 4287 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); 4288 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 4289 4290 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 4291 if (btf_is_func(t)) { 4292 pr_warn("extern function %s is unsupported under %s section\n", 4293 ext->name, KCONFIG_SEC); 4294 return -ENOTSUP; 4295 } 4296 kcfg_sec = sec; 4297 ext->type = EXT_KCFG; 4298 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); 4299 if (ext->kcfg.sz <= 0) { 4300 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", 4301 ext_name, ext->kcfg.sz); 4302 return ext->kcfg.sz; 4303 } 4304 ext->kcfg.align = btf__align_of(obj->btf, t->type); 4305 if (ext->kcfg.align <= 0) { 4306 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", 4307 ext_name, ext->kcfg.align); 4308 return -EINVAL; 4309 } 4310 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 4311 &ext->kcfg.is_signed); 4312 if (ext->kcfg.type == KCFG_UNKNOWN) { 4313 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 4314 return -ENOTSUP; 4315 } 4316 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 4317 ksym_sec = sec; 4318 ext->type = EXT_KSYM; 4319 skip_mods_and_typedefs(obj->btf, t->type, 4320 &ext->ksym.type_id); 4321 } else { 4322 pr_warn("unrecognized extern section '%s'\n", sec_name); 4323 return -ENOTSUP; 4324 } 4325 } 4326 pr_debug("collected %d externs total\n", obj->nr_extern); 4327 4328 if (!obj->nr_extern) 4329 return 0; 4330 4331 /* sort externs by type, for kcfg ones also by (align, size, name) */ 4332 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); 4333 4334 /* for .ksyms section, we need to turn all externs into allocated 4335 * variables in BTF to pass kernel verification; we do this by 4336 * pretending that each extern is an int-sized variable 4337 */ 4338 if (ksym_sec) { 4339 /* find existing 4-byte integer type in BTF to use for fake 4340 * extern variables in DATASEC 4341 */ 4342 int int_btf_id = find_int_btf_id(obj->btf); 4343 /* For an extern function, the dummy_var added earlier 4344 * will be used to replace the vs->type and 4345 * its name string will be used to fill in 4346 * the missing param's name. 4347 */ 4348 const struct btf_type *dummy_var; 4349 4350 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 4351 for (i = 0; i < obj->nr_extern; i++) { 4352 ext = &obj->externs[i]; 4353 if (ext->type != EXT_KSYM) 4354 continue; 4355 pr_debug("extern (ksym) #%d: symbol %d, name %s\n", 4356 i, ext->sym_idx, ext->name); 4357 } 4358 4359 sec = ksym_sec; 4360 n = btf_vlen(sec); 4361 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { 4362 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4363 struct btf_type *vt; 4364 4365 vt = (void *)btf__type_by_id(obj->btf, vs->type); 4366 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 4367 ext = find_extern_by_name(obj, ext_name); 4368 if (!ext) { 4369 pr_warn("failed to find extern definition for BTF %s '%s'\n", 4370 btf_kind_str(vt), ext_name); 4371 return -ESRCH; 4372 } 4373 if (btf_is_func(vt)) { 4374 const struct btf_type *func_proto; 4375 struct btf_param *param; 4376 int j; 4377 4378 func_proto = btf__type_by_id(obj->btf, 4379 vt->type); 4380 param = btf_params(func_proto); 4381 /* Reuse the dummy_var string if the 4382 * func proto does not have a param name.
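 * (Editor's note, restating the loop below for clarity: any param entry
 * that has a type but name_off == 0 gets dummy_var->name_off assigned,
 * reusing the "dummy_ksym" string added by add_dummy_ksym_var().)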
4383 */ 4384 for (j = 0; j < btf_vlen(func_proto); j++) 4385 if (param[j].type && !param[j].name_off) 4386 param[j].name_off = 4387 dummy_var->name_off; 4388 vs->type = dummy_var_btf_id; 4389 vt->info &= ~0xffff; 4390 vt->info |= BTF_FUNC_GLOBAL; 4391 } else { 4392 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4393 vt->type = int_btf_id; 4394 } 4395 vs->offset = off; 4396 vs->size = sizeof(int); 4397 } 4398 sec->size = off; 4399 } 4400 4401 if (kcfg_sec) { 4402 sec = kcfg_sec; 4403 /* for kcfg externs calculate their offsets within a .kconfig map */ 4404 off = 0; 4405 for (i = 0; i < obj->nr_extern; i++) { 4406 ext = &obj->externs[i]; 4407 if (ext->type != EXT_KCFG) 4408 continue; 4409 4410 ext->kcfg.data_off = roundup(off, ext->kcfg.align); 4411 off = ext->kcfg.data_off + ext->kcfg.sz; 4412 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", 4413 i, ext->sym_idx, ext->kcfg.data_off, ext->name); 4414 } 4415 sec->size = off; 4416 n = btf_vlen(sec); 4417 for (i = 0; i < n; i++) { 4418 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4419 4420 t = btf__type_by_id(obj->btf, vs->type); 4421 ext_name = btf__name_by_offset(obj->btf, t->name_off); 4422 ext = find_extern_by_name(obj, ext_name); 4423 if (!ext) { 4424 pr_warn("failed to find extern definition for BTF var '%s'\n", 4425 ext_name); 4426 return -ESRCH; 4427 } 4428 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4429 vs->offset = ext->kcfg.data_off; 4430 } 4431 } 4432 return 0; 4433 } 4434 4435 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) 4436 { 4437 return prog->sec_idx == obj->efile.text_shndx; 4438 } 4439 4440 struct bpf_program * 4441 bpf_object__find_program_by_name(const struct bpf_object *obj, 4442 const char *name) 4443 { 4444 struct bpf_program *prog; 4445 4446 bpf_object__for_each_program(prog, obj) { 4447 if (prog_is_subprog(obj, prog)) 4448 continue; 4449 if (!strcmp(prog->name, name)) 4450 return prog; 4451 } 4452 return errno = ENOENT, NULL; 4453 } 4454 4455 static bool bpf_object__shndx_is_data(const struct bpf_object *obj, 4456 int shndx) 4457 { 4458 switch (obj->efile.secs[shndx].sec_type) { 4459 case SEC_BSS: 4460 case SEC_DATA: 4461 case SEC_RODATA: 4462 return true; 4463 default: 4464 return false; 4465 } 4466 } 4467 4468 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 4469 int shndx) 4470 { 4471 return shndx == obj->efile.btf_maps_shndx; 4472 } 4473 4474 static enum libbpf_map_type 4475 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) 4476 { 4477 if (shndx == obj->efile.symbols_shndx) 4478 return LIBBPF_MAP_KCONFIG; 4479 4480 switch (obj->efile.secs[shndx].sec_type) { 4481 case SEC_BSS: 4482 return LIBBPF_MAP_BSS; 4483 case SEC_DATA: 4484 return LIBBPF_MAP_DATA; 4485 case SEC_RODATA: 4486 return LIBBPF_MAP_RODATA; 4487 default: 4488 return LIBBPF_MAP_UNSPEC; 4489 } 4490 } 4491 4492 static int bpf_program__record_reloc(struct bpf_program *prog, 4493 struct reloc_desc *reloc_desc, 4494 __u32 insn_idx, const char *sym_name, 4495 const Elf64_Sym *sym, const Elf64_Rel *rel) 4496 { 4497 struct bpf_insn *insn = &prog->insns[insn_idx]; 4498 size_t map_idx, nr_maps = prog->obj->nr_maps; 4499 struct bpf_object *obj = prog->obj; 4500 __u32 shdr_idx = sym->st_shndx; 4501 enum libbpf_map_type type; 4502 const char *sym_sec_name; 4503 struct bpf_map *map; 4504 4505 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 4506 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 4507 prog->name, 
sym_name, insn_idx, insn->code); 4508 return -LIBBPF_ERRNO__RELOC; 4509 } 4510 4511 if (sym_is_extern(sym)) { 4512 int sym_idx = ELF64_R_SYM(rel->r_info); 4513 int i, n = obj->nr_extern; 4514 struct extern_desc *ext; 4515 4516 for (i = 0; i < n; i++) { 4517 ext = &obj->externs[i]; 4518 if (ext->sym_idx == sym_idx) 4519 break; 4520 } 4521 if (i >= n) { 4522 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", 4523 prog->name, sym_name, sym_idx); 4524 return -LIBBPF_ERRNO__RELOC; 4525 } 4526 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 4527 prog->name, i, ext->name, ext->sym_idx, insn_idx); 4528 if (insn->code == (BPF_JMP | BPF_CALL)) 4529 reloc_desc->type = RELO_EXTERN_CALL; 4530 else 4531 reloc_desc->type = RELO_EXTERN_LD64; 4532 reloc_desc->insn_idx = insn_idx; 4533 reloc_desc->ext_idx = i; 4534 return 0; 4535 } 4536 4537 /* sub-program call relocation */ 4538 if (is_call_insn(insn)) { 4539 if (insn->src_reg != BPF_PSEUDO_CALL) { 4540 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 4541 return -LIBBPF_ERRNO__RELOC; 4542 } 4543 /* text_shndx can be 0, if no default "main" program exists */ 4544 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 4545 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4546 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 4547 prog->name, sym_name, sym_sec_name); 4548 return -LIBBPF_ERRNO__RELOC; 4549 } 4550 if (sym->st_value % BPF_INSN_SZ) { 4551 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 4552 prog->name, sym_name, (size_t)sym->st_value); 4553 return -LIBBPF_ERRNO__RELOC; 4554 } 4555 reloc_desc->type = RELO_CALL; 4556 reloc_desc->insn_idx = insn_idx; 4557 reloc_desc->sym_off = sym->st_value; 4558 return 0; 4559 } 4560 4561 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { 4562 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", 4563 prog->name, sym_name, shdr_idx); 4564 return -LIBBPF_ERRNO__RELOC; 4565 } 4566 4567 /* loading subprog addresses */ 4568 if (sym_is_subprog(sym, obj->efile.text_shndx)) { 4569 /* global_func: sym->st_value = offset in the section, insn->imm = 0. 4570 * local_func: sym->st_value = 0, insn->imm = offset in the section. 
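 * Illustrative example (editor's note, not part of the original source):
 * a global subprog starting 3 instructions (24 bytes) into its section is
 * recorded with sym->st_value == 24 and insn->imm == 0; both values must
 * be multiples of BPF_INSN_SZ (8 bytes), which the check below enforces.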
4571 */ 4572 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { 4573 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", 4574 prog->name, sym_name, (size_t)sym->st_value, insn->imm); 4575 return -LIBBPF_ERRNO__RELOC; 4576 } 4577 4578 reloc_desc->type = RELO_SUBPROG_ADDR; 4579 reloc_desc->insn_idx = insn_idx; 4580 reloc_desc->sym_off = sym->st_value; 4581 return 0; 4582 } 4583 4584 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); 4585 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4586 4587 /* arena data relocation */ 4588 if (shdr_idx == obj->efile.arena_data_shndx) { 4589 reloc_desc->type = RELO_DATA; 4590 reloc_desc->insn_idx = insn_idx; 4591 reloc_desc->map_idx = obj->arena_map - obj->maps; 4592 reloc_desc->sym_off = sym->st_value; 4593 return 0; 4594 } 4595 4596 /* generic map reference relocation */ 4597 if (type == LIBBPF_MAP_UNSPEC) { 4598 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { 4599 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", 4600 prog->name, sym_name, sym_sec_name); 4601 return -LIBBPF_ERRNO__RELOC; 4602 } 4603 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4604 map = &obj->maps[map_idx]; 4605 if (map->libbpf_type != type || 4606 map->sec_idx != sym->st_shndx || 4607 map->sec_offset != sym->st_value) 4608 continue; 4609 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", 4610 prog->name, map_idx, map->name, map->sec_idx, 4611 map->sec_offset, insn_idx); 4612 break; 4613 } 4614 if (map_idx >= nr_maps) { 4615 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", 4616 prog->name, sym_sec_name, (size_t)sym->st_value); 4617 return -LIBBPF_ERRNO__RELOC; 4618 } 4619 reloc_desc->type = RELO_LD64; 4620 reloc_desc->insn_idx = insn_idx; 4621 reloc_desc->map_idx = map_idx; 4622 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ 4623 return 0; 4624 } 4625 4626 /* global data map relocation */ 4627 if (!bpf_object__shndx_is_data(obj, shdr_idx)) { 4628 pr_warn("prog '%s': bad data relo against section '%s'\n", 4629 prog->name, sym_sec_name); 4630 return -LIBBPF_ERRNO__RELOC; 4631 } 4632 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4633 map = &obj->maps[map_idx]; 4634 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) 4635 continue; 4636 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", 4637 prog->name, map_idx, map->name, map->sec_idx, 4638 map->sec_offset, insn_idx); 4639 break; 4640 } 4641 if (map_idx >= nr_maps) { 4642 pr_warn("prog '%s': data relo failed to find map for section '%s'\n", 4643 prog->name, sym_sec_name); 4644 return -LIBBPF_ERRNO__RELOC; 4645 } 4646 4647 reloc_desc->type = RELO_DATA; 4648 reloc_desc->insn_idx = insn_idx; 4649 reloc_desc->map_idx = map_idx; 4650 reloc_desc->sym_off = sym->st_value; 4651 return 0; 4652 } 4653 4654 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) 4655 { 4656 return insn_idx >= prog->sec_insn_off && 4657 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; 4658 } 4659 4660 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, 4661 size_t sec_idx, size_t insn_idx) 4662 { 4663 int l = 0, r = obj->nr_programs - 1, m; 4664 struct bpf_program *prog; 4665 4666 if (!obj->nr_programs) 4667 return NULL; 4668 4669 while (l < r) { 4670 m = l + (r - l + 1) / 2; 4671 prog = &obj->programs[m]; 4672 4673 if (prog->sec_idx < sec_idx || 4674 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) 4675 
l = m; 4676 else 4677 r = m - 1; 4678 } 4679 /* matching program could be at index l, but it still might be the 4680 * wrong one, so we need to double check conditions for the last time 4681 */ 4682 prog = &obj->programs[l]; 4683 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx)) 4684 return prog; 4685 return NULL; 4686 } 4687 4688 static int 4689 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) 4690 { 4691 const char *relo_sec_name, *sec_name; 4692 size_t sec_idx = shdr->sh_info, sym_idx; 4693 struct bpf_program *prog; 4694 struct reloc_desc *relos; 4695 int err, i, nrels; 4696 const char *sym_name; 4697 __u32 insn_idx; 4698 Elf_Scn *scn; 4699 Elf_Data *scn_data; 4700 Elf64_Sym *sym; 4701 Elf64_Rel *rel; 4702 4703 if (sec_idx >= obj->efile.sec_cnt) 4704 return -EINVAL; 4705 4706 scn = elf_sec_by_idx(obj, sec_idx); 4707 scn_data = elf_sec_data(obj, scn); 4708 if (!scn_data) 4709 return -LIBBPF_ERRNO__FORMAT; 4710 4711 relo_sec_name = elf_sec_str(obj, shdr->sh_name); 4712 sec_name = elf_sec_name(obj, scn); 4713 if (!relo_sec_name || !sec_name) 4714 return -EINVAL; 4715 4716 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", 4717 relo_sec_name, sec_idx, sec_name); 4718 nrels = shdr->sh_size / shdr->sh_entsize; 4719 4720 for (i = 0; i < nrels; i++) { 4721 rel = elf_rel_by_idx(data, i); 4722 if (!rel) { 4723 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); 4724 return -LIBBPF_ERRNO__FORMAT; 4725 } 4726 4727 sym_idx = ELF64_R_SYM(rel->r_info); 4728 sym = elf_sym_by_idx(obj, sym_idx); 4729 if (!sym) { 4730 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", 4731 relo_sec_name, sym_idx, i); 4732 return -LIBBPF_ERRNO__FORMAT; 4733 } 4734 4735 if (sym->st_shndx >= obj->efile.sec_cnt) { 4736 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", 4737 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); 4738 return -LIBBPF_ERRNO__FORMAT; 4739 } 4740 4741 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { 4742 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", 4743 relo_sec_name, (size_t)rel->r_offset, i); 4744 return -LIBBPF_ERRNO__FORMAT; 4745 } 4746 4747 insn_idx = rel->r_offset / BPF_INSN_SZ; 4748 /* relocations against static functions are recorded as 4749 * relocations against the section that contains a function; 4750 * in such case, symbol will be STT_SECTION and sym.st_name 4751 * will point to empty string (0), so fetch section name 4752 * instead 4753 */ 4754 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) 4755 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); 4756 else 4757 sym_name = elf_sym_str(obj, sym->st_name); 4758 sym_name = sym_name ?: "<?"; 4759 4760 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", 4761 relo_sec_name, i, insn_idx, sym_name); 4762 4763 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 4764 if (!prog) { 4765 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n", 4766 relo_sec_name, i, sec_name, insn_idx); 4767 continue; 4768 } 4769 4770 relos = libbpf_reallocarray(prog->reloc_desc, 4771 prog->nr_reloc + 1, sizeof(*relos)); 4772 if (!relos) 4773 return -ENOMEM; 4774 prog->reloc_desc = relos; 4775 4776 /* adjust insn_idx to local BPF program frame of reference */ 4777 insn_idx -= prog->sec_insn_off; 4778 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], 4779 insn_idx, sym_name, 
sym, rel); 4780 if (err) 4781 return err; 4782 4783 prog->nr_reloc++; 4784 } 4785 return 0; 4786 } 4787 4788 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) 4789 { 4790 int id; 4791 4792 if (!obj->btf) 4793 return -ENOENT; 4794 4795 /* if it's BTF-defined map, we don't need to search for type IDs. 4796 * For struct_ops map, it does not need btf_key_type_id and 4797 * btf_value_type_id. 4798 */ 4799 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) 4800 return 0; 4801 4802 /* 4803 * LLVM annotates global data differently in BTF, that is, 4804 * only as '.data', '.bss' or '.rodata'. 4805 */ 4806 if (!bpf_map__is_internal(map)) 4807 return -ENOENT; 4808 4809 id = btf__find_by_name(obj->btf, map->real_name); 4810 if (id < 0) 4811 return id; 4812 4813 map->btf_key_type_id = 0; 4814 map->btf_value_type_id = id; 4815 return 0; 4816 } 4817 4818 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) 4819 { 4820 char file[PATH_MAX], buff[4096]; 4821 FILE *fp; 4822 __u32 val; 4823 int err; 4824 4825 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4826 memset(info, 0, sizeof(*info)); 4827 4828 fp = fopen(file, "re"); 4829 if (!fp) { 4830 err = -errno; 4831 pr_warn("failed to open %s: %s. No procfs support?\n", file, 4832 errstr(err)); 4833 return err; 4834 } 4835 4836 while (fgets(buff, sizeof(buff), fp)) { 4837 if (sscanf(buff, "map_type:\t%u", &val) == 1) 4838 info->type = val; 4839 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 4840 info->key_size = val; 4841 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 4842 info->value_size = val; 4843 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 4844 info->max_entries = val; 4845 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 4846 info->map_flags = val; 4847 } 4848 4849 fclose(fp); 4850 4851 return 0; 4852 } 4853 4854 static bool map_is_created(const struct bpf_map *map) 4855 { 4856 return map->obj->state >= OBJ_PREPARED || map->reused; 4857 } 4858 4859 bool bpf_map__autocreate(const struct bpf_map *map) 4860 { 4861 return map->autocreate; 4862 } 4863 4864 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4865 { 4866 if (map_is_created(map)) 4867 return libbpf_err(-EBUSY); 4868 4869 map->autocreate = autocreate; 4870 return 0; 4871 } 4872 4873 int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) 4874 { 4875 if (!bpf_map__is_struct_ops(map)) 4876 return libbpf_err(-EINVAL); 4877 4878 map->autoattach = autoattach; 4879 return 0; 4880 } 4881 4882 bool bpf_map__autoattach(const struct bpf_map *map) 4883 { 4884 return map->autoattach; 4885 } 4886 4887 int bpf_map__reuse_fd(struct bpf_map *map, int fd) 4888 { 4889 struct bpf_map_info info; 4890 __u32 len = sizeof(info), name_len; 4891 int new_fd, err; 4892 char *new_name; 4893 4894 memset(&info, 0, len); 4895 err = bpf_map_get_info_by_fd(fd, &info, &len); 4896 if (err && errno == EINVAL) 4897 err = bpf_get_map_info_from_fdinfo(fd, &info); 4898 if (err) 4899 return libbpf_err(err); 4900 4901 name_len = strlen(info.name); 4902 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) 4903 new_name = strdup(map->name); 4904 else 4905 new_name = strdup(info.name); 4906 4907 if (!new_name) 4908 return libbpf_err(-errno); 4909 4910 /* 4911 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4912 * This is similar to what we do in ensure_good_fd(), but without 4913 * closing original FD. 
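 * (Editor's note, illustrative: fcntl(fd, F_DUPFD_CLOEXEC, 3) returns the
 * lowest unused descriptor number >= 3 that refers to the same open file
 * description as fd, with the close-on-exec flag already set, so the
 * stdin/stdout/stderr slots are never handed back.)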
4914 */ 4915 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4916 if (new_fd < 0) { 4917 err = -errno; 4918 goto err_free_new_name; 4919 } 4920 4921 err = reuse_fd(map->fd, new_fd); 4922 if (err) 4923 goto err_free_new_name; 4924 4925 free(map->name); 4926 4927 map->name = new_name; 4928 map->def.type = info.type; 4929 map->def.key_size = info.key_size; 4930 map->def.value_size = info.value_size; 4931 map->def.max_entries = info.max_entries; 4932 map->def.map_flags = info.map_flags; 4933 map->btf_key_type_id = info.btf_key_type_id; 4934 map->btf_value_type_id = info.btf_value_type_id; 4935 map->reused = true; 4936 map->map_extra = info.map_extra; 4937 4938 return 0; 4939 4940 err_free_new_name: 4941 free(new_name); 4942 return libbpf_err(err); 4943 } 4944 4945 __u32 bpf_map__max_entries(const struct bpf_map *map) 4946 { 4947 return map->def.max_entries; 4948 } 4949 4950 struct bpf_map *bpf_map__inner_map(struct bpf_map *map) 4951 { 4952 if (!bpf_map_type__is_map_in_map(map->def.type)) 4953 return errno = EINVAL, NULL; 4954 4955 return map->inner_map; 4956 } 4957 4958 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 4959 { 4960 if (map_is_created(map)) 4961 return libbpf_err(-EBUSY); 4962 4963 map->def.max_entries = max_entries; 4964 4965 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 4966 if (map_is_ringbuf(map)) 4967 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 4968 4969 return 0; 4970 } 4971 4972 static int bpf_object_prepare_token(struct bpf_object *obj) 4973 { 4974 const char *bpffs_path; 4975 int bpffs_fd = -1, token_fd, err; 4976 bool mandatory; 4977 enum libbpf_print_level level; 4978 4979 /* token is explicitly prevented */ 4980 if (obj->token_path && obj->token_path[0] == '\0') { 4981 pr_debug("object '%s': token is prevented, skipping...\n", obj->name); 4982 return 0; 4983 } 4984 4985 mandatory = obj->token_path != NULL; 4986 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; 4987 4988 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; 4989 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); 4990 if (bpffs_fd < 0) { 4991 err = -errno; 4992 __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", 4993 obj->name, errstr(err), bpffs_path, 4994 mandatory ? "" : ", skipping optional step..."); 4995 return mandatory ? err : 0; 4996 } 4997 4998 token_fd = bpf_token_create(bpffs_fd, 0); 4999 close(bpffs_fd); 5000 if (token_fd < 0) { 5001 if (!mandatory && token_fd == -ENOENT) { 5002 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", 5003 obj->name, bpffs_path); 5004 return 0; 5005 } 5006 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", 5007 obj->name, token_fd, bpffs_path, 5008 mandatory ? "" : ", skipping optional step..."); 5009 return mandatory ? token_fd : 0; 5010 } 5011 5012 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); 5013 if (!obj->feat_cache) { 5014 close(token_fd); 5015 return -ENOMEM; 5016 } 5017 5018 obj->token_fd = token_fd; 5019 obj->feat_cache->token_fd = token_fd; 5020 5021 return 0; 5022 } 5023 5024 static int 5025 bpf_object__probe_loading(struct bpf_object *obj) 5026 { 5027 struct bpf_insn insns[] = { 5028 BPF_MOV64_IMM(BPF_REG_0, 0), 5029 BPF_EXIT_INSN(), 5030 }; 5031 int ret, insn_cnt = ARRAY_SIZE(insns); 5032 LIBBPF_OPTS(bpf_prog_load_opts, opts, 5033 .token_fd = obj->token_fd, 5034 .prog_flags = obj->token_fd ? 
BPF_F_TOKEN_FD : 0, 5035 ); 5036 5037 if (obj->gen_loader) 5038 return 0; 5039 5040 ret = bump_rlimit_memlock(); 5041 if (ret) 5042 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", 5043 errstr(ret)); 5044 5045 /* make sure basic loading works */ 5046 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); 5047 if (ret < 0) 5048 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); 5049 if (ret < 0) { 5050 ret = errno; 5051 pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", 5052 __func__, errstr(ret)); 5053 return -ret; 5054 } 5055 close(ret); 5056 5057 return 0; 5058 } 5059 5060 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) 5061 { 5062 if (obj->gen_loader) 5063 /* To generate loader program assume the latest kernel 5064 * to avoid doing extra prog_load, map_create syscalls. 5065 */ 5066 return true; 5067 5068 if (obj->token_fd) 5069 return feat_supported(obj->feat_cache, feat_id); 5070 5071 return feat_supported(NULL, feat_id); 5072 } 5073 5074 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) 5075 { 5076 struct bpf_map_info map_info; 5077 __u32 map_info_len = sizeof(map_info); 5078 int err; 5079 5080 memset(&map_info, 0, map_info_len); 5081 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); 5082 if (err && errno == EINVAL) 5083 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); 5084 if (err) { 5085 pr_warn("failed to get map info for map FD %d: %s\n", map_fd, 5086 errstr(err)); 5087 return false; 5088 } 5089 5090 return (map_info.type == map->def.type && 5091 map_info.key_size == map->def.key_size && 5092 map_info.value_size == map->def.value_size && 5093 map_info.max_entries == map->def.max_entries && 5094 map_info.map_flags == map->def.map_flags && 5095 map_info.map_extra == map->map_extra); 5096 } 5097 5098 static int 5099 bpf_object__reuse_map(struct bpf_map *map) 5100 { 5101 int err, pin_fd; 5102 5103 pin_fd = bpf_obj_get(map->pin_path); 5104 if (pin_fd < 0) { 5105 err = -errno; 5106 if (err == -ENOENT) { 5107 pr_debug("found no pinned map to reuse at '%s'\n", 5108 map->pin_path); 5109 return 0; 5110 } 5111 5112 pr_warn("couldn't retrieve pinned map '%s': %s\n", 5113 map->pin_path, errstr(err)); 5114 return err; 5115 } 5116 5117 if (!map_is_reuse_compat(map, pin_fd)) { 5118 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", 5119 map->pin_path); 5120 close(pin_fd); 5121 return -EINVAL; 5122 } 5123 5124 err = bpf_map__reuse_fd(map, pin_fd); 5125 close(pin_fd); 5126 if (err) 5127 return err; 5128 5129 map->pinned = true; 5130 pr_debug("reused pinned map at '%s'\n", map->pin_path); 5131 5132 return 0; 5133 } 5134 5135 static int 5136 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) 5137 { 5138 enum libbpf_map_type map_type = map->libbpf_type; 5139 int err, zero = 0; 5140 size_t mmap_sz; 5141 5142 if (obj->gen_loader) { 5143 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, 5144 map->mmaped, map->def.value_size); 5145 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) 5146 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); 5147 return 0; 5148 } 5149 5150 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); 5151 if (err) { 5152 err = -errno; 5153 pr_warn("map '%s': failed to set initial contents: %s\n", 5154 
bpf_map__name(map), errstr(err)); 5155 return err; 5156 } 5157 5158 /* Freeze .rodata and .kconfig map as read-only from syscall side. */ 5159 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { 5160 err = bpf_map_freeze(map->fd); 5161 if (err) { 5162 err = -errno; 5163 pr_warn("map '%s': failed to freeze as read-only: %s\n", 5164 bpf_map__name(map), errstr(err)); 5165 return err; 5166 } 5167 } 5168 5169 /* Remap anonymous mmap()-ed "map initialization image" as 5170 * a BPF map-backed mmap()-ed memory, but preserving the same 5171 * memory address. This will cause kernel to change process' 5172 * page table to point to a different piece of kernel memory, 5173 * but from userspace point of view memory address (and its 5174 * contents, being identical at this point) will stay the 5175 * same. This mapping will be released by bpf_object__close() 5176 * as per normal clean up procedure. 5177 */ 5178 mmap_sz = bpf_map_mmap_sz(map); 5179 if (map->def.map_flags & BPF_F_MMAPABLE) { 5180 void *mmaped; 5181 int prot; 5182 5183 if (map->def.map_flags & BPF_F_RDONLY_PROG) 5184 prot = PROT_READ; 5185 else 5186 prot = PROT_READ | PROT_WRITE; 5187 mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); 5188 if (mmaped == MAP_FAILED) { 5189 err = -errno; 5190 pr_warn("map '%s': failed to re-mmap() contents: %s\n", 5191 bpf_map__name(map), errstr(err)); 5192 return err; 5193 } 5194 map->mmaped = mmaped; 5195 } else if (map->mmaped) { 5196 munmap(map->mmaped, mmap_sz); 5197 map->mmaped = NULL; 5198 } 5199 5200 return 0; 5201 } 5202 5203 static void bpf_map__destroy(struct bpf_map *map); 5204 5205 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 5206 { 5207 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 5208 struct bpf_map_def *def = &map->def; 5209 const char *map_name = NULL; 5210 int err = 0, map_fd; 5211 5212 if (kernel_supports(obj, FEAT_PROG_NAME)) 5213 map_name = map->name; 5214 create_attr.map_ifindex = map->map_ifindex; 5215 create_attr.map_flags = def->map_flags; 5216 create_attr.numa_node = map->numa_node; 5217 create_attr.map_extra = map->map_extra; 5218 create_attr.token_fd = obj->token_fd; 5219 if (obj->token_fd) 5220 create_attr.map_flags |= BPF_F_TOKEN_FD; 5221 5222 if (bpf_map__is_struct_ops(map)) { 5223 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5224 if (map->mod_btf_fd >= 0) { 5225 create_attr.value_type_btf_obj_fd = map->mod_btf_fd; 5226 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; 5227 } 5228 } 5229 5230 if (obj->btf && btf__fd(obj->btf) >= 0) { 5231 create_attr.btf_fd = btf__fd(obj->btf); 5232 create_attr.btf_key_type_id = map->btf_key_type_id; 5233 create_attr.btf_value_type_id = map->btf_value_type_id; 5234 } 5235 5236 if (bpf_map_type__is_map_in_map(def->type)) { 5237 if (map->inner_map) { 5238 err = map_set_def_max_entries(map->inner_map); 5239 if (err) 5240 return err; 5241 err = bpf_object__create_map(obj, map->inner_map, true); 5242 if (err) { 5243 pr_warn("map '%s': failed to create inner map: %s\n", 5244 map->name, errstr(err)); 5245 return err; 5246 } 5247 map->inner_map_fd = map->inner_map->fd; 5248 } 5249 if (map->inner_map_fd >= 0) 5250 create_attr.inner_map_fd = map->inner_map_fd; 5251 } 5252 5253 switch (def->type) { 5254 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 5255 case BPF_MAP_TYPE_CGROUP_ARRAY: 5256 case BPF_MAP_TYPE_STACK_TRACE: 5257 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 5258 case BPF_MAP_TYPE_HASH_OF_MAPS: 5259 case BPF_MAP_TYPE_DEVMAP: 5260 case 
BPF_MAP_TYPE_DEVMAP_HASH: 5261 case BPF_MAP_TYPE_CPUMAP: 5262 case BPF_MAP_TYPE_XSKMAP: 5263 case BPF_MAP_TYPE_SOCKMAP: 5264 case BPF_MAP_TYPE_SOCKHASH: 5265 case BPF_MAP_TYPE_QUEUE: 5266 case BPF_MAP_TYPE_STACK: 5267 case BPF_MAP_TYPE_ARENA: 5268 create_attr.btf_fd = 0; 5269 create_attr.btf_key_type_id = 0; 5270 create_attr.btf_value_type_id = 0; 5271 map->btf_key_type_id = 0; 5272 map->btf_value_type_id = 0; 5273 break; 5274 case BPF_MAP_TYPE_STRUCT_OPS: 5275 create_attr.btf_value_type_id = 0; 5276 break; 5277 default: 5278 break; 5279 } 5280 5281 if (obj->gen_loader) { 5282 bpf_gen__map_create(obj->gen_loader, def->type, map_name, 5283 def->key_size, def->value_size, def->max_entries, 5284 &create_attr, is_inner ? -1 : map - obj->maps); 5285 /* We keep pretending we have a valid FD to pass various fd >= 0 5286 * checks by just keeping original placeholder FDs in place. 5287 * See bpf_object__add_map() comment. 5288 * This placeholder fd will not be used with any syscall and 5289 * will be reset to -1 eventually. 5290 */ 5291 map_fd = map->fd; 5292 } else { 5293 map_fd = bpf_map_create(def->type, map_name, 5294 def->key_size, def->value_size, 5295 def->max_entries, &create_attr); 5296 } 5297 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { 5298 err = -errno; 5299 pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", 5300 map->name, errstr(err)); 5301 create_attr.btf_fd = 0; 5302 create_attr.btf_key_type_id = 0; 5303 create_attr.btf_value_type_id = 0; 5304 map->btf_key_type_id = 0; 5305 map->btf_value_type_id = 0; 5306 map_fd = bpf_map_create(def->type, map_name, 5307 def->key_size, def->value_size, 5308 def->max_entries, &create_attr); 5309 } 5310 5311 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { 5312 if (obj->gen_loader) 5313 map->inner_map->fd = -1; 5314 bpf_map__destroy(map->inner_map); 5315 zfree(&map->inner_map); 5316 } 5317 5318 if (map_fd < 0) 5319 return map_fd; 5320 5321 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ 5322 if (map->fd == map_fd) 5323 return 0; 5324 5325 /* Keep placeholder FD value but now point it to the BPF map object. 5326 * This way everything that relied on this map's FD (e.g., relocated 5327 * ldimm64 instructions) will stay valid and won't need adjustments. 5328 * map->fd stays valid but now points to what map_fd points to.
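 * (Editor's note, a sketch of the assumption here: reuse_fd() behaves
 * roughly like dup3(map_fd, map->fd, O_CLOEXEC) followed by close(map_fd),
 * so the numeric value stored in map->fd is unchanged while its underlying
 * open file description becomes the newly created BPF map.)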
5329 */ 5330 return reuse_fd(map->fd, map_fd); 5331 } 5332 5333 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) 5334 { 5335 const struct bpf_map *targ_map; 5336 unsigned int i; 5337 int fd, err = 0; 5338 5339 for (i = 0; i < map->init_slots_sz; i++) { 5340 if (!map->init_slots[i]) 5341 continue; 5342 5343 targ_map = map->init_slots[i]; 5344 fd = targ_map->fd; 5345 5346 if (obj->gen_loader) { 5347 bpf_gen__populate_outer_map(obj->gen_loader, 5348 map - obj->maps, i, 5349 targ_map - obj->maps); 5350 } else { 5351 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5352 } 5353 if (err) { 5354 err = -errno; 5355 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", 5356 map->name, i, targ_map->name, fd, errstr(err)); 5357 return err; 5358 } 5359 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", 5360 map->name, i, targ_map->name, fd); 5361 } 5362 5363 zfree(&map->init_slots); 5364 map->init_slots_sz = 0; 5365 5366 return 0; 5367 } 5368 5369 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) 5370 { 5371 const struct bpf_program *targ_prog; 5372 unsigned int i; 5373 int fd, err; 5374 5375 if (obj->gen_loader) 5376 return -ENOTSUP; 5377 5378 for (i = 0; i < map->init_slots_sz; i++) { 5379 if (!map->init_slots[i]) 5380 continue; 5381 5382 targ_prog = map->init_slots[i]; 5383 fd = bpf_program__fd(targ_prog); 5384 5385 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5386 if (err) { 5387 err = -errno; 5388 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", 5389 map->name, i, targ_prog->name, fd, errstr(err)); 5390 return err; 5391 } 5392 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", 5393 map->name, i, targ_prog->name, fd); 5394 } 5395 5396 zfree(&map->init_slots); 5397 map->init_slots_sz = 0; 5398 5399 return 0; 5400 } 5401 5402 static int bpf_object_init_prog_arrays(struct bpf_object *obj) 5403 { 5404 struct bpf_map *map; 5405 int i, err; 5406 5407 for (i = 0; i < obj->nr_maps; i++) { 5408 map = &obj->maps[i]; 5409 5410 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) 5411 continue; 5412 5413 err = init_prog_array_slots(obj, map); 5414 if (err < 0) 5415 return err; 5416 } 5417 return 0; 5418 } 5419 5420 static int map_set_def_max_entries(struct bpf_map *map) 5421 { 5422 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { 5423 int nr_cpus; 5424 5425 nr_cpus = libbpf_num_possible_cpus(); 5426 if (nr_cpus < 0) { 5427 pr_warn("map '%s': failed to determine number of system CPUs: %d\n", 5428 map->name, nr_cpus); 5429 return nr_cpus; 5430 } 5431 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); 5432 map->def.max_entries = nr_cpus; 5433 } 5434 5435 return 0; 5436 } 5437 5438 static int 5439 bpf_object__create_maps(struct bpf_object *obj) 5440 { 5441 struct bpf_map *map; 5442 unsigned int i, j; 5443 int err; 5444 bool retried; 5445 5446 for (i = 0; i < obj->nr_maps; i++) { 5447 map = &obj->maps[i]; 5448 5449 /* To support old kernels, we skip creating global data maps 5450 * (.rodata, .data, .kconfig, etc); later on, during program 5451 * loading, if we detect that at least one of the to-be-loaded 5452 * programs is referencing any global data map, we'll error 5453 * out with program name and relocation index logged. 
5454 * This approach allows to accommodate Clang emitting 5455 * unnecessary .rodata.str1.1 sections for string literals, 5456 * but also it allows to have CO-RE applications that use 5457 * global variables in some of BPF programs, but not others. 5458 * If those global variable-using programs are not loaded at 5459 * runtime due to bpf_program__set_autoload(prog, false), 5460 * bpf_object loading will succeed just fine even on old 5461 * kernels. 5462 */ 5463 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) 5464 map->autocreate = false; 5465 5466 if (!map->autocreate) { 5467 pr_debug("map '%s': skipped auto-creating...\n", map->name); 5468 continue; 5469 } 5470 5471 err = map_set_def_max_entries(map); 5472 if (err) 5473 goto err_out; 5474 5475 retried = false; 5476 retry: 5477 if (map->pin_path) { 5478 err = bpf_object__reuse_map(map); 5479 if (err) { 5480 pr_warn("map '%s': error reusing pinned map\n", 5481 map->name); 5482 goto err_out; 5483 } 5484 if (retried && map->fd < 0) { 5485 pr_warn("map '%s': cannot find pinned map\n", 5486 map->name); 5487 err = -ENOENT; 5488 goto err_out; 5489 } 5490 } 5491 5492 if (map->reused) { 5493 pr_debug("map '%s': skipping creation (preset fd=%d)\n", 5494 map->name, map->fd); 5495 } else { 5496 err = bpf_object__create_map(obj, map, false); 5497 if (err) 5498 goto err_out; 5499 5500 pr_debug("map '%s': created successfully, fd=%d\n", 5501 map->name, map->fd); 5502 5503 if (bpf_map__is_internal(map)) { 5504 err = bpf_object__populate_internal_map(obj, map); 5505 if (err < 0) 5506 goto err_out; 5507 } else if (map->def.type == BPF_MAP_TYPE_ARENA) { 5508 map->mmaped = mmap((void *)(long)map->map_extra, 5509 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, 5510 map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, 5511 map->fd, 0); 5512 if (map->mmaped == MAP_FAILED) { 5513 err = -errno; 5514 map->mmaped = NULL; 5515 pr_warn("map '%s': failed to mmap arena: %s\n", 5516 map->name, errstr(err)); 5517 return err; 5518 } 5519 if (obj->arena_data) { 5520 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); 5521 zfree(&obj->arena_data); 5522 } 5523 } 5524 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { 5525 err = init_map_in_map_slots(obj, map); 5526 if (err < 0) 5527 goto err_out; 5528 } 5529 } 5530 5531 if (map->pin_path && !map->pinned) { 5532 err = bpf_map__pin(map, NULL); 5533 if (err) { 5534 if (!retried && err == -EEXIST) { 5535 retried = true; 5536 goto retry; 5537 } 5538 pr_warn("map '%s': failed to auto-pin at '%s': %s\n", 5539 map->name, map->pin_path, errstr(err)); 5540 goto err_out; 5541 } 5542 } 5543 } 5544 5545 return 0; 5546 5547 err_out: 5548 pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); 5549 pr_perm_msg(err); 5550 for (j = 0; j < i; j++) 5551 zclose(obj->maps[j].fd); 5552 return err; 5553 } 5554 5555 static bool bpf_core_is_flavor_sep(const char *s) 5556 { 5557 /* check X___Y name pattern, where X and Y are not underscores */ 5558 return s[0] != '_' && /* X */ 5559 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ 5560 s[4] != '_'; /* Y */ 5561 } 5562 5563 /* Given 'some_struct_name___with_flavor' return the length of a name prefix 5564 * before last triple underscore. Struct name part after last triple 5565 * underscore is ignored by BPF CO-RE relocation during relocation matching. 
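 * For example (editor's note, illustrative only): for a local type named
 * 'task_struct___v510' this returns 11, i.e. strlen("task_struct"); a name
 * without a triple-underscore flavor suffix yields its full length.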
5566 */ 5567 size_t bpf_core_essential_name_len(const char *name) 5568 { 5569 size_t n = strlen(name); 5570 int i; 5571 5572 for (i = n - 5; i >= 0; i--) { 5573 if (bpf_core_is_flavor_sep(name + i)) 5574 return i + 1; 5575 } 5576 return n; 5577 } 5578 5579 void bpf_core_free_cands(struct bpf_core_cand_list *cands) 5580 { 5581 if (!cands) 5582 return; 5583 5584 free(cands->cands); 5585 free(cands); 5586 } 5587 5588 int bpf_core_add_cands(struct bpf_core_cand *local_cand, 5589 size_t local_essent_len, 5590 const struct btf *targ_btf, 5591 const char *targ_btf_name, 5592 int targ_start_id, 5593 struct bpf_core_cand_list *cands) 5594 { 5595 struct bpf_core_cand *new_cands, *cand; 5596 const struct btf_type *t, *local_t; 5597 const char *targ_name, *local_name; 5598 size_t targ_essent_len; 5599 int n, i; 5600 5601 local_t = btf__type_by_id(local_cand->btf, local_cand->id); 5602 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); 5603 5604 n = btf__type_cnt(targ_btf); 5605 for (i = targ_start_id; i < n; i++) { 5606 t = btf__type_by_id(targ_btf, i); 5607 if (!btf_kind_core_compat(t, local_t)) 5608 continue; 5609 5610 targ_name = btf__name_by_offset(targ_btf, t->name_off); 5611 if (str_is_empty(targ_name)) 5612 continue; 5613 5614 targ_essent_len = bpf_core_essential_name_len(targ_name); 5615 if (targ_essent_len != local_essent_len) 5616 continue; 5617 5618 if (strncmp(local_name, targ_name, local_essent_len) != 0) 5619 continue; 5620 5621 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", 5622 local_cand->id, btf_kind_str(local_t), 5623 local_name, i, btf_kind_str(t), targ_name, 5624 targ_btf_name); 5625 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, 5626 sizeof(*cands->cands)); 5627 if (!new_cands) 5628 return -ENOMEM; 5629 5630 cand = &new_cands[cands->len]; 5631 cand->btf = targ_btf; 5632 cand->id = i; 5633 5634 cands->cands = new_cands; 5635 cands->len++; 5636 } 5637 return 0; 5638 } 5639 5640 static int load_module_btfs(struct bpf_object *obj) 5641 { 5642 struct bpf_btf_info info; 5643 struct module_btf *mod_btf; 5644 struct btf *btf; 5645 char name[64]; 5646 __u32 id = 0, len; 5647 int err, fd; 5648 5649 if (obj->btf_modules_loaded) 5650 return 0; 5651 5652 if (obj->gen_loader) 5653 return 0; 5654 5655 /* don't do this again, even if we find no module BTFs */ 5656 obj->btf_modules_loaded = true; 5657 5658 /* kernel too old to support module BTFs */ 5659 if (!kernel_supports(obj, FEAT_MODULE_BTF)) 5660 return 0; 5661 5662 while (true) { 5663 err = bpf_btf_get_next_id(id, &id); 5664 if (err && errno == ENOENT) 5665 return 0; 5666 if (err && errno == EPERM) { 5667 pr_debug("skipping module BTFs loading, missing privileges\n"); 5668 return 0; 5669 } 5670 if (err) { 5671 err = -errno; 5672 pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); 5673 return err; 5674 } 5675 5676 fd = bpf_btf_get_fd_by_id(id); 5677 if (fd < 0) { 5678 if (errno == ENOENT) 5679 continue; /* expected race: BTF was unloaded */ 5680 err = -errno; 5681 pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); 5682 return err; 5683 } 5684 5685 len = sizeof(info); 5686 memset(&info, 0, sizeof(info)); 5687 info.name = ptr_to_u64(name); 5688 info.name_len = sizeof(name); 5689 5690 err = bpf_btf_get_info_by_fd(fd, &info, &len); 5691 if (err) { 5692 err = -errno; 5693 pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); 5694 goto err_out; 5695 } 5696 5697 /* ignore non-module BTFs */ 5698 if (!info.kernel_btf || strcmp(name, 
"vmlinux") == 0) { 5699 close(fd); 5700 continue; 5701 } 5702 5703 btf = btf_get_from_fd(fd, obj->btf_vmlinux); 5704 err = libbpf_get_error(btf); 5705 if (err) { 5706 pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", 5707 name, id, errstr(err)); 5708 goto err_out; 5709 } 5710 5711 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, 5712 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); 5713 if (err) 5714 goto err_out; 5715 5716 mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; 5717 5718 mod_btf->btf = btf; 5719 mod_btf->id = id; 5720 mod_btf->fd = fd; 5721 mod_btf->name = strdup(name); 5722 if (!mod_btf->name) { 5723 err = -ENOMEM; 5724 goto err_out; 5725 } 5726 continue; 5727 5728 err_out: 5729 close(fd); 5730 return err; 5731 } 5732 5733 return 0; 5734 } 5735 5736 static struct bpf_core_cand_list * 5737 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) 5738 { 5739 struct bpf_core_cand local_cand = {}; 5740 struct bpf_core_cand_list *cands; 5741 const struct btf *main_btf; 5742 const struct btf_type *local_t; 5743 const char *local_name; 5744 size_t local_essent_len; 5745 int err, i; 5746 5747 local_cand.btf = local_btf; 5748 local_cand.id = local_type_id; 5749 local_t = btf__type_by_id(local_btf, local_type_id); 5750 if (!local_t) 5751 return ERR_PTR(-EINVAL); 5752 5753 local_name = btf__name_by_offset(local_btf, local_t->name_off); 5754 if (str_is_empty(local_name)) 5755 return ERR_PTR(-EINVAL); 5756 local_essent_len = bpf_core_essential_name_len(local_name); 5757 5758 cands = calloc(1, sizeof(*cands)); 5759 if (!cands) 5760 return ERR_PTR(-ENOMEM); 5761 5762 /* Attempt to find target candidates in vmlinux BTF first */ 5763 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux; 5764 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands); 5765 if (err) 5766 goto err_out; 5767 5768 /* if vmlinux BTF has any candidate, don't got for module BTFs */ 5769 if (cands->len) 5770 return cands; 5771 5772 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */ 5773 if (obj->btf_vmlinux_override) 5774 return cands; 5775 5776 /* now look through module BTFs, trying to still find candidates */ 5777 err = load_module_btfs(obj); 5778 if (err) 5779 goto err_out; 5780 5781 for (i = 0; i < obj->btf_module_cnt; i++) { 5782 err = bpf_core_add_cands(&local_cand, local_essent_len, 5783 obj->btf_modules[i].btf, 5784 obj->btf_modules[i].name, 5785 btf__type_cnt(obj->btf_vmlinux), 5786 cands); 5787 if (err) 5788 goto err_out; 5789 } 5790 5791 return cands; 5792 err_out: 5793 bpf_core_free_cands(cands); 5794 return ERR_PTR(err); 5795 } 5796 5797 /* Check local and target types for compatibility. This check is used for 5798 * type-based CO-RE relocations and follow slightly different rules than 5799 * field-based relocations. This function assumes that root types were already 5800 * checked for name match. Beyond that initial root-level name check, names 5801 * are completely ignored. 
Compatibility rules are as follows: 5802 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but 5803 * kind should match for local and target types (i.e., STRUCT is not 5804 * compatible with UNION); 5805 * - for ENUMs, the size is ignored; 5806 * - for INT, size and signedness are ignored; 5807 * - for ARRAY, dimensionality is ignored, element types are checked for 5808 * compatibility recursively; 5809 * - CONST/VOLATILE/RESTRICT modifiers are ignored; 5810 * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; 5811 * - FUNC_PROTOs are compatible if they have compatible signature: same 5812 * number of input args and compatible return and argument types. 5813 * These rules are not set in stone and probably will be adjusted as we get 5814 * more experience with using BPF CO-RE relocations. 5815 */ 5816 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 5817 const struct btf *targ_btf, __u32 targ_id) 5818 { 5819 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); 5820 } 5821 5822 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, 5823 const struct btf *targ_btf, __u32 targ_id) 5824 { 5825 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); 5826 } 5827 5828 static size_t bpf_core_hash_fn(const long key, void *ctx) 5829 { 5830 return key; 5831 } 5832 5833 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) 5834 { 5835 return k1 == k2; 5836 } 5837 5838 static int record_relo_core(struct bpf_program *prog, 5839 const struct bpf_core_relo *core_relo, int insn_idx) 5840 { 5841 struct reloc_desc *relos, *relo; 5842 5843 relos = libbpf_reallocarray(prog->reloc_desc, 5844 prog->nr_reloc + 1, sizeof(*relos)); 5845 if (!relos) 5846 return -ENOMEM; 5847 relo = &relos[prog->nr_reloc]; 5848 relo->type = RELO_CORE; 5849 relo->insn_idx = insn_idx; 5850 relo->core_relo = core_relo; 5851 prog->reloc_desc = relos; 5852 prog->nr_reloc++; 5853 return 0; 5854 } 5855 5856 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) 5857 { 5858 struct reloc_desc *relo; 5859 int i; 5860 5861 for (i = 0; i < prog->nr_reloc; i++) { 5862 relo = &prog->reloc_desc[i]; 5863 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) 5864 continue; 5865 5866 return relo->core_relo; 5867 } 5868 5869 return NULL; 5870 } 5871 5872 static int bpf_core_resolve_relo(struct bpf_program *prog, 5873 const struct bpf_core_relo *relo, 5874 int relo_idx, 5875 const struct btf *local_btf, 5876 struct hashmap *cand_cache, 5877 struct bpf_core_relo_res *targ_res) 5878 { 5879 struct bpf_core_spec specs_scratch[3] = {}; 5880 struct bpf_core_cand_list *cands = NULL; 5881 const char *prog_name = prog->name; 5882 const struct btf_type *local_type; 5883 const char *local_name; 5884 __u32 local_id = relo->type_id; 5885 int err; 5886 5887 local_type = btf__type_by_id(local_btf, local_id); 5888 if (!local_type) 5889 return -EINVAL; 5890 5891 local_name = btf__name_by_offset(local_btf, local_type->name_off); 5892 if (!local_name) 5893 return -EINVAL; 5894 5895 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && 5896 !hashmap__find(cand_cache, local_id, &cands)) { 5897 cands = bpf_core_find_cands(prog->obj, local_btf, local_id); 5898 if (IS_ERR(cands)) { 5899 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", 5900 prog_name, relo_idx, local_id, btf_kind_str(local_type), 5901 local_name, PTR_ERR(cands)); 5902 return PTR_ERR(cands); 5903 } 
5904 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); 5905 if (err) { 5906 bpf_core_free_cands(cands); 5907 return err; 5908 } 5909 } 5910 5911 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, 5912 targ_res); 5913 } 5914 5915 static int 5916 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) 5917 { 5918 const struct btf_ext_info_sec *sec; 5919 struct bpf_core_relo_res targ_res; 5920 const struct bpf_core_relo *rec; 5921 const struct btf_ext_info *seg; 5922 struct hashmap_entry *entry; 5923 struct hashmap *cand_cache = NULL; 5924 struct bpf_program *prog; 5925 struct bpf_insn *insn; 5926 const char *sec_name; 5927 int i, err = 0, insn_idx, sec_idx, sec_num; 5928 5929 if (obj->btf_ext->core_relo_info.len == 0) 5930 return 0; 5931 5932 if (targ_btf_path) { 5933 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); 5934 err = libbpf_get_error(obj->btf_vmlinux_override); 5935 if (err) { 5936 pr_warn("failed to parse target BTF: %s\n", errstr(err)); 5937 return err; 5938 } 5939 } 5940 5941 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); 5942 if (IS_ERR(cand_cache)) { 5943 err = PTR_ERR(cand_cache); 5944 goto out; 5945 } 5946 5947 seg = &obj->btf_ext->core_relo_info; 5948 sec_num = 0; 5949 for_each_btf_ext_sec(seg, sec) { 5950 sec_idx = seg->sec_idxs[sec_num]; 5951 sec_num++; 5952 5953 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5954 if (str_is_empty(sec_name)) { 5955 err = -EINVAL; 5956 goto out; 5957 } 5958 5959 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 5960 5961 for_each_btf_ext_rec(seg, sec, i, rec) { 5962 if (rec->insn_off % BPF_INSN_SZ) 5963 return -EINVAL; 5964 insn_idx = rec->insn_off / BPF_INSN_SZ; 5965 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 5966 if (!prog) { 5967 /* When __weak subprog is "overridden" by another instance 5968 * of the subprog from a different object file, linker still 5969 * appends all the .BTF.ext info that used to belong to that 5970 * eliminated subprogram. 5971 * This is similar to what x86-64 linker does for relocations. 5972 * So just ignore such relocations just like we ignore 5973 * subprog instructions when discovering subprograms. 
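* For example, when two linked BPF object files both define a __weak subprogram of the same name, only one body survives static linking, but .BTF.ext still carries CO-RE records for the discarded copy; those records point at instructions that no longer belong to any known (sub)program and are skipped here.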
5974 */ 5975 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", 5976 sec_name, i, insn_idx); 5977 continue; 5978 } 5979 /* no need to apply CO-RE relocation if the program is 5980 * not going to be loaded 5981 */ 5982 if (!prog->autoload) 5983 continue; 5984 5985 /* adjust insn_idx from section frame of reference to the local 5986 * program's frame of reference; (sub-)program code is not yet 5987 * relocated, so it's enough to just subtract in-section offset 5988 */ 5989 insn_idx = insn_idx - prog->sec_insn_off; 5990 if (insn_idx >= prog->insns_cnt) 5991 return -EINVAL; 5992 insn = &prog->insns[insn_idx]; 5993 5994 err = record_relo_core(prog, rec, insn_idx); 5995 if (err) { 5996 pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", 5997 prog->name, i, errstr(err)); 5998 goto out; 5999 } 6000 6001 if (prog->obj->gen_loader) 6002 continue; 6003 6004 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 6005 if (err) { 6006 pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", 6007 prog->name, i, errstr(err)); 6008 goto out; 6009 } 6010 6011 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); 6012 if (err) { 6013 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", 6014 prog->name, i, insn_idx, errstr(err)); 6015 goto out; 6016 } 6017 } 6018 } 6019 6020 out: 6021 /* obj->btf_vmlinux and module BTFs are freed after object load */ 6022 btf__free(obj->btf_vmlinux_override); 6023 obj->btf_vmlinux_override = NULL; 6024 6025 if (!IS_ERR_OR_NULL(cand_cache)) { 6026 hashmap__for_each_entry(cand_cache, entry, i) { 6027 bpf_core_free_cands(entry->pvalue); 6028 } 6029 hashmap__free(cand_cache); 6030 } 6031 return err; 6032 } 6033 6034 /* base map load ldimm64 special constant, used also for log fixup logic */ 6035 #define POISON_LDIMM64_MAP_BASE 2001000000 6036 #define POISON_LDIMM64_MAP_PFX "200100" 6037 6038 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, 6039 int insn_idx, struct bpf_insn *insn, 6040 int map_idx, const struct bpf_map *map) 6041 { 6042 int i; 6043 6044 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", 6045 prog->name, relo_idx, insn_idx, map_idx, map->name); 6046 6047 /* we turn single ldimm64 into two identical invalid calls */ 6048 for (i = 0; i < 2; i++) { 6049 insn->code = BPF_JMP | BPF_CALL; 6050 insn->dst_reg = 0; 6051 insn->src_reg = 0; 6052 insn->off = 0; 6053 /* if this instruction is reachable (not a dead code), 6054 * verifier will complain with something like: 6055 * invalid func unknown#2001000123 6056 * where lower 123 is map index into obj->maps[] array 6057 */ 6058 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; 6059 6060 insn++; 6061 } 6062 } 6063 6064 /* unresolved kfunc call special constant, used also for log fixup logic */ 6065 #define POISON_CALL_KFUNC_BASE 2002000000 6066 #define POISON_CALL_KFUNC_PFX "2002" 6067 6068 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, 6069 int insn_idx, struct bpf_insn *insn, 6070 int ext_idx, const struct extern_desc *ext) 6071 { 6072 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", 6073 prog->name, relo_idx, insn_idx, ext->name); 6074 6075 /* we turn kfunc call into invalid helper call with identifiable constant */ 6076 insn->code = BPF_JMP | BPF_CALL; 6077 insn->dst_reg = 0; 6078 insn->src_reg = 0; 6079 insn->off = 0; 6080 /* if this instruction is reachable (not a dead code), 6081 * verifier will complain 
with something like: 6082 * invalid func unknown#2001000123 6083 * where lower 123 is extern index into obj->externs[] array 6084 */ 6085 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; 6086 } 6087 6088 /* Relocate data references within program code: 6089 * - map references; 6090 * - global variable references; 6091 * - extern references. 6092 */ 6093 static int 6094 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) 6095 { 6096 int i; 6097 6098 for (i = 0; i < prog->nr_reloc; i++) { 6099 struct reloc_desc *relo = &prog->reloc_desc[i]; 6100 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 6101 const struct bpf_map *map; 6102 struct extern_desc *ext; 6103 6104 switch (relo->type) { 6105 case RELO_LD64: 6106 map = &obj->maps[relo->map_idx]; 6107 if (obj->gen_loader) { 6108 insn[0].src_reg = BPF_PSEUDO_MAP_IDX; 6109 insn[0].imm = relo->map_idx; 6110 } else if (map->autocreate) { 6111 insn[0].src_reg = BPF_PSEUDO_MAP_FD; 6112 insn[0].imm = map->fd; 6113 } else { 6114 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 6115 relo->map_idx, map); 6116 } 6117 break; 6118 case RELO_DATA: 6119 map = &obj->maps[relo->map_idx]; 6120 insn[1].imm = insn[0].imm + relo->sym_off; 6121 if (obj->gen_loader) { 6122 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6123 insn[0].imm = relo->map_idx; 6124 } else if (map->autocreate) { 6125 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6126 insn[0].imm = map->fd; 6127 } else { 6128 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 6129 relo->map_idx, map); 6130 } 6131 break; 6132 case RELO_EXTERN_LD64: 6133 ext = &obj->externs[relo->ext_idx]; 6134 if (ext->type == EXT_KCFG) { 6135 if (obj->gen_loader) { 6136 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6137 insn[0].imm = obj->kconfig_map_idx; 6138 } else { 6139 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6140 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; 6141 } 6142 insn[1].imm = ext->kcfg.data_off; 6143 } else /* EXT_KSYM */ { 6144 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ 6145 insn[0].src_reg = BPF_PSEUDO_BTF_ID; 6146 insn[0].imm = ext->ksym.kernel_btf_id; 6147 insn[1].imm = ext->ksym.kernel_btf_obj_fd; 6148 } else { /* typeless ksyms or unresolved typed ksyms */ 6149 insn[0].imm = (__u32)ext->ksym.addr; 6150 insn[1].imm = ext->ksym.addr >> 32; 6151 } 6152 } 6153 break; 6154 case RELO_EXTERN_CALL: 6155 ext = &obj->externs[relo->ext_idx]; 6156 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 6157 if (ext->is_set) { 6158 insn[0].imm = ext->ksym.kernel_btf_id; 6159 insn[0].off = ext->ksym.btf_fd_idx; 6160 } else { /* unresolved weak kfunc call */ 6161 poison_kfunc_call(prog, i, relo->insn_idx, insn, 6162 relo->ext_idx, ext); 6163 } 6164 break; 6165 case RELO_SUBPROG_ADDR: 6166 if (insn[0].src_reg != BPF_PSEUDO_FUNC) { 6167 pr_warn("prog '%s': relo #%d: bad insn\n", 6168 prog->name, i); 6169 return -EINVAL; 6170 } 6171 /* handled already */ 6172 break; 6173 case RELO_CALL: 6174 /* handled already */ 6175 break; 6176 case RELO_CORE: 6177 /* will be handled by bpf_program_record_relos() */ 6178 break; 6179 default: 6180 pr_warn("prog '%s': relo #%d: bad relo type %d\n", 6181 prog->name, i, relo->type); 6182 return -EINVAL; 6183 } 6184 } 6185 6186 return 0; 6187 } 6188 6189 static int adjust_prog_btf_ext_info(const struct bpf_object *obj, 6190 const struct bpf_program *prog, 6191 const struct btf_ext_info *ext_info, 6192 void **prog_info, __u32 *prog_rec_cnt, 6193 __u32 *prog_rec_sz) 6194 { 6195 void *copy_start = NULL, *copy_end = NULL; 6196 void *rec, *rec_end, *new_prog_info; 6197 const struct 
btf_ext_info_sec *sec; 6198 size_t old_sz, new_sz; 6199 int i, sec_num, sec_idx, off_adj; 6200 6201 sec_num = 0; 6202 for_each_btf_ext_sec(ext_info, sec) { 6203 sec_idx = ext_info->sec_idxs[sec_num]; 6204 sec_num++; 6205 if (prog->sec_idx != sec_idx) 6206 continue; 6207 6208 for_each_btf_ext_rec(ext_info, sec, i, rec) { 6209 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; 6210 6211 if (insn_off < prog->sec_insn_off) 6212 continue; 6213 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) 6214 break; 6215 6216 if (!copy_start) 6217 copy_start = rec; 6218 copy_end = rec + ext_info->rec_size; 6219 } 6220 6221 if (!copy_start) 6222 return -ENOENT; 6223 6224 /* append func/line info of a given (sub-)program to the main 6225 * program func/line info 6226 */ 6227 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; 6228 new_sz = old_sz + (copy_end - copy_start); 6229 new_prog_info = realloc(*prog_info, new_sz); 6230 if (!new_prog_info) 6231 return -ENOMEM; 6232 *prog_info = new_prog_info; 6233 *prog_rec_cnt = new_sz / ext_info->rec_size; 6234 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); 6235 6236 /* Kernel instruction offsets are in units of 8-byte 6237 * instructions, while .BTF.ext instruction offsets generated 6238 * by Clang are in units of bytes. So convert Clang offsets 6239 * into kernel offsets and adjust offset according to program 6240 * relocated position. 6241 */ 6242 off_adj = prog->sub_insn_off - prog->sec_insn_off; 6243 rec = new_prog_info + old_sz; 6244 rec_end = new_prog_info + new_sz; 6245 for (; rec < rec_end; rec += ext_info->rec_size) { 6246 __u32 *insn_off = rec; 6247 6248 *insn_off = *insn_off / BPF_INSN_SZ + off_adj; 6249 } 6250 *prog_rec_sz = ext_info->rec_size; 6251 return 0; 6252 } 6253 6254 return -ENOENT; 6255 } 6256 6257 static int 6258 reloc_prog_func_and_line_info(const struct bpf_object *obj, 6259 struct bpf_program *main_prog, 6260 const struct bpf_program *prog) 6261 { 6262 int err; 6263 6264 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't 6265 * support func/line info 6266 */ 6267 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) 6268 return 0; 6269 6270 /* only attempt func info relocation if main program's func_info 6271 * relocation was successful 6272 */ 6273 if (main_prog != prog && !main_prog->func_info) 6274 goto line_info; 6275 6276 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, 6277 &main_prog->func_info, 6278 &main_prog->func_info_cnt, 6279 &main_prog->func_info_rec_size); 6280 if (err) { 6281 if (err != -ENOENT) { 6282 pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", 6283 prog->name, errstr(err)); 6284 return err; 6285 } 6286 if (main_prog->func_info) { 6287 /* 6288 * Some info has already been found but has problem 6289 * in the last btf_ext reloc. Must have to error out. 6290 */ 6291 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); 6292 return err; 6293 } 6294 /* Have problem loading the very first info. Ignore the rest. 
*/ 6295 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", 6296 prog->name); 6297 } 6298 6299 line_info: 6300 /* don't relocate line info if main program's relocation failed */ 6301 if (main_prog != prog && !main_prog->line_info) 6302 return 0; 6303 6304 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, 6305 &main_prog->line_info, 6306 &main_prog->line_info_cnt, 6307 &main_prog->line_info_rec_size); 6308 if (err) { 6309 if (err != -ENOENT) { 6310 pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", 6311 prog->name, errstr(err)); 6312 return err; 6313 } 6314 if (main_prog->line_info) { 6315 /* 6316 * Some info has already been found but has problem 6317 * in the last btf_ext reloc. Must have to error out. 6318 */ 6319 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); 6320 return err; 6321 } 6322 /* Have problem loading the very first info. Ignore the rest. */ 6323 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", 6324 prog->name); 6325 } 6326 return 0; 6327 } 6328 6329 static int cmp_relo_by_insn_idx(const void *key, const void *elem) 6330 { 6331 size_t insn_idx = *(const size_t *)key; 6332 const struct reloc_desc *relo = elem; 6333 6334 if (insn_idx == relo->insn_idx) 6335 return 0; 6336 return insn_idx < relo->insn_idx ? -1 : 1; 6337 } 6338 6339 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) 6340 { 6341 if (!prog->nr_reloc) 6342 return NULL; 6343 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, 6344 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); 6345 } 6346 6347 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) 6348 { 6349 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; 6350 struct reloc_desc *relos; 6351 int i; 6352 6353 if (main_prog == subprog) 6354 return 0; 6355 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); 6356 /* if new count is zero, reallocarray can return a valid NULL result; 6357 * in this case the previous pointer will be freed, so we *have to* 6358 * reassign old pointer to the new value (even if it's NULL) 6359 */ 6360 if (!relos && new_cnt) 6361 return -ENOMEM; 6362 if (subprog->nr_reloc) 6363 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, 6364 sizeof(*relos) * subprog->nr_reloc); 6365 6366 for (i = main_prog->nr_reloc; i < new_cnt; i++) 6367 relos[i].insn_idx += subprog->sub_insn_off; 6368 /* After insn_idx adjustment the 'relos' array is still sorted 6369 * by insn_idx and doesn't break bsearch. 
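* For example, main prog relos at insn_idx {3, 7} combined with a subprog appended at sub_insn_off 10 that has relos at {0, 2} yield {3, 7, 10, 12}, which is still sorted.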
6370 */ 6371 main_prog->reloc_desc = relos; 6372 main_prog->nr_reloc = new_cnt; 6373 return 0; 6374 } 6375 6376 static int 6377 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, 6378 struct bpf_program *subprog) 6379 { 6380 struct bpf_insn *insns; 6381 size_t new_cnt; 6382 int err; 6383 6384 subprog->sub_insn_off = main_prog->insns_cnt; 6385 6386 new_cnt = main_prog->insns_cnt + subprog->insns_cnt; 6387 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); 6388 if (!insns) { 6389 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); 6390 return -ENOMEM; 6391 } 6392 main_prog->insns = insns; 6393 main_prog->insns_cnt = new_cnt; 6394 6395 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, 6396 subprog->insns_cnt * sizeof(*insns)); 6397 6398 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", 6399 main_prog->name, subprog->insns_cnt, subprog->name); 6400 6401 /* The subprog insns are now appended. Append its relos too. */ 6402 err = append_subprog_relos(main_prog, subprog); 6403 if (err) 6404 return err; 6405 return 0; 6406 } 6407 6408 static int 6409 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, 6410 struct bpf_program *prog) 6411 { 6412 size_t sub_insn_idx, insn_idx; 6413 struct bpf_program *subprog; 6414 struct reloc_desc *relo; 6415 struct bpf_insn *insn; 6416 int err; 6417 6418 err = reloc_prog_func_and_line_info(obj, main_prog, prog); 6419 if (err) 6420 return err; 6421 6422 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { 6423 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6424 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) 6425 continue; 6426 6427 relo = find_prog_insn_relo(prog, insn_idx); 6428 if (relo && relo->type == RELO_EXTERN_CALL) 6429 /* kfunc relocations will be handled later 6430 * in bpf_object__relocate_data() 6431 */ 6432 continue; 6433 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { 6434 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", 6435 prog->name, insn_idx, relo->type); 6436 return -LIBBPF_ERRNO__RELOC; 6437 } 6438 if (relo) { 6439 /* sub-program instruction index is a combination of 6440 * an offset of a symbol pointed to by relocation and 6441 * call instruction's imm field; for global functions, 6442 * call always has imm = -1, but for static functions 6443 * relocation is against STT_SECTION and insn->imm 6444 * points to a start of a static function 6445 * 6446 * for subprog addr relocation, the relo->sym_off + insn->imm is 6447 * the byte offset in the corresponding section. 6448 */ 6449 if (relo->type == RELO_CALL) 6450 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; 6451 else 6452 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; 6453 } else if (insn_is_pseudo_func(insn)) { 6454 /* 6455 * RELO_SUBPROG_ADDR relo is always emitted even if both 6456 * functions are in the same section, so it shouldn't reach here. 
6457 */ 6458 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", 6459 prog->name, insn_idx); 6460 return -LIBBPF_ERRNO__RELOC; 6461 } else { 6462 /* if subprogram call is to a static function within 6463 * the same ELF section, there won't be any relocation 6464 * emitted, but it also means there is no additional 6465 * offset necessary, insns->imm is relative to 6466 * instruction's original position within the section 6467 */ 6468 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1; 6469 } 6470 6471 /* we enforce that sub-programs should be in .text section */ 6472 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx); 6473 if (!subprog) { 6474 pr_warn("prog '%s': no .text section found yet sub-program call exists\n", 6475 prog->name); 6476 return -LIBBPF_ERRNO__RELOC; 6477 } 6478 6479 /* if it's the first call instruction calling into this 6480 * subprogram (meaning this subprog hasn't been processed 6481 * yet) within the context of current main program: 6482 * - append it at the end of main program's instruction block; 6483 * - process it recursively, while current program is put on hold; 6484 * - if that subprogram calls some other not yet processed 6485 * subprogram, same thing will happen recursively until 6486 * there are no more unprocessed subprograms left to append 6487 * and relocate. 6488 */ 6489 if (subprog->sub_insn_off == 0) { 6490 err = bpf_object__append_subprog_code(obj, main_prog, subprog); 6491 if (err) 6492 return err; 6493 err = bpf_object__reloc_code(obj, main_prog, subprog); 6494 if (err) 6495 return err; 6496 } 6497 6498 /* main_prog->insns memory could have been re-allocated, so 6499 * calculate pointer again 6500 */ 6501 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6502 /* calculate correct instruction position within current main 6503 * prog; each main prog can have a different set of 6504 * subprograms appended (potentially in different order as 6505 * well), so position of any subprog can be different for 6506 * different main programs 6507 */ 6508 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; 6509 6510 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", 6511 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); 6512 } 6513 6514 return 0; 6515 } 6516 6517 /* 6518 * Relocate sub-program calls. 6519 * 6520 * The algorithm operates as follows. Each entry-point BPF program (referred to as 6521 * a main prog) is processed separately. Each subprog (a non-entry function 6522 * that can be called from either entry progs or other subprogs) gets its 6523 * sub_insn_off reset to zero. This serves as an indicator that this subprogram 6524 * hasn't yet been appended and relocated within the current main prog. Once it's 6525 * relocated, sub_insn_off will point at the position within current main prog 6526 * where the given subprog was appended. This will further be used to relocate all 6527 * the call instructions jumping into this subprog. 6528 * 6529 * We start with the main program and process all call instructions. If the call 6530 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off 6531 * is zero), subprog instructions are appended at the end of main program's 6532 * instruction array. Then the main program is "put on hold" while we recursively 6533 * process the newly appended subprogram.
If that subprogram calls into another 6534 * subprogram that hasn't been appended, the new subprogram is appended to 6535 * the *main* prog's instructions (subprog's instructions are always left 6536 * untouched, as they need to be in an unmodified state for subsequent main progs 6537 * and subprog instructions are only ever sent to the kernel as part of a main prog) and 6538 * the process continues recursively. Once all the subprogs called from a main 6539 * prog or any of its subprogs are appended (and relocated), all their 6540 * positions within the finalized instructions array are known, so it's easy to 6541 * rewrite call instructions with correct relative offsets, corresponding to 6542 * the desired target subprog. 6543 * 6544 * It's important to realize that some subprogs might not be called from some 6545 * main prog or any of its called/used subprogs. Those will keep their 6546 * subprog->sub_insn_off as zero at all times and won't be appended to the current 6547 * main prog and won't be relocated within the context of the current main prog. 6548 * They might still be used from other main progs later. 6549 * 6550 * Visually this process can be shown as below. Suppose we have two main 6551 * programs mainA and mainB and the BPF object contains three subprogs: subA, 6552 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and 6553 * subC both call subB: 6554 * 6555 * +--------+ +-------+ 6556 * | v v | 6557 * +--+---+ +--+-+-+ +---+--+ 6558 * | subA | | subB | | subC | 6559 * +--+---+ +------+ +---+--+ 6560 * ^ ^ 6561 * | | 6562 * +---+-------+ +------+----+ 6563 * | mainA | | mainB | 6564 * +-----------+ +-----------+ 6565 * 6566 * We'll start relocating mainA, will find subA, append it and start 6567 * processing subA recursively: 6568 * 6569 * +-----------+------+ 6570 * | mainA | subA | 6571 * +-----------+------+ 6572 * 6573 * At this point we notice that subB is used from subA, so we append it and 6574 * relocate (there are no further subcalls from subB): 6575 * 6576 * +-----------+------+------+ 6577 * | mainA | subA | subB | 6578 * +-----------+------+------+ 6579 * 6580 * At this point, we relocate subA calls, then go one level up and finish with 6581 * relocating mainA calls. mainA is done. 6582 * 6583 * For mainB the process is similar but results in a different order. We start with 6584 * mainB and skip subA and subB, as mainB never calls them (at least 6585 * directly), but we see subC is needed, so we append and start processing it: 6586 * 6587 * +-----------+------+ 6588 * | mainB | subC | 6589 * +-----------+------+ 6590 * Now we see subC needs subB, so we go back to it, append and relocate it: 6591 * 6592 * +-----------+------+------+ 6593 * | mainB | subC | subB | 6594 * +-----------+------+------+ 6595 * 6596 * At this point we unwind recursion, relocate calls in subC, then in mainB.
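* The final instruction images are thus mainA: | mainA | subA | subB | and mainB: | mainB | subC | subB |, and each call insn's imm ends up as the target's sub_insn_off minus the call's own index minus 1, i.e. an offset relative to the instruction following the call.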
6597 */ 6598 static int 6599 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) 6600 { 6601 struct bpf_program *subprog; 6602 int i, err; 6603 6604 /* mark all subprogs as not relocated (yet) within the context of 6605 * current main program 6606 */ 6607 for (i = 0; i < obj->nr_programs; i++) { 6608 subprog = &obj->programs[i]; 6609 if (!prog_is_subprog(obj, subprog)) 6610 continue; 6611 6612 subprog->sub_insn_off = 0; 6613 } 6614 6615 err = bpf_object__reloc_code(obj, prog, prog); 6616 if (err) 6617 return err; 6618 6619 return 0; 6620 } 6621 6622 static void 6623 bpf_object__free_relocs(struct bpf_object *obj) 6624 { 6625 struct bpf_program *prog; 6626 int i; 6627 6628 /* free up relocation descriptors */ 6629 for (i = 0; i < obj->nr_programs; i++) { 6630 prog = &obj->programs[i]; 6631 zfree(&prog->reloc_desc); 6632 prog->nr_reloc = 0; 6633 } 6634 } 6635 6636 static int cmp_relocs(const void *_a, const void *_b) 6637 { 6638 const struct reloc_desc *a = _a; 6639 const struct reloc_desc *b = _b; 6640 6641 if (a->insn_idx != b->insn_idx) 6642 return a->insn_idx < b->insn_idx ? -1 : 1; 6643 6644 /* no two relocations should have the same insn_idx, but ... */ 6645 if (a->type != b->type) 6646 return a->type < b->type ? -1 : 1; 6647 6648 return 0; 6649 } 6650 6651 static void bpf_object__sort_relos(struct bpf_object *obj) 6652 { 6653 int i; 6654 6655 for (i = 0; i < obj->nr_programs; i++) { 6656 struct bpf_program *p = &obj->programs[i]; 6657 6658 if (!p->nr_reloc) 6659 continue; 6660 6661 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); 6662 } 6663 } 6664 6665 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) 6666 { 6667 const char *str = "exception_callback:"; 6668 size_t pfx_len = strlen(str); 6669 int i, j, n; 6670 6671 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) 6672 return 0; 6673 6674 n = btf__type_cnt(obj->btf); 6675 for (i = 1; i < n; i++) { 6676 const char *name; 6677 struct btf_type *t; 6678 6679 t = btf_type_by_id(obj->btf, i); 6680 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) 6681 continue; 6682 6683 name = btf__str_by_offset(obj->btf, t->name_off); 6684 if (strncmp(name, str, pfx_len) != 0) 6685 continue; 6686 6687 t = btf_type_by_id(obj->btf, t->type); 6688 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 6689 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", 6690 prog->name); 6691 return -EINVAL; 6692 } 6693 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) 6694 continue; 6695 /* Multiple callbacks are specified for the same prog, 6696 * the verifier will eventually return an error for this 6697 * case, hence simply skip appending a subprog. 6698 */ 6699 if (prog->exception_cb_idx >= 0) { 6700 prog->exception_cb_idx = -1; 6701 break; 6702 } 6703 6704 name += pfx_len; 6705 if (str_is_empty(name)) { 6706 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", 6707 prog->name); 6708 return -EINVAL; 6709 } 6710 6711 for (j = 0; j < obj->nr_programs; j++) { 6712 struct bpf_program *subprog = &obj->programs[j]; 6713 6714 if (!prog_is_subprog(obj, subprog)) 6715 continue; 6716 if (strcmp(name, subprog->name) != 0) 6717 continue; 6718 /* Enforce non-hidden, as from verifier point of 6719 * view it expects global functions, whereas the 6720 * mark_btf_static fixes up linkage as static. 
6721 */ 6722 if (!subprog->sym_global || subprog->mark_btf_static) { 6723 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", 6724 prog->name, subprog->name); 6725 return -EINVAL; 6726 } 6727 /* Let's see if we already saw a static exception callback with the same name */ 6728 if (prog->exception_cb_idx >= 0) { 6729 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", 6730 prog->name, subprog->name); 6731 return -EINVAL; 6732 } 6733 prog->exception_cb_idx = j; 6734 break; 6735 } 6736 6737 if (prog->exception_cb_idx >= 0) 6738 continue; 6739 6740 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); 6741 return -ENOENT; 6742 } 6743 6744 return 0; 6745 } 6746 6747 static struct { 6748 enum bpf_prog_type prog_type; 6749 const char *ctx_name; 6750 } global_ctx_map[] = { 6751 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, 6752 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, 6753 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, 6754 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, 6755 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, 6756 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, 6757 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, 6758 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, 6759 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, 6760 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, 6761 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, 6762 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, 6763 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, 6764 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, 6765 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, 6766 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, 6767 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, 6768 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, 6769 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, 6770 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, 6771 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, 6772 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, 6773 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, 6774 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, 6775 { BPF_PROG_TYPE_XDP, "xdp_md" }, 6776 /* all other program types don't have "named" context structs */ 6777 }; 6778 6779 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, 6780 * for below __builtin_types_compatible_p() checks; 6781 * with this approach we don't need any extra arch-specific #ifdef guards 6782 */ 6783 struct pt_regs; 6784 struct user_pt_regs; 6785 struct user_regs_struct; 6786 6787 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, 6788 const char *subprog_name, int arg_idx, 6789 int arg_type_id, const char *ctx_name) 6790 { 6791 const struct btf_type *t; 6792 const char *tname; 6793 6794 /* check if existing parameter already matches verifier expectations */ 6795 t = skip_mods_and_typedefs(btf, arg_type_id, NULL); 6796 if (!btf_is_ptr(t)) 6797 goto out_warn; 6798 6799 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe 6800 * and perf_event programs, so check this case early on and forget 6801 * about it for subsequent checks 6802 */ 6803 while (btf_is_mod(t)) 6804 t = btf__type_by_id(btf, t->type); 6805 if (btf_is_typedef(t) && 6806 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { 6807 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6808 if (strcmp(tname, "bpf_user_pt_regs_t") == 0) 6809 return false; /* canonical type for kprobe/perf_event */ 6810 } 6811 
6812 /* now we can ignore typedefs moving forward */ 6813 t = skip_mods_and_typedefs(btf, t->type, NULL); 6814 6815 /* if it's `void *`, definitely fix up BTF info */ 6816 if (btf_is_void(t)) 6817 return true; 6818 6819 /* if it's already proper canonical type, no need to fix up */ 6820 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6821 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) 6822 return false; 6823 6824 /* special cases */ 6825 switch (prog->type) { 6826 case BPF_PROG_TYPE_KPROBE: 6827 /* `struct pt_regs *` is expected, but we need to fix up */ 6828 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6829 return true; 6830 break; 6831 case BPF_PROG_TYPE_PERF_EVENT: 6832 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && 6833 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6834 return true; 6835 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && 6836 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) 6837 return true; 6838 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && 6839 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) 6840 return true; 6841 break; 6842 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6843 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 6844 /* allow u64* as ctx */ 6845 if (btf_is_int(t) && t->size == 8) 6846 return true; 6847 break; 6848 default: 6849 break; 6850 } 6851 6852 out_warn: 6853 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", 6854 prog->name, subprog_name, arg_idx, ctx_name); 6855 return false; 6856 } 6857 6858 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) 6859 { 6860 int fn_id, fn_proto_id, ret_type_id, orig_proto_id; 6861 int i, err, arg_cnt, fn_name_off, linkage; 6862 struct btf_type *fn_t, *fn_proto_t, *t; 6863 struct btf_param *p; 6864 6865 /* caller already validated FUNC -> FUNC_PROTO validity */ 6866 fn_t = btf_type_by_id(btf, orig_fn_id); 6867 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6868 6869 /* Note that each btf__add_xxx() operation invalidates 6870 * all btf_type and string pointers, so we need to be 6871 * very careful when cloning BTF types. BTF type 6872 * pointers have to be always refetched. And to avoid 6873 * problems with invalidated string pointers, we 6874 * add empty strings initially, then just fix up 6875 * name_off offsets in place. Offsets are stable for 6876 * existing strings, so that works out. 
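* For example, the fn_t and fn_proto_t pointers fetched above must not be dereferenced again after the btf__add_func_proto() call below; they are refetched via btf_type_by_id() whenever needed.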
6877 */ 6878 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ 6879 linkage = btf_func_linkage(fn_t); 6880 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ 6881 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ 6882 arg_cnt = btf_vlen(fn_proto_t); 6883 6884 /* clone FUNC_PROTO and its params */ 6885 fn_proto_id = btf__add_func_proto(btf, ret_type_id); 6886 if (fn_proto_id < 0) 6887 return -EINVAL; 6888 6889 for (i = 0; i < arg_cnt; i++) { 6890 int name_off; 6891 6892 /* copy original parameter data */ 6893 t = btf_type_by_id(btf, orig_proto_id); 6894 p = &btf_params(t)[i]; 6895 name_off = p->name_off; 6896 6897 err = btf__add_func_param(btf, "", p->type); 6898 if (err) 6899 return err; 6900 6901 fn_proto_t = btf_type_by_id(btf, fn_proto_id); 6902 p = &btf_params(fn_proto_t)[i]; 6903 p->name_off = name_off; /* use remembered str offset */ 6904 } 6905 6906 /* clone FUNC now, btf__add_func() enforces non-empty name, so use 6907 * entry program's name as a placeholder, which we replace immediately 6908 * with original name_off 6909 */ 6910 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); 6911 if (fn_id < 0) 6912 return -EINVAL; 6913 6914 fn_t = btf_type_by_id(btf, fn_id); 6915 fn_t->name_off = fn_name_off; /* reuse original string */ 6916 6917 return fn_id; 6918 } 6919 6920 /* Check if main program or global subprog's function prototype has `arg:ctx` 6921 * argument tags, and, if necessary, substitute correct type to match what BPF 6922 * verifier would expect, taking into account specific program type. This 6923 * allows to support __arg_ctx tag transparently on old kernels that don't yet 6924 * have a native support for it in the verifier, making user's life much 6925 * easier. 6926 */ 6927 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) 6928 { 6929 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; 6930 struct bpf_func_info_min *func_rec; 6931 struct btf_type *fn_t, *fn_proto_t; 6932 struct btf *btf = obj->btf; 6933 const struct btf_type *t; 6934 struct btf_param *p; 6935 int ptr_id = 0, struct_id, tag_id, orig_fn_id; 6936 int i, n, arg_idx, arg_cnt, err, rec_idx; 6937 int *orig_ids; 6938 6939 /* no .BTF.ext, no problem */ 6940 if (!obj->btf_ext || !prog->func_info) 6941 return 0; 6942 6943 /* don't do any fix ups if kernel natively supports __arg_ctx */ 6944 if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) 6945 return 0; 6946 6947 /* some BPF program types just don't have named context structs, so 6948 * this fallback mechanism doesn't work for them 6949 */ 6950 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { 6951 if (global_ctx_map[i].prog_type != prog->type) 6952 continue; 6953 ctx_name = global_ctx_map[i].ctx_name; 6954 break; 6955 } 6956 if (!ctx_name) 6957 return 0; 6958 6959 /* remember original func BTF IDs to detect if we already cloned them */ 6960 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); 6961 if (!orig_ids) 6962 return -ENOMEM; 6963 for (i = 0; i < prog->func_info_cnt; i++) { 6964 func_rec = prog->func_info + prog->func_info_rec_size * i; 6965 orig_ids[i] = func_rec->type_id; 6966 } 6967 6968 /* go through each DECL_TAG with "arg:ctx" and see if it points to one 6969 * of our subprogs; if yes and subprog is global and needs adjustment, 6970 * clone and adjust FUNC -> FUNC_PROTO combo 6971 */ 6972 for (i = 1, n = btf__type_cnt(btf); i < n; i++) { 6973 /* only DECL_TAG with "arg:ctx" value are interesting */ 6974 t = btf__type_by_id(btf, i); 6975 if 
(!btf_is_decl_tag(t)) 6976 continue; 6977 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) 6978 continue; 6979 6980 /* only global funcs need adjustment, if at all */ 6981 orig_fn_id = t->type; 6982 fn_t = btf_type_by_id(btf, orig_fn_id); 6983 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) 6984 continue; 6985 6986 /* sanity check FUNC -> FUNC_PROTO chain, just in case */ 6987 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6988 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) 6989 continue; 6990 6991 /* find corresponding func_info record */ 6992 func_rec = NULL; 6993 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { 6994 if (orig_ids[rec_idx] == t->type) { 6995 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; 6996 break; 6997 } 6998 } 6999 /* current main program doesn't call into this subprog */ 7000 if (!func_rec) 7001 continue; 7002 7003 /* some more sanity checking of DECL_TAG */ 7004 arg_cnt = btf_vlen(fn_proto_t); 7005 arg_idx = btf_decl_tag(t)->component_idx; 7006 if (arg_idx < 0 || arg_idx >= arg_cnt) 7007 continue; 7008 7009 /* check if we should fix up argument type */ 7010 p = &btf_params(fn_proto_t)[arg_idx]; 7011 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; 7012 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) 7013 continue; 7014 7015 /* clone fn/fn_proto, unless we already did it for another arg */ 7016 if (func_rec->type_id == orig_fn_id) { 7017 int fn_id; 7018 7019 fn_id = clone_func_btf_info(btf, orig_fn_id, prog); 7020 if (fn_id < 0) { 7021 err = fn_id; 7022 goto err_out; 7023 } 7024 7025 /* point func_info record to a cloned FUNC type */ 7026 func_rec->type_id = fn_id; 7027 } 7028 7029 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; 7030 * we do it just once per main BPF program, as all global 7031 * funcs share the same program type, so need only PTR -> 7032 * STRUCT type chain 7033 */ 7034 if (ptr_id == 0) { 7035 struct_id = btf__add_struct(btf, ctx_name, 0); 7036 ptr_id = btf__add_ptr(btf, struct_id); 7037 if (ptr_id < 0 || struct_id < 0) { 7038 err = -EINVAL; 7039 goto err_out; 7040 } 7041 } 7042 7043 /* for completeness, clone DECL_TAG and point it to cloned param */ 7044 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); 7045 if (tag_id < 0) { 7046 err = -EINVAL; 7047 goto err_out; 7048 } 7049 7050 /* all the BTF manipulations invalidated pointers, refetch them */ 7051 fn_t = btf_type_by_id(btf, func_rec->type_id); 7052 fn_proto_t = btf_type_by_id(btf, fn_t->type); 7053 7054 /* fix up type ID pointed to by param */ 7055 p = &btf_params(fn_proto_t)[arg_idx]; 7056 p->type = ptr_id; 7057 } 7058 7059 free(orig_ids); 7060 return 0; 7061 err_out: 7062 free(orig_ids); 7063 return err; 7064 } 7065 7066 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) 7067 { 7068 struct bpf_program *prog; 7069 size_t i, j; 7070 int err; 7071 7072 if (obj->btf_ext) { 7073 err = bpf_object__relocate_core(obj, targ_btf_path); 7074 if (err) { 7075 pr_warn("failed to perform CO-RE relocations: %s\n", 7076 errstr(err)); 7077 return err; 7078 } 7079 bpf_object__sort_relos(obj); 7080 } 7081 7082 /* Before relocating calls pre-process relocations and mark 7083 * few ld_imm64 instructions that points to subprogs. 7084 * Otherwise bpf_object__reloc_code() later would have to consider 7085 * all ld_imm64 insns as relocation candidates. 
That would 7086 * reduce relocation speed, since the number of find_prog_insn_relo() calls 7087 * would increase and most of them would fail to find a relo. 7088 */ 7089 for (i = 0; i < obj->nr_programs; i++) { 7090 prog = &obj->programs[i]; 7091 for (j = 0; j < prog->nr_reloc; j++) { 7092 struct reloc_desc *relo = &prog->reloc_desc[j]; 7093 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 7094 7095 /* mark the insn, so it's recognized by insn_is_pseudo_func() */ 7096 if (relo->type == RELO_SUBPROG_ADDR) 7097 insn[0].src_reg = BPF_PSEUDO_FUNC; 7098 } 7099 } 7100 7101 /* relocate subprogram calls and append used subprograms to main 7102 * programs; each copy of subprogram code needs to be relocated 7103 * differently for each main program, because its code location might 7104 * have changed. 7105 * Append subprog relos to main programs to allow data relos to be 7106 * processed after text is completely relocated. 7107 */ 7108 for (i = 0; i < obj->nr_programs; i++) { 7109 prog = &obj->programs[i]; 7110 /* sub-program's sub-calls are relocated within the context of 7111 * its main program only 7112 */ 7113 if (prog_is_subprog(obj, prog)) 7114 continue; 7115 if (!prog->autoload) 7116 continue; 7117 7118 err = bpf_object__relocate_calls(obj, prog); 7119 if (err) { 7120 pr_warn("prog '%s': failed to relocate calls: %s\n", 7121 prog->name, errstr(err)); 7122 return err; 7123 } 7124 7125 err = bpf_prog_assign_exc_cb(obj, prog); 7126 if (err) 7127 return err; 7128 /* Now, also append the exception callback if it has not been done already. */ 7129 if (prog->exception_cb_idx >= 0) { 7130 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; 7131 7132 /* Calling the exception callback directly is disallowed; the 7133 * verifier will reject that later. In case it was processed already, 7134 * we can skip this step; otherwise, for all other valid cases, we 7135 * have to append the exception callback now.
7136 */ 7137 if (subprog->sub_insn_off == 0) { 7138 err = bpf_object__append_subprog_code(obj, prog, subprog); 7139 if (err) 7140 return err; 7141 err = bpf_object__reloc_code(obj, prog, subprog); 7142 if (err) 7143 return err; 7144 } 7145 } 7146 } 7147 for (i = 0; i < obj->nr_programs; i++) { 7148 prog = &obj->programs[i]; 7149 if (prog_is_subprog(obj, prog)) 7150 continue; 7151 if (!prog->autoload) 7152 continue; 7153 7154 /* Process data relos for main programs */ 7155 err = bpf_object__relocate_data(obj, prog); 7156 if (err) { 7157 pr_warn("prog '%s': failed to relocate data references: %s\n", 7158 prog->name, errstr(err)); 7159 return err; 7160 } 7161 7162 /* Fix up .BTF.ext information, if necessary */ 7163 err = bpf_program_fixup_func_info(obj, prog); 7164 if (err) { 7165 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", 7166 prog->name, errstr(err)); 7167 return err; 7168 } 7169 } 7170 7171 return 0; 7172 } 7173 7174 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 7175 Elf64_Shdr *shdr, Elf_Data *data); 7176 7177 static int bpf_object__collect_map_relos(struct bpf_object *obj, 7178 Elf64_Shdr *shdr, Elf_Data *data) 7179 { 7180 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); 7181 int i, j, nrels, new_sz; 7182 const struct btf_var_secinfo *vi = NULL; 7183 const struct btf_type *sec, *var, *def; 7184 struct bpf_map *map = NULL, *targ_map = NULL; 7185 struct bpf_program *targ_prog = NULL; 7186 bool is_prog_array, is_map_in_map; 7187 const struct btf_member *member; 7188 const char *name, *mname, *type; 7189 unsigned int moff; 7190 Elf64_Sym *sym; 7191 Elf64_Rel *rel; 7192 void *tmp; 7193 7194 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) 7195 return -EINVAL; 7196 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); 7197 if (!sec) 7198 return -EINVAL; 7199 7200 nrels = shdr->sh_size / shdr->sh_entsize; 7201 for (i = 0; i < nrels; i++) { 7202 rel = elf_rel_by_idx(data, i); 7203 if (!rel) { 7204 pr_warn(".maps relo #%d: failed to get ELF relo\n", i); 7205 return -LIBBPF_ERRNO__FORMAT; 7206 } 7207 7208 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 7209 if (!sym) { 7210 pr_warn(".maps relo #%d: symbol %zx not found\n", 7211 i, (size_t)ELF64_R_SYM(rel->r_info)); 7212 return -LIBBPF_ERRNO__FORMAT; 7213 } 7214 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 7215 7216 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", 7217 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, 7218 (size_t)rel->r_offset, sym->st_name, name); 7219 7220 for (j = 0; j < obj->nr_maps; j++) { 7221 map = &obj->maps[j]; 7222 if (map->sec_idx != obj->efile.btf_maps_shndx) 7223 continue; 7224 7225 vi = btf_var_secinfos(sec) + map->btf_var_idx; 7226 if (vi->offset <= rel->r_offset && 7227 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) 7228 break; 7229 } 7230 if (j == obj->nr_maps) { 7231 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", 7232 i, name, (size_t)rel->r_offset); 7233 return -EINVAL; 7234 } 7235 7236 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); 7237 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; 7238 type = is_map_in_map ? 
"map" : "prog"; 7239 if (is_map_in_map) { 7240 if (sym->st_shndx != obj->efile.btf_maps_shndx) { 7241 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", 7242 i, name); 7243 return -LIBBPF_ERRNO__RELOC; 7244 } 7245 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && 7246 map->def.key_size != sizeof(int)) { 7247 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", 7248 i, map->name, sizeof(int)); 7249 return -EINVAL; 7250 } 7251 targ_map = bpf_object__find_map_by_name(obj, name); 7252 if (!targ_map) { 7253 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", 7254 i, name); 7255 return -ESRCH; 7256 } 7257 } else if (is_prog_array) { 7258 targ_prog = bpf_object__find_program_by_name(obj, name); 7259 if (!targ_prog) { 7260 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", 7261 i, name); 7262 return -ESRCH; 7263 } 7264 if (targ_prog->sec_idx != sym->st_shndx || 7265 targ_prog->sec_insn_off * 8 != sym->st_value || 7266 prog_is_subprog(obj, targ_prog)) { 7267 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", 7268 i, name); 7269 return -LIBBPF_ERRNO__RELOC; 7270 } 7271 } else { 7272 return -EINVAL; 7273 } 7274 7275 var = btf__type_by_id(obj->btf, vi->type); 7276 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 7277 if (btf_vlen(def) == 0) 7278 return -EINVAL; 7279 member = btf_members(def) + btf_vlen(def) - 1; 7280 mname = btf__name_by_offset(obj->btf, member->name_off); 7281 if (strcmp(mname, "values")) 7282 return -EINVAL; 7283 7284 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; 7285 if (rel->r_offset - vi->offset < moff) 7286 return -EINVAL; 7287 7288 moff = rel->r_offset - vi->offset - moff; 7289 /* here we use BPF pointer size, which is always 64 bit, as we 7290 * are parsing ELF that was built for BPF target 7291 */ 7292 if (moff % bpf_ptr_sz) 7293 return -EINVAL; 7294 moff /= bpf_ptr_sz; 7295 if (moff >= map->init_slots_sz) { 7296 new_sz = moff + 1; 7297 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); 7298 if (!tmp) 7299 return -ENOMEM; 7300 map->init_slots = tmp; 7301 memset(map->init_slots + map->init_slots_sz, 0, 7302 (new_sz - map->init_slots_sz) * host_ptr_sz); 7303 map->init_slots_sz = new_sz; 7304 } 7305 map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; 7306 7307 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", 7308 i, map->name, moff, type, name); 7309 } 7310 7311 return 0; 7312 } 7313 7314 static int bpf_object__collect_relos(struct bpf_object *obj) 7315 { 7316 int i, err; 7317 7318 for (i = 0; i < obj->efile.sec_cnt; i++) { 7319 struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; 7320 Elf64_Shdr *shdr; 7321 Elf_Data *data; 7322 int idx; 7323 7324 if (sec_desc->sec_type != SEC_RELO) 7325 continue; 7326 7327 shdr = sec_desc->shdr; 7328 data = sec_desc->data; 7329 idx = shdr->sh_info; 7330 7331 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { 7332 pr_warn("internal error at %d\n", __LINE__); 7333 return -LIBBPF_ERRNO__INTERNAL; 7334 } 7335 7336 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) 7337 err = bpf_object__collect_st_ops_relos(obj, shdr, data); 7338 else if (idx == obj->efile.btf_maps_shndx) 7339 err = bpf_object__collect_map_relos(obj, shdr, data); 7340 else 7341 err = bpf_object__collect_prog_relos(obj, shdr, data); 7342 if (err) 7343 return err; 7344 } 7345 7346 bpf_object__sort_relos(obj); 7347 return 0; 7348 } 7349 7350 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) 7351 { 7352 if (BPF_CLASS(insn->code) == BPF_JMP && 7353 BPF_OP(insn->code) == BPF_CALL && 7354 BPF_SRC(insn->code) == BPF_K && 7355 insn->src_reg == 0 && 7356 insn->dst_reg == 0) { 7357 *func_id = insn->imm; 7358 return true; 7359 } 7360 return false; 7361 } 7362 7363 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) 7364 { 7365 struct bpf_insn *insn = prog->insns; 7366 enum bpf_func_id func_id; 7367 int i; 7368 7369 if (obj->gen_loader) 7370 return 0; 7371 7372 for (i = 0; i < prog->insns_cnt; i++, insn++) { 7373 if (!insn_is_helper_call(insn, &func_id)) 7374 continue; 7375 7376 /* on kernels that don't yet support 7377 * bpf_probe_read_{kernel,user}[_str] helpers, fall back 7378 * to bpf_probe_read() which works well for old kernels 7379 */ 7380 switch (func_id) { 7381 case BPF_FUNC_probe_read_kernel: 7382 case BPF_FUNC_probe_read_user: 7383 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7384 insn->imm = BPF_FUNC_probe_read; 7385 break; 7386 case BPF_FUNC_probe_read_kernel_str: 7387 case BPF_FUNC_probe_read_user_str: 7388 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7389 insn->imm = BPF_FUNC_probe_read_str; 7390 break; 7391 default: 7392 break; 7393 } 7394 } 7395 return 0; 7396 } 7397 7398 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 7399 int *btf_obj_fd, int *btf_type_id); 7400 7401 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ 7402 static int libbpf_prepare_prog_load(struct bpf_program *prog, 7403 struct bpf_prog_load_opts *opts, long cookie) 7404 { 7405 enum sec_def_flags def = cookie; 7406 7407 /* old kernels might not support specifying expected_attach_type */ 7408 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) 7409 opts->expected_attach_type = 0; 7410 7411 if (def & SEC_SLEEPABLE) 7412 opts->prog_flags |= BPF_F_SLEEPABLE; 7413 7414 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 7415 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 7416 7417 /* special check for usdt to use uprobe_multi link */ 7418 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { 7419 /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type 7420 * in 
prog, and expected_attach_type we set in kernel is from opts, so we
7421 * update both.
7422 */
7423 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7424 opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7425 }
7426
7427 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7428 int btf_obj_fd = 0, btf_type_id = 0, err;
7429 const char *attach_name;
7430
7431 attach_name = strchr(prog->sec_name, '/');
7432 if (!attach_name) {
7433 /* if BPF program is annotated with just SEC("fentry")
7434 * (or similar) without declaratively specifying
7435 * target, then it is expected that target will be
7436 * specified with bpf_program__set_attach_target() at
7437 * runtime before BPF object load step. If not, then
7438 * there is nothing to load into the kernel as BPF
7439 * verifier won't be able to validate BPF program
7440 * correctness anyway.
7441 */
7442 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7443 prog->name);
7444 return -EINVAL;
7445 }
7446 attach_name++; /* skip over / */
7447
7448 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7449 if (err)
7450 return err;
7451
7452 /* cache resolved BTF FD and BTF type ID in the prog */
7453 prog->attach_btf_obj_fd = btf_obj_fd;
7454 prog->attach_btf_id = btf_type_id;
7455
7456 /* but by now libbpf common logic is not utilizing
7457 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7458 * this callback is called after opts were populated by
7459 * libbpf, so this callback has to update opts explicitly here
7460 */
7461 opts->attach_btf_obj_fd = btf_obj_fd;
7462 opts->attach_btf_id = btf_type_id;
7463 }
7464 return 0;
7465 }
7466
7467 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7468
7469 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7470 struct bpf_insn *insns, int insns_cnt,
7471 const char *license, __u32 kern_version, int *prog_fd)
7472 {
7473 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7474 const char *prog_name = NULL;
7475 size_t log_buf_size = 0;
7476 char *log_buf = NULL, *tmp;
7477 bool own_log_buf = true;
7478 __u32 log_level = prog->log_level;
7479 int ret, err;
7480
7481 /* Be more helpful by rejecting programs that can't be validated early,
7482 * with a more meaningful and actionable error message.
7483 */
7484 switch (prog->type) {
7485 case BPF_PROG_TYPE_UNSPEC:
7486 /*
7487 * The program type must be set. Most likely we couldn't find a proper
7488 * section definition at load time, and thus we didn't infer the type.
7489 */ 7490 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", 7491 prog->name, prog->sec_name); 7492 return -EINVAL; 7493 case BPF_PROG_TYPE_STRUCT_OPS: 7494 if (prog->attach_btf_id == 0) { 7495 pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n", 7496 prog->name); 7497 return -EINVAL; 7498 } 7499 break; 7500 default: 7501 break; 7502 } 7503 7504 if (!insns || !insns_cnt) 7505 return -EINVAL; 7506 7507 if (kernel_supports(obj, FEAT_PROG_NAME)) 7508 prog_name = prog->name; 7509 load_attr.attach_prog_fd = prog->attach_prog_fd; 7510 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; 7511 load_attr.attach_btf_id = prog->attach_btf_id; 7512 load_attr.kern_version = kern_version; 7513 load_attr.prog_ifindex = prog->prog_ifindex; 7514 load_attr.expected_attach_type = prog->expected_attach_type; 7515 7516 /* specify func_info/line_info only if kernel supports them */ 7517 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { 7518 load_attr.prog_btf_fd = btf__fd(obj->btf); 7519 load_attr.func_info = prog->func_info; 7520 load_attr.func_info_rec_size = prog->func_info_rec_size; 7521 load_attr.func_info_cnt = prog->func_info_cnt; 7522 load_attr.line_info = prog->line_info; 7523 load_attr.line_info_rec_size = prog->line_info_rec_size; 7524 load_attr.line_info_cnt = prog->line_info_cnt; 7525 } 7526 load_attr.log_level = log_level; 7527 load_attr.prog_flags = prog->prog_flags; 7528 load_attr.fd_array = obj->fd_array; 7529 7530 load_attr.token_fd = obj->token_fd; 7531 if (obj->token_fd) 7532 load_attr.prog_flags |= BPF_F_TOKEN_FD; 7533 7534 /* adjust load_attr if sec_def provides custom preload callback */ 7535 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { 7536 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); 7537 if (err < 0) { 7538 pr_warn("prog '%s': failed to prepare load attributes: %s\n", 7539 prog->name, errstr(err)); 7540 return err; 7541 } 7542 insns = prog->insns; 7543 insns_cnt = prog->insns_cnt; 7544 } 7545 7546 if (obj->gen_loader) { 7547 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, 7548 license, insns, insns_cnt, &load_attr, 7549 prog - obj->programs); 7550 *prog_fd = -1; 7551 return 0; 7552 } 7553 7554 retry_load: 7555 /* if log_level is zero, we don't request logs initially even if 7556 * custom log_buf is specified; if the program load fails, then we'll 7557 * bump log_level to 1 and use either custom log_buf or we'll allocate 7558 * our own and retry the load to get details on what failed 7559 */ 7560 if (log_level) { 7561 if (prog->log_buf) { 7562 log_buf = prog->log_buf; 7563 log_buf_size = prog->log_size; 7564 own_log_buf = false; 7565 } else if (obj->log_buf) { 7566 log_buf = obj->log_buf; 7567 log_buf_size = obj->log_size; 7568 own_log_buf = false; 7569 } else { 7570 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); 7571 tmp = realloc(log_buf, log_buf_size); 7572 if (!tmp) { 7573 ret = -ENOMEM; 7574 goto out; 7575 } 7576 log_buf = tmp; 7577 log_buf[0] = '\0'; 7578 own_log_buf = true; 7579 } 7580 } 7581 7582 load_attr.log_buf = log_buf; 7583 load_attr.log_size = log_buf_size; 7584 load_attr.log_level = log_level; 7585 7586 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); 7587 if (ret >= 0) { 7588 if (log_level && own_log_buf) { 7589 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7590 prog->name, log_buf); 7591 } 7592 7593 if 
(obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { 7594 struct bpf_map *map; 7595 int i; 7596 7597 for (i = 0; i < obj->nr_maps; i++) { 7598 map = &prog->obj->maps[i]; 7599 if (map->libbpf_type != LIBBPF_MAP_RODATA) 7600 continue; 7601 7602 if (bpf_prog_bind_map(ret, map->fd, NULL)) { 7603 pr_warn("prog '%s': failed to bind map '%s': %s\n", 7604 prog->name, map->real_name, errstr(errno)); 7605 /* Don't fail hard if can't bind rodata. */ 7606 } 7607 } 7608 } 7609 7610 *prog_fd = ret; 7611 ret = 0; 7612 goto out; 7613 } 7614 7615 if (log_level == 0) { 7616 log_level = 1; 7617 goto retry_load; 7618 } 7619 /* On ENOSPC, increase log buffer size and retry, unless custom 7620 * log_buf is specified. 7621 * Be careful to not overflow u32, though. Kernel's log buf size limit 7622 * isn't part of UAPI so it can always be bumped to full 4GB. So don't 7623 * multiply by 2 unless we are sure we'll fit within 32 bits. 7624 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 7625 */ 7626 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) 7627 goto retry_load; 7628 7629 ret = -errno; 7630 7631 /* post-process verifier log to improve error descriptions */ 7632 fixup_verifier_log(prog, log_buf, log_buf_size); 7633 7634 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); 7635 pr_perm_msg(ret); 7636 7637 if (own_log_buf && log_buf && log_buf[0] != '\0') { 7638 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7639 prog->name, log_buf); 7640 } 7641 7642 out: 7643 if (own_log_buf) 7644 free(log_buf); 7645 return ret; 7646 } 7647 7648 static char *find_prev_line(char *buf, char *cur) 7649 { 7650 char *p; 7651 7652 if (cur == buf) /* end of a log buf */ 7653 return NULL; 7654 7655 p = cur - 1; 7656 while (p - 1 >= buf && *(p - 1) != '\n') 7657 p--; 7658 7659 return p; 7660 } 7661 7662 static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 7663 char *orig, size_t orig_sz, const char *patch) 7664 { 7665 /* size of the remaining log content to the right from the to-be-replaced part */ 7666 size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 7667 size_t patch_sz = strlen(patch); 7668 7669 if (patch_sz != orig_sz) { 7670 /* If patch line(s) are longer than original piece of verifier log, 7671 * shift log contents by (patch_sz - orig_sz) bytes to the right 7672 * starting from after to-be-replaced part of the log. 7673 * 7674 * If patch line(s) are shorter than original piece of verifier log, 7675 * shift log contents by (orig_sz - patch_sz) bytes to the left 7676 * starting from after to-be-replaced part of the log 7677 * 7678 * We need to be careful about not overflowing available 7679 * buf_sz capacity. If that's the case, we'll truncate the end 7680 * of the original log, as necessary. 
7681 */ 7682 if (patch_sz > orig_sz) { 7683 if (orig + patch_sz >= buf + buf_sz) { 7684 /* patch is big enough to cover remaining space completely */ 7685 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 7686 rem_sz = 0; 7687 } else if (patch_sz - orig_sz > buf_sz - log_sz) { 7688 /* patch causes part of remaining log to be truncated */ 7689 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 7690 } 7691 } 7692 /* shift remaining log to the right by calculated amount */ 7693 memmove(orig + patch_sz, orig + orig_sz, rem_sz); 7694 } 7695 7696 memcpy(orig, patch, patch_sz); 7697 } 7698 7699 static void fixup_log_failed_core_relo(struct bpf_program *prog, 7700 char *buf, size_t buf_sz, size_t log_sz, 7701 char *line1, char *line2, char *line3) 7702 { 7703 /* Expected log for failed and not properly guarded CO-RE relocation: 7704 * line1 -> 123: (85) call unknown#195896080 7705 * line2 -> invalid func unknown#195896080 7706 * line3 -> <anything else or end of buffer> 7707 * 7708 * "123" is the index of the instruction that was poisoned. We extract 7709 * instruction index to find corresponding CO-RE relocation and 7710 * replace this part of the log with more relevant information about 7711 * failed CO-RE relocation. 7712 */ 7713 const struct bpf_core_relo *relo; 7714 struct bpf_core_spec spec; 7715 char patch[512], spec_buf[256]; 7716 int insn_idx, err, spec_len; 7717 7718 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 7719 return; 7720 7721 relo = find_relo_core(prog, insn_idx); 7722 if (!relo) 7723 return; 7724 7725 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 7726 if (err) 7727 return; 7728 7729 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 7730 snprintf(patch, sizeof(patch), 7731 "%d: <invalid CO-RE relocation>\n" 7732 "failed to resolve CO-RE relocation %s%s\n", 7733 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); 7734 7735 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7736 } 7737 7738 static void fixup_log_missing_map_load(struct bpf_program *prog, 7739 char *buf, size_t buf_sz, size_t log_sz, 7740 char *line1, char *line2, char *line3) 7741 { 7742 /* Expected log for failed and not properly guarded map reference: 7743 * line1 -> 123: (85) call unknown#2001000345 7744 * line2 -> invalid func unknown#2001000345 7745 * line3 -> <anything else or end of buffer> 7746 * 7747 * "123" is the index of the instruction that was poisoned. 7748 * "345" in "2001000345" is a map index in obj->maps to fetch map name. 
7749 */ 7750 struct bpf_object *obj = prog->obj; 7751 const struct bpf_map *map; 7752 int insn_idx, map_idx; 7753 char patch[128]; 7754 7755 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) 7756 return; 7757 7758 map_idx -= POISON_LDIMM64_MAP_BASE; 7759 if (map_idx < 0 || map_idx >= obj->nr_maps) 7760 return; 7761 map = &obj->maps[map_idx]; 7762 7763 snprintf(patch, sizeof(patch), 7764 "%d: <invalid BPF map reference>\n" 7765 "BPF map '%s' is referenced but wasn't created\n", 7766 insn_idx, map->name); 7767 7768 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7769 } 7770 7771 static void fixup_log_missing_kfunc_call(struct bpf_program *prog, 7772 char *buf, size_t buf_sz, size_t log_sz, 7773 char *line1, char *line2, char *line3) 7774 { 7775 /* Expected log for failed and not properly guarded kfunc call: 7776 * line1 -> 123: (85) call unknown#2002000345 7777 * line2 -> invalid func unknown#2002000345 7778 * line3 -> <anything else or end of buffer> 7779 * 7780 * "123" is the index of the instruction that was poisoned. 7781 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. 7782 */ 7783 struct bpf_object *obj = prog->obj; 7784 const struct extern_desc *ext; 7785 int insn_idx, ext_idx; 7786 char patch[128]; 7787 7788 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) 7789 return; 7790 7791 ext_idx -= POISON_CALL_KFUNC_BASE; 7792 if (ext_idx < 0 || ext_idx >= obj->nr_extern) 7793 return; 7794 ext = &obj->externs[ext_idx]; 7795 7796 snprintf(patch, sizeof(patch), 7797 "%d: <invalid kfunc call>\n" 7798 "kfunc '%s' is referenced but wasn't resolved\n", 7799 insn_idx, ext->name); 7800 7801 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7802 } 7803 7804 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 7805 { 7806 /* look for familiar error patterns in last N lines of the log */ 7807 const size_t max_last_line_cnt = 10; 7808 char *prev_line, *cur_line, *next_line; 7809 size_t log_sz; 7810 int i; 7811 7812 if (!buf) 7813 return; 7814 7815 log_sz = strlen(buf) + 1; 7816 next_line = buf + log_sz - 1; 7817 7818 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 7819 cur_line = find_prev_line(buf, next_line); 7820 if (!cur_line) 7821 return; 7822 7823 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 7824 prev_line = find_prev_line(buf, cur_line); 7825 if (!prev_line) 7826 continue; 7827 7828 /* failed CO-RE relocation case */ 7829 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 7830 prev_line, cur_line, next_line); 7831 return; 7832 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { 7833 prev_line = find_prev_line(buf, cur_line); 7834 if (!prev_line) 7835 continue; 7836 7837 /* reference to uncreated BPF map */ 7838 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, 7839 prev_line, cur_line, next_line); 7840 return; 7841 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { 7842 prev_line = find_prev_line(buf, cur_line); 7843 if (!prev_line) 7844 continue; 7845 7846 /* reference to unresolved kfunc */ 7847 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, 7848 prev_line, cur_line, next_line); 7849 return; 7850 } 7851 } 7852 } 7853 7854 static int bpf_program_record_relos(struct bpf_program *prog) 7855 { 7856 struct bpf_object *obj = prog->obj; 7857 int i; 7858 7859 for (i = 0; i < prog->nr_reloc; i++) { 7860 struct reloc_desc *relo = &prog->reloc_desc[i]; 7861 
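/* note: ext is only consulted for the extern relocation kinds
 * (RELO_EXTERN_LD64 / RELO_EXTERN_CALL) handled below; for other
 * relocation types relo->ext_idx isn't used
 */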
struct extern_desc *ext = &obj->externs[relo->ext_idx]; 7862 int kind; 7863 7864 switch (relo->type) { 7865 case RELO_EXTERN_LD64: 7866 if (ext->type != EXT_KSYM) 7867 continue; 7868 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 7869 BTF_KIND_VAR : BTF_KIND_FUNC; 7870 bpf_gen__record_extern(obj->gen_loader, ext->name, 7871 ext->is_weak, !ext->ksym.type_id, 7872 true, kind, relo->insn_idx); 7873 break; 7874 case RELO_EXTERN_CALL: 7875 bpf_gen__record_extern(obj->gen_loader, ext->name, 7876 ext->is_weak, false, false, BTF_KIND_FUNC, 7877 relo->insn_idx); 7878 break; 7879 case RELO_CORE: { 7880 struct bpf_core_relo cr = { 7881 .insn_off = relo->insn_idx * 8, 7882 .type_id = relo->core_relo->type_id, 7883 .access_str_off = relo->core_relo->access_str_off, 7884 .kind = relo->core_relo->kind, 7885 }; 7886 7887 bpf_gen__record_relo_core(obj->gen_loader, &cr); 7888 break; 7889 } 7890 default: 7891 continue; 7892 } 7893 } 7894 return 0; 7895 } 7896 7897 static int 7898 bpf_object__load_progs(struct bpf_object *obj, int log_level) 7899 { 7900 struct bpf_program *prog; 7901 size_t i; 7902 int err; 7903 7904 for (i = 0; i < obj->nr_programs; i++) { 7905 prog = &obj->programs[i]; 7906 if (prog_is_subprog(obj, prog)) 7907 continue; 7908 if (!prog->autoload) { 7909 pr_debug("prog '%s': skipped loading\n", prog->name); 7910 continue; 7911 } 7912 prog->log_level |= log_level; 7913 7914 if (obj->gen_loader) 7915 bpf_program_record_relos(prog); 7916 7917 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, 7918 obj->license, obj->kern_version, &prog->fd); 7919 if (err) { 7920 pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); 7921 return err; 7922 } 7923 } 7924 7925 bpf_object__free_relocs(obj); 7926 return 0; 7927 } 7928 7929 static int bpf_object_prepare_progs(struct bpf_object *obj) 7930 { 7931 struct bpf_program *prog; 7932 size_t i; 7933 int err; 7934 7935 for (i = 0; i < obj->nr_programs; i++) { 7936 prog = &obj->programs[i]; 7937 err = bpf_object__sanitize_prog(obj, prog); 7938 if (err) 7939 return err; 7940 } 7941 return 0; 7942 } 7943 7944 static const struct bpf_sec_def *find_sec_def(const char *sec_name); 7945 7946 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) 7947 { 7948 struct bpf_program *prog; 7949 int err; 7950 7951 bpf_object__for_each_program(prog, obj) { 7952 prog->sec_def = find_sec_def(prog->sec_name); 7953 if (!prog->sec_def) { 7954 /* couldn't guess, but user might manually specify */ 7955 pr_debug("prog '%s': unrecognized ELF section name '%s'\n", 7956 prog->name, prog->sec_name); 7957 continue; 7958 } 7959 7960 prog->type = prog->sec_def->prog_type; 7961 prog->expected_attach_type = prog->sec_def->expected_attach_type; 7962 7963 /* sec_def can have custom callback which should be called 7964 * after bpf_program is initialized to adjust its properties 7965 */ 7966 if (prog->sec_def->prog_setup_fn) { 7967 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); 7968 if (err < 0) { 7969 pr_warn("prog '%s': failed to initialize: %s\n", 7970 prog->name, errstr(err)); 7971 return err; 7972 } 7973 } 7974 } 7975 7976 return 0; 7977 } 7978 7979 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, 7980 const char *obj_name, 7981 const struct bpf_object_open_opts *opts) 7982 { 7983 const char *kconfig, *btf_tmp_path, *token_path; 7984 struct bpf_object *obj; 7985 int err; 7986 char *log_buf; 7987 size_t log_size; 7988 __u32 log_level; 7989 
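/* an in-memory ELF buffer must come with an explicit object name;
 * bpf_object__open_mem() always generates one for its callers
 */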
7990 if (obj_buf && !obj_name) 7991 return ERR_PTR(-EINVAL); 7992 7993 if (elf_version(EV_CURRENT) == EV_NONE) { 7994 pr_warn("failed to init libelf for %s\n", 7995 path ? : "(mem buf)"); 7996 return ERR_PTR(-LIBBPF_ERRNO__LIBELF); 7997 } 7998 7999 if (!OPTS_VALID(opts, bpf_object_open_opts)) 8000 return ERR_PTR(-EINVAL); 8001 8002 obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name; 8003 if (obj_buf) { 8004 path = obj_name; 8005 pr_debug("loading object '%s' from buffer\n", obj_name); 8006 } else { 8007 pr_debug("loading object from %s\n", path); 8008 } 8009 8010 log_buf = OPTS_GET(opts, kernel_log_buf, NULL); 8011 log_size = OPTS_GET(opts, kernel_log_size, 0); 8012 log_level = OPTS_GET(opts, kernel_log_level, 0); 8013 if (log_size > UINT_MAX) 8014 return ERR_PTR(-EINVAL); 8015 if (log_size && !log_buf) 8016 return ERR_PTR(-EINVAL); 8017 8018 token_path = OPTS_GET(opts, bpf_token_path, NULL); 8019 /* if user didn't specify bpf_token_path explicitly, check if 8020 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path 8021 * option 8022 */ 8023 if (!token_path) 8024 token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); 8025 if (token_path && strlen(token_path) >= PATH_MAX) 8026 return ERR_PTR(-ENAMETOOLONG); 8027 8028 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); 8029 if (IS_ERR(obj)) 8030 return obj; 8031 8032 obj->log_buf = log_buf; 8033 obj->log_size = log_size; 8034 obj->log_level = log_level; 8035 8036 if (token_path) { 8037 obj->token_path = strdup(token_path); 8038 if (!obj->token_path) { 8039 err = -ENOMEM; 8040 goto out; 8041 } 8042 } 8043 8044 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 8045 if (btf_tmp_path) { 8046 if (strlen(btf_tmp_path) >= PATH_MAX) { 8047 err = -ENAMETOOLONG; 8048 goto out; 8049 } 8050 obj->btf_custom_path = strdup(btf_tmp_path); 8051 if (!obj->btf_custom_path) { 8052 err = -ENOMEM; 8053 goto out; 8054 } 8055 } 8056 8057 kconfig = OPTS_GET(opts, kconfig, NULL); 8058 if (kconfig) { 8059 obj->kconfig = strdup(kconfig); 8060 if (!obj->kconfig) { 8061 err = -ENOMEM; 8062 goto out; 8063 } 8064 } 8065 8066 err = bpf_object__elf_init(obj); 8067 err = err ? : bpf_object__elf_collect(obj); 8068 err = err ? : bpf_object__collect_externs(obj); 8069 err = err ? : bpf_object_fixup_btf(obj); 8070 err = err ? : bpf_object__init_maps(obj, opts); 8071 err = err ? : bpf_object_init_progs(obj, opts); 8072 err = err ? 
: bpf_object__collect_relos(obj); 8073 if (err) 8074 goto out; 8075 8076 bpf_object__elf_finish(obj); 8077 8078 return obj; 8079 out: 8080 bpf_object__close(obj); 8081 return ERR_PTR(err); 8082 } 8083 8084 struct bpf_object * 8085 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 8086 { 8087 if (!path) 8088 return libbpf_err_ptr(-EINVAL); 8089 8090 return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts)); 8091 } 8092 8093 struct bpf_object *bpf_object__open(const char *path) 8094 { 8095 return bpf_object__open_file(path, NULL); 8096 } 8097 8098 struct bpf_object * 8099 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 8100 const struct bpf_object_open_opts *opts) 8101 { 8102 char tmp_name[64]; 8103 8104 if (!obj_buf || obj_buf_sz == 0) 8105 return libbpf_err_ptr(-EINVAL); 8106 8107 /* create a (quite useless) default "name" for this memory buffer object */ 8108 snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz); 8109 8110 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts)); 8111 } 8112 8113 static int bpf_object_unload(struct bpf_object *obj) 8114 { 8115 size_t i; 8116 8117 if (!obj) 8118 return libbpf_err(-EINVAL); 8119 8120 for (i = 0; i < obj->nr_maps; i++) { 8121 zclose(obj->maps[i].fd); 8122 if (obj->maps[i].st_ops) 8123 zfree(&obj->maps[i].st_ops->kern_vdata); 8124 } 8125 8126 for (i = 0; i < obj->nr_programs; i++) 8127 bpf_program__unload(&obj->programs[i]); 8128 8129 return 0; 8130 } 8131 8132 static int bpf_object__sanitize_maps(struct bpf_object *obj) 8133 { 8134 struct bpf_map *m; 8135 8136 bpf_object__for_each_map(m, obj) { 8137 if (!bpf_map__is_internal(m)) 8138 continue; 8139 if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) 8140 m->def.map_flags &= ~BPF_F_MMAPABLE; 8141 } 8142 8143 return 0; 8144 } 8145 8146 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, 8147 const char *sym_name, void *ctx); 8148 8149 static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) 8150 { 8151 char sym_type, sym_name[500]; 8152 unsigned long long sym_addr; 8153 int ret, err = 0; 8154 FILE *f; 8155 8156 f = fopen("/proc/kallsyms", "re"); 8157 if (!f) { 8158 err = -errno; 8159 pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); 8160 return err; 8161 } 8162 8163 while (true) { 8164 ret = fscanf(f, "%llx %c %499s%*[^\n]\n", 8165 &sym_addr, &sym_type, sym_name); 8166 if (ret == EOF && feof(f)) 8167 break; 8168 if (ret != 3) { 8169 pr_warn("failed to read kallsyms entry: %d\n", ret); 8170 err = -EINVAL; 8171 break; 8172 } 8173 8174 err = cb(sym_addr, sym_type, sym_name, ctx); 8175 if (err) 8176 break; 8177 } 8178 8179 fclose(f); 8180 return err; 8181 } 8182 8183 static int kallsyms_cb(unsigned long long sym_addr, char sym_type, 8184 const char *sym_name, void *ctx) 8185 { 8186 struct bpf_object *obj = ctx; 8187 const struct btf_type *t; 8188 struct extern_desc *ext; 8189 char *res; 8190 8191 res = strstr(sym_name, ".llvm."); 8192 if (sym_type == 'd' && res) 8193 ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name); 8194 else 8195 ext = find_extern_by_name(obj, sym_name); 8196 if (!ext || ext->type != EXT_KSYM) 8197 return 0; 8198 8199 t = btf__type_by_id(obj->btf, ext->btf_id); 8200 if (!btf_is_var(t)) 8201 return 0; 8202 8203 if (ext->is_set && ext->ksym.addr != sym_addr) { 8204 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 8205 sym_name, ext->ksym.addr, sym_addr); 8206 return -EINVAL; 8207 } 8208 if (!ext->is_set) { 8209 
ext->is_set = true; 8210 ext->ksym.addr = sym_addr; 8211 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 8212 } 8213 return 0; 8214 } 8215 8216 static int bpf_object__read_kallsyms_file(struct bpf_object *obj) 8217 { 8218 return libbpf_kallsyms_parse(kallsyms_cb, obj); 8219 } 8220 8221 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 8222 __u16 kind, struct btf **res_btf, 8223 struct module_btf **res_mod_btf) 8224 { 8225 struct module_btf *mod_btf; 8226 struct btf *btf; 8227 int i, id, err; 8228 8229 btf = obj->btf_vmlinux; 8230 mod_btf = NULL; 8231 id = btf__find_by_name_kind(btf, ksym_name, kind); 8232 8233 if (id == -ENOENT) { 8234 err = load_module_btfs(obj); 8235 if (err) 8236 return err; 8237 8238 for (i = 0; i < obj->btf_module_cnt; i++) { 8239 /* we assume module_btf's BTF FD is always >0 */ 8240 mod_btf = &obj->btf_modules[i]; 8241 btf = mod_btf->btf; 8242 id = btf__find_by_name_kind_own(btf, ksym_name, kind); 8243 if (id != -ENOENT) 8244 break; 8245 } 8246 } 8247 if (id <= 0) 8248 return -ESRCH; 8249 8250 *res_btf = btf; 8251 *res_mod_btf = mod_btf; 8252 return id; 8253 } 8254 8255 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 8256 struct extern_desc *ext) 8257 { 8258 const struct btf_type *targ_var, *targ_type; 8259 __u32 targ_type_id, local_type_id; 8260 struct module_btf *mod_btf = NULL; 8261 const char *targ_var_name; 8262 struct btf *btf = NULL; 8263 int id, err; 8264 8265 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); 8266 if (id < 0) { 8267 if (id == -ESRCH && ext->is_weak) 8268 return 0; 8269 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", 8270 ext->name); 8271 return id; 8272 } 8273 8274 /* find local type_id */ 8275 local_type_id = ext->ksym.type_id; 8276 8277 /* find target type_id */ 8278 targ_var = btf__type_by_id(btf, id); 8279 targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 8280 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 8281 8282 err = bpf_core_types_are_compat(obj->btf, local_type_id, 8283 btf, targ_type_id); 8284 if (err <= 0) { 8285 const struct btf_type *local_type; 8286 const char *targ_name, *local_name; 8287 8288 local_type = btf__type_by_id(obj->btf, local_type_id); 8289 local_name = btf__name_by_offset(obj->btf, local_type->name_off); 8290 targ_name = btf__name_by_offset(btf, targ_type->name_off); 8291 8292 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 8293 ext->name, local_type_id, 8294 btf_kind_str(local_type), local_name, targ_type_id, 8295 btf_kind_str(targ_type), targ_name); 8296 return -EINVAL; 8297 } 8298 8299 ext->is_set = true; 8300 ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; 8301 ext->ksym.kernel_btf_id = id; 8302 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 8303 ext->name, id, btf_kind_str(targ_var), targ_var_name); 8304 8305 return 0; 8306 } 8307 8308 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 8309 struct extern_desc *ext) 8310 { 8311 int local_func_proto_id, kfunc_proto_id, kfunc_id; 8312 struct module_btf *mod_btf = NULL; 8313 const struct btf_type *kern_func; 8314 struct btf *kern_btf = NULL; 8315 int ret; 8316 8317 local_func_proto_id = ext->ksym.type_id; 8318 8319 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 8320 &mod_btf); 8321 if (kfunc_id < 0) { 8322 if (kfunc_id == -ESRCH && ext->is_weak) 8323 return 0; 8324 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", 8325 ext->name); 8326 return kfunc_id; 8327 } 8328 8329 kern_func = btf__type_by_id(kern_btf, kfunc_id); 8330 kfunc_proto_id = kern_func->type; 8331 8332 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 8333 kern_btf, kfunc_proto_id); 8334 if (ret <= 0) { 8335 if (ext->is_weak) 8336 return 0; 8337 8338 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 8339 ext->name, local_func_proto_id, 8340 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); 8341 return -EINVAL; 8342 } 8343 8344 /* set index for module BTF fd in fd_array, if unset */ 8345 if (mod_btf && !mod_btf->fd_array_idx) { 8346 /* insn->off is s16 */ 8347 if (obj->fd_array_cnt == INT16_MAX) { 8348 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", 8349 ext->name, mod_btf->fd_array_idx); 8350 return -E2BIG; 8351 } 8352 /* Cannot use index 0 for module BTF fd */ 8353 if (!obj->fd_array_cnt) 8354 obj->fd_array_cnt = 1; 8355 8356 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), 8357 obj->fd_array_cnt + 1); 8358 if (ret) 8359 return ret; 8360 mod_btf->fd_array_idx = obj->fd_array_cnt; 8361 /* we assume module BTF FD is always >0 */ 8362 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; 8363 } 8364 8365 ext->is_set = true; 8366 ext->ksym.kernel_btf_id = kfunc_id; 8367 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; 8368 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() 8369 * populates FD into ld_imm64 insn when it's used to point to kfunc. 8370 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. 8371 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. 8372 */ 8373 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; 8374 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", 8375 ext->name, mod_btf ? 
mod_btf->name : "vmlinux", kfunc_id); 8376 8377 return 0; 8378 } 8379 8380 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) 8381 { 8382 const struct btf_type *t; 8383 struct extern_desc *ext; 8384 int i, err; 8385 8386 for (i = 0; i < obj->nr_extern; i++) { 8387 ext = &obj->externs[i]; 8388 if (ext->type != EXT_KSYM || !ext->ksym.type_id) 8389 continue; 8390 8391 if (obj->gen_loader) { 8392 ext->is_set = true; 8393 ext->ksym.kernel_btf_obj_fd = 0; 8394 ext->ksym.kernel_btf_id = 0; 8395 continue; 8396 } 8397 t = btf__type_by_id(obj->btf, ext->btf_id); 8398 if (btf_is_var(t)) 8399 err = bpf_object__resolve_ksym_var_btf_id(obj, ext); 8400 else 8401 err = bpf_object__resolve_ksym_func_btf_id(obj, ext); 8402 if (err) 8403 return err; 8404 } 8405 return 0; 8406 } 8407 8408 static int bpf_object__resolve_externs(struct bpf_object *obj, 8409 const char *extra_kconfig) 8410 { 8411 bool need_config = false, need_kallsyms = false; 8412 bool need_vmlinux_btf = false; 8413 struct extern_desc *ext; 8414 void *kcfg_data = NULL; 8415 int err, i; 8416 8417 if (obj->nr_extern == 0) 8418 return 0; 8419 8420 if (obj->kconfig_map_idx >= 0) 8421 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; 8422 8423 for (i = 0; i < obj->nr_extern; i++) { 8424 ext = &obj->externs[i]; 8425 8426 if (ext->type == EXT_KSYM) { 8427 if (ext->ksym.type_id) 8428 need_vmlinux_btf = true; 8429 else 8430 need_kallsyms = true; 8431 continue; 8432 } else if (ext->type == EXT_KCFG) { 8433 void *ext_ptr = kcfg_data + ext->kcfg.data_off; 8434 __u64 value = 0; 8435 8436 /* Kconfig externs need actual /proc/config.gz */ 8437 if (str_has_pfx(ext->name, "CONFIG_")) { 8438 need_config = true; 8439 continue; 8440 } 8441 8442 /* Virtual kcfg externs are customly handled by libbpf */ 8443 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 8444 value = get_kernel_version(); 8445 if (!value) { 8446 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); 8447 return -EINVAL; 8448 } 8449 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { 8450 value = kernel_supports(obj, FEAT_BPF_COOKIE); 8451 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { 8452 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); 8453 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { 8454 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed 8455 * __kconfig externs, where LINUX_ ones are virtual and filled out 8456 * customly by libbpf (their values don't come from Kconfig). 8457 * If LINUX_xxx variable is not recognized by libbpf, but is marked 8458 * __weak, it defaults to zero value, just like for CONFIG_xxx 8459 * externs. 
8460 */ 8461 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 8462 return -EINVAL; 8463 } 8464 8465 err = set_kcfg_value_num(ext, ext_ptr, value); 8466 if (err) 8467 return err; 8468 pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 8469 ext->name, (long long)value); 8470 } else { 8471 pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 8472 return -EINVAL; 8473 } 8474 } 8475 if (need_config && extra_kconfig) { 8476 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); 8477 if (err) 8478 return -EINVAL; 8479 need_config = false; 8480 for (i = 0; i < obj->nr_extern; i++) { 8481 ext = &obj->externs[i]; 8482 if (ext->type == EXT_KCFG && !ext->is_set) { 8483 need_config = true; 8484 break; 8485 } 8486 } 8487 } 8488 if (need_config) { 8489 err = bpf_object__read_kconfig_file(obj, kcfg_data); 8490 if (err) 8491 return -EINVAL; 8492 } 8493 if (need_kallsyms) { 8494 err = bpf_object__read_kallsyms_file(obj); 8495 if (err) 8496 return -EINVAL; 8497 } 8498 if (need_vmlinux_btf) { 8499 err = bpf_object__resolve_ksyms_btf_id(obj); 8500 if (err) 8501 return -EINVAL; 8502 } 8503 for (i = 0; i < obj->nr_extern; i++) { 8504 ext = &obj->externs[i]; 8505 8506 if (!ext->is_set && !ext->is_weak) { 8507 pr_warn("extern '%s' (strong): not resolved\n", ext->name); 8508 return -ESRCH; 8509 } else if (!ext->is_set) { 8510 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 8511 ext->name); 8512 } 8513 } 8514 8515 return 0; 8516 } 8517 8518 static void bpf_map_prepare_vdata(const struct bpf_map *map) 8519 { 8520 const struct btf_type *type; 8521 struct bpf_struct_ops *st_ops; 8522 __u32 i; 8523 8524 st_ops = map->st_ops; 8525 type = btf__type_by_id(map->obj->btf, st_ops->type_id); 8526 for (i = 0; i < btf_vlen(type); i++) { 8527 struct bpf_program *prog = st_ops->progs[i]; 8528 void *kern_data; 8529 int prog_fd; 8530 8531 if (!prog) 8532 continue; 8533 8534 prog_fd = bpf_program__fd(prog); 8535 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 8536 *(unsigned long *)kern_data = prog_fd; 8537 } 8538 } 8539 8540 static int bpf_object_prepare_struct_ops(struct bpf_object *obj) 8541 { 8542 struct bpf_map *map; 8543 int i; 8544 8545 for (i = 0; i < obj->nr_maps; i++) { 8546 map = &obj->maps[i]; 8547 8548 if (!bpf_map__is_struct_ops(map)) 8549 continue; 8550 8551 if (!map->autocreate) 8552 continue; 8553 8554 bpf_map_prepare_vdata(map); 8555 } 8556 8557 return 0; 8558 } 8559 8560 static void bpf_object_unpin(struct bpf_object *obj) 8561 { 8562 int i; 8563 8564 /* unpin any maps that were auto-pinned during load */ 8565 for (i = 0; i < obj->nr_maps; i++) 8566 if (obj->maps[i].pinned && !obj->maps[i].reused) 8567 bpf_map__unpin(&obj->maps[i], NULL); 8568 } 8569 8570 static void bpf_object_post_load_cleanup(struct bpf_object *obj) 8571 { 8572 int i; 8573 8574 /* clean up fd_array */ 8575 zfree(&obj->fd_array); 8576 8577 /* clean up module BTFs */ 8578 for (i = 0; i < obj->btf_module_cnt; i++) { 8579 close(obj->btf_modules[i].fd); 8580 btf__free(obj->btf_modules[i].btf); 8581 free(obj->btf_modules[i].name); 8582 } 8583 obj->btf_module_cnt = 0; 8584 zfree(&obj->btf_modules); 8585 8586 /* clean up vmlinux BTF */ 8587 btf__free(obj->btf_vmlinux); 8588 obj->btf_vmlinux = NULL; 8589 } 8590 8591 static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) 8592 { 8593 int err; 8594 8595 if (obj->state >= OBJ_PREPARED) { 8596 pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); 8597 return -EINVAL; 8598 } 8599 8600 
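/* Everything needed before programs can be loaded into the kernel:
 * set up the BPF token, probe kernel features, resolve externs,
 * perform relocations, sanitize and load BTF, create maps, and
 * sanitize programs.
 */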
err = bpf_object_prepare_token(obj); 8601 err = err ? : bpf_object__probe_loading(obj); 8602 err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8603 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8604 err = err ? : bpf_object__sanitize_maps(obj); 8605 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8606 err = err ? : bpf_object_adjust_struct_ops_autoload(obj); 8607 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8608 err = err ? : bpf_object__sanitize_and_load_btf(obj); 8609 err = err ? : bpf_object__create_maps(obj); 8610 err = err ? : bpf_object_prepare_progs(obj); 8611 8612 if (err) { 8613 bpf_object_unpin(obj); 8614 bpf_object_unload(obj); 8615 obj->state = OBJ_LOADED; 8616 return err; 8617 } 8618 8619 obj->state = OBJ_PREPARED; 8620 return 0; 8621 } 8622 8623 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 8624 { 8625 int err; 8626 8627 if (!obj) 8628 return libbpf_err(-EINVAL); 8629 8630 if (obj->state >= OBJ_LOADED) { 8631 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 8632 return libbpf_err(-EINVAL); 8633 } 8634 8635 /* Disallow kernel loading programs of non-native endianness but 8636 * permit cross-endian creation of "light skeleton". 8637 */ 8638 if (obj->gen_loader) { 8639 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); 8640 } else if (!is_native_endianness(obj)) { 8641 pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); 8642 return libbpf_err(-LIBBPF_ERRNO__ENDIAN); 8643 } 8644 8645 if (obj->state < OBJ_PREPARED) { 8646 err = bpf_object_prepare(obj, target_btf_path); 8647 if (err) 8648 return libbpf_err(err); 8649 } 8650 err = bpf_object__load_progs(obj, extra_log_level); 8651 err = err ? : bpf_object_init_prog_arrays(obj); 8652 err = err ? 
: bpf_object_prepare_struct_ops(obj); 8653 8654 if (obj->gen_loader) { 8655 /* reset FDs */ 8656 if (obj->btf) 8657 btf__set_fd(obj->btf, -1); 8658 if (!err) 8659 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 8660 } 8661 8662 bpf_object_post_load_cleanup(obj); 8663 obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ 8664 8665 if (err) { 8666 bpf_object_unpin(obj); 8667 bpf_object_unload(obj); 8668 pr_warn("failed to load object '%s'\n", obj->path); 8669 return libbpf_err(err); 8670 } 8671 8672 return 0; 8673 } 8674 8675 int bpf_object__prepare(struct bpf_object *obj) 8676 { 8677 return libbpf_err(bpf_object_prepare(obj, NULL)); 8678 } 8679 8680 int bpf_object__load(struct bpf_object *obj) 8681 { 8682 return bpf_object_load(obj, 0, NULL); 8683 } 8684 8685 static int make_parent_dir(const char *path) 8686 { 8687 char *dname, *dir; 8688 int err = 0; 8689 8690 dname = strdup(path); 8691 if (dname == NULL) 8692 return -ENOMEM; 8693 8694 dir = dirname(dname); 8695 if (mkdir(dir, 0700) && errno != EEXIST) 8696 err = -errno; 8697 8698 free(dname); 8699 if (err) { 8700 pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); 8701 } 8702 return err; 8703 } 8704 8705 static int check_path(const char *path) 8706 { 8707 struct statfs st_fs; 8708 char *dname, *dir; 8709 int err = 0; 8710 8711 if (path == NULL) 8712 return -EINVAL; 8713 8714 dname = strdup(path); 8715 if (dname == NULL) 8716 return -ENOMEM; 8717 8718 dir = dirname(dname); 8719 if (statfs(dir, &st_fs)) { 8720 pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); 8721 err = -errno; 8722 } 8723 free(dname); 8724 8725 if (!err && st_fs.f_type != BPF_FS_MAGIC) { 8726 pr_warn("specified path %s is not on BPF FS\n", path); 8727 err = -EINVAL; 8728 } 8729 8730 return err; 8731 } 8732 8733 int bpf_program__pin(struct bpf_program *prog, const char *path) 8734 { 8735 int err; 8736 8737 if (prog->fd < 0) { 8738 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); 8739 return libbpf_err(-EINVAL); 8740 } 8741 8742 err = make_parent_dir(path); 8743 if (err) 8744 return libbpf_err(err); 8745 8746 err = check_path(path); 8747 if (err) 8748 return libbpf_err(err); 8749 8750 if (bpf_obj_pin(prog->fd, path)) { 8751 err = -errno; 8752 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); 8753 return libbpf_err(err); 8754 } 8755 8756 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); 8757 return 0; 8758 } 8759 8760 int bpf_program__unpin(struct bpf_program *prog, const char *path) 8761 { 8762 int err; 8763 8764 if (prog->fd < 0) { 8765 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); 8766 return libbpf_err(-EINVAL); 8767 } 8768 8769 err = check_path(path); 8770 if (err) 8771 return libbpf_err(err); 8772 8773 err = unlink(path); 8774 if (err) 8775 return libbpf_err(-errno); 8776 8777 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); 8778 return 0; 8779 } 8780 8781 int bpf_map__pin(struct bpf_map *map, const char *path) 8782 { 8783 int err; 8784 8785 if (map == NULL) { 8786 pr_warn("invalid map pointer\n"); 8787 return libbpf_err(-EINVAL); 8788 } 8789 8790 if (map->fd < 0) { 8791 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); 8792 return libbpf_err(-EINVAL); 8793 } 8794 8795 if (map->pin_path) { 8796 if (path && strcmp(path, map->pin_path)) { 8797 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8798 bpf_map__name(map), map->pin_path, path); 8799 return 
libbpf_err(-EINVAL); 8800 } else if (map->pinned) { 8801 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", 8802 bpf_map__name(map), map->pin_path); 8803 return 0; 8804 } 8805 } else { 8806 if (!path) { 8807 pr_warn("missing a path to pin map '%s' at\n", 8808 bpf_map__name(map)); 8809 return libbpf_err(-EINVAL); 8810 } else if (map->pinned) { 8811 pr_warn("map '%s' already pinned\n", bpf_map__name(map)); 8812 return libbpf_err(-EEXIST); 8813 } 8814 8815 map->pin_path = strdup(path); 8816 if (!map->pin_path) { 8817 err = -errno; 8818 goto out_err; 8819 } 8820 } 8821 8822 err = make_parent_dir(map->pin_path); 8823 if (err) 8824 return libbpf_err(err); 8825 8826 err = check_path(map->pin_path); 8827 if (err) 8828 return libbpf_err(err); 8829 8830 if (bpf_obj_pin(map->fd, map->pin_path)) { 8831 err = -errno; 8832 goto out_err; 8833 } 8834 8835 map->pinned = true; 8836 pr_debug("pinned map '%s'\n", map->pin_path); 8837 8838 return 0; 8839 8840 out_err: 8841 pr_warn("failed to pin map: %s\n", errstr(err)); 8842 return libbpf_err(err); 8843 } 8844 8845 int bpf_map__unpin(struct bpf_map *map, const char *path) 8846 { 8847 int err; 8848 8849 if (map == NULL) { 8850 pr_warn("invalid map pointer\n"); 8851 return libbpf_err(-EINVAL); 8852 } 8853 8854 if (map->pin_path) { 8855 if (path && strcmp(path, map->pin_path)) { 8856 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8857 bpf_map__name(map), map->pin_path, path); 8858 return libbpf_err(-EINVAL); 8859 } 8860 path = map->pin_path; 8861 } else if (!path) { 8862 pr_warn("no path to unpin map '%s' from\n", 8863 bpf_map__name(map)); 8864 return libbpf_err(-EINVAL); 8865 } 8866 8867 err = check_path(path); 8868 if (err) 8869 return libbpf_err(err); 8870 8871 err = unlink(path); 8872 if (err != 0) 8873 return libbpf_err(-errno); 8874 8875 map->pinned = false; 8876 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); 8877 8878 return 0; 8879 } 8880 8881 int bpf_map__set_pin_path(struct bpf_map *map, const char *path) 8882 { 8883 char *new = NULL; 8884 8885 if (path) { 8886 new = strdup(path); 8887 if (!new) 8888 return libbpf_err(-errno); 8889 } 8890 8891 free(map->pin_path); 8892 map->pin_path = new; 8893 return 0; 8894 } 8895 8896 __alias(bpf_map__pin_path) 8897 const char *bpf_map__get_pin_path(const struct bpf_map *map); 8898 8899 const char *bpf_map__pin_path(const struct bpf_map *map) 8900 { 8901 return map->pin_path; 8902 } 8903 8904 bool bpf_map__is_pinned(const struct bpf_map *map) 8905 { 8906 return map->pinned; 8907 } 8908 8909 static void sanitize_pin_path(char *s) 8910 { 8911 /* bpffs disallows periods in path names */ 8912 while (*s) { 8913 if (*s == '.') 8914 *s = '_'; 8915 s++; 8916 } 8917 } 8918 8919 int bpf_object__pin_maps(struct bpf_object *obj, const char *path) 8920 { 8921 struct bpf_map *map; 8922 int err; 8923 8924 if (!obj) 8925 return libbpf_err(-ENOENT); 8926 8927 if (obj->state < OBJ_PREPARED) { 8928 pr_warn("object not yet loaded; load it first\n"); 8929 return libbpf_err(-ENOENT); 8930 } 8931 8932 bpf_object__for_each_map(map, obj) { 8933 char *pin_path = NULL; 8934 char buf[PATH_MAX]; 8935 8936 if (!map->autocreate) 8937 continue; 8938 8939 if (path) { 8940 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8941 if (err) 8942 goto err_unpin_maps; 8943 sanitize_pin_path(buf); 8944 pin_path = buf; 8945 } else if (!map->pin_path) { 8946 continue; 8947 } 8948 8949 err = bpf_map__pin(map, pin_path); 8950 if (err) 8951 goto err_unpin_maps; 8952 } 8953 8954 return 0; 8955 
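/* error path: walk back over maps processed so far and unpin any
 * that have a pin path set
 */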
8956 err_unpin_maps: 8957 while ((map = bpf_object__prev_map(obj, map))) { 8958 if (!map->pin_path) 8959 continue; 8960 8961 bpf_map__unpin(map, NULL); 8962 } 8963 8964 return libbpf_err(err); 8965 } 8966 8967 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) 8968 { 8969 struct bpf_map *map; 8970 int err; 8971 8972 if (!obj) 8973 return libbpf_err(-ENOENT); 8974 8975 bpf_object__for_each_map(map, obj) { 8976 char *pin_path = NULL; 8977 char buf[PATH_MAX]; 8978 8979 if (path) { 8980 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8981 if (err) 8982 return libbpf_err(err); 8983 sanitize_pin_path(buf); 8984 pin_path = buf; 8985 } else if (!map->pin_path) { 8986 continue; 8987 } 8988 8989 err = bpf_map__unpin(map, pin_path); 8990 if (err) 8991 return libbpf_err(err); 8992 } 8993 8994 return 0; 8995 } 8996 8997 int bpf_object__pin_programs(struct bpf_object *obj, const char *path) 8998 { 8999 struct bpf_program *prog; 9000 char buf[PATH_MAX]; 9001 int err; 9002 9003 if (!obj) 9004 return libbpf_err(-ENOENT); 9005 9006 if (obj->state < OBJ_LOADED) { 9007 pr_warn("object not yet loaded; load it first\n"); 9008 return libbpf_err(-ENOENT); 9009 } 9010 9011 bpf_object__for_each_program(prog, obj) { 9012 err = pathname_concat(buf, sizeof(buf), path, prog->name); 9013 if (err) 9014 goto err_unpin_programs; 9015 9016 err = bpf_program__pin(prog, buf); 9017 if (err) 9018 goto err_unpin_programs; 9019 } 9020 9021 return 0; 9022 9023 err_unpin_programs: 9024 while ((prog = bpf_object__prev_program(obj, prog))) { 9025 if (pathname_concat(buf, sizeof(buf), path, prog->name)) 9026 continue; 9027 9028 bpf_program__unpin(prog, buf); 9029 } 9030 9031 return libbpf_err(err); 9032 } 9033 9034 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) 9035 { 9036 struct bpf_program *prog; 9037 int err; 9038 9039 if (!obj) 9040 return libbpf_err(-ENOENT); 9041 9042 bpf_object__for_each_program(prog, obj) { 9043 char buf[PATH_MAX]; 9044 9045 err = pathname_concat(buf, sizeof(buf), path, prog->name); 9046 if (err) 9047 return libbpf_err(err); 9048 9049 err = bpf_program__unpin(prog, buf); 9050 if (err) 9051 return libbpf_err(err); 9052 } 9053 9054 return 0; 9055 } 9056 9057 int bpf_object__pin(struct bpf_object *obj, const char *path) 9058 { 9059 int err; 9060 9061 err = bpf_object__pin_maps(obj, path); 9062 if (err) 9063 return libbpf_err(err); 9064 9065 err = bpf_object__pin_programs(obj, path); 9066 if (err) { 9067 bpf_object__unpin_maps(obj, path); 9068 return libbpf_err(err); 9069 } 9070 9071 return 0; 9072 } 9073 9074 int bpf_object__unpin(struct bpf_object *obj, const char *path) 9075 { 9076 int err; 9077 9078 err = bpf_object__unpin_programs(obj, path); 9079 if (err) 9080 return libbpf_err(err); 9081 9082 err = bpf_object__unpin_maps(obj, path); 9083 if (err) 9084 return libbpf_err(err); 9085 9086 return 0; 9087 } 9088 9089 static void bpf_map__destroy(struct bpf_map *map) 9090 { 9091 if (map->inner_map) { 9092 bpf_map__destroy(map->inner_map); 9093 zfree(&map->inner_map); 9094 } 9095 9096 zfree(&map->init_slots); 9097 map->init_slots_sz = 0; 9098 9099 if (map->mmaped && map->mmaped != map->obj->arena_data) 9100 munmap(map->mmaped, bpf_map_mmap_sz(map)); 9101 map->mmaped = NULL; 9102 9103 if (map->st_ops) { 9104 zfree(&map->st_ops->data); 9105 zfree(&map->st_ops->progs); 9106 zfree(&map->st_ops->kern_func_off); 9107 zfree(&map->st_ops); 9108 } 9109 9110 zfree(&map->name); 9111 zfree(&map->real_name); 9112 zfree(&map->pin_path); 9113 9114 if (map->fd >= 0) 
9115 zclose(map->fd); 9116 } 9117 9118 void bpf_object__close(struct bpf_object *obj) 9119 { 9120 size_t i; 9121 9122 if (IS_ERR_OR_NULL(obj)) 9123 return; 9124 9125 /* 9126 * if user called bpf_object__prepare() without ever getting to 9127 * bpf_object__load(), we need to clean up stuff that is normally 9128 * cleaned up at the end of loading step 9129 */ 9130 bpf_object_post_load_cleanup(obj); 9131 9132 usdt_manager_free(obj->usdt_man); 9133 obj->usdt_man = NULL; 9134 9135 bpf_gen__free(obj->gen_loader); 9136 bpf_object__elf_finish(obj); 9137 bpf_object_unload(obj); 9138 btf__free(obj->btf); 9139 btf__free(obj->btf_vmlinux); 9140 btf_ext__free(obj->btf_ext); 9141 9142 for (i = 0; i < obj->nr_maps; i++) 9143 bpf_map__destroy(&obj->maps[i]); 9144 9145 zfree(&obj->btf_custom_path); 9146 zfree(&obj->kconfig); 9147 9148 for (i = 0; i < obj->nr_extern; i++) 9149 zfree(&obj->externs[i].essent_name); 9150 9151 zfree(&obj->externs); 9152 obj->nr_extern = 0; 9153 9154 zfree(&obj->maps); 9155 obj->nr_maps = 0; 9156 9157 if (obj->programs && obj->nr_programs) { 9158 for (i = 0; i < obj->nr_programs; i++) 9159 bpf_program__exit(&obj->programs[i]); 9160 } 9161 zfree(&obj->programs); 9162 9163 zfree(&obj->feat_cache); 9164 zfree(&obj->token_path); 9165 if (obj->token_fd > 0) 9166 close(obj->token_fd); 9167 9168 zfree(&obj->arena_data); 9169 9170 free(obj); 9171 } 9172 9173 const char *bpf_object__name(const struct bpf_object *obj) 9174 { 9175 return obj ? obj->name : libbpf_err_ptr(-EINVAL); 9176 } 9177 9178 unsigned int bpf_object__kversion(const struct bpf_object *obj) 9179 { 9180 return obj ? obj->kern_version : 0; 9181 } 9182 9183 int bpf_object__token_fd(const struct bpf_object *obj) 9184 { 9185 return obj->token_fd ?: -1; 9186 } 9187 9188 struct btf *bpf_object__btf(const struct bpf_object *obj) 9189 { 9190 return obj ? obj->btf : NULL; 9191 } 9192 9193 int bpf_object__btf_fd(const struct bpf_object *obj) 9194 { 9195 return obj->btf ? btf__fd(obj->btf) : -1; 9196 } 9197 9198 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 9199 { 9200 if (obj->state >= OBJ_LOADED) 9201 return libbpf_err(-EINVAL); 9202 9203 obj->kern_version = kern_version; 9204 9205 return 0; 9206 } 9207 9208 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) 9209 { 9210 struct bpf_gen *gen; 9211 9212 if (!opts) 9213 return libbpf_err(-EFAULT); 9214 if (!OPTS_VALID(opts, gen_loader_opts)) 9215 return libbpf_err(-EINVAL); 9216 gen = calloc(sizeof(*gen), 1); 9217 if (!gen) 9218 return libbpf_err(-ENOMEM); 9219 gen->opts = opts; 9220 gen->swapped_endian = !is_native_endianness(obj); 9221 obj->gen_loader = gen; 9222 return 0; 9223 } 9224 9225 static struct bpf_program * 9226 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, 9227 bool forward) 9228 { 9229 size_t nr_programs = obj->nr_programs; 9230 ssize_t idx; 9231 9232 if (!nr_programs) 9233 return NULL; 9234 9235 if (!p) 9236 /* Iter from the beginning */ 9237 return forward ? &obj->programs[0] : 9238 &obj->programs[nr_programs - 1]; 9239 9240 if (p->obj != obj) { 9241 pr_warn("error: program handler doesn't match object\n"); 9242 return errno = EINVAL, NULL; 9243 } 9244 9245 idx = (p - obj->programs) + (forward ? 
1 : -1); 9246 if (idx >= obj->nr_programs || idx < 0) 9247 return NULL; 9248 return &obj->programs[idx]; 9249 } 9250 9251 struct bpf_program * 9252 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 9253 { 9254 struct bpf_program *prog = prev; 9255 9256 do { 9257 prog = __bpf_program__iter(prog, obj, true); 9258 } while (prog && prog_is_subprog(obj, prog)); 9259 9260 return prog; 9261 } 9262 9263 struct bpf_program * 9264 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) 9265 { 9266 struct bpf_program *prog = next; 9267 9268 do { 9269 prog = __bpf_program__iter(prog, obj, false); 9270 } while (prog && prog_is_subprog(obj, prog)); 9271 9272 return prog; 9273 } 9274 9275 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 9276 { 9277 prog->prog_ifindex = ifindex; 9278 } 9279 9280 const char *bpf_program__name(const struct bpf_program *prog) 9281 { 9282 return prog->name; 9283 } 9284 9285 const char *bpf_program__section_name(const struct bpf_program *prog) 9286 { 9287 return prog->sec_name; 9288 } 9289 9290 bool bpf_program__autoload(const struct bpf_program *prog) 9291 { 9292 return prog->autoload; 9293 } 9294 9295 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 9296 { 9297 if (prog->obj->state >= OBJ_LOADED) 9298 return libbpf_err(-EINVAL); 9299 9300 prog->autoload = autoload; 9301 return 0; 9302 } 9303 9304 bool bpf_program__autoattach(const struct bpf_program *prog) 9305 { 9306 return prog->autoattach; 9307 } 9308 9309 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) 9310 { 9311 prog->autoattach = autoattach; 9312 } 9313 9314 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) 9315 { 9316 return prog->insns; 9317 } 9318 9319 size_t bpf_program__insn_cnt(const struct bpf_program *prog) 9320 { 9321 return prog->insns_cnt; 9322 } 9323 9324 int bpf_program__set_insns(struct bpf_program *prog, 9325 struct bpf_insn *new_insns, size_t new_insn_cnt) 9326 { 9327 struct bpf_insn *insns; 9328 9329 if (prog->obj->state >= OBJ_LOADED) 9330 return libbpf_err(-EBUSY); 9331 9332 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 9333 /* NULL is a valid return from reallocarray if the new count is zero */ 9334 if (!insns && new_insn_cnt) { 9335 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 9336 return libbpf_err(-ENOMEM); 9337 } 9338 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 9339 9340 prog->insns = insns; 9341 prog->insns_cnt = new_insn_cnt; 9342 return 0; 9343 } 9344 9345 int bpf_program__fd(const struct bpf_program *prog) 9346 { 9347 if (!prog) 9348 return libbpf_err(-EINVAL); 9349 9350 if (prog->fd < 0) 9351 return libbpf_err(-ENOENT); 9352 9353 return prog->fd; 9354 } 9355 9356 __alias(bpf_program__type) 9357 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 9358 9359 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) 9360 { 9361 return prog->type; 9362 } 9363 9364 static size_t custom_sec_def_cnt; 9365 static struct bpf_sec_def *custom_sec_defs; 9366 static struct bpf_sec_def custom_fallback_def; 9367 static bool has_custom_fallback_def; 9368 static int last_custom_sec_def_handler_id; 9369 9370 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 9371 { 9372 if (prog->obj->state >= OBJ_LOADED) 9373 return libbpf_err(-EBUSY); 9374 9375 /* if type is not changed, do nothing */ 9376 if (prog->type == type) 9377 return 0; 9378 9379 
prog->type = type;
9380
9381 /* If a program type was changed, we need to reset associated SEC()
9382 * handler, as it will be invalid now. The only exception is a generic
9383 * fallback handler, which by definition is program type-agnostic and
9384 * is a catch-all custom handler, optionally set by the application,
9385 * so should be able to handle any type of BPF program.
9386 */
9387 if (prog->sec_def != &custom_fallback_def)
9388 prog->sec_def = NULL;
9389 return 0;
9390 }
9391
9392 __alias(bpf_program__expected_attach_type)
9393 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9394
9395 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9396 {
9397 return prog->expected_attach_type;
9398 }
9399
9400 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9401 enum bpf_attach_type type)
9402 {
9403 if (prog->obj->state >= OBJ_LOADED)
9404 return libbpf_err(-EBUSY);
9405
9406 prog->expected_attach_type = type;
9407 return 0;
9408 }
9409
9410 __u32 bpf_program__flags(const struct bpf_program *prog)
9411 {
9412 return prog->prog_flags;
9413 }
9414
9415 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9416 {
9417 if (prog->obj->state >= OBJ_LOADED)
9418 return libbpf_err(-EBUSY);
9419
9420 prog->prog_flags = flags;
9421 return 0;
9422 }
9423
9424 __u32 bpf_program__log_level(const struct bpf_program *prog)
9425 {
9426 return prog->log_level;
9427 }
9428
9429 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9430 {
9431 if (prog->obj->state >= OBJ_LOADED)
9432 return libbpf_err(-EBUSY);
9433
9434 prog->log_level = log_level;
9435 return 0;
9436 }
9437
9438 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9439 {
9440 *log_size = prog->log_size;
9441 return prog->log_buf;
9442 }
9443
9444 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9445 {
9446 if (log_size && !log_buf)
9447 return libbpf_err(-EINVAL);
9448 if (log_size > UINT_MAX)
9449 return libbpf_err(-EINVAL);
9450 if (prog->obj->state >= OBJ_LOADED)
9451 return libbpf_err(-EBUSY);
9452
9453 prog->log_buf = log_buf;
9454 prog->log_size = log_size;
9455 return 0;
9456 }
9457
9458 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...)
{ \ 9459 .sec = (char *)sec_pfx, \ 9460 .prog_type = BPF_PROG_TYPE_##ptype, \ 9461 .expected_attach_type = atype, \ 9462 .cookie = (long)(flags), \ 9463 .prog_prepare_load_fn = libbpf_prepare_prog_load, \ 9464 __VA_ARGS__ \ 9465 } 9466 9467 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9468 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9469 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9470 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9471 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9472 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9473 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9474 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9475 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9476 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9477 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9478 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9479 9480 static const struct bpf_sec_def section_defs[] = { 9481 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), 9482 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), 9483 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), 9484 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9485 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9486 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9487 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9488 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9489 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9490 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9491 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9492 SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), 9493 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9494 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9495 SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), 9496 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9497 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9498 SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), 9499 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9500 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9501 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 9502 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 9503 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 9504 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 9505 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), 9506 
SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 9507 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9508 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9509 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9510 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), 9511 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), 9512 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9513 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9514 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9515 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9516 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9517 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9518 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), 9519 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), 9520 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), 9521 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), 9522 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9523 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9524 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9525 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), 9526 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), 9527 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 9528 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), 9529 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 9530 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), 9531 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 9532 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 9533 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), 9534 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 9535 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 9536 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 9537 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), 9538 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), 9539 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), 9540 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), 9541 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), 9542 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), 9543 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), 9544 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), 9545 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), 9546 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), 9547 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), 9548 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), 9549 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), 9550 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), 9551 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), 9552 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), 9553 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), 9554 SEC_DEF("cgroup/sock_create", 
CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), 9555 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), 9556 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), 9557 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), 9558 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), 9559 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), 9560 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), 9561 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), 9562 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), 9563 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), 9564 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), 9565 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), 9566 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), 9567 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), 9568 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), 9569 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), 9570 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), 9571 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), 9572 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), 9573 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), 9574 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), 9575 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), 9576 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), 9577 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), 9578 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), 9579 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), 9580 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), 9581 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), 9582 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), 9583 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), 9584 }; 9585 9586 int libbpf_register_prog_handler(const char *sec, 9587 enum bpf_prog_type prog_type, 9588 enum bpf_attach_type exp_attach_type, 9589 const struct libbpf_prog_handler_opts *opts) 9590 { 9591 struct bpf_sec_def *sec_def; 9592 9593 if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) 9594 return libbpf_err(-EINVAL); 9595 9596 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ 9597 return libbpf_err(-E2BIG); 9598 9599 if (sec) { 9600 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, 9601 sizeof(*sec_def)); 9602 if (!sec_def) 9603 return libbpf_err(-ENOMEM); 9604 9605 custom_sec_defs = sec_def; 9606 sec_def = &custom_sec_defs[custom_sec_def_cnt]; 9607 } else { 9608 if (has_custom_fallback_def) 9609 return libbpf_err(-EBUSY); 9610 9611 sec_def = &custom_fallback_def; 9612 } 9613 9614 sec_def->sec = sec ? 
strdup(sec) : NULL; 9615 if (sec && !sec_def->sec) 9616 return libbpf_err(-ENOMEM); 9617 9618 sec_def->prog_type = prog_type; 9619 sec_def->expected_attach_type = exp_attach_type; 9620 sec_def->cookie = OPTS_GET(opts, cookie, 0); 9621 9622 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); 9623 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); 9624 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); 9625 9626 sec_def->handler_id = ++last_custom_sec_def_handler_id; 9627 9628 if (sec) 9629 custom_sec_def_cnt++; 9630 else 9631 has_custom_fallback_def = true; 9632 9633 return sec_def->handler_id; 9634 } 9635 9636 int libbpf_unregister_prog_handler(int handler_id) 9637 { 9638 struct bpf_sec_def *sec_defs; 9639 int i; 9640 9641 if (handler_id <= 0) 9642 return libbpf_err(-EINVAL); 9643 9644 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { 9645 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); 9646 has_custom_fallback_def = false; 9647 return 0; 9648 } 9649 9650 for (i = 0; i < custom_sec_def_cnt; i++) { 9651 if (custom_sec_defs[i].handler_id == handler_id) 9652 break; 9653 } 9654 9655 if (i == custom_sec_def_cnt) 9656 return libbpf_err(-ENOENT); 9657 9658 free(custom_sec_defs[i].sec); 9659 for (i = i + 1; i < custom_sec_def_cnt; i++) 9660 custom_sec_defs[i - 1] = custom_sec_defs[i]; 9661 custom_sec_def_cnt--; 9662 9663 /* try to shrink the array, but it's ok if we couldn't */ 9664 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); 9665 /* if new count is zero, reallocarray can return a valid NULL result; 9666 * in this case the previous pointer will be freed, so we *have to* 9667 * reassign old pointer to the new value (even if it's NULL) 9668 */ 9669 if (sec_defs || custom_sec_def_cnt == 0) 9670 custom_sec_defs = sec_defs; 9671 9672 return 0; 9673 } 9674 9675 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) 9676 { 9677 size_t len = strlen(sec_def->sec); 9678 9679 /* "type/" always has to have proper SEC("type/extras") form */ 9680 if (sec_def->sec[len - 1] == '/') { 9681 if (str_has_pfx(sec_name, sec_def->sec)) 9682 return true; 9683 return false; 9684 } 9685 9686 /* "type+" means it can be either exact SEC("type") or 9687 * well-formed SEC("type/extras") with proper '/' separator 9688 */ 9689 if (sec_def->sec[len - 1] == '+') { 9690 len--; 9691 /* not even a prefix */ 9692 if (strncmp(sec_name, sec_def->sec, len) != 0) 9693 return false; 9694 /* exact match or has '/' separator */ 9695 if (sec_name[len] == '\0' || sec_name[len] == '/') 9696 return true; 9697 return false; 9698 } 9699 9700 return strcmp(sec_name, sec_def->sec) == 0; 9701 } 9702 9703 static const struct bpf_sec_def *find_sec_def(const char *sec_name) 9704 { 9705 const struct bpf_sec_def *sec_def; 9706 int i, n; 9707 9708 n = custom_sec_def_cnt; 9709 for (i = 0; i < n; i++) { 9710 sec_def = &custom_sec_defs[i]; 9711 if (sec_def_matches(sec_def, sec_name)) 9712 return sec_def; 9713 } 9714 9715 n = ARRAY_SIZE(section_defs); 9716 for (i = 0; i < n; i++) { 9717 sec_def = &section_defs[i]; 9718 if (sec_def_matches(sec_def, sec_name)) 9719 return sec_def; 9720 } 9721 9722 if (has_custom_fallback_def) 9723 return &custom_fallback_def; 9724 9725 return NULL; 9726 } 9727 9728 #define MAX_TYPE_NAME_SIZE 32 9729 9730 static char *libbpf_get_type_names(bool attach_type) 9731 { 9732 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; 9733 char *buf; 9734 9735 buf = malloc(len);
9736 if (!buf) 9737 return NULL; 9738 9739 buf[0] = '\0'; 9740 /* Forge string buf with all available names */ 9741 for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 9742 const struct bpf_sec_def *sec_def = &section_defs[i]; 9743 9744 if (attach_type) { 9745 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9746 continue; 9747 9748 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9749 continue; 9750 } 9751 9752 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { 9753 free(buf); 9754 return NULL; 9755 } 9756 strcat(buf, " "); 9757 strcat(buf, section_defs[i].sec); 9758 } 9759 9760 return buf; 9761 } 9762 9763 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 9764 enum bpf_attach_type *expected_attach_type) 9765 { 9766 const struct bpf_sec_def *sec_def; 9767 char *type_names; 9768 9769 if (!name) 9770 return libbpf_err(-EINVAL); 9771 9772 sec_def = find_sec_def(name); 9773 if (sec_def) { 9774 *prog_type = sec_def->prog_type; 9775 *expected_attach_type = sec_def->expected_attach_type; 9776 return 0; 9777 } 9778 9779 pr_debug("failed to guess program type from ELF section '%s'\n", name); 9780 type_names = libbpf_get_type_names(false); 9781 if (type_names != NULL) { 9782 pr_debug("supported section(type) names are:%s\n", type_names); 9783 free(type_names); 9784 } 9785 9786 return libbpf_err(-ESRCH); 9787 } 9788 9789 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) 9790 { 9791 if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) 9792 return NULL; 9793 9794 return attach_type_name[t]; 9795 } 9796 9797 const char *libbpf_bpf_link_type_str(enum bpf_link_type t) 9798 { 9799 if (t < 0 || t >= ARRAY_SIZE(link_type_name)) 9800 return NULL; 9801 9802 return link_type_name[t]; 9803 } 9804 9805 const char *libbpf_bpf_map_type_str(enum bpf_map_type t) 9806 { 9807 if (t < 0 || t >= ARRAY_SIZE(map_type_name)) 9808 return NULL; 9809 9810 return map_type_name[t]; 9811 } 9812 9813 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) 9814 { 9815 if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) 9816 return NULL; 9817 9818 return prog_type_name[t]; 9819 } 9820 9821 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, 9822 int sec_idx, 9823 size_t offset) 9824 { 9825 struct bpf_map *map; 9826 size_t i; 9827 9828 for (i = 0; i < obj->nr_maps; i++) { 9829 map = &obj->maps[i]; 9830 if (!bpf_map__is_struct_ops(map)) 9831 continue; 9832 if (map->sec_idx == sec_idx && 9833 map->sec_offset <= offset && 9834 offset - map->sec_offset < map->def.value_size) 9835 return map; 9836 } 9837 9838 return NULL; 9839 } 9840 9841 /* Collect the reloc from ELF, populate the st_ops->progs[], and update 9842 * st_ops->data for shadow type.
9843 */ 9844 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 9845 Elf64_Shdr *shdr, Elf_Data *data) 9846 { 9847 const struct btf_type *type; 9848 const struct btf_member *member; 9849 struct bpf_struct_ops *st_ops; 9850 struct bpf_program *prog; 9851 unsigned int shdr_idx; 9852 const struct btf *btf; 9853 struct bpf_map *map; 9854 unsigned int moff, insn_idx; 9855 const char *name; 9856 __u32 member_idx; 9857 Elf64_Sym *sym; 9858 Elf64_Rel *rel; 9859 int i, nrels; 9860 9861 btf = obj->btf; 9862 nrels = shdr->sh_size / shdr->sh_entsize; 9863 for (i = 0; i < nrels; i++) { 9864 rel = elf_rel_by_idx(data, i); 9865 if (!rel) { 9866 pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 9867 return -LIBBPF_ERRNO__FORMAT; 9868 } 9869 9870 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 9871 if (!sym) { 9872 pr_warn("struct_ops reloc: symbol %zx not found\n", 9873 (size_t)ELF64_R_SYM(rel->r_info)); 9874 return -LIBBPF_ERRNO__FORMAT; 9875 } 9876 9877 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 9878 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); 9879 if (!map) { 9880 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", 9881 (size_t)rel->r_offset); 9882 return -EINVAL; 9883 } 9884 9885 moff = rel->r_offset - map->sec_offset; 9886 shdr_idx = sym->st_shndx; 9887 st_ops = map->st_ops; 9888 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 9889 map->name, 9890 (long long)(rel->r_info >> 32), 9891 (long long)sym->st_value, 9892 shdr_idx, (size_t)rel->r_offset, 9893 map->sec_offset, sym->st_name, name); 9894 9895 if (shdr_idx >= SHN_LORESERVE) { 9896 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", 9897 map->name, (size_t)rel->r_offset, shdr_idx); 9898 return -LIBBPF_ERRNO__RELOC; 9899 } 9900 if (sym->st_value % BPF_INSN_SZ) { 9901 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", 9902 map->name, (unsigned long long)sym->st_value); 9903 return -LIBBPF_ERRNO__FORMAT; 9904 } 9905 insn_idx = sym->st_value / BPF_INSN_SZ; 9906 9907 type = btf__type_by_id(btf, st_ops->type_id); 9908 member = find_member_by_offset(type, moff * 8); 9909 if (!member) { 9910 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 9911 map->name, moff); 9912 return -EINVAL; 9913 } 9914 member_idx = member - btf_members(type); 9915 name = btf__name_by_offset(btf, member->name_off); 9916 9917 if (!resolve_func_ptr(btf, member->type, NULL)) { 9918 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 9919 map->name, name); 9920 return -EINVAL; 9921 } 9922 9923 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); 9924 if (!prog) { 9925 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 9926 map->name, shdr_idx, name); 9927 return -EINVAL; 9928 } 9929 9930 /* prevent the use of BPF prog with invalid type */ 9931 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 9932 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", 9933 map->name, prog->name); 9934 return -EINVAL; 9935 } 9936 9937 st_ops->progs[member_idx] = prog; 9938 9939 /* st_ops->data will be exposed to users, being returned by 9940 * bpf_map__initial_value() as a pointer to the shadow 9941 * type. All function pointers in the original struct type 9942 * should be converted to a pointer to struct bpf_program 9943 * in the shadow type. 
9944 */ 9945 *((struct bpf_program **)(st_ops->data + moff)) = prog; 9946 } 9947 9948 return 0; 9949 } 9950 9951 #define BTF_TRACE_PREFIX "btf_trace_" 9952 #define BTF_LSM_PREFIX "bpf_lsm_" 9953 #define BTF_ITER_PREFIX "bpf_iter_" 9954 #define BTF_MAX_NAME_SIZE 128 9955 9956 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, 9957 const char **prefix, int *kind) 9958 { 9959 switch (attach_type) { 9960 case BPF_TRACE_RAW_TP: 9961 *prefix = BTF_TRACE_PREFIX; 9962 *kind = BTF_KIND_TYPEDEF; 9963 break; 9964 case BPF_LSM_MAC: 9965 case BPF_LSM_CGROUP: 9966 *prefix = BTF_LSM_PREFIX; 9967 *kind = BTF_KIND_FUNC; 9968 break; 9969 case BPF_TRACE_ITER: 9970 *prefix = BTF_ITER_PREFIX; 9971 *kind = BTF_KIND_FUNC; 9972 break; 9973 default: 9974 *prefix = ""; 9975 *kind = BTF_KIND_FUNC; 9976 } 9977 } 9978 9979 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 9980 const char *name, __u32 kind) 9981 { 9982 char btf_type_name[BTF_MAX_NAME_SIZE]; 9983 int ret; 9984 9985 ret = snprintf(btf_type_name, sizeof(btf_type_name), 9986 "%s%s", prefix, name); 9987 /* snprintf returns the number of characters written excluding the 9988 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 9989 * indicates truncation. 9990 */ 9991 if (ret < 0 || ret >= sizeof(btf_type_name)) 9992 return -ENAMETOOLONG; 9993 return btf__find_by_name_kind(btf, btf_type_name, kind); 9994 } 9995 9996 static inline int find_attach_btf_id(struct btf *btf, const char *name, 9997 enum bpf_attach_type attach_type) 9998 { 9999 const char *prefix; 10000 int kind; 10001 10002 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); 10003 return find_btf_by_prefix_kind(btf, prefix, name, kind); 10004 } 10005 10006 int libbpf_find_vmlinux_btf_id(const char *name, 10007 enum bpf_attach_type attach_type) 10008 { 10009 struct btf *btf; 10010 int err; 10011 10012 btf = btf__load_vmlinux_btf(); 10013 err = libbpf_get_error(btf); 10014 if (err) { 10015 pr_warn("vmlinux BTF is not found\n"); 10016 return libbpf_err(err); 10017 } 10018 10019 err = find_attach_btf_id(btf, name, attach_type); 10020 if (err <= 0) 10021 pr_warn("%s is not found in vmlinux BTF\n", name); 10022 10023 btf__free(btf); 10024 return libbpf_err(err); 10025 } 10026 10027 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) 10028 { 10029 struct bpf_prog_info info; 10030 __u32 info_len = sizeof(info); 10031 struct btf *btf; 10032 int err; 10033 10034 memset(&info, 0, info_len); 10035 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); 10036 if (err) { 10037 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", 10038 attach_prog_fd, errstr(err)); 10039 return err; 10040 } 10041 10042 err = -EINVAL; 10043 if (!info.btf_id) { 10044 pr_warn("The target program doesn't have BTF\n"); 10045 goto out; 10046 } 10047 btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); 10048 err = libbpf_get_error(btf); 10049 if (err) { 10050 pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); 10051 goto out; 10052 } 10053 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 10054 btf__free(btf); 10055 if (err <= 0) { 10056 pr_warn("%s is not found in prog's BTF\n", name); 10057 goto out; 10058 } 10059 out: 10060 return err; 10061 } 10062 10063 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, 10064 enum bpf_attach_type attach_type, 10065 int *btf_obj_fd, int *btf_type_id) 10066 { 10067 int ret, i, mod_len; 10068 const char *fn_name, *mod_name = 
NULL; 10069 10070 fn_name = strchr(attach_name, ':'); 10071 if (fn_name) { 10072 mod_name = attach_name; 10073 mod_len = fn_name - mod_name; 10074 fn_name++; 10075 } 10076 10077 if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) { 10078 ret = find_attach_btf_id(obj->btf_vmlinux, 10079 mod_name ? fn_name : attach_name, 10080 attach_type); 10081 if (ret > 0) { 10082 *btf_obj_fd = 0; /* vmlinux BTF */ 10083 *btf_type_id = ret; 10084 return 0; 10085 } 10086 if (ret != -ENOENT) 10087 return ret; 10088 } 10089 10090 ret = load_module_btfs(obj); 10091 if (ret) 10092 return ret; 10093 10094 for (i = 0; i < obj->btf_module_cnt; i++) { 10095 const struct module_btf *mod = &obj->btf_modules[i]; 10096 10097 if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) 10098 continue; 10099 10100 ret = find_attach_btf_id(mod->btf, 10101 mod_name ? fn_name : attach_name, 10102 attach_type); 10103 if (ret > 0) { 10104 *btf_obj_fd = mod->fd; 10105 *btf_type_id = ret; 10106 return 0; 10107 } 10108 if (ret == -ENOENT) 10109 continue; 10110 10111 return ret; 10112 } 10113 10114 return -ESRCH; 10115 } 10116 10117 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 10118 int *btf_obj_fd, int *btf_type_id) 10119 { 10120 enum bpf_attach_type attach_type = prog->expected_attach_type; 10121 __u32 attach_prog_fd = prog->attach_prog_fd; 10122 int err = 0; 10123 10124 /* BPF program's BTF ID */ 10125 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { 10126 if (!attach_prog_fd) { 10127 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 10128 return -EINVAL; 10129 } 10130 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); 10131 if (err < 0) { 10132 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", 10133 prog->name, attach_prog_fd, attach_name, errstr(err)); 10134 return err; 10135 } 10136 *btf_obj_fd = 0; 10137 *btf_type_id = err; 10138 return 0; 10139 } 10140 10141 /* kernel/module BTF ID */ 10142 if (prog->obj->gen_loader) { 10143 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); 10144 *btf_obj_fd = 0; 10145 *btf_type_id = 1; 10146 } else { 10147 err = find_kernel_btf_id(prog->obj, attach_name, 10148 attach_type, btf_obj_fd, 10149 btf_type_id); 10150 } 10151 if (err) { 10152 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", 10153 prog->name, attach_name, errstr(err)); 10154 return err; 10155 } 10156 return 0; 10157 } 10158 10159 int libbpf_attach_type_by_name(const char *name, 10160 enum bpf_attach_type *attach_type) 10161 { 10162 char *type_names; 10163 const struct bpf_sec_def *sec_def; 10164 10165 if (!name) 10166 return libbpf_err(-EINVAL); 10167 10168 sec_def = find_sec_def(name); 10169 if (!sec_def) { 10170 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 10171 type_names = libbpf_get_type_names(true); 10172 if (type_names != NULL) { 10173 pr_debug("attachable section(type) names are:%s\n", type_names); 10174 free(type_names); 10175 } 10176 10177 return libbpf_err(-EINVAL); 10178 } 10179 10180 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 10181 return libbpf_err(-EINVAL); 10182 if (!(sec_def->cookie & SEC_ATTACHABLE)) 10183 return libbpf_err(-EINVAL); 10184 10185 *attach_type = sec_def->expected_attach_type; 10186 return 0; 10187 } 10188 10189 int bpf_map__fd(const struct bpf_map *map) 10190 { 10191 if (!map) 10192 return libbpf_err(-EINVAL); 10193 if (!map_is_created(map)) 10194 return -1; 
10195 return map->fd; 10196 } 10197 10198 static bool map_uses_real_name(const struct bpf_map *map) 10199 { 10200 /* Since libbpf started to support custom .data.* and .rodata.* maps, 10201 * their user-visible name differs from kernel-visible name. Users see 10202 * such map's corresponding ELF section name as a map name. 10203 * This check distinguishes .data/.rodata from .data.* and .rodata.* 10204 * maps to know which name has to be returned to the user. 10205 */ 10206 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) 10207 return true; 10208 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) 10209 return true; 10210 return false; 10211 } 10212 10213 const char *bpf_map__name(const struct bpf_map *map) 10214 { 10215 if (!map) 10216 return NULL; 10217 10218 if (map_uses_real_name(map)) 10219 return map->real_name; 10220 10221 return map->name; 10222 } 10223 10224 enum bpf_map_type bpf_map__type(const struct bpf_map *map) 10225 { 10226 return map->def.type; 10227 } 10228 10229 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) 10230 { 10231 if (map_is_created(map)) 10232 return libbpf_err(-EBUSY); 10233 map->def.type = type; 10234 return 0; 10235 } 10236 10237 __u32 bpf_map__map_flags(const struct bpf_map *map) 10238 { 10239 return map->def.map_flags; 10240 } 10241 10242 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) 10243 { 10244 if (map_is_created(map)) 10245 return libbpf_err(-EBUSY); 10246 map->def.map_flags = flags; 10247 return 0; 10248 } 10249 10250 __u64 bpf_map__map_extra(const struct bpf_map *map) 10251 { 10252 return map->map_extra; 10253 } 10254 10255 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) 10256 { 10257 if (map_is_created(map)) 10258 return libbpf_err(-EBUSY); 10259 map->map_extra = map_extra; 10260 return 0; 10261 } 10262 10263 __u32 bpf_map__numa_node(const struct bpf_map *map) 10264 { 10265 return map->numa_node; 10266 } 10267 10268 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) 10269 { 10270 if (map_is_created(map)) 10271 return libbpf_err(-EBUSY); 10272 map->numa_node = numa_node; 10273 return 0; 10274 } 10275 10276 __u32 bpf_map__key_size(const struct bpf_map *map) 10277 { 10278 return map->def.key_size; 10279 } 10280 10281 int bpf_map__set_key_size(struct bpf_map *map, __u32 size) 10282 { 10283 if (map_is_created(map)) 10284 return libbpf_err(-EBUSY); 10285 map->def.key_size = size; 10286 return 0; 10287 } 10288 10289 __u32 bpf_map__value_size(const struct bpf_map *map) 10290 { 10291 return map->def.value_size; 10292 } 10293 10294 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 10295 { 10296 struct btf *btf; 10297 struct btf_type *datasec_type, *var_type; 10298 struct btf_var_secinfo *var; 10299 const struct btf_type *array_type; 10300 const struct btf_array *array; 10301 int vlen, element_sz, new_array_id; 10302 __u32 nr_elements; 10303 10304 /* check btf existence */ 10305 btf = bpf_object__btf(map->obj); 10306 if (!btf) 10307 return -ENOENT; 10308 10309 /* verify map is datasec */ 10310 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 10311 if (!btf_is_datasec(datasec_type)) { 10312 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 10313 bpf_map__name(map)); 10314 return -EINVAL; 10315 } 10316 10317 /* verify datasec has at least one var */ 10318 vlen = btf_vlen(datasec_type); 10319 if (vlen == 0) { 10320 pr_warn("map '%s': cannot be resized, map value datasec is 
empty\n", 10321 bpf_map__name(map)); 10322 return -EINVAL; 10323 } 10324 10325 /* verify last var in the datasec is an array */ 10326 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10327 var_type = btf_type_by_id(btf, var->type); 10328 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 10329 if (!btf_is_array(array_type)) { 10330 pr_warn("map '%s': cannot be resized, last var must be an array\n", 10331 bpf_map__name(map)); 10332 return -EINVAL; 10333 } 10334 10335 /* verify request size aligns with array */ 10336 array = btf_array(array_type); 10337 element_sz = btf__resolve_size(btf, array->type); 10338 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 10339 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 10340 bpf_map__name(map), element_sz, size); 10341 return -EINVAL; 10342 } 10343 10344 /* create a new array based on the existing array, but with new length */ 10345 nr_elements = (size - var->offset) / element_sz; 10346 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 10347 if (new_array_id < 0) 10348 return new_array_id; 10349 10350 /* adding a new btf type invalidates existing pointers to btf objects, 10351 * so refresh pointers before proceeding 10352 */ 10353 datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 10354 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10355 var_type = btf_type_by_id(btf, var->type); 10356 10357 /* finally update btf info */ 10358 datasec_type->size = size; 10359 var->size = size - var->offset; 10360 var_type->type = new_array_id; 10361 10362 return 0; 10363 } 10364 10365 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 10366 { 10367 if (map_is_created(map)) 10368 return libbpf_err(-EBUSY); 10369 10370 if (map->mmaped) { 10371 size_t mmap_old_sz, mmap_new_sz; 10372 int err; 10373 10374 if (map->def.type != BPF_MAP_TYPE_ARRAY) 10375 return libbpf_err(-EOPNOTSUPP); 10376 10377 mmap_old_sz = bpf_map_mmap_sz(map); 10378 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); 10379 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 10380 if (err) { 10381 pr_warn("map '%s': failed to resize memory-mapped region: %s\n", 10382 bpf_map__name(map), errstr(err)); 10383 return libbpf_err(err); 10384 } 10385 err = map_btf_datasec_resize(map, size); 10386 if (err && err != -ENOENT) { 10387 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", 10388 bpf_map__name(map), errstr(err)); 10389 map->btf_value_type_id = 0; 10390 map->btf_key_type_id = 0; 10391 } 10392 } 10393 10394 map->def.value_size = size; 10395 return 0; 10396 } 10397 10398 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) 10399 { 10400 return map ? map->btf_key_type_id : 0; 10401 } 10402 10403 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) 10404 { 10405 return map ? 
map->btf_value_type_id : 0; 10406 } 10407 10408 int bpf_map__set_initial_value(struct bpf_map *map, 10409 const void *data, size_t size) 10410 { 10411 size_t actual_sz; 10412 10413 if (map_is_created(map)) 10414 return libbpf_err(-EBUSY); 10415 10416 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) 10417 return libbpf_err(-EINVAL); 10418 10419 if (map->def.type == BPF_MAP_TYPE_ARENA) 10420 actual_sz = map->obj->arena_data_sz; 10421 else 10422 actual_sz = map->def.value_size; 10423 if (size != actual_sz) 10424 return libbpf_err(-EINVAL); 10425 10426 memcpy(map->mmaped, data, size); 10427 return 0; 10428 } 10429 10430 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) 10431 { 10432 if (bpf_map__is_struct_ops(map)) { 10433 if (psize) 10434 *psize = map->def.value_size; 10435 return map->st_ops->data; 10436 } 10437 10438 if (!map->mmaped) 10439 return NULL; 10440 10441 if (map->def.type == BPF_MAP_TYPE_ARENA) 10442 *psize = map->obj->arena_data_sz; 10443 else 10444 *psize = map->def.value_size; 10445 10446 return map->mmaped; 10447 } 10448 10449 bool bpf_map__is_internal(const struct bpf_map *map) 10450 { 10451 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 10452 } 10453 10454 __u32 bpf_map__ifindex(const struct bpf_map *map) 10455 { 10456 return map->map_ifindex; 10457 } 10458 10459 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) 10460 { 10461 if (map_is_created(map)) 10462 return libbpf_err(-EBUSY); 10463 map->map_ifindex = ifindex; 10464 return 0; 10465 } 10466 10467 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) 10468 { 10469 if (!bpf_map_type__is_map_in_map(map->def.type)) { 10470 pr_warn("error: unsupported map type\n"); 10471 return libbpf_err(-EINVAL); 10472 } 10473 if (map->inner_map_fd != -1) { 10474 pr_warn("error: inner_map_fd already specified\n"); 10475 return libbpf_err(-EINVAL); 10476 } 10477 if (map->inner_map) { 10478 bpf_map__destroy(map->inner_map); 10479 zfree(&map->inner_map); 10480 } 10481 map->inner_map_fd = fd; 10482 return 0; 10483 } 10484 10485 static struct bpf_map * 10486 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) 10487 { 10488 ssize_t idx; 10489 struct bpf_map *s, *e; 10490 10491 if (!obj || !obj->maps) 10492 return errno = EINVAL, NULL; 10493 10494 s = obj->maps; 10495 e = obj->maps + obj->nr_maps; 10496 10497 if ((m < s) || (m >= e)) { 10498 pr_warn("error in %s: map handler doesn't belong to object\n", 10499 __func__); 10500 return errno = EINVAL, NULL; 10501 } 10502 10503 idx = (m - obj->maps) + i; 10504 if (idx >= obj->nr_maps || idx < 0) 10505 return NULL; 10506 return &obj->maps[idx]; 10507 } 10508 10509 struct bpf_map * 10510 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 10511 { 10512 if (prev == NULL && obj != NULL) 10513 return obj->maps; 10514 10515 return __bpf_map__iter(prev, obj, 1); 10516 } 10517 10518 struct bpf_map * 10519 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) 10520 { 10521 if (next == NULL && obj != NULL) { 10522 if (!obj->nr_maps) 10523 return NULL; 10524 return obj->maps + obj->nr_maps - 1; 10525 } 10526 10527 return __bpf_map__iter(next, obj, -1); 10528 } 10529 10530 struct bpf_map * 10531 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) 10532 { 10533 struct bpf_map *pos; 10534 10535 bpf_object__for_each_map(pos, obj) { 10536 /* if it's a special internal map name (which always starts 10537 * with dot) then check if that special name matches the 10538 * 
real map name (ELF section name) 10539 */ 10540 if (name[0] == '.') { 10541 if (pos->real_name && strcmp(pos->real_name, name) == 0) 10542 return pos; 10543 continue; 10544 } 10545 /* otherwise map name has to be an exact match */ 10546 if (map_uses_real_name(pos)) { 10547 if (strcmp(pos->real_name, name) == 0) 10548 return pos; 10549 continue; 10550 } 10551 if (strcmp(pos->name, name) == 0) 10552 return pos; 10553 } 10554 return errno = ENOENT, NULL; 10555 } 10556 10557 int 10558 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) 10559 { 10560 return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 10561 } 10562 10563 static int validate_map_op(const struct bpf_map *map, size_t key_sz, 10564 size_t value_sz, bool check_value_sz) 10565 { 10566 if (!map_is_created(map)) /* map is not yet created */ 10567 return -ENOENT; 10568 10569 if (map->def.key_size != key_sz) { 10570 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", 10571 map->name, key_sz, map->def.key_size); 10572 return -EINVAL; 10573 } 10574 10575 if (map->fd < 0) { 10576 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); 10577 return -EINVAL; 10578 } 10579 10580 if (!check_value_sz) 10581 return 0; 10582 10583 switch (map->def.type) { 10584 case BPF_MAP_TYPE_PERCPU_ARRAY: 10585 case BPF_MAP_TYPE_PERCPU_HASH: 10586 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 10587 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { 10588 int num_cpu = libbpf_num_possible_cpus(); 10589 size_t elem_sz = roundup(map->def.value_size, 8); 10590 10591 if (value_sz != num_cpu * elem_sz) { 10592 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", 10593 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); 10594 return -EINVAL; 10595 } 10596 break; 10597 } 10598 default: 10599 if (map->def.value_size != value_sz) { 10600 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", 10601 map->name, value_sz, map->def.value_size); 10602 return -EINVAL; 10603 } 10604 break; 10605 } 10606 return 0; 10607 } 10608 10609 int bpf_map__lookup_elem(const struct bpf_map *map, 10610 const void *key, size_t key_sz, 10611 void *value, size_t value_sz, __u64 flags) 10612 { 10613 int err; 10614 10615 err = validate_map_op(map, key_sz, value_sz, true); 10616 if (err) 10617 return libbpf_err(err); 10618 10619 return bpf_map_lookup_elem_flags(map->fd, key, value, flags); 10620 } 10621 10622 int bpf_map__update_elem(const struct bpf_map *map, 10623 const void *key, size_t key_sz, 10624 const void *value, size_t value_sz, __u64 flags) 10625 { 10626 int err; 10627 10628 err = validate_map_op(map, key_sz, value_sz, true); 10629 if (err) 10630 return libbpf_err(err); 10631 10632 return bpf_map_update_elem(map->fd, key, value, flags); 10633 } 10634 10635 int bpf_map__delete_elem(const struct bpf_map *map, 10636 const void *key, size_t key_sz, __u64 flags) 10637 { 10638 int err; 10639 10640 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10641 if (err) 10642 return libbpf_err(err); 10643 10644 return bpf_map_delete_elem_flags(map->fd, key, flags); 10645 } 10646 10647 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, 10648 const void *key, size_t key_sz, 10649 void *value, size_t value_sz, __u64 flags) 10650 { 10651 int err; 10652 10653 err = validate_map_op(map, key_sz, value_sz, true); 10654 if (err) 10655 return libbpf_err(err); 10656 10657 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); 10658 } 
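/* Illustrative usage sketch (not part of libbpf itself): given a loaded
 * 'struct bpf_object *obj' (hypothetical), the typed bpf_map__lookup_elem()
 * and bpf_map__update_elem() wrappers above go through validate_map_op(),
 * so key_sz/value_sz must match the map definition exactly. The map name
 * "counters" below is hypothetical.
 *
 *	__u32 key = 0, value = 1;
 *	struct bpf_map *map;
 *	int err;
 *
 *	map = bpf_object__find_map_by_name(obj, "counters");
 *	err = bpf_map__update_elem(map, &key, sizeof(key), &value, sizeof(value), BPF_ANY);
 *	if (!err)
 *		err = bpf_map__lookup_elem(map, &key, sizeof(key), &value, sizeof(value), 0);
 *
 * For per-CPU map types the value buffer must instead cover all possible
 * CPUs: libbpf_num_possible_cpus() elements of roundup(value_size, 8) bytes
 * each (see validate_map_op() above).
 */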
10659 10660 int bpf_map__get_next_key(const struct bpf_map *map, 10661 const void *cur_key, void *next_key, size_t key_sz) 10662 { 10663 int err; 10664 10665 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10666 if (err) 10667 return libbpf_err(err); 10668 10669 return bpf_map_get_next_key(map->fd, cur_key, next_key); 10670 } 10671 10672 long libbpf_get_error(const void *ptr) 10673 { 10674 if (!IS_ERR_OR_NULL(ptr)) 10675 return 0; 10676 10677 if (IS_ERR(ptr)) 10678 errno = -PTR_ERR(ptr); 10679 10680 /* If ptr == NULL, then errno should be already set by the failing 10681 * API, because libbpf never returns NULL on success and it now always 10682 * sets errno on error. So no extra errno handling for ptr == NULL 10683 * case. 10684 */ 10685 return -errno; 10686 } 10687 10688 /* Replace link's underlying BPF program with the new one */ 10689 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) 10690 { 10691 int ret; 10692 int prog_fd = bpf_program__fd(prog); 10693 10694 if (prog_fd < 0) { 10695 pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n", 10696 prog->name); 10697 return libbpf_err(-EINVAL); 10698 } 10699 10700 ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL); 10701 return libbpf_err_errno(ret); 10702 } 10703 10704 /* Release "ownership" of underlying BPF resource (typically, BPF program 10705 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected 10706 * link, when destroyed through the bpf_link__destroy() call, won't attempt to 10707 * detach/unregister that BPF resource. This is useful in situations where, 10708 * say, attached BPF program has to outlive userspace program that attached it 10709 * in the system. Depending on type of BPF program, though, there might be 10710 * additional steps (like pinning BPF program in BPF FS) necessary to ensure 10711 * exit of userspace program doesn't trigger automatic detachment and clean up 10712 * inside the kernel.
10713 */ 10714 void bpf_link__disconnect(struct bpf_link *link) 10715 { 10716 link->disconnected = true; 10717 } 10718 10719 int bpf_link__destroy(struct bpf_link *link) 10720 { 10721 int err = 0; 10722 10723 if (IS_ERR_OR_NULL(link)) 10724 return 0; 10725 10726 if (!link->disconnected && link->detach) 10727 err = link->detach(link); 10728 if (link->pin_path) 10729 free(link->pin_path); 10730 if (link->dealloc) 10731 link->dealloc(link); 10732 else 10733 free(link); 10734 10735 return libbpf_err(err); 10736 } 10737 10738 int bpf_link__fd(const struct bpf_link *link) 10739 { 10740 return link->fd; 10741 } 10742 10743 const char *bpf_link__pin_path(const struct bpf_link *link) 10744 { 10745 return link->pin_path; 10746 } 10747 10748 static int bpf_link__detach_fd(struct bpf_link *link) 10749 { 10750 return libbpf_err_errno(close(link->fd)); 10751 } 10752 10753 struct bpf_link *bpf_link__open(const char *path) 10754 { 10755 struct bpf_link *link; 10756 int fd; 10757 10758 fd = bpf_obj_get(path); 10759 if (fd < 0) { 10760 fd = -errno; 10761 pr_warn("failed to open link at %s: %d\n", path, fd); 10762 return libbpf_err_ptr(fd); 10763 } 10764 10765 link = calloc(1, sizeof(*link)); 10766 if (!link) { 10767 close(fd); 10768 return libbpf_err_ptr(-ENOMEM); 10769 } 10770 link->detach = &bpf_link__detach_fd; 10771 link->fd = fd; 10772 10773 link->pin_path = strdup(path); 10774 if (!link->pin_path) { 10775 bpf_link__destroy(link); 10776 return libbpf_err_ptr(-ENOMEM); 10777 } 10778 10779 return link; 10780 } 10781 10782 int bpf_link__detach(struct bpf_link *link) 10783 { 10784 return bpf_link_detach(link->fd) ? -errno : 0; 10785 } 10786 10787 int bpf_link__pin(struct bpf_link *link, const char *path) 10788 { 10789 int err; 10790 10791 if (link->pin_path) 10792 return libbpf_err(-EBUSY); 10793 err = make_parent_dir(path); 10794 if (err) 10795 return libbpf_err(err); 10796 err = check_path(path); 10797 if (err) 10798 return libbpf_err(err); 10799 10800 link->pin_path = strdup(path); 10801 if (!link->pin_path) 10802 return libbpf_err(-ENOMEM); 10803 10804 if (bpf_obj_pin(link->fd, link->pin_path)) { 10805 err = -errno; 10806 zfree(&link->pin_path); 10807 return libbpf_err(err); 10808 } 10809 10810 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 10811 return 0; 10812 } 10813 10814 int bpf_link__unpin(struct bpf_link *link) 10815 { 10816 int err; 10817 10818 if (!link->pin_path) 10819 return libbpf_err(-EINVAL); 10820 10821 err = unlink(link->pin_path); 10822 if (err != 0) 10823 return -errno; 10824 10825 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 10826 zfree(&link->pin_path); 10827 return 0; 10828 } 10829 10830 struct bpf_link_perf { 10831 struct bpf_link link; 10832 int perf_event_fd; 10833 /* legacy kprobe support: keep track of probe identifier and type */ 10834 char *legacy_probe_name; 10835 bool legacy_is_kprobe; 10836 bool legacy_is_retprobe; 10837 }; 10838 10839 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); 10840 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); 10841 10842 static int bpf_link_perf_detach(struct bpf_link *link) 10843 { 10844 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10845 int err = 0; 10846 10847 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 10848 err = -errno; 10849 10850 if (perf_link->perf_event_fd != link->fd) 10851 close(perf_link->perf_event_fd); 10852 close(link->fd); 10853 10854 /* legacy uprobe/kprobe needs to 
be removed after perf event fd closure */ 10855 if (perf_link->legacy_probe_name) { 10856 if (perf_link->legacy_is_kprobe) { 10857 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, 10858 perf_link->legacy_is_retprobe); 10859 } else { 10860 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, 10861 perf_link->legacy_is_retprobe); 10862 } 10863 } 10864 10865 return err; 10866 } 10867 10868 static void bpf_link_perf_dealloc(struct bpf_link *link) 10869 { 10870 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10871 10872 free(perf_link->legacy_probe_name); 10873 free(perf_link); 10874 } 10875 10876 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, 10877 const struct bpf_perf_event_opts *opts) 10878 { 10879 struct bpf_link_perf *link; 10880 int prog_fd, link_fd = -1, err; 10881 bool force_ioctl_attach; 10882 10883 if (!OPTS_VALID(opts, bpf_perf_event_opts)) 10884 return libbpf_err_ptr(-EINVAL); 10885 10886 if (pfd < 0) { 10887 pr_warn("prog '%s': invalid perf event FD %d\n", 10888 prog->name, pfd); 10889 return libbpf_err_ptr(-EINVAL); 10890 } 10891 prog_fd = bpf_program__fd(prog); 10892 if (prog_fd < 0) { 10893 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 10894 prog->name); 10895 return libbpf_err_ptr(-EINVAL); 10896 } 10897 10898 link = calloc(1, sizeof(*link)); 10899 if (!link) 10900 return libbpf_err_ptr(-ENOMEM); 10901 link->link.detach = &bpf_link_perf_detach; 10902 link->link.dealloc = &bpf_link_perf_dealloc; 10903 link->perf_event_fd = pfd; 10904 10905 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); 10906 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { 10907 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, 10908 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); 10909 10910 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); 10911 if (link_fd < 0) { 10912 err = -errno; 10913 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", 10914 prog->name, pfd, errstr(err)); 10915 goto err_out; 10916 } 10917 link->link.fd = link_fd; 10918 } else { 10919 if (OPTS_GET(opts, bpf_cookie, 0)) { 10920 pr_warn("prog '%s': user context value is not supported\n", prog->name); 10921 err = -EOPNOTSUPP; 10922 goto err_out; 10923 } 10924 10925 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { 10926 err = -errno; 10927 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", 10928 prog->name, pfd, errstr(err)); 10929 if (err == -EPROTO) 10930 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", 10931 prog->name, pfd); 10932 goto err_out; 10933 } 10934 link->link.fd = pfd; 10935 } 10936 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 10937 err = -errno; 10938 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", 10939 prog->name, pfd, errstr(err)); 10940 goto err_out; 10941 } 10942 10943 return &link->link; 10944 err_out: 10945 if (link_fd >= 0) 10946 close(link_fd); 10947 free(link); 10948 return libbpf_err_ptr(err); 10949 } 10950 10951 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) 10952 { 10953 return bpf_program__attach_perf_event_opts(prog, pfd, NULL); 10954 } 10955 10956 /* 10957 * this function is expected to parse integer in the range of [0, 2^31-1] from 10958 * given file using scanf format string fmt. 
If actual parsed value is 10959 * negative, the result might be indistinguishable from error 10960 */ 10961 static int parse_uint_from_file(const char *file, const char *fmt) 10962 { 10963 int err, ret; 10964 FILE *f; 10965 10966 f = fopen(file, "re"); 10967 if (!f) { 10968 err = -errno; 10969 pr_debug("failed to open '%s': %s\n", file, errstr(err)); 10970 return err; 10971 } 10972 err = fscanf(f, fmt, &ret); 10973 if (err != 1) { 10974 err = err == EOF ? -EIO : -errno; 10975 pr_debug("failed to parse '%s': %s\n", file, errstr(err)); 10976 fclose(f); 10977 return err; 10978 } 10979 fclose(f); 10980 return ret; 10981 } 10982 10983 static int determine_kprobe_perf_type(void) 10984 { 10985 const char *file = "/sys/bus/event_source/devices/kprobe/type"; 10986 10987 return parse_uint_from_file(file, "%d\n"); 10988 } 10989 10990 static int determine_uprobe_perf_type(void) 10991 { 10992 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 10993 10994 return parse_uint_from_file(file, "%d\n"); 10995 } 10996 10997 static int determine_kprobe_retprobe_bit(void) 10998 { 10999 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; 11000 11001 return parse_uint_from_file(file, "config:%d\n"); 11002 } 11003 11004 static int determine_uprobe_retprobe_bit(void) 11005 { 11006 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 11007 11008 return parse_uint_from_file(file, "config:%d\n"); 11009 } 11010 11011 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 11012 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 11013 11014 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, 11015 uint64_t offset, int pid, size_t ref_ctr_off) 11016 { 11017 const size_t attr_sz = sizeof(struct perf_event_attr); 11018 struct perf_event_attr attr; 11019 int type, pfd; 11020 11021 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 11022 return -EINVAL; 11023 11024 memset(&attr, 0, attr_sz); 11025 11026 type = uprobe ? determine_uprobe_perf_type() 11027 : determine_kprobe_perf_type(); 11028 if (type < 0) { 11029 pr_warn("failed to determine %s perf type: %s\n", 11030 uprobe ? "uprobe" : "kprobe", 11031 errstr(type)); 11032 return type; 11033 } 11034 if (retprobe) { 11035 int bit = uprobe ? determine_uprobe_retprobe_bit() 11036 : determine_kprobe_retprobe_bit(); 11037 11038 if (bit < 0) { 11039 pr_warn("failed to determine %s retprobe bit: %s\n", 11040 uprobe ? "uprobe" : "kprobe", 11041 errstr(bit)); 11042 return bit; 11043 } 11044 attr.config |= 1 << bit; 11045 } 11046 attr.size = attr_sz; 11047 attr.type = type; 11048 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; 11049 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ 11050 attr.config2 = offset; /* kprobe_addr or probe_offset */ 11051 11052 /* pid filter is meaningful only for uprobes */ 11053 pfd = syscall(__NR_perf_event_open, &attr, 11054 pid < 0 ? -1 : pid /* pid */, 11055 pid == -1 ? 0 : -1 /* cpu */, 11056 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11057 return pfd >= 0 ? pfd : -errno; 11058 } 11059 11060 static int append_to_file(const char *file, const char *fmt, ...) 
11061 { 11062 int fd, n, err = 0; 11063 va_list ap; 11064 char buf[1024]; 11065 11066 va_start(ap, fmt); 11067 n = vsnprintf(buf, sizeof(buf), fmt, ap); 11068 va_end(ap); 11069 11070 if (n < 0 || n >= sizeof(buf)) 11071 return -EINVAL; 11072 11073 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); 11074 if (fd < 0) 11075 return -errno; 11076 11077 if (write(fd, buf, n) < 0) 11078 err = -errno; 11079 11080 close(fd); 11081 return err; 11082 } 11083 11084 #define DEBUGFS "/sys/kernel/debug/tracing" 11085 #define TRACEFS "/sys/kernel/tracing" 11086 11087 static bool use_debugfs(void) 11088 { 11089 static int has_debugfs = -1; 11090 11091 if (has_debugfs < 0) 11092 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; 11093 11094 return has_debugfs == 1; 11095 } 11096 11097 static const char *tracefs_path(void) 11098 { 11099 return use_debugfs() ? DEBUGFS : TRACEFS; 11100 } 11101 11102 static const char *tracefs_kprobe_events(void) 11103 { 11104 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; 11105 } 11106 11107 static const char *tracefs_uprobe_events(void) 11108 { 11109 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; 11110 } 11111 11112 static const char *tracefs_available_filter_functions(void) 11113 { 11114 return use_debugfs() ? DEBUGFS"/available_filter_functions" 11115 : TRACEFS"/available_filter_functions"; 11116 } 11117 11118 static const char *tracefs_available_filter_functions_addrs(void) 11119 { 11120 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" 11121 : TRACEFS"/available_filter_functions_addrs"; 11122 } 11123 11124 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, 11125 const char *kfunc_name, size_t offset) 11126 { 11127 static int index = 0; 11128 int i; 11129 11130 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, 11131 __sync_fetch_and_add(&index, 1)); 11132 11133 /* sanitize binary_path in the probe name */ 11134 for (i = 0; buf[i]; i++) { 11135 if (!isalnum(buf[i])) 11136 buf[i] = '_'; 11137 } 11138 } 11139 11140 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, 11141 const char *kfunc_name, size_t offset) 11142 { 11143 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", 11144 retprobe ? 'r' : 'p', 11145 retprobe ? "kretprobes" : "kprobes", 11146 probe_name, kfunc_name, offset); 11147 } 11148 11149 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) 11150 { 11151 return append_to_file(tracefs_kprobe_events(), "-:%s/%s", 11152 retprobe ? "kretprobes" : "kprobes", probe_name); 11153 } 11154 11155 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11156 { 11157 char file[256]; 11158 11159 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11160 tracefs_path(), retprobe ? 
"kretprobes" : "kprobes", probe_name); 11161 11162 return parse_uint_from_file(file, "%d\n"); 11163 } 11164 11165 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, 11166 const char *kfunc_name, size_t offset, int pid) 11167 { 11168 const size_t attr_sz = sizeof(struct perf_event_attr); 11169 struct perf_event_attr attr; 11170 int type, pfd, err; 11171 11172 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); 11173 if (err < 0) { 11174 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", 11175 kfunc_name, offset, 11176 errstr(err)); 11177 return err; 11178 } 11179 type = determine_kprobe_perf_type_legacy(probe_name, retprobe); 11180 if (type < 0) { 11181 err = type; 11182 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", 11183 kfunc_name, offset, 11184 errstr(err)); 11185 goto err_clean_legacy; 11186 } 11187 11188 memset(&attr, 0, attr_sz); 11189 attr.size = attr_sz; 11190 attr.config = type; 11191 attr.type = PERF_TYPE_TRACEPOINT; 11192 11193 pfd = syscall(__NR_perf_event_open, &attr, 11194 pid < 0 ? -1 : pid, /* pid */ 11195 pid == -1 ? 0 : -1, /* cpu */ 11196 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11197 if (pfd < 0) { 11198 err = -errno; 11199 pr_warn("legacy kprobe perf_event_open() failed: %s\n", 11200 errstr(err)); 11201 goto err_clean_legacy; 11202 } 11203 return pfd; 11204 11205 err_clean_legacy: 11206 /* Clear the newly added legacy kprobe_event */ 11207 remove_kprobe_event_legacy(probe_name, retprobe); 11208 return err; 11209 } 11210 11211 static const char *arch_specific_syscall_pfx(void) 11212 { 11213 #if defined(__x86_64__) 11214 return "x64"; 11215 #elif defined(__i386__) 11216 return "ia32"; 11217 #elif defined(__s390x__) 11218 return "s390x"; 11219 #elif defined(__s390__) 11220 return "s390"; 11221 #elif defined(__arm__) 11222 return "arm"; 11223 #elif defined(__aarch64__) 11224 return "arm64"; 11225 #elif defined(__mips__) 11226 return "mips"; 11227 #elif defined(__riscv) 11228 return "riscv"; 11229 #elif defined(__powerpc__) 11230 return "powerpc"; 11231 #elif defined(__powerpc64__) 11232 return "powerpc64"; 11233 #else 11234 return NULL; 11235 #endif 11236 } 11237 11238 int probe_kern_syscall_wrapper(int token_fd) 11239 { 11240 char syscall_name[64]; 11241 const char *ksys_pfx; 11242 11243 ksys_pfx = arch_specific_syscall_pfx(); 11244 if (!ksys_pfx) 11245 return 0; 11246 11247 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 11248 11249 if (determine_kprobe_perf_type() >= 0) { 11250 int pfd; 11251 11252 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 11253 if (pfd >= 0) 11254 close(pfd); 11255 11256 return pfd >= 0 ? 
1 : 0; 11257 } else { /* legacy mode */ 11258 char probe_name[128]; 11259 11260 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 11261 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 11262 return 0; 11263 11264 (void)remove_kprobe_event_legacy(probe_name, false); 11265 return 1; 11266 } 11267 } 11268 11269 struct bpf_link * 11270 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 11271 const char *func_name, 11272 const struct bpf_kprobe_opts *opts) 11273 { 11274 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11275 enum probe_attach_mode attach_mode; 11276 char *legacy_probe = NULL; 11277 struct bpf_link *link; 11278 size_t offset; 11279 bool retprobe, legacy; 11280 int pfd, err; 11281 11282 if (!OPTS_VALID(opts, bpf_kprobe_opts)) 11283 return libbpf_err_ptr(-EINVAL); 11284 11285 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11286 retprobe = OPTS_GET(opts, retprobe, false); 11287 offset = OPTS_GET(opts, offset, 0); 11288 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11289 11290 legacy = determine_kprobe_perf_type() < 0; 11291 switch (attach_mode) { 11292 case PROBE_ATTACH_MODE_LEGACY: 11293 legacy = true; 11294 pe_opts.force_ioctl_attach = true; 11295 break; 11296 case PROBE_ATTACH_MODE_PERF: 11297 if (legacy) 11298 return libbpf_err_ptr(-ENOTSUP); 11299 pe_opts.force_ioctl_attach = true; 11300 break; 11301 case PROBE_ATTACH_MODE_LINK: 11302 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11303 return libbpf_err_ptr(-ENOTSUP); 11304 break; 11305 case PROBE_ATTACH_MODE_DEFAULT: 11306 break; 11307 default: 11308 return libbpf_err_ptr(-EINVAL); 11309 } 11310 11311 if (!legacy) { 11312 pfd = perf_event_open_probe(false /* uprobe */, retprobe, 11313 func_name, offset, 11314 -1 /* pid */, 0 /* ref_ctr_off */); 11315 } else { 11316 char probe_name[256]; 11317 11318 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), 11319 func_name, offset); 11320 11321 legacy_probe = strdup(probe_name); 11322 if (!legacy_probe) 11323 return libbpf_err_ptr(-ENOMEM); 11324 11325 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, 11326 offset, -1 /* pid */); 11327 } 11328 if (pfd < 0) { 11329 err = -errno; 11330 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", 11331 prog->name, retprobe ? "kretprobe" : "kprobe", 11332 func_name, offset, 11333 errstr(err)); 11334 goto err_out; 11335 } 11336 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11337 err = libbpf_get_error(link); 11338 if (err) { 11339 close(pfd); 11340 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", 11341 prog->name, retprobe ? 
"kretprobe" : "kprobe", 11342 func_name, offset, 11343 errstr(err)); 11344 goto err_clean_legacy; 11345 } 11346 if (legacy) { 11347 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 11348 11349 perf_link->legacy_probe_name = legacy_probe; 11350 perf_link->legacy_is_kprobe = true; 11351 perf_link->legacy_is_retprobe = retprobe; 11352 } 11353 11354 return link; 11355 11356 err_clean_legacy: 11357 if (legacy) 11358 remove_kprobe_event_legacy(legacy_probe, retprobe); 11359 err_out: 11360 free(legacy_probe); 11361 return libbpf_err_ptr(err); 11362 } 11363 11364 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, 11365 bool retprobe, 11366 const char *func_name) 11367 { 11368 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 11369 .retprobe = retprobe, 11370 ); 11371 11372 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 11373 } 11374 11375 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 11376 const char *syscall_name, 11377 const struct bpf_ksyscall_opts *opts) 11378 { 11379 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 11380 char func_name[128]; 11381 11382 if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 11383 return libbpf_err_ptr(-EINVAL); 11384 11385 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 11386 /* arch_specific_syscall_pfx() should never return NULL here 11387 * because it is guarded by kernel_supports(). However, since 11388 * compiler does not know that we have an explicit conditional 11389 * as well. 11390 */ 11391 snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 11392 arch_specific_syscall_pfx() ? : "", syscall_name); 11393 } else { 11394 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 11395 } 11396 11397 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 11398 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11399 11400 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 11401 } 11402 11403 /* Adapted from perf/util/string.c */ 11404 bool glob_match(const char *str, const char *pat) 11405 { 11406 while (*str && *pat && *pat != '*') { 11407 if (*pat == '?') { /* Matches any single character */ 11408 str++; 11409 pat++; 11410 continue; 11411 } 11412 if (*str != *pat) 11413 return false; 11414 str++; 11415 pat++; 11416 } 11417 /* Check wild card */ 11418 if (*pat == '*') { 11419 while (*pat == '*') 11420 pat++; 11421 if (!*pat) /* Tail wild card matches all */ 11422 return true; 11423 while (*str) 11424 if (glob_match(str++, pat)) 11425 return true; 11426 } 11427 return !*str && !*pat; 11428 } 11429 11430 struct kprobe_multi_resolve { 11431 const char *pattern; 11432 unsigned long *addrs; 11433 size_t cap; 11434 size_t cnt; 11435 }; 11436 11437 struct avail_kallsyms_data { 11438 char **syms; 11439 size_t cnt; 11440 struct kprobe_multi_resolve *res; 11441 }; 11442 11443 static int avail_func_cmp(const void *a, const void *b) 11444 { 11445 return strcmp(*(const char **)a, *(const char **)b); 11446 } 11447 11448 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, 11449 const char *sym_name, void *ctx) 11450 { 11451 struct avail_kallsyms_data *data = ctx; 11452 struct kprobe_multi_resolve *res = data->res; 11453 int err; 11454 11455 if (!glob_match(sym_name, res->pattern)) 11456 return 0; 11457 11458 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) { 11459 /* Some versions of kernel strip out .llvm.<hash> suffix from 11460 * function names reported in available_filter_functions, 
but 11461 * don't do so for kallsyms. While this is clearly a kernel 11462 * bug (fixed by [0]) we try to accommodate that in libbpf to 11463 * make multi-kprobe usability a bit better: if no match is 11464 * found, we will strip .llvm. suffix and try one more time. 11465 * 11466 * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") 11467 */ 11468 char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; 11469 11470 if (!(sym_sfx = strstr(sym_name, ".llvm."))) 11471 return 0; 11472 11473 /* psym_trim vs sym_trim dance is done to avoid pointer vs array 11474 * coercion differences and get proper `const char **` pointer 11475 * which avail_func_cmp() expects 11476 */ 11477 snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name); 11478 if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) 11479 return 0; 11480 } 11481 11482 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); 11483 if (err) 11484 return err; 11485 11486 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11487 return 0; 11488 } 11489 11490 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) 11491 { 11492 const char *available_functions_file = tracefs_available_filter_functions(); 11493 struct avail_kallsyms_data data; 11494 char sym_name[500]; 11495 FILE *f; 11496 int err = 0, ret, i; 11497 char **syms = NULL; 11498 size_t cap = 0, cnt = 0; 11499 11500 f = fopen(available_functions_file, "re"); 11501 if (!f) { 11502 err = -errno; 11503 pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); 11504 return err; 11505 } 11506 11507 while (true) { 11508 char *name; 11509 11510 ret = fscanf(f, "%499s%*[^\n]\n", sym_name); 11511 if (ret == EOF && feof(f)) 11512 break; 11513 11514 if (ret != 1) { 11515 pr_warn("failed to parse available_filter_functions entry: %d\n", ret); 11516 err = -EINVAL; 11517 goto cleanup; 11518 } 11519 11520 if (!glob_match(sym_name, res->pattern)) 11521 continue; 11522 11523 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); 11524 if (err) 11525 goto cleanup; 11526 11527 name = strdup(sym_name); 11528 if (!name) { 11529 err = -errno; 11530 goto cleanup; 11531 } 11532 11533 syms[cnt++] = name; 11534 } 11535 11536 /* no entries found, bail out */ 11537 if (cnt == 0) { 11538 err = -ENOENT; 11539 goto cleanup; 11540 } 11541 11542 /* sort available functions */ 11543 qsort(syms, cnt, sizeof(*syms), avail_func_cmp); 11544 11545 data.syms = syms; 11546 data.res = res; 11547 data.cnt = cnt; 11548 libbpf_kallsyms_parse(avail_kallsyms_cb, &data); 11549 11550 if (res->cnt == 0) 11551 err = -ENOENT; 11552 11553 cleanup: 11554 for (i = 0; i < cnt; i++) 11555 free((char *)syms[i]); 11556 free(syms); 11557 11558 fclose(f); 11559 return err; 11560 } 11561 11562 static bool has_available_filter_functions_addrs(void) 11563 { 11564 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; 11565 } 11566 11567 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) 11568 { 11569 const char *available_path = tracefs_available_filter_functions_addrs(); 11570 char sym_name[500]; 11571 FILE *f; 11572 int ret, err = 0; 11573 unsigned long long sym_addr; 11574 11575 f = fopen(available_path, "re"); 11576 if (!f) { 11577 err = -errno; 11578 pr_warn("failed to open %s: %s\n", available_path, errstr(err)); 11579 return err; 11580 } 11581 11582 while (true) { 11583 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); 11584 
if (ret == EOF && feof(f)) 11585 break; 11586 11587 if (ret != 2) { 11588 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", 11589 ret); 11590 err = -EINVAL; 11591 goto cleanup; 11592 } 11593 11594 if (!glob_match(sym_name, res->pattern)) 11595 continue; 11596 11597 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, 11598 sizeof(*res->addrs), res->cnt + 1); 11599 if (err) 11600 goto cleanup; 11601 11602 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11603 } 11604 11605 if (res->cnt == 0) 11606 err = -ENOENT; 11607 11608 cleanup: 11609 fclose(f); 11610 return err; 11611 } 11612 11613 struct bpf_link * 11614 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 11615 const char *pattern, 11616 const struct bpf_kprobe_multi_opts *opts) 11617 { 11618 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11619 struct kprobe_multi_resolve res = { 11620 .pattern = pattern, 11621 }; 11622 enum bpf_attach_type attach_type; 11623 struct bpf_link *link = NULL; 11624 const unsigned long *addrs; 11625 int err, link_fd, prog_fd; 11626 bool retprobe, session, unique_match; 11627 const __u64 *cookies; 11628 const char **syms; 11629 size_t cnt; 11630 11631 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) 11632 return libbpf_err_ptr(-EINVAL); 11633 11634 prog_fd = bpf_program__fd(prog); 11635 if (prog_fd < 0) { 11636 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 11637 prog->name); 11638 return libbpf_err_ptr(-EINVAL); 11639 } 11640 11641 syms = OPTS_GET(opts, syms, false); 11642 addrs = OPTS_GET(opts, addrs, false); 11643 cnt = OPTS_GET(opts, cnt, false); 11644 cookies = OPTS_GET(opts, cookies, false); 11645 unique_match = OPTS_GET(opts, unique_match, false); 11646 11647 if (!pattern && !addrs && !syms) 11648 return libbpf_err_ptr(-EINVAL); 11649 if (pattern && (addrs || syms || cookies || cnt)) 11650 return libbpf_err_ptr(-EINVAL); 11651 if (!pattern && !cnt) 11652 return libbpf_err_ptr(-EINVAL); 11653 if (!pattern && unique_match) 11654 return libbpf_err_ptr(-EINVAL); 11655 if (addrs && syms) 11656 return libbpf_err_ptr(-EINVAL); 11657 11658 if (pattern) { 11659 if (has_available_filter_functions_addrs()) 11660 err = libbpf_available_kprobes_parse(&res); 11661 else 11662 err = libbpf_available_kallsyms_parse(&res); 11663 if (err) 11664 goto error; 11665 11666 if (unique_match && res.cnt != 1) { 11667 pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n", 11668 prog->name, pattern, res.cnt); 11669 err = -EINVAL; 11670 goto error; 11671 } 11672 11673 addrs = res.addrs; 11674 cnt = res.cnt; 11675 } 11676 11677 retprobe = OPTS_GET(opts, retprobe, false); 11678 session = OPTS_GET(opts, session, false); 11679 11680 if (retprobe && session) 11681 return libbpf_err_ptr(-EINVAL); 11682 11683 attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI; 11684 11685 lopts.kprobe_multi.syms = syms; 11686 lopts.kprobe_multi.addrs = addrs; 11687 lopts.kprobe_multi.cookies = cookies; 11688 lopts.kprobe_multi.cnt = cnt; 11689 lopts.kprobe_multi.flags = retprobe ? 
BPF_F_KPROBE_MULTI_RETURN : 0; 11690 11691 link = calloc(1, sizeof(*link)); 11692 if (!link) { 11693 err = -ENOMEM; 11694 goto error; 11695 } 11696 link->detach = &bpf_link__detach_fd; 11697 11698 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); 11699 if (link_fd < 0) { 11700 err = -errno; 11701 pr_warn("prog '%s': failed to attach: %s\n", 11702 prog->name, errstr(err)); 11703 goto error; 11704 } 11705 link->fd = link_fd; 11706 free(res.addrs); 11707 return link; 11708 11709 error: 11710 free(link); 11711 free(res.addrs); 11712 return libbpf_err_ptr(err); 11713 } 11714 11715 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11716 { 11717 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); 11718 unsigned long offset = 0; 11719 const char *func_name; 11720 char *func; 11721 int n; 11722 11723 *link = NULL; 11724 11725 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ 11726 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) 11727 return 0; 11728 11729 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); 11730 if (opts.retprobe) 11731 func_name = prog->sec_name + sizeof("kretprobe/") - 1; 11732 else 11733 func_name = prog->sec_name + sizeof("kprobe/") - 1; 11734 11735 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); 11736 if (n < 1) { 11737 pr_warn("kprobe name is invalid: %s\n", func_name); 11738 return -EINVAL; 11739 } 11740 if (opts.retprobe && offset != 0) { 11741 free(func); 11742 pr_warn("kretprobes do not support offset specification\n"); 11743 return -EINVAL; 11744 } 11745 11746 opts.offset = offset; 11747 *link = bpf_program__attach_kprobe_opts(prog, func, &opts); 11748 free(func); 11749 return libbpf_get_error(*link); 11750 } 11751 11752 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11753 { 11754 LIBBPF_OPTS(bpf_ksyscall_opts, opts); 11755 const char *syscall_name; 11756 11757 *link = NULL; 11758 11759 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ 11760 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) 11761 return 0; 11762 11763 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); 11764 if (opts.retprobe) 11765 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; 11766 else 11767 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; 11768 11769 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); 11770 return *link ? 
0 : -errno; 11771 } 11772 11773 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11774 { 11775 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); 11776 const char *spec; 11777 char *pattern; 11778 int n; 11779 11780 *link = NULL; 11781 11782 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ 11783 if (strcmp(prog->sec_name, "kprobe.multi") == 0 || 11784 strcmp(prog->sec_name, "kretprobe.multi") == 0) 11785 return 0; 11786 11787 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); 11788 if (opts.retprobe) 11789 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; 11790 else 11791 spec = prog->sec_name + sizeof("kprobe.multi/") - 1; 11792 11793 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11794 if (n < 1) { 11795 pr_warn("kprobe multi pattern is invalid: %s\n", spec); 11796 return -EINVAL; 11797 } 11798 11799 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11800 free(pattern); 11801 return libbpf_get_error(*link); 11802 } 11803 11804 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, 11805 struct bpf_link **link) 11806 { 11807 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true); 11808 const char *spec; 11809 char *pattern; 11810 int n; 11811 11812 *link = NULL; 11813 11814 /* no auto-attach for SEC("kprobe.session") */ 11815 if (strcmp(prog->sec_name, "kprobe.session") == 0) 11816 return 0; 11817 11818 spec = prog->sec_name + sizeof("kprobe.session/") - 1; 11819 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11820 if (n < 1) { 11821 pr_warn("kprobe session pattern is invalid: %s\n", spec); 11822 return -EINVAL; 11823 } 11824 11825 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11826 free(pattern); 11827 return *link ? 0 : -errno; 11828 } 11829 11830 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11831 { 11832 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 11833 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 11834 int n, ret = -EINVAL; 11835 11836 *link = NULL; 11837 11838 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 11839 &probe_type, &binary_path, &func_name); 11840 switch (n) { 11841 case 1: 11842 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 11843 ret = 0; 11844 break; 11845 case 3: 11846 opts.session = str_has_pfx(probe_type, "uprobe.session"); 11847 opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); 11848 11849 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 11850 ret = libbpf_get_error(*link); 11851 break; 11852 default: 11853 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 11854 prog->sec_name); 11855 break; 11856 } 11857 free(probe_type); 11858 free(binary_path); 11859 free(func_name); 11860 return ret; 11861 } 11862 11863 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, 11864 const char *binary_path, uint64_t offset) 11865 { 11866 int i; 11867 11868 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); 11869 11870 /* sanitize binary_path in the probe name */ 11871 for (i = 0; buf[i]; i++) { 11872 if (!isalnum(buf[i])) 11873 buf[i] = '_'; 11874 } 11875 } 11876 11877 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, 11878 const char *binary_path, size_t offset) 11879 { 11880 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", 11881 retprobe ? 
'r' : 'p', 11882 retprobe ? "uretprobes" : "uprobes", 11883 probe_name, binary_path, offset); 11884 } 11885 11886 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) 11887 { 11888 return append_to_file(tracefs_uprobe_events(), "-:%s/%s", 11889 retprobe ? "uretprobes" : "uprobes", probe_name); 11890 } 11891 11892 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11893 { 11894 char file[512]; 11895 11896 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11897 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); 11898 11899 return parse_uint_from_file(file, "%d\n"); 11900 } 11901 11902 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, 11903 const char *binary_path, size_t offset, int pid) 11904 { 11905 const size_t attr_sz = sizeof(struct perf_event_attr); 11906 struct perf_event_attr attr; 11907 int type, pfd, err; 11908 11909 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); 11910 if (err < 0) { 11911 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", 11912 binary_path, (size_t)offset, errstr(err)); 11913 return err; 11914 } 11915 type = determine_uprobe_perf_type_legacy(probe_name, retprobe); 11916 if (type < 0) { 11917 err = type; 11918 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", 11919 binary_path, offset, errstr(err)); 11920 goto err_clean_legacy; 11921 } 11922 11923 memset(&attr, 0, attr_sz); 11924 attr.size = attr_sz; 11925 attr.config = type; 11926 attr.type = PERF_TYPE_TRACEPOINT; 11927 11928 pfd = syscall(__NR_perf_event_open, &attr, 11929 pid < 0 ? -1 : pid, /* pid */ 11930 pid == -1 ? 0 : -1, /* cpu */ 11931 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11932 if (pfd < 0) { 11933 err = -errno; 11934 pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); 11935 goto err_clean_legacy; 11936 } 11937 return pfd; 11938 11939 err_clean_legacy: 11940 /* Clear the newly added legacy uprobe_event */ 11941 remove_uprobe_event_legacy(probe_name, retprobe); 11942 return err; 11943 } 11944 11945 /* Find offset of function name in archive specified by path. Currently 11946 * supported are .zip files that do not compress their contents, as used on 11947 * Android in the form of APKs, for example. "file_name" is the name of the ELF 11948 * file inside the archive. "func_name" matches symbol name or name@@LIB for 11949 * library functions. 
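 * On success the returned value is the function's offset from the start of
 * the archive file itself, i.e. the member's data offset within the archive
 * plus the symbol's offset inside the member ELF.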
11950 * 11951 * An overview of the APK format specifically provided here: 11952 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents 11953 */ 11954 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, 11955 const char *func_name) 11956 { 11957 struct zip_archive *archive; 11958 struct zip_entry entry; 11959 long ret; 11960 Elf *elf; 11961 11962 archive = zip_archive_open(archive_path); 11963 if (IS_ERR(archive)) { 11964 ret = PTR_ERR(archive); 11965 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); 11966 return ret; 11967 } 11968 11969 ret = zip_archive_find_entry(archive, file_name, &entry); 11970 if (ret) { 11971 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, 11972 archive_path, ret); 11973 goto out; 11974 } 11975 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, 11976 (unsigned long)entry.data_offset); 11977 11978 if (entry.compression) { 11979 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, 11980 archive_path); 11981 ret = -LIBBPF_ERRNO__FORMAT; 11982 goto out; 11983 } 11984 11985 elf = elf_memory((void *)entry.data, entry.data_length); 11986 if (!elf) { 11987 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, 11988 elf_errmsg(-1)); 11989 ret = -LIBBPF_ERRNO__LIBELF; 11990 goto out; 11991 } 11992 11993 ret = elf_find_func_offset(elf, file_name, func_name); 11994 if (ret > 0) { 11995 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", 11996 func_name, file_name, archive_path, entry.data_offset, ret, 11997 ret + entry.data_offset); 11998 ret += entry.data_offset; 11999 } 12000 elf_end(elf); 12001 12002 out: 12003 zip_archive_close(archive); 12004 return ret; 12005 } 12006 12007 static const char *arch_specific_lib_paths(void) 12008 { 12009 /* 12010 * Based on https://packages.debian.org/sid/libc6. 12011 * 12012 * Assume that the traced program is built for the same architecture 12013 * as libbpf, which should cover the vast majority of cases. 12014 */ 12015 #if defined(__x86_64__) 12016 return "/lib/x86_64-linux-gnu"; 12017 #elif defined(__i386__) 12018 return "/lib/i386-linux-gnu"; 12019 #elif defined(__s390x__) 12020 return "/lib/s390x-linux-gnu"; 12021 #elif defined(__s390__) 12022 return "/lib/s390-linux-gnu"; 12023 #elif defined(__arm__) && defined(__SOFTFP__) 12024 return "/lib/arm-linux-gnueabi"; 12025 #elif defined(__arm__) && !defined(__SOFTFP__) 12026 return "/lib/arm-linux-gnueabihf"; 12027 #elif defined(__aarch64__) 12028 return "/lib/aarch64-linux-gnu"; 12029 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 12030 return "/lib/mips64el-linux-gnuabi64"; 12031 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 12032 return "/lib/mipsel-linux-gnu"; 12033 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 12034 return "/lib/powerpc64le-linux-gnu"; 12035 #elif defined(__sparc__) && defined(__arch64__) 12036 return "/lib/sparc64-linux-gnu"; 12037 #elif defined(__riscv) && __riscv_xlen == 64 12038 return "/lib/riscv64-linux-gnu"; 12039 #else 12040 return NULL; 12041 #endif 12042 } 12043 12044 /* Get full path to program/shared library. 
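 *
 * For shared libraries (names ending in ".so" or containing ".so.") the
 * search goes through LD_LIBRARY_PATH, then "/usr/lib64:/usr/lib", then a
 * Debian-style arch-specific library directory, and requires read access;
 * plain executables are searched in PATH, then "/usr/bin:/usr/sbin", and
 * must be both readable and executable.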
*/ 12045 static int resolve_full_path(const char *file, char *result, size_t result_sz) 12046 { 12047 const char *search_paths[3] = {}; 12048 int i, perm; 12049 12050 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { 12051 search_paths[0] = getenv("LD_LIBRARY_PATH"); 12052 search_paths[1] = "/usr/lib64:/usr/lib"; 12053 search_paths[2] = arch_specific_lib_paths(); 12054 perm = R_OK; 12055 } else { 12056 search_paths[0] = getenv("PATH"); 12057 search_paths[1] = "/usr/bin:/usr/sbin"; 12058 perm = R_OK | X_OK; 12059 } 12060 12061 for (i = 0; i < ARRAY_SIZE(search_paths); i++) { 12062 const char *s; 12063 12064 if (!search_paths[i]) 12065 continue; 12066 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { 12067 char *next_path; 12068 int seg_len; 12069 12070 if (s[0] == ':') 12071 s++; 12072 next_path = strchr(s, ':'); 12073 seg_len = next_path ? next_path - s : strlen(s); 12074 if (!seg_len) 12075 continue; 12076 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); 12077 /* ensure it has required permissions */ 12078 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) 12079 continue; 12080 pr_debug("resolved '%s' to '%s'\n", file, result); 12081 return 0; 12082 } 12083 } 12084 return -ENOENT; 12085 } 12086 12087 struct bpf_link * 12088 bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 12089 pid_t pid, 12090 const char *path, 12091 const char *func_pattern, 12092 const struct bpf_uprobe_multi_opts *opts) 12093 { 12094 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 12095 LIBBPF_OPTS(bpf_link_create_opts, lopts); 12096 unsigned long *resolved_offsets = NULL; 12097 enum bpf_attach_type attach_type; 12098 int err = 0, link_fd, prog_fd; 12099 struct bpf_link *link = NULL; 12100 char full_path[PATH_MAX]; 12101 bool retprobe, session; 12102 const __u64 *cookies; 12103 const char **syms; 12104 size_t cnt; 12105 12106 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 12107 return libbpf_err_ptr(-EINVAL); 12108 12109 prog_fd = bpf_program__fd(prog); 12110 if (prog_fd < 0) { 12111 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12112 prog->name); 12113 return libbpf_err_ptr(-EINVAL); 12114 } 12115 12116 syms = OPTS_GET(opts, syms, NULL); 12117 offsets = OPTS_GET(opts, offsets, NULL); 12118 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 12119 cookies = OPTS_GET(opts, cookies, NULL); 12120 cnt = OPTS_GET(opts, cnt, 0); 12121 retprobe = OPTS_GET(opts, retprobe, false); 12122 session = OPTS_GET(opts, session, false); 12123 12124 /* 12125 * User can specify 2 mutually exclusive set of inputs: 12126 * 12127 * 1) use only path/func_pattern/pid arguments 12128 * 12129 * 2) use path/pid with allowed combinations of: 12130 * syms/offsets/ref_ctr_offsets/cookies/cnt 12131 * 12132 * - syms and offsets are mutually exclusive 12133 * - ref_ctr_offsets and cookies are optional 12134 * 12135 * Any other usage results in error. 
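 *
 * For example, a pattern-based attachment passes only path + func_pattern
 * (a negative pid leaves the per-process filter unset), whereas a
 * precomputed attachment passes cnt entries in exactly one of syms or
 * offsets, optionally paired with cookies and/or ref_ctr_offsets arrays of
 * the same length.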
12136 */ 12137 12138 if (!path) 12139 return libbpf_err_ptr(-EINVAL); 12140 if (!func_pattern && cnt == 0) 12141 return libbpf_err_ptr(-EINVAL); 12142 12143 if (func_pattern) { 12144 if (syms || offsets || ref_ctr_offsets || cookies || cnt) 12145 return libbpf_err_ptr(-EINVAL); 12146 } else { 12147 if (!!syms == !!offsets) 12148 return libbpf_err_ptr(-EINVAL); 12149 } 12150 12151 if (retprobe && session) 12152 return libbpf_err_ptr(-EINVAL); 12153 12154 if (func_pattern) { 12155 if (!strchr(path, '/')) { 12156 err = resolve_full_path(path, full_path, sizeof(full_path)); 12157 if (err) { 12158 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", 12159 prog->name, path, errstr(err)); 12160 return libbpf_err_ptr(err); 12161 } 12162 path = full_path; 12163 } 12164 12165 err = elf_resolve_pattern_offsets(path, func_pattern, 12166 &resolved_offsets, &cnt); 12167 if (err < 0) 12168 return libbpf_err_ptr(err); 12169 offsets = resolved_offsets; 12170 } else if (syms) { 12171 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); 12172 if (err < 0) 12173 return libbpf_err_ptr(err); 12174 offsets = resolved_offsets; 12175 } 12176 12177 attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; 12178 12179 lopts.uprobe_multi.path = path; 12180 lopts.uprobe_multi.offsets = offsets; 12181 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 12182 lopts.uprobe_multi.cookies = cookies; 12183 lopts.uprobe_multi.cnt = cnt; 12184 lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; 12185 12186 if (pid == 0) 12187 pid = getpid(); 12188 if (pid > 0) 12189 lopts.uprobe_multi.pid = pid; 12190 12191 link = calloc(1, sizeof(*link)); 12192 if (!link) { 12193 err = -ENOMEM; 12194 goto error; 12195 } 12196 link->detach = &bpf_link__detach_fd; 12197 12198 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); 12199 if (link_fd < 0) { 12200 err = -errno; 12201 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 12202 prog->name, errstr(err)); 12203 goto error; 12204 } 12205 link->fd = link_fd; 12206 free(resolved_offsets); 12207 return link; 12208 12209 error: 12210 free(resolved_offsets); 12211 free(link); 12212 return libbpf_err_ptr(err); 12213 } 12214 12215 LIBBPF_API struct bpf_link * 12216 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, 12217 const char *binary_path, size_t func_offset, 12218 const struct bpf_uprobe_opts *opts) 12219 { 12220 const char *archive_path = NULL, *archive_sep = NULL; 12221 char *legacy_probe = NULL; 12222 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12223 enum probe_attach_mode attach_mode; 12224 char full_path[PATH_MAX]; 12225 struct bpf_link *link; 12226 size_t ref_ctr_off; 12227 int pfd, err; 12228 bool retprobe, legacy; 12229 const char *func_name; 12230 12231 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 12232 return libbpf_err_ptr(-EINVAL); 12233 12234 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 12235 retprobe = OPTS_GET(opts, retprobe, false); 12236 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); 12237 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12238 12239 if (!binary_path) 12240 return libbpf_err_ptr(-EINVAL); 12241 12242 /* Check if "binary_path" refers to an archive. 
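 * Archive members are referenced as "<archive>!/<member>"; e.g. an Android
 * target could look like "/data/app/foo.apk!/lib/arm64-v8a/libfoo.so"
 * (hypothetical path).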
*/ 12243 archive_sep = strstr(binary_path, "!/"); 12244 if (archive_sep) { 12245 full_path[0] = '\0'; 12246 libbpf_strlcpy(full_path, binary_path, 12247 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); 12248 archive_path = full_path; 12249 binary_path = archive_sep + 2; 12250 } else if (!strchr(binary_path, '/')) { 12251 err = resolve_full_path(binary_path, full_path, sizeof(full_path)); 12252 if (err) { 12253 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", 12254 prog->name, binary_path, errstr(err)); 12255 return libbpf_err_ptr(err); 12256 } 12257 binary_path = full_path; 12258 } 12259 func_name = OPTS_GET(opts, func_name, NULL); 12260 if (func_name) { 12261 long sym_off; 12262 12263 if (archive_path) { 12264 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, 12265 func_name); 12266 binary_path = archive_path; 12267 } else { 12268 sym_off = elf_find_func_offset_from_file(binary_path, func_name); 12269 } 12270 if (sym_off < 0) 12271 return libbpf_err_ptr(sym_off); 12272 func_offset += sym_off; 12273 } 12274 12275 legacy = determine_uprobe_perf_type() < 0; 12276 switch (attach_mode) { 12277 case PROBE_ATTACH_MODE_LEGACY: 12278 legacy = true; 12279 pe_opts.force_ioctl_attach = true; 12280 break; 12281 case PROBE_ATTACH_MODE_PERF: 12282 if (legacy) 12283 return libbpf_err_ptr(-ENOTSUP); 12284 pe_opts.force_ioctl_attach = true; 12285 break; 12286 case PROBE_ATTACH_MODE_LINK: 12287 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 12288 return libbpf_err_ptr(-ENOTSUP); 12289 break; 12290 case PROBE_ATTACH_MODE_DEFAULT: 12291 break; 12292 default: 12293 return libbpf_err_ptr(-EINVAL); 12294 } 12295 12296 if (!legacy) { 12297 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, 12298 func_offset, pid, ref_ctr_off); 12299 } else { 12300 char probe_name[PATH_MAX + 64]; 12301 12302 if (ref_ctr_off) 12303 return libbpf_err_ptr(-EINVAL); 12304 12305 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), 12306 binary_path, func_offset); 12307 12308 legacy_probe = strdup(probe_name); 12309 if (!legacy_probe) 12310 return libbpf_err_ptr(-ENOMEM); 12311 12312 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, 12313 binary_path, func_offset, pid); 12314 } 12315 if (pfd < 0) { 12316 err = -errno; 12317 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", 12318 prog->name, retprobe ? "uretprobe" : "uprobe", 12319 binary_path, func_offset, 12320 errstr(err)); 12321 goto err_out; 12322 } 12323 12324 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12325 err = libbpf_get_error(link); 12326 if (err) { 12327 close(pfd); 12328 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", 12329 prog->name, retprobe ? 
"uretprobe" : "uprobe", 12330 binary_path, func_offset, 12331 errstr(err)); 12332 goto err_clean_legacy; 12333 } 12334 if (legacy) { 12335 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 12336 12337 perf_link->legacy_probe_name = legacy_probe; 12338 perf_link->legacy_is_kprobe = false; 12339 perf_link->legacy_is_retprobe = retprobe; 12340 } 12341 return link; 12342 12343 err_clean_legacy: 12344 if (legacy) 12345 remove_uprobe_event_legacy(legacy_probe, retprobe); 12346 err_out: 12347 free(legacy_probe); 12348 return libbpf_err_ptr(err); 12349 } 12350 12351 /* Format of u[ret]probe section definition supporting auto-attach: 12352 * u[ret]probe/binary:function[+offset] 12353 * 12354 * binary can be an absolute/relative path or a filename; the latter is resolved to a 12355 * full binary path via bpf_program__attach_uprobe_opts. 12356 * 12357 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be 12358 * specified (and auto-attach is not possible) or the above format is specified for 12359 * auto-attach. 12360 */ 12361 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12362 { 12363 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); 12364 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; 12365 int n, c, ret = -EINVAL; 12366 long offset = 0; 12367 12368 *link = NULL; 12369 12370 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 12371 &probe_type, &binary_path, &func_name); 12372 switch (n) { 12373 case 1: 12374 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 12375 ret = 0; 12376 break; 12377 case 2: 12378 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", 12379 prog->name, prog->sec_name); 12380 break; 12381 case 3: 12382 /* check if user specifies `+offset`, if yes, this should be 12383 * the last part of the string, make sure sscanf read to EOL 12384 */ 12385 func_off = strrchr(func_name, '+'); 12386 if (func_off) { 12387 n = sscanf(func_off, "+%li%n", &offset, &c); 12388 if (n == 1 && *(func_off + c) == '\0') 12389 func_off[0] = '\0'; 12390 else 12391 offset = 0; 12392 } 12393 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || 12394 strcmp(probe_type, "uretprobe.s") == 0; 12395 if (opts.retprobe && offset != 0) { 12396 pr_warn("prog '%s': uretprobes do not support offset specification\n", 12397 prog->name); 12398 break; 12399 } 12400 opts.func_name = func_name; 12401 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); 12402 ret = libbpf_get_error(*link); 12403 break; 12404 default: 12405 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 12406 prog->sec_name); 12407 break; 12408 } 12409 free(probe_type); 12410 free(binary_path); 12411 free(func_name); 12412 12413 return ret; 12414 } 12415 12416 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, 12417 bool retprobe, pid_t pid, 12418 const char *binary_path, 12419 size_t func_offset) 12420 { 12421 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); 12422 12423 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); 12424 } 12425 12426 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, 12427 pid_t pid, const char *binary_path, 12428 const char *usdt_provider, const char *usdt_name, 12429 const struct bpf_usdt_opts *opts) 12430 { 12431 char resolved_path[512]; 12432 struct bpf_object *obj = prog->obj; 12433 struct 
bpf_link *link; 12434 __u64 usdt_cookie; 12435 int err; 12436 12437 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 12438 return libbpf_err_ptr(-EINVAL); 12439 12440 if (bpf_program__fd(prog) < 0) { 12441 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12442 prog->name); 12443 return libbpf_err_ptr(-EINVAL); 12444 } 12445 12446 if (!binary_path) 12447 return libbpf_err_ptr(-EINVAL); 12448 12449 if (!strchr(binary_path, '/')) { 12450 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); 12451 if (err) { 12452 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", 12453 prog->name, binary_path, errstr(err)); 12454 return libbpf_err_ptr(err); 12455 } 12456 binary_path = resolved_path; 12457 } 12458 12459 /* USDT manager is instantiated lazily on first USDT attach. It will 12460 * be destroyed together with BPF object in bpf_object__close(). 12461 */ 12462 if (IS_ERR(obj->usdt_man)) 12463 return libbpf_ptr(obj->usdt_man); 12464 if (!obj->usdt_man) { 12465 obj->usdt_man = usdt_manager_new(obj); 12466 if (IS_ERR(obj->usdt_man)) 12467 return libbpf_ptr(obj->usdt_man); 12468 } 12469 12470 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); 12471 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, 12472 usdt_provider, usdt_name, usdt_cookie); 12473 err = libbpf_get_error(link); 12474 if (err) 12475 return libbpf_err_ptr(err); 12476 return link; 12477 } 12478 12479 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12480 { 12481 char *path = NULL, *provider = NULL, *name = NULL; 12482 const char *sec_name; 12483 int n, err; 12484 12485 sec_name = bpf_program__section_name(prog); 12486 if (strcmp(sec_name, "usdt") == 0) { 12487 /* no auto-attach for just SEC("usdt") */ 12488 *link = NULL; 12489 return 0; 12490 } 12491 12492 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); 12493 if (n != 3) { 12494 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", 12495 sec_name); 12496 err = -EINVAL; 12497 } else { 12498 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, 12499 provider, name, NULL); 12500 err = libbpf_get_error(*link); 12501 } 12502 free(path); 12503 free(provider); 12504 free(name); 12505 return err; 12506 } 12507 12508 static int determine_tracepoint_id(const char *tp_category, 12509 const char *tp_name) 12510 { 12511 char file[PATH_MAX]; 12512 int ret; 12513 12514 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", 12515 tracefs_path(), tp_category, tp_name); 12516 if (ret < 0) 12517 return -errno; 12518 if (ret >= sizeof(file)) { 12519 pr_debug("tracepoint %s/%s path is too long\n", 12520 tp_category, tp_name); 12521 return -E2BIG; 12522 } 12523 return parse_uint_from_file(file, "%d\n"); 12524 } 12525 12526 static int perf_event_open_tracepoint(const char *tp_category, 12527 const char *tp_name) 12528 { 12529 const size_t attr_sz = sizeof(struct perf_event_attr); 12530 struct perf_event_attr attr; 12531 int tp_id, pfd, err; 12532 12533 tp_id = determine_tracepoint_id(tp_category, tp_name); 12534 if (tp_id < 0) { 12535 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", 12536 tp_category, tp_name, 12537 errstr(tp_id)); 12538 return tp_id; 12539 } 12540 12541 memset(&attr, 0, attr_sz); 12542 attr.type = PERF_TYPE_TRACEPOINT; 12543 attr.size = attr_sz; 12544 attr.config = tp_id; 12545 12546 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, 12547 -1 /* group_fd */, 
PERF_FLAG_FD_CLOEXEC); 12548 if (pfd < 0) { 12549 err = -errno; 12550 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", 12551 tp_category, tp_name, 12552 errstr(err)); 12553 return err; 12554 } 12555 return pfd; 12556 } 12557 12558 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, 12559 const char *tp_category, 12560 const char *tp_name, 12561 const struct bpf_tracepoint_opts *opts) 12562 { 12563 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12564 struct bpf_link *link; 12565 int pfd, err; 12566 12567 if (!OPTS_VALID(opts, bpf_tracepoint_opts)) 12568 return libbpf_err_ptr(-EINVAL); 12569 12570 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12571 12572 pfd = perf_event_open_tracepoint(tp_category, tp_name); 12573 if (pfd < 0) { 12574 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", 12575 prog->name, tp_category, tp_name, 12576 errstr(pfd)); 12577 return libbpf_err_ptr(pfd); 12578 } 12579 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12580 err = libbpf_get_error(link); 12581 if (err) { 12582 close(pfd); 12583 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", 12584 prog->name, tp_category, tp_name, 12585 errstr(err)); 12586 return libbpf_err_ptr(err); 12587 } 12588 return link; 12589 } 12590 12591 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, 12592 const char *tp_category, 12593 const char *tp_name) 12594 { 12595 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); 12596 } 12597 12598 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12599 { 12600 char *sec_name, *tp_cat, *tp_name; 12601 12602 *link = NULL; 12603 12604 /* no auto-attach for SEC("tp") or SEC("tracepoint") */ 12605 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) 12606 return 0; 12607 12608 sec_name = strdup(prog->sec_name); 12609 if (!sec_name) 12610 return -ENOMEM; 12611 12612 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ 12613 if (str_has_pfx(prog->sec_name, "tp/")) 12614 tp_cat = sec_name + sizeof("tp/") - 1; 12615 else 12616 tp_cat = sec_name + sizeof("tracepoint/") - 1; 12617 tp_name = strchr(tp_cat, '/'); 12618 if (!tp_name) { 12619 free(sec_name); 12620 return -EINVAL; 12621 } 12622 *tp_name = '\0'; 12623 tp_name++; 12624 12625 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); 12626 free(sec_name); 12627 return libbpf_get_error(*link); 12628 } 12629 12630 struct bpf_link * 12631 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, 12632 const char *tp_name, 12633 struct bpf_raw_tracepoint_opts *opts) 12634 { 12635 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); 12636 struct bpf_link *link; 12637 int prog_fd, pfd; 12638 12639 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) 12640 return libbpf_err_ptr(-EINVAL); 12641 12642 prog_fd = bpf_program__fd(prog); 12643 if (prog_fd < 0) { 12644 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12645 return libbpf_err_ptr(-EINVAL); 12646 } 12647 12648 link = calloc(1, sizeof(*link)); 12649 if (!link) 12650 return libbpf_err_ptr(-ENOMEM); 12651 link->detach = &bpf_link__detach_fd; 12652 12653 raw_opts.tp_name = tp_name; 12654 raw_opts.cookie = OPTS_GET(opts, cookie, 0); 12655 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); 12656 if (pfd < 0) { 12657 pfd = -errno; 12658 free(link); 12659 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", 12660 
prog->name, tp_name, errstr(pfd)); 12661 return libbpf_err_ptr(pfd); 12662 } 12663 link->fd = pfd; 12664 return link; 12665 } 12666 12667 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, 12668 const char *tp_name) 12669 { 12670 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); 12671 } 12672 12673 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12674 { 12675 static const char *const prefixes[] = { 12676 "raw_tp", 12677 "raw_tracepoint", 12678 "raw_tp.w", 12679 "raw_tracepoint.w", 12680 }; 12681 size_t i; 12682 const char *tp_name = NULL; 12683 12684 *link = NULL; 12685 12686 for (i = 0; i < ARRAY_SIZE(prefixes); i++) { 12687 size_t pfx_len; 12688 12689 if (!str_has_pfx(prog->sec_name, prefixes[i])) 12690 continue; 12691 12692 pfx_len = strlen(prefixes[i]); 12693 /* no auto-attach case of, e.g., SEC("raw_tp") */ 12694 if (prog->sec_name[pfx_len] == '\0') 12695 return 0; 12696 12697 if (prog->sec_name[pfx_len] != '/') 12698 continue; 12699 12700 tp_name = prog->sec_name + pfx_len + 1; 12701 break; 12702 } 12703 12704 if (!tp_name) { 12705 pr_warn("prog '%s': invalid section name '%s'\n", 12706 prog->name, prog->sec_name); 12707 return -EINVAL; 12708 } 12709 12710 *link = bpf_program__attach_raw_tracepoint(prog, tp_name); 12711 return libbpf_get_error(*link); 12712 } 12713 12714 /* Common logic for all BPF program types that attach to a btf_id */ 12715 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, 12716 const struct bpf_trace_opts *opts) 12717 { 12718 LIBBPF_OPTS(bpf_link_create_opts, link_opts); 12719 struct bpf_link *link; 12720 int prog_fd, pfd; 12721 12722 if (!OPTS_VALID(opts, bpf_trace_opts)) 12723 return libbpf_err_ptr(-EINVAL); 12724 12725 prog_fd = bpf_program__fd(prog); 12726 if (prog_fd < 0) { 12727 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12728 return libbpf_err_ptr(-EINVAL); 12729 } 12730 12731 link = calloc(1, sizeof(*link)); 12732 if (!link) 12733 return libbpf_err_ptr(-ENOMEM); 12734 link->detach = &bpf_link__detach_fd; 12735 12736 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ 12737 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); 12738 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); 12739 if (pfd < 0) { 12740 pfd = -errno; 12741 free(link); 12742 pr_warn("prog '%s': failed to attach: %s\n", 12743 prog->name, errstr(pfd)); 12744 return libbpf_err_ptr(pfd); 12745 } 12746 link->fd = pfd; 12747 return link; 12748 } 12749 12750 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) 12751 { 12752 return bpf_program__attach_btf_id(prog, NULL); 12753 } 12754 12755 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, 12756 const struct bpf_trace_opts *opts) 12757 { 12758 return bpf_program__attach_btf_id(prog, opts); 12759 } 12760 12761 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) 12762 { 12763 return bpf_program__attach_btf_id(prog, NULL); 12764 } 12765 12766 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12767 { 12768 *link = bpf_program__attach_trace(prog); 12769 return libbpf_get_error(*link); 12770 } 12771 12772 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12773 { 12774 *link = bpf_program__attach_lsm(prog); 12775 return libbpf_get_error(*link); 12776 } 12777 12778 static struct 
bpf_link * 12779 bpf_program_attach_fd(const struct bpf_program *prog, 12780 int target_fd, const char *target_name, 12781 const struct bpf_link_create_opts *opts) 12782 { 12783 enum bpf_attach_type attach_type; 12784 struct bpf_link *link; 12785 int prog_fd, link_fd; 12786 12787 prog_fd = bpf_program__fd(prog); 12788 if (prog_fd < 0) { 12789 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12790 return libbpf_err_ptr(-EINVAL); 12791 } 12792 12793 link = calloc(1, sizeof(*link)); 12794 if (!link) 12795 return libbpf_err_ptr(-ENOMEM); 12796 link->detach = &bpf_link__detach_fd; 12797 12798 attach_type = bpf_program__expected_attach_type(prog); 12799 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); 12800 if (link_fd < 0) { 12801 link_fd = -errno; 12802 free(link); 12803 pr_warn("prog '%s': failed to attach to %s: %s\n", 12804 prog->name, target_name, 12805 errstr(link_fd)); 12806 return libbpf_err_ptr(link_fd); 12807 } 12808 link->fd = link_fd; 12809 return link; 12810 } 12811 12812 struct bpf_link * 12813 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) 12814 { 12815 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); 12816 } 12817 12818 struct bpf_link * 12819 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) 12820 { 12821 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); 12822 } 12823 12824 struct bpf_link * 12825 bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd) 12826 { 12827 return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL); 12828 } 12829 12830 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) 12831 { 12832 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12833 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); 12834 } 12835 12836 struct bpf_link * 12837 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, 12838 const struct bpf_tcx_opts *opts) 12839 { 12840 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12841 __u32 relative_id; 12842 int relative_fd; 12843 12844 if (!OPTS_VALID(opts, bpf_tcx_opts)) 12845 return libbpf_err_ptr(-EINVAL); 12846 12847 relative_id = OPTS_GET(opts, relative_id, 0); 12848 relative_fd = OPTS_GET(opts, relative_fd, 0); 12849 12850 /* validate we don't have unexpected combinations of non-zero fields */ 12851 if (!ifindex) { 12852 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12853 prog->name); 12854 return libbpf_err_ptr(-EINVAL); 12855 } 12856 if (relative_fd && relative_id) { 12857 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12858 prog->name); 12859 return libbpf_err_ptr(-EINVAL); 12860 } 12861 12862 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); 12863 link_create_opts.tcx.relative_fd = relative_fd; 12864 link_create_opts.tcx.relative_id = relative_id; 12865 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12866 12867 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12868 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); 12869 } 12870 12871 struct bpf_link * 12872 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, 12873 const struct bpf_netkit_opts *opts) 12874 { 12875 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12876 __u32 relative_id; 12877 int relative_fd; 12878 12879 if (!OPTS_VALID(opts, bpf_netkit_opts)) 12880 return libbpf_err_ptr(-EINVAL); 12881 12882 relative_id = OPTS_GET(opts, 
relative_id, 0); 12883 relative_fd = OPTS_GET(opts, relative_fd, 0); 12884 12885 /* validate we don't have unexpected combinations of non-zero fields */ 12886 if (!ifindex) { 12887 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12888 prog->name); 12889 return libbpf_err_ptr(-EINVAL); 12890 } 12891 if (relative_fd && relative_id) { 12892 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12893 prog->name); 12894 return libbpf_err_ptr(-EINVAL); 12895 } 12896 12897 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); 12898 link_create_opts.netkit.relative_fd = relative_fd; 12899 link_create_opts.netkit.relative_id = relative_id; 12900 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12901 12902 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); 12903 } 12904 12905 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, 12906 int target_fd, 12907 const char *attach_func_name) 12908 { 12909 int btf_id; 12910 12911 if (!!target_fd != !!attach_func_name) { 12912 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", 12913 prog->name); 12914 return libbpf_err_ptr(-EINVAL); 12915 } 12916 12917 if (prog->type != BPF_PROG_TYPE_EXT) { 12918 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", 12919 prog->name); 12920 return libbpf_err_ptr(-EINVAL); 12921 } 12922 12923 if (target_fd) { 12924 LIBBPF_OPTS(bpf_link_create_opts, target_opts); 12925 12926 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); 12927 if (btf_id < 0) 12928 return libbpf_err_ptr(btf_id); 12929 12930 target_opts.target_btf_id = btf_id; 12931 12932 return bpf_program_attach_fd(prog, target_fd, "freplace", 12933 &target_opts); 12934 } else { 12935 /* no target, so use raw_tracepoint_open for compatibility 12936 * with old kernels 12937 */ 12938 return bpf_program__attach_trace(prog); 12939 } 12940 } 12941 12942 struct bpf_link * 12943 bpf_program__attach_iter(const struct bpf_program *prog, 12944 const struct bpf_iter_attach_opts *opts) 12945 { 12946 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12947 struct bpf_link *link; 12948 int prog_fd, link_fd; 12949 __u32 target_fd = 0; 12950 12951 if (!OPTS_VALID(opts, bpf_iter_attach_opts)) 12952 return libbpf_err_ptr(-EINVAL); 12953 12954 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); 12955 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); 12956 12957 prog_fd = bpf_program__fd(prog); 12958 if (prog_fd < 0) { 12959 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12960 return libbpf_err_ptr(-EINVAL); 12961 } 12962 12963 link = calloc(1, sizeof(*link)); 12964 if (!link) 12965 return libbpf_err_ptr(-ENOMEM); 12966 link->detach = &bpf_link__detach_fd; 12967 12968 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, 12969 &link_create_opts); 12970 if (link_fd < 0) { 12971 link_fd = -errno; 12972 free(link); 12973 pr_warn("prog '%s': failed to attach to iterator: %s\n", 12974 prog->name, errstr(link_fd)); 12975 return libbpf_err_ptr(link_fd); 12976 } 12977 link->fd = link_fd; 12978 return link; 12979 } 12980 12981 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12982 { 12983 *link = bpf_program__attach_iter(prog, NULL); 12984 return libbpf_get_error(*link); 12985 } 12986 12987 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, 12988 const struct 
bpf_netfilter_opts *opts) 12989 { 12990 LIBBPF_OPTS(bpf_link_create_opts, lopts); 12991 struct bpf_link *link; 12992 int prog_fd, link_fd; 12993 12994 if (!OPTS_VALID(opts, bpf_netfilter_opts)) 12995 return libbpf_err_ptr(-EINVAL); 12996 12997 prog_fd = bpf_program__fd(prog); 12998 if (prog_fd < 0) { 12999 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 13000 return libbpf_err_ptr(-EINVAL); 13001 } 13002 13003 link = calloc(1, sizeof(*link)); 13004 if (!link) 13005 return libbpf_err_ptr(-ENOMEM); 13006 13007 link->detach = &bpf_link__detach_fd; 13008 13009 lopts.netfilter.pf = OPTS_GET(opts, pf, 0); 13010 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); 13011 lopts.netfilter.priority = OPTS_GET(opts, priority, 0); 13012 lopts.netfilter.flags = OPTS_GET(opts, flags, 0); 13013 13014 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); 13015 if (link_fd < 0) { 13016 link_fd = -errno; 13017 free(link); 13018 pr_warn("prog '%s': failed to attach to netfilter: %s\n", 13019 prog->name, errstr(link_fd)); 13020 return libbpf_err_ptr(link_fd); 13021 } 13022 link->fd = link_fd; 13023 13024 return link; 13025 } 13026 13027 struct bpf_link *bpf_program__attach(const struct bpf_program *prog) 13028 { 13029 struct bpf_link *link = NULL; 13030 int err; 13031 13032 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 13033 return libbpf_err_ptr(-EOPNOTSUPP); 13034 13035 if (bpf_program__fd(prog) < 0) { 13036 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 13037 prog->name); 13038 return libbpf_err_ptr(-EINVAL); 13039 } 13040 13041 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); 13042 if (err) 13043 return libbpf_err_ptr(err); 13044 13045 /* When calling bpf_program__attach() explicitly, auto-attach support 13046 * is expected to work, so NULL returned link is considered an error. 13047 * This is different for skeleton's attach, see comment in 13048 * bpf_object__attach_skeleton(). 13049 */ 13050 if (!link) 13051 return libbpf_err_ptr(-EOPNOTSUPP); 13052 13053 return link; 13054 } 13055 13056 struct bpf_link_struct_ops { 13057 struct bpf_link link; 13058 int map_fd; 13059 }; 13060 13061 static int bpf_link__detach_struct_ops(struct bpf_link *link) 13062 { 13063 struct bpf_link_struct_ops *st_link; 13064 __u32 zero = 0; 13065 13066 st_link = container_of(link, struct bpf_link_struct_ops, link); 13067 13068 if (st_link->map_fd < 0) 13069 /* w/o a real link */ 13070 return bpf_map_delete_elem(link->fd, &zero); 13071 13072 return close(link->fd); 13073 } 13074 13075 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) 13076 { 13077 struct bpf_link_struct_ops *link; 13078 __u32 zero = 0; 13079 int err, fd; 13080 13081 if (!bpf_map__is_struct_ops(map)) { 13082 pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); 13083 return libbpf_err_ptr(-EINVAL); 13084 } 13085 13086 if (map->fd < 0) { 13087 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); 13088 return libbpf_err_ptr(-EINVAL); 13089 } 13090 13091 link = calloc(1, sizeof(*link)); 13092 if (!link) 13093 return libbpf_err_ptr(-EINVAL); 13094 13095 /* kern_vdata should be prepared during the loading phase. */ 13096 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); 13097 /* It can be EBUSY if the map has been used to create or 13098 * update a link before. We don't allow updating the value of 13099 * a struct_ops once it is set. That ensures that the value 13100 * never changed. 
So, it is safe to skip EBUSY. 13101 */ 13102 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) { 13103 free(link); 13104 return libbpf_err_ptr(err); 13105 } 13106 13107 link->link.detach = bpf_link__detach_struct_ops; 13108 13109 if (!(map->def.map_flags & BPF_F_LINK)) { 13110 /* w/o a real link */ 13111 link->link.fd = map->fd; 13112 link->map_fd = -1; 13113 return &link->link; 13114 } 13115 13116 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL); 13117 if (fd < 0) { 13118 free(link); 13119 return libbpf_err_ptr(fd); 13120 } 13121 13122 link->link.fd = fd; 13123 link->map_fd = map->fd; 13124 13125 return &link->link; 13126 } 13127 13128 /* 13129 * Swap the back struct_ops of a link with a new struct_ops map. 13130 */ 13131 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) 13132 { 13133 struct bpf_link_struct_ops *st_ops_link; 13134 __u32 zero = 0; 13135 int err; 13136 13137 if (!bpf_map__is_struct_ops(map)) 13138 return libbpf_err(-EINVAL); 13139 13140 if (map->fd < 0) { 13141 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); 13142 return libbpf_err(-EINVAL); 13143 } 13144 13145 st_ops_link = container_of(link, struct bpf_link_struct_ops, link); 13146 /* Ensure the type of a link is correct */ 13147 if (st_ops_link->map_fd < 0) 13148 return libbpf_err(-EINVAL); 13149 13150 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); 13151 /* It can be EBUSY if the map has been used to create or 13152 * update a link before. We don't allow updating the value of 13153 * a struct_ops once it is set. That ensures that the value 13154 * never changed. So, it is safe to skip EBUSY. 13155 */ 13156 if (err && err != -EBUSY) 13157 return err; 13158 13159 err = bpf_link_update(link->fd, map->fd, NULL); 13160 if (err < 0) 13161 return err; 13162 13163 st_ops_link->map_fd = map->fd; 13164 13165 return 0; 13166 } 13167 13168 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, 13169 void *private_data); 13170 13171 static enum bpf_perf_event_ret 13172 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 13173 void **copy_mem, size_t *copy_size, 13174 bpf_perf_event_print_t fn, void *private_data) 13175 { 13176 struct perf_event_mmap_page *header = mmap_mem; 13177 __u64 data_head = ring_buffer_read_head(header); 13178 __u64 data_tail = header->data_tail; 13179 void *base = ((__u8 *)header) + page_size; 13180 int ret = LIBBPF_PERF_EVENT_CONT; 13181 struct perf_event_header *ehdr; 13182 size_t ehdr_size; 13183 13184 while (data_head != data_tail) { 13185 ehdr = base + (data_tail & (mmap_size - 1)); 13186 ehdr_size = ehdr->size; 13187 13188 if (((void *)ehdr) + ehdr_size > base + mmap_size) { 13189 void *copy_start = ehdr; 13190 size_t len_first = base + mmap_size - copy_start; 13191 size_t len_secnd = ehdr_size - len_first; 13192 13193 if (*copy_size < ehdr_size) { 13194 free(*copy_mem); 13195 *copy_mem = malloc(ehdr_size); 13196 if (!*copy_mem) { 13197 *copy_size = 0; 13198 ret = LIBBPF_PERF_EVENT_ERROR; 13199 break; 13200 } 13201 *copy_size = ehdr_size; 13202 } 13203 13204 memcpy(*copy_mem, copy_start, len_first); 13205 memcpy(*copy_mem + len_first, base, len_secnd); 13206 ehdr = *copy_mem; 13207 } 13208 13209 ret = fn(ehdr, private_data); 13210 data_tail += ehdr_size; 13211 if (ret != LIBBPF_PERF_EVENT_CONT) 13212 break; 13213 } 13214 13215 ring_buffer_write_tail(header, data_tail); 13216 return libbpf_err(ret); 13217 } 13218 13219 struct perf_buffer; 13220 
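/* The code below implements libbpf's per-CPU perf ring buffer consumer on
 * top of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. A rough, illustrative sketch
 * of how an application typically drives this API (not part of libbpf
 * itself; handle_sample(), handle_lost() and map_fd are hypothetical
 * caller-side names):
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// one raw sample emitted via bpf_perf_event_output()
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		// cnt samples were dropped on this CPU
 *	}
 *
 *	struct perf_buffer *pb;
 *	int n;
 *
 *	// page_cnt (2nd argument) must be a non-zero power of two
 *	pb = perf_buffer__new(map_fd, 64, handle_sample, handle_lost, NULL, NULL);
 *	if (!pb)
 *		return -errno; // libbpf sets errno on failure
 *	for (;;) {
 *		n = perf_buffer__poll(pb, 100); // timeout in ms
 *		if (n < 0 && n != -EINTR)
 *			break;
 *	}
 *	perf_buffer__free(pb);
 */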
13221 struct perf_buffer_params {
13222 struct perf_event_attr *attr;
13223 /* if event_cb is specified, it takes precedence */
13224 perf_buffer_event_fn event_cb;
13225 /* sample_cb and lost_cb are higher-level common-case callbacks */
13226 perf_buffer_sample_fn sample_cb;
13227 perf_buffer_lost_fn lost_cb;
13228 void *ctx;
13229 int cpu_cnt;
13230 int *cpus;
13231 int *map_keys;
13232 };
13233
13234 struct perf_cpu_buf {
13235 struct perf_buffer *pb;
13236 void *base; /* mmap()'ed memory */
13237 void *buf; /* for reconstructing segmented data */
13238 size_t buf_size;
13239 int fd;
13240 int cpu;
13241 int map_key;
13242 };
13243
13244 struct perf_buffer {
13245 perf_buffer_event_fn event_cb;
13246 perf_buffer_sample_fn sample_cb;
13247 perf_buffer_lost_fn lost_cb;
13248 void *ctx; /* passed into callbacks */
13249
13250 size_t page_size;
13251 size_t mmap_size;
13252 struct perf_cpu_buf **cpu_bufs;
13253 struct epoll_event *events;
13254 int cpu_cnt; /* number of allocated CPU buffers */
13255 int epoll_fd; /* epoll FD used to poll per-CPU perf buffers */
13256 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13257 };
13258
13259 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13260 struct perf_cpu_buf *cpu_buf)
13261 {
13262 if (!cpu_buf)
13263 return;
13264 if (cpu_buf->base &&
13265 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13266 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13267 if (cpu_buf->fd >= 0) {
13268 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13269 close(cpu_buf->fd);
13270 }
13271 free(cpu_buf->buf);
13272 free(cpu_buf);
13273 }
13274
13275 void perf_buffer__free(struct perf_buffer *pb)
13276 {
13277 int i;
13278
13279 if (IS_ERR_OR_NULL(pb))
13280 return;
13281 if (pb->cpu_bufs) {
13282 for (i = 0; i < pb->cpu_cnt; i++) {
13283 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13284
13285 if (!cpu_buf)
13286 continue;
13287
13288 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
13289 perf_buffer__free_cpu_buf(pb, cpu_buf);
13290 }
13291 free(pb->cpu_bufs);
13292 }
13293 if (pb->epoll_fd >= 0)
13294 close(pb->epoll_fd);
13295 free(pb->events);
13296 free(pb);
13297 }
13298
13299 static struct perf_cpu_buf *
13300 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
13301 int cpu, int map_key)
13302 {
13303 struct perf_cpu_buf *cpu_buf;
13304 int err;
13305
13306 cpu_buf = calloc(1, sizeof(*cpu_buf));
13307 if (!cpu_buf)
13308 return ERR_PTR(-ENOMEM);
13309
13310 cpu_buf->pb = pb;
13311 cpu_buf->cpu = cpu;
13312 cpu_buf->map_key = map_key;
13313
13314 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
13315 -1, PERF_FLAG_FD_CLOEXEC);
13316 if (cpu_buf->fd < 0) {
13317 err = -errno;
13318 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
13319 cpu, errstr(err));
13320 goto error;
13321 }
13322
13323 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13324 PROT_READ | PROT_WRITE, MAP_SHARED,
13325 cpu_buf->fd, 0);
13326 if (cpu_buf->base == MAP_FAILED) {
13327 cpu_buf->base = NULL;
13328 err = -errno;
13329 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13330 cpu, errstr(err));
13331 goto error;
13332 }
13333
13334 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13335 err = -errno;
13336 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13337 cpu, errstr(err));
13338 goto error;
13339 }
13340
13341 return cpu_buf;
13342
13343 error:
13344 perf_buffer__free_cpu_buf(pb, cpu_buf);
13345 return (struct perf_cpu_buf *)ERR_PTR(err);
13346 }
13347
13348 static
struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13349 struct perf_buffer_params *p); 13350 13351 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, 13352 perf_buffer_sample_fn sample_cb, 13353 perf_buffer_lost_fn lost_cb, 13354 void *ctx, 13355 const struct perf_buffer_opts *opts) 13356 { 13357 const size_t attr_sz = sizeof(struct perf_event_attr); 13358 struct perf_buffer_params p = {}; 13359 struct perf_event_attr attr; 13360 __u32 sample_period; 13361 13362 if (!OPTS_VALID(opts, perf_buffer_opts)) 13363 return libbpf_err_ptr(-EINVAL); 13364 13365 sample_period = OPTS_GET(opts, sample_period, 1); 13366 if (!sample_period) 13367 sample_period = 1; 13368 13369 memset(&attr, 0, attr_sz); 13370 attr.size = attr_sz; 13371 attr.config = PERF_COUNT_SW_BPF_OUTPUT; 13372 attr.type = PERF_TYPE_SOFTWARE; 13373 attr.sample_type = PERF_SAMPLE_RAW; 13374 attr.sample_period = sample_period; 13375 attr.wakeup_events = sample_period; 13376 13377 p.attr = &attr; 13378 p.sample_cb = sample_cb; 13379 p.lost_cb = lost_cb; 13380 p.ctx = ctx; 13381 13382 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13383 } 13384 13385 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, 13386 struct perf_event_attr *attr, 13387 perf_buffer_event_fn event_cb, void *ctx, 13388 const struct perf_buffer_raw_opts *opts) 13389 { 13390 struct perf_buffer_params p = {}; 13391 13392 if (!attr) 13393 return libbpf_err_ptr(-EINVAL); 13394 13395 if (!OPTS_VALID(opts, perf_buffer_raw_opts)) 13396 return libbpf_err_ptr(-EINVAL); 13397 13398 p.attr = attr; 13399 p.event_cb = event_cb; 13400 p.ctx = ctx; 13401 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); 13402 p.cpus = OPTS_GET(opts, cpus, NULL); 13403 p.map_keys = OPTS_GET(opts, map_keys, NULL); 13404 13405 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13406 } 13407 13408 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13409 struct perf_buffer_params *p) 13410 { 13411 const char *online_cpus_file = "/sys/devices/system/cpu/online"; 13412 struct bpf_map_info map; 13413 struct perf_buffer *pb; 13414 bool *online = NULL; 13415 __u32 map_info_len; 13416 int err, i, j, n; 13417 13418 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { 13419 pr_warn("page count should be power of two, but is %zu\n", 13420 page_cnt); 13421 return ERR_PTR(-EINVAL); 13422 } 13423 13424 /* best-effort sanity checks */ 13425 memset(&map, 0, sizeof(map)); 13426 map_info_len = sizeof(map); 13427 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len); 13428 if (err) { 13429 err = -errno; 13430 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return 13431 * -EBADFD, -EFAULT, or -E2BIG on real error 13432 */ 13433 if (err != -EINVAL) { 13434 pr_warn("failed to get map info for map FD %d: %s\n", 13435 map_fd, errstr(err)); 13436 return ERR_PTR(err); 13437 } 13438 pr_debug("failed to get map info for FD %d; API not supported? 
Ignoring...\n", 13439 map_fd); 13440 } else { 13441 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { 13442 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", 13443 map.name); 13444 return ERR_PTR(-EINVAL); 13445 } 13446 } 13447 13448 pb = calloc(1, sizeof(*pb)); 13449 if (!pb) 13450 return ERR_PTR(-ENOMEM); 13451 13452 pb->event_cb = p->event_cb; 13453 pb->sample_cb = p->sample_cb; 13454 pb->lost_cb = p->lost_cb; 13455 pb->ctx = p->ctx; 13456 13457 pb->page_size = getpagesize(); 13458 pb->mmap_size = pb->page_size * page_cnt; 13459 pb->map_fd = map_fd; 13460 13461 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); 13462 if (pb->epoll_fd < 0) { 13463 err = -errno; 13464 pr_warn("failed to create epoll instance: %s\n", 13465 errstr(err)); 13466 goto error; 13467 } 13468 13469 if (p->cpu_cnt > 0) { 13470 pb->cpu_cnt = p->cpu_cnt; 13471 } else { 13472 pb->cpu_cnt = libbpf_num_possible_cpus(); 13473 if (pb->cpu_cnt < 0) { 13474 err = pb->cpu_cnt; 13475 goto error; 13476 } 13477 if (map.max_entries && map.max_entries < pb->cpu_cnt) 13478 pb->cpu_cnt = map.max_entries; 13479 } 13480 13481 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); 13482 if (!pb->events) { 13483 err = -ENOMEM; 13484 pr_warn("failed to allocate events: out of memory\n"); 13485 goto error; 13486 } 13487 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); 13488 if (!pb->cpu_bufs) { 13489 err = -ENOMEM; 13490 pr_warn("failed to allocate buffers: out of memory\n"); 13491 goto error; 13492 } 13493 13494 err = parse_cpu_mask_file(online_cpus_file, &online, &n); 13495 if (err) { 13496 pr_warn("failed to get online CPU mask: %s\n", errstr(err)); 13497 goto error; 13498 } 13499 13500 for (i = 0, j = 0; i < pb->cpu_cnt; i++) { 13501 struct perf_cpu_buf *cpu_buf; 13502 int cpu, map_key; 13503 13504 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; 13505 map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; 13506 13507 /* in case user didn't explicitly requested particular CPUs to 13508 * be attached to, skip offline/not present CPUs 13509 */ 13510 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) 13511 continue; 13512 13513 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); 13514 if (IS_ERR(cpu_buf)) { 13515 err = PTR_ERR(cpu_buf); 13516 goto error; 13517 } 13518 13519 pb->cpu_bufs[j] = cpu_buf; 13520 13521 err = bpf_map_update_elem(pb->map_fd, &map_key, 13522 &cpu_buf->fd, 0); 13523 if (err) { 13524 err = -errno; 13525 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", 13526 cpu, map_key, cpu_buf->fd, 13527 errstr(err)); 13528 goto error; 13529 } 13530 13531 pb->events[j].events = EPOLLIN; 13532 pb->events[j].data.ptr = cpu_buf; 13533 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, 13534 &pb->events[j]) < 0) { 13535 err = -errno; 13536 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", 13537 cpu, cpu_buf->fd, 13538 errstr(err)); 13539 goto error; 13540 } 13541 j++; 13542 } 13543 pb->cpu_cnt = j; 13544 free(online); 13545 13546 return pb; 13547 13548 error: 13549 free(online); 13550 if (pb) 13551 perf_buffer__free(pb); 13552 return ERR_PTR(err); 13553 } 13554 13555 struct perf_sample_raw { 13556 struct perf_event_header header; 13557 uint32_t size; 13558 char data[]; 13559 }; 13560 13561 struct perf_sample_lost { 13562 struct perf_event_header header; 13563 uint64_t id; 13564 uint64_t lost; 13565 uint64_t sample_id; 13566 }; 13567 13568 static enum bpf_perf_event_ret 13569 perf_buffer__process_record(struct perf_event_header *e, void *ctx) 13570 { 13571 struct perf_cpu_buf *cpu_buf = ctx; 13572 struct perf_buffer *pb = cpu_buf->pb; 13573 void *data = e; 13574 13575 /* user wants full control over parsing perf event */ 13576 if (pb->event_cb) 13577 return pb->event_cb(pb->ctx, cpu_buf->cpu, e); 13578 13579 switch (e->type) { 13580 case PERF_RECORD_SAMPLE: { 13581 struct perf_sample_raw *s = data; 13582 13583 if (pb->sample_cb) 13584 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size); 13585 break; 13586 } 13587 case PERF_RECORD_LOST: { 13588 struct perf_sample_lost *s = data; 13589 13590 if (pb->lost_cb) 13591 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost); 13592 break; 13593 } 13594 default: 13595 pr_warn("unknown perf sample type %d\n", e->type); 13596 return LIBBPF_PERF_EVENT_ERROR; 13597 } 13598 return LIBBPF_PERF_EVENT_CONT; 13599 } 13600 13601 static int perf_buffer__process_records(struct perf_buffer *pb, 13602 struct perf_cpu_buf *cpu_buf) 13603 { 13604 enum bpf_perf_event_ret ret; 13605 13606 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, 13607 pb->page_size, &cpu_buf->buf, 13608 &cpu_buf->buf_size, 13609 perf_buffer__process_record, cpu_buf); 13610 if (ret != LIBBPF_PERF_EVENT_CONT) 13611 return ret; 13612 return 0; 13613 } 13614 13615 int perf_buffer__epoll_fd(const struct perf_buffer *pb) 13616 { 13617 return pb->epoll_fd; 13618 } 13619 13620 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) 13621 { 13622 int i, cnt, err; 13623 13624 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); 13625 if (cnt < 0) 13626 return -errno; 13627 13628 for (i = 0; i < cnt; i++) { 13629 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; 13630 13631 err = perf_buffer__process_records(pb, cpu_buf); 13632 if (err) { 13633 pr_warn("error while processing records: %s\n", errstr(err)); 13634 return libbpf_err(err); 13635 } 13636 } 13637 return cnt; 13638 } 13639 13640 /* Return number of PERF_EVENT_ARRAY 
map slots set up by this perf_buffer 13641 * manager. 13642 */ 13643 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) 13644 { 13645 return pb->cpu_cnt; 13646 } 13647 13648 /* 13649 * Return perf_event FD of a ring buffer in *buf_idx* slot of 13650 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using 13651 * select()/poll()/epoll() Linux syscalls. 13652 */ 13653 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) 13654 { 13655 struct perf_cpu_buf *cpu_buf; 13656 13657 if (buf_idx >= pb->cpu_cnt) 13658 return libbpf_err(-EINVAL); 13659 13660 cpu_buf = pb->cpu_bufs[buf_idx]; 13661 if (!cpu_buf) 13662 return libbpf_err(-ENOENT); 13663 13664 return cpu_buf->fd; 13665 } 13666 13667 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) 13668 { 13669 struct perf_cpu_buf *cpu_buf; 13670 13671 if (buf_idx >= pb->cpu_cnt) 13672 return libbpf_err(-EINVAL); 13673 13674 cpu_buf = pb->cpu_bufs[buf_idx]; 13675 if (!cpu_buf) 13676 return libbpf_err(-ENOENT); 13677 13678 *buf = cpu_buf->base; 13679 *buf_size = pb->mmap_size; 13680 return 0; 13681 } 13682 13683 /* 13684 * Consume data from perf ring buffer corresponding to slot *buf_idx* in 13685 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to 13686 * consume, do nothing and return success. 13687 * Returns: 13688 * - 0 on success; 13689 * - <0 on failure. 13690 */ 13691 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) 13692 { 13693 struct perf_cpu_buf *cpu_buf; 13694 13695 if (buf_idx >= pb->cpu_cnt) 13696 return libbpf_err(-EINVAL); 13697 13698 cpu_buf = pb->cpu_bufs[buf_idx]; 13699 if (!cpu_buf) 13700 return libbpf_err(-ENOENT); 13701 13702 return perf_buffer__process_records(pb, cpu_buf); 13703 } 13704 13705 int perf_buffer__consume(struct perf_buffer *pb) 13706 { 13707 int i, err; 13708 13709 for (i = 0; i < pb->cpu_cnt; i++) { 13710 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 13711 13712 if (!cpu_buf) 13713 continue; 13714 13715 err = perf_buffer__process_records(pb, cpu_buf); 13716 if (err) { 13717 pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", 13718 i, errstr(err)); 13719 return libbpf_err(err); 13720 } 13721 } 13722 return 0; 13723 } 13724 13725 int bpf_program__set_attach_target(struct bpf_program *prog, 13726 int attach_prog_fd, 13727 const char *attach_func_name) 13728 { 13729 int btf_obj_fd = 0, btf_id = 0, err; 13730 13731 if (!prog || attach_prog_fd < 0) 13732 return libbpf_err(-EINVAL); 13733 13734 if (prog->obj->state >= OBJ_LOADED) 13735 return libbpf_err(-EINVAL); 13736 13737 if (attach_prog_fd && !attach_func_name) { 13738 /* remember attach_prog_fd and let bpf_program__load() find 13739 * BTF ID during the program load 13740 */ 13741 prog->attach_prog_fd = attach_prog_fd; 13742 return 0; 13743 } 13744 13745 if (attach_prog_fd) { 13746 btf_id = libbpf_find_prog_btf_id(attach_func_name, 13747 attach_prog_fd, prog->obj->token_fd); 13748 if (btf_id < 0) 13749 return libbpf_err(btf_id); 13750 } else { 13751 if (!attach_func_name) 13752 return libbpf_err(-EINVAL); 13753 13754 /* load btf_vmlinux, if not yet */ 13755 err = bpf_object__load_vmlinux_btf(prog->obj, true); 13756 if (err) 13757 return libbpf_err(err); 13758 err = find_kernel_btf_id(prog->obj, attach_func_name, 13759 prog->expected_attach_type, 13760 &btf_obj_fd, &btf_id); 13761 if (err) 13762 return libbpf_err(err); 13763 } 13764 13765 prog->attach_btf_id = btf_id; 13766 prog->attach_btf_obj_fd = btf_obj_fd; 13767 
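/* also record the target prog FD; it stays 0 when the attach target is a kernel function rather than another BPF program */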
prog->attach_prog_fd = attach_prog_fd; 13768 return 0; 13769 } 13770 13771 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) 13772 { 13773 int err = 0, n, len, start, end = -1; 13774 bool *tmp; 13775 13776 *mask = NULL; 13777 *mask_sz = 0; 13778 13779 /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ 13780 while (*s) { 13781 if (*s == ',' || *s == '\n') { 13782 s++; 13783 continue; 13784 } 13785 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); 13786 if (n <= 0 || n > 2) { 13787 pr_warn("Failed to get CPU range %s: %d\n", s, n); 13788 err = -EINVAL; 13789 goto cleanup; 13790 } else if (n == 1) { 13791 end = start; 13792 } 13793 if (start < 0 || start > end) { 13794 pr_warn("Invalid CPU range [%d,%d] in %s\n", 13795 start, end, s); 13796 err = -EINVAL; 13797 goto cleanup; 13798 } 13799 tmp = realloc(*mask, end + 1); 13800 if (!tmp) { 13801 err = -ENOMEM; 13802 goto cleanup; 13803 } 13804 *mask = tmp; 13805 memset(tmp + *mask_sz, 0, start - *mask_sz); 13806 memset(tmp + start, 1, end - start + 1); 13807 *mask_sz = end + 1; 13808 s += len; 13809 } 13810 if (!*mask_sz) { 13811 pr_warn("Empty CPU range\n"); 13812 return -EINVAL; 13813 } 13814 return 0; 13815 cleanup: 13816 free(*mask); 13817 *mask = NULL; 13818 return err; 13819 } 13820 13821 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) 13822 { 13823 int fd, err = 0, len; 13824 char buf[128]; 13825 13826 fd = open(fcpu, O_RDONLY | O_CLOEXEC); 13827 if (fd < 0) { 13828 err = -errno; 13829 pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); 13830 return err; 13831 } 13832 len = read(fd, buf, sizeof(buf)); 13833 close(fd); 13834 if (len <= 0) { 13835 err = len ? -errno : -EINVAL; 13836 pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); 13837 return err; 13838 } 13839 if (len >= sizeof(buf)) { 13840 pr_warn("CPU mask is too big in file %s\n", fcpu); 13841 return -E2BIG; 13842 } 13843 buf[len] = '\0'; 13844 13845 return parse_cpu_mask_str(buf, mask, mask_sz); 13846 } 13847 13848 int libbpf_num_possible_cpus(void) 13849 { 13850 static const char *fcpu = "/sys/devices/system/cpu/possible"; 13851 static int cpus; 13852 int err, n, i, tmp_cpus; 13853 bool *mask; 13854 13855 tmp_cpus = READ_ONCE(cpus); 13856 if (tmp_cpus > 0) 13857 return tmp_cpus; 13858 13859 err = parse_cpu_mask_file(fcpu, &mask, &n); 13860 if (err) 13861 return libbpf_err(err); 13862 13863 tmp_cpus = 0; 13864 for (i = 0; i < n; i++) { 13865 if (mask[i]) 13866 tmp_cpus++; 13867 } 13868 free(mask); 13869 13870 WRITE_ONCE(cpus, tmp_cpus); 13871 return tmp_cpus; 13872 } 13873 13874 static int populate_skeleton_maps(const struct bpf_object *obj, 13875 struct bpf_map_skeleton *maps, 13876 size_t map_cnt, size_t map_skel_sz) 13877 { 13878 int i; 13879 13880 for (i = 0; i < map_cnt; i++) { 13881 struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; 13882 struct bpf_map **map = map_skel->map; 13883 const char *name = map_skel->name; 13884 void **mmaped = map_skel->mmaped; 13885 13886 *map = bpf_object__find_map_by_name(obj, name); 13887 if (!*map) { 13888 pr_warn("failed to find skeleton map '%s'\n", name); 13889 return -ESRCH; 13890 } 13891 13892 /* externs shouldn't be pre-setup from user code */ 13893 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) 13894 *mmaped = (*map)->mmaped; 13895 } 13896 return 0; 13897 } 13898 13899 static int populate_skeleton_progs(const struct bpf_object *obj, 13900 struct bpf_prog_skeleton *progs, 13901 size_t prog_cnt, size_t prog_skel_sz) 13902 { 
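/* resolve each skeleton program slot to its struct bpf_program by name, mirroring populate_skeleton_maps() above */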
13903 int i;
13904
13905 for (i = 0; i < prog_cnt; i++) {
13906 struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
13907 struct bpf_program **prog = prog_skel->prog;
13908 const char *name = prog_skel->name;
13909
13910 *prog = bpf_object__find_program_by_name(obj, name);
13911 if (!*prog) {
13912 pr_warn("failed to find skeleton program '%s'\n", name);
13913 return -ESRCH;
13914 }
13915 }
13916 return 0;
13917 }
13918
13919 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
13920 const struct bpf_object_open_opts *opts)
13921 {
13922 struct bpf_object *obj;
13923 int err;
13924
13925 obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
13926 if (IS_ERR(obj)) {
13927 err = PTR_ERR(obj);
13928 pr_warn("failed to initialize skeleton BPF object '%s': %s\n",
13929 s->name, errstr(err));
13930 return libbpf_err(err);
13931 }
13932
13933 *s->obj = obj;
13934 err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
13935 if (err) {
13936 pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err));
13937 return libbpf_err(err);
13938 }
13939
13940 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
13941 if (err) {
13942 pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err));
13943 return libbpf_err(err);
13944 }
13945
13946 return 0;
13947 }
13948
13949 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
13950 {
13951 int err, len, var_idx, i;
13952 const char *var_name;
13953 const struct bpf_map *map;
13954 struct btf *btf;
13955 __u32 map_type_id;
13956 const struct btf_type *map_type, *var_type;
13957 const struct bpf_var_skeleton *var_skel;
13958 struct btf_var_secinfo *var;
13959
13960 if (!s->obj)
13961 return libbpf_err(-EINVAL);
13962
13963 btf = bpf_object__btf(s->obj);
13964 if (!btf) {
13965 pr_warn("subskeletons require BTF at runtime (object %s)\n",
13966 bpf_object__name(s->obj));
13967 return libbpf_err(-errno);
13968 }
13969
13970 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
13971 if (err) {
13972 pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
13973 return libbpf_err(err);
13974 }
13975
13976 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
13977 if (err) {
13978 pr_warn("failed to populate subskeleton progs: %s\n", errstr(err));
13979 return libbpf_err(err);
13980 }
13981
13982 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
13983 var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
13984 map = *var_skel->map;
13985 map_type_id = bpf_map__btf_value_type_id(map);
13986 map_type = btf__type_by_id(btf, map_type_id);
13987
13988 if (!btf_is_datasec(map_type)) {
13989 pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
13990 bpf_map__name(map),
13991 __btf_kind_str(btf_kind(map_type)));
13992 return libbpf_err(-EINVAL);
13993 }
13994
13995 len = btf_vlen(map_type);
13996 var = btf_var_secinfos(map_type);
13997 for (i = 0; i < len; i++, var++) {
13998 var_type = btf__type_by_id(btf, var->type);
13999 var_name = btf__name_by_offset(btf, var_type->name_off);
14000 if (strcmp(var_name, var_skel->name) == 0) {
14001 *var_skel->addr = map->mmaped + var->offset;
14002 break;
14003 }
14004 }
14005 }
14006 return 0;
14007 }
14008
14009 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
14010 {
14011 if (!s)
14012 return;
14013 free(s->maps);
14014 free(s->progs);
14015 free(s->vars);
14016 free(s);
14017 }
14018
14019 int
bpf_object__load_skeleton(struct bpf_object_skeleton *s) 14020 { 14021 int i, err; 14022 14023 err = bpf_object__load(*s->obj); 14024 if (err) { 14025 pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); 14026 return libbpf_err(err); 14027 } 14028 14029 for (i = 0; i < s->map_cnt; i++) { 14030 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14031 struct bpf_map *map = *map_skel->map; 14032 14033 if (!map_skel->mmaped) 14034 continue; 14035 14036 *map_skel->mmaped = map->mmaped; 14037 } 14038 14039 return 0; 14040 } 14041 14042 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) 14043 { 14044 int i, err; 14045 14046 for (i = 0; i < s->prog_cnt; i++) { 14047 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; 14048 struct bpf_program *prog = *prog_skel->prog; 14049 struct bpf_link **link = prog_skel->link; 14050 14051 if (!prog->autoload || !prog->autoattach) 14052 continue; 14053 14054 /* auto-attaching not supported for this program */ 14055 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 14056 continue; 14057 14058 /* if user already set the link manually, don't attempt auto-attach */ 14059 if (*link) 14060 continue; 14061 14062 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); 14063 if (err) { 14064 pr_warn("prog '%s': failed to auto-attach: %s\n", 14065 bpf_program__name(prog), errstr(err)); 14066 return libbpf_err(err); 14067 } 14068 14069 /* It's possible that for some SEC() definitions auto-attach 14070 * is supported in some cases (e.g., if definition completely 14071 * specifies target information), but is not in other cases. 14072 * SEC("uprobe") is one such case. If user specified target 14073 * binary and function name, such BPF program can be 14074 * auto-attached. But if not, it shouldn't trigger skeleton's 14075 * attach to fail. It should just be skipped. 14076 * attach_fn signals such case with returning 0 (no error) and 14077 * setting link to NULL. 
14078 */ 14079 } 14080 14081 14082 for (i = 0; i < s->map_cnt; i++) { 14083 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14084 struct bpf_map *map = *map_skel->map; 14085 struct bpf_link **link; 14086 14087 if (!map->autocreate || !map->autoattach) 14088 continue; 14089 14090 /* only struct_ops maps can be attached */ 14091 if (!bpf_map__is_struct_ops(map)) 14092 continue; 14093 14094 /* skeleton is created with earlier version of bpftool, notify user */ 14095 if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { 14096 pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", 14097 bpf_map__name(map)); 14098 continue; 14099 } 14100 14101 link = map_skel->link; 14102 if (*link) 14103 continue; 14104 14105 *link = bpf_map__attach_struct_ops(map); 14106 if (!*link) { 14107 err = -errno; 14108 pr_warn("map '%s': failed to auto-attach: %s\n", 14109 bpf_map__name(map), errstr(err)); 14110 return libbpf_err(err); 14111 } 14112 } 14113 14114 return 0; 14115 } 14116 14117 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) 14118 { 14119 int i; 14120 14121 for (i = 0; i < s->prog_cnt; i++) { 14122 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; 14123 struct bpf_link **link = prog_skel->link; 14124 14125 bpf_link__destroy(*link); 14126 *link = NULL; 14127 } 14128 14129 if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) 14130 return; 14131 14132 for (i = 0; i < s->map_cnt; i++) { 14133 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14134 struct bpf_link **link = map_skel->link; 14135 14136 if (link) { 14137 bpf_link__destroy(*link); 14138 *link = NULL; 14139 } 14140 } 14141 } 14142 14143 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) 14144 { 14145 if (!s) 14146 return; 14147 14148 bpf_object__detach_skeleton(s); 14149 if (s->obj) 14150 bpf_object__close(*s->obj); 14151 free(s->maps); 14152 free(s->progs); 14153 free(s); 14154 } 14155
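/* The skeleton helpers above are normally driven by bpftool-generated
 * skeleton code rather than called directly. A rough, illustrative sketch of
 * the intended lifecycle (assuming 's' is a struct bpf_object_skeleton that
 * generated code has fully populated; error handling abbreviated):
 *
 *	err = bpf_object__open_skeleton(s, NULL);     // parse embedded ELF data into a bpf_object
 *	if (!err)
 *		err = bpf_object__load_skeleton(s);   // create maps, load programs, set up mmaped pointers
 *	if (!err)
 *		err = bpf_object__attach_skeleton(s); // auto-attach programs and struct_ops maps
 *
 *	// ... use the loaded BPF programs and maps ...
 *
 *	bpf_object__detach_skeleton(s);   // destroy all links created during attach
 *	bpf_object__destroy_skeleton(s);  // close the object and free skeleton memory
 */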